xref: /vim-8.2.3635/src/spell.c (revision 92d640fa)
1 /* vi:set ts=8 sts=4 sw=4:
2  *
3  * VIM - Vi IMproved	by Bram Moolenaar
4  *
5  * Do ":help uganda"  in Vim to read copying and usage conditions.
6  * Do ":help credits" in Vim to see a list of people who contributed.
7  * See README.txt for an overview of the Vim source code.
8  */
9 
10 /*
11  * spell.c: code for spell checking
12  *
13  * The spell checking mechanism uses a tree (aka trie).  Each node in the tree
14  * has a list of bytes that can appear (siblings).  For each byte there is a
15  * pointer to the node with the byte that follows in the word (child).
16  *
17  * A NUL byte is used where the word may end.  The bytes are sorted, so that
18  * binary searching can be used and the NUL bytes are at the start.  The
19  * number of possible bytes is stored before the list of bytes.
20  *
21  * The tree uses two arrays: "byts" stores the characters, "idxs" stores
22  * either the next index or flags.  The tree starts at index 0.  For example,
23  * to lookup "vi" this sequence is followed:
24  *	i = 0
25  *	len = byts[i]
26  *	n = where "v" appears in byts[i + 1] to byts[i + len]
27  *	i = idxs[n]
28  *	len = byts[i]
29  *	n = where "i" appears in byts[i + 1] to byts[i + len]
30  *	i = idxs[n]
31  *	len = byts[i]
32  *	find that byts[i + 1] is 0, idxs[i + 1] has flags for "vi".
33  *
34  * There are two word trees: one with case-folded words and one with words in
35  * original case.  The second one is only used for keep-case words and is
36  * usually small.
37  *
38  * There is one additional tree for when not all prefixes are applied when
39  * generating the .spl file.  This tree stores all the possible prefixes, as
40  * if they were words.  At each word (prefix) end the prefix nr is stored, the
41  * following word must support this prefix nr.  And the condition nr is
42  * stored, used to lookup the condition that the word must match with.
43  *
44  * Thanks to Olaf Seibert for providing an example implementation of this tree
45  * and the compression mechanism.
46  *
47  * Matching involves checking the caps type: Onecap ALLCAP KeepCap.
48  *
49  * Why doesn't Vim use aspell/ispell/myspell/etc.?
50  * See ":help develop-spell".
51  */
52 
53 /* Use SPELL_PRINTTREE for debugging: dump the word tree after adding a word.
54  * Only use it for small word lists! */
55 #if 0
56 # define SPELL_PRINTTREE
57 #endif
58 
/*
 * Use this to adjust the score after finding suggestions, based on the
 * suggested word sounding like the bad word.  This is much faster than doing
 * it for every possible suggestion.
 * Disadvantage: When "the" is typed as "hte" it sounds different and goes
 * down in the list.
 * Used when 'spellsuggest' is set to "best".
 *
 * The result is (3 * word_score + sound_score) / 4, using integer division.
 * Both arguments are parenthesized in the expansion so that compound
 * expressions (e.g. "a + b") are weighted as a whole; each argument is
 * evaluated exactly once.
 */
#define RESCORE(word_score, sound_score) \
	((3 * (word_score) + (sound_score)) / 4)
68 
69 /*
70  * Vim spell file format: <HEADER>
71  *			  <SECTIONS>
72  *			  <LWORDTREE>
73  *			  <KWORDTREE>
74  *			  <PREFIXTREE>
75  *
76  * <HEADER>: <fileID> <versionnr>
77  *
78  * <fileID>     8 bytes    "VIMspell"
79  * <versionnr>  1 byte	    VIMSPELLVERSION
80  *
81  *
82  * Sections make it possible to add information to the .spl file without
83  * making it incompatible with previous versions.  There are two kinds of
84  * sections:
85  * 1. Not essential for correct spell checking.  E.g. for making suggestions.
86  *    These are skipped when not supported.
87  * 2. Optional information, but essential for spell checking when present.
88  *    E.g. conditions for affixes.  When this section is present but not
89  *    supported an error message is given.
90  *
91  * <SECTIONS>: <section> ... <sectionend>
92  *
93  * <section>: <sectionID> <sectionflags> <sectionlen> (section contents)
94  *
95  * <sectionID>	  1 byte    number from 0 to 254 identifying the section
96  *
97  * <sectionflags> 1 byte    SNF_REQUIRED: this section is required for correct
98  *					    spell checking
99  *
100  * <sectionlen>   4 bytes   length of section contents, MSB first
101  *
102  * <sectionend>	  1 byte    SN_END
103  *
104  *
105  * sectionID == SN_REGION: <regionname> ...
106  * <regionname>	 2 bytes    Up to 8 region names: ca, au, etc.  Lower case.
107  *			    First <regionname> is region 1.
108  *
109  * sectionID == SN_CHARFLAGS: <charflagslen> <charflags>
110  *				<folcharslen> <folchars>
111  * <charflagslen> 1 byte    Number of bytes in <charflags> (should be 128).
112  * <charflags>  N bytes     List of flags (first one is for character 128):
113  *			    0x01  word character	CF_WORD
114  *			    0x02  upper-case character	CF_UPPER
115  * <folcharslen>  2 bytes   Number of bytes in <folchars>.
116  * <folchars>     N bytes   Folded characters, first one is for character 128.
117  *
118  * sectionID == SN_MIDWORD: <midword>
119  * <midword>     N bytes    Characters that are word characters only when used
120  *			    in the middle of a word.
121  *
122  * sectionID == SN_PREFCOND: <prefcondcnt> <prefcond> ...
123  * <prefcondcnt> 2 bytes    Number of <prefcond> items following.
124  * <prefcond> : <condlen> <condstr>
125  * <condlen>	1 byte	    Length of <condstr>.
126  * <condstr>	N bytes	    Condition for the prefix.
127  *
128  * sectionID == SN_REP: <repcount> <rep> ...
129  * <repcount>	 2 bytes    number of <rep> items, MSB first.
130  * <rep> : <repfromlen> <repfrom> <reptolen> <repto>
131  * <repfromlen>	 1 byte	    length of <repfrom>
132  * <repfrom>	 N bytes    "from" part of replacement
133  * <reptolen>	 1 byte	    length of <repto>
134  * <repto>	 N bytes    "to" part of replacement
135  *
136  * sectionID == SN_SAL: <salflags> <salcount> <sal> ...
137  * <salflags>	 1 byte	    flags for soundsalike conversion:
138  *			    SAL_F0LLOWUP
139  *			    SAL_COLLAPSE
140  *			    SAL_REM_ACCENTS
141  * <salcount>    2 bytes    number of <sal> items following
142  * <sal> : <salfromlen> <salfrom> <saltolen> <salto>
143  * <salfromlen>	 1 byte	    length of <salfrom>
144  * <salfrom>	 N bytes    "from" part of soundsalike
145  * <saltolen>	 1 byte	    length of <salto>
146  * <salto>	 N bytes    "to" part of soundsalike
147  *
148  * sectionID == SN_SOFO: <sofofromlen> <sofofrom> <sofotolen> <sofoto>
149  * <sofofromlen> 2 bytes    length of <sofofrom>
150  * <sofofrom>	 N bytes    "from" part of soundfold
151  * <sofotolen>	 2 bytes    length of <sofoto>
152  * <sofoto>	 N bytes    "to" part of soundfold
153  *
154  * sectionID == SN_MAP: <mapstr>
155  * <mapstr>	 N bytes    String with sequences of similar characters,
156  *			    separated by slashes.
157  *
158  * sectionID == SN_COMPOUND: <compmax> <compminlen> <compsylmax> <compflags>
159  * <compmax>     1 byte	    Maximum nr of words in compound word.
160  * <compminlen>  1 byte	    Minimal word length for compounding.
161  * <compsylmax>  1 byte	    Maximum nr of syllables in compound word.
162  * <compflags>   N bytes    Flags from COMPOUNDFLAGS items, separated by
163  *			    slashes.
164  *
165  * sectionID == SN_NOBREAK: (empty, its presence is enough)
166  *
167  * sectionID == SN_SYLLABLE: <syllable>
168  * <syllable>    N bytes    String from SYLLABLE item.
169  *
170  * <LWORDTREE>: <wordtree>
171  *
172  * <KWORDTREE>: <wordtree>
173  *
174  * <PREFIXTREE>: <wordtree>
175  *
176  *
177  * <wordtree>: <nodecount> <nodedata> ...
178  *
179  * <nodecount>	4 bytes	    Number of nodes following.  MSB first.
180  *
181  * <nodedata>: <siblingcount> <sibling> ...
182  *
183  * <siblingcount> 1 byte    Number of siblings in this node.  The siblings
184  *			    follow in sorted order.
185  *
186  * <sibling>: <byte> [ <nodeidx> <xbyte>
187  *		      | <flags> [<flags2>] [<region>] [<affixID>]
188  *		      | [<pflags>] <affixID> <prefcondnr> ]
189  *
190  * <byte>	1 byte	    Byte value of the sibling.  Special cases:
191  *			    BY_NOFLAGS: End of word without flags and for all
192  *					regions.
193  *					For PREFIXTREE <affixID> and
194  *					<prefcondnr> follow.
195  *			    BY_FLAGS:   End of word, <flags> follow.
196  *					For PREFIXTREE <pflags>, <affixID>
197  *					and <prefcondnr> follow.
198  *			    BY_FLAGS2:  End of word, <flags> and <flags2>
199  *					follow.  Not used in PREFIXTREE.
200  *			    BY_INDEX:   Child of sibling is shared, <nodeidx>
201  *					and <xbyte> follow.
202  *
203  * <nodeidx>	3 bytes	    Index of child for this sibling, MSB first.
204  *
205  * <xbyte>	1 byte	    byte value of the sibling.
206  *
207  * <flags>	1 byte	    bitmask of:
208  *			    WF_ALLCAP	word must have only capitals
209  *			    WF_ONECAP   first char of word must be capital
210  *			    WF_KEEPCAP	keep-case word
211  *			    WF_FIXCAP   keep-case word, all caps not allowed
212  *			    WF_RARE	rare word
213  *			    WF_BANNED	bad word
214  *			    WF_REGION	<region> follows
215  *			    WF_AFX	<affixID> follows
216  *
217  * <flags2>	1 byte	    Bitmask of:
218  *			    WF_HAS_AFF >> 8   word includes affix
219  *			    WF_NEEDCOMP >> 8  word only valid in compound
220  *
221  * <pflags>	1 byte	    bitmask of:
222  *			    WFP_RARE	rare prefix
223  *			    WFP_NC	non-combining prefix
224  *			    WFP_UP	letter after prefix made upper case
225  *
226  * <region>	1 byte	    Bitmask for regions in which word is valid.  When
227  *			    omitted it's valid in all regions.
228  *			    Lowest bit is for region 1.
229  *
230  * <affixID>	1 byte	    ID of affix that can be used with this word.  In
231  *			    PREFIXTREE used for the required prefix ID.
232  *
233  * <prefcondnr>	2 bytes	    Prefix condition number, index in <prefcond> list
234  *			    from the SN_PREFCOND section.
235  *
236  * All text characters are in 'encoding', but stored as single bytes.
237  */
238 
239 #if defined(MSDOS) || defined(WIN16) || defined(WIN32) || defined(_WIN64)
240 # include <io.h>	/* for lseek(), must be before vim.h */
241 #endif
242 
243 #include "vim.h"
244 
245 #if defined(FEAT_SYN_HL) || defined(PROTO)
246 
247 #ifdef HAVE_FCNTL_H
248 # include <fcntl.h>
249 #endif
250 
251 #define MAXWLEN 250		/* Assume max. word len is this many bytes.
252 				   Some places assume a word length fits in a
253 				   byte, thus it can't be above 255. */
254 
255 /* The type used for indexes in the word tree needs to be at least 4 bytes.
256  * If int is 8 bytes we could use something smaller, but what? */
257 #if SIZEOF_INT > 3
258 typedef int idx_T;
259 #else
260 typedef long idx_T;
261 #endif
262 
263 /* Flags used for a word.  Only the lowest byte can be used, the region byte
264  * comes above it. */
265 #define WF_REGION   0x01	/* region byte follows */
266 #define WF_ONECAP   0x02	/* word with one capital (or all capitals) */
267 #define WF_ALLCAP   0x04	/* word must be all capitals */
268 #define WF_RARE	    0x08	/* rare word */
269 #define WF_BANNED   0x10	/* bad word */
270 #define WF_AFX	    0x20	/* affix ID follows */
271 #define WF_FIXCAP   0x40	/* keep-case word, allcap not allowed */
272 #define WF_KEEPCAP  0x80	/* keep-case word */
273 
274 /* for <flags2>, shifted up one byte to be used in wn_flags */
275 #define WF_HAS_AFF  0x0100	/* word includes affix */
276 #define WF_NEEDCOMP 0x0200	/* word only valid in compound */
277 
278 #define WF_CAPMASK (WF_ONECAP | WF_ALLCAP | WF_KEEPCAP | WF_FIXCAP)
279 
280 /* flags for <pflags> */
281 #define WFP_RARE    0x01	/* rare prefix */
282 #define WFP_NC	    0x02	/* prefix is not combining */
283 #define WFP_UP	    0x04	/* to-upper prefix */
284 
285 /* Flags for postponed prefixes.  Must be above affixID (one byte)
286  * and prefcondnr (two bytes). */
287 #define WF_RAREPFX  (WFP_RARE << 24)	/* in sl_pidxs: flag for rare
288 					 * postponed prefix */
289 #define WF_PFX_NC   (WFP_NC << 24)	/* in sl_pidxs: flag for non-combining
290 					 * postponed prefix */
291 #define WF_PFX_UP   (WFP_UP << 24)	/* in sl_pidxs: flag for to-upper
292 					 * postponed prefix */
293 
294 /* Special byte values for <byte>.  Some are only used in the tree for
295  * postponed prefixes, some only in the other trees.  This is a bit messy... */
296 #define BY_NOFLAGS	0	/* end of word without flags or region; for
297 				 * postponed prefix: no <pflags> */
298 #define BY_INDEX	1	/* child is shared, index follows */
299 #define BY_FLAGS	2	/* end of word, <flags> byte follows; for
300 				 * postponed prefix: <pflags> follows */
301 #define BY_FLAGS2	3	/* end of word, <flags> and <flags2> bytes
302 				 * follow; never used in prefix tree */
303 #define BY_SPECIAL  BY_FLAGS2	/* highest special byte value */
304 
305 /* Info from "REP" and "SAL" entries in ".aff" file used in si_rep, sl_rep,
306  * and si_sal.  Not for sl_sal!
307  * One replacement: from "ft_from" to "ft_to". */
308 typedef struct fromto_S
309 {
310     char_u	*ft_from;
311     char_u	*ft_to;
312 } fromto_T;
313 
314 /* Info from "SAL" entries in ".aff" file used in sl_sal.
315  * The info is split for quick processing by spell_soundfold().
316  * Note that "sm_oneof" and "sm_rules" point into sm_lead. */
317 typedef struct salitem_S
318 {
319     char_u	*sm_lead;	/* leading letters */
320     int		sm_leadlen;	/* length of "sm_lead" */
321     char_u	*sm_oneof;	/* letters from () or NULL */
322     char_u	*sm_rules;	/* rules like ^, $, priority */
323     char_u	*sm_to;		/* replacement. */
324 #ifdef FEAT_MBYTE
325     int		*sm_lead_w;	/* wide character copy of "sm_lead" */
326     int		*sm_oneof_w;	/* wide character copy of "sm_oneof" */
327     int		*sm_to_w;	/* wide character copy of "sm_to" */
328 #endif
329 } salitem_T;
330 
331 #ifdef FEAT_MBYTE
332 typedef int salfirst_T;
333 #else
334 typedef short salfirst_T;
335 #endif
336 
337 /* Values for SP_*ERROR are negative, positive values are used by
338  * read_cnt_string(). */
339 #define	SP_TRUNCERROR	-1	/* spell file truncated error */
340 #define	SP_FORMERROR	-2	/* format error in spell file */
341 #define SP_OTHERERROR	-3	/* other error while reading spell file */
342 
343 /*
344  * Structure used to store words and other info for one language, loaded from
345  * a .spl file.
346  * The main access is through the tree in "sl_fbyts/sl_fidxs", storing the
347  * case-folded words.  "sl_kbyts/sl_kidxs" is for keep-case words.
348  *
349  * The "byts" array stores the possible bytes in each tree node, preceded by
350  * the number of possible bytes, sorted on byte value:
351  *	<len> <byte1> <byte2> ...
352  * The "idxs" array stores the index of the child node corresponding to the
353  * byte in "byts".
354  * Exception: when the byte is zero, the word may end here and "idxs" holds
355  * the flags, region mask and affixID for the word.  There may be several
356  * zeros in sequence for alternative flag/region/affixID combinations.
357  */
358 typedef struct slang_S slang_T;
359 struct slang_S
360 {
361     slang_T	*sl_next;	/* next language */
362     char_u	*sl_name;	/* language name "en", "en.rare", "nl", etc. */
363     char_u	*sl_fname;	/* name of .spl file */
364     int		sl_add;		/* TRUE if it's a .add file. */
365 
366     char_u	*sl_fbyts;	/* case-folded word bytes */
367     idx_T	*sl_fidxs;	/* case-folded word indexes */
368     char_u	*sl_kbyts;	/* keep-case word bytes */
369     idx_T	*sl_kidxs;	/* keep-case word indexes */
370     char_u	*sl_pbyts;	/* prefix tree word bytes */
371     idx_T	*sl_pidxs;	/* prefix tree word indexes */
372 
373     char_u	sl_regions[17];	/* table with up to 8 two-byte region names
				 * plus NUL */
374 
375     char_u	*sl_midword;	/* MIDWORD string or NULL */
376 
377     int		sl_compmax;	/* COMPOUNDMAX (default: MAXWLEN) */
378     int		sl_compminlen;	/* COMPOUNDMIN (default: 0) */
379     int		sl_compsylmax;	/* COMPOUNDSYLMAX (default: MAXWLEN) */
380     regprog_T	*sl_compprog;	/* COMPOUNDFLAGS turned into a regexp program
381 				 * (NULL when no compounding) */
382     char_u	*sl_compstartflags; /* flags for first compound word */
383     char_u	*sl_compallflags; /* all flags for compound words */
384     char_u	sl_nobreak;	/* When TRUE: no spaces between words */
385     char_u	*sl_syllable;	/* SYLLABLE repeatable chars or NULL */
386     garray_T	sl_syl_items;	/* syllable items */
387 
388     int		sl_prefixcnt;	/* number of items in "sl_prefprog" */
389     regprog_T	**sl_prefprog;	/* table with regprogs for prefixes */
390 
391     garray_T	sl_rep;		/* list of fromto_T entries from REP lines */
392     short	sl_rep_first[256];  /* indexes where byte first appears, -1 if
393 				       there is none */
394     garray_T	sl_sal;		/* list of salitem_T entries from SAL lines */
395     salfirst_T	sl_sal_first[256];  /* indexes where byte first appears, -1 if
396 				       there is none */
397     int		sl_sofo;	/* SOFOFROM and SOFOTO instead of SAL items:
398 				 * "sl_sal_first" maps chars, when has_mbyte
399 				 * "sl_sal" is a list of wide char lists. */
400     int		sl_followup;	/* SAL followup */
401     int		sl_collapse;	/* SAL collapse_result */
402     int		sl_rem_accents;	/* SAL remove_accents */
403     int		sl_has_map;	/* TRUE if there is a MAP line */
404 #ifdef FEAT_MBYTE
405     hashtab_T	sl_map_hash;	/* MAP for multi-byte chars */
406     int		sl_map_array[256]; /* MAP for first 256 chars */
407 #else
408     char_u	sl_map_array[256]; /* MAP for first 256 chars */
409 #endif
410 };
411 
412 /* First language that is loaded, start of the linked list of loaded
413  * languages. */
414 static slang_T *first_lang = NULL;
415 
416 /* Flags used in .spl file for soundsalike flags. */
417 #define SAL_F0LLOWUP		1
418 #define SAL_COLLAPSE		2
419 #define SAL_REM_ACCENTS		4
420 
421 /*
422  * Structure used in "b_langp", filled from 'spelllang'.
423  */
424 typedef struct langp_S
425 {
426     slang_T	*lp_slang;	/* info for this language */
427     slang_T	*lp_sallang;	/* language used for sound folding or NULL */
428     slang_T	*lp_replang;	/* language used for REP items or NULL */
429     int		lp_region;	/* bitmask for region or REGION_ALL */
430 } langp_T;
431 
432 #define LANGP_ENTRY(ga, i)	(((langp_T *)(ga).ga_data) + (i))
433 
434 #define REGION_ALL 0xff		/* word valid in all regions */
435 
436 #define VIMSPELLMAGIC "VIMspell"  /* string at start of Vim spell file */
437 #define VIMSPELLMAGICL 8
438 #define VIMSPELLVERSION 50
439 
440 /* Section IDs.  Only renumber them when VIMSPELLVERSION changes! */
441 #define SN_REGION	0	/* <regionname> section */
442 #define SN_CHARFLAGS	1	/* charflags section */
443 #define SN_MIDWORD	2	/* <midword> section */
444 #define SN_PREFCOND	3	/* <prefcond> section */
445 #define SN_REP		4	/* REP items section */
446 #define SN_SAL		5	/* SAL items section */
447 #define SN_SOFO		6	/* soundfolding section */
448 #define SN_MAP		7	/* MAP items section */
449 #define SN_COMPOUND	8	/* compound words section */
450 #define SN_SYLLABLE	9	/* syllable section */
451 #define SN_NOBREAK	10	/* NOBREAK section */
452 #define SN_END		255	/* end of sections */
453 
454 #define SNF_REQUIRED	1	/* <sectionflags>: required section */
455 
456 /* Result values.  Lower number is accepted over higher one. */
457 #define SP_BANNED	-1
458 #define SP_OK		0
459 #define SP_RARE		1
460 #define SP_LOCAL	2
461 #define SP_BAD		3
462 
463 /* file used for "zG" and "zW" */
464 static char_u	*int_wordlist = NULL;
465 
466 /*
467  * Information used when looking for suggestions.
468  */
469 typedef struct suginfo_S
470 {
471     garray_T	su_ga;		    /* suggestions, contains "suggest_T" */
472     int		su_maxcount;	    /* max. number of suggestions displayed */
473     int		su_maxscore;	    /* maximum score for adding to su_ga */
474     garray_T	su_sga;		    /* like su_ga, sound-folded scoring */
475     char_u	*su_badptr;	    /* start of bad word in line */
476     int		su_badlen;	    /* length of detected bad word in line */
477     int		su_badflags;	    /* caps flags for bad word */
478     char_u	su_badword[MAXWLEN]; /* bad word truncated at su_badlen */
479     char_u	su_fbadword[MAXWLEN]; /* su_badword case-folded */
480     hashtab_T	su_banned;	    /* table with banned words */
481     slang_T	*su_sallang;	    /* default language for sound folding */
482 } suginfo_T;
483 
484 /* One word suggestion.  Used in "su_ga" of suginfo_T. */
485 typedef struct suggest_S
486 {
487     char_u	*st_word;	/* suggested word, allocated string */
488     int		st_orglen;	/* length of replaced text */
489     int		st_score;	/* lower is better */
490     int		st_altscore;	/* used when st_score compares equal */
491     int		st_salscore;	/* st_score is for soundalike */
492     int		st_had_bonus;	/* bonus already included in score */
493     slang_T	*st_slang;	/* language used for sound folding */
494 } suggest_T;
495 
496 #define SUG(ga, i) (((suggest_T *)(ga).ga_data)[i])
497 
498 /* Number of suggestions kept when cleaning up.  When rescore_suggestions() is
499  * called the score may change, thus we need to keep more than what is
500  * displayed. */
501 #define SUG_CLEAN_COUNT(su)    ((su)->su_maxcount < 50 ? 50 : (su)->su_maxcount)
502 
503 /* Threshold for sorting and cleaning up suggestions.  Don't want to keep lots
504  * of suggestions that are not going to be displayed. */
505 #define SUG_MAX_COUNT(su)    ((su)->su_maxcount + 50)
506 
/* score for various changes */
#define SCORE_SPLIT	149	/* split bad word */
#define SCORE_ICASE	52	/* slightly different case */
#define SCORE_REGION	200	/* word is for different region */
#define SCORE_RARE	180	/* rare word */
#define SCORE_SWAP	90	/* swap two characters */
#define SCORE_SWAP3	110	/* swap two characters in three */
#define SCORE_REP	87	/* REP replacement */
#define SCORE_SUBST	93	/* substitute a character */
#define SCORE_SIMILAR	33	/* substitute a similar character */
#define SCORE_SUBCOMP	33	/* substitute a composing character */
#define SCORE_DEL	94	/* delete a character */
#define SCORE_DELDUP	64	/* delete a duplicated character */
#define SCORE_DELCOMP	28	/* delete a composing character */
#define SCORE_INS	96	/* insert a character */
#define SCORE_INSDUP	66	/* insert a duplicate character */
#define SCORE_INSCOMP	30	/* insert a composing character */
#define SCORE_NONWORD	103	/* change non-word to word char */

#define SCORE_FILE	30	/* suggestion from a file */
#define SCORE_MAXINIT	350	/* Initial maximum score: higher == slower.
				 * 350 allows for about three changes. */

/* The product is parenthesized so that it stays a single operand wherever the
 * macro is expanded (e.g. as a divisor or after a unary operator). */
#define SCORE_BIG	(SCORE_INS * 3)	/* big difference */
#define SCORE_MAXMAX	999999	/* accept any score */
532 
533 /*
534  * Structure to store info for word matching.
535  */
536 typedef struct matchinf_S
537 {
538     langp_T	*mi_lp;			/* info for language and region */
539 
540     /* pointers to original text to be checked */
541     char_u	*mi_word;		/* start of word being checked */
542     char_u	*mi_end;		/* end of matching word so far */
543     char_u	*mi_fend;		/* next char to be added to mi_fword */
544     char_u	*mi_cend;		/* char after what was used for
545 					   mi_capflags */
546 
547     /* case-folded text */
548     char_u	mi_fword[MAXWLEN + 1];	/* mi_word case-folded */
549     int		mi_fwordlen;		/* nr of valid bytes in mi_fword */
550 
551     /* for when checking word after a prefix */
552     int		mi_prefarridx;		/* index in sl_pidxs with list of
553 					   affixID/condition */
554     int		mi_prefcnt;		/* number of entries at mi_prefarridx */
555     int		mi_prefixlen;		/* byte length of prefix */
556 #ifdef FEAT_MBYTE
557     int		mi_cprefixlen;		/* byte length of prefix in original
558 					   case */
559 #else
560 # define mi_cprefixlen mi_prefixlen	/* it's the same value */
561 #endif
562 
563     /* for when checking a compound word */
564     int		mi_compoff;		/* start of following word offset */
565     char_u	mi_compflags[MAXWLEN];	/* flags for compound words used */
566     int		mi_complen;		/* nr of compound words used */
567 
568     /* others */
569     int		mi_result;		/* result so far: SP_BAD, SP_OK, etc. */
570     int		mi_capflags;		/* WF_ONECAP WF_ALLCAP WF_KEEPCAP */
571     buf_T	*mi_buf;		/* buffer being checked */
572 
573     /* for NOBREAK */
574     int		mi_result2;		/* "mi_result" without following word */
575     char_u	*mi_end2;		/* "mi_end" without following word */
576 } matchinf_T;
577 
578 /*
579  * The tables used for recognizing word characters according to spelling.
580  * These are only used for the first 256 characters of 'encoding'.
581  */
582 typedef struct spelltab_S
583 {
584     char_u  st_isw[256];	/* flags: is word char */
585     char_u  st_isu[256];	/* flags: is uppercase char */
586     char_u  st_fold[256];	/* chars: folded case */
587     char_u  st_upper[256];	/* chars: upper case */
588 } spelltab_T;
589 
590 static spelltab_T   spelltab;
591 static int	    did_set_spelltab;
592 
593 #define CF_WORD		0x01
594 #define CF_UPPER	0x02
595 
596 static void clear_spell_chartab __ARGS((spelltab_T *sp));
597 static int set_spell_finish __ARGS((spelltab_T	*new_st));
598 static int spell_iswordp __ARGS((char_u *p, buf_T *buf));
599 static int spell_iswordp_nmw __ARGS((char_u *p));
600 #ifdef FEAT_MBYTE
601 static int spell_iswordp_w __ARGS((int *p, buf_T *buf));
602 #endif
603 static int write_spell_prefcond __ARGS((FILE *fd, garray_T *gap));
604 
605 /*
606  * For finding suggestions: At each node in the tree these states are tried:
607  */
608 typedef enum
609 {
610     STATE_START = 0,	/* At start of node check for NUL bytes (goodword
611 			 * ends); if badword ends there is a match, otherwise
612 			 * try splitting word. */
613     STATE_NOPREFIX,	/* try without prefix */
614     STATE_SPLITUNDO,	/* Undo splitting. */
615     STATE_ENDNUL,	/* Past NUL bytes at start of the node. */
616     STATE_PLAIN,	/* Use each byte of the node. */
617     STATE_DEL,		/* Delete a byte from the bad word. */
618     STATE_INS,		/* Insert a byte in the bad word. */
619     STATE_SWAP,		/* Swap two bytes. */
620     STATE_UNSWAP,	/* Undo swap two characters. */
621     STATE_SWAP3,	/* Swap two characters over three. */
622     STATE_UNSWAP3,	/* Undo Swap two characters over three. */
623     STATE_UNROT3L,	/* Undo rotate three characters left */
624     STATE_UNROT3R,	/* Undo rotate three characters right */
625     STATE_REP_INI,	/* Prepare for using REP items. */
626     STATE_REP,		/* Use matching REP items from the .aff file. */
627     STATE_REP_UNDO,	/* Undo a REP item replacement. */
628     STATE_FINAL		/* End of this node. */
629 } state_T;
630 
631 /*
632  * Struct to keep the state at each level in suggest_try_change().
633  */
634 typedef struct trystate_S
635 {
636     state_T	ts_state;	/* state at this level, STATE_ */
637     int		ts_score;	/* score */
638     idx_T	ts_arridx;	/* index in tree array, start of node */
639     short	ts_curi;	/* index in list of child nodes */
640     char_u	ts_fidx;	/* index in fword[], case-folded bad word */
641     char_u	ts_fidxtry;	/* ts_fidx at which bytes may be changed */
642     char_u	ts_twordlen;	/* valid length of tword[] */
643     char_u	ts_prefixdepth;	/* stack depth for end of prefix or
644 				 * PFD_PREFIXTREE or PFD_NOPREFIX */
645     char_u	ts_flags;	/* TSF_ flags */
646 #ifdef FEAT_MBYTE
647     char_u	ts_tcharlen;	/* number of bytes in tword character */
648     char_u	ts_tcharidx;	/* current byte index in tword character */
649     char_u	ts_isdiff;	/* DIFF_ values */
650     char_u	ts_fcharstart;	/* index in fword where badword char started */
651 #endif
652     char_u	ts_prewordlen;	/* length of word in "preword[]" */
653     char_u	ts_splitoff;	/* index in "tword" after last split */
654     char_u	ts_splitfidx;	/* "ts_fidx" at word split */
655     char_u	ts_complen;	/* nr of compound words used */
656     char_u	ts_compsplit;	/* index for "compflags" where word was split */
657     char_u	ts_save_badflags;   /* su_badflags saved here */
658 } trystate_T;
659 
660 /* values for ts_isdiff */
661 #define DIFF_NONE	0	/* no different byte (yet) */
662 #define DIFF_YES	1	/* different byte found */
663 #define DIFF_INSERT	2	/* inserting character */
664 
665 /* values for ts_flags */
666 #define TSF_PREFIXOK	1	/* already checked that prefix is OK */
667 #define TSF_DIDSPLIT	2	/* tried split at this point */
668 
669 /* special values ts_prefixdepth */
670 #define PFD_NOPREFIX	0xff	/* not using prefixes */
671 #define PFD_PREFIXTREE	0xfe	/* walking through the prefix tree */
672 #define PFD_NOTSPECIAL	0xfd	/* first value that's not special */
673 
674 /* mode values for find_word */
675 #define FIND_FOLDWORD	    0	/* find word case-folded */
676 #define FIND_KEEPWORD	    1	/* find keep-case word */
677 #define FIND_PREFIX	    2	/* find word after prefix */
678 #define FIND_COMPOUND	    3	/* find case-folded compound word */
679 #define FIND_KEEPCOMPOUND   4	/* find keep-case compound word */
680 
681 static slang_T *slang_alloc __ARGS((char_u *lang));
682 static void slang_free __ARGS((slang_T *lp));
683 static void slang_clear __ARGS((slang_T *lp));
684 static void find_word __ARGS((matchinf_T *mip, int mode));
685 static int can_compound __ARGS((slang_T *slang, char_u *word, char_u *flags));
686 static int valid_word_prefix __ARGS((int totprefcnt, int arridx, int flags, char_u *word, slang_T *slang, int cond_req));
687 static void find_prefix __ARGS((matchinf_T *mip, int mode));
688 static int fold_more __ARGS((matchinf_T *mip));
689 static int spell_valid_case __ARGS((int wordflags, int treeflags));
690 static int no_spell_checking __ARGS((win_T *wp));
691 static void spell_load_lang __ARGS((char_u *lang));
692 static char_u *spell_enc __ARGS((void));
693 static void int_wordlist_spl __ARGS((char_u *fname));
694 static void spell_load_cb __ARGS((char_u *fname, void *cookie));
695 static slang_T *spell_load_file __ARGS((char_u *fname, char_u *lang, slang_T *old_lp, int silent));
696 static char_u *read_cnt_string __ARGS((FILE *fd, int cnt_bytes, int *lenp));
697 static char_u *read_string __ARGS((FILE *fd, int cnt));
/*
 * Forward declarations of static (file-local) functions, so that they can be
 * used before their definition.
 * NOTE(review): __ARGS() presumably drops the parameter list for compilers
 * without prototype support -- confirm against its definition.
 */
698 static int read_region_section __ARGS((FILE *fd, slang_T *slang, int len));
699 static int read_charflags_section __ARGS((FILE *fd));
700 static int read_prefcond_section __ARGS((FILE *fd, slang_T *lp));
701 static int read_rep_section __ARGS((FILE *fd, slang_T *slang));
702 static int read_sal_section __ARGS((FILE *fd, slang_T *slang));
703 static int read_sofo_section __ARGS((FILE *fd, slang_T *slang));
704 static int read_compound __ARGS((FILE *fd, slang_T *slang, int len));
705 static int byte_in_str __ARGS((char_u *str, int byte));
706 static int init_syl_tab __ARGS((slang_T *slang));
707 static int count_syllables __ARGS((slang_T *slang, char_u *word));
708 static int set_sofo __ARGS((slang_T *lp, char_u *from, char_u *to));
709 static void set_sal_first __ARGS((slang_T *lp));
/* Only declared when compiled with multi-byte support. */
710 #ifdef FEAT_MBYTE
711 static int *mb_str2wide __ARGS((char_u *s));
712 #endif
713 static idx_T read_tree __ARGS((FILE *fd, char_u *byts, idx_T *idxs, int maxidx, int startidx, int prefixtree, int maxprefcondnr));
714 static void clear_midword __ARGS((buf_T *buf));
715 static void use_midword __ARGS((slang_T *lp, buf_T *buf));
716 static int find_region __ARGS((char_u *rp, char_u *region));
717 static int captype __ARGS((char_u *word, char_u *end));
718 static int badword_captype __ARGS((char_u *word, char_u *end));
719 static void spell_reload_one __ARGS((char_u *fname, int added_word));
720 static void set_spell_charflags __ARGS((char_u *flags, int cnt, char_u *upp));
721 static int set_spell_chartab __ARGS((char_u *fol, char_u *low, char_u *upp));
722 static int spell_casefold __ARGS((char_u *p, int len, char_u *buf, int buflen));
723 static int check_need_cap __ARGS((linenr_T lnum, colnr_T col));
724 static void spell_find_suggest __ARGS((char_u *badptr, suginfo_T *su, int maxcount, int banbadword, int need_cap));
/* Only declared when compiled with expression evaluation support. */
725 #ifdef FEAT_EVAL
726 static void spell_suggest_expr __ARGS((suginfo_T *su, char_u *expr));
727 #endif
728 static void spell_suggest_file __ARGS((suginfo_T *su, char_u *fname));
729 static void spell_suggest_intern __ARGS((suginfo_T *su));
730 static void spell_find_cleanup __ARGS((suginfo_T *su));
731 static void onecap_copy __ARGS((char_u *word, char_u *wcopy, int upper));
732 static void allcap_copy __ARGS((char_u *word, char_u *wcopy));
733 static void suggest_try_special __ARGS((suginfo_T *su));
734 static void suggest_try_change __ARGS((suginfo_T *su));
735 static int try_deeper __ARGS((suginfo_T *su, trystate_T *stack, int depth, int score_add));
736 #ifdef FEAT_MBYTE
737 static int nofold_len __ARGS((char_u *fword, int flen, char_u *word));
738 #endif
739 static void find_keepcap_word __ARGS((slang_T *slang, char_u *fword, char_u *kword));
740 static void score_comp_sal __ARGS((suginfo_T *su));
741 static void score_combine __ARGS((suginfo_T *su));
742 static int stp_sal_score __ARGS((suggest_T *stp, suginfo_T *su, slang_T *slang, char_u *badsound));
743 static void suggest_try_soundalike __ARGS((suginfo_T *su));
744 static void make_case_word __ARGS((char_u *fword, char_u *cword, int flags));
745 static void set_map_str __ARGS((slang_T *lp, char_u *map));
746 static int similar_chars __ARGS((slang_T *slang, int c1, int c2));
747 static void add_suggestion __ARGS((suginfo_T *su, garray_T *gap, char_u *goodword, int badlen, int score, int altscore, int had_bonus, slang_T *slang));
748 static void add_banned __ARGS((suginfo_T *su, char_u *word));
749 static int was_banned __ARGS((suginfo_T *su, char_u *word));
750 static void free_banned __ARGS((suginfo_T *su));
751 static void rescore_suggestions __ARGS((suginfo_T *su));
752 static int cleanup_suggestions __ARGS((garray_T *gap, int maxscore, int keep));
753 static void spell_soundfold __ARGS((slang_T *slang, char_u *inword, int folded, char_u *res));
754 static void spell_soundfold_sofo __ARGS((slang_T *slang, char_u *inword, char_u *res));
755 static void spell_soundfold_sal __ARGS((slang_T *slang, char_u *inword, char_u *res));
756 #ifdef FEAT_MBYTE
757 static void spell_soundfold_wsal __ARGS((slang_T *slang, char_u *inword, char_u *res));
758 #endif
759 static int soundalike_score __ARGS((char_u *goodsound, char_u *badsound));
760 static int spell_edit_score __ARGS((char_u *badword, char_u *goodword));
761 static void dump_word __ARGS((char_u *word, int round, int flags, linenr_T lnum));
762 static linenr_T dump_prefixes __ARGS((slang_T *slang, char_u *word, int round, int flags, linenr_T startlnum));
763 
764 /*
765  * Use our own character-case definitions, because the current locale may
766  * differ from what the .spl file uses.
767  * These must not be called with negative number!
 * A value below 256 is used as an index in the "spelltab" tables, which is
 * why it must not be negative.
 * The argument appears more than once in each expansion, thus it must not
 * have side effects.
768  */
769 #ifndef FEAT_MBYTE
770 /* Non-multi-byte implementation. */
/* Table lookup below 256; otherwise the value itself (FALSE for ISUPPER). */
771 # define SPELL_TOFOLD(c) ((c) < 256 ? spelltab.st_fold[c] : (c))
772 # define SPELL_TOUPPER(c) ((c) < 256 ? spelltab.st_upper[c] : (c))
773 # define SPELL_ISUPPER(c) ((c) < 256 ? spelltab.st_isu[c] : FALSE)
774 #else
775 # if defined(HAVE_WCHAR_H)
776 #  include <wchar.h>	    /* for towupper() and towlower() */
777 # endif
778 /* Multi-byte implementation.  For Unicode we can call utf_*(), but don't do
779  * that for ASCII, because we don't want to use 'casemap' here.  Otherwise use
780  * the "w" library function for characters above 255 if available. */
/* Order of the tests in each macro below:
 * 1. 'encoding' is utf-8 and "c" is 128 or more: utf_*() function.
 * 2. "c" is below 256: lookup in the "spelltab" table.
 * 3. otherwise: the wide character library function when the matching HAVE_
 *    symbol is defined, else "c" unchanged (FALSE for SPELL_ISUPPER). */
781 # ifdef HAVE_TOWLOWER
782 #  define SPELL_TOFOLD(c) (enc_utf8 && (c) >= 128 ? utf_fold(c) \
783 	    : (c) < 256 ? spelltab.st_fold[c] : towlower(c))
784 # else
785 #  define SPELL_TOFOLD(c) (enc_utf8 && (c) >= 128 ? utf_fold(c) \
786 	    : (c) < 256 ? spelltab.st_fold[c] : (c))
787 # endif
788 
789 # ifdef HAVE_TOWUPPER
790 #  define SPELL_TOUPPER(c) (enc_utf8 && (c) >= 128 ? utf_toupper(c) \
791 	    : (c) < 256 ? spelltab.st_upper[c] : towupper(c))
792 # else
793 #  define SPELL_TOUPPER(c) (enc_utf8 && (c) >= 128 ? utf_toupper(c) \
794 	    : (c) < 256 ? spelltab.st_upper[c] : (c))
795 # endif
796 
797 # ifdef HAVE_ISWUPPER
798 #  define SPELL_ISUPPER(c) (enc_utf8 && (c) >= 128 ? utf_isupper(c) \
799 	    : (c) < 256 ? spelltab.st_isu[c] : iswupper(c))
800 # else
801 #  define SPELL_ISUPPER(c) (enc_utf8 && (c) >= 128 ? utf_isupper(c) \
802 	    : (c) < 256 ? spelltab.st_isu[c] : (FALSE))
803 # endif
804 #endif
805 
806 
/*
 * Message texts used in this file.  They are wrapped in N_() here and passed
 * through _() where they are used, e.g. EMSG(_(e_format)) in find_word().
 * NOTE(review): N_() presumably only marks the text for translation --
 * confirm against its definition.
 * The texts with "%s" and "%d" items are printf-style formats.
 */
807 static char *e_format = N_("E759: Format error in spell file");
808 static char *e_spell_trunc = N_("E758: Truncated spell file");
809 static char *e_afftrailing = N_("Trailing text in %s line %d: %s");
810 static char *e_affname = N_("Affix name too long in %s line %d: %s");
811 static char *e_affform = N_("E761: Format error in affix file FOL, LOW or UPP");
812 static char *e_affrange = N_("E762: Character in FOL, LOW or UPP is out of range");
813 static char *msg_compressing = N_("Compressing word tree...");
814 
815 /*
816  * Main spell-checking function.
817  * "ptr" points to a character that could be the start of a word.
818  * "*attrp" is set to the attributes for a badly spelled word.  For a non-word
819  * or when it's OK it remains unchanged.
820  * This must only be called when 'spelllang' is not empty.
821  *
822  * "capcol" is used to check for a Capitalised word after the end of a
823  * sentence.  If it's zero then perform the check.  Return the column where to
824  * check next, or -1 when no sentence end was found.  If it's NULL then don't
825  * worry.
826  *
827  * Returns the length of the word in bytes, also when it's OK, so that the
828  * caller can skip over the word.
829  */
830     int
831 spell_check(wp, ptr, attrp, capcol)
832     win_T	*wp;		/* current window */
833     char_u	*ptr;
834     int		*attrp;
835     int		*capcol;	/* column to check for Capital */
836 {
837     matchinf_T	mi;		/* Most things are put in "mi" so that it can
838 				   be passed to functions quickly. */
839     int		nrlen = 0;	/* found a number first */
840     int		c;
841     int		wrongcaplen = 0;
842     int		lpi;
843 
844     /* A word never starts at a space or a control character.  Return quickly
845      * then, skipping over the character. */
846     if (*ptr <= ' ')
847 	return 1;
848     vim_memset(&mi, 0, sizeof(matchinf_T));
849 
850     /* A number is always OK.  Also skip hexadecimal numbers 0xFF99 and
851      * 0X99FF.  But when a word character follows do check spelling to find
852      * "3GPP". */
853     if (*ptr >= '0' && *ptr <= '9')
854     {
855 	if (*ptr == '0' && (ptr[1] == 'x' || ptr[1] == 'X'))
856 	    mi.mi_end = skiphex(ptr + 2);
857 	else
858 	{
859 	    mi.mi_end = skipdigits(ptr);
860 	    nrlen = mi.mi_end - ptr;
861 	}
862 	if (!spell_iswordp(mi.mi_end, wp->w_buffer))
863 	    return (int)(mi.mi_end - ptr);
864 
865 	/* Try including the digits in the word. */
866 	mi.mi_fend = ptr + nrlen;
867     }
868     else
869 	mi.mi_fend = ptr;
870 
871     /* Find the normal end of the word (until the next non-word character). */
872     mi.mi_word = ptr;
873     if (spell_iswordp(mi.mi_fend, wp->w_buffer))
874     {
875 	do
876 	{
877 	    mb_ptr_adv(mi.mi_fend);
878 	} while (*mi.mi_fend != NUL && spell_iswordp(mi.mi_fend, wp->w_buffer));
879 
880 	if (capcol != NULL && *capcol == 0 && wp->w_buffer->b_cap_prog != NULL)
881 	{
882 	    /* Check word starting with capital letter. */
883 	    c = PTR2CHAR(ptr);
884 	    if (!SPELL_ISUPPER(c))
885 		wrongcaplen = (int)(mi.mi_fend - ptr);
886 	}
887     }
888     if (capcol != NULL)
889 	*capcol = -1;
890 
891     /* We always use the characters up to the next non-word character,
892      * also for bad words. */
893     mi.mi_end = mi.mi_fend;
894 
895     /* Check caps type later. */
896     mi.mi_buf = wp->w_buffer;
897 
898     /* case-fold the word with one non-word character, so that we can check
899      * for the word end. */
900     if (*mi.mi_fend != NUL)
901 	mb_ptr_adv(mi.mi_fend);
902 
903     (void)spell_casefold(ptr, (int)(mi.mi_fend - ptr), mi.mi_fword,
904 							     MAXWLEN + 1);
905     mi.mi_fwordlen = STRLEN(mi.mi_fword);
906 
907     /* The word is bad unless we recognize it. */
908     mi.mi_result = SP_BAD;
909     mi.mi_result2 = SP_BAD;
910 
911     /*
912      * Loop over the languages specified in 'spelllang'.
913      * We check them all, because a matching word may be longer than an
914      * already found matching word.
915      */
916     for (lpi = 0; lpi < wp->w_buffer->b_langp.ga_len; ++lpi)
917     {
918 	mi.mi_lp = LANGP_ENTRY(wp->w_buffer->b_langp, lpi);
919 
920 	/* If reloading fails the language is still in the list but everything
921 	 * has been cleared. */
922 	if (mi.mi_lp->lp_slang->sl_fidxs == NULL)
923 	    continue;
924 
925 	/* Check for a matching word in case-folded words. */
926 	find_word(&mi, FIND_FOLDWORD);
927 
928 	/* Check for a matching word in keep-case words. */
929 	find_word(&mi, FIND_KEEPWORD);
930 
931 	/* Check for matching prefixes. */
932 	find_prefix(&mi, FIND_FOLDWORD);
933 
934 	/* For a NOBREAK language, may want to use a word without a following
935 	 * word as a backup. */
936 	if (mi.mi_lp->lp_slang->sl_nobreak && mi.mi_result == SP_BAD
937 						   && mi.mi_result2 != SP_BAD)
938 	{
939 	    mi.mi_result = mi.mi_result2;
940 	    mi.mi_end = mi.mi_end2;
941 	}
942     }
943 
944     if (mi.mi_result != SP_OK)
945     {
946 	/* If we found a number skip over it.  Allows for "42nd".  Do flag
947 	 * rare and local words, e.g., "3GPP". */
948 	if (nrlen > 0)
949 	{
950 	    if (mi.mi_result == SP_BAD || mi.mi_result == SP_BANNED)
951 		return nrlen;
952 	}
953 
954 	/* When we are at a non-word character there is no error, just
955 	 * skip over the character (try looking for a word after it). */
956 	else if (!spell_iswordp_nmw(ptr))
957 	{
958 	    if (capcol != NULL && wp->w_buffer->b_cap_prog != NULL)
959 	    {
960 		regmatch_T	regmatch;
961 
962 		/* Check for end of sentence. */
963 		regmatch.regprog = wp->w_buffer->b_cap_prog;
964 		regmatch.rm_ic = FALSE;
965 		if (vim_regexec(&regmatch, ptr, 0))
966 		    *capcol = (int)(regmatch.endp[0] - ptr);
967 	    }
968 
969 #ifdef FEAT_MBYTE
970 	    if (has_mbyte)
971 		return (*mb_ptr2len)(ptr);
972 #endif
973 	    return 1;
974 	}
975 	else if (mi.mi_end == ptr)
976 	    /* Always include at least one character.  Required for when there
977 	     * is a mixup in "midword". */
978 	    mb_ptr_adv(mi.mi_end);
979 	else if (mi.mi_result == SP_BAD
980 		&& LANGP_ENTRY(wp->w_buffer->b_langp, 0)->lp_slang->sl_nobreak)
981 	{
982 	    char_u	*p, *fp;
983 	    int		save_result = mi.mi_result;
984 
985 	    /* First language in 'spelllang' is NOBREAK.  Find first position
986 	     * at which any word would be valid. */
987 	    mi.mi_lp = LANGP_ENTRY(wp->w_buffer->b_langp, 0);
988 	    if (mi.mi_lp->lp_slang->sl_fidxs != NULL)
989 	    {
990 		p = mi.mi_word;
991 		fp = mi.mi_fword;
992 		for (;;)
993 		{
994 		    mb_ptr_adv(p);
995 		    mb_ptr_adv(fp);
996 		    if (p >= mi.mi_end)
997 			break;
998 		    mi.mi_compoff = fp - mi.mi_fword;
999 		    find_word(&mi, FIND_COMPOUND);
1000 		    if (mi.mi_result != SP_BAD)
1001 		    {
1002 			mi.mi_end = p;
1003 			break;
1004 		    }
1005 		}
1006 		mi.mi_result = save_result;
1007 	    }
1008 	}
1009 
1010 	if (mi.mi_result == SP_BAD || mi.mi_result == SP_BANNED)
1011 	    *attrp = highlight_attr[HLF_SPB];
1012 	else if (mi.mi_result == SP_RARE)
1013 	    *attrp = highlight_attr[HLF_SPR];
1014 	else
1015 	    *attrp = highlight_attr[HLF_SPL];
1016     }
1017 
1018     if (wrongcaplen > 0 && (mi.mi_result == SP_OK || mi.mi_result == SP_RARE))
1019     {
1020 	/* Report SpellCap only when the word isn't badly spelled. */
1021 	*attrp = highlight_attr[HLF_SPC];
1022 	return wrongcaplen;
1023     }
1024 
1025     return (int)(mi.mi_end - ptr);
1026 }
1027 
1028 /*
1029  * Check if the word at "mip->mi_word" is in the tree.
1030  * When "mode" is FIND_FOLDWORD check in fold-case word tree.
1031  * When "mode" is FIND_KEEPWORD check in keep-case word tree.
1032  * When "mode" is FIND_PREFIX check for word after prefix in fold-case word
1033  * tree.
1034  *
1035  * For a match mip->mi_result is updated.
1036  */
/*
 * Additionally:
 * When "mode" is FIND_COMPOUND check in the fold-case word tree for a word
 * that follows previously found word(s), skipping "mip->mi_compoff" bytes.
 * When "mode" is FIND_KEEPCOMPOUND do the same in the keep-case word tree.
 *
 * Together with "mi_result" also "mi_end" is updated.  For a NOBREAK language
 * a match without a valid following word is stored in "mi_result2" and
 * "mi_end2" instead.
 * This function calls itself (and find_prefix()) to find the word that
 * follows in a compound or NOBREAK word.
 */
1037     static void
1038 find_word(mip, mode)
1039     matchinf_T	*mip;
1040     int		mode;
1041 {
1042     idx_T	arridx = 0;
1043     int		endlen[MAXWLEN];    /* length at possible word endings */
1044     idx_T	endidx[MAXWLEN];    /* possible word endings */
1045     int		endidxcnt = 0;
1046     int		len;
1047     int		wlen = 0;
1048     int		flen;
1049     int		c;
1050     char_u	*ptr;
1051     idx_T	lo, hi, m;
1052 #ifdef FEAT_MBYTE
1053     char_u	*s;
1054 #endif
1055     char_u	*p;
1056     int		res = SP_BAD;
1057     slang_T	*slang = mip->mi_lp->lp_slang;
1058     unsigned	flags;
1059     char_u	*byts;
1060     idx_T	*idxs;
1061     int		word_ends;
1062     int		prefix_found;
1063     int		nobreak_result;
1064 
1065     if (mode == FIND_KEEPWORD || mode == FIND_KEEPCOMPOUND)
1066     {
1067 	/* Check for word with matching case in keep-case tree. */
1068 	ptr = mip->mi_word;
1069 	flen = 9999;		    /* no case folding, always enough bytes */
1070 	byts = slang->sl_kbyts;
1071 	idxs = slang->sl_kidxs;
1072 
1073 	if (mode == FIND_KEEPCOMPOUND)
1074 	    /* Skip over the previously found word(s). */
1075 	    wlen += mip->mi_compoff;
1076     }
1077     else
1078     {
1079 	/* Check for case-folded in case-folded tree. */
1080 	ptr = mip->mi_fword;
1081 	flen = mip->mi_fwordlen;    /* available case-folded bytes */
1082 	byts = slang->sl_fbyts;
1083 	idxs = slang->sl_fidxs;
1084 
1085 	if (mode == FIND_PREFIX)
1086 	{
1087 	    /* Skip over the prefix. */
1088 	    wlen = mip->mi_prefixlen;
1089 	    flen -= mip->mi_prefixlen;
1090 	}
1091 	else if (mode == FIND_COMPOUND)
1092 	{
1093 	    /* Skip over the previously found word(s). */
1094 	    wlen = mip->mi_compoff;
1095 	    flen -= mip->mi_compoff;
1096 	}
1097 
1098     }
1099 
1100     if (byts == NULL)
1101 	return;			/* array is empty */
1102 
1103     /*
1104      * Repeat advancing in the tree until:
1105      * - there is a byte that doesn't match,
1106      * - we reach the end of the tree,
1107      * - or we reach the end of the line.
1108      */
1109     for (;;)
1110     {
	/* Out of case-folded bytes while the line has more text: fold more. */
1111 	if (flen <= 0 && *mip->mi_fend != NUL)
1112 	    flen = fold_more(mip);
1113 
	/* The node starts with the number of bytes (siblings) that follow. */
1114 	len = byts[arridx++];
1115 
1116 	/* If the first possible byte is a zero the word could end here.
1117 	 * Remember this index, we first check for the longest word. */
1118 	if (byts[arridx] == 0)
1119 	{
1120 	    if (endidxcnt == MAXWLEN)
1121 	    {
1122 		/* Must be a corrupted spell file. */
1123 		EMSG(_(e_format));
1124 		return;
1125 	    }
1126 	    endlen[endidxcnt] = wlen;
1127 	    endidx[endidxcnt++] = arridx++;
1128 	    --len;
1129 
1130 	    /* Skip over the zeros, there can be several flag/region
1131 	     * combinations. */
1132 	    while (len > 0 && byts[arridx] == 0)
1133 	    {
1134 		++arridx;
1135 		--len;
1136 	    }
1137 	    if (len == 0)
1138 		break;	    /* no children, word must end here */
1139 	}
1140 
1141 	/* Stop looking at end of the line. */
1142 	if (ptr[wlen] == NUL)
1143 	    break;
1144 
1145 	/* Perform a binary search in the list of accepted bytes. */
1146 	c = ptr[wlen];
1147 	if (c == TAB)	    /* <Tab> is handled like <Space> */
1148 	    c = ' ';
1149 	lo = arridx;
1150 	hi = arridx + len - 1;
1151 	while (lo < hi)
1152 	{
1153 	    m = (lo + hi) / 2;
1154 	    if (byts[m] > c)
1155 		hi = m - 1;
1156 	    else if (byts[m] < c)
1157 		lo = m + 1;
1158 	    else
1159 	    {
1160 		lo = hi = m;
1161 		break;
1162 	    }
1163 	}
1164 
1165 	/* Stop if there is no matching byte. */
1166 	if (hi < lo || byts[lo] != c)
1167 	    break;
1168 
1169 	/* Continue at the child (if there is one). */
1170 	arridx = idxs[lo];
1171 	++wlen;
1172 	--flen;
1173 
1174 	/* One space in the good word may stand for several spaces in the
1175 	 * checked word. */
1176 	if (c == ' ')
1177 	{
1178 	    for (;;)
1179 	    {
1180 		if (flen <= 0 && *mip->mi_fend != NUL)
1181 		    flen = fold_more(mip);
1182 		if (ptr[wlen] != ' ' && ptr[wlen] != TAB)
1183 		    break;
1184 		++wlen;
1185 		--flen;
1186 	    }
1187 	}
1188     }
1189 
1190     /*
1191      * Verify that one of the possible endings is valid.  Try the longest
1192      * first.
1193      */
1194     while (endidxcnt > 0)
1195     {
1196 	--endidxcnt;
1197 	arridx = endidx[endidxcnt];
	/* "wlen" is reused: the length of the word for this ending, in bytes
	 * of "ptr". */
1198 	wlen = endlen[endidxcnt];
1199 
1200 #ifdef FEAT_MBYTE
1201 	if ((*mb_head_off)(ptr, ptr + wlen) > 0)
1202 	    continue;	    /* not at first byte of character */
1203 #endif
1204 	if (spell_iswordp(ptr + wlen, mip->mi_buf))
1205 	{
1206 	    if (slang->sl_compprog == NULL && !slang->sl_nobreak)
1207 		continue;	    /* next char is a word character */
1208 	    word_ends = FALSE;
1209 	}
1210 	else
1211 	    word_ends = TRUE;
1212 	/* The prefix flag is before compound flags.  Once a valid prefix flag
1213 	 * has been found we try compound flags. */
1214 	prefix_found = FALSE;
1215 
1216 #ifdef FEAT_MBYTE
1217 	if (mode != FIND_KEEPWORD && has_mbyte)
1218 	{
1219 	    /* Compute byte length in original word, length may change
1220 	     * when folding case.  This can be slow, take a shortcut when the
1221 	     * case-folded word is equal to the keep-case word. */
1222 	    p = mip->mi_word;
1223 	    if (STRNCMP(ptr, p, wlen) != 0)
1224 	    {
1225 		for (s = ptr; s < ptr + wlen; mb_ptr_adv(s))
1226 		    mb_ptr_adv(p);
1227 		wlen = p - mip->mi_word;
1228 	    }
1229 	}
1230 #endif
1231 
1232 	/* Check flags and region.  For FIND_PREFIX check the condition and
1233 	 * prefix ID.
1234 	 * Repeat this if there are more flags/region alternatives until there
1235 	 * is a match. */
1236 	res = SP_BAD;
1237 	for (len = byts[arridx - 1]; len > 0 && byts[arridx] == 0;
1238 							      --len, ++arridx)
1239 	{
	    /* For a word end (zero byte) "idxs" holds the flags. */
1240 	    flags = idxs[arridx];
1241 
1242 	    /* For the fold-case tree check that the case of the checked word
1243 	     * matches with what the word in the tree requires.
1244 	     * For keep-case tree the case is always right.  For prefixes we
1245 	     * don't bother to check. */
1246 	    if (mode == FIND_FOLDWORD)
1247 	    {
1248 		if (mip->mi_cend != mip->mi_word + wlen)
1249 		{
1250 		    /* mi_capflags was set for a different word length, need
1251 		     * to do it again. */
1252 		    mip->mi_cend = mip->mi_word + wlen;
1253 		    mip->mi_capflags = captype(mip->mi_word, mip->mi_cend);
1254 		}
1255 
1256 		if (mip->mi_capflags == WF_KEEPCAP
1257 				|| !spell_valid_case(mip->mi_capflags, flags))
1258 		    continue;
1259 	    }
1260 
1261 	    /* When mode is FIND_PREFIX the word must support the prefix:
1262 	     * check the prefix ID and the condition.  Do that for the list at
1263 	     * mip->mi_prefarridx that find_prefix() filled. */
1264 	    else if (mode == FIND_PREFIX && !prefix_found)
1265 	    {
1266 		c = valid_word_prefix(mip->mi_prefcnt, mip->mi_prefarridx,
1267 				    flags,
1268 				    mip->mi_word + mip->mi_cprefixlen, slang,
1269 				    FALSE);
1270 		if (c == 0)
1271 		    continue;
1272 
1273 		/* Use the WF_RARE flag for a rare prefix. */
1274 		if (c & WF_RAREPFX)
1275 		    flags |= WF_RARE;
1276 		prefix_found = TRUE;
1277 	    }
1278 
1279 	    if (slang->sl_nobreak)
1280 	    {
1281 		if ((mode == FIND_COMPOUND || mode == FIND_KEEPCOMPOUND)
1282 			&& (flags & WF_BANNED) == 0)
1283 		{
1284 		    /* NOBREAK: found a valid following word.  That's all we
1285 		     * need to know, so return. */
1286 		    mip->mi_result = SP_OK;
1287 		    break;
1288 		}
1289 	    }
1290 
1291 	    else if ((mode == FIND_COMPOUND || mode == FIND_KEEPCOMPOUND
1292 								|| !word_ends))
1293 	    {
1294 		/* If there is no compound flag or the word is shorter than
1295 		 * COMPOUNDMIN reject it quickly.
1296 		 * Makes you wonder why someone puts a compound flag on a word
1297 		 * that's too short...  Myspell compatibility requires this
1298 		 * anyway. */
1299 		if (((unsigned)flags >> 24) == 0
1300 			     || wlen - mip->mi_compoff < slang->sl_compminlen)
1301 		    continue;
1302 #ifdef FEAT_MBYTE
1303 		/* For multi-byte chars check character length against
1304 		 * COMPOUNDMIN. */
1305 		if (has_mbyte
1306 			&& slang->sl_compminlen > 0
1307 			&& mb_charlen_len(mip->mi_word + mip->mi_compoff,
1308 				wlen - mip->mi_compoff) < slang->sl_compminlen)
1309 			continue;
1310 #endif
1311 
1312 		/* Limit the number of compound words to COMPOUNDMAX if no
1313 		 * maximum for syllables is specified. */
1314 		if (!word_ends && mip->mi_complen + 2 > slang->sl_compmax
1315 					   && slang->sl_compsylmax == MAXWLEN)
1316 		    continue;
1317 
1318 		/* Quickly check if compounding is possible with this flag. */
1319 		if (!byte_in_str(mip->mi_complen == 0
1320 					? slang->sl_compstartflags
1321 					: slang->sl_compallflags,
1322 					    ((unsigned)flags >> 24)))
1323 		    continue;
1324 
1325 		if (mode == FIND_COMPOUND)
1326 		{
1327 		    int	    capflags;
1328 
1329 		    /* Need to check the caps type of the appended compound
1330 		     * word. */
1331 #ifdef FEAT_MBYTE
1332 		    if (has_mbyte && STRNCMP(ptr, mip->mi_word,
1333 							mip->mi_compoff) != 0)
1334 		    {
1335 			/* case folding may have changed the length */
1336 			p = mip->mi_word;
1337 			for (s = ptr; s < ptr + mip->mi_compoff; mb_ptr_adv(s))
1338 			    mb_ptr_adv(p);
1339 		    }
1340 		    else
1341 #endif
1342 			p = mip->mi_word + mip->mi_compoff;
1343 		    capflags = captype(p, mip->mi_word + wlen);
1344 		    if (capflags == WF_KEEPCAP || (capflags == WF_ALLCAP
1345 						 && (flags & WF_FIXCAP) != 0))
1346 			continue;
1347 
1348 		    if (capflags != WF_ALLCAP)
1349 		    {
1350 			/* When the character before the word is a word
1351 			 * character we do not accept a Onecap word.  We do
1352 			 * accept a no-caps word, even when the dictionary
1353 			 * word specifies ONECAP. */
1354 			mb_ptr_back(mip->mi_word, p);
1355 			if (spell_iswordp_nmw(p)
1356 				? capflags == WF_ONECAP
1357 				: (flags & WF_ONECAP) != 0
1358 						     && capflags != WF_ONECAP)
1359 			    continue;
1360 		    }
1361 		}
1362 
1363 		/* If the word ends the sequence of compound flags of the
1364 		 * words must match with one of the COMPOUNDFLAGS items and
1365 		 * the number of syllables must not be too large. */
1366 		mip->mi_compflags[mip->mi_complen] = ((unsigned)flags >> 24);
1367 		mip->mi_compflags[mip->mi_complen + 1] = NUL;
1368 		if (word_ends)
1369 		{
1370 		    char_u	fword[MAXWLEN];
1371 
		    /* "fword" is left unset when there is no syllable limit;
		     * can_compound() only reads its "word" argument when
		     * sl_compsylmax is below MAXWLEN. */
1372 		    if (slang->sl_compsylmax < MAXWLEN)
1373 		    {
1374 			/* "fword" is only needed for checking syllables. */
1375 			if (ptr == mip->mi_word)
1376 			    (void)spell_casefold(ptr, wlen, fword, MAXWLEN);
1377 			else
1378 			    vim_strncpy(fword, ptr, endlen[endidxcnt]);
1379 		    }
1380 		    if (!can_compound(slang, fword, mip->mi_compflags))
1381 			continue;
1382 		}
1383 	    }
1384 
1385 	    /* Check NEEDCOMPOUND: can't use word without compounding. */
1386 	    else if (flags & WF_NEEDCOMP)
1387 		continue;
1388 
1389 	    nobreak_result = SP_OK;
1390 
1391 	    if (!word_ends)
1392 	    {
1393 		int	save_result = mip->mi_result;
1394 		char_u	*save_end = mip->mi_end;
1395 		langp_T	*save_lp = mip->mi_lp;
1396 		int	lpi;
1397 
1398 		/* Check that a valid word follows.  If there is one and we
1399 		 * are compounding, it will set "mi_result", thus we are
1400 		 * always finished here.  For NOBREAK we only check that a
1401 		 * valid word follows.
1402 		 * Recursive! */
1403 		if (slang->sl_nobreak)
1404 		    mip->mi_result = SP_BAD;
1405 
1406 		/* Find following word in case-folded tree. */
1407 		mip->mi_compoff = endlen[endidxcnt];
1408 #ifdef FEAT_MBYTE
1409 		if (has_mbyte && mode == FIND_KEEPWORD)
1410 		{
1411 		    /* Compute byte length in case-folded word from "wlen":
1412 		     * byte length in keep-case word.  Length may change when
1413 		     * folding case.  This can be slow, take a shortcut when
1414 		     * the case-folded word is equal to the keep-case word. */
1415 		    p = mip->mi_fword;
1416 		    if (STRNCMP(ptr, p, wlen) != 0)
1417 		    {
1418 			for (s = ptr; s < ptr + wlen; mb_ptr_adv(s))
1419 			    mb_ptr_adv(p);
1420 			mip->mi_compoff = p - mip->mi_fword;
1421 		    }
1422 		}
1423 #endif
		/* Remember the offset, it is restored before the call to
		 * find_prefix() below. */
1424 		c = mip->mi_compoff;
1425 		++mip->mi_complen;
1426 
1427 		/* For NOBREAK we need to try all NOBREAK languages, at least
1428 		 * to find the ".add" file(s). */
1429 		for (lpi = 0; lpi < mip->mi_buf->b_langp.ga_len; ++lpi)
1430 		{
1431 		    if (slang->sl_nobreak)
1432 		    {
1433 			mip->mi_lp = LANGP_ENTRY(mip->mi_buf->b_langp, lpi);
1434 			if (mip->mi_lp->lp_slang->sl_fidxs == NULL
1435 					 || !mip->mi_lp->lp_slang->sl_nobreak)
1436 			    continue;
1437 		    }
1438 
1439 		    find_word(mip, FIND_COMPOUND);
1440 
1441 		    /* When NOBREAK any word that matches is OK.  Otherwise we
1442 		     * need to find the longest match, thus try with keep-case
1443 		     * and prefix too. */
1444 		    if (!slang->sl_nobreak || mip->mi_result == SP_BAD)
1445 		    {
1446 			/* Find following word in keep-case tree. */
1447 			mip->mi_compoff = wlen;
1448 			find_word(mip, FIND_KEEPCOMPOUND);
1449 
1450 			if (!slang->sl_nobreak || mip->mi_result == SP_BAD)
1451 			{
1452 			    /* Check for following word with prefix. */
1453 			    mip->mi_compoff = c;
1454 			    find_prefix(mip, FIND_COMPOUND);
1455 			}
1456 		    }
1457 
		    /* Without NOBREAK only the current language is used. */
1458 		    if (!slang->sl_nobreak)
1459 			break;
1460 		}
1461 		--mip->mi_complen;
1462 		mip->mi_lp = save_lp;
1463 
1464 		if (slang->sl_nobreak)
1465 		{
1466 		    nobreak_result = mip->mi_result;
1467 		    mip->mi_result = save_result;
1468 		    mip->mi_end = save_end;
1469 		}
1470 		else
1471 		{
1472 		    if (mip->mi_result == SP_OK)
1473 			break;
1474 		    continue;
1475 		}
1476 	    }
1477 
1478 	    if (flags & WF_BANNED)
1479 		res = SP_BANNED;
1480 	    else if (flags & WF_REGION)
1481 	    {
1482 		/* Check region. */
1483 		if ((mip->mi_lp->lp_region & (flags >> 16)) != 0)
1484 		    res = SP_OK;
1485 		else
1486 		    res = SP_LOCAL;
1487 	    }
1488 	    else if (flags & WF_RARE)
1489 		res = SP_RARE;
1490 	    else
1491 		res = SP_OK;
1492 
1493 	    /* Always use the longest match and the best result.  For NOBREAK
1494 	     * we separately keep the longest match without a following good
1495 	     * word as a fall-back. */
1496 	    if (nobreak_result == SP_BAD)
1497 	    {
1498 		if (mip->mi_result2 > res)
1499 		{
1500 		    mip->mi_result2 = res;
1501 		    mip->mi_end2 = mip->mi_word + wlen;
1502 		}
1503 		else if (mip->mi_result2 == res
1504 					&& mip->mi_end2 < mip->mi_word + wlen)
1505 		    mip->mi_end2 = mip->mi_word + wlen;
1506 	    }
1507 	    else if (mip->mi_result > res)
1508 	    {
1509 		mip->mi_result = res;
1510 		mip->mi_end = mip->mi_word + wlen;
1511 	    }
1512 	    else if (mip->mi_result == res && mip->mi_end < mip->mi_word + wlen)
1513 		mip->mi_end = mip->mi_word + wlen;
1514 
1515 	    if (mip->mi_result == SP_OK)
1516 		break;
1517 	}
1518 
1519 	if (mip->mi_result == SP_OK)
1520 	    break;
1521     }
1522 }
1523 
1524 /*
1525  * Return TRUE if "flags" is a valid sequence of compound flags and
1526  * "word[len]" does not have too many syllables.
1527  */
1528     static int
1529 can_compound(slang, word, flags)
1530     slang_T	*slang;
1531     char_u	*word;
1532     char_u	*flags;
1533 {
1534     regmatch_T	regmatch;
1535 #ifdef FEAT_MBYTE
1536     char_u	uflags[MAXWLEN * 2];
1537     int		i;
1538 #endif
1539     char_u	*p;
1540 
1541     if (slang->sl_compprog == NULL)
1542 	return FALSE;
1543 #ifdef FEAT_MBYTE
1544     if (enc_utf8)
1545     {
1546 	/* Need to convert the single byte flags to utf8 characters. */
1547 	p = uflags;
1548 	for (i = 0; flags[i] != NUL; ++i)
1549 	    p += mb_char2bytes(flags[i], p);
1550 	*p = NUL;
1551 	p = uflags;
1552     }
1553     else
1554 #endif
1555 	p = flags;
1556     regmatch.regprog = slang->sl_compprog;
1557     regmatch.rm_ic = FALSE;
1558     if (!vim_regexec(&regmatch, p, 0))
1559 	return FALSE;
1560 
1561     /* Count the number of syllables.  This may be slow, do it last.  If there
1562      * are too many syllables AND the number of compound words is above
1563      * COMPOUNDMAX then compounding is not allowed. */
1564     if (slang->sl_compsylmax < MAXWLEN
1565 		       && count_syllables(slang, word) > slang->sl_compsylmax)
1566 	return (int)STRLEN(flags) < slang->sl_compmax;
1567     return TRUE;
1568 }
1569 
1570 /*
1571  * Return non-zero if the prefix indicated by "arridx" matches with the prefix
1572  * ID in "flags" for the word "word".
1573  * The WF_RAREPFX flag is included in the return value for a rare prefix.
1574  */
1575     static int
1576 valid_word_prefix(totprefcnt, arridx, flags, word, slang, cond_req)
1577     int		totprefcnt;	/* nr of prefix IDs */
1578     int		arridx;		/* idx in sl_pidxs[] */
1579     int		flags;
1580     char_u	*word;
1581     slang_T	*slang;
1582     int		cond_req;	/* only use prefixes with a condition */
1583 {
1584     int		prefcnt;
1585     int		pidx;
1586     regprog_T	*rp;
1587     regmatch_T	regmatch;
1588     int		prefid;
1589 
1590     prefid = (unsigned)flags >> 24;
1591     for (prefcnt = totprefcnt - 1; prefcnt >= 0; --prefcnt)
1592     {
1593 	pidx = slang->sl_pidxs[arridx + prefcnt];
1594 
1595 	/* Check the prefix ID. */
1596 	if (prefid != (pidx & 0xff))
1597 	    continue;
1598 
1599 	/* Check if the prefix doesn't combine and the word already has a
1600 	 * suffix. */
1601 	if ((flags & WF_HAS_AFF) && (pidx & WF_PFX_NC))
1602 	    continue;
1603 
1604 	/* Check the condition, if there is one.  The condition index is
1605 	 * stored in the two bytes above the prefix ID byte.  */
1606 	rp = slang->sl_prefprog[((unsigned)pidx >> 8) & 0xffff];
1607 	if (rp != NULL)
1608 	{
1609 	    regmatch.regprog = rp;
1610 	    regmatch.rm_ic = FALSE;
1611 	    if (!vim_regexec(&regmatch, word, 0))
1612 		continue;
1613 	}
1614 	else if (cond_req)
1615 	    continue;
1616 
1617 	/* It's a match!  Return the WF_ flags. */
1618 	return pidx;
1619     }
1620     return 0;
1621 }
1622 
1623 /*
1624  * Check if the word at "mip->mi_word" has a matching prefix.
1625  * If it does, then check the following word.
1626  *
1627  * If "mode" is "FIND_COMPOUND" then do the same after another word, find a
1628  * prefix in a compound word.
1629  *
1630  * For a match mip->mi_result is updated.
1631  */
1632     static void
1633 find_prefix(mip, mode)
1634     matchinf_T	*mip;
1635     int		mode;
1636 {
1637     idx_T	arridx = 0;
1638     int		len;
1639     int		wlen = 0;
1640     int		flen;
1641     int		c;
1642     char_u	*ptr;
1643     idx_T	lo, hi, m;
1644     slang_T	*slang = mip->mi_lp->lp_slang;
1645     char_u	*byts;
1646     idx_T	*idxs;
1647 
1648     byts = slang->sl_pbyts;
1649     if (byts == NULL)
1650 	return;			/* array is empty */
1651 
1652     /* We use the case-folded word here, since prefixes are always
1653      * case-folded. */
1654     ptr = mip->mi_fword;
1655     flen = mip->mi_fwordlen;    /* available case-folded bytes */
1656     if (mode == FIND_COMPOUND)
1657     {
1658 	/* Skip over the previously found word(s). */
1659 	ptr += mip->mi_compoff;
1660 	flen -= mip->mi_compoff;
1661     }
1662     idxs = slang->sl_pidxs;
1663 
1664     /*
1665      * Repeat advancing in the tree until:
1666      * - there is a byte that doesn't match,
1667      * - we reach the end of the tree,
1668      * - or we reach the end of the line.
1669      */
1670     for (;;)
1671     {
1672 	if (flen == 0 && *mip->mi_fend != NUL)
1673 	    flen = fold_more(mip);
1674 
1675 	len = byts[arridx++];
1676 
1677 	/* If the first possible byte is a zero the prefix could end here.
1678 	 * Check if the following word matches and supports the prefix. */
1679 	if (byts[arridx] == 0)
1680 	{
1681 	    /* There can be several prefixes with different conditions.  We
1682 	     * try them all, since we don't know which one will give the
1683 	     * longest match.  The word is the same each time, pass the list
1684 	     * of possible prefixes to find_word(). */
1685 	    mip->mi_prefarridx = arridx;
1686 	    mip->mi_prefcnt = len;
1687 	    while (len > 0 && byts[arridx] == 0)
1688 	    {
1689 		++arridx;
1690 		--len;
1691 	    }
1692 	    mip->mi_prefcnt -= len;
1693 
1694 	    /* Find the word that comes after the prefix. */
1695 	    mip->mi_prefixlen = wlen;
1696 	    if (mode == FIND_COMPOUND)
1697 		/* Skip over the previously found word(s). */
1698 		mip->mi_prefixlen += mip->mi_compoff;
1699 
1700 #ifdef FEAT_MBYTE
1701 	    if (has_mbyte)
1702 	    {
1703 		/* Case-folded length may differ from original length. */
1704 		mip->mi_cprefixlen = nofold_len(mip->mi_fword,
1705 					     mip->mi_prefixlen, mip->mi_word);
1706 	    }
1707 	    else
1708 		mip->mi_cprefixlen = mip->mi_prefixlen;
1709 #endif
1710 	    find_word(mip, FIND_PREFIX);
1711 
1712 
1713 	    if (len == 0)
1714 		break;	    /* no children, word must end here */
1715 	}
1716 
1717 	/* Stop looking at end of the line. */
1718 	if (ptr[wlen] == NUL)
1719 	    break;
1720 
1721 	/* Perform a binary search in the list of accepted bytes. */
1722 	c = ptr[wlen];
1723 	lo = arridx;
1724 	hi = arridx + len - 1;
1725 	while (lo < hi)
1726 	{
1727 	    m = (lo + hi) / 2;
1728 	    if (byts[m] > c)
1729 		hi = m - 1;
1730 	    else if (byts[m] < c)
1731 		lo = m + 1;
1732 	    else
1733 	    {
1734 		lo = hi = m;
1735 		break;
1736 	    }
1737 	}
1738 
1739 	/* Stop if there is no matching byte. */
1740 	if (hi < lo || byts[lo] != c)
1741 	    break;
1742 
1743 	/* Continue at the child (if there is one). */
1744 	arridx = idxs[lo];
1745 	++wlen;
1746 	--flen;
1747     }
1748 }
1749 
1750 /*
1751  * Need to fold at least one more character.  Do until next non-word character
1752  * for efficiency.
1753  * Return the length of the folded chars in bytes.
1754  */
1755     static int
1756 fold_more(mip)
1757     matchinf_T	*mip;
1758 {
1759     int		flen;
1760     char_u	*p;
1761 
1762     p = mip->mi_fend;
1763     do
1764     {
1765 	mb_ptr_adv(mip->mi_fend);
1766     } while (*mip->mi_fend != NUL && spell_iswordp(mip->mi_fend, mip->mi_buf));
1767 
1768     /* Include the non-word character so that we can check for the
1769      * word end. */
1770     if (*mip->mi_fend != NUL)
1771 	mb_ptr_adv(mip->mi_fend);
1772 
1773     (void)spell_casefold(p, (int)(mip->mi_fend - p),
1774 			     mip->mi_fword + mip->mi_fwordlen,
1775 			     MAXWLEN - mip->mi_fwordlen);
1776     flen = STRLEN(mip->mi_fword + mip->mi_fwordlen);
1777     mip->mi_fwordlen += flen;
1778     return flen;
1779 }
1780 
1781 /*
1782  * Check case flags for a word.  Return TRUE if the word has the requested
1783  * case.
1784  */
1785     static int
1786 spell_valid_case(wordflags, treeflags)
1787     int	    wordflags;	    /* flags for the checked word. */
1788     int	    treeflags;	    /* flags for the word in the spell tree */
1789 {
1790     return ((wordflags == WF_ALLCAP && (treeflags & WF_FIXCAP) == 0)
1791 	    || ((treeflags & (WF_ALLCAP | WF_KEEPCAP)) == 0
1792 		&& ((treeflags & WF_ONECAP) == 0
1793 					   || (wordflags & WF_ONECAP) != 0)));
1794 }
1795 
1796 /*
1797  * Return TRUE if spell checking is not enabled.
1798  */
1799     static int
1800 no_spell_checking(wp)
1801     win_T	*wp;
1802 {
1803     if (!wp->w_p_spell || *wp->w_buffer->b_p_spl == NUL)
1804     {
1805 	EMSG(_("E756: Spell checking is not enabled"));
1806 	return TRUE;
1807     }
1808     return FALSE;
1809 }
1810 
1811 /*
1812  * Move to next spell error.
1813  * "curline" is FALSE for "[s", "]s", "[S" and "]S".
1814  * "curline" is TRUE to find word under/after cursor in the same line.
1815  * For Insert mode completion "dir" is BACKWARD and "curline" is TRUE: move
1816  * to after badly spelled word before the cursor.
1817  * Return 0 if not found, length of the badly spelled word otherwise.
1818  */
1819     int
1820 spell_move_to(wp, dir, allwords, curline, attrp)
1821     win_T	*wp;
1822     int		dir;		/* FORWARD or BACKWARD */
1823     int		allwords;	/* TRUE for "[s"/"]s", FALSE for "[S"/"]S" */
1824     int		curline;
1825     int		*attrp;		/* return: attributes of bad word or NULL */
1826 {
1827     linenr_T	lnum;
1828     pos_T	found_pos;
1829     int		found_len = 0;
1830     char_u	*line;
1831     char_u	*p;
1832     char_u	*endp;
1833     int		attr;
1834     int		len;
1835     int		has_syntax = syntax_present(wp->w_buffer);
1836     int		col;
1837     int		can_spell;
1838     char_u	*buf = NULL;
1839     int		buflen = 0;
1840     int		skip = 0;
1841     int		capcol = -1;
1842     int		found_one = FALSE;
1843     int		wrapped = FALSE;
1844 
1845     if (no_spell_checking(wp))
1846 	return 0;
1847 
1848     /*
1849      * Start looking for bad word at the start of the line, because we can't
1850      * start halfway a word, we don't know where the it starts or ends.
1851      *
1852      * When searching backwards, we continue in the line to find the last
1853      * bad word (in the cursor line: before the cursor).
1854      *
1855      * We concatenate the start of the next line, so that wrapped words work
1856      * (e.g. "et<line-break>cetera").  Doesn't work when searching backwards
1857      * though...
1858      */
1859     lnum = wp->w_cursor.lnum;
1860     found_pos.lnum = 0;
1861 
1862     while (!got_int)
1863     {
1864 	line = ml_get_buf(wp->w_buffer, lnum, FALSE);
1865 
1866 	len = STRLEN(line);
1867 	if (buflen < len + MAXWLEN + 2)
1868 	{
1869 	    vim_free(buf);
1870 	    buflen = len + MAXWLEN + 2;
1871 	    buf = alloc(buflen);
1872 	    if (buf == NULL)
1873 		break;
1874 	}
1875 
1876 	/* In first line check first word for Capital. */
1877 	if (lnum == 1)
1878 	    capcol = 0;
1879 
1880 	/* For checking first word with a capital skip white space. */
1881 	if (capcol == 0)
1882 	    capcol = skipwhite(line) - line;
1883 
1884 	/* Copy the line into "buf" and append the start of the next line if
1885 	 * possible. */
1886 	STRCPY(buf, line);
1887 	if (lnum < wp->w_buffer->b_ml.ml_line_count)
1888 	    spell_cat_line(buf + STRLEN(buf), ml_get(lnum + 1), MAXWLEN);
1889 
1890 	p = buf + skip;
1891 	endp = buf + len;
1892 	while (p < endp)
1893 	{
1894 	    /* When searching backward don't search after the cursor.  Unless
1895 	     * we wrapped around the end of the buffer. */
1896 	    if (dir == BACKWARD
1897 		    && lnum == wp->w_cursor.lnum
1898 		    && !wrapped
1899 		    && (colnr_T)(p - buf) >= wp->w_cursor.col)
1900 		break;
1901 
1902 	    /* start of word */
1903 	    attr = 0;
1904 	    len = spell_check(wp, p, &attr, &capcol);
1905 
1906 	    if (attr != 0)
1907 	    {
1908 		/* We found a bad word.  Check the attribute. */
1909 		if (allwords || attr == highlight_attr[HLF_SPB])
1910 		{
1911 		    found_one = TRUE;
1912 
1913 		    /* When searching forward only accept a bad word after
1914 		     * the cursor. */
1915 		    if (dir == BACKWARD
1916 			    || lnum != wp->w_cursor.lnum
1917 			    || (lnum == wp->w_cursor.lnum
1918 				&& (wrapped
1919 				    || (colnr_T)(curline ? p - buf + len
1920 						     : p - buf)
1921 						  > wp->w_cursor.col)))
1922 		    {
1923 			if (has_syntax)
1924 			{
1925 			    col = p - buf;
1926 			    (void)syn_get_id(wp, lnum, (colnr_T)col,
1927 						       FALSE, &can_spell);
1928 			}
1929 			else
1930 			    can_spell = TRUE;
1931 
1932 			if (can_spell)
1933 			{
1934 			    found_pos.lnum = lnum;
1935 			    found_pos.col = p - buf;
1936 #ifdef FEAT_VIRTUALEDIT
1937 			    found_pos.coladd = 0;
1938 #endif
1939 			    if (dir == FORWARD)
1940 			    {
1941 				/* No need to search further. */
1942 				wp->w_cursor = found_pos;
1943 				vim_free(buf);
1944 				if (attrp != NULL)
1945 				    *attrp = attr;
1946 				return len;
1947 			    }
1948 			    else if (curline)
1949 				/* Insert mode completion: put cursor after
1950 				 * the bad word. */
1951 				found_pos.col += len;
1952 			    found_len = len;
1953 			}
1954 		    }
1955 		}
1956 	    }
1957 
1958 	    /* advance to character after the word */
1959 	    p += len;
1960 	    capcol -= len;
1961 	}
1962 
1963 	if (dir == BACKWARD && found_pos.lnum != 0)
1964 	{
1965 	    /* Use the last match in the line (before the cursor). */
1966 	    wp->w_cursor = found_pos;
1967 	    vim_free(buf);
1968 	    return found_len;
1969 	}
1970 
1971 	if (curline)
1972 	    break;	/* only check cursor line */
1973 
1974 	/* Advance to next line. */
1975 	if (dir == BACKWARD)
1976 	{
1977 	    /* If we are back at the starting line and searched it again there
1978 	     * is no match, give up. */
1979 	    if (lnum == wp->w_cursor.lnum && wrapped)
1980 		break;
1981 
1982 	    if (lnum > 1)
1983 		--lnum;
1984 	    else if (!p_ws)
1985 		break;	    /* at first line and 'nowrapscan' */
1986 	    else
1987 	    {
1988 		/* Wrap around to the end of the buffer.  May search the
1989 		 * starting line again and accept the last match. */
1990 		lnum = wp->w_buffer->b_ml.ml_line_count;
1991 		wrapped = TRUE;
1992 		if (!shortmess(SHM_SEARCH))
1993 		    give_warning((char_u *)_(top_bot_msg), TRUE);
1994 	    }
1995 	    capcol = -1;
1996 	}
1997 	else
1998 	{
1999 	    if (lnum < wp->w_buffer->b_ml.ml_line_count)
2000 		++lnum;
2001 	    else if (!p_ws)
2002 		break;	    /* at first line and 'nowrapscan' */
2003 	    else
2004 	    {
2005 		/* Wrap around to the start of the buffer.  May search the
2006 		 * starting line again and accept the first match. */
2007 		lnum = 1;
2008 		wrapped = TRUE;
2009 		if (!shortmess(SHM_SEARCH))
2010 		    give_warning((char_u *)_(bot_top_msg), TRUE);
2011 	    }
2012 
2013 	    /* If we are back at the starting line and there is no match then
2014 	     * give up. */
2015 	    if (lnum == wp->w_cursor.lnum && !found_one)
2016 		break;
2017 
2018 	    /* Skip the characters at the start of the next line that were
2019 	     * included in a match crossing line boundaries. */
2020 	    if (attr == 0)
2021 		skip = p - endp;
2022 	    else
2023 		skip = 0;
2024 
2025 	    /* Capscol skips over the inserted space. */
2026 	    --capcol;
2027 
2028 	    /* But after empty line check first word in next line */
2029 	    if (*skipwhite(line) == NUL)
2030 		capcol = 0;
2031 	}
2032 
2033 	line_breakcheck();
2034     }
2035 
2036     vim_free(buf);
2037     return 0;
2038 }
2039 
2040 /*
2041  * For spell checking: concatenate the start of the following line "line" into
2042  * "buf", blanking-out special characters.  Copy less then "maxlen" bytes.
2043  */
2044     void
2045 spell_cat_line(buf, line, maxlen)
2046     char_u	*buf;
2047     char_u	*line;
2048     int		maxlen;
2049 {
2050     char_u	*p;
2051     int		n;
2052 
2053     p = skipwhite(line);
2054     while (vim_strchr((char_u *)"*#/\"\t", *p) != NULL)
2055 	p = skipwhite(p + 1);
2056 
2057     if (*p != NUL)
2058     {
2059 	*buf = ' ';
2060 	vim_strncpy(buf + 1, line, maxlen - 2);
2061 	n = p - line;
2062 	if (n >= maxlen)
2063 	    n = maxlen - 1;
2064 	vim_memset(buf + 1, ' ', n);
2065     }
2066 }
2067 
2068 typedef struct spelload_S
2069 {
2070     char_u  sl_lang[MAXWLEN + 1];	/* language name */
2071     slang_T *sl_slang;			/* resulting slang_T struct */
2072     int	    sl_nobreak;			/* NOBREAK language found */
2073 } spelload_T;
2074 
2075 /*
2076  * Load word list(s) for "lang" from Vim spell file(s).
2077  * "lang" must be the language without the region: e.g., "en".
2078  */
2079     static void
2080 spell_load_lang(lang)
2081     char_u	*lang;
2082 {
2083     char_u	fname_enc[85];
2084     int		r;
2085     spelload_T	sl;
2086 
2087     /* Copy the language name to pass it to spell_load_cb() as a cookie.
2088      * It's truncated when an error is detected. */
2089     STRCPY(sl.sl_lang, lang);
2090     sl.sl_slang = NULL;
2091     sl.sl_nobreak = FALSE;
2092 
2093     /*
2094      * Find the first spell file for "lang" in 'runtimepath' and load it.
2095      */
2096     vim_snprintf((char *)fname_enc, sizeof(fname_enc) - 5,
2097 					"spell/%s.%s.spl", lang, spell_enc());
2098     r = do_in_runtimepath(fname_enc, FALSE, spell_load_cb, &sl);
2099 
2100     if (r == FAIL && *sl.sl_lang != NUL)
2101     {
2102 	/* Try loading the ASCII version. */
2103 	vim_snprintf((char *)fname_enc, sizeof(fname_enc) - 5,
2104 						  "spell/%s.ascii.spl", lang);
2105 	r = do_in_runtimepath(fname_enc, FALSE, spell_load_cb, &sl);
2106     }
2107 
2108     if (r == FAIL)
2109 	smsg((char_u *)_("Warning: Cannot find word list \"%s.%s.spl\" or \"%s.ascii.spl\""),
2110 						     lang, spell_enc(), lang);
2111     else if (sl.sl_slang != NULL)
2112     {
2113 	/* At least one file was loaded, now load all the additions. */
2114 	STRCPY(fname_enc + STRLEN(fname_enc) - 3, "add.spl");
2115 	do_in_runtimepath(fname_enc, TRUE, spell_load_cb, &sl);
2116     }
2117 }
2118 
2119 /*
2120  * Return the encoding used for spell checking: Use 'encoding', except that we
2121  * use "latin1" for "latin9".  And limit to 60 characters (just in case).
2122  */
2123     static char_u *
2124 spell_enc()
2125 {
2126 
2127 #ifdef FEAT_MBYTE
2128     if (STRLEN(p_enc) < 60 && STRCMP(p_enc, "iso-8859-15") != 0)
2129 	return p_enc;
2130 #endif
2131     return (char_u *)"latin1";
2132 }
2133 
2134 /*
2135  * Get the name of the .spl file for the internal wordlist into
2136  * "fname[MAXPATHL]".
2137  */
2138     static void
2139 int_wordlist_spl(fname)
2140     char_u	    *fname;
2141 {
2142     vim_snprintf((char *)fname, MAXPATHL, "%s.%s.spl",
2143 						  int_wordlist, spell_enc());
2144 }
2145 
2146 /*
2147  * Allocate a new slang_T.
2148  * Caller must fill "sl_next".
2149  */
2150     static slang_T *
2151 slang_alloc(lang)
2152     char_u	*lang;
2153 {
2154     slang_T *lp;
2155 
2156     lp = (slang_T *)alloc_clear(sizeof(slang_T));
2157     if (lp != NULL)
2158     {
2159 	lp->sl_name = vim_strsave(lang);
2160 	ga_init2(&lp->sl_rep, sizeof(fromto_T), 10);
2161 	lp->sl_compmax = MAXWLEN;
2162 	lp->sl_compsylmax = MAXWLEN;
2163     }
2164     return lp;
2165 }
2166 
2167 /*
2168  * Free the contents of an slang_T and the structure itself.
2169  */
2170     static void
2171 slang_free(lp)
2172     slang_T	*lp;
2173 {
2174     vim_free(lp->sl_name);
2175     vim_free(lp->sl_fname);
2176     slang_clear(lp);
2177     vim_free(lp);
2178 }
2179 
2180 /*
2181  * Clear an slang_T so that the file can be reloaded.
2182  */
2183     static void
2184 slang_clear(lp)
2185     slang_T	*lp;
2186 {
2187     garray_T	*gap;
2188     fromto_T	*ftp;
2189     salitem_T	*smp;
2190     int		i;
2191 
2192     vim_free(lp->sl_fbyts);
2193     lp->sl_fbyts = NULL;
2194     vim_free(lp->sl_kbyts);
2195     lp->sl_kbyts = NULL;
2196     vim_free(lp->sl_pbyts);
2197     lp->sl_pbyts = NULL;
2198 
2199     vim_free(lp->sl_fidxs);
2200     lp->sl_fidxs = NULL;
2201     vim_free(lp->sl_kidxs);
2202     lp->sl_kidxs = NULL;
2203     vim_free(lp->sl_pidxs);
2204     lp->sl_pidxs = NULL;
2205 
2206     gap = &lp->sl_rep;
2207     while (gap->ga_len > 0)
2208     {
2209 	ftp = &((fromto_T *)gap->ga_data)[--gap->ga_len];
2210 	vim_free(ftp->ft_from);
2211 	vim_free(ftp->ft_to);
2212     }
2213     ga_clear(gap);
2214 
2215     gap = &lp->sl_sal;
2216     if (lp->sl_sofo)
2217     {
2218 	/* "ga_len" is set to 1 without adding an item for latin1 */
2219 	if (gap->ga_data != NULL)
2220 	    /* SOFOFROM and SOFOTO items: free lists of wide characters. */
2221 	    for (i = 0; i < gap->ga_len; ++i)
2222 		vim_free(((int **)gap->ga_data)[i]);
2223     }
2224     else
2225 	/* SAL items: free salitem_T items */
2226 	while (gap->ga_len > 0)
2227 	{
2228 	    smp = &((salitem_T *)gap->ga_data)[--gap->ga_len];
2229 	    vim_free(smp->sm_lead);
2230 	    /* Don't free sm_oneof and sm_rules, they point into sm_lead. */
2231 	    vim_free(smp->sm_to);
2232 #ifdef FEAT_MBYTE
2233 	    vim_free(smp->sm_lead_w);
2234 	    vim_free(smp->sm_oneof_w);
2235 	    vim_free(smp->sm_to_w);
2236 #endif
2237 	}
2238     ga_clear(gap);
2239 
2240     for (i = 0; i < lp->sl_prefixcnt; ++i)
2241 	vim_free(lp->sl_prefprog[i]);
2242     lp->sl_prefixcnt = 0;
2243     vim_free(lp->sl_prefprog);
2244     lp->sl_prefprog = NULL;
2245 
2246     vim_free(lp->sl_midword);
2247     lp->sl_midword = NULL;
2248 
2249     vim_free(lp->sl_compprog);
2250     vim_free(lp->sl_compstartflags);
2251     vim_free(lp->sl_compallflags);
2252     lp->sl_compprog = NULL;
2253     lp->sl_compstartflags = NULL;
2254     lp->sl_compallflags = NULL;
2255 
2256     vim_free(lp->sl_syllable);
2257     lp->sl_syllable = NULL;
2258     ga_clear(&lp->sl_syl_items);
2259 
2260 #ifdef FEAT_MBYTE
2261     {
2262 	int	    todo = lp->sl_map_hash.ht_used;
2263 	hashitem_T  *hi;
2264 
2265 	for (hi = lp->sl_map_hash.ht_array; todo > 0; ++hi)
2266 	    if (!HASHITEM_EMPTY(hi))
2267 	    {
2268 		--todo;
2269 		vim_free(hi->hi_key);
2270 	    }
2271     }
2272     hash_clear(&lp->sl_map_hash);
2273 #endif
2274 
2275     lp->sl_compmax = MAXWLEN;
2276     lp->sl_compminlen = 0;
2277     lp->sl_compsylmax = MAXWLEN;
2278     lp->sl_regions[0] = NUL;
2279 }
2280 
2281 /*
2282  * Load one spell file and store the info into a slang_T.
2283  * Invoked through do_in_runtimepath().
2284  */
2285     static void
2286 spell_load_cb(fname, cookie)
2287     char_u	*fname;
2288     void	*cookie;
2289 {
2290     spelload_T	*slp = (spelload_T *)cookie;
2291     slang_T	*slang;
2292 
2293     slang = spell_load_file(fname, slp->sl_lang, NULL, FALSE);
2294     if (slang != NULL)
2295     {
2296 	/* When a previously loaded file has NOBREAK also use it for the
2297 	 * ".add" files. */
2298 	if (slp->sl_nobreak && slang->sl_add)
2299 	    slang->sl_nobreak = TRUE;
2300 	else if (slang->sl_nobreak)
2301 	    slp->sl_nobreak = TRUE;
2302 
2303 	slp->sl_slang = slang;
2304     }
2305 }
2306 
2307 /*
2308  * Load one spell file and store the info into a slang_T.
2309  *
2310  * This is invoked in two ways:
2311  * - From spell_load_cb() to load a spell file for the first time.  "lang" is
2312  *   the language name, "old_lp" is NULL.  Will allocate an slang_T.
2313  * - To reload a spell file that was changed.  "lang" is NULL and "old_lp"
2314  *   points to the existing slang_T.
2315  * Returns the slang_T the spell file was loaded into.  NULL for error.
2316  */
2317     static slang_T *
2318 spell_load_file(fname, lang, old_lp, silent)
2319     char_u	*fname;
2320     char_u	*lang;
2321     slang_T	*old_lp;
2322     int		silent;		/* no error if file doesn't exist */
2323 {
2324     FILE	*fd;
2325     char_u	buf[VIMSPELLMAGICL];
2326     char_u	*p;
2327     char_u	*bp;
2328     idx_T	*ip;
2329     int		i;
2330     int		n;
2331     int		len;
2332     int		round;
2333     char_u	*save_sourcing_name = sourcing_name;
2334     linenr_T	save_sourcing_lnum = sourcing_lnum;
2335     slang_T	*lp = NULL;
2336     idx_T	idx;
2337     int		c = 0;
2338     int		res;
2339 
2340     fd = mch_fopen((char *)fname, "r");
2341     if (fd == NULL)
2342     {
2343 	if (!silent)
2344 	    EMSG2(_(e_notopen), fname);
2345 	else if (p_verbose > 2)
2346 	{
2347 	    verbose_enter();
2348 	    smsg((char_u *)e_notopen, fname);
2349 	    verbose_leave();
2350 	}
2351 	goto endFAIL;
2352     }
2353     if (p_verbose > 2)
2354     {
2355 	verbose_enter();
2356 	smsg((char_u *)_("Reading spell file \"%s\""), fname);
2357 	verbose_leave();
2358     }
2359 
2360     if (old_lp == NULL)
2361     {
2362 	lp = slang_alloc(lang);
2363 	if (lp == NULL)
2364 	    goto endFAIL;
2365 
2366 	/* Remember the file name, used to reload the file when it's updated. */
2367 	lp->sl_fname = vim_strsave(fname);
2368 	if (lp->sl_fname == NULL)
2369 	    goto endFAIL;
2370 
2371 	/* Check for .add.spl. */
2372 	lp->sl_add = strstr((char *)gettail(fname), ".add.") != NULL;
2373     }
2374     else
2375 	lp = old_lp;
2376 
2377     /* Set sourcing_name, so that error messages mention the file name. */
2378     sourcing_name = fname;
2379     sourcing_lnum = 0;
2380 
2381     /* <HEADER>: <fileID>
2382      */
2383     for (i = 0; i < VIMSPELLMAGICL; ++i)
2384 	buf[i] = getc(fd);				/* <fileID> */
2385     if (STRNCMP(buf, VIMSPELLMAGIC, VIMSPELLMAGICL) != 0)
2386     {
2387 	EMSG(_("E757: This does not look like a spell file"));
2388 	goto endFAIL;
2389     }
2390     c = getc(fd);					/* <versionnr> */
2391     if (c < VIMSPELLVERSION)
2392     {
2393 	EMSG(_("E771: Old spell file, needs to be updated"));
2394 	goto endFAIL;
2395     }
2396     else if (c > VIMSPELLVERSION)
2397     {
2398 	EMSG(_("E772: Spell file is for newer version of Vim"));
2399 	goto endFAIL;
2400     }
2401 
2402 
2403     /*
2404      * <SECTIONS>: <section> ... <sectionend>
2405      * <section>: <sectionID> <sectionflags> <sectionlen> (section contents)
2406      */
2407     for (;;)
2408     {
2409 	n = getc(fd);			    /* <sectionID> or <sectionend> */
2410 	if (n == SN_END)
2411 	    break;
2412 	c = getc(fd);					/* <sectionflags> */
2413 	len = (getc(fd) << 24) + (getc(fd) << 16) + (getc(fd) << 8) + getc(fd);
2414 							/* <sectionlen> */
2415 	if (len < 0)
2416 	    goto truncerr;
2417 
2418 	res = 0;
2419 	switch (n)
2420 	{
2421 	    case SN_REGION:
2422 		res = read_region_section(fd, lp, len);
2423 		break;
2424 
2425 	    case SN_CHARFLAGS:
2426 		res = read_charflags_section(fd);
2427 		break;
2428 
2429 	    case SN_MIDWORD:
2430 		lp->sl_midword = read_string(fd, len);	/* <midword> */
2431 		if (lp->sl_midword == NULL)
2432 		    goto endFAIL;
2433 		break;
2434 
2435 	    case SN_PREFCOND:
2436 		res = read_prefcond_section(fd, lp);
2437 		break;
2438 
2439 	    case SN_REP:
2440 		res = read_rep_section(fd, lp);
2441 		break;
2442 
2443 	    case SN_SAL:
2444 		res = read_sal_section(fd, lp);
2445 		break;
2446 
2447 	    case SN_SOFO:
2448 		res = read_sofo_section(fd, lp);
2449 		break;
2450 
2451 	    case SN_MAP:
2452 		p = read_string(fd, len);		/* <mapstr> */
2453 		if (p == NULL)
2454 		    goto endFAIL;
2455 		set_map_str(lp, p);
2456 		vim_free(p);
2457 		break;
2458 
2459 	    case SN_COMPOUND:
2460 		res = read_compound(fd, lp, len);
2461 		break;
2462 
2463 	    case SN_NOBREAK:
2464 		lp->sl_nobreak = TRUE;
2465 		break;
2466 
2467 	    case SN_SYLLABLE:
2468 		lp->sl_syllable = read_string(fd, len);	/* <syllable> */
2469 		if (lp->sl_syllable == NULL)
2470 		    goto endFAIL;
2471 		if (init_syl_tab(lp) == FAIL)
2472 		    goto endFAIL;
2473 		break;
2474 
2475 	    default:
2476 		/* Unsupported section.  When it's required give an error
2477 		 * message.  When it's not required skip the contents. */
2478 		if (c & SNF_REQUIRED)
2479 		{
2480 		    EMSG(_("E770: Unsupported section in spell file"));
2481 		    goto endFAIL;
2482 		}
2483 		while (--len >= 0)
2484 		    if (getc(fd) < 0)
2485 			goto truncerr;
2486 		break;
2487 	}
2488 	if (res == SP_FORMERROR)
2489 	{
2490 formerr:
2491 	    EMSG(_(e_format));
2492 	    goto endFAIL;
2493 	}
2494 	if (res == SP_TRUNCERROR)
2495 	{
2496 truncerr:
2497 	    EMSG(_(e_spell_trunc));
2498 	    goto endFAIL;
2499 	}
2500 	if (res == SP_OTHERERROR)
2501 	    goto endFAIL;
2502     }
2503 
2504     /* round 1: <LWORDTREE>
2505      * round 2: <KWORDTREE>
2506      * round 3: <PREFIXTREE> */
2507     for (round = 1; round <= 3; ++round)
2508     {
2509 	/* The tree size was computed when writing the file, so that we can
2510 	 * allocate it as one long block. <nodecount> */
2511 	len = (getc(fd) << 24) + (getc(fd) << 16) + (getc(fd) << 8) + getc(fd);
2512 	if (len < 0)
2513 	    goto truncerr;
2514 	if (len > 0)
2515 	{
2516 	    /* Allocate the byte array. */
2517 	    bp = lalloc((long_u)len, TRUE);
2518 	    if (bp == NULL)
2519 		goto endFAIL;
2520 	    if (round == 1)
2521 		lp->sl_fbyts = bp;
2522 	    else if (round == 2)
2523 		lp->sl_kbyts = bp;
2524 	    else
2525 		lp->sl_pbyts = bp;
2526 
2527 	    /* Allocate the index array. */
2528 	    ip = (idx_T *)lalloc_clear((long_u)(len * sizeof(int)), TRUE);
2529 	    if (ip == NULL)
2530 		goto endFAIL;
2531 	    if (round == 1)
2532 		lp->sl_fidxs = ip;
2533 	    else if (round == 2)
2534 		lp->sl_kidxs = ip;
2535 	    else
2536 		lp->sl_pidxs = ip;
2537 
2538 	    /* Read the tree and store it in the array. */
2539 	    idx = read_tree(fd, bp, ip, len, 0, round == 3, lp->sl_prefixcnt);
2540 	    if (idx == -1)
2541 		goto truncerr;
2542 	    if (idx < 0)
2543 		goto formerr;
2544 	}
2545     }
2546 
2547     /* For a new file link it in the list of spell files. */
2548     if (old_lp == NULL)
2549     {
2550 	lp->sl_next = first_lang;
2551 	first_lang = lp;
2552     }
2553 
2554     goto endOK;
2555 
2556 endFAIL:
2557     if (lang != NULL)
2558 	/* truncating the name signals the error to spell_load_lang() */
2559 	*lang = NUL;
2560     if (lp != NULL && old_lp == NULL)
2561 	slang_free(lp);
2562     lp = NULL;
2563 
2564 endOK:
2565     if (fd != NULL)
2566 	fclose(fd);
2567     sourcing_name = save_sourcing_name;
2568     sourcing_lnum = save_sourcing_lnum;
2569 
2570     return lp;
2571 }
2572 
2573 /*
2574  * Read a length field from "fd" in "cnt_bytes" bytes.
2575  * Allocate memory, read the string into it and add a NUL at the end.
2576  * Returns NULL when the count is zero.
2577  * Sets "*cntp" to SP_*ERROR when there is an error, length of the result
2578  * otherwise.
2579  */
2580     static char_u *
2581 read_cnt_string(fd, cnt_bytes, cntp)
2582     FILE	*fd;
2583     int		cnt_bytes;
2584     int		*cntp;
2585 {
2586     int		cnt = 0;
2587     int		i;
2588     char_u	*str;
2589 
2590     /* read the length bytes, MSB first */
2591     for (i = 0; i < cnt_bytes; ++i)
2592 	cnt = (cnt << 8) + getc(fd);
2593     if (cnt < 0)
2594     {
2595 	*cntp = SP_TRUNCERROR;
2596 	return NULL;
2597     }
2598     *cntp = cnt;
2599     if (cnt == 0)
2600 	return NULL;	    /* nothing to read, return NULL */
2601 
2602     str = read_string(fd, cnt);
2603     if (str == NULL)
2604 	*cntp = SP_OTHERERROR;
2605     return str;
2606 }
2607 
2608 /*
2609  * Read a string of length "cnt" from "fd" into allocated memory.
2610  * Returns NULL when out of memory.
2611  */
2612     static char_u *
2613 read_string(fd, cnt)
2614     FILE	*fd;
2615     int		cnt;
2616 {
2617     char_u	*str;
2618     int		i;
2619 
2620     /* allocate memory */
2621     str = alloc((unsigned)cnt + 1);
2622     if (str != NULL)
2623     {
2624 	/* Read the string.  Doesn't check for truncated file. */
2625 	for (i = 0; i < cnt; ++i)
2626 	    str[i] = getc(fd);
2627 	str[i] = NUL;
2628     }
2629     return str;
2630 }
2631 
2632 /*
2633  * Read SN_REGION: <regionname> ...
2634  * Return SP_*ERROR flags.
2635  */
2636     static int
2637 read_region_section(fd, lp, len)
2638     FILE	*fd;
2639     slang_T	*lp;
2640     int		len;
2641 {
2642     int		i;
2643 
2644     if (len > 16)
2645 	return SP_FORMERROR;
2646     for (i = 0; i < len; ++i)
2647 	lp->sl_regions[i] = getc(fd);			/* <regionname> */
2648     lp->sl_regions[len] = NUL;
2649     return 0;
2650 }
2651 
2652 /*
2653  * Read SN_CHARFLAGS section: <charflagslen> <charflags>
2654  *				<folcharslen> <folchars>
2655  * Return SP_*ERROR flags.
2656  */
2657     static int
2658 read_charflags_section(fd)
2659     FILE	*fd;
2660 {
2661     char_u	*flags;
2662     char_u	*fol;
2663     int		flagslen, follen;
2664 
2665     /* <charflagslen> <charflags> */
2666     flags = read_cnt_string(fd, 1, &flagslen);
2667     if (flagslen < 0)
2668 	return flagslen;
2669 
2670     /* <folcharslen> <folchars> */
2671     fol = read_cnt_string(fd, 2, &follen);
2672     if (follen < 0)
2673     {
2674 	vim_free(flags);
2675 	return follen;
2676     }
2677 
2678     /* Set the word-char flags and fill SPELL_ISUPPER() table. */
2679     if (flags != NULL && fol != NULL)
2680 	set_spell_charflags(flags, flagslen, fol);
2681 
2682     vim_free(flags);
2683     vim_free(fol);
2684 
2685     /* When <charflagslen> is zero then <fcharlen> must also be zero. */
2686     if ((flags == NULL) != (fol == NULL))
2687 	return SP_FORMERROR;
2688     return 0;
2689 }
2690 
2691 /*
2692  * Read SN_PREFCOND section.
2693  * Return SP_*ERROR flags.
2694  */
2695     static int
2696 read_prefcond_section(fd, lp)
2697     FILE	*fd;
2698     slang_T	*lp;
2699 {
2700     int		cnt;
2701     int		i;
2702     int		n;
2703     char_u	*p;
2704     char_u	buf[MAXWLEN + 1];
2705 
2706     /* <prefcondcnt> <prefcond> ... */
2707     cnt = (getc(fd) << 8) + getc(fd);			/* <prefcondcnt> */
2708     if (cnt <= 0)
2709 	return SP_FORMERROR;
2710 
2711     lp->sl_prefprog = (regprog_T **)alloc_clear(
2712 					 (unsigned)sizeof(regprog_T *) * cnt);
2713     if (lp->sl_prefprog == NULL)
2714 	return SP_OTHERERROR;
2715     lp->sl_prefixcnt = cnt;
2716 
2717     for (i = 0; i < cnt; ++i)
2718     {
2719 	/* <prefcond> : <condlen> <condstr> */
2720 	n = getc(fd);					/* <condlen> */
2721 	if (n < 0 || n >= MAXWLEN)
2722 	    return SP_FORMERROR;
2723 
2724 	/* When <condlen> is zero we have an empty condition.  Otherwise
2725 	 * compile the regexp program used to check for the condition. */
2726 	if (n > 0)
2727 	{
2728 	    buf[0] = '^';	    /* always match at one position only */
2729 	    p = buf + 1;
2730 	    while (n-- > 0)
2731 		*p++ = getc(fd);			/* <condstr> */
2732 	    *p = NUL;
2733 	    lp->sl_prefprog[i] = vim_regcomp(buf, RE_MAGIC + RE_STRING);
2734 	}
2735     }
2736     return 0;
2737 }
2738 
2739 /*
2740  * Read REP items section from "fd": <repcount> <rep> ...
2741  * Return SP_*ERROR flags.
2742  */
2743     static int
2744 read_rep_section(fd, slang)
2745     FILE	*fd;
2746     slang_T	*slang;
2747 {
2748     int		cnt;
2749     garray_T	*gap;
2750     fromto_T	*ftp;
2751     short	*first;
2752     int		i;
2753 
2754     cnt = (getc(fd) << 8) + getc(fd);			/* <repcount> */
2755     if (cnt < 0)
2756 	return SP_TRUNCERROR;
2757 
2758     gap = &slang->sl_rep;
2759     if (ga_grow(gap, cnt) == FAIL)
2760 	return SP_OTHERERROR;
2761 
2762     /* <rep> : <repfromlen> <repfrom> <reptolen> <repto> */
2763     for (; gap->ga_len < cnt; ++gap->ga_len)
2764     {
2765 	ftp = &((fromto_T *)gap->ga_data)[gap->ga_len];
2766 	ftp->ft_from = read_cnt_string(fd, 1, &i);
2767 	if (i < 0)
2768 	    return i;
2769 	if (i == 0)
2770 	    return SP_FORMERROR;
2771 	ftp->ft_to = read_cnt_string(fd, 1, &i);
2772 	if (i <= 0)
2773 	{
2774 	    vim_free(ftp->ft_from);
2775 	    if (i < 0)
2776 		return i;
2777 	    return SP_FORMERROR;
2778 	}
2779     }
2780 
2781     /* Fill the first-index table. */
2782     first = slang->sl_rep_first;
2783     for (i = 0; i < 256; ++i)
2784 	first[i] = -1;
2785     for (i = 0; i < gap->ga_len; ++i)
2786     {
2787 	ftp = &((fromto_T *)gap->ga_data)[i];
2788 	if (first[*ftp->ft_from] == -1)
2789 	    first[*ftp->ft_from] = i;
2790     }
2791     return 0;
2792 }
2793 
2794 /*
2795  * Read SN_SAL section: <salflags> <salcount> <sal> ...
2796  * Return SP_*ERROR flags.
2797  */
2798     static int
2799 read_sal_section(fd, slang)
2800     FILE	*fd;
2801     slang_T	*slang;
2802 {
2803     int		i;
2804     int		cnt;
2805     garray_T	*gap;
2806     salitem_T	*smp;
2807     int		ccnt;
2808     char_u	*p;
2809     int		c = NUL;
2810 
2811     slang->sl_sofo = FALSE;
2812 
2813     i = getc(fd);				/* <salflags> */
2814     if (i & SAL_F0LLOWUP)
2815 	slang->sl_followup = TRUE;
2816     if (i & SAL_COLLAPSE)
2817 	slang->sl_collapse = TRUE;
2818     if (i & SAL_REM_ACCENTS)
2819 	slang->sl_rem_accents = TRUE;
2820 
2821     cnt = (getc(fd) << 8) + getc(fd);		/* <salcount> */
2822     if (cnt < 0)
2823 	return SP_TRUNCERROR;
2824 
2825     gap = &slang->sl_sal;
2826     ga_init2(gap, sizeof(salitem_T), 10);
2827     if (ga_grow(gap, cnt) == FAIL)
2828 	return SP_OTHERERROR;
2829 
2830     /* <sal> : <salfromlen> <salfrom> <saltolen> <salto> */
2831     for (; gap->ga_len < cnt; ++gap->ga_len)
2832     {
2833 	smp = &((salitem_T *)gap->ga_data)[gap->ga_len];
2834 	ccnt = getc(fd);			/* <salfromlen> */
2835 	if (ccnt < 0)
2836 	    return SP_TRUNCERROR;
2837 	if ((p = alloc(ccnt + 2)) == NULL)
2838 	    return SP_OTHERERROR;
2839 	smp->sm_lead = p;
2840 
2841 	/* Read up to the first special char into sm_lead. */
2842 	for (i = 0; i < ccnt; ++i)
2843 	{
2844 	    c = getc(fd);			/* <salfrom> */
2845 	    if (vim_strchr((char_u *)"0123456789(-<^$", c) != NULL)
2846 		break;
2847 	    *p++ = c;
2848 	}
2849 	smp->sm_leadlen = p - smp->sm_lead;
2850 	*p++ = NUL;
2851 
2852 	/* Put (abc) chars in sm_oneof, if any. */
2853 	if (c == '(')
2854 	{
2855 	    smp->sm_oneof = p;
2856 	    for (++i; i < ccnt; ++i)
2857 	    {
2858 		c = getc(fd);			/* <salfrom> */
2859 		if (c == ')')
2860 		    break;
2861 		*p++ = c;
2862 	    }
2863 	    *p++ = NUL;
2864 	    if (++i < ccnt)
2865 		c = getc(fd);
2866 	}
2867 	else
2868 	    smp->sm_oneof = NULL;
2869 
2870 	/* Any following chars go in sm_rules. */
2871 	smp->sm_rules = p;
2872 	if (i < ccnt)
2873 	    /* store the char we got while checking for end of sm_lead */
2874 	    *p++ = c;
2875 	for (++i; i < ccnt; ++i)
2876 	    *p++ = getc(fd);			/* <salfrom> */
2877 	*p++ = NUL;
2878 
2879 	/* <saltolen> <salto> */
2880 	smp->sm_to = read_cnt_string(fd, 1, &ccnt);
2881 	if (ccnt < 0)
2882 	{
2883 	    vim_free(smp->sm_lead);
2884 	    return ccnt;
2885 	}
2886 
2887 #ifdef FEAT_MBYTE
2888 	if (has_mbyte)
2889 	{
2890 	    /* convert the multi-byte strings to wide char strings */
2891 	    smp->sm_lead_w = mb_str2wide(smp->sm_lead);
2892 	    smp->sm_leadlen = mb_charlen(smp->sm_lead);
2893 	    if (smp->sm_oneof == NULL)
2894 		smp->sm_oneof_w = NULL;
2895 	    else
2896 		smp->sm_oneof_w = mb_str2wide(smp->sm_oneof);
2897 	    if (smp->sm_to == NULL)
2898 		smp->sm_to_w = NULL;
2899 	    else
2900 		smp->sm_to_w = mb_str2wide(smp->sm_to);
2901 	    if (smp->sm_lead_w == NULL
2902 		    || (smp->sm_oneof_w == NULL && smp->sm_oneof != NULL)
2903 		    || (smp->sm_to_w == NULL && smp->sm_to != NULL))
2904 	    {
2905 		vim_free(smp->sm_lead);
2906 		vim_free(smp->sm_to);
2907 		vim_free(smp->sm_lead_w);
2908 		vim_free(smp->sm_oneof_w);
2909 		vim_free(smp->sm_to_w);
2910 		return SP_OTHERERROR;
2911 	    }
2912 	}
2913 #endif
2914     }
2915 
2916     /* Fill the first-index table. */
2917     set_sal_first(slang);
2918 
2919     return 0;
2920 }
2921 
2922 /*
2923  * SN_SOFO: <sofofromlen> <sofofrom> <sofotolen> <sofoto>
2924  * Return SP_*ERROR flags.
2925  */
2926     static int
2927 read_sofo_section(fd, slang)
2928     FILE	*fd;
2929     slang_T	*slang;
2930 {
2931     int		cnt;
2932     char_u	*from, *to;
2933     int		res;
2934 
2935     slang->sl_sofo = TRUE;
2936 
2937     /* <sofofromlen> <sofofrom> */
2938     from = read_cnt_string(fd, 2, &cnt);
2939     if (cnt < 0)
2940 	return cnt;
2941 
2942     /* <sofotolen> <sofoto> */
2943     to = read_cnt_string(fd, 2, &cnt);
2944     if (cnt < 0)
2945     {
2946 	vim_free(from);
2947 	return cnt;
2948     }
2949 
2950     /* Store the info in slang->sl_sal and/or slang->sl_sal_first. */
2951     if (from != NULL && to != NULL)
2952 	res = set_sofo(slang, from, to);
2953     else if (from != NULL || to != NULL)
2954 	res = SP_FORMERROR;    /* only one of two strings is an error */
2955     else
2956 	res = 0;
2957 
2958     vim_free(from);
2959     vim_free(to);
2960     return res;
2961 }
2962 
2963 /*
2964  * Read the compound section from the .spl file:
2965  *	<compmax> <compminlen> <compsylmax> <compflags>
2966  * Returns SP_*ERROR flags.
2967  */
2968     static int
2969 read_compound(fd, slang, len)
2970     FILE	*fd;
2971     slang_T	*slang;
2972     int		len;
2973 {
2974     int		todo = len;
2975     int		c;
2976     int		atstart;
2977     char_u	*pat;
2978     char_u	*pp;
2979     char_u	*cp;
2980     char_u	*ap;
2981 
2982     if (todo < 2)
2983 	return SP_FORMERROR;	/* need at least two bytes */
2984 
2985     --todo;
2986     c = getc(fd);					/* <compmax> */
2987     if (c < 2)
2988 	c = MAXWLEN;
2989     slang->sl_compmax = c;
2990 
2991     --todo;
2992     c = getc(fd);					/* <compminlen> */
2993     if (c < 1)
2994 	c = 0;
2995     slang->sl_compminlen = c;
2996 
2997     --todo;
2998     c = getc(fd);					/* <compsylmax> */
2999     if (c < 1)
3000 	c = MAXWLEN;
3001     slang->sl_compsylmax = c;
3002 
3003     /* Turn the COMPOUNDFLAGS items into a regexp pattern:
3004      * "a[bc]/a*b+" -> "^\(a[bc]\|a*b\+\)$".
3005      * Inserting backslashes may double the length, "^\(\)$<Nul>" is 7 bytes.
3006      * Conversion to utf-8 may double the size. */
3007     c = todo * 2 + 7;
3008 #ifdef FEAT_MBYTE
3009     if (enc_utf8)
3010 	c += todo * 2;
3011 #endif
3012     pat = alloc((unsigned)c);
3013     if (pat == NULL)
3014 	return SP_OTHERERROR;
3015 
3016     /* We also need a list of all flags that can appear at the start and one
3017      * for all flags. */
3018     cp = alloc(todo + 1);
3019     if (cp == NULL)
3020     {
3021 	vim_free(pat);
3022 	return SP_OTHERERROR;
3023     }
3024     slang->sl_compstartflags = cp;
3025     *cp = NUL;
3026 
3027     ap = alloc(todo + 1);
3028     if (ap == NULL)
3029     {
3030 	vim_free(pat);
3031 	return SP_OTHERERROR;
3032     }
3033     slang->sl_compallflags = ap;
3034     *ap = NUL;
3035 
3036     pp = pat;
3037     *pp++ = '^';
3038     *pp++ = '\\';
3039     *pp++ = '(';
3040 
3041     atstart = 1;
3042     while (todo-- > 0)
3043     {
3044 	c = getc(fd);					/* <compflags> */
3045 
3046 	/* Add all flags to "sl_compallflags". */
3047 	if (vim_strchr((char_u *)"+*[]/", c) == NULL
3048 		&& !byte_in_str(slang->sl_compallflags, c))
3049 	{
3050 	    *ap++ = c;
3051 	    *ap = NUL;
3052 	}
3053 
3054 	if (atstart != 0)
3055 	{
3056 	    /* At start of item: copy flags to "sl_compstartflags".  For a
3057 	     * [abc] item set "atstart" to 2 and copy up to the ']'. */
3058 	    if (c == '[')
3059 		atstart = 2;
3060 	    else if (c == ']')
3061 		atstart = 0;
3062 	    else
3063 	    {
3064 		if (!byte_in_str(slang->sl_compstartflags, c))
3065 		{
3066 		    *cp++ = c;
3067 		    *cp = NUL;
3068 		}
3069 		if (atstart == 1)
3070 		    atstart = 0;
3071 	    }
3072 	}
3073 	if (c == '/')	    /* slash separates two items */
3074 	{
3075 	    *pp++ = '\\';
3076 	    *pp++ = '|';
3077 	    atstart = 1;
3078 	}
3079 	else		    /* normal char, "[abc]" and '*' are copied as-is */
3080 	{
3081 	    if (c == '+' || c == '~')
3082 		*pp++ = '\\';	    /* "a+" becomes "a\+" */
3083 #ifdef FEAT_MBYTE
3084 	    if (enc_utf8)
3085 		pp += mb_char2bytes(c, pp);
3086 	    else
3087 #endif
3088 		*pp++ = c;
3089 	}
3090     }
3091 
3092     *pp++ = '\\';
3093     *pp++ = ')';
3094     *pp++ = '$';
3095     *pp = NUL;
3096 
3097     slang->sl_compprog = vim_regcomp(pat, RE_MAGIC + RE_STRING + RE_STRICT);
3098     vim_free(pat);
3099     if (slang->sl_compprog == NULL)
3100 	return SP_FORMERROR;
3101 
3102     return 0;
3103 }
3104 
3105 /*
3106  * Return TRUE if byte "n" appears in "str".
3107  * Like strchr() but independent of locale.
3108  */
3109     static int
3110 byte_in_str(str, n)
3111     char_u	*str;
3112     int		n;
3113 {
3114     char_u	*p;
3115 
3116     for (p = str; *p != NUL; ++p)
3117 	if (*p == n)
3118 	    return TRUE;
3119     return FALSE;
3120 }
3121 
/*
 * One item of the syllable table, as stored in "sl_syl_items" by
 * init_syl_tab().  init_syl_tab() rejects items of SY_MAXLEN bytes or more.
 */
3122 #define SY_MAXLEN   30
3123 typedef struct syl_item_S
3124 {
3125     char_u	sy_chars[SY_MAXLEN];	    /* the sequence of chars */
3126     int		sy_len;			    /* length of the item in bytes, as set by init_syl_tab() */
3127 } syl_item_T;
3128 
3129 /*
3130  * Truncate "slang->sl_syllable" at the first slash and put the following items
3131  * in "slang->sl_syl_items".
3132  */
3133     static int
3134 init_syl_tab(slang)
3135     slang_T	*slang;
3136 {
3137     char_u	*p;
3138     char_u	*s;
3139     int		l;
3140     syl_item_T	*syl;
3141 
3142     ga_init2(&slang->sl_syl_items, sizeof(syl_item_T), 4);
3143     p = vim_strchr(slang->sl_syllable, '/');
3144     while (p != NULL)
3145     {
3146 	*p++ = NUL;
3147 	if (*p == NUL)	    /* trailing slash */
3148 	    break;
3149 	s = p;
3150 	p = vim_strchr(p, '/');
3151 	if (p == NULL)
3152 	    l = STRLEN(s);
3153 	else
3154 	    l = p - s;
3155 	if (l >= SY_MAXLEN)
3156 	    return SP_FORMERROR;
3157 	if (ga_grow(&slang->sl_syl_items, 1) == FAIL)
3158 	    return SP_OTHERERROR;
3159 	syl = ((syl_item_T *)slang->sl_syl_items.ga_data)
3160 					       + slang->sl_syl_items.ga_len++;
3161 	vim_strncpy(syl->sy_chars, s, l);
3162 	syl->sy_len = l;
3163     }
3164     return OK;
3165 }
3166 
3167 /*
3168  * Count the number of syllables in "word".
3169  * When "word" contains spaces the syllables after the last space are counted.
3170  * Returns zero if syllables are not defines.
3171  */
3172     static int
3173 count_syllables(slang, word)
3174     slang_T	*slang;
3175     char_u	*word;
3176 {
3177     int		cnt = 0;
3178     int		skip = FALSE;
3179     char_u	*p;
3180     int		len;
3181     int		i;
3182     syl_item_T	*syl;
3183     int		c;
3184 
3185     if (slang->sl_syllable == NULL)
3186 	return 0;
3187 
3188     for (p = word; *p != NUL; p += len)
3189     {
3190 	/* When running into a space reset counter. */
3191 	if (*p == ' ')
3192 	{
3193 	    len = 1;
3194 	    cnt = 0;
3195 	    continue;
3196 	}
3197 
3198 	/* Find longest match of syllable items. */
3199 	len = 0;
3200 	for (i = 0; i < slang->sl_syl_items.ga_len; ++i)
3201 	{
3202 	    syl = ((syl_item_T *)slang->sl_syl_items.ga_data) + i;
3203 	    if (syl->sy_len > len
3204 			       && STRNCMP(p, syl->sy_chars, syl->sy_len) == 0)
3205 		len = syl->sy_len;
3206 	}
3207 	if (len != 0)	/* found a match, count syllable  */
3208 	{
3209 	    ++cnt;
3210 	    skip = FALSE;
3211 	}
3212 	else
3213 	{
3214 	    /* No recognized syllable item, at least a syllable char then? */
3215 #ifdef FEAT_MBYTE
3216 	    c = mb_ptr2char(p);
3217 	    len = (*mb_ptr2len)(p);
3218 #else
3219 	    c = *p;
3220 	    len = 1;
3221 #endif
3222 	    if (vim_strchr(slang->sl_syllable, c) == NULL)
3223 		skip = FALSE;	    /* No, search for next syllable */
3224 	    else if (!skip)
3225 	    {
3226 		++cnt;		    /* Yes, count it */
3227 		skip = TRUE;	    /* don't count following syllable chars */
3228 	    }
3229 	}
3230     }
3231     return cnt;
3232 }
3233 
3234 /*
3235  * Set the SOFOFROM and SOFOTO items in language "lp".
3236  * Returns SP_*ERROR flags when there is something wrong.
3237  */
3238     static int
3239 set_sofo(lp, from, to)
3240     slang_T	*lp;
3241     char_u	*from;
3242     char_u	*to;
3243 {
3244     int		i;
3245 
3246 #ifdef FEAT_MBYTE
3247     garray_T	*gap;
3248     char_u	*s;
3249     char_u	*p;
3250     int		c;
3251     int		*inp;
3252 
3253     if (has_mbyte)
3254     {
3255 	/* Use "sl_sal" as an array with 256 pointers to a list of wide
3256 	 * characters.  The index is the low byte of the character.
3257 	 * The list contains from-to pairs with a terminating NUL.
3258 	 * sl_sal_first[] is used for latin1 "from" characters. */
3259 	gap = &lp->sl_sal;
3260 	ga_init2(gap, sizeof(int *), 1);
3261 	if (ga_grow(gap, 256) == FAIL)
3262 	    return SP_OTHERERROR;
3263 	vim_memset(gap->ga_data, 0, sizeof(int *) * 256);
3264 	gap->ga_len = 256;
3265 
3266 	/* First count the number of items for each list.  Temporarily use
3267 	 * sl_sal_first[] for this. */
3268 	for (p = from, s = to; *p != NUL && *s != NUL; )
3269 	{
3270 	    c = mb_cptr2char_adv(&p);
3271 	    mb_cptr_adv(s);
3272 	    if (c >= 256)
3273 		++lp->sl_sal_first[c & 0xff];
3274 	}
3275 	if (*p != NUL || *s != NUL)	    /* lengths differ */
3276 	    return SP_FORMERROR;
3277 
3278 	/* Allocate the lists. */
3279 	for (i = 0; i < 256; ++i)
3280 	    if (lp->sl_sal_first[i] > 0)
3281 	    {
3282 		p = alloc(sizeof(int) * (lp->sl_sal_first[i] * 2 + 1));
3283 		if (p == NULL)
3284 		    return SP_OTHERERROR;
3285 		((int **)gap->ga_data)[i] = (int *)p;
3286 		*(int *)p = 0;
3287 	    }
3288 
3289 	/* Put the characters up to 255 in sl_sal_first[] the rest in a sl_sal
3290 	 * list. */
3291 	vim_memset(lp->sl_sal_first, 0, sizeof(salfirst_T) * 256);
3292 	for (p = from, s = to; *p != NUL && *s != NUL; )
3293 	{
3294 	    c = mb_cptr2char_adv(&p);
3295 	    i = mb_cptr2char_adv(&s);
3296 	    if (c >= 256)
3297 	    {
3298 		/* Append the from-to chars at the end of the list with
3299 		 * the low byte. */
3300 		inp = ((int **)gap->ga_data)[c & 0xff];
3301 		while (*inp != 0)
3302 		    ++inp;
3303 		*inp++ = c;		/* from char */
3304 		*inp++ = i;		/* to char */
3305 		*inp++ = NUL;		/* NUL at the end */
3306 	    }
3307 	    else
3308 		/* mapping byte to char is done in sl_sal_first[] */
3309 		lp->sl_sal_first[c] = i;
3310 	}
3311     }
3312     else
3313 #endif
3314     {
3315 	/* mapping bytes to bytes is done in sl_sal_first[] */
3316 	if (STRLEN(from) != STRLEN(to))
3317 	    return SP_FORMERROR;
3318 
3319 	for (i = 0; to[i] != NUL; ++i)
3320 	    lp->sl_sal_first[from[i]] = to[i];
3321 	lp->sl_sal.ga_len = 1;		/* indicates we have soundfolding */
3322     }
3323 
3324     return 0;
3325 }
3326 
3327 /*
3328  * Fill the first-index table for "lp".
3329  */
3330     static void
3331 set_sal_first(lp)
3332     slang_T	*lp;
3333 {
3334     salfirst_T	*sfirst;
3335     int		i;
3336     salitem_T	*smp;
3337     int		c;
3338     garray_T	*gap = &lp->sl_sal;
3339 
3340     sfirst = lp->sl_sal_first;
3341     for (i = 0; i < 256; ++i)
3342 	sfirst[i] = -1;
3343     smp = (salitem_T *)gap->ga_data;
3344     for (i = 0; i < gap->ga_len; ++i)
3345     {
3346 #ifdef FEAT_MBYTE
3347 	if (has_mbyte)
3348 	    /* Use the lowest byte of the first character.  For latin1 it's
3349 	     * the character, for other encodings it should differ for most
3350 	     * characters. */
3351 	    c = *smp[i].sm_lead_w & 0xff;
3352 	else
3353 #endif
3354 	    c = *smp[i].sm_lead;
3355 	if (sfirst[c] == -1)
3356 	{
3357 	    sfirst[c] = i;
3358 #ifdef FEAT_MBYTE
3359 	    if (has_mbyte)
3360 	    {
3361 		int		n;
3362 
3363 		/* Make sure all entries with this byte are following each
3364 		 * other.  Move the ones that are in the wrong position.  Do
3365 		 * keep the same ordering! */
3366 		while (i + 1 < gap->ga_len
3367 				       && (*smp[i + 1].sm_lead_w & 0xff) == c)
3368 		    /* Skip over entry with same index byte. */
3369 		    ++i;
3370 
3371 		for (n = 1; i + n < gap->ga_len; ++n)
3372 		    if ((*smp[i + n].sm_lead_w & 0xff) == c)
3373 		    {
3374 			salitem_T  tsal;
3375 
3376 			/* Move entry with same index byte after the entries
3377 			 * we already found. */
3378 			++i;
3379 			--n;
3380 			tsal = smp[i + n];
3381 			mch_memmove(smp + i + 1, smp + i,
3382 						       sizeof(salitem_T) * n);
3383 			smp[i] = tsal;
3384 		    }
3385 	    }
3386 #endif
3387 	}
3388     }
3389 }
3390 
#ifdef FEAT_MBYTE
/*
 * Convert the multi-byte string "s" into an array with one int for each
 * character, terminated by a NUL entry.
 * The result is allocated, NULL is returned when out of memory.
 */
    static int *
mb_str2wide(s)
    char_u	*s;
{
    int		*wide;
    char_u	*src = s;
    int		n;

    wide = (int *)alloc(sizeof(int) * (mb_charlen(s) + 1));
    if (wide == NULL)
	return NULL;

    for (n = 0; *src != NUL; ++n)
	wide[n] = mb_ptr2char_adv(&src);
    wide[n] = NUL;

    return wide;
}
#endif
3414 
3415 /*
3416  * Read one row of siblings from the spell file and store it in the byte array
3417  * "byts" and index array "idxs".  Recursively read the children.
3418  *
3419  * NOTE: The code here must match put_node().
3420  *
3421  * Returns the index follosing the siblings.
3422  * Returns -1 if the file is shorter than expected.
3423  * Returns -2 if there is a format error.
3424  */
3425     static idx_T
3426 read_tree(fd, byts, idxs, maxidx, startidx, prefixtree, maxprefcondnr)
3427     FILE	*fd;
3428     char_u	*byts;
3429     idx_T	*idxs;
3430     int		maxidx;		    /* size of arrays */
3431     idx_T	startidx;	    /* current index in "byts" and "idxs" */
3432     int		prefixtree;	    /* TRUE for reading PREFIXTREE */
3433     int		maxprefcondnr;	    /* maximum for <prefcondnr> */
3434 {
3435     int		len;
3436     int		i;
3437     int		n;
3438     idx_T	idx = startidx;
3439     int		c;
3440     int		c2;
3441 #define SHARED_MASK	0x8000000
3442 
3443     len = getc(fd);					/* <siblingcount> */
3444     if (len <= 0)
3445 	return -1;
3446 
3447     if (startidx + len >= maxidx)
3448 	return -2;
3449     byts[idx++] = len;
3450 
3451     /* Read the byte values, flag/region bytes and shared indexes. */
3452     for (i = 1; i <= len; ++i)
3453     {
3454 	c = getc(fd);					/* <byte> */
3455 	if (c < 0)
3456 	    return -1;
3457 	if (c <= BY_SPECIAL)
3458 	{
3459 	    if (c == BY_NOFLAGS && !prefixtree)
3460 	    {
3461 		/* No flags, all regions. */
3462 		idxs[idx] = 0;
3463 		c = 0;
3464 	    }
3465 	    else if (c != BY_INDEX)
3466 	    {
3467 		if (prefixtree)
3468 		{
3469 		    /* Read the optional pflags byte, the prefix ID and the
3470 		     * condition nr.  In idxs[] store the prefix ID in the low
3471 		     * byte, the condition index shifted up 8 bits, the flags
3472 		     * shifted up 24 bits. */
3473 		    if (c == BY_FLAGS)
3474 			c = getc(fd) << 24;		/* <pflags> */
3475 		    else
3476 			c = 0;
3477 
3478 		    c |= getc(fd);			/* <affixID> */
3479 
3480 		    n = (getc(fd) << 8) + getc(fd);	/* <prefcondnr> */
3481 		    if (n >= maxprefcondnr)
3482 			return -2;
3483 		    c |= (n << 8);
3484 		}
3485 		else /* c must be BY_FLAGS or BY_FLAGS2 */
3486 		{
3487 		    /* Read flags and optional region and prefix ID.  In
3488 		     * idxs[] the flags go in the low two bytes, region above
3489 		     * that and prefix ID above the region. */
3490 		    c2 = c;
3491 		    c = getc(fd);			/* <flags> */
3492 		    if (c2 == BY_FLAGS2)
3493 			c = (getc(fd) << 8) + c;	/* <flags2> */
3494 		    if (c & WF_REGION)
3495 			c = (getc(fd) << 16) + c;	/* <region> */
3496 		    if (c & WF_AFX)
3497 			c = (getc(fd) << 24) + c;	/* <affixID> */
3498 		}
3499 
3500 		idxs[idx] = c;
3501 		c = 0;
3502 	    }
3503 	    else /* c == BY_INDEX */
3504 	    {
3505 							/* <nodeidx> */
3506 		n = (getc(fd) << 16) + (getc(fd) << 8) + getc(fd);
3507 		if (n < 0 || n >= maxidx)
3508 		    return -2;
3509 		idxs[idx] = n + SHARED_MASK;
3510 		c = getc(fd);				/* <xbyte> */
3511 	    }
3512 	}
3513 	byts[idx++] = c;
3514     }
3515 
3516     /* Recursively read the children for non-shared siblings.
3517      * Skip the end-of-word ones (zero byte value) and the shared ones (and
3518      * remove SHARED_MASK) */
3519     for (i = 1; i <= len; ++i)
3520 	if (byts[startidx + i] != 0)
3521 	{
3522 	    if (idxs[startidx + i] & SHARED_MASK)
3523 		idxs[startidx + i] &= ~SHARED_MASK;
3524 	    else
3525 	    {
3526 		idxs[startidx + i] = idx;
3527 		idx = read_tree(fd, byts, idxs, maxidx, idx,
3528 						     prefixtree, maxprefcondnr);
3529 		if (idx < 0)
3530 		    break;
3531 	    }
3532 	}
3533 
3534     return idx;
3535 }
3536 
3537 /*
3538  * Parse 'spelllang' and set buf->b_langp accordingly.
3539  * Returns NULL if it's OK, an error message otherwise.
3540  */
3541     char_u *
3542 did_set_spelllang(buf)
3543     buf_T	*buf;
3544 {
3545     garray_T	ga;
3546     char_u	*splp;
3547     char_u	*region;
3548     char_u	region_cp[3];
3549     int		filename;
3550     int		region_mask;
3551     slang_T	*slang;
3552     int		c;
3553     char_u	lang[MAXWLEN + 1];
3554     char_u	spf_name[MAXPATHL];
3555     int		len;
3556     char_u	*p;
3557     int		round;
3558     char_u	*spf;
3559     char_u	*use_region = NULL;
3560     int		dont_use_region = FALSE;
3561     int		nobreak = FALSE;
3562     int		i, j;
3563     langp_T	*lp, *lp2;
3564 
3565     ga_init2(&ga, sizeof(langp_T), 2);
3566     clear_midword(buf);
3567 
3568     /* loop over comma separated language names. */
3569     for (splp = buf->b_p_spl; *splp != NUL; )
3570     {
3571 	/* Get one language name. */
3572 	copy_option_part(&splp, lang, MAXWLEN, ",");
3573 
3574 	region = NULL;
3575 	len = STRLEN(lang);
3576 
3577 	/* If the name ends in ".spl" use it as the name of the spell file.
3578 	 * If there is a region name let "region" point to it and remove it
3579 	 * from the name. */
3580 	if (len > 4 && fnamecmp(lang + len - 4, ".spl") == 0)
3581 	{
3582 	    filename = TRUE;
3583 
3584 	    /* Locate a region and remove it from the file name. */
3585 	    p = vim_strchr(gettail(lang), '_');
3586 	    if (p != NULL && ASCII_ISALPHA(p[1]) && ASCII_ISALPHA(p[2])
3587 						      && !ASCII_ISALPHA(p[3]))
3588 	    {
3589 		vim_strncpy(region_cp, p + 1, 2);
3590 		mch_memmove(p, p + 3, len - (p - lang) - 2);
3591 		len -= 3;
3592 		region = region_cp;
3593 	    }
3594 	    else
3595 		dont_use_region = TRUE;
3596 
3597 	    /* Check if we loaded this language before. */
3598 	    for (slang = first_lang; slang != NULL; slang = slang->sl_next)
3599 		if (fullpathcmp(lang, slang->sl_fname, FALSE) == FPC_SAME)
3600 		    break;
3601 	}
3602 	else
3603 	{
3604 	    filename = FALSE;
3605 	    if (len > 3 && lang[len - 3] == '_')
3606 	    {
3607 		region = lang + len - 2;
3608 		len -= 3;
3609 		lang[len] = NUL;
3610 	    }
3611 	    else
3612 		dont_use_region = TRUE;
3613 
3614 	    /* Check if we loaded this language before. */
3615 	    for (slang = first_lang; slang != NULL; slang = slang->sl_next)
3616 		if (STRICMP(lang, slang->sl_name) == 0)
3617 		    break;
3618 	}
3619 
3620 	if (region != NULL)
3621 	{
3622 	    /* If the region differs from what was used before then don't
3623 	     * use it for 'spellfile'. */
3624 	    if (use_region != NULL && STRCMP(region, use_region) != 0)
3625 		dont_use_region = TRUE;
3626 	    use_region = region;
3627 	}
3628 
3629 	/* If not found try loading the language now. */
3630 	if (slang == NULL)
3631 	{
3632 	    if (filename)
3633 		(void)spell_load_file(lang, lang, NULL, FALSE);
3634 	    else
3635 		spell_load_lang(lang);
3636 	}
3637 
3638 	/*
3639 	 * Loop over the languages, there can be several files for "lang".
3640 	 */
3641 	for (slang = first_lang; slang != NULL; slang = slang->sl_next)
3642 	    if (filename ? fullpathcmp(lang, slang->sl_fname, FALSE) == FPC_SAME
3643 			 : STRICMP(lang, slang->sl_name) == 0)
3644 	    {
3645 		region_mask = REGION_ALL;
3646 		if (!filename && region != NULL)
3647 		{
3648 		    /* find region in sl_regions */
3649 		    c = find_region(slang->sl_regions, region);
3650 		    if (c == REGION_ALL)
3651 		    {
3652 			if (slang->sl_add)
3653 			{
3654 			    if (*slang->sl_regions != NUL)
3655 				/* This addition file is for other regions. */
3656 				region_mask = 0;
3657 			}
3658 			else
3659 			    /* This is probably an error.  Give a warning and
3660 			     * accept the words anyway. */
3661 			    smsg((char_u *)
3662 				    _("Warning: region %s not supported"),
3663 								      region);
3664 		    }
3665 		    else
3666 			region_mask = 1 << c;
3667 		}
3668 
3669 		if (region_mask != 0)
3670 		{
3671 		    if (ga_grow(&ga, 1) == FAIL)
3672 		    {
3673 			ga_clear(&ga);
3674 			return e_outofmem;
3675 		    }
3676 		    LANGP_ENTRY(ga, ga.ga_len)->lp_slang = slang;
3677 		    LANGP_ENTRY(ga, ga.ga_len)->lp_region = region_mask;
3678 		    ++ga.ga_len;
3679 		    use_midword(slang, buf);
3680 		    if (slang->sl_nobreak)
3681 			nobreak = TRUE;
3682 		}
3683 	    }
3684     }
3685 
3686     /* round 0: load int_wordlist, if possible.
3687      * round 1: load first name in 'spellfile'.
3688      * round 2: load second name in 'spellfile.
3689      * etc. */
3690     spf = curbuf->b_p_spf;
3691     for (round = 0; round == 0 || *spf != NUL; ++round)
3692     {
3693 	if (round == 0)
3694 	{
3695 	    /* Internal wordlist, if there is one. */
3696 	    if (int_wordlist == NULL)
3697 		continue;
3698 	    int_wordlist_spl(spf_name);
3699 	}
3700 	else
3701 	{
3702 	    /* One entry in 'spellfile'. */
3703 	    copy_option_part(&spf, spf_name, MAXPATHL - 5, ",");
3704 	    STRCAT(spf_name, ".spl");
3705 
3706 	    /* If it was already found above then skip it. */
3707 	    for (c = 0; c < ga.ga_len; ++c)
3708 	    {
3709 		p = LANGP_ENTRY(ga, c)->lp_slang->sl_fname;
3710 		if (p != NULL && fullpathcmp(spf_name, p, FALSE) == FPC_SAME)
3711 		    break;
3712 	    }
3713 	    if (c < ga.ga_len)
3714 		continue;
3715 	}
3716 
3717 	/* Check if it was loaded already. */
3718 	for (slang = first_lang; slang != NULL; slang = slang->sl_next)
3719 	    if (fullpathcmp(spf_name, slang->sl_fname, FALSE) == FPC_SAME)
3720 		break;
3721 	if (slang == NULL)
3722 	{
3723 	    /* Not loaded, try loading it now.  The language name includes the
3724 	     * region name, the region is ignored otherwise.  for int_wordlist
3725 	     * use an arbitrary name. */
3726 	    if (round == 0)
3727 		STRCPY(lang, "internal wordlist");
3728 	    else
3729 	    {
3730 		vim_strncpy(lang, gettail(spf_name), MAXWLEN);
3731 		p = vim_strchr(lang, '.');
3732 		if (p != NULL)
3733 		    *p = NUL;	/* truncate at ".encoding.add" */
3734 	    }
3735 	    slang = spell_load_file(spf_name, lang, NULL, TRUE);
3736 
3737 	    /* If one of the languages has NOBREAK we assume the addition
3738 	     * files also have this. */
3739 	    if (slang != NULL && nobreak)
3740 		slang->sl_nobreak = TRUE;
3741 	}
3742 	if (slang != NULL && ga_grow(&ga, 1) == OK)
3743 	{
3744 	    region_mask = REGION_ALL;
3745 	    if (use_region != NULL && !dont_use_region)
3746 	    {
3747 		/* find region in sl_regions */
3748 		c = find_region(slang->sl_regions, use_region);
3749 		if (c != REGION_ALL)
3750 		    region_mask = 1 << c;
3751 		else if (*slang->sl_regions != NUL)
3752 		    /* This spell file is for other regions. */
3753 		    region_mask = 0;
3754 	    }
3755 
3756 	    if (region_mask != 0)
3757 	    {
3758 		LANGP_ENTRY(ga, ga.ga_len)->lp_slang = slang;
3759 		LANGP_ENTRY(ga, ga.ga_len)->lp_sallang = NULL;
3760 		LANGP_ENTRY(ga, ga.ga_len)->lp_replang = NULL;
3761 		LANGP_ENTRY(ga, ga.ga_len)->lp_region = region_mask;
3762 		++ga.ga_len;
3763 		use_midword(slang, buf);
3764 	    }
3765 	}
3766     }
3767 
3768     /* Everything is fine, store the new b_langp value. */
3769     ga_clear(&buf->b_langp);
3770     buf->b_langp = ga;
3771 
3772     /* For each language figure out what language to use for sound folding and
3773      * REP items.  If the language doesn't support it itself use another one
3774      * with the same name.  E.g. for "en-math" use "en". */
3775     for (i = 0; i < ga.ga_len; ++i)
3776     {
3777 	lp = LANGP_ENTRY(ga, i);
3778 
3779 	/* sound folding */
3780 	if (lp->lp_slang->sl_sal.ga_len > 0)
3781 	    /* language does sound folding itself */
3782 	    lp->lp_sallang = lp->lp_slang;
3783 	else
3784 	    /* find first similar language that does sound folding */
3785 	    for (j = 0; j < ga.ga_len; ++j)
3786 	    {
3787 		lp2 = LANGP_ENTRY(ga, j);
3788 		if (lp2->lp_slang->sl_sal.ga_len > 0
3789 			&& STRNCMP(lp->lp_slang->sl_name,
3790 					      lp2->lp_slang->sl_name, 2) == 0)
3791 		{
3792 		    lp->lp_sallang = lp2->lp_slang;
3793 		    break;
3794 		}
3795 	    }
3796 
3797 	/* REP items */
3798 	if (lp->lp_slang->sl_rep.ga_len > 0)
3799 	    /* language has REP items itself */
3800 	    lp->lp_replang = lp->lp_slang;
3801 	else
3802 	    /* find first similar language that does sound folding */
3803 	    for (j = 0; j < ga.ga_len; ++j)
3804 	    {
3805 		lp2 = LANGP_ENTRY(ga, j);
3806 		if (lp2->lp_slang->sl_rep.ga_len > 0
3807 			&& STRNCMP(lp->lp_slang->sl_name,
3808 					      lp2->lp_slang->sl_name, 2) == 0)
3809 		{
3810 		    lp->lp_replang = lp2->lp_slang;
3811 		    break;
3812 		}
3813 	    }
3814     }
3815 
3816     return NULL;
3817 }
3818 
3819 /*
3820  * Clear the midword characters for buffer "buf".
3821  */
3822     static void
3823 clear_midword(buf)
3824     buf_T	*buf;
3825 {
3826     vim_memset(buf->b_spell_ismw, 0, 256);
3827 #ifdef FEAT_MBYTE
3828     vim_free(buf->b_spell_ismw_mb);
3829     buf->b_spell_ismw_mb = NULL;
3830 #endif
3831 }
3832 
3833 /*
3834  * Use the "sl_midword" field of language "lp" for buffer "buf".
3835  * They add up to any currently used midword characters.
3836  */
3837     static void
3838 use_midword(lp, buf)
3839     slang_T	*lp;
3840     buf_T	*buf;
3841 {
3842     char_u	*p;
3843 
3844     if (lp->sl_midword == NULL)	    /* there aren't any */
3845 	return;
3846 
3847     for (p = lp->sl_midword; *p != NUL; )
3848 #ifdef FEAT_MBYTE
3849 	if (has_mbyte)
3850 	{
3851 	    int	    c, l, n;
3852 	    char_u  *bp;
3853 
3854 	    c = mb_ptr2char(p);
3855 	    l = (*mb_ptr2len)(p);
3856 	    if (c < 256 && l <= 2)
3857 		buf->b_spell_ismw[c] = TRUE;
3858 	    else if (buf->b_spell_ismw_mb == NULL)
3859 		/* First multi-byte char in "b_spell_ismw_mb". */
3860 		buf->b_spell_ismw_mb = vim_strnsave(p, l);
3861 	    else
3862 	    {
3863 		/* Append multi-byte chars to "b_spell_ismw_mb". */
3864 		n = STRLEN(buf->b_spell_ismw_mb);
3865 		bp = vim_strnsave(buf->b_spell_ismw_mb, n + l);
3866 		if (bp != NULL)
3867 		{
3868 		    vim_free(buf->b_spell_ismw_mb);
3869 		    buf->b_spell_ismw_mb = bp;
3870 		    vim_strncpy(bp + n, p, l);
3871 		}
3872 	    }
3873 	    p += l;
3874 	}
3875 	else
3876 #endif
3877 	    buf->b_spell_ismw[*p++] = TRUE;
3878 }
3879 
3880 /*
3881  * Find the region "region[2]" in "rp" (points to "sl_regions").
3882  * Each region is simply stored as the two characters of it's name.
3883  * Returns the index if found (first is 0), REGION_ALL if not found.
3884  */
3885     static int
3886 find_region(rp, region)
3887     char_u	*rp;
3888     char_u	*region;
3889 {
3890     int		i;
3891 
3892     for (i = 0; ; i += 2)
3893     {
3894 	if (rp[i] == NUL)
3895 	    return REGION_ALL;
3896 	if (rp[i] == region[0] && rp[i + 1] == region[1])
3897 	    break;
3898     }
3899     return i / 2;
3900 }
3901 
3902 /*
3903  * Return case type of word:
3904  * w word	0
3905  * Word		WF_ONECAP
3906  * W WORD	WF_ALLCAP
3907  * WoRd	wOrd	WF_KEEPCAP
3908  */
3909     static int
3910 captype(word, end)
3911     char_u	*word;
3912     char_u	*end;	    /* When NULL use up to NUL byte. */
3913 {
3914     char_u	*p;
3915     int		c;
3916     int		firstcap;
3917     int		allcap;
3918     int		past_second = FALSE;	/* past second word char */
3919 
3920     /* find first letter */
3921     for (p = word; !spell_iswordp_nmw(p); mb_ptr_adv(p))
3922 	if (end == NULL ? *p == NUL : p >= end)
3923 	    return 0;	    /* only non-word characters, illegal word */
3924 #ifdef FEAT_MBYTE
3925     if (has_mbyte)
3926 	c = mb_ptr2char_adv(&p);
3927     else
3928 #endif
3929 	c = *p++;
3930     firstcap = allcap = SPELL_ISUPPER(c);
3931 
3932     /*
3933      * Need to check all letters to find a word with mixed upper/lower.
3934      * But a word with an upper char only at start is a ONECAP.
3935      */
3936     for ( ; end == NULL ? *p != NUL : p < end; mb_ptr_adv(p))
3937 	if (spell_iswordp_nmw(p))
3938 	{
3939 	    c = PTR2CHAR(p);
3940 	    if (!SPELL_ISUPPER(c))
3941 	    {
3942 		/* UUl -> KEEPCAP */
3943 		if (past_second && allcap)
3944 		    return WF_KEEPCAP;
3945 		allcap = FALSE;
3946 	    }
3947 	    else if (!allcap)
3948 		/* UlU -> KEEPCAP */
3949 		return WF_KEEPCAP;
3950 	    past_second = TRUE;
3951 	}
3952 
3953     if (allcap)
3954 	return WF_ALLCAP;
3955     if (firstcap)
3956 	return WF_ONECAP;
3957     return 0;
3958 }
3959 
3960 /*
3961  * Like captype() but for a KEEPCAP word add ONECAP if the word starts with a
3962  * capital.  So that make_case_word() can turn WOrd into Word.
3963  * Add ALLCAP for "WOrD".
3964  */
3965     static int
3966 badword_captype(word, end)
3967     char_u	*word;
3968     char_u	*end;
3969 {
3970     int		flags = captype(word, end);
3971     int		c;
3972     int		l, u;
3973     int		first;
3974     char_u	*p;
3975 
3976     if (flags & WF_KEEPCAP)
3977     {
3978 	/* Count the number of UPPER and lower case letters. */
3979 	l = u = 0;
3980 	first = FALSE;
3981 	for (p = word; p < end; mb_ptr_adv(p))
3982 	{
3983 	    c = PTR2CHAR(p);
3984 	    if (SPELL_ISUPPER(c))
3985 	    {
3986 		++u;
3987 		if (p == word)
3988 		    first = TRUE;
3989 	    }
3990 	    else
3991 		++l;
3992 	}
3993 
3994 	/* If there are more UPPER than lower case letters suggest an
3995 	 * ALLCAP word.  Otherwise, if the first letter is UPPER then
3996 	 * suggest ONECAP.  Exception: "ALl" most likely should be "All",
3997 	 * require three upper case letters. */
3998 	if (u > l && u > 2)
3999 	    flags |= WF_ALLCAP;
4000 	else if (first)
4001 	    flags |= WF_ONECAP;
4002     }
4003     return flags;
4004 }
4005 
4006 # if defined(FEAT_MBYTE) || defined(EXITFREE) || defined(PROTO)
4007 /*
4008  * Free all languages.
4009  */
4010     void
4011 spell_free_all()
4012 {
4013     slang_T	*slang;
4014     buf_T	*buf;
4015     char_u	fname[MAXPATHL];
4016 
4017     /* Go through all buffers and handle 'spelllang'. */
4018     for (buf = firstbuf; buf != NULL; buf = buf->b_next)
4019 	ga_clear(&buf->b_langp);
4020 
4021     while (first_lang != NULL)
4022     {
4023 	slang = first_lang;
4024 	first_lang = slang->sl_next;
4025 	slang_free(slang);
4026     }
4027 
4028     if (int_wordlist != NULL)
4029     {
4030 	/* Delete the internal wordlist and its .spl file */
4031 	mch_remove(int_wordlist);
4032 	int_wordlist_spl(fname);
4033 	mch_remove(fname);
4034 	vim_free(int_wordlist);
4035 	int_wordlist = NULL;
4036     }
4037 
4038     init_spell_chartab();
4039 }
4040 # endif
4041 
4042 # if defined(FEAT_MBYTE) || defined(PROTO)
4043 /*
4044  * Clear all spelling tables and reload them.
4045  * Used after 'encoding' is set and when ":mkspell" was used.
4046  */
4047     void
4048 spell_reload()
4049 {
4050     buf_T	*buf;
4051     win_T	*wp;
4052 
4053     /* Initialize the table for spell_iswordp(). */
4054     init_spell_chartab();
4055 
4056     /* Unload all allocated memory. */
4057     spell_free_all();
4058 
4059     /* Go through all buffers and handle 'spelllang'. */
4060     for (buf = firstbuf; buf != NULL; buf = buf->b_next)
4061     {
4062 	/* Only load the wordlists when 'spelllang' is set and there is a
4063 	 * window for this buffer in which 'spell' is set. */
4064 	if (*buf->b_p_spl != NUL)
4065 	{
4066 	    FOR_ALL_WINDOWS(wp)
4067 		if (wp->w_buffer == buf && wp->w_p_spell)
4068 		{
4069 		    (void)did_set_spelllang(buf);
4070 # ifdef FEAT_WINDOWS
4071 		    break;
4072 # endif
4073 		}
4074 	}
4075     }
4076 }
4077 # endif
4078 
4079 /*
4080  * Reload the spell file "fname" if it's loaded.
4081  */
4082     static void
4083 spell_reload_one(fname, added_word)
4084     char_u	*fname;
4085     int		added_word;	/* invoked through "zg" */
4086 {
4087     slang_T	*slang;
4088     int		didit = FALSE;
4089 
4090     for (slang = first_lang; slang != NULL; slang = slang->sl_next)
4091     {
4092 	if (fullpathcmp(fname, slang->sl_fname, FALSE) == FPC_SAME)
4093 	{
4094 	    slang_clear(slang);
4095 	    if (spell_load_file(fname, NULL, slang, FALSE) == NULL)
4096 		/* reloading failed, clear the language */
4097 		slang_clear(slang);
4098 	    redraw_all_later(NOT_VALID);
4099 	    didit = TRUE;
4100 	}
4101     }
4102 
4103     /* When "zg" was used and the file wasn't loaded yet, should redo
4104      * 'spelllang' to get it loaded. */
4105     if (added_word && !didit)
4106 	did_set_spelllang(curbuf);
4107 }
4108 
4109 
4110 /*
4111  * Functions for ":mkspell".
4112  */
4113 
4114 #define MAXLINELEN  500		/* Maximum length in bytes of a line in a .aff
4115 				   and .dic file. */
4116 /*
4117  * Main structure to store the contents of a ".aff" file.
4118  */
4119 typedef struct afffile_S
4120 {
4121     char_u	*af_enc;	/* "SET", normalized, alloc'ed string or NULL */
4122     int		af_flagtype;	/* AFT_CHAR, AFT_LONG, AFT_NUM or AFT_CAPLONG */
4123     int		af_slash;	/* character used in word for slash */
4124     unsigned	af_rar;		/* RAR ID for rare word */
4125     unsigned	af_kep;		/* KEP ID for keep-case word */
4126     unsigned	af_bad;		/* BAD ID for banned word */
4127     unsigned	af_needaffix;	/* NEEDAFFIX ID */
4128     unsigned	af_needcomp;	/* NEEDCOMPOUND ID */
4129     int		af_pfxpostpone;	/* postpone prefixes without chop string */
4130     hashtab_T	af_pref;	/* hashtable for prefixes, affheader_T */
4131     hashtab_T	af_suff;	/* hashtable for suffixes, affheader_T */
4132     hashtab_T	af_comp;	/* hashtable for compound flags, compitem_T */
4133 } afffile_T;
4134 
4135 #define AFT_CHAR	0	/* flags are one character */
4136 #define AFT_LONG	1	/* flags are two characters */
4137 #define AFT_CAPLONG	2	/* flags are one or two characters */
4138 #define AFT_NUM		3	/* flags are numbers, comma separated */
4139 
4140 typedef struct affentry_S affentry_T;
4141 /* Affix entry from ".aff" file.  Used for prefixes and suffixes. */
4142 struct affentry_S
4143 {
4144     affentry_T	*ae_next;	/* next affix with same name/number */
4145     char_u	*ae_chop;	/* text to chop off basic word (can be NULL) */
4146     char_u	*ae_add;	/* text to add to basic word (can be NULL) */
4147     char_u	*ae_cond;	/* condition (NULL for ".") */
4148     regprog_T	*ae_prog;	/* regexp program for ae_cond or NULL */
4149     char_u	ae_rare;	/* rare affix */
4150     char_u	ae_nocomp;	/* word with affix not compoundable */
4151 };
4152 
4153 #ifdef FEAT_MBYTE
4154 # define AH_KEY_LEN 17		/* 2 x 8 bytes + NUL */
4155 #else
4156 # define AH_KEY_LEN 7		/* 6 digits + NUL */
4157 #endif
4158 
4159 /* Affix header from ".aff" file.  Used for af_pref and af_suff. */
4160 typedef struct affheader_S
4161 {
4162     char_u	ah_key[AH_KEY_LEN]; /* key for hashtab == name of affix */
4163     unsigned	ah_flag;	/* affix name as number, uses "af_flagtype" */
4164     int		ah_newID;	/* prefix ID after renumbering; 0 if not used */
4165     int		ah_combine;	/* suffix may combine with prefix */
4166     int		ah_follows;	/* another affix block should be following */
4167     affentry_T	*ah_first;	/* first affix entry */
4168 } affheader_T;
4169 
4170 #define HI2AH(hi)   ((affheader_T *)(hi)->hi_key)
4171 
4172 /* Flag used in compound items. */
4173 typedef struct compitem_S
4174 {
4175     char_u	ci_key[AH_KEY_LEN]; /* key for hashtab == name of compound */
4176     unsigned	ci_flag;	/* affix name as number, uses "af_flagtype" */
4177     int		ci_newID;	/* affix ID after renumbering. */
4178 } compitem_T;
4179 
4180 #define HI2CI(hi)   ((compitem_T *)(hi)->hi_key)
4181 
4182 /*
4183  * Structure that is used to store the items in the word tree.  This avoids
4184  * the need to keep track of each allocated thing, everything is freed all at
4185  * once after ":mkspell" is done.
4186  */
4187 #define  SBLOCKSIZE 16000	/* size of sb_data */
4188 typedef struct sblock_S sblock_T;
4189 struct sblock_S
4190 {
4191     sblock_T	*sb_next;	/* next block in list */
4192     int		sb_used;	/* nr of bytes already in use */
4193     char_u	sb_data[1];	/* data, actually longer */
4194 };
4195 
4196 /*
4197  * A node in the tree.
4198  */
4199 typedef struct wordnode_S wordnode_T;
4200 struct wordnode_S
4201 {
4202     union   /* shared to save space */
4203     {
4204 	char_u	hashkey[6];	/* the hash key, only used while compressing */
4205 	int	index;		/* index in written nodes (valid after first
4206 				   round) */
4207     } wn_u1;
4208     union   /* shared to save space */
4209     {
4210 	wordnode_T *next;	/* next node with same hash key */
4211 	wordnode_T *wnode;	/* parent node that will write this node */
4212     } wn_u2;
4213     wordnode_T	*wn_child;	/* child (next byte in word) */
4214     wordnode_T  *wn_sibling;	/* next sibling (alternate byte in word,
4215 				   always sorted) */
4216     int		wn_refs;	/* Nr. of references to this node.  Only
4217 				   relevant for first node in a list of
4218 				   siblings, in following siblings it is
4219 				   always one. */
4220     char_u	wn_byte;	/* Byte for this node. NUL for word end */
4221     char_u	wn_affixID;	/* when "wn_byte" is NUL: supported/required
4222 				   prefix ID or 0 */
4223     short_u	wn_flags;	/* when "wn_byte" is NUL: WF_ flags */
4224     short	wn_region;	/* when "wn_byte" is NUL: region mask; for
4225 				   PREFIXTREE it's the prefcondnr */
4226 #ifdef SPELL_PRINTTREE
4227     int		wn_nr;		/* sequence nr for printing */
4228 #endif
4229 };
4230 
#define WN_MASK	 0xffff		/* mask relevant bits of "wn_flags" */

/* Get the wordnode_T for hashitem "hi" by casting its key pointer.  The
 * whole expansion is parenthesized, like HI2AH() and HI2CI(), so that
 * "HI2WN(hi)->wn_child" applies "->" to the result of the cast and not to
 * "hi_key". */
#define HI2WN(hi)    ((wordnode_T *)((hi)->hi_key))
4234 
4235 /*
4236  * Info used while reading the spell files.
4237  */
4238 typedef struct spellinfo_S
4239 {
4240     wordnode_T	*si_foldroot;	/* tree with case-folded words */
4241     long	si_foldwcount;	/* nr of words in si_foldroot */
4242 
4243     wordnode_T	*si_keeproot;	/* tree with keep-case words */
4244     long	si_keepwcount;	/* nr of words in si_keeproot */
4245 
4246     wordnode_T	*si_prefroot;	/* tree with postponed prefixes */
4247 
4248     sblock_T	*si_blocks;	/* memory blocks used */
4249     long	si_blocks_cnt;	/* memory blocks allocated */
4250     long	si_compress_cnt;    /* words to add before lowering
4251 				       compression limit */
4252     wordnode_T	*si_first_free; /* List of nodes that have been freed during
4253 				   compression, linked by "wn_child" field. */
4254     long	si_free_count;	/* number of nodes in si_first_free */
4255 #ifdef SPELL_PRINTTREE
4256     int		si_wordnode_nr;	/* sequence nr for nodes */
4257 #endif
4258 
4259 
4260     int		si_ascii;	/* handling only ASCII words */
4261     int		si_add;		/* addition file */
4262     int		si_clear_chartab;   /* when TRUE clear char tables */
4263     int		si_region;	/* region mask */
4264     vimconv_T	si_conv;	/* for conversion to 'encoding' */
4265     int		si_memtot;	/* runtime memory used */
4266     int		si_verbose;	/* verbose messages */
4267     int		si_msg_count;	/* number of words added since last message */
4268     int		si_region_count; /* number of regions supported (1 when there
4269 				    are no regions) */
4270     char_u	si_region_name[16]; /* region names; used only if
4271 				     * si_region_count > 1) */
4272 
4273     garray_T	si_rep;		/* list of fromto_T entries from REP lines */
4274     garray_T	si_sal;		/* list of fromto_T entries from SAL lines */
4275     char_u	*si_sofofr;	/* SOFOFROM text */
4276     char_u	*si_sofoto;	/* SOFOTO text */
4277     int		si_followup;	/* soundsalike: ? */
4278     int		si_collapse;	/* soundsalike: ? */
4279     int		si_rem_accents;	/* soundsalike: remove accents */
4280     garray_T	si_map;		/* MAP info concatenated */
4281     char_u	*si_midword;	/* MIDWORD chars or NULL  */
4282     int		si_compmax;	/* max nr of words for compounding */
4283     int		si_compminlen;	/* minimal length for compounding */
4284     int		si_compsylmax;	/* max nr of syllables for compounding */
4285     char_u	*si_compflags;	/* flags used for compounding */
4286     char_u	si_nobreak;	/* NOBREAK */
4287     char_u	*si_syllable;	/* syllable string */
4288     garray_T	si_prefcond;	/* table with conditions for postponed
4289 				 * prefixes, each stored as a string */
4290     int		si_newprefID;	/* current value for ah_newID */
4291     int		si_newcompID;	/* current value for compound ID */
4292 } spellinfo_T;
4293 
4294 static afffile_T *spell_read_aff __ARGS((spellinfo_T *spin, char_u *fname));
4295 static unsigned affitem2flag __ARGS((int flagtype, char_u *item, char_u	*fname, int lnum));
4296 static unsigned get_affitem __ARGS((int flagtype, char_u **pp));
4297 static void process_compflags __ARGS((spellinfo_T *spin, afffile_T *aff, char_u *compflags));
4298 static void check_renumber __ARGS((spellinfo_T *spin));
4299 static int flag_in_afflist __ARGS((int flagtype, char_u *afflist, unsigned flag));
4300 static void aff_check_number __ARGS((int spinval, int affval, char *name));
4301 static void aff_check_string __ARGS((char_u *spinval, char_u *affval, char *name));
4302 static int str_equal __ARGS((char_u *s1, char_u	*s2));
4303 static void add_fromto __ARGS((spellinfo_T *spin, garray_T *gap, char_u	*from, char_u *to));
4304 static int sal_to_bool __ARGS((char_u *s));
4305 static int has_non_ascii __ARGS((char_u *s));
4306 static void spell_free_aff __ARGS((afffile_T *aff));
4307 static int spell_read_dic __ARGS((spellinfo_T *spin, char_u *fname, afffile_T *affile));
4308 static int get_pfxlist __ARGS((afffile_T *affile, char_u *afflist, char_u *store_afflist));
4309 static void get_compflags __ARGS((afffile_T *affile, char_u *afflist, char_u *store_afflist));
4310 static int store_aff_word __ARGS((spellinfo_T *spin, char_u *word, char_u *afflist, afffile_T *affile, hashtab_T *ht, hashtab_T *xht, int comb, int flags, char_u *pfxlist, int pfxlen));
4311 static int spell_read_wordfile __ARGS((spellinfo_T *spin, char_u *fname));
4312 static void *getroom __ARGS((spellinfo_T *spin, size_t len, int align));
4313 static char_u *getroom_save __ARGS((spellinfo_T *spin, char_u *s));
4314 static void free_blocks __ARGS((sblock_T *bl));
4315 static wordnode_T *wordtree_alloc __ARGS((spellinfo_T *spin));
4316 static int store_word __ARGS((spellinfo_T *spin, char_u *word, int flags, int region, char_u *pfxlist, int need_affix));
4317 static int tree_add_word __ARGS((spellinfo_T *spin, char_u *word, wordnode_T *tree, int flags, int region, int affixID));
4318 static wordnode_T *get_wordnode __ARGS((spellinfo_T *spin));
4319 static void deref_wordnode __ARGS((spellinfo_T *spin, wordnode_T *node));
4320 static void free_wordnode __ARGS((spellinfo_T *spin, wordnode_T *n));
4321 static void wordtree_compress __ARGS((spellinfo_T *spin, wordnode_T *root));
4322 static int node_compress __ARGS((spellinfo_T *spin, wordnode_T *node, hashtab_T *ht, int *tot));
4323 static int node_equal __ARGS((wordnode_T *n1, wordnode_T *n2));
4324 static int write_vim_spell __ARGS((spellinfo_T *spin, char_u *fname));
4325 static void clear_node __ARGS((wordnode_T *node));
4326 static int put_node __ARGS((FILE *fd, wordnode_T *node, int index, int regionmask, int prefixtree));
4327 static void mkspell __ARGS((int fcount, char_u **fnames, int ascii, int overwrite, int added_word));
4328 static void init_spellfile __ARGS((void));
4329 
/* In the postponed prefixes tree wn_flags is used to store the WFP_ flags,
 * but it must be negative to indicate the prefix tree to tree_add_word().
 * Use a negative number with the lower 8 bits zero.
 * The value is parenthesized so that the minus sign can never combine with
 * whatever comes before the macro in an expression. */
#define PFX_FLAGS	(-256)

/*
 * Tunable parameters for when the tree is compressed.  See 'mkspellmem'.
 */
static long compress_start = 30000;	/* memory / SBLOCKSIZE */
static long compress_inc = 100;		/* memory / SBLOCKSIZE */
static long compress_added = 500000;	/* word count */
4341 
4342 #ifdef SPELL_PRINTTREE
4343 /*
4344  * For debugging the tree code: print the current tree in a (more or less)
4345  * readable format, so that we can see what happens when adding a word and/or
4346  * compressing the tree.
4347  * Based on code from Olaf Seibert.
4348  */
#define PRINTLINESIZE	1000	/* size of each of the line buffers below */
#define PRINTWIDTH	6	/* number of columns used per tree level */

/*
 * Format "fmt" with the arguments "a1" and "a2" into line buffer "l",
 * starting at the column that belongs to tree level "depth".  The space
 * that is left in the buffer is passed as the size limit.
 * "l" and "depth" are parenthesized so that an expression such as
 * "depth + 1" gives the intended offset and size.
 */
#define PRINTSOME(l, depth, fmt, a1, a2) \
	vim_snprintf((l) + (depth) * PRINTWIDTH, \
	    PRINTLINESIZE - PRINTWIDTH * (depth), fmt, a1, a2)

/* The three text lines that make up one row of the printed tree. */
static char line1[PRINTLINESIZE];
static char line2[PRINTLINESIZE];
static char line3[PRINTLINESIZE];
4358 
4359     static void
4360 spell_clear_flags(wordnode_T *node)
4361 {
4362     wordnode_T	*np;
4363 
4364     for (np = node; np != NULL; np = np->wn_sibling)
4365     {
4366 	np->wn_u1.index = FALSE;
4367 	spell_clear_flags(np->wn_child);
4368     }
4369 }
4370 
4371     static void
4372 spell_print_node(wordnode_T *node, int depth)
4373 {
4374     if (node->wn_u1.index)
4375     {
4376 	/* Done this node before, print the reference. */
4377 	PRINTSOME(line1, depth, "(%d)", node->wn_nr, 0);
4378 	PRINTSOME(line2, depth, "    ", 0, 0);
4379 	PRINTSOME(line3, depth, "    ", 0, 0);
4380 	msg(line1);
4381 	msg(line2);
4382 	msg(line3);
4383     }
4384     else
4385     {
4386 	node->wn_u1.index = TRUE;
4387 
4388 	if (node->wn_byte != NUL)
4389 	{
4390 	    if (node->wn_child != NULL)
4391 		PRINTSOME(line1, depth, " %c -> ", node->wn_byte, 0);
4392 	    else
4393 		/* Cannot happen? */
4394 		PRINTSOME(line1, depth, " %c ???", node->wn_byte, 0);
4395 	}
4396 	else
4397 	    PRINTSOME(line1, depth, " $    ", 0, 0);
4398 
4399 	PRINTSOME(line2, depth, "%d/%d    ", node->wn_nr, node->wn_refs);
4400 
4401 	if (node->wn_sibling != NULL)
4402 	    PRINTSOME(line3, depth, " |    ", 0, 0);
4403 	else
4404 	    PRINTSOME(line3, depth, "      ", 0, 0);
4405 
4406 	if (node->wn_byte == NUL)
4407 	{
4408 	    msg(line1);
4409 	    msg(line2);
4410 	    msg(line3);
4411 	}
4412 
4413 	/* do the children */
4414 	if (node->wn_byte != NUL && node->wn_child != NULL)
4415 	    spell_print_node(node->wn_child, depth + 1);
4416 
4417 	/* do the siblings */
4418 	if (node->wn_sibling != NULL)
4419 	{
4420 	    /* get rid of all parent details except | */
4421 	    STRCPY(line1, line3);
4422 	    STRCPY(line2, line3);
4423 	    spell_print_node(node->wn_sibling, depth);
4424 	}
4425     }
4426 }
4427 
4428     static void
4429 spell_print_tree(wordnode_T *root)
4430 {
4431     if (root != NULL)
4432     {
4433 	/* Clear the "wn_u1.index" fields, used to remember what has been
4434 	 * done. */
4435 	spell_clear_flags(root);
4436 
4437 	/* Recursively print the tree. */
4438 	spell_print_node(root, 0);
4439     }
4440 }
4441 #endif /* SPELL_PRINTTREE */
4442 
4443 /*
4444  * Read the affix file "fname".
4445  * Returns an afffile_T, NULL for complete failure.
4446  */
4447     static afffile_T *
4448 spell_read_aff(spin, fname)
4449     spellinfo_T	*spin;
4450     char_u	*fname;
4451 {
4452     FILE	*fd;
4453     afffile_T	*aff;
4454     char_u	rline[MAXLINELEN];
4455     char_u	*line;
4456     char_u	*pc = NULL;
4457 #define MAXITEMCNT  7
4458     char_u	*(items[MAXITEMCNT]);
4459     int		itemcnt;
4460     char_u	*p;
4461     int		lnum = 0;
4462     affheader_T	*cur_aff = NULL;
4463     int		did_postpone_prefix = FALSE;
4464     int		aff_todo = 0;
4465     hashtab_T	*tp;
4466     char_u	*low = NULL;
4467     char_u	*fol = NULL;
4468     char_u	*upp = NULL;
4469     int		do_rep;
4470     int		do_sal;
4471     int		do_map;
4472     int		found_map = FALSE;
4473     hashitem_T	*hi;
4474     int		l;
4475     int		compminlen = 0;		/* COMPOUNDMIN value */
4476     int		compsylmax = 0;		/* COMPOUNDSYLMAX value */
4477     int		compmax = 0;		/* COMPOUNDMAX value */
4478     char_u	*compflags = NULL;	/* COMPOUNDFLAG and COMPOUNDFLAGS
4479 					   concatenated */
4480     char_u	*midword = NULL;	/* MIDWORD value */
4481     char_u	*syllable = NULL;	/* SYLLABLE value */
4482     char_u	*sofofrom = NULL;	/* SOFOFROM value */
4483     char_u	*sofoto = NULL;		/* SOFOTO value */
4484 
4485     /*
4486      * Open the file.
4487      */
4488     fd = mch_fopen((char *)fname, "r");
4489     if (fd == NULL)
4490     {
4491 	EMSG2(_(e_notopen), fname);
4492 	return NULL;
4493     }
4494 
4495     if (spin->si_verbose || p_verbose > 2)
4496     {
4497 	if (!spin->si_verbose)
4498 	    verbose_enter();
4499 	smsg((char_u *)_("Reading affix file %s ..."), fname);
4500 	out_flush();
4501 	if (!spin->si_verbose)
4502 	    verbose_leave();
4503     }
4504 
4505     /* Only do REP lines when not done in another .aff file already. */
4506     do_rep = spin->si_rep.ga_len == 0;
4507 
4508     /* Only do SAL lines when not done in another .aff file already. */
4509     do_sal = spin->si_sal.ga_len == 0;
4510 
4511     /* Only do MAP lines when not done in another .aff file already. */
4512     do_map = spin->si_map.ga_len == 0;
4513 
4514     /*
4515      * Allocate and init the afffile_T structure.
4516      */
4517     aff = (afffile_T *)getroom(spin, sizeof(afffile_T), TRUE);
4518     if (aff == NULL)
4519 	return NULL;
4520     hash_init(&aff->af_pref);
4521     hash_init(&aff->af_suff);
4522     hash_init(&aff->af_comp);
4523 
4524     /*
4525      * Read all the lines in the file one by one.
4526      */
4527     while (!vim_fgets(rline, MAXLINELEN, fd) && !got_int)
4528     {
4529 	line_breakcheck();
4530 	++lnum;
4531 
4532 	/* Skip comment lines. */
4533 	if (*rline == '#')
4534 	    continue;
4535 
4536 	/* Convert from "SET" to 'encoding' when needed. */
4537 	vim_free(pc);
4538 #ifdef FEAT_MBYTE
4539 	if (spin->si_conv.vc_type != CONV_NONE)
4540 	{
4541 	    pc = string_convert(&spin->si_conv, rline, NULL);
4542 	    if (pc == NULL)
4543 	    {
4544 		smsg((char_u *)_("Conversion failure for word in %s line %d: %s"),
4545 							   fname, lnum, rline);
4546 		continue;
4547 	    }
4548 	    line = pc;
4549 	}
4550 	else
4551 #endif
4552 	{
4553 	    pc = NULL;
4554 	    line = rline;
4555 	}
4556 
4557 	/* Split the line up in white separated items.  Put a NUL after each
4558 	 * item. */
4559 	itemcnt = 0;
4560 	for (p = line; ; )
4561 	{
4562 	    while (*p != NUL && *p <= ' ')  /* skip white space and CR/NL */
4563 		++p;
4564 	    if (*p == NUL)
4565 		break;
4566 	    if (itemcnt == MAXITEMCNT)	    /* too many items */
4567 		break;
4568 	    items[itemcnt++] = p;
4569 	    while (*p > ' ')	    /* skip until white space or CR/NL */
4570 		++p;
4571 	    if (*p == NUL)
4572 		break;
4573 	    *p++ = NUL;
4574 	}
4575 
4576 	/* Handle non-empty lines. */
4577 	if (itemcnt > 0)
4578 	{
4579 	    if (STRCMP(items[0], "SET") == 0 && itemcnt == 2
4580 						       && aff->af_enc == NULL)
4581 	    {
4582 #ifdef FEAT_MBYTE
4583 		/* Setup for conversion from "ENC" to 'encoding'. */
4584 		aff->af_enc = enc_canonize(items[1]);
4585 		if (aff->af_enc != NULL && !spin->si_ascii
4586 			&& convert_setup(&spin->si_conv, aff->af_enc,
4587 							       p_enc) == FAIL)
4588 		    smsg((char_u *)_("Conversion in %s not supported: from %s to %s"),
4589 					       fname, aff->af_enc, p_enc);
4590 		spin->si_conv.vc_fail = TRUE;
4591 #else
4592 		    smsg((char_u *)_("Conversion in %s not supported"), fname);
4593 #endif
4594 	    }
4595 	    else if (STRCMP(items[0], "FLAG") == 0 && itemcnt == 2
4596 					      && aff->af_flagtype == AFT_CHAR)
4597 	    {
4598 		if (STRCMP(items[1], "long") == 0)
4599 		    aff->af_flagtype = AFT_LONG;
4600 		else if (STRCMP(items[1], "num") == 0)
4601 		    aff->af_flagtype = AFT_NUM;
4602 		else if (STRCMP(items[1], "caplong") == 0)
4603 		    aff->af_flagtype = AFT_CAPLONG;
4604 		else
4605 		    smsg((char_u *)_("Invalid value for FLAG in %s line %d: %s"),
4606 			    fname, lnum, items[1]);
4607 		if (aff->af_rar != 0 || aff->af_kep != 0 || aff->af_bad != 0
4608 			|| aff->af_needaffix != 0
4609 			|| aff->af_needcomp != 0
4610 			|| compflags != NULL
4611 			|| aff->af_suff.ht_used > 0
4612 			|| aff->af_pref.ht_used > 0)
4613 		    smsg((char_u *)_("FLAG after using flags in %s line %d: %s"),
4614 			    fname, lnum, items[1]);
4615 	    }
4616 	    else if (STRCMP(items[0], "MIDWORD") == 0 && itemcnt == 2
4617 							   && midword == NULL)
4618 	    {
4619 		midword = getroom_save(spin, items[1]);
4620 	    }
4621 	    else if (STRCMP(items[0], "NOSPLITSUGS") == 0 && itemcnt == 1)
4622 	    {
4623 		/* ignored, we always split */
4624 	    }
4625 	    else if (STRCMP(items[0], "TRY") == 0 && itemcnt == 2)
4626 	    {
4627 		/* ignored, we look in the tree for what chars may appear */
4628 	    }
4629 	    else if (STRCMP(items[0], "SLASH") == 0 && itemcnt == 2
4630 							&& aff->af_slash == 0)
4631 	    {
4632 		aff->af_slash = items[1][0];
4633 		if (items[1][1] != NUL)
4634 		    smsg((char_u *)_("Character used for SLASH must be ASCII; in %s line %d: %s"),
4635 			    fname, lnum, items[1]);
4636 	    }
4637 	    else if (STRCMP(items[0], "RAR") == 0 && itemcnt == 2
4638 						       && aff->af_rar == 0)
4639 	    {
4640 		aff->af_rar = affitem2flag(aff->af_flagtype, items[1],
4641 								 fname, lnum);
4642 	    }
4643 	    else if (STRCMP(items[0], "KEP") == 0 && itemcnt == 2
4644 						       && aff->af_kep == 0)
4645 	    {
4646 		aff->af_kep = affitem2flag(aff->af_flagtype, items[1],
4647 								 fname, lnum);
4648 	    }
4649 	    else if (STRCMP(items[0], "BAD") == 0 && itemcnt == 2
4650 						       && aff->af_bad == 0)
4651 	    {
4652 		aff->af_bad = affitem2flag(aff->af_flagtype, items[1],
4653 								 fname, lnum);
4654 	    }
4655 	    else if (STRCMP(items[0], "NEEDAFFIX") == 0 && itemcnt == 2
4656 						    && aff->af_needaffix == 0)
4657 	    {
4658 		aff->af_needaffix = affitem2flag(aff->af_flagtype, items[1],
4659 								 fname, lnum);
4660 	    }
4661 	    else if (STRCMP(items[0], "NEEDCOMPOUND") == 0 && itemcnt == 2
4662 						     && aff->af_needcomp == 0)
4663 	    {
4664 		aff->af_needcomp = affitem2flag(aff->af_flagtype, items[1],
4665 								 fname, lnum);
4666 	    }
4667 	    else if (STRCMP(items[0], "COMPOUNDFLAG") == 0 && itemcnt == 2
4668 							 && compflags == NULL)
4669 	    {
4670 		/* Turn flag "c" into COMPOUNDFLAGS compatible string "c+",
4671 		 * "Na" into "Na+", "1234" into "1234+". */
4672 		p = getroom(spin, STRLEN(items[1]) + 2, FALSE);
4673 		if (p != NULL)
4674 		{
4675 		    STRCPY(p, items[1]);
4676 		    STRCAT(p, "+");
4677 		    compflags = p;
4678 		}
4679 	    }
4680 	    else if (STRCMP(items[0], "COMPOUNDFLAGS") == 0 && itemcnt == 2)
4681 	    {
4682 		/* Concatenate this string to previously defined ones, using a
4683 		 * slash to separate them. */
4684 		l = STRLEN(items[1]) + 1;
4685 		if (compflags != NULL)
4686 		    l += STRLEN(compflags) + 1;
4687 		p = getroom(spin, l, FALSE);
4688 		if (p != NULL)
4689 		{
4690 		    if (compflags != NULL)
4691 		    {
4692 			STRCPY(p, compflags);
4693 			STRCAT(p, "/");
4694 		    }
4695 		    STRCAT(p, items[1]);
4696 		    compflags = p;
4697 		}
4698 	    }
4699 	    else if (STRCMP(items[0], "COMPOUNDMAX") == 0 && itemcnt == 2
4700 							      && compmax == 0)
4701 	    {
4702 		compmax = atoi((char *)items[1]);
4703 		if (compmax == 0)
4704 		    smsg((char_u *)_("Wrong COMPOUNDMAX value in %s line %d: %s"),
4705 						       fname, lnum, items[1]);
4706 	    }
4707 	    else if (STRCMP(items[0], "COMPOUNDMIN") == 0 && itemcnt == 2
4708 							   && compminlen == 0)
4709 	    {
4710 		compminlen = atoi((char *)items[1]);
4711 		if (compminlen == 0)
4712 		    smsg((char_u *)_("Wrong COMPOUNDMIN value in %s line %d: %s"),
4713 						       fname, lnum, items[1]);
4714 	    }
4715 	    else if (STRCMP(items[0], "COMPOUNDSYLMAX") == 0 && itemcnt == 2
4716 							   && compsylmax == 0)
4717 	    {
4718 		compsylmax = atoi((char *)items[1]);
4719 		if (compsylmax == 0)
4720 		    smsg((char_u *)_("Wrong COMPOUNDSYLMAX value in %s line %d: %s"),
4721 						       fname, lnum, items[1]);
4722 	    }
4723 	    else if (STRCMP(items[0], "SYLLABLE") == 0 && itemcnt == 2
4724 							  && syllable == NULL)
4725 	    {
4726 		syllable = getroom_save(spin, items[1]);
4727 	    }
4728 	    else if (STRCMP(items[0], "NOBREAK") == 0 && itemcnt == 1)
4729 	    {
4730 		spin->si_nobreak = TRUE;
4731 	    }
4732 	    else if (STRCMP(items[0], "PFXPOSTPONE") == 0 && itemcnt == 1)
4733 	    {
4734 		aff->af_pfxpostpone = TRUE;
4735 	    }
4736 	    else if ((STRCMP(items[0], "PFX") == 0
4737 					      || STRCMP(items[0], "SFX") == 0)
4738 		    && aff_todo == 0
4739 		    && itemcnt >= 4)
4740 	    {
4741 		int	lasti = 4;
4742 		char_u	key[AH_KEY_LEN];
4743 
4744 		if (*items[0] == 'P')
4745 		    tp = &aff->af_pref;
4746 		else
4747 		    tp = &aff->af_suff;
4748 
4749 		/* Myspell allows the same affix name to be used multiple
4750 		 * times.  The affix files that do this have an undocumented
4751 		 * "S" flag on all but the last block, thus we check for that
4752 		 * and store it in ah_follows. */
4753 		vim_strncpy(key, items[1], AH_KEY_LEN - 1);
4754 		hi = hash_find(tp, key);
4755 		if (!HASHITEM_EMPTY(hi))
4756 		{
4757 		    cur_aff = HI2AH(hi);
4758 		    if (cur_aff->ah_combine != (*items[2] == 'Y'))
4759 			smsg((char_u *)_("Different combining flag in continued affix block in %s line %d: %s"),
4760 						   fname, lnum, items[1]);
4761 		    if (!cur_aff->ah_follows)
4762 			smsg((char_u *)_("Duplicate affix in %s line %d: %s"),
4763 						       fname, lnum, items[1]);
4764 		}
4765 		else
4766 		{
4767 		    /* New affix letter. */
4768 		    cur_aff = (affheader_T *)getroom(spin,
4769 						   sizeof(affheader_T), TRUE);
4770 		    if (cur_aff == NULL)
4771 			break;
4772 		    cur_aff->ah_flag = affitem2flag(aff->af_flagtype, items[1],
4773 								 fname, lnum);
4774 		    if (cur_aff->ah_flag == 0 || STRLEN(items[1]) >= AH_KEY_LEN)
4775 			break;
4776 		    if (cur_aff->ah_flag == aff->af_bad
4777 			    || cur_aff->ah_flag == aff->af_rar
4778 			    || cur_aff->ah_flag == aff->af_kep
4779 			    || cur_aff->ah_flag == aff->af_needaffix
4780 			    || cur_aff->ah_flag == aff->af_needcomp)
4781 			smsg((char_u *)_("Affix also used for BAD/RAR/KEP/NEEDAFFIX/NEEDCOMPOUND in %s line %d: %s"),
4782 						       fname, lnum, items[1]);
4783 		    STRCPY(cur_aff->ah_key, items[1]);
4784 		    hash_add(tp, cur_aff->ah_key);
4785 
4786 		    cur_aff->ah_combine = (*items[2] == 'Y');
4787 		}
4788 
4789 		/* Check for the "S" flag, which apparently means that another
4790 		 * block with the same affix name is following. */
4791 		if (itemcnt > lasti && STRCMP(items[lasti], "S") == 0)
4792 		{
4793 		    ++lasti;
4794 		    cur_aff->ah_follows = TRUE;
4795 		}
4796 		else
4797 		    cur_aff->ah_follows = FALSE;
4798 
4799 		/* Myspell allows extra text after the item, but that might
4800 		 * mean mistakes go unnoticed.  Require a comment-starter. */
4801 		if (itemcnt > lasti && *items[lasti] != '#')
4802 		    smsg((char_u *)_("Trailing text in %s line %d: %s"),
4803 						       fname, lnum, items[4]);
4804 
4805 		if (STRCMP(items[2], "Y") != 0 && STRCMP(items[2], "N") != 0)
4806 		    smsg((char_u *)_("Expected Y or N in %s line %d: %s"),
4807 						       fname, lnum, items[2]);
4808 
4809 		if (*items[0] == 'P' && aff->af_pfxpostpone)
4810 		{
4811 		    if (cur_aff->ah_newID == 0)
4812 		    {
4813 			/* Use a new number in the .spl file later, to be able
4814 			 * to handle multiple .aff files. */
4815 			check_renumber(spin);
4816 			cur_aff->ah_newID = ++spin->si_newprefID;
4817 
4818 			/* We only really use ah_newID if the prefix is
4819 			 * postponed.  We know that only after handling all
4820 			 * the items. */
4821 			did_postpone_prefix = FALSE;
4822 		    }
4823 		    else
4824 			/* Did use the ID in a previous block. */
4825 			did_postpone_prefix = TRUE;
4826 		}
4827 
4828 		aff_todo = atoi((char *)items[3]);
4829 	    }
4830 	    else if ((STRCMP(items[0], "PFX") == 0
4831 					      || STRCMP(items[0], "SFX") == 0)
4832 		    && aff_todo > 0
4833 		    && STRCMP(cur_aff->ah_key, items[1]) == 0
4834 		    && itemcnt >= 5)
4835 	    {
4836 		affentry_T	*aff_entry;
4837 		int		rare = FALSE;
4838 		int		nocomp = FALSE;
4839 		int		upper = FALSE;
4840 		int		lasti = 5;
4841 
4842 		/* Check for "rare" and "nocomp" after the other info. */
4843 		while (itemcnt > lasti)
4844 		{
4845 		    if (!rare && STRICMP(items[lasti], "rare") == 0)
4846 		    {
4847 			rare = TRUE;
4848 			++lasti;
4849 		    }
4850 		    else if (!nocomp && STRICMP(items[lasti], "nocomp") == 0)
4851 		    {
4852 			nocomp = TRUE;
4853 			++lasti;
4854 		    }
4855 		    else
4856 			break;
4857 		}
4858 
4859 		/* Myspell allows extra text after the item, but that might
4860 		 * mean mistakes go unnoticed.  Require a comment-starter. */
4861 		if (itemcnt > lasti && *items[lasti] != '#')
4862 		    smsg((char_u *)_(e_afftrailing), fname, lnum, items[lasti]);
4863 
4864 		/* New item for an affix letter. */
4865 		--aff_todo;
4866 		aff_entry = (affentry_T *)getroom(spin,
4867 						    sizeof(affentry_T), TRUE);
4868 		if (aff_entry == NULL)
4869 		    break;
4870 		aff_entry->ae_rare = rare;
4871 		aff_entry->ae_nocomp = nocomp;
4872 
4873 		if (STRCMP(items[2], "0") != 0)
4874 		    aff_entry->ae_chop = getroom_save(spin, items[2]);
4875 		if (STRCMP(items[3], "0") != 0)
4876 		    aff_entry->ae_add = getroom_save(spin, items[3]);
4877 
4878 		/* Don't use an affix entry with non-ASCII characters when
4879 		 * "spin->si_ascii" is TRUE. */
4880 		if (!spin->si_ascii || !(has_non_ascii(aff_entry->ae_chop)
4881 					  || has_non_ascii(aff_entry->ae_add)))
4882 		{
4883 		    aff_entry->ae_next = cur_aff->ah_first;
4884 		    cur_aff->ah_first = aff_entry;
4885 
4886 		    if (STRCMP(items[4], ".") != 0)
4887 		    {
4888 			char_u	buf[MAXLINELEN];
4889 
4890 			aff_entry->ae_cond = getroom_save(spin, items[4]);
4891 			if (*items[0] == 'P')
4892 			    sprintf((char *)buf, "^%s", items[4]);
4893 			else
4894 			    sprintf((char *)buf, "%s$", items[4]);
4895 			aff_entry->ae_prog = vim_regcomp(buf,
4896 					    RE_MAGIC + RE_STRING + RE_STRICT);
4897 			if (aff_entry->ae_prog == NULL)
4898 			    smsg((char_u *)_("Broken condition in %s line %d: %s"),
4899 						       fname, lnum, items[4]);
4900 		    }
4901 
4902 		    /* For postponed prefixes we need an entry in si_prefcond
4903 		     * for the condition.  Use an existing one if possible. */
4904 		    if (*items[0] == 'P' && aff->af_pfxpostpone)
4905 		    {
4906 			/* When the chop string is one lower-case letter and
4907 			 * the add string ends in the upper-case letter we set
4908 			 * the "upper" flag, clear "ae_chop" and remove the
4909 			 * letters from "ae_add".  The condition must either
4910 			 * be empty or start with the same letter. */
4911 			if (aff_entry->ae_chop != NULL
4912 				&& aff_entry->ae_add != NULL
4913 #ifdef FEAT_MBYTE
4914 				&& aff_entry->ae_chop[(*mb_ptr2len)(
4915 						   aff_entry->ae_chop)] == NUL
4916 #else
4917 				&& aff_entry->ae_chop[1] == NUL
4918 #endif
4919 				)
4920 			{
4921 			    int		c, c_up;
4922 
4923 			    c = PTR2CHAR(aff_entry->ae_chop);
4924 			    c_up = SPELL_TOUPPER(c);
4925 			    if (c_up != c
4926 				    && (aff_entry->ae_cond == NULL
4927 					|| PTR2CHAR(aff_entry->ae_cond) == c))
4928 			    {
4929 				p = aff_entry->ae_add
4930 						  + STRLEN(aff_entry->ae_add);
4931 				mb_ptr_back(aff_entry->ae_add, p);
4932 				if (PTR2CHAR(p) == c_up)
4933 				{
4934 				    upper = TRUE;
4935 				    aff_entry->ae_chop = NULL;
4936 				    *p = NUL;
4937 
4938 				    /* The condition is matched with the
4939 				     * actual word, thus must check for the
4940 				     * upper-case letter. */
4941 				    if (aff_entry->ae_cond != NULL)
4942 				    {
4943 					char_u	buf[MAXLINELEN];
4944 #ifdef FEAT_MBYTE
4945 					if (has_mbyte)
4946 					{
4947 					    onecap_copy(items[4], buf, TRUE);
4948 					    aff_entry->ae_cond = getroom_save(
4949 								   spin, buf);
4950 					}
4951 					else
4952 #endif
4953 					    *aff_entry->ae_cond = c_up;
4954 					if (aff_entry->ae_cond != NULL)
4955 					{
4956 					    sprintf((char *)buf, "^%s",
4957 							  aff_entry->ae_cond);
4958 					    vim_free(aff_entry->ae_prog);
4959 					    aff_entry->ae_prog = vim_regcomp(
4960 						    buf, RE_MAGIC + RE_STRING);
4961 					}
4962 				    }
4963 				}
4964 			    }
4965 			}
4966 
4967 			if (aff_entry->ae_chop == NULL)
4968 			{
4969 			    int		idx;
4970 			    char_u	**pp;
4971 			    int		n;
4972 
4973 			    /* Find a previously used condition. */
4974 			    for (idx = spin->si_prefcond.ga_len - 1; idx >= 0;
4975 									--idx)
4976 			    {
4977 				p = ((char_u **)spin->si_prefcond.ga_data)[idx];
4978 				if (str_equal(p, aff_entry->ae_cond))
4979 				    break;
4980 			    }
4981 			    if (idx < 0 && ga_grow(&spin->si_prefcond, 1) == OK)
4982 			    {
4983 				/* Not found, add a new condition. */
4984 				idx = spin->si_prefcond.ga_len++;
4985 				pp = ((char_u **)spin->si_prefcond.ga_data)
4986 									+ idx;
4987 				if (aff_entry->ae_cond == NULL)
4988 				    *pp = NULL;
4989 				else
4990 				    *pp = getroom_save(spin,
4991 							  aff_entry->ae_cond);
4992 			    }
4993 
4994 			    /* Add the prefix to the prefix tree. */
4995 			    if (aff_entry->ae_add == NULL)
4996 				p = (char_u *)"";
4997 			    else
4998 				p = aff_entry->ae_add;
4999 			    /* PFX_FLAGS is a negative number, so that
5000 			     * tree_add_word() knows this is the prefix tree. */
5001 			    n = PFX_FLAGS;
5002 			    if (rare)
5003 				n |= WFP_RARE;
5004 			    if (!cur_aff->ah_combine)
5005 				n |= WFP_NC;
5006 			    if (upper)
5007 				n |= WFP_UP;
5008 			    tree_add_word(spin, p, spin->si_prefroot, n,
5009 						      idx, cur_aff->ah_newID);
5010 			    did_postpone_prefix = TRUE;
5011 			}
5012 
5013 			/* Didn't actually use ah_newID, backup si_newprefID. */
5014 			if (aff_todo == 0 && !did_postpone_prefix)
5015 			{
5016 			    --spin->si_newprefID;
5017 			    cur_aff->ah_newID = 0;
5018 			}
5019 		    }
5020 		}
5021 	    }
5022 	    else if (STRCMP(items[0], "FOL") == 0 && itemcnt == 2
5023 							       && fol == NULL)
5024 	    {
5025 		fol = vim_strsave(items[1]);
5026 	    }
5027 	    else if (STRCMP(items[0], "LOW") == 0 && itemcnt == 2
5028 							       && low == NULL)
5029 	    {
5030 		low = vim_strsave(items[1]);
5031 	    }
5032 	    else if (STRCMP(items[0], "UPP") == 0 && itemcnt == 2
5033 							       && upp == NULL)
5034 	    {
5035 		upp = vim_strsave(items[1]);
5036 	    }
5037 	    else if (STRCMP(items[0], "REP") == 0 && itemcnt == 2)
5038 	    {
5039 		/* Ignore REP count */;
5040 		if (!isdigit(*items[1]))
5041 		    smsg((char_u *)_("Expected REP count in %s line %d"),
5042 								 fname, lnum);
5043 	    }
5044 	    else if (STRCMP(items[0], "REP") == 0 && itemcnt >= 3)
5045 	    {
5046 		/* REP item */
5047 		/* Myspell ignores extra arguments, we require it starts with
5048 		 * # to detect mistakes. */
5049 		if (itemcnt > 3 && items[3][0] != '#')
5050 		    smsg((char_u *)_(e_afftrailing), fname, lnum, items[3]);
5051 		if (do_rep)
5052 		    add_fromto(spin, &spin->si_rep, items[1], items[2]);
5053 	    }
5054 	    else if (STRCMP(items[0], "MAP") == 0 && itemcnt == 2)
5055 	    {
5056 		/* MAP item or count */
5057 		if (!found_map)
5058 		{
5059 		    /* First line contains the count. */
5060 		    found_map = TRUE;
5061 		    if (!isdigit(*items[1]))
5062 			smsg((char_u *)_("Expected MAP count in %s line %d"),
5063 								 fname, lnum);
5064 		}
5065 		else if (do_map)
5066 		{
5067 		    int		c;
5068 
5069 		    /* Check that every character appears only once. */
5070 		    for (p = items[1]; *p != NUL; )
5071 		    {
5072 #ifdef FEAT_MBYTE
5073 			c = mb_ptr2char_adv(&p);
5074 #else
5075 			c = *p++;
5076 #endif
5077 			if ((spin->si_map.ga_len > 0
5078 				    && vim_strchr(spin->si_map.ga_data, c)
5079 								      != NULL)
5080 				|| vim_strchr(p, c) != NULL)
5081 			    smsg((char_u *)_("Duplicate character in MAP in %s line %d"),
5082 								 fname, lnum);
5083 		    }
5084 
5085 		    /* We simply concatenate all the MAP strings, separated by
5086 		     * slashes. */
5087 		    ga_concat(&spin->si_map, items[1]);
5088 		    ga_append(&spin->si_map, '/');
5089 		}
5090 	    }
5091 	    else if (STRCMP(items[0], "SAL") == 0 && itemcnt == 3)
5092 	    {
5093 		if (do_sal)
5094 		{
5095 		    /* SAL item (sounds-a-like)
5096 		     * Either one of the known keys or a from-to pair. */
5097 		    if (STRCMP(items[1], "followup") == 0)
5098 			spin->si_followup = sal_to_bool(items[2]);
5099 		    else if (STRCMP(items[1], "collapse_result") == 0)
5100 			spin->si_collapse = sal_to_bool(items[2]);
5101 		    else if (STRCMP(items[1], "remove_accents") == 0)
5102 			spin->si_rem_accents = sal_to_bool(items[2]);
5103 		    else
5104 			/* when "to" is "_" it means empty */
5105 			add_fromto(spin, &spin->si_sal, items[1],
5106 				     STRCMP(items[2], "_") == 0 ? (char_u *)""
5107 								: items[2]);
5108 		}
5109 	    }
5110 	    else if (STRCMP(items[0], "SOFOFROM") == 0 && itemcnt == 2
5111 							  && sofofrom == NULL)
5112 	    {
5113 		sofofrom = getroom_save(spin, items[1]);
5114 	    }
5115 	    else if (STRCMP(items[0], "SOFOTO") == 0 && itemcnt == 2
5116 							    && sofoto == NULL)
5117 	    {
5118 		sofoto = getroom_save(spin, items[1]);
5119 	    }
5120 	    else
5121 		smsg((char_u *)_("Unrecognized or duplicate item in %s line %d: %s"),
5122 						       fname, lnum, items[0]);
5123 	}
5124     }
5125 
5126     if (fol != NULL || low != NULL || upp != NULL)
5127     {
5128 	if (spin->si_clear_chartab)
5129 	{
5130 	    /* Clear the char type tables, don't want to use any of the
5131 	     * currently used spell properties. */
5132 	    init_spell_chartab();
5133 	    spin->si_clear_chartab = FALSE;
5134 	}
5135 
5136 	/*
5137 	 * Don't write a word table for an ASCII file, so that we don't check
5138 	 * for conflicts with a word table that matches 'encoding'.
5139 	 * Don't write one for utf-8 either, we use utf_*() and
5140 	 * mb_get_class(), the list of chars in the file will be incomplete.
5141 	 */
5142 	if (!spin->si_ascii
5143 #ifdef FEAT_MBYTE
5144 		&& !enc_utf8
5145 #endif
5146 		)
5147 	{
5148 	    if (fol == NULL || low == NULL || upp == NULL)
5149 		smsg((char_u *)_("Missing FOL/LOW/UPP line in %s"), fname);
5150 	    else
5151 		(void)set_spell_chartab(fol, low, upp);
5152 	}
5153 
5154 	vim_free(fol);
5155 	vim_free(low);
5156 	vim_free(upp);
5157     }
5158 
5159     /* Use compound specifications of the .aff file for the spell info. */
5160     if (compmax != 0)
5161     {
5162 	aff_check_number(spin->si_compmax, compmax, "COMPOUNDMAX");
5163 	spin->si_compmax = compmax;
5164     }
5165 
5166     if (compminlen != 0)
5167     {
5168 	aff_check_number(spin->si_compminlen, compminlen, "COMPOUNDMIN");
5169 	spin->si_compminlen = compminlen;
5170     }
5171 
5172     if (compsylmax != 0)
5173     {
5174 	if (syllable == NULL)
5175 	    smsg((char_u *)_("COMPOUNDSYLMAX used without SYLLABLE"));
5176 	aff_check_number(spin->si_compsylmax, compsylmax, "COMPOUNDSYLMAX");
5177 	spin->si_compsylmax = compsylmax;
5178     }
5179 
5180     if (compflags != NULL)
5181 	process_compflags(spin, aff, compflags);
5182 
5183     /* Check that we didn't use too many renumbered flags. */
5184     if (spin->si_newcompID < spin->si_newprefID)
5185     {
5186 	if (spin->si_newcompID == 127 || spin->si_newcompID == 255)
5187 	    MSG(_("Too many postponed prefixes"));
5188 	else if (spin->si_newprefID == 0 || spin->si_newprefID == 127)
5189 	    MSG(_("Too many compound flags"));
5190 	else
5191 	    MSG(_("Too many posponed prefixes and/or compound flags"));
5192     }
5193 
5194     if (syllable != NULL)
5195     {
5196 	aff_check_string(spin->si_syllable, syllable, "SYLLABLE");
5197 	spin->si_syllable = syllable;
5198     }
5199 
5200     if (sofofrom != NULL || sofoto != NULL)
5201     {
5202 	if (sofofrom == NULL || sofoto == NULL)
5203 	    smsg((char_u *)_("Missing SOFO%s line in %s"),
5204 				     sofofrom == NULL ? "FROM" : "TO", fname);
5205 	else if (spin->si_sal.ga_len > 0)
5206 	    smsg((char_u *)_("Both SAL and SOFO lines in %s"), fname);
5207 	else
5208 	{
5209 	    aff_check_string(spin->si_sofofr, sofofrom, "SOFOFROM");
5210 	    aff_check_string(spin->si_sofoto, sofoto, "SOFOTO");
5211 	    spin->si_sofofr = sofofrom;
5212 	    spin->si_sofoto = sofoto;
5213 	}
5214     }
5215 
5216     if (midword != NULL)
5217     {
5218 	aff_check_string(spin->si_midword, midword, "MIDWORD");
5219 	spin->si_midword = midword;
5220     }
5221 
5222     vim_free(pc);
5223     fclose(fd);
5224     return aff;
5225 }
5226 
5227 /*
5228  * Turn an affix flag name into a number, according to the FLAG type.
5229  * returns zero for failure.
5230  */
5231     static unsigned
5232 affitem2flag(flagtype, item, fname, lnum)
5233     int		flagtype;
5234     char_u	*item;
5235     char_u	*fname;
5236     int		lnum;
5237 {
5238     unsigned	res;
5239     char_u	*p = item;
5240 
5241     res = get_affitem(flagtype, &p);
5242     if (res == 0)
5243     {
5244 	if (flagtype == AFT_NUM)
5245 	    smsg((char_u *)_("Flag is not a number in %s line %d: %s"),
5246 							   fname, lnum, item);
5247 	else
5248 	    smsg((char_u *)_("Illegal flag in %s line %d: %s"),
5249 							   fname, lnum, item);
5250     }
5251     if (*p != NUL)
5252     {
5253 	smsg((char_u *)_(e_affname), fname, lnum, item);
5254 	return 0;
5255     }
5256 
5257     return res;
5258 }
5259 
5260 /*
5261  * Get one affix name from "*pp" and advance the pointer.
5262  * Returns zero for an error, still advances the pointer then.
5263  */
5264     static unsigned
5265 get_affitem(flagtype, pp)
5266     int		flagtype;
5267     char_u	**pp;
5268 {
5269     int		res;
5270 
5271     if (flagtype == AFT_NUM)
5272     {
5273 	if (!VIM_ISDIGIT(**pp))
5274 	{
5275 	    ++*pp;	/* always advance, avoid getting stuck */
5276 	    return 0;
5277 	}
5278 	res = getdigits(pp);
5279     }
5280     else
5281     {
5282 #ifdef FEAT_MBYTE
5283 	res = mb_ptr2char_adv(pp);
5284 #else
5285 	res = *(*pp)++;
5286 #endif
5287 	if (flagtype == AFT_LONG || (flagtype == AFT_CAPLONG
5288 						 && res >= 'A' && res <= 'Z'))
5289 	{
5290 	    if (**pp == NUL)
5291 		return 0;
5292 #ifdef FEAT_MBYTE
5293 	    res = mb_ptr2char_adv(pp) + (res << 16);
5294 #else
5295 	    res = *(*pp)++ + (res << 16);
5296 #endif
5297 	}
5298     }
5299     return res;
5300 }
5301 
5302 /*
5303  * Process the "compflags" string used in an affix file and append it to
5304  * spin->si_compflags.
5305  * The processing involves changing the affix names to ID numbers, so that
5306  * they fit in one byte.
5307  */
5308     static void
5309 process_compflags(spin, aff, compflags)
5310     spellinfo_T	*spin;
5311     afffile_T	*aff;
5312     char_u	*compflags;
5313 {
5314     char_u	*p;
5315     char_u	*prevp;
5316     unsigned	flag;
5317     compitem_T	*ci;
5318     int		id;
5319     int		len;
5320     char_u	*tp;
5321     char_u	key[AH_KEY_LEN];
5322     hashitem_T	*hi;
5323 
5324     /* Make room for the old and the new compflags, concatenated with a / in
5325      * between.  Processing it makes it shorter, but we don't know by how
5326      * much, thus allocate the maximum. */
5327     len = STRLEN(compflags) + 1;
5328     if (spin->si_compflags != NULL)
5329 	len += STRLEN(spin->si_compflags) + 1;
5330     p = getroom(spin, len, FALSE);
5331     if (p == NULL)
5332 	return;
5333     if (spin->si_compflags != NULL)
5334     {
5335 	STRCPY(p, spin->si_compflags);
5336 	STRCAT(p, "/");
5337     }
5338     spin->si_compflags = p;
5339     tp = p + STRLEN(p);
5340 
5341     for (p = compflags; *p != NUL; )
5342     {
5343 	if (vim_strchr((char_u *)"/*+[]", *p) != NULL)
5344 	    /* Copy non-flag characters directly. */
5345 	    *tp++ = *p++;
5346 	else
5347 	{
5348 	    /* First get the flag number, also checks validity. */
5349 	    prevp = p;
5350 	    flag = get_affitem(aff->af_flagtype, &p);
5351 	    if (flag != 0)
5352 	    {
5353 		/* Find the flag in the hashtable.  If it was used before, use
5354 		 * the existing ID.  Otherwise add a new entry. */
5355 		vim_strncpy(key, prevp, p - prevp);
5356 		hi = hash_find(&aff->af_comp, key);
5357 		if (!HASHITEM_EMPTY(hi))
5358 		    id = HI2CI(hi)->ci_newID;
5359 		else
5360 		{
5361 		    ci = (compitem_T *)getroom(spin, sizeof(compitem_T), TRUE);
5362 		    if (ci == NULL)
5363 			break;
5364 		    STRCPY(ci->ci_key, key);
5365 		    ci->ci_flag = flag;
5366 		    /* Avoid using a flag ID that has a special meaning in a
5367 		     * regexp (also inside []). */
5368 		    do
5369 		    {
5370 			check_renumber(spin);
5371 			id = spin->si_newcompID--;
5372 		    } while (vim_strchr((char_u *)"/+*[]\\-^", id) != NULL);
5373 		    ci->ci_newID = id;
5374 		    hash_add(&aff->af_comp, ci->ci_key);
5375 		}
5376 		*tp++ = id;
5377 	    }
5378 	    if (aff->af_flagtype == AFT_NUM && *p == ',')
5379 		++p;
5380 	}
5381     }
5382 
5383     *tp = NUL;
5384 }
5385 
5386 /*
5387  * Check that the new IDs for postponed affixes and compounding don't overrun
5388  * each other.  We have almost 255 available, but start at 0-127 to avoid
5389  * using two bytes for utf-8.  When the 0-127 range is used up go to 128-255.
5390  * When that is used up an error message is given.
5391  */
5392     static void
5393 check_renumber(spin)
5394     spellinfo_T	*spin;
5395 {
5396     if (spin->si_newprefID == spin->si_newcompID && spin->si_newcompID < 128)
5397     {
5398 	spin->si_newprefID = 127;
5399 	spin->si_newcompID = 255;
5400     }
5401 }
5402 
5403 /*
5404  * Return TRUE if flag "flag" appears in affix list "afflist".
5405  */
5406     static int
5407 flag_in_afflist(flagtype, afflist, flag)
5408     int		flagtype;
5409     char_u	*afflist;
5410     unsigned	flag;
5411 {
5412     char_u	*p;
5413     unsigned	n;
5414 
5415     switch (flagtype)
5416     {
5417 	case AFT_CHAR:
5418 	    return vim_strchr(afflist, flag) != NULL;
5419 
5420 	case AFT_CAPLONG:
5421 	case AFT_LONG:
5422 	    for (p = afflist; *p != NUL; )
5423 	    {
5424 #ifdef FEAT_MBYTE
5425 		n = mb_ptr2char_adv(&p);
5426 #else
5427 		n = *p++;
5428 #endif
5429 		if ((flagtype == AFT_LONG || (n >= 'A' && n <= 'Z'))
5430 								 && *p != NUL)
5431 #ifdef FEAT_MBYTE
5432 		    n = mb_ptr2char_adv(&p) + (n << 16);
5433 #else
5434 		    n = *p++ + (n << 16);
5435 #endif
5436 		if (n == flag)
5437 		    return TRUE;
5438 	    }
5439 	    break;
5440 
5441 	case AFT_NUM:
5442 	    for (p = afflist; *p != NUL; )
5443 	    {
5444 		n = getdigits(&p);
5445 		if (n == flag)
5446 		    return TRUE;
5447 		if (*p != NUL)	/* skip over comma */
5448 		    ++p;
5449 	    }
5450 	    break;
5451     }
5452     return FALSE;
5453 }
5454 
5455 /*
5456  * Give a warning when "spinval" and "affval" numbers are set and not the same.
5457  */
5458     static void
5459 aff_check_number(spinval, affval, name)
5460     int	    spinval;
5461     int	    affval;
5462     char    *name;
5463 {
5464     if (spinval != 0 && spinval != affval)
5465 	smsg((char_u *)_("%s value differs from what is used in another .aff file"), name);
5466 }
5467 
5468 /*
5469  * Give a warning when "spinval" and "affval" strings are set and not the same.
5470  */
5471     static void
5472 aff_check_string(spinval, affval, name)
5473     char_u	*spinval;
5474     char_u	*affval;
5475     char	*name;
5476 {
5477     if (spinval != NULL && STRCMP(spinval, affval) != 0)
5478 	smsg((char_u *)_("%s value differs from what is used in another .aff file"), name);
5479 }
5480 
5481 /*
5482  * Return TRUE if strings "s1" and "s2" are equal.  Also consider both being
5483  * NULL as equal.
5484  */
5485     static int
5486 str_equal(s1, s2)
5487     char_u	*s1;
5488     char_u	*s2;
5489 {
5490     if (s1 == NULL || s2 == NULL)
5491 	return s1 == s2;
5492     return STRCMP(s1, s2) == 0;
5493 }
5494 
5495 /*
5496  * Add a from-to item to "gap".  Used for REP and SAL items.
5497  * They are stored case-folded.
5498  */
5499     static void
5500 add_fromto(spin, gap, from, to)
5501     spellinfo_T	*spin;
5502     garray_T	*gap;
5503     char_u	*from;
5504     char_u	*to;
5505 {
5506     fromto_T	*ftp;
5507     char_u	word[MAXWLEN];
5508 
5509     if (ga_grow(gap, 1) == OK)
5510     {
5511 	ftp = ((fromto_T *)gap->ga_data) + gap->ga_len;
5512 	(void)spell_casefold(from, STRLEN(from), word, MAXWLEN);
5513 	ftp->ft_from = getroom_save(spin, word);
5514 	(void)spell_casefold(to, STRLEN(to), word, MAXWLEN);
5515 	ftp->ft_to = getroom_save(spin, word);
5516 	++gap->ga_len;
5517     }
5518 }
5519 
5520 /*
5521  * Convert a boolean argument in a SAL line to TRUE or FALSE;
5522  */
5523     static int
5524 sal_to_bool(s)
5525     char_u	*s;
5526 {
5527     return STRCMP(s, "1") == 0 || STRCMP(s, "true") == 0;
5528 }
5529 
5530 /*
5531  * Return TRUE if string "s" contains a non-ASCII character (128 or higher).
5532  * When "s" is NULL FALSE is returned.
5533  */
5534     static int
5535 has_non_ascii(s)
5536     char_u	*s;
5537 {
5538     char_u	*p;
5539 
5540     if (s != NULL)
5541 	for (p = s; *p != NUL; ++p)
5542 	    if (*p >= 128)
5543 		return TRUE;
5544     return FALSE;
5545 }
5546 
5547 /*
5548  * Free the structure filled by spell_read_aff().
5549  */
5550     static void
5551 spell_free_aff(aff)
5552     afffile_T	*aff;
5553 {
5554     hashtab_T	*ht;
5555     hashitem_T	*hi;
5556     int		todo;
5557     affheader_T	*ah;
5558     affentry_T	*ae;
5559 
5560     vim_free(aff->af_enc);
5561 
5562     /* All this trouble to free the "ae_prog" items... */
5563     for (ht = &aff->af_pref; ; ht = &aff->af_suff)
5564     {
5565 	todo = ht->ht_used;
5566 	for (hi = ht->ht_array; todo > 0; ++hi)
5567 	{
5568 	    if (!HASHITEM_EMPTY(hi))
5569 	    {
5570 		--todo;
5571 		ah = HI2AH(hi);
5572 		for (ae = ah->ah_first; ae != NULL; ae = ae->ae_next)
5573 		    vim_free(ae->ae_prog);
5574 	    }
5575 	}
5576 	if (ht == &aff->af_suff)
5577 	    break;
5578     }
5579 
5580     hash_clear(&aff->af_pref);
5581     hash_clear(&aff->af_suff);
5582     hash_clear(&aff->af_comp);
5583 }
5584 
5585 /*
5586  * Read dictionary file "fname".
5587  * Returns OK or FAIL;
5588  */
5589     static int
5590 spell_read_dic(spin, fname, affile)
5591     spellinfo_T	*spin;
5592     char_u	*fname;
5593     afffile_T	*affile;
5594 {
5595     hashtab_T	ht;
5596     char_u	line[MAXLINELEN];
5597     char_u	*p;
5598     char_u	*afflist;
5599     char_u	store_afflist[MAXWLEN];
5600     int		pfxlen;
5601     int		need_affix;
5602     char_u	*dw;
5603     char_u	*pc;
5604     char_u	*w;
5605     int		l;
5606     hash_T	hash;
5607     hashitem_T	*hi;
5608     FILE	*fd;
5609     int		lnum = 1;
5610     int		non_ascii = 0;
5611     int		retval = OK;
5612     char_u	message[MAXLINELEN + MAXWLEN];
5613     int		flags;
5614     int		duplicate = 0;
5615 
5616     /*
5617      * Open the file.
5618      */
5619     fd = mch_fopen((char *)fname, "r");
5620     if (fd == NULL)
5621     {
5622 	EMSG2(_(e_notopen), fname);
5623 	return FAIL;
5624     }
5625 
5626     /* The hashtable is only used to detect duplicated words. */
5627     hash_init(&ht);
5628 
5629     if (spin->si_verbose || p_verbose > 2)
5630     {
5631 	if (!spin->si_verbose)
5632 	    verbose_enter();
5633 	smsg((char_u *)_("Reading dictionary file %s ..."), fname);
5634 	out_flush();
5635 	if (!spin->si_verbose)
5636 	    verbose_leave();
5637     }
5638 
5639     /* start with a message for the first line */
5640     spin->si_msg_count = 999999;
5641 
5642     /* Read and ignore the first line: word count. */
5643     (void)vim_fgets(line, MAXLINELEN, fd);
5644     if (!vim_isdigit(*skipwhite(line)))
5645 	EMSG2(_("E760: No word count in %s"), fname);
5646 
5647     /*
5648      * Read all the lines in the file one by one.
5649      * The words are converted to 'encoding' here, before being added to
5650      * the hashtable.
5651      */
5652     while (!vim_fgets(line, MAXLINELEN, fd) && !got_int)
5653     {
5654 	line_breakcheck();
5655 	++lnum;
5656 	if (line[0] == '#' || line[0] == '/')
5657 	    continue;	/* comment line */
5658 
5659 	/* Remove CR, LF and white space from the end.  White space halfway
5660 	 * the word is kept to allow e.g., "et al.". */
5661 	l = STRLEN(line);
5662 	while (l > 0 && line[l - 1] <= ' ')
5663 	    --l;
5664 	if (l == 0)
5665 	    continue;	/* empty line */
5666 	line[l] = NUL;
5667 
5668 	/* Find the optional affix names.  Replace the SLASH character by a
5669 	 * slash. */
5670 	afflist = NULL;
5671 	for (p = line; *p != NUL; mb_ptr_adv(p))
5672 	{
5673 	    if (*p == affile->af_slash)
5674 		*p = '/';
5675 	    else if (*p == '/')
5676 	    {
5677 		*p = NUL;
5678 		afflist = p + 1;
5679 		break;
5680 	    }
5681 	}
5682 
5683 	/* Skip non-ASCII words when "spin->si_ascii" is TRUE. */
5684 	if (spin->si_ascii && has_non_ascii(line))
5685 	{
5686 	    ++non_ascii;
5687 	    continue;
5688 	}
5689 
5690 #ifdef FEAT_MBYTE
5691 	/* Convert from "SET" to 'encoding' when needed. */
5692 	if (spin->si_conv.vc_type != CONV_NONE)
5693 	{
5694 	    pc = string_convert(&spin->si_conv, line, NULL);
5695 	    if (pc == NULL)
5696 	    {
5697 		smsg((char_u *)_("Conversion failure for word in %s line %d: %s"),
5698 						       fname, lnum, line);
5699 		continue;
5700 	    }
5701 	    w = pc;
5702 	}
5703 	else
5704 #endif
5705 	{
5706 	    pc = NULL;
5707 	    w = line;
5708 	}
5709 
5710 	/* This takes time, print a message every 10000 words. */
5711 	if (spin->si_verbose && spin->si_msg_count > 10000)
5712 	{
5713 	    spin->si_msg_count = 0;
5714 	    vim_snprintf((char *)message, sizeof(message),
5715 		    _("line %6d, word %6d - %s"),
5716 		       lnum, spin->si_foldwcount + spin->si_keepwcount, w);
5717 	    msg_start();
5718 	    msg_puts_long_attr(message, 0);
5719 	    msg_clr_eos();
5720 	    msg_didout = FALSE;
5721 	    msg_col = 0;
5722 	    out_flush();
5723 	}
5724 
5725 	/* Store the word in the hashtable to be able to find duplicates. */
5726 	dw = (char_u *)getroom_save(spin, w);
5727 	if (dw == NULL)
5728 	    retval = FAIL;
5729 	vim_free(pc);
5730 	if (retval == FAIL)
5731 	    break;
5732 
5733 	hash = hash_hash(dw);
5734 	hi = hash_lookup(&ht, dw, hash);
5735 	if (!HASHITEM_EMPTY(hi))
5736 	{
5737 	    if (p_verbose > 0)
5738 		smsg((char_u *)_("Duplicate word in %s line %d: %s"),
5739 							     fname, lnum, dw);
5740 	    else if (duplicate == 0)
5741 		smsg((char_u *)_("First duplicate word in %s line %d: %s"),
5742 							     fname, lnum, dw);
5743 	    ++duplicate;
5744 	}
5745 	else
5746 	    hash_add_item(&ht, hi, dw, hash);
5747 
5748 	flags = 0;
5749 	store_afflist[0] = NUL;
5750 	pfxlen = 0;
5751 	need_affix = FALSE;
5752 	if (afflist != NULL)
5753 	{
5754 	    /* Check for affix name that stands for keep-case word and stands
5755 	     * for rare word (if defined). */
5756 	    if (affile->af_kep != 0 && flag_in_afflist(
5757 				affile->af_flagtype, afflist, affile->af_kep))
5758 		flags |= WF_KEEPCAP | WF_FIXCAP;
5759 	    if (affile->af_rar != 0 && flag_in_afflist(
5760 				affile->af_flagtype, afflist, affile->af_rar))
5761 		flags |= WF_RARE;
5762 	    if (affile->af_bad != 0 && flag_in_afflist(
5763 				affile->af_flagtype, afflist, affile->af_bad))
5764 		flags |= WF_BANNED;
5765 	    if (affile->af_needaffix != 0 && flag_in_afflist(
5766 			  affile->af_flagtype, afflist, affile->af_needaffix))
5767 		need_affix = TRUE;
5768 	    if (affile->af_needcomp != 0 && flag_in_afflist(
5769 			   affile->af_flagtype, afflist, affile->af_needcomp))
5770 		flags |= WF_NEEDCOMP;
5771 
5772 	    if (affile->af_pfxpostpone)
5773 		/* Need to store the list of prefix IDs with the word. */
5774 		pfxlen = get_pfxlist(affile, afflist, store_afflist);
5775 
5776 	    if (spin->si_compflags != NULL)
5777 		/* Need to store the list of compound flags with the word.
5778 		 * Concatenate them to the list of prefix IDs. */
5779 		get_compflags(affile, afflist, store_afflist + pfxlen);
5780 	}
5781 
5782 	/* Add the word to the word tree(s). */
5783 	if (store_word(spin, dw, flags, spin->si_region,
5784 					   store_afflist, need_affix) == FAIL)
5785 	    retval = FAIL;
5786 
5787 	if (afflist != NULL)
5788 	{
5789 	    /* Find all matching suffixes and add the resulting words.
5790 	     * Additionally do matching prefixes that combine. */
5791 	    if (store_aff_word(spin, dw, afflist, affile,
5792 			   &affile->af_suff, &affile->af_pref,
5793 				 FALSE, flags, store_afflist, pfxlen) == FAIL)
5794 		retval = FAIL;
5795 
5796 	    /* Find all matching prefixes and add the resulting words. */
5797 	    if (store_aff_word(spin, dw, afflist, affile,
5798 			  &affile->af_pref, NULL,
5799 				 FALSE, flags, store_afflist, pfxlen) == FAIL)
5800 		retval = FAIL;
5801 	}
5802     }
5803 
5804     if (duplicate > 0)
5805 	smsg((char_u *)_("%d duplicate word(s) in %s"), duplicate, fname);
5806     if (spin->si_ascii && non_ascii > 0)
5807 	smsg((char_u *)_("Ignored %d word(s) with non-ASCII characters in %s"),
5808 							    non_ascii, fname);
5809     hash_clear(&ht);
5810 
5811     fclose(fd);
5812     return retval;
5813 }
5814 
5815 /*
5816  * Get the list of prefix IDs from the affix list "afflist".
5817  * Used for PFXPOSTPONE.
5818  * Put the resulting flags in "store_afflist[MAXWLEN]" with a terminating NUL
5819  * and return the number of affixes.
5820  */
5821     static int
5822 get_pfxlist(affile, afflist, store_afflist)
5823     afffile_T	*affile;
5824     char_u	*afflist;
5825     char_u	*store_afflist;
5826 {
5827     char_u	*p;
5828     char_u	*prevp;
5829     int		cnt = 0;
5830     int		id;
5831     char_u	key[AH_KEY_LEN];
5832     hashitem_T	*hi;
5833 
5834     for (p = afflist; *p != NUL; )
5835     {
5836 	prevp = p;
5837 	if (get_affitem(affile->af_flagtype, &p) != 0)
5838 	{
5839 	    /* A flag is a postponed prefix flag if it appears in "af_pref"
5840 	     * and it's ID is not zero. */
5841 	    vim_strncpy(key, prevp, p - prevp);
5842 	    hi = hash_find(&affile->af_pref, key);
5843 	    if (!HASHITEM_EMPTY(hi))
5844 	    {
5845 		id = HI2AH(hi)->ah_newID;
5846 		if (id != 0)
5847 		    store_afflist[cnt++] = id;
5848 	    }
5849 	}
5850 	if (affile->af_flagtype == AFT_NUM && *p == ',')
5851 	    ++p;
5852     }
5853 
5854     store_afflist[cnt] = NUL;
5855     return cnt;
5856 }
5857 
5858 /*
5859  * Get the list of compound IDs from the affix list "afflist" that are used
5860  * for compound words.
5861  * Puts the flags in "store_afflist[]".
5862  */
5863     static void
5864 get_compflags(affile, afflist, store_afflist)
5865     afffile_T	*affile;
5866     char_u	*afflist;
5867     char_u	*store_afflist;
5868 {
5869     char_u	*p;
5870     char_u	*prevp;
5871     int		cnt = 0;
5872     char_u	key[AH_KEY_LEN];
5873     hashitem_T	*hi;
5874 
5875     for (p = afflist; *p != NUL; )
5876     {
5877 	prevp = p;
5878 	if (get_affitem(affile->af_flagtype, &p) != 0)
5879 	{
5880 	    /* A flag is a compound flag if it appears in "af_comp". */
5881 	    vim_strncpy(key, prevp, p - prevp);
5882 	    hi = hash_find(&affile->af_comp, key);
5883 	    if (!HASHITEM_EMPTY(hi))
5884 		store_afflist[cnt++] = HI2CI(hi)->ci_newID;
5885 	}
5886 	if (affile->af_flagtype == AFT_NUM && *p == ',')
5887 	    ++p;
5888     }
5889 
5890     store_afflist[cnt] = NUL;
5891 }
5892 
5893 /*
5894  * Apply affixes to a word and store the resulting words.
5895  * "ht" is the hashtable with affentry_T that need to be applied, either
5896  * prefixes or suffixes.
5897  * "xht", when not NULL, is the prefix hashtable, to be used additionally on
5898  * the resulting words for combining affixes.
5899  *
5900  * Returns FAIL when out of memory.
5901  */
5902     static int
5903 store_aff_word(spin, word, afflist, affile, ht, xht, comb, flags,
5904 							      pfxlist, pfxlen)
5905     spellinfo_T	*spin;		/* spell info */
5906     char_u	*word;		/* basic word start */
5907     char_u	*afflist;	/* list of names of supported affixes */
5908     afffile_T	*affile;
5909     hashtab_T	*ht;
5910     hashtab_T	*xht;
5911     int		comb;		/* only use affixes that combine */
5912     int		flags;		/* flags for the word */
5913     char_u	*pfxlist;	/* list of prefix IDs */
5914     int		pfxlen;		/* nr of flags in "pfxlist" for prefixes, rest
5915 				 * is compound flags */
5916 {
5917     int		todo;
5918     hashitem_T	*hi;
5919     affheader_T	*ah;
5920     affentry_T	*ae;
5921     regmatch_T	regmatch;
5922     char_u	newword[MAXWLEN];
5923     int		retval = OK;
5924     int		i;
5925     char_u	*p;
5926     int		use_flags;
5927     char_u	*use_pfxlist;
5928     char_u	pfx_pfxlist[MAXWLEN];
5929     size_t	wordlen = STRLEN(word);
5930 
5931     todo = ht->ht_used;
5932     for (hi = ht->ht_array; todo > 0 && retval == OK; ++hi)
5933     {
5934 	if (!HASHITEM_EMPTY(hi))
5935 	{
5936 	    --todo;
5937 	    ah = HI2AH(hi);
5938 
5939 	    /* Check that the affix combines, if required, and that the word
5940 	     * supports this affix. */
5941 	    if ((!comb || ah->ah_combine) && flag_in_afflist(
5942 				   affile->af_flagtype, afflist, ah->ah_flag))
5943 	    {
5944 		/* Loop over all affix entries with this name. */
5945 		for (ae = ah->ah_first; ae != NULL; ae = ae->ae_next)
5946 		{
5947 		    /* Check the condition.  It's not logical to match case
5948 		     * here, but it is required for compatibility with
5949 		     * Myspell.
5950 		     * Another requirement from Myspell is that the chop
5951 		     * string is shorter than the word itself.
5952 		     * For prefixes, when "PFXPOSTPONE" was used, only do
5953 		     * prefixes with a chop string. */
5954 		    regmatch.regprog = ae->ae_prog;
5955 		    regmatch.rm_ic = FALSE;
5956 		    if ((xht != NULL || !affile->af_pfxpostpone
5957 				|| ae->ae_chop != NULL)
5958 			    && (ae->ae_chop == NULL
5959 				|| STRLEN(ae->ae_chop) < wordlen)
5960 			    && (ae->ae_prog == NULL
5961 				|| vim_regexec(&regmatch, word, (colnr_T)0)))
5962 		    {
5963 			/* Match.  Remove the chop and add the affix. */
5964 			if (xht == NULL)
5965 			{
5966 			    /* prefix: chop/add at the start of the word */
5967 			    if (ae->ae_add == NULL)
5968 				*newword = NUL;
5969 			    else
5970 				STRCPY(newword, ae->ae_add);
5971 			    p = word;
5972 			    if (ae->ae_chop != NULL)
5973 			    {
5974 				/* Skip chop string. */
5975 #ifdef FEAT_MBYTE
5976 				if (has_mbyte)
5977 				{
5978 				    i = mb_charlen(ae->ae_chop);
5979 				    for ( ; i > 0; --i)
5980 					mb_ptr_adv(p);
5981 				}
5982 				else
5983 #endif
5984 				    p += STRLEN(ae->ae_chop);
5985 			    }
5986 			    STRCAT(newword, p);
5987 			}
5988 			else
5989 			{
5990 			    /* suffix: chop/add at the end of the word */
5991 			    STRCPY(newword, word);
5992 			    if (ae->ae_chop != NULL)
5993 			    {
5994 				/* Remove chop string. */
5995 				p = newword + STRLEN(newword);
5996 				i = MB_CHARLEN(ae->ae_chop);
5997 				for ( ; i > 0; --i)
5998 				    mb_ptr_back(newword, p);
5999 				*p = NUL;
6000 			    }
6001 			    if (ae->ae_add != NULL)
6002 				STRCAT(newword, ae->ae_add);
6003 			}
6004 
6005 			/* Obey the "rare" flag of the affix. */
6006 			if (ae->ae_rare)
6007 			    use_flags = flags | WF_RARE;
6008 			else
6009 			    use_flags = flags;
6010 
6011 			/* Obey the "nocomp" flag of the affix: don't use the
6012 			 * compound flags. */
6013 			use_pfxlist = pfxlist;
6014 			if (ae->ae_nocomp && pfxlist != NULL)
6015 			{
6016 			    vim_strncpy(pfx_pfxlist, pfxlist, pfxlen);
6017 			    use_pfxlist = pfx_pfxlist;
6018 			}
6019 
6020 			/* When there are postponed prefixes... */
6021 			if (spin->si_prefroot != NULL
6022 				&& spin->si_prefroot->wn_sibling != NULL)
6023 			{
6024 			    /* ... add a flag to indicate an affix was used. */
6025 			    use_flags |= WF_HAS_AFF;
6026 
6027 			    /* ... don't use a prefix list if combining
6028 			     * affixes is not allowed.  But do use the
6029 			     * compound flags after them. */
6030 			    if ((!ah->ah_combine || comb) && pfxlist != NULL)
6031 				use_pfxlist += pfxlen;
6032 			}
6033 
6034 			/* Store the modified word. */
6035 			if (store_word(spin, newword, use_flags,
6036 				 spin->si_region, use_pfxlist, FALSE) == FAIL)
6037 			    retval = FAIL;
6038 
6039 			/* When added a suffix and combining is allowed also
6040 			 * try adding prefixes additionally. */
6041 			if (xht != NULL && ah->ah_combine)
6042 			    if (store_aff_word(spin, newword, afflist, affile,
6043 					  xht, NULL, TRUE,
6044 				      use_flags, use_pfxlist, pfxlen) == FAIL)
6045 				retval = FAIL;
6046 		    }
6047 		}
6048 	    }
6049 	}
6050     }
6051 
6052     return retval;
6053 }
6054 
6055 /*
6056  * Read a file with a list of words.
6057  */
6058     static int
6059 spell_read_wordfile(spin, fname)
6060     spellinfo_T	*spin;
6061     char_u	*fname;
6062 {
6063     FILE	*fd;
6064     long	lnum = 0;
6065     char_u	rline[MAXLINELEN];
6066     char_u	*line;
6067     char_u	*pc = NULL;
6068     char_u	*p;
6069     int		l;
6070     int		retval = OK;
6071     int		did_word = FALSE;
6072     int		non_ascii = 0;
6073     int		flags;
6074     int		regionmask;
6075 
6076     /*
6077      * Open the file.
6078      */
6079     fd = mch_fopen((char *)fname, "r");
6080     if (fd == NULL)
6081     {
6082 	EMSG2(_(e_notopen), fname);
6083 	return FAIL;
6084     }
6085 
6086     if (spin->si_verbose || p_verbose > 2)
6087     {
6088 	if (!spin->si_verbose)
6089 	    verbose_enter();
6090 	smsg((char_u *)_("Reading word file %s ..."), fname);
6091 	out_flush();
6092 	if (!spin->si_verbose)
6093 	    verbose_leave();
6094     }
6095 
6096     /*
6097      * Read all the lines in the file one by one.
6098      */
6099     while (!vim_fgets(rline, MAXLINELEN, fd) && !got_int)
6100     {
6101 	line_breakcheck();
6102 	++lnum;
6103 
6104 	/* Skip comment lines. */
6105 	if (*rline == '#')
6106 	    continue;
6107 
6108 	/* Remove CR, LF and white space from the end. */
6109 	l = STRLEN(rline);
6110 	while (l > 0 && rline[l - 1] <= ' ')
6111 	    --l;
6112 	if (l == 0)
6113 	    continue;	/* empty or blank line */
6114 	rline[l] = NUL;
6115 
6116 	/* Convert from "=encoding={encoding}" to 'encoding' when needed. */
6117 	vim_free(pc);
6118 #ifdef FEAT_MBYTE
6119 	if (spin->si_conv.vc_type != CONV_NONE)
6120 	{
6121 	    pc = string_convert(&spin->si_conv, rline, NULL);
6122 	    if (pc == NULL)
6123 	    {
6124 		smsg((char_u *)_("Conversion failure for word in %s line %d: %s"),
6125 							   fname, lnum, rline);
6126 		continue;
6127 	    }
6128 	    line = pc;
6129 	}
6130 	else
6131 #endif
6132 	{
6133 	    pc = NULL;
6134 	    line = rline;
6135 	}
6136 
6137 	if (*line == '/')
6138 	{
6139 	    ++line;
6140 	    if (STRNCMP(line, "encoding=", 9) == 0)
6141 	    {
6142 		if (spin->si_conv.vc_type != CONV_NONE)
6143 		    smsg((char_u *)_("Duplicate /encoding= line ignored in %s line %d: %s"),
6144 						       fname, lnum, line - 1);
6145 		else if (did_word)
6146 		    smsg((char_u *)_("/encoding= line after word ignored in %s line %d: %s"),
6147 						       fname, lnum, line - 1);
6148 		else
6149 		{
6150 #ifdef FEAT_MBYTE
6151 		    char_u	*enc;
6152 
6153 		    /* Setup for conversion to 'encoding'. */
6154 		    line += 10;
6155 		    enc = enc_canonize(line);
6156 		    if (enc != NULL && !spin->si_ascii
6157 			    && convert_setup(&spin->si_conv, enc,
6158 							       p_enc) == FAIL)
6159 			smsg((char_u *)_("Conversion in %s not supported: from %s to %s"),
6160 							  fname, line, p_enc);
6161 		    vim_free(enc);
6162 		    spin->si_conv.vc_fail = TRUE;
6163 #else
6164 		    smsg((char_u *)_("Conversion in %s not supported"), fname);
6165 #endif
6166 		}
6167 		continue;
6168 	    }
6169 
6170 	    if (STRNCMP(line, "regions=", 8) == 0)
6171 	    {
6172 		if (spin->si_region_count > 1)
6173 		    smsg((char_u *)_("Duplicate /regions= line ignored in %s line %d: %s"),
6174 						       fname, lnum, line);
6175 		else
6176 		{
6177 		    line += 8;
6178 		    if (STRLEN(line) > 16)
6179 			smsg((char_u *)_("Too many regions in %s line %d: %s"),
6180 						       fname, lnum, line);
6181 		    else
6182 		    {
6183 			spin->si_region_count = STRLEN(line) / 2;
6184 			STRCPY(spin->si_region_name, line);
6185 
6186 			/* Adjust the mask for a word valid in all regions. */
6187 			spin->si_region = (1 << spin->si_region_count) - 1;
6188 		    }
6189 		}
6190 		continue;
6191 	    }
6192 
6193 	    smsg((char_u *)_("/ line ignored in %s line %d: %s"),
6194 						       fname, lnum, line - 1);
6195 	    continue;
6196 	}
6197 
6198 	flags = 0;
6199 	regionmask = spin->si_region;
6200 
6201 	/* Check for flags and region after a slash. */
6202 	p = vim_strchr(line, '/');
6203 	if (p != NULL)
6204 	{
6205 	    *p++ = NUL;
6206 	    while (*p != NUL)
6207 	    {
6208 		if (*p == '=')		/* keep-case word */
6209 		    flags |= WF_KEEPCAP | WF_FIXCAP;
6210 		else if (*p == '!')	/* Bad, bad, wicked word. */
6211 		    flags |= WF_BANNED;
6212 		else if (*p == '?')	/* Rare word. */
6213 		    flags |= WF_RARE;
6214 		else if (VIM_ISDIGIT(*p)) /* region number(s) */
6215 		{
6216 		    if ((flags & WF_REGION) == 0)   /* first one */
6217 			regionmask = 0;
6218 		    flags |= WF_REGION;
6219 
6220 		    l = *p - '0';
6221 		    if (l > spin->si_region_count)
6222 		    {
6223 			smsg((char_u *)_("Invalid region nr in %s line %d: %s"),
6224 							  fname, lnum, p);
6225 			break;
6226 		    }
6227 		    regionmask |= 1 << (l - 1);
6228 		}
6229 		else
6230 		{
6231 		    smsg((char_u *)_("Unrecognized flags in %s line %d: %s"),
6232 							      fname, lnum, p);
6233 		    break;
6234 		}
6235 		++p;
6236 	    }
6237 	}
6238 
6239 	/* Skip non-ASCII words when "spin->si_ascii" is TRUE. */
6240 	if (spin->si_ascii && has_non_ascii(line))
6241 	{
6242 	    ++non_ascii;
6243 	    continue;
6244 	}
6245 
6246 	/* Normal word: store it. */
6247 	if (store_word(spin, line, flags, regionmask, NULL, FALSE) == FAIL)
6248 	{
6249 	    retval = FAIL;
6250 	    break;
6251 	}
6252 	did_word = TRUE;
6253     }
6254 
6255     vim_free(pc);
6256     fclose(fd);
6257 
6258     if (spin->si_ascii && non_ascii > 0 && (spin->si_verbose || p_verbose > 2))
6259     {
6260 	if (p_verbose > 2)
6261 	    verbose_enter();
6262 	smsg((char_u *)_("Ignored %d words with non-ASCII characters"),
6263 								   non_ascii);
6264 	if (p_verbose > 2)
6265 	    verbose_leave();
6266     }
6267     return retval;
6268 }
6269 
6270 /*
6271  * Get part of an sblock_T, "len" bytes long.
6272  * This avoids calling free() for every little struct we use (and keeping
6273  * track of them).
6274  * The memory is cleared to all zeros.
6275  * Returns NULL when out of memory.
6276  */
6277     static void *
6278 getroom(spin, len, align)
6279     spellinfo_T *spin;
6280     size_t	len;		/* length needed */
6281     int		align;		/* align for pointer */
6282 {
6283     char_u	*p;
6284     sblock_T	*bl = spin->si_blocks;
6285 
6286     if (align && bl != NULL)
6287 	/* Round size up for alignment.  On some systems structures need to be
6288 	 * aligned to the size of a pointer (e.g., SPARC). */
6289 	bl->sb_used = (bl->sb_used + sizeof(char *) - 1)
6290 						      & ~(sizeof(char *) - 1);
6291 
6292     if (bl == NULL || bl->sb_used + len > SBLOCKSIZE)
6293     {
6294 	/* Allocate a block of memory. This is not freed until much later. */
6295 	bl = (sblock_T *)alloc_clear((unsigned)(sizeof(sblock_T) + SBLOCKSIZE));
6296 	if (bl == NULL)
6297 	    return NULL;
6298 	bl->sb_next = spin->si_blocks;
6299 	spin->si_blocks = bl;
6300 	bl->sb_used = 0;
6301 	++spin->si_blocks_cnt;
6302     }
6303 
6304     p = bl->sb_data + bl->sb_used;
6305     bl->sb_used += len;
6306 
6307     return p;
6308 }
6309 
6310 /*
6311  * Make a copy of a string into memory allocated with getroom().
6312  */
6313     static char_u *
6314 getroom_save(spin, s)
6315     spellinfo_T	*spin;
6316     char_u	*s;
6317 {
6318     char_u	*sc;
6319 
6320     sc = (char_u *)getroom(spin, STRLEN(s) + 1, FALSE);
6321     if (sc != NULL)
6322 	STRCPY(sc, s);
6323     return sc;
6324 }
6325 
6326 
6327 /*
6328  * Free the list of allocated sblock_T.
6329  */
6330     static void
6331 free_blocks(bl)
6332     sblock_T	*bl;
6333 {
6334     sblock_T	*next;
6335 
6336     while (bl != NULL)
6337     {
6338 	next = bl->sb_next;
6339 	vim_free(bl);
6340 	bl = next;
6341     }
6342 }
6343 
6344 /*
6345  * Allocate the root of a word tree.
6346  */
6347     static wordnode_T *
6348 wordtree_alloc(spin)
6349     spellinfo_T *spin;
6350 {
6351     return (wordnode_T *)getroom(spin, sizeof(wordnode_T), TRUE);
6352 }
6353 
6354 /*
6355  * Store a word in the tree(s).
6356  * Always store it in the case-folded tree.  For a keep-case word this is
6357  * useful when the word can also be used with all caps (no WF_FIXCAP flag) and
6358  * used to find suggestions.
6359  * For a keep-case word also store it in the keep-case tree.
6360  * When "pfxlist" is not NULL store the word for each postponed prefix ID and
6361  * compound flag.
6362  */
6363     static int
6364 store_word(spin, word, flags, region, pfxlist, need_affix)
6365     spellinfo_T	*spin;
6366     char_u	*word;
6367     int		flags;		/* extra flags, WF_BANNED */
6368     int		region;		/* supported region(s) */
6369     char_u	*pfxlist;	/* list of prefix IDs or NULL */
6370     int		need_affix;	/* only store word with affix ID */
6371 {
6372     int		len = STRLEN(word);
6373     int		ct = captype(word, word + len);
6374     char_u	foldword[MAXWLEN];
6375     int		res = OK;
6376     char_u	*p;
6377 
6378     (void)spell_casefold(word, len, foldword, MAXWLEN);
6379     for (p = pfxlist; res == OK; ++p)
6380     {
6381 	if (!need_affix || (p != NULL && *p != NUL))
6382 	    res = tree_add_word(spin, foldword, spin->si_foldroot, ct | flags,
6383 						  region, p == NULL ? 0 : *p);
6384 	if (p == NULL || *p == NUL)
6385 	    break;
6386     }
6387     ++spin->si_foldwcount;
6388 
6389     if (res == OK && (ct == WF_KEEPCAP || (flags & WF_KEEPCAP)))
6390     {
6391 	for (p = pfxlist; res == OK; ++p)
6392 	{
6393 	    if (!need_affix || (p != NULL && *p != NUL))
6394 		res = tree_add_word(spin, word, spin->si_keeproot, flags,
6395 						  region, p == NULL ? 0 : *p);
6396 	    if (p == NULL || *p == NUL)
6397 		break;
6398 	}
6399 	++spin->si_keepwcount;
6400     }
6401     return res;
6402 }
6403 
6404 /*
6405  * Add word "word" to a word tree at "root".
6406  * When "flags" < 0 we are adding to the prefix tree where flags is used for
6407  * "rare" and "region" is the condition nr.
6408  * Returns FAIL when out of memory.
6409  */
6410     static int
6411 tree_add_word(spin, word, root, flags, region, affixID)
6412     spellinfo_T	*spin;
6413     char_u	*word;
6414     wordnode_T	*root;
6415     int		flags;
6416     int		region;
6417     int		affixID;
6418 {
6419     wordnode_T	*node = root;
6420     wordnode_T	*np;
6421     wordnode_T	*copyp, **copyprev;
6422     wordnode_T	**prev = NULL;
6423     int		i;
6424 
6425     /* Add each byte of the word to the tree, including the NUL at the end. */
6426     for (i = 0; ; ++i)
6427     {
6428 	/* When there is more than one reference to this node we need to make
6429 	 * a copy, so that we can modify it.  Copy the whole list of siblings
6430 	 * (we don't optimize for a partly shared list of siblings). */
6431 	if (node != NULL && node->wn_refs > 1)
6432 	{
6433 	    --node->wn_refs;
6434 	    copyprev = prev;
6435 	    for (copyp = node; copyp != NULL; copyp = copyp->wn_sibling)
6436 	    {
6437 		/* Allocate a new node and copy the info. */
6438 		np = get_wordnode(spin);
6439 		if (np == NULL)
6440 		    return FAIL;
6441 		np->wn_child = copyp->wn_child;
6442 		if (np->wn_child != NULL)
6443 		    ++np->wn_child->wn_refs;	/* child gets extra ref */
6444 		np->wn_byte = copyp->wn_byte;
6445 		if (np->wn_byte == NUL)
6446 		{
6447 		    np->wn_flags = copyp->wn_flags;
6448 		    np->wn_region = copyp->wn_region;
6449 		    np->wn_affixID = copyp->wn_affixID;
6450 		}
6451 
6452 		/* Link the new node in the list, there will be one ref. */
6453 		np->wn_refs = 1;
6454 		*copyprev = np;
6455 		copyprev = &np->wn_sibling;
6456 
6457 		/* Let "node" point to the head of the copied list. */
6458 		if (copyp == node)
6459 		    node = np;
6460 	    }
6461 	}
6462 
6463 	/* Look for the sibling that has the same character.  They are sorted
6464 	 * on byte value, thus stop searching when a sibling is found with a
6465 	 * higher byte value.  For zero bytes (end of word) the sorting is
6466 	 * done on flags and then on affixID. */
6467 	while (node != NULL
6468 		&& (node->wn_byte < word[i]
6469 		    || (node->wn_byte == NUL
6470 			&& (flags < 0
6471 			    ? node->wn_affixID < affixID
6472 			    : node->wn_flags < (flags & WN_MASK)
6473 				|| (node->wn_flags == (flags & WN_MASK)
6474 				    && node->wn_affixID < affixID)))))
6475 	{
6476 	    prev = &node->wn_sibling;
6477 	    node = *prev;
6478 	}
6479 	if (node == NULL
6480 		|| node->wn_byte != word[i]
6481 		|| (word[i] == NUL
6482 		    && (flags < 0
6483 			|| node->wn_flags != (flags & WN_MASK)
6484 			|| node->wn_affixID != affixID)))
6485 	{
6486 	    /* Allocate a new node. */
6487 	    np = get_wordnode(spin);
6488 	    if (np == NULL)
6489 		return FAIL;
6490 	    np->wn_byte = word[i];
6491 
6492 	    /* If "node" is NULL this is a new child or the end of the sibling
6493 	     * list: ref count is one.  Otherwise use ref count of sibling and
6494 	     * make ref count of sibling one (matters when inserting in front
6495 	     * of the list of siblings). */
6496 	    if (node == NULL)
6497 		np->wn_refs = 1;
6498 	    else
6499 	    {
6500 		np->wn_refs = node->wn_refs;
6501 		node->wn_refs = 1;
6502 	    }
6503 	    *prev = np;
6504 	    np->wn_sibling = node;
6505 	    node = np;
6506 	}
6507 
6508 	if (word[i] == NUL)
6509 	{
6510 	    node->wn_flags = flags;
6511 	    node->wn_region |= region;
6512 	    node->wn_affixID = affixID;
6513 	    break;
6514 	}
6515 	prev = &node->wn_child;
6516 	node = *prev;
6517     }
6518 #ifdef SPELL_PRINTTREE
6519     smsg("Added \"%s\"", word);
6520     spell_print_tree(root->wn_sibling);
6521 #endif
6522 
6523     /* count nr of words added since last message */
6524     ++spin->si_msg_count;
6525 
6526     if (spin->si_compress_cnt > 1)
6527     {
6528 	if (--spin->si_compress_cnt == 1)
6529 	    /* Did enough words to lower the block count limit. */
6530 	    spin->si_blocks_cnt += compress_inc;
6531     }
6532 
6533     /*
6534      * When we have allocated lots of memory we need to compress the word tree
6535      * to free up some room.  But compression is slow, and we might actually
6536      * need that room, thus only compress in the following situations:
6537      * 1. When not compressed before (si_compress_cnt == 0): when using
6538      *    "compress_start" blocks.
6539      * 2. When compressed before and used "compress_inc" blocks before
6540      *    adding "compress_added" words (si_compress_cnt > 1).
6541      * 3. When compressed before, added "compress_added" words
6542      *    (si_compress_cnt == 1) and the number of free nodes drops below the
6543      *    maximum word length.
6544      */
6545 #ifndef SPELL_PRINTTREE
6546     if (spin->si_compress_cnt == 1
6547 	    ? spin->si_free_count < MAXWLEN
6548 	    : spin->si_blocks_cnt >= compress_start)
6549 #endif
6550     {
6551 	/* Decrement the block counter.  The effect is that we compress again
6552 	 * when the freed up room has been used and another "compress_inc"
6553 	 * blocks have been allocated.  Unless "compress_added" words have
6554 	 * been added, then the limit is put back again. */
6555 	spin->si_blocks_cnt -= compress_inc;
6556 	spin->si_compress_cnt = compress_added;
6557 
6558 	if (spin->si_verbose)
6559 	{
6560 	    msg_start();
6561 	    msg_puts((char_u *)_(msg_compressing));
6562 	    msg_clr_eos();
6563 	    msg_didout = FALSE;
6564 	    msg_col = 0;
6565 	    out_flush();
6566 	}
6567 
6568 	/* Compress both trees.  Either they both have many nodes, which makes
6569 	 * compression useful, or one of them is small, which means
6570 	 * compression goes fast. */
6571 	wordtree_compress(spin, spin->si_foldroot);
6572 	wordtree_compress(spin, spin->si_keeproot);
6573     }
6574 
6575     return OK;
6576 }
6577 
6578 /*
6579  * Check the 'mkspellmem' option.  Return FAIL if it's wrong.
6580  * Sets "sps_flags".
6581  */
6582     int
6583 spell_check_msm()
6584 {
6585     char_u	*p = p_msm;
6586     long	start = 0;
6587     long	inc = 0;
6588     long	added = 0;
6589 
6590     if (!VIM_ISDIGIT(*p))
6591 	return FAIL;
6592     /* block count = (value * 1024) / SBLOCKSIZE (but avoid overflow)*/
6593     start = (getdigits(&p) * 10) / (SBLOCKSIZE / 102);
6594     if (*p != ',')
6595 	return FAIL;
6596     ++p;
6597     if (!VIM_ISDIGIT(*p))
6598 	return FAIL;
6599     inc = (getdigits(&p) * 102) / (SBLOCKSIZE / 10);
6600     if (*p != ',')
6601 	return FAIL;
6602     ++p;
6603     if (!VIM_ISDIGIT(*p))
6604 	return FAIL;
6605     added = getdigits(&p) * 1024;
6606     if (*p != NUL)
6607 	return FAIL;
6608 
6609     if (start == 0 || inc == 0 || added == 0 || inc > start)
6610 	return FAIL;
6611 
6612     compress_start = start;
6613     compress_inc = inc;
6614     compress_added = added;
6615     return OK;
6616 }
6617 
6618 
6619 /*
6620  * Get a wordnode_T, either from the list of previously freed nodes or
6621  * allocate a new one.
6622  */
6623     static wordnode_T *
6624 get_wordnode(spin)
6625     spellinfo_T	    *spin;
6626 {
6627     wordnode_T *n;
6628 
6629     if (spin->si_first_free == NULL)
6630 	n = (wordnode_T *)getroom(spin, sizeof(wordnode_T), TRUE);
6631     else
6632     {
6633 	n = spin->si_first_free;
6634 	spin->si_first_free = n->wn_child;
6635 	vim_memset(n, 0, sizeof(wordnode_T));
6636 	--spin->si_free_count;
6637     }
6638 #ifdef SPELL_PRINTTREE
6639     n->wn_nr = ++spin->si_wordnode_nr;
6640 #endif
6641     return n;
6642 }
6643 
6644 /*
6645  * Decrement the reference count on a node (which is the head of a list of
6646  * siblings).  If the reference count becomes zero free the node and its
6647  * siblings.
6648  */
6649     static void
6650 deref_wordnode(spin, node)
6651     spellinfo_T *spin;
6652     wordnode_T  *node;
6653 {
6654     wordnode_T *np;
6655 
6656     if (--node->wn_refs == 0)
6657 	for (np = node; np != NULL; np = np->wn_sibling)
6658 	{
6659 	    if (np->wn_child != NULL)
6660 		deref_wordnode(spin, np->wn_child);
6661 	    free_wordnode(spin, np);
6662 	}
6663 }
6664 
6665 /*
6666  * Free a wordnode_T for re-use later.
6667  * Only the "wn_child" field becomes invalid.
6668  */
6669     static void
6670 free_wordnode(spin, n)
6671     spellinfo_T	*spin;
6672     wordnode_T  *n;
6673 {
6674     n->wn_child = spin->si_first_free;
6675     spin->si_first_free = n;
6676     ++spin->si_free_count;
6677 }
6678 
6679 /*
6680  * Compress a tree: find tails that are identical and can be shared.
6681  */
6682     static void
6683 wordtree_compress(spin, root)
6684     spellinfo_T	    *spin;
6685     wordnode_T	    *root;
6686 {
6687     hashtab_T	    ht;
6688     int		    n;
6689     int		    tot = 0;
6690     int		    perc;
6691 
6692     /* Skip the root itself, it's not actually used.  The first sibling is the
6693      * start of the tree. */
6694     if (root->wn_sibling != NULL)
6695     {
6696 	hash_init(&ht);
6697 	n = node_compress(spin, root->wn_sibling, &ht, &tot);
6698 
6699 #ifndef SPELL_PRINTTREE
6700 	if (spin->si_verbose || p_verbose > 2)
6701 #endif
6702 	{
6703 	    if (!spin->si_verbose)
6704 		verbose_enter();
6705 	    if (tot > 1000000)
6706 		perc = (tot - n) / (tot / 100);
6707 	    else if (tot == 0)
6708 		perc = 0;
6709 	    else
6710 		perc = (tot - n) * 100 / tot;
6711 	    smsg((char_u *)_("Compressed %d of %d nodes; %d%% remaining"),
6712 								n, tot, perc);
6713 	    if (p_verbose > 2)
6714 		verbose_leave();
6715 	}
6716 #ifdef SPELL_PRINTTREE
6717 	spell_print_tree(root->wn_sibling);
6718 #endif
6719 	hash_clear(&ht);
6720     }
6721 }
6722 
6723 /*
6724  * Compress a node, its siblings and its children, depth first.
6725  * Returns the number of compressed nodes.
6726  */
6727     static int
6728 node_compress(spin, node, ht, tot)
6729     spellinfo_T	*spin;
6730     wordnode_T	*node;
6731     hashtab_T	*ht;
6732     int		*tot;	    /* total count of nodes before compressing,
6733 			       incremented while going through the tree */
6734 {
6735     wordnode_T	*np;
6736     wordnode_T	*tp;
6737     wordnode_T	*child;
6738     hash_T	hash;
6739     hashitem_T	*hi;
6740     int		len = 0;
6741     unsigned	nr, n;
6742     int		compressed = 0;
6743 
6744     /*
6745      * Go through the list of siblings.  Compress each child and then try
6746      * finding an identical child to replace it.
6747      * Note that with "child" we mean not just the node that is pointed to,
6748      * but the whole list of siblings, of which the node is the first.
6749      */
6750     for (np = node; np != NULL && !got_int; np = np->wn_sibling)
6751     {
6752 	++len;
6753 	if ((child = np->wn_child) != NULL)
6754 	{
6755 	    /* Compress the child.  This fills hashkey. */
6756 	    compressed += node_compress(spin, child, ht, tot);
6757 
6758 	    /* Try to find an identical child. */
6759 	    hash = hash_hash(child->wn_u1.hashkey);
6760 	    hi = hash_lookup(ht, child->wn_u1.hashkey, hash);
6761 	    tp = NULL;
6762 	    if (!HASHITEM_EMPTY(hi))
6763 	    {
6764 		/* There are children with an identical hash value.  Now check
6765 		 * if there is one that is really identical. */
6766 		for (tp = HI2WN(hi); tp != NULL; tp = tp->wn_u2.next)
6767 		    if (node_equal(child, tp))
6768 		    {
6769 			/* Found one!  Now use that child in place of the
6770 			 * current one.  This means the current child and all
6771 			 * its siblings is unlinked from the tree. */
6772 			++tp->wn_refs;
6773 			deref_wordnode(spin, child);
6774 			np->wn_child = tp;
6775 			++compressed;
6776 			break;
6777 		    }
6778 		if (tp == NULL)
6779 		{
6780 		    /* No other child with this hash value equals the child of
6781 		     * the node, add it to the linked list after the first
6782 		     * item. */
6783 		    tp = HI2WN(hi);
6784 		    child->wn_u2.next = tp->wn_u2.next;
6785 		    tp->wn_u2.next = child;
6786 		}
6787 	    }
6788 	    else
6789 		/* No other child has this hash value, add it to the
6790 		 * hashtable. */
6791 		hash_add_item(ht, hi, child->wn_u1.hashkey, hash);
6792 	}
6793     }
6794     *tot += len;
6795 
6796     /*
6797      * Make a hash key for the node and its siblings, so that we can quickly
6798      * find a lookalike node.  This must be done after compressing the sibling
6799      * list, otherwise the hash key would become invalid by the compression.
6800      */
6801     node->wn_u1.hashkey[0] = len;
6802     nr = 0;
6803     for (np = node; np != NULL; np = np->wn_sibling)
6804     {
6805 	if (np->wn_byte == NUL)
6806 	    /* end node: use wn_flags, wn_region and wn_affixID */
6807 	    n = np->wn_flags + (np->wn_region << 8) + (np->wn_affixID << 16);
6808 	else
6809 	    /* byte node: use the byte value and the child pointer */
6810 	    n = np->wn_byte + ((long_u)np->wn_child << 8);
6811 	nr = nr * 101 + n;
6812     }
6813 
6814     /* Avoid NUL bytes, it terminates the hash key. */
6815     n = nr & 0xff;
6816     node->wn_u1.hashkey[1] = n == 0 ? 1 : n;
6817     n = (nr >> 8) & 0xff;
6818     node->wn_u1.hashkey[2] = n == 0 ? 1 : n;
6819     n = (nr >> 16) & 0xff;
6820     node->wn_u1.hashkey[3] = n == 0 ? 1 : n;
6821     n = (nr >> 24) & 0xff;
6822     node->wn_u1.hashkey[4] = n == 0 ? 1 : n;
6823     node->wn_u1.hashkey[5] = NUL;
6824 
6825     /* Check for CTRL-C pressed now and then. */
6826     fast_breakcheck();
6827 
6828     return compressed;
6829 }
6830 
6831 /*
6832  * Return TRUE when two nodes have identical siblings and children.
6833  */
6834     static int
6835 node_equal(n1, n2)
6836     wordnode_T	*n1;
6837     wordnode_T	*n2;
6838 {
6839     wordnode_T	*p1;
6840     wordnode_T	*p2;
6841 
6842     for (p1 = n1, p2 = n2; p1 != NULL && p2 != NULL;
6843 				     p1 = p1->wn_sibling, p2 = p2->wn_sibling)
6844 	if (p1->wn_byte != p2->wn_byte
6845 		|| (p1->wn_byte == NUL
6846 		    ? (p1->wn_flags != p2->wn_flags
6847 			|| p1->wn_region != p2->wn_region
6848 			|| p1->wn_affixID != p2->wn_affixID)
6849 		    : (p1->wn_child != p2->wn_child)))
6850 	    break;
6851 
6852     return p1 == NULL && p2 == NULL;
6853 }
6854 
6855 /*
6856  * Write a number to file "fd", MSB first, in "len" bytes.
6857  */
6858     void
6859 put_bytes(fd, nr, len)
6860     FILE    *fd;
6861     long_u  nr;
6862     int	    len;
6863 {
6864     int	    i;
6865 
6866     for (i = len - 1; i >= 0; --i)
6867 	putc((int)(nr >> (i * 8)), fd);
6868 }
6869 
6870 static int
6871 #ifdef __BORLANDC__
6872 _RTLENTRYF
6873 #endif
6874 rep_compare __ARGS((const void *s1, const void *s2));
6875 
6876 /*
6877  * Function given to qsort() to sort the REP items on "from" string.
6878  */
6879     static int
6880 #ifdef __BORLANDC__
6881 _RTLENTRYF
6882 #endif
6883 rep_compare(s1, s2)
6884     const void	*s1;
6885     const void	*s2;
6886 {
6887     fromto_T	*p1 = (fromto_T *)s1;
6888     fromto_T	*p2 = (fromto_T *)s2;
6889 
6890     return STRCMP(p1->ft_from, p2->ft_from);
6891 }
6892 
6893 /*
6894  * Write the Vim .spl file "fname".
6895  * Return FAIL or OK;
6896  */
6897     static int
6898 write_vim_spell(spin, fname)
6899     spellinfo_T	*spin;
6900     char_u	*fname;
6901 {
6902     FILE	*fd;
6903     int		regionmask;
6904     int		round;
6905     wordnode_T	*tree;
6906     int		nodecount;
6907     int		i;
6908     int		l;
6909     garray_T	*gap;
6910     fromto_T	*ftp;
6911     char_u	*p;
6912     int		rr;
6913     int		retval = OK;
6914 
6915     fd = mch_fopen((char *)fname, "w");
6916     if (fd == NULL)
6917     {
6918 	EMSG2(_(e_notopen), fname);
6919 	return FAIL;
6920     }
6921 
6922     /* <HEADER>: <fileID> <versionnr> */
6923 							    /* <fileID> */
6924     if (fwrite(VIMSPELLMAGIC, VIMSPELLMAGICL, (size_t)1, fd) != 1)
6925     {
6926 	EMSG(_(e_write));
6927 	retval = FAIL;
6928     }
6929     putc(VIMSPELLVERSION, fd);				    /* <versionnr> */
6930 
6931     /*
6932      * <SECTIONS>: <section> ... <sectionend>
6933      */
6934 
6935     /* SN_REGION: <regionname> ...
6936      * Write the region names only if there is more than one. */
6937     if (spin->si_region_count > 1)
6938     {
6939 	putc(SN_REGION, fd);				/* <sectionID> */
6940 	putc(SNF_REQUIRED, fd);				/* <sectionflags> */
6941 	l = spin->si_region_count * 2;
6942 	put_bytes(fd, (long_u)l, 4);			/* <sectionlen> */
6943 	fwrite(spin->si_region_name, (size_t)l, (size_t)1, fd);
6944 							/* <regionname> ... */
6945 	regionmask = (1 << spin->si_region_count) - 1;
6946     }
6947     else
6948 	regionmask = 0;
6949 
6950     /* SN_CHARFLAGS: <charflagslen> <charflags> <folcharslen> <folchars>
6951      *
6952      * The table with character flags and the table for case folding.
6953      * This makes sure the same characters are recognized as word characters
6954      * when generating an when using a spell file.
6955      * Skip this for ASCII, the table may conflict with the one used for
6956      * 'encoding'.
6957      * Also skip this for an .add.spl file, the main spell file must contain
6958      * the table (avoids that it conflicts).  File is shorter too.
6959      */
6960     if (!spin->si_ascii && !spin->si_add)
6961     {
6962 	char_u	folchars[128 * 8];
6963 	int	flags;
6964 
6965 	putc(SN_CHARFLAGS, fd);				/* <sectionID> */
6966 	putc(SNF_REQUIRED, fd);				/* <sectionflags> */
6967 
6968 	/* Form the <folchars> string first, we need to know its length. */
6969 	l = 0;
6970 	for (i = 128; i < 256; ++i)
6971 	{
6972 #ifdef FEAT_MBYTE
6973 	    if (has_mbyte)
6974 		l += mb_char2bytes(spelltab.st_fold[i], folchars + l);
6975 	    else
6976 #endif
6977 		folchars[l++] = spelltab.st_fold[i];
6978 	}
6979 	put_bytes(fd, (long_u)(1 + 128 + 2 + l), 4);	/* <sectionlen> */
6980 
6981 	fputc(128, fd);					/* <charflagslen> */
6982 	for (i = 128; i < 256; ++i)
6983 	{
6984 	    flags = 0;
6985 	    if (spelltab.st_isw[i])
6986 		flags |= CF_WORD;
6987 	    if (spelltab.st_isu[i])
6988 		flags |= CF_UPPER;
6989 	    fputc(flags, fd);				/* <charflags> */
6990 	}
6991 
6992 	put_bytes(fd, (long_u)l, 2);			/* <folcharslen> */
6993 	fwrite(folchars, (size_t)l, (size_t)1, fd);	/* <folchars> */
6994     }
6995 
6996     /* SN_MIDWORD: <midword> */
6997     if (spin->si_midword != NULL)
6998     {
6999 	putc(SN_MIDWORD, fd);				/* <sectionID> */
7000 	putc(SNF_REQUIRED, fd);				/* <sectionflags> */
7001 
7002 	i = STRLEN(spin->si_midword);
7003 	put_bytes(fd, (long_u)i, 4);			/* <sectionlen> */
7004 	fwrite(spin->si_midword, (size_t)i, (size_t)1, fd); /* <midword> */
7005     }
7006 
7007     /* SN_PREFCOND: <prefcondcnt> <prefcond> ... */
7008     if (spin->si_prefcond.ga_len > 0)
7009     {
7010 	putc(SN_PREFCOND, fd);				/* <sectionID> */
7011 	putc(SNF_REQUIRED, fd);				/* <sectionflags> */
7012 
7013 	l = write_spell_prefcond(NULL, &spin->si_prefcond);
7014 	put_bytes(fd, (long_u)l, 4);			/* <sectionlen> */
7015 
7016 	write_spell_prefcond(fd, &spin->si_prefcond);
7017     }
7018 
7019     /* SN_REP: <repcount> <rep> ...
7020      * SN_SAL: <salflags> <salcount> <sal> ... */
7021 
7022     /* Sort the REP items. */
7023     qsort(spin->si_rep.ga_data, (size_t)spin->si_rep.ga_len,
7024 					       sizeof(fromto_T), rep_compare);
7025 
7026     /* round 1: SN_REP section
7027      * round 2: SN_SAL section (unless SN_SOFO is used) */
7028     for (round = 1; round <= 2; ++round)
7029     {
7030 	if (round == 1)
7031 	{
7032 	    gap = &spin->si_rep;
7033 	    putc(SN_REP, fd);				/* <sectionID> */
7034 	}
7035 	else
7036 	{
7037 	    if (spin->si_sofofr != NULL && spin->si_sofoto != NULL)
7038 		/* using SN_SOFO section instead of SN_SAL */
7039 		break;
7040 	    gap = &spin->si_sal;
7041 	    putc(SN_SAL, fd);				/* <sectionID> */
7042 	}
7043 
7044 	/* This is for making suggestions, section is not required. */
7045 	putc(0, fd);					/* <sectionflags> */
7046 
7047 	/* Compute the length of what follows. */
7048 	l = 2;	    /* count <repcount> or <salcount> */
7049 	for (i = 0; i < gap->ga_len; ++i)
7050 	{
7051 	    ftp = &((fromto_T *)gap->ga_data)[i];
7052 	    l += 1 + STRLEN(ftp->ft_from);  /* count <*fromlen> and <*from> */
7053 	    l += 1 + STRLEN(ftp->ft_to);    /* count <*tolen> and <*to> */
7054 	}
7055 	if (round == 2)
7056 	    ++l;	/* count <salflags> */
7057 	put_bytes(fd, (long_u)l, 4);			/* <sectionlen> */
7058 
7059 	if (round == 2)
7060 	{
7061 	    i = 0;
7062 	    if (spin->si_followup)
7063 		i |= SAL_F0LLOWUP;
7064 	    if (spin->si_collapse)
7065 		i |= SAL_COLLAPSE;
7066 	    if (spin->si_rem_accents)
7067 		i |= SAL_REM_ACCENTS;
7068 	    putc(i, fd);			/* <salflags> */
7069 	}
7070 
7071 	put_bytes(fd, (long_u)gap->ga_len, 2);	/* <repcount> or <salcount> */
7072 	for (i = 0; i < gap->ga_len; ++i)
7073 	{
7074 	    /* <rep> : <repfromlen> <repfrom> <reptolen> <repto> */
7075 	    /* <sal> : <salfromlen> <salfrom> <saltolen> <salto> */
7076 	    ftp = &((fromto_T *)gap->ga_data)[i];
7077 	    for (rr = 1; rr <= 2; ++rr)
7078 	    {
7079 		p = rr == 1 ? ftp->ft_from : ftp->ft_to;
7080 		l = STRLEN(p);
7081 		putc(l, fd);
7082 		fwrite(p, l, (size_t)1, fd);
7083 	    }
7084 	}
7085 
7086     }
7087 
7088     /* SN_SOFO: <sofofromlen> <sofofrom> <sofotolen> <sofoto>
7089      * This is for making suggestions, section is not required. */
7090     if (spin->si_sofofr != NULL && spin->si_sofoto != NULL)
7091     {
7092 	putc(SN_SOFO, fd);				/* <sectionID> */
7093 	putc(0, fd);					/* <sectionflags> */
7094 
7095 	l = STRLEN(spin->si_sofofr);
7096 	put_bytes(fd, (long_u)(l + STRLEN(spin->si_sofoto) + 4), 4);
7097 							/* <sectionlen> */
7098 
7099 	put_bytes(fd, (long_u)l, 2);			/* <sofofromlen> */
7100 	fwrite(spin->si_sofofr, l, (size_t)1, fd);	/* <sofofrom> */
7101 
7102 	l = STRLEN(spin->si_sofoto);
7103 	put_bytes(fd, (long_u)l, 2);			/* <sofotolen> */
7104 	fwrite(spin->si_sofoto, l, (size_t)1, fd);	/* <sofoto> */
7105     }
7106 
7107     /* SN_MAP: <mapstr>
7108      * This is for making suggestions, section is not required. */
7109     if (spin->si_map.ga_len > 0)
7110     {
7111 	putc(SN_MAP, fd);				/* <sectionID> */
7112 	putc(0, fd);					/* <sectionflags> */
7113 	l = spin->si_map.ga_len;
7114 	put_bytes(fd, (long_u)l, 4);			/* <sectionlen> */
7115 	fwrite(spin->si_map.ga_data, (size_t)l, (size_t)1, fd);
7116 							/* <mapstr> */
7117     }
7118 
7119     /* SN_COMPOUND: compound info.
7120      * We don't mark it required, when not supported all compound words will
7121      * be bad words. */
7122     if (spin->si_compflags != NULL)
7123     {
7124 	putc(SN_COMPOUND, fd);				/* <sectionID> */
7125 	putc(0, fd);					/* <sectionflags> */
7126 
7127 	l = STRLEN(spin->si_compflags);
7128 	put_bytes(fd, (long_u)(l + 3), 4);		/* <sectionlen> */
7129 	putc(spin->si_compmax, fd);			/* <compmax> */
7130 	putc(spin->si_compminlen, fd);			/* <compminlen> */
7131 	putc(spin->si_compsylmax, fd);			/* <compsylmax> */
7132 							/* <compflags> */
7133 	fwrite(spin->si_compflags, (size_t)l, (size_t)1, fd);
7134     }
7135 
7136     /* SN_NOBREAK: NOBREAK flag */
7137     if (spin->si_nobreak)
7138     {
7139 	putc(SN_NOBREAK, fd);				/* <sectionID> */
7140 	putc(0, fd);					/* <sectionflags> */
7141 
7142 	/* It's empty, the presence of the section flags the feature. */
7143 	put_bytes(fd, (long_u)0, 4);			/* <sectionlen> */
7144     }
7145 
7146     /* SN_SYLLABLE: syllable info.
7147      * We don't mark it required, when not supported syllables will not be
7148      * counted. */
7149     if (spin->si_syllable != NULL)
7150     {
7151 	putc(SN_SYLLABLE, fd);				/* <sectionID> */
7152 	putc(0, fd);					/* <sectionflags> */
7153 
7154 	l = STRLEN(spin->si_syllable);
7155 	put_bytes(fd, (long_u)l, 4);			/* <sectionlen> */
7156 	fwrite(spin->si_syllable, (size_t)l, (size_t)1, fd); /* <syllable> */
7157     }
7158 
7159     /* end of <SECTIONS> */
7160     putc(SN_END, fd);					/* <sectionend> */
7161 
7162 
7163     /*
7164      * <LWORDTREE>  <KWORDTREE>  <PREFIXTREE>
7165      */
7166     spin->si_memtot = 0;
7167     for (round = 1; round <= 3; ++round)
7168     {
7169 	if (round == 1)
7170 	    tree = spin->si_foldroot->wn_sibling;
7171 	else if (round == 2)
7172 	    tree = spin->si_keeproot->wn_sibling;
7173 	else
7174 	    tree = spin->si_prefroot->wn_sibling;
7175 
7176 	/* Clear the index and wnode fields in the tree. */
7177 	clear_node(tree);
7178 
7179 	/* Count the number of nodes.  Needed to be able to allocate the
7180 	 * memory when reading the nodes.  Also fills in index for shared
7181 	 * nodes. */
7182 	nodecount = put_node(NULL, tree, 0, regionmask, round == 3);
7183 
7184 	/* number of nodes in 4 bytes */
7185 	put_bytes(fd, (long_u)nodecount, 4);	/* <nodecount> */
7186 	spin->si_memtot += nodecount + nodecount * sizeof(int);
7187 
7188 	/* Write the nodes. */
7189 	(void)put_node(fd, tree, 0, regionmask, round == 3);
7190     }
7191 
7192     /* Write another byte to check for errors. */
7193     if (putc(0, fd) == EOF)
7194 	retval = FAIL;
7195 
7196     if (fclose(fd) == EOF)
7197 	retval = FAIL;
7198 
7199     return retval;
7200 }
7201 
7202 /*
7203  * Clear the index and wnode fields of "node", it siblings and its
7204  * children.  This is needed because they are a union with other items to save
7205  * space.
7206  */
7207     static void
7208 clear_node(node)
7209     wordnode_T	*node;
7210 {
7211     wordnode_T	*np;
7212 
7213     if (node != NULL)
7214 	for (np = node; np != NULL; np = np->wn_sibling)
7215 	{
7216 	    np->wn_u1.index = 0;
7217 	    np->wn_u2.wnode = NULL;
7218 
7219 	    if (np->wn_byte != NUL)
7220 		clear_node(np->wn_child);
7221 	}
7222 }
7223 
7224 
7225 /*
7226  * Dump a word tree at node "node".
7227  *
7228  * This first writes the list of possible bytes (siblings).  Then for each
7229  * byte recursively write the children.
7230  *
7231  * NOTE: The code here must match the code in read_tree(), since assumptions
7232  * are made about the indexes (so that we don't have to write them in the
7233  * file).
7234  *
7235  * Returns the number of nodes used.
7236  */
7237     static int
7238 put_node(fd, node, index, regionmask, prefixtree)
7239     FILE	*fd;		/* NULL when only counting */
7240     wordnode_T	*node;
7241     int		index;
7242     int		regionmask;
7243     int		prefixtree;	/* TRUE for PREFIXTREE */
7244 {
7245     int		newindex = index;
7246     int		siblingcount = 0;
7247     wordnode_T	*np;
7248     int		flags;
7249 
7250     /* If "node" is zero the tree is empty. */
7251     if (node == NULL)
7252 	return 0;
7253 
7254     /* Store the index where this node is written. */
7255     node->wn_u1.index = index;
7256 
7257     /* Count the number of siblings. */
7258     for (np = node; np != NULL; np = np->wn_sibling)
7259 	++siblingcount;
7260 
7261     /* Write the sibling count. */
7262     if (fd != NULL)
7263 	putc(siblingcount, fd);				/* <siblingcount> */
7264 
7265     /* Write each sibling byte and optionally extra info. */
7266     for (np = node; np != NULL; np = np->wn_sibling)
7267     {
7268 	if (np->wn_byte == 0)
7269 	{
7270 	    if (fd != NULL)
7271 	    {
7272 		/* For a NUL byte (end of word) write the flags etc. */
7273 		if (prefixtree)
7274 		{
7275 		    /* In PREFIXTREE write the required affixID and the
7276 		     * associated condition nr (stored in wn_region).  The
7277 		     * byte value is misused to store the "rare" and "not
7278 		     * combining" flags */
7279 		    if (np->wn_flags == (short_u)PFX_FLAGS)
7280 			putc(BY_NOFLAGS, fd);		/* <byte> */
7281 		    else
7282 		    {
7283 			putc(BY_FLAGS, fd);		/* <byte> */
7284 			putc(np->wn_flags, fd);		/* <pflags> */
7285 		    }
7286 		    putc(np->wn_affixID, fd);		/* <affixID> */
7287 		    put_bytes(fd, (long_u)np->wn_region, 2); /* <prefcondnr> */
7288 		}
7289 		else
7290 		{
7291 		    /* For word trees we write the flag/region items. */
7292 		    flags = np->wn_flags;
7293 		    if (regionmask != 0 && np->wn_region != regionmask)
7294 			flags |= WF_REGION;
7295 		    if (np->wn_affixID != 0)
7296 			flags |= WF_AFX;
7297 		    if (flags == 0)
7298 		    {
7299 			/* word without flags or region */
7300 			putc(BY_NOFLAGS, fd);			/* <byte> */
7301 		    }
7302 		    else
7303 		    {
7304 			if (np->wn_flags >= 0x100)
7305 			{
7306 			    putc(BY_FLAGS2, fd);		/* <byte> */
7307 			    putc(flags, fd);			/* <flags> */
7308 			    putc((unsigned)flags >> 8, fd);	/* <flags2> */
7309 			}
7310 			else
7311 			{
7312 			    putc(BY_FLAGS, fd);			/* <byte> */
7313 			    putc(flags, fd);			/* <flags> */
7314 			}
7315 			if (flags & WF_REGION)
7316 			    putc(np->wn_region, fd);		/* <region> */
7317 			if (flags & WF_AFX)
7318 			    putc(np->wn_affixID, fd);		/* <affixID> */
7319 		    }
7320 		}
7321 	    }
7322 	}
7323 	else
7324 	{
7325 	    if (np->wn_child->wn_u1.index != 0
7326 					 && np->wn_child->wn_u2.wnode != node)
7327 	    {
7328 		/* The child is written elsewhere, write the reference. */
7329 		if (fd != NULL)
7330 		{
7331 		    putc(BY_INDEX, fd);			/* <byte> */
7332 							/* <nodeidx> */
7333 		    put_bytes(fd, (long_u)np->wn_child->wn_u1.index, 3);
7334 		}
7335 	    }
7336 	    else if (np->wn_child->wn_u2.wnode == NULL)
7337 		/* We will write the child below and give it an index. */
7338 		np->wn_child->wn_u2.wnode = node;
7339 
7340 	    if (fd != NULL)
7341 		if (putc(np->wn_byte, fd) == EOF) /* <byte> or <xbyte> */
7342 		{
7343 		    EMSG(_(e_write));
7344 		    return 0;
7345 		}
7346 	}
7347     }
7348 
7349     /* Space used in the array when reading: one for each sibling and one for
7350      * the count. */
7351     newindex += siblingcount + 1;
7352 
7353     /* Recursively dump the children of each sibling. */
7354     for (np = node; np != NULL; np = np->wn_sibling)
7355 	if (np->wn_byte != 0 && np->wn_child->wn_u2.wnode == node)
7356 	    newindex = put_node(fd, np->wn_child, newindex, regionmask,
7357 								  prefixtree);
7358 
7359     return newindex;
7360 }
7361 
7362 
7363 /*
7364  * ":mkspell [-ascii] outfile  infile ..."
7365  * ":mkspell [-ascii] addfile"
7366  */
7367     void
7368 ex_mkspell(eap)
7369     exarg_T *eap;
7370 {
7371     int		fcount;
7372     char_u	**fnames;
7373     char_u	*arg = eap->arg;
7374     int		ascii = FALSE;
7375 
7376     if (STRNCMP(arg, "-ascii", 6) == 0)
7377     {
7378 	ascii = TRUE;
7379 	arg = skipwhite(arg + 6);
7380     }
7381 
7382     /* Expand all the remaining arguments (e.g., $VIMRUNTIME). */
7383     if (get_arglist_exp(arg, &fcount, &fnames) == OK)
7384     {
7385 	mkspell(fcount, fnames, ascii, eap->forceit, FALSE);
7386 	FreeWild(fcount, fnames);
7387     }
7388 }
7389 
7390 /*
7391  * Create a Vim spell file from one or more word lists.
7392  * "fnames[0]" is the output file name.
7393  * "fnames[fcount - 1]" is the last input file name.
7394  * Exception: when "fnames[0]" ends in ".add" it's used as the input file name
7395  * and ".spl" is appended to make the output file name.
7396  */
7397     static void
7398 mkspell(fcount, fnames, ascii, overwrite, added_word)
7399     int		fcount;
7400     char_u	**fnames;
7401     int		ascii;		    /* -ascii argument given */
7402     int		overwrite;	    /* overwrite existing output file */
7403     int		added_word;	    /* invoked through "zg" */
7404 {
7405     char_u	fname[MAXPATHL];
7406     char_u	wfname[MAXPATHL];
7407     char_u	**innames;
7408     int		incount;
7409     afffile_T	*(afile[8]);
7410     int		i;
7411     int		len;
7412     struct stat	st;
7413     int		error = FALSE;
7414     spellinfo_T spin;
7415 
7416     vim_memset(&spin, 0, sizeof(spin));
7417     spin.si_verbose = !added_word;
7418     spin.si_ascii = ascii;
7419     spin.si_followup = TRUE;
7420     spin.si_rem_accents = TRUE;
7421     ga_init2(&spin.si_rep, (int)sizeof(fromto_T), 20);
7422     ga_init2(&spin.si_sal, (int)sizeof(fromto_T), 20);
7423     ga_init2(&spin.si_map, (int)sizeof(char_u), 100);
7424     ga_init2(&spin.si_prefcond, (int)sizeof(char_u *), 50);
7425     spin.si_newcompID = 127;	/* start compound ID at first maximum */
7426 
7427     /* default: fnames[0] is output file, following are input files */
7428     innames = &fnames[1];
7429     incount = fcount - 1;
7430 
7431     if (fcount >= 1)
7432     {
7433 	len = STRLEN(fnames[0]);
7434 	if (fcount == 1 && len > 4 && STRCMP(fnames[0] + len - 4, ".add") == 0)
7435 	{
7436 	    /* For ":mkspell path/en.latin1.add" output file is
7437 	     * "path/en.latin1.add.spl". */
7438 	    innames = &fnames[0];
7439 	    incount = 1;
7440 	    vim_snprintf((char *)wfname, sizeof(wfname), "%s.spl", fnames[0]);
7441 	}
7442 	else if (fcount == 1)
7443 	{
7444 	    /* For ":mkspell path/vim" output file is "path/vim.latin1.spl". */
7445 	    innames = &fnames[0];
7446 	    incount = 1;
7447 	    vim_snprintf((char *)wfname, sizeof(wfname), "%s.%s.spl", fnames[0],
7448 			     spin.si_ascii ? (char_u *)"ascii" : spell_enc());
7449 	}
7450 	else if (len > 4 && STRCMP(fnames[0] + len - 4, ".spl") == 0)
7451 	{
7452 	    /* Name ends in ".spl", use as the file name. */
7453 	    vim_strncpy(wfname, fnames[0], sizeof(wfname) - 1);
7454 	}
7455 	else
7456 	    /* Name should be language, make the file name from it. */
7457 	    vim_snprintf((char *)wfname, sizeof(wfname), "%s.%s.spl", fnames[0],
7458 			     spin.si_ascii ? (char_u *)"ascii" : spell_enc());
7459 
7460 	/* Check for .ascii.spl. */
7461 	if (strstr((char *)gettail(wfname), ".ascii.") != NULL)
7462 	    spin.si_ascii = TRUE;
7463 
7464 	/* Check for .add.spl. */
7465 	if (strstr((char *)gettail(wfname), ".add.") != NULL)
7466 	    spin.si_add = TRUE;
7467     }
7468 
7469     if (incount <= 0)
7470 	EMSG(_(e_invarg));	/* need at least output and input names */
7471     else if (vim_strchr(gettail(wfname), '_') != NULL)
7472 	EMSG(_("E751: Output file name must not have region name"));
7473     else if (incount > 8)
7474 	EMSG(_("E754: Only up to 8 regions supported"));
7475     else
7476     {
7477 	/* Check for overwriting before doing things that may take a lot of
7478 	 * time. */
7479 	if (!overwrite && mch_stat((char *)wfname, &st) >= 0)
7480 	{
7481 	    EMSG(_(e_exists));
7482 	    return;
7483 	}
7484 	if (mch_isdir(wfname))
7485 	{
7486 	    EMSG2(_(e_isadir2), wfname);
7487 	    return;
7488 	}
7489 
7490 	/*
7491 	 * Init the aff and dic pointers.
7492 	 * Get the region names if there are more than 2 arguments.
7493 	 */
7494 	for (i = 0; i < incount; ++i)
7495 	{
7496 	    afile[i] = NULL;
7497 
7498 	    if (incount > 1)
7499 	    {
7500 		len = STRLEN(innames[i]);
7501 		if (STRLEN(gettail(innames[i])) < 5
7502 						|| innames[i][len - 3] != '_')
7503 		{
7504 		    EMSG2(_("E755: Invalid region in %s"), innames[i]);
7505 		    return;
7506 		}
7507 		spin.si_region_name[i * 2] = TOLOWER_ASC(innames[i][len - 2]);
7508 		spin.si_region_name[i * 2 + 1] =
7509 					     TOLOWER_ASC(innames[i][len - 1]);
7510 	    }
7511 	}
7512 	spin.si_region_count = incount;
7513 
7514 	spin.si_foldroot = wordtree_alloc(&spin);
7515 	spin.si_keeproot = wordtree_alloc(&spin);
7516 	spin.si_prefroot = wordtree_alloc(&spin);
7517 	if (spin.si_foldroot == NULL
7518 		|| spin.si_keeproot == NULL
7519 		|| spin.si_prefroot == NULL)
7520 	{
7521 	    free_blocks(spin.si_blocks);
7522 	    return;
7523 	}
7524 
7525 	/* When not producing a .add.spl file clear the character table when
7526 	 * we encounter one in the .aff file.  This means we dump the current
7527 	 * one in the .spl file if the .aff file doesn't define one.  That's
7528 	 * better than guessing the contents, the table will match a
7529 	 * previously loaded spell file. */
7530 	if (!spin.si_add)
7531 	    spin.si_clear_chartab = TRUE;
7532 
7533 	/*
7534 	 * Read all the .aff and .dic files.
7535 	 * Text is converted to 'encoding'.
7536 	 * Words are stored in the case-folded and keep-case trees.
7537 	 */
7538 	for (i = 0; i < incount && !error; ++i)
7539 	{
7540 	    spin.si_conv.vc_type = CONV_NONE;
7541 	    spin.si_region = 1 << i;
7542 
7543 	    vim_snprintf((char *)fname, sizeof(fname), "%s.aff", innames[i]);
7544 	    if (mch_stat((char *)fname, &st) >= 0)
7545 	    {
7546 		/* Read the .aff file.  Will init "spin->si_conv" based on the
7547 		 * "SET" line. */
7548 		afile[i] = spell_read_aff(&spin, fname);
7549 		if (afile[i] == NULL)
7550 		    error = TRUE;
7551 		else
7552 		{
7553 		    /* Read the .dic file and store the words in the trees. */
7554 		    vim_snprintf((char *)fname, sizeof(fname), "%s.dic",
7555 								  innames[i]);
7556 		    if (spell_read_dic(&spin, fname, afile[i]) == FAIL)
7557 			error = TRUE;
7558 		}
7559 	    }
7560 	    else
7561 	    {
7562 		/* No .aff file, try reading the file as a word list.  Store
7563 		 * the words in the trees. */
7564 		if (spell_read_wordfile(&spin, innames[i]) == FAIL)
7565 		    error = TRUE;
7566 	    }
7567 
7568 #ifdef FEAT_MBYTE
7569 	    /* Free any conversion stuff. */
7570 	    convert_setup(&spin.si_conv, NULL, NULL);
7571 #endif
7572 	}
7573 
7574 	if (spin.si_compflags != NULL && spin.si_nobreak)
7575 	    MSG(_("Warning: both compounding and NOBREAK specified"));
7576 
7577 	if (!error)
7578 	{
7579 	    /*
7580 	     * Combine tails in the tree.
7581 	     */
7582 	    if (spin.si_verbose || p_verbose > 2)
7583 	    {
7584 		if (!spin.si_verbose)
7585 		    verbose_enter();
7586 		MSG(_(msg_compressing));
7587 		out_flush();
7588 		if (!spin.si_verbose)
7589 		    verbose_leave();
7590 	    }
7591 	    wordtree_compress(&spin, spin.si_foldroot);
7592 	    wordtree_compress(&spin, spin.si_keeproot);
7593 	    wordtree_compress(&spin, spin.si_prefroot);
7594 	}
7595 
7596 	if (!error)
7597 	{
7598 	    /*
7599 	     * Write the info in the spell file.
7600 	     */
7601 	    if (spin.si_verbose || p_verbose > 2)
7602 	    {
7603 		if (!spin.si_verbose)
7604 		    verbose_enter();
7605 		smsg((char_u *)_("Writing spell file %s ..."), wfname);
7606 		out_flush();
7607 		if (!spin.si_verbose)
7608 		    verbose_leave();
7609 	    }
7610 
7611 	    error = write_vim_spell(&spin, wfname) == FAIL;
7612 
7613 	    if (spin.si_verbose || p_verbose > 2)
7614 	    {
7615 		if (!spin.si_verbose)
7616 		    verbose_enter();
7617 		MSG(_("Done!"));
7618 		smsg((char_u *)_("Estimated runtime memory use: %d bytes"),
7619 							      spin.si_memtot);
7620 		out_flush();
7621 		if (!spin.si_verbose)
7622 		    verbose_leave();
7623 	    }
7624 
7625 	    /* If the file is loaded need to reload it. */
7626 	    if (!error)
7627 		spell_reload_one(wfname, added_word);
7628 	}
7629 
7630 	/* Free the allocated memory. */
7631 	ga_clear(&spin.si_rep);
7632 	ga_clear(&spin.si_sal);
7633 	ga_clear(&spin.si_map);
7634 	ga_clear(&spin.si_prefcond);
7635 
7636 	/* Free the .aff file structures. */
7637 	for (i = 0; i < incount; ++i)
7638 	    if (afile[i] != NULL)
7639 		spell_free_aff(afile[i]);
7640 
7641 	/* Free all the bits and pieces at once. */
7642 	free_blocks(spin.si_blocks);
7643     }
7644 }
7645 
7646 
7647 /*
7648  * ":[count]spellgood  {word}"
7649  * ":[count]spellwrong  {word}"
7650  */
7651     void
7652 ex_spell(eap)
7653     exarg_T *eap;
7654 {
7655     spell_add_word(eap->arg, STRLEN(eap->arg), eap->cmdidx == CMD_spellwrong,
7656 				    eap->forceit ? 0 : (int)eap->line2);
7657 }
7658 
7659 /*
7660  * Add "word[len]" to 'spellfile' as a good or bad word.
7661  */
7662     void
7663 spell_add_word(word, len, bad, index)
7664     char_u	*word;
7665     int		len;
7666     int		bad;
7667     int		index;	    /* "zG" and "zW": zero, otherwise index in
7668 			       'spellfile' */
7669 {
7670     FILE	*fd;
7671     buf_T	*buf = NULL;
7672     int		new_spf = FALSE;
7673     struct stat	st;
7674     char_u	*fname;
7675     char_u	fnamebuf[MAXPATHL];
7676     char_u	line[MAXWLEN * 2];
7677     long	fpos, fpos_next = 0;
7678     int		i;
7679     char_u	*spf;
7680 
7681     if (index == 0)	    /* use internal wordlist */
7682     {
7683 	if (int_wordlist == NULL)
7684 	{
7685 	    int_wordlist = vim_tempname('s');
7686 	    if (int_wordlist == NULL)
7687 		return;
7688 	}
7689 	fname = int_wordlist;
7690     }
7691     else
7692     {
7693 	/* If 'spellfile' isn't set figure out a good default value. */
7694 	if (*curbuf->b_p_spf == NUL)
7695 	{
7696 	    init_spellfile();
7697 	    new_spf = TRUE;
7698 	}
7699 
7700 	if (*curbuf->b_p_spf == NUL)
7701 	{
7702 	    EMSG(_("E764: 'spellfile' is not set"));
7703 	    return;
7704 	}
7705 
7706 	for (spf = curbuf->b_p_spf, i = 1; *spf != NUL; ++i)
7707 	{
7708 	    copy_option_part(&spf, fnamebuf, MAXPATHL, ",");
7709 	    if (i == index)
7710 		break;
7711 	    if (*spf == NUL)
7712 	    {
7713 		EMSGN(_("E765: 'spellfile' does not have %ld entries"), index);
7714 		return;
7715 	    }
7716 	}
7717 
7718 	/* Check that the user isn't editing the .add file somewhere. */
7719 	buf = buflist_findname_exp(fnamebuf);
7720 	if (buf != NULL && buf->b_ml.ml_mfp == NULL)
7721 	    buf = NULL;
7722 	if (buf != NULL && bufIsChanged(buf))
7723 	{
7724 	    EMSG(_(e_bufloaded));
7725 	    return;
7726 	}
7727 
7728 	fname = fnamebuf;
7729     }
7730 
7731     if (bad)
7732     {
7733 	/* When the word also appears as good word we need to remove that one,
7734 	 * since its flags sort before the one with WF_BANNED. */
7735 	fd = mch_fopen((char *)fname, "r");
7736 	if (fd != NULL)
7737 	{
7738 	    while (!vim_fgets(line, MAXWLEN * 2, fd))
7739 	    {
7740 		fpos = fpos_next;
7741 		fpos_next = ftell(fd);
7742 		if (STRNCMP(word, line, len) == 0
7743 			&& (line[len] == '/' || line[len] < ' '))
7744 		{
7745 		    /* Found duplicate word.  Remove it by writing a '#' at
7746 		     * the start of the line.  Mixing reading and writing
7747 		     * doesn't work for all systems, close the file first. */
7748 		    fclose(fd);
7749 		    fd = mch_fopen((char *)fname, "r+");
7750 		    if (fd == NULL)
7751 			break;
7752 		    if (fseek(fd, fpos, SEEK_SET) == 0)
7753 			fputc('#', fd);
7754 		    fseek(fd, fpos_next, SEEK_SET);
7755 		}
7756 	    }
7757 	    fclose(fd);
7758 	}
7759     }
7760 
7761     fd = mch_fopen((char *)fname, "a");
7762     if (fd == NULL && new_spf)
7763     {
7764 	/* We just initialized the 'spellfile' option and can't open the file.
7765 	 * We may need to create the "spell" directory first.  We already
7766 	 * checked the runtime directory is writable in init_spellfile(). */
7767 	STRCPY(NameBuff, fname);
7768 	*gettail_sep(NameBuff) = NUL;
7769 	if (mch_stat((char *)NameBuff, &st) < 0)
7770 	{
7771 	    /* The directory doesn't exist.  Try creating it and opening the
7772 	     * file again. */
7773 	    vim_mkdir(NameBuff, 0755);
7774 	    fd = mch_fopen((char *)fname, "a");
7775 	}
7776     }
7777 
7778     if (fd == NULL)
7779 	EMSG2(_(e_notopen), fname);
7780     else
7781     {
7782 	if (bad)
7783 	    fprintf(fd, "%.*s/!\n", len, word);
7784 	else
7785 	    fprintf(fd, "%.*s\n", len, word);
7786 	fclose(fd);
7787 
7788 	/* Update the .add.spl file. */
7789 	mkspell(1, &fname, FALSE, TRUE, TRUE);
7790 
7791 	/* If the .add file is edited somewhere, reload it. */
7792 	if (buf != NULL)
7793 	    buf_reload(buf);
7794 
7795 	redraw_all_later(NOT_VALID);
7796     }
7797 }
7798 
7799 /*
7800  * Initialize 'spellfile' for the current buffer.
7801  */
7802     static void
7803 init_spellfile()
7804 {
7805     char_u	buf[MAXPATHL];
7806     int		l;
7807     char_u	*fname;
7808     char_u	*rtp;
7809     char_u	*lend;
7810     int		aspath = FALSE;
7811     char_u	*lstart = curbuf->b_p_spl;
7812 
7813     if (*curbuf->b_p_spl != NUL && curbuf->b_langp.ga_len > 0)
7814     {
7815 	/* Find the end of the language name.  Exclude the region.  If there
7816 	 * is a path separator remember the start of the tail. */
7817 	for (lend = curbuf->b_p_spl; *lend != NUL
7818 			&& vim_strchr((char_u *)",._", *lend) == NULL; ++lend)
7819 	    if (vim_ispathsep(*lend))
7820 	    {
7821 		aspath = TRUE;
7822 		lstart = lend + 1;
7823 	    }
7824 
7825 	/* Loop over all entries in 'runtimepath'.  Use the first one where we
7826 	 * are allowed to write. */
7827 	rtp = p_rtp;
7828 	while (*rtp != NUL)
7829 	{
7830 	    if (aspath)
7831 		/* Use directory of an entry with path, e.g., for
7832 		 * "/dir/lg.utf-8.spl" use "/dir". */
7833 		vim_strncpy(buf, curbuf->b_p_spl, lstart - curbuf->b_p_spl - 1);
7834 	    else
7835 		/* Copy the path from 'runtimepath' to buf[]. */
7836 		copy_option_part(&rtp, buf, MAXPATHL, ",");
7837 	    if (filewritable(buf) == 2)
7838 	    {
7839 		/* Use the first language name from 'spelllang' and the
7840 		 * encoding used in the first loaded .spl file. */
7841 		if (aspath)
7842 		    vim_strncpy(buf, curbuf->b_p_spl, lend - curbuf->b_p_spl);
7843 		else
7844 		{
7845 		    l = STRLEN(buf);
7846 		    vim_snprintf((char *)buf + l, MAXPATHL - l,
7847 				 "/spell/%.*s", (int)(lend - lstart), lstart);
7848 		}
7849 		l = STRLEN(buf);
7850 		fname = LANGP_ENTRY(curbuf->b_langp, 0)->lp_slang->sl_fname;
7851 		vim_snprintf((char *)buf + l, MAXPATHL - l, ".%s.add",
7852 			fname != NULL
7853 			  && strstr((char *)gettail(fname), ".ascii.") != NULL
7854 				       ? (char_u *)"ascii" : spell_enc());
7855 		set_option_value((char_u *)"spellfile", 0L, buf, OPT_LOCAL);
7856 		break;
7857 	    }
7858 	    aspath = FALSE;
7859 	}
7860     }
7861 }
7862 
7863 
7864 /*
7865  * Init the chartab used for spelling for ASCII.
7866  * EBCDIC is not supported!
7867  */
7868     static void
7869 clear_spell_chartab(sp)
7870     spelltab_T	*sp;
7871 {
7872     int		i;
7873 
7874     /* Init everything to FALSE. */
7875     vim_memset(sp->st_isw, FALSE, sizeof(sp->st_isw));
7876     vim_memset(sp->st_isu, FALSE, sizeof(sp->st_isu));
7877     for (i = 0; i < 256; ++i)
7878     {
7879 	sp->st_fold[i] = i;
7880 	sp->st_upper[i] = i;
7881     }
7882 
7883     /* We include digits.  A word shouldn't start with a digit, but handling
7884      * that is done separately. */
7885     for (i = '0'; i <= '9'; ++i)
7886 	sp->st_isw[i] = TRUE;
7887     for (i = 'A'; i <= 'Z'; ++i)
7888     {
7889 	sp->st_isw[i] = TRUE;
7890 	sp->st_isu[i] = TRUE;
7891 	sp->st_fold[i] = i + 0x20;
7892     }
7893     for (i = 'a'; i <= 'z'; ++i)
7894     {
7895 	sp->st_isw[i] = TRUE;
7896 	sp->st_upper[i] = i - 0x20;
7897     }
7898 }
7899 
7900 /*
7901  * Init the chartab used for spelling.  Only depends on 'encoding'.
7902  * Called once while starting up and when 'encoding' changes.
7903  * The default is to use isalpha(), but the spell file should define the word
7904  * characters to make it possible that 'encoding' differs from the current
7905  * locale.  For utf-8 we don't use isalpha() but our own functions.
7906  */
7907     void
7908 init_spell_chartab()
7909 {
7910     int	    i;
7911 
7912     did_set_spelltab = FALSE;
7913     clear_spell_chartab(&spelltab);
7914 #ifdef FEAT_MBYTE
7915     if (enc_dbcs)
7916     {
7917 	/* DBCS: assume double-wide characters are word characters. */
7918 	for (i = 128; i <= 255; ++i)
7919 	    if (MB_BYTE2LEN(i) == 2)
7920 		spelltab.st_isw[i] = TRUE;
7921     }
7922     else if (enc_utf8)
7923     {
7924 	for (i = 128; i < 256; ++i)
7925 	{
7926 	    spelltab.st_isu[i] = utf_isupper(i);
7927 	    spelltab.st_isw[i] = spelltab.st_isu[i] || utf_islower(i);
7928 	    spelltab.st_fold[i] = utf_fold(i);
7929 	    spelltab.st_upper[i] = utf_toupper(i);
7930 	}
7931     }
7932     else
7933 #endif
7934     {
7935 	/* Rough guess: use locale-dependent library functions. */
7936 	for (i = 128; i < 256; ++i)
7937 	{
7938 	    if (MB_ISUPPER(i))
7939 	    {
7940 		spelltab.st_isw[i] = TRUE;
7941 		spelltab.st_isu[i] = TRUE;
7942 		spelltab.st_fold[i] = MB_TOLOWER(i);
7943 	    }
7944 	    else if (MB_ISLOWER(i))
7945 	    {
7946 		spelltab.st_isw[i] = TRUE;
7947 		spelltab.st_upper[i] = MB_TOUPPER(i);
7948 	    }
7949 	}
7950     }
7951 }
7952 
7953 /*
7954  * Set the spell character tables from strings in the affix file.
7955  */
7956     static int
7957 set_spell_chartab(fol, low, upp)
7958     char_u	*fol;
7959     char_u	*low;
7960     char_u	*upp;
7961 {
7962     /* We build the new tables here first, so that we can compare with the
7963      * previous one. */
7964     spelltab_T	new_st;
7965     char_u	*pf = fol, *pl = low, *pu = upp;
7966     int		f, l, u;
7967 
7968     clear_spell_chartab(&new_st);
7969 
7970     while (*pf != NUL)
7971     {
7972 	if (*pl == NUL || *pu == NUL)
7973 	{
7974 	    EMSG(_(e_affform));
7975 	    return FAIL;
7976 	}
7977 #ifdef FEAT_MBYTE
7978 	f = mb_ptr2char_adv(&pf);
7979 	l = mb_ptr2char_adv(&pl);
7980 	u = mb_ptr2char_adv(&pu);
7981 #else
7982 	f = *pf++;
7983 	l = *pl++;
7984 	u = *pu++;
7985 #endif
7986 	/* Every character that appears is a word character. */
7987 	if (f < 256)
7988 	    new_st.st_isw[f] = TRUE;
7989 	if (l < 256)
7990 	    new_st.st_isw[l] = TRUE;
7991 	if (u < 256)
7992 	    new_st.st_isw[u] = TRUE;
7993 
7994 	/* if "LOW" and "FOL" are not the same the "LOW" char needs
7995 	 * case-folding */
7996 	if (l < 256 && l != f)
7997 	{
7998 	    if (f >= 256)
7999 	    {
8000 		EMSG(_(e_affrange));
8001 		return FAIL;
8002 	    }
8003 	    new_st.st_fold[l] = f;
8004 	}
8005 
8006 	/* if "UPP" and "FOL" are not the same the "UPP" char needs
8007 	 * case-folding, it's upper case and the "UPP" is the upper case of
8008 	 * "FOL" . */
8009 	if (u < 256 && u != f)
8010 	{
8011 	    if (f >= 256)
8012 	    {
8013 		EMSG(_(e_affrange));
8014 		return FAIL;
8015 	    }
8016 	    new_st.st_fold[u] = f;
8017 	    new_st.st_isu[u] = TRUE;
8018 	    new_st.st_upper[f] = u;
8019 	}
8020     }
8021 
8022     if (*pl != NUL || *pu != NUL)
8023     {
8024 	EMSG(_(e_affform));
8025 	return FAIL;
8026     }
8027 
8028     return set_spell_finish(&new_st);
8029 }
8030 
8031 /*
8032  * Set the spell character tables from strings in the .spl file.
8033  */
8034     static void
8035 set_spell_charflags(flags, cnt, fol)
8036     char_u	*flags;
8037     int		cnt;	    /* length of "flags" */
8038     char_u	*fol;
8039 {
8040     /* We build the new tables here first, so that we can compare with the
8041      * previous one. */
8042     spelltab_T	new_st;
8043     int		i;
8044     char_u	*p = fol;
8045     int		c;
8046 
8047     clear_spell_chartab(&new_st);
8048 
8049     for (i = 0; i < 128; ++i)
8050     {
8051 	if (i < cnt)
8052 	{
8053 	    new_st.st_isw[i + 128] = (flags[i] & CF_WORD) != 0;
8054 	    new_st.st_isu[i + 128] = (flags[i] & CF_UPPER) != 0;
8055 	}
8056 
8057 	if (*p != NUL)
8058 	{
8059 #ifdef FEAT_MBYTE
8060 	    c = mb_ptr2char_adv(&p);
8061 #else
8062 	    c = *p++;
8063 #endif
8064 	    new_st.st_fold[i + 128] = c;
8065 	    if (i + 128 != c && new_st.st_isu[i + 128] && c < 256)
8066 		new_st.st_upper[c] = i + 128;
8067 	}
8068     }
8069 
8070     (void)set_spell_finish(&new_st);
8071 }
8072 
8073     static int
8074 set_spell_finish(new_st)
8075     spelltab_T	*new_st;
8076 {
8077     int		i;
8078 
8079     if (did_set_spelltab)
8080     {
8081 	/* check that it's the same table */
8082 	for (i = 0; i < 256; ++i)
8083 	{
8084 	    if (spelltab.st_isw[i] != new_st->st_isw[i]
8085 		    || spelltab.st_isu[i] != new_st->st_isu[i]
8086 		    || spelltab.st_fold[i] != new_st->st_fold[i]
8087 		    || spelltab.st_upper[i] != new_st->st_upper[i])
8088 	    {
8089 		EMSG(_("E763: Word characters differ between spell files"));
8090 		return FAIL;
8091 	    }
8092 	}
8093     }
8094     else
8095     {
8096 	/* copy the new spelltab into the one being used */
8097 	spelltab = *new_st;
8098 	did_set_spelltab = TRUE;
8099     }
8100 
8101     return OK;
8102 }
8103 
8104 /*
8105  * Return TRUE if "p" points to a word character.
8106  * As a special case we see "midword" characters as word character when it is
8107  * followed by a word character.  This finds they'there but not 'they there'.
8108  * Thus this only works properly when past the first character of the word.
8109  */
8110     static int
8111 spell_iswordp(p, buf)
8112     char_u	*p;
8113     buf_T	*buf;	    /* buffer used */
8114 {
8115 #ifdef FEAT_MBYTE
8116     char_u	*s;
8117     int		l;
8118     int		c;
8119 
8120     if (has_mbyte)
8121     {
8122 	l = MB_BYTE2LEN(*p);
8123 	s = p;
8124 	if (l == 1)
8125 	{
8126 	    /* be quick for ASCII */
8127 	    if (buf->b_spell_ismw[*p])
8128 	    {
8129 		s = p + 1;		/* skip a mid-word character */
8130 		l = MB_BYTE2LEN(*s);
8131 	    }
8132 	}
8133 	else
8134 	{
8135 	    c = mb_ptr2char(p);
8136 	    if (c < 256 ? buf->b_spell_ismw[c]
8137 		    : (buf->b_spell_ismw_mb != NULL
8138 			   && vim_strchr(buf->b_spell_ismw_mb, c) != NULL))
8139 	    {
8140 		s = p + l;
8141 		l = MB_BYTE2LEN(*s);
8142 	    }
8143 	}
8144 
8145 	c = mb_ptr2char(s);
8146 	if (c > 255)
8147 	    return mb_get_class(s) >= 2;
8148 	return spelltab.st_isw[c];
8149     }
8150 #endif
8151 
8152     return spelltab.st_isw[buf->b_spell_ismw[*p] ? p[1] : p[0]];
8153 }
8154 
8155 /*
8156  * Return TRUE if "p" points to a word character.
8157  * Unlike spell_iswordp() this doesn't check for "midword" characters.
8158  */
8159     static int
8160 spell_iswordp_nmw(p)
8161     char_u	*p;
8162 {
8163 #ifdef FEAT_MBYTE
8164     int		c;
8165 
8166     if (has_mbyte)
8167     {
8168 	c = mb_ptr2char(p);
8169 	if (c > 255)
8170 	    return mb_get_class(p) >= 2;
8171 	return spelltab.st_isw[c];
8172     }
8173 #endif
8174     return spelltab.st_isw[*p];
8175 }
8176 
#ifdef FEAT_MBYTE
/*
 * Return TRUE if "p" points to a word character.
 * Wide version of spell_iswordp(): "p" points into an array of characters
 * instead of bytes.  When "*p" is a "midword" character of "buf" the
 * character after it decides the result.
 */
    static int
spell_iswordp_w(p, buf)
    int		*p;
    buf_T	*buf;
{
    int		ch = *p;
    int		is_midword;

    if (ch < 256)
	is_midword = buf->b_spell_ismw[ch];
    else
	is_midword = (buf->b_spell_ismw_mb != NULL
		      && vim_strchr(buf->b_spell_ismw_mb, ch) != NULL);

    /* Skip over a mid-word character. */
    if (is_midword)
	ch = p[1];

    if (ch <= 255)
	return spelltab.st_isw[ch];

    /* Above 255 the character class of the encoding is used. */
    if (enc_utf8)
	return utf_class(ch) >= 2;
    if (enc_dbcs)
	return dbcs_class((unsigned)ch >> 8, ch & 0xff) >= 2;
    return 0;
}
#endif
8207 
8208 /*
8209  * Write the table with prefix conditions to the .spl file.
8210  * When "fd" is NULL only count the length of what is written.
8211  */
8212     static int
8213 write_spell_prefcond(fd, gap)
8214     FILE	*fd;
8215     garray_T	*gap;
8216 {
8217     int		i;
8218     char_u	*p;
8219     int		len;
8220     int		totlen;
8221 
8222     if (fd != NULL)
8223 	put_bytes(fd, (long_u)gap->ga_len, 2);	    /* <prefcondcnt> */
8224 
8225     totlen = 2 + gap->ga_len; /* length of <prefcondcnt> and <condlen> bytes */
8226 
8227     for (i = 0; i < gap->ga_len; ++i)
8228     {
8229 	/* <prefcond> : <condlen> <condstr> */
8230 	p = ((char_u **)gap->ga_data)[i];
8231 	if (p != NULL)
8232 	{
8233 	    len = STRLEN(p);
8234 	    if (fd != NULL)
8235 	    {
8236 		fputc(len, fd);
8237 		fwrite(p, (size_t)len, (size_t)1, fd);
8238 	    }
8239 	    totlen += len;
8240 	}
8241 	else if (fd != NULL)
8242 	    fputc(0, fd);
8243     }
8244 
8245     return totlen;
8246 }
8247 
8248 /*
8249  * Case-fold "str[len]" into "buf[buflen]".  The result is NUL terminated.
8250  * Uses the character definitions from the .spl file.
8251  * When using a multi-byte 'encoding' the length may change!
8252  * Returns FAIL when something wrong.
8253  */
8254     static int
8255 spell_casefold(str, len, buf, buflen)
8256     char_u	*str;
8257     int		len;
8258     char_u	*buf;
8259     int		buflen;
8260 {
8261     int		i;
8262 
8263     if (len >= buflen)
8264     {
8265 	buf[0] = NUL;
8266 	return FAIL;		/* result will not fit */
8267     }
8268 
8269 #ifdef FEAT_MBYTE
8270     if (has_mbyte)
8271     {
8272 	int	outi = 0;
8273 	char_u	*p;
8274 	int	c;
8275 
8276 	/* Fold one character at a time. */
8277 	for (p = str; p < str + len; )
8278 	{
8279 	    if (outi + MB_MAXBYTES > buflen)
8280 	    {
8281 		buf[outi] = NUL;
8282 		return FAIL;
8283 	    }
8284 	    c = mb_cptr2char_adv(&p);
8285 	    outi += mb_char2bytes(SPELL_TOFOLD(c), buf + outi);
8286 	}
8287 	buf[outi] = NUL;
8288     }
8289     else
8290 #endif
8291     {
8292 	/* Be quick for non-multibyte encodings. */
8293 	for (i = 0; i < len; ++i)
8294 	    buf[i] = spelltab.st_fold[str[i]];
8295 	buf[i] = NUL;
8296     }
8297 
8298     return OK;
8299 }
8300 
8301 #define SPS_BEST    1
8302 #define SPS_FAST    2
8303 #define SPS_DOUBLE  4
8304 
8305 static int sps_flags = SPS_BEST;
8306 static int sps_limit = 9999;
8307 
8308 /*
8309  * Check the 'spellsuggest' option.  Return FAIL if it's wrong.
8310  * Sets "sps_flags" and "sps_limit".
8311  */
8312     int
8313 spell_check_sps()
8314 {
8315     char_u	*p;
8316     char_u	*s;
8317     char_u	buf[MAXPATHL];
8318     int		f;
8319 
8320     sps_flags = 0;
8321     sps_limit = 9999;
8322 
8323     for (p = p_sps; *p != NUL; )
8324     {
8325 	copy_option_part(&p, buf, MAXPATHL, ",");
8326 
8327 	f = 0;
8328 	if (VIM_ISDIGIT(*buf))
8329 	{
8330 	    s = buf;
8331 	    sps_limit = getdigits(&s);
8332 	    if (*s != NUL && !VIM_ISDIGIT(*s))
8333 		f = -1;
8334 	}
8335 	else if (STRCMP(buf, "best") == 0)
8336 	    f = SPS_BEST;
8337 	else if (STRCMP(buf, "fast") == 0)
8338 	    f = SPS_FAST;
8339 	else if (STRCMP(buf, "double") == 0)
8340 	    f = SPS_DOUBLE;
8341 	else if (STRNCMP(buf, "expr:", 5) != 0
8342 		&& STRNCMP(buf, "file:", 5) != 0)
8343 	    f = -1;
8344 
8345 	if (f == -1 || (sps_flags != 0 && f != 0))
8346 	{
8347 	    sps_flags = SPS_BEST;
8348 	    sps_limit = 9999;
8349 	    return FAIL;
8350 	}
8351 	if (f != 0)
8352 	    sps_flags = f;
8353     }
8354 
8355     if (sps_flags == 0)
8356 	sps_flags = SPS_BEST;
8357 
8358     return OK;
8359 }
8360 
8361 /* Remember what "z?" replaced. */
8362 static char_u	*repl_from = NULL;
8363 static char_u	*repl_to = NULL;
8364 
8365 /*
8366  * "z?": Find badly spelled word under or after the cursor.
8367  * Give suggestions for the properly spelled word.
8368  * When "count" is non-zero use that suggestion.
8369  */
8370     void
8371 spell_suggest(count)
8372     int		count;
8373 {
8374     char_u	*line;
8375     pos_T	prev_cursor = curwin->w_cursor;
8376     char_u	wcopy[MAXWLEN + 2];
8377     char_u	*p;
8378     int		i;
8379     int		c;
8380     suginfo_T	sug;
8381     suggest_T	*stp;
8382     int		mouse_used;
8383     int		need_cap;
8384     int		limit;
8385     int		selected = count;
8386 
8387     /* Find the start of the badly spelled word. */
8388     if (spell_move_to(curwin, FORWARD, TRUE, TRUE, NULL) == 0
8389 	    || curwin->w_cursor.col > prev_cursor.col)
8390     {
8391 	if (!curwin->w_p_spell || *curbuf->b_p_spl == NUL)
8392 	    return;
8393 
8394 	/* No bad word or it starts after the cursor: use the word under the
8395 	 * cursor. */
8396 	curwin->w_cursor = prev_cursor;
8397 	line = ml_get_curline();
8398 	p = line + curwin->w_cursor.col;
8399 	/* Backup to before start of word. */
8400 	while (p > line && spell_iswordp_nmw(p))
8401 	    mb_ptr_back(line, p);
8402 	/* Forward to start of word. */
8403 	while (*p != NUL && !spell_iswordp_nmw(p))
8404 	    mb_ptr_adv(p);
8405 
8406 	if (!spell_iswordp_nmw(p))		/* No word found. */
8407 	{
8408 	    beep_flush();
8409 	    return;
8410 	}
8411 	curwin->w_cursor.col = p - line;
8412     }
8413 
8414     /* Get the word and its length. */
8415 
8416     /* Figure out if the word should be capitalised. */
8417     need_cap = check_need_cap(curwin->w_cursor.lnum, curwin->w_cursor.col);
8418 
8419     line = ml_get_curline();
8420 
8421     /* Get the list of suggestions.  Limit to 'lines' - 2 or the number in
8422      * 'spellsuggest', whatever is smaller. */
8423     if (sps_limit > (int)Rows - 2)
8424 	limit = (int)Rows - 2;
8425     else
8426 	limit = sps_limit;
8427     spell_find_suggest(line + curwin->w_cursor.col, &sug, limit,
8428 							      TRUE, need_cap);
8429 
8430     if (sug.su_ga.ga_len == 0)
8431 	MSG(_("Sorry, no suggestions"));
8432     else if (count > 0)
8433     {
8434 	if (count > sug.su_ga.ga_len)
8435 	    smsg((char_u *)_("Sorry, only %ld suggestions"),
8436 						      (long)sug.su_ga.ga_len);
8437     }
8438     else
8439     {
8440 	vim_free(repl_from);
8441 	repl_from = NULL;
8442 	vim_free(repl_to);
8443 	repl_to = NULL;
8444 
8445 #ifdef FEAT_RIGHTLEFT
8446 	/* When 'rightleft' is set the list is drawn right-left. */
8447 	cmdmsg_rl = curwin->w_p_rl;
8448 	if (cmdmsg_rl)
8449 	    msg_col = Columns - 1;
8450 #endif
8451 
8452 	/* List the suggestions. */
8453 	msg_start();
8454 	lines_left = Rows;	/* avoid more prompt */
8455 	vim_snprintf((char *)IObuff, IOSIZE, _("Change \"%.*s\" to:"),
8456 						sug.su_badlen, sug.su_badptr);
8457 #ifdef FEAT_RIGHTLEFT
8458 	if (cmdmsg_rl && STRNCMP(IObuff, "Change", 6) == 0)
8459 	{
8460 	    /* And now the rabbit from the high hat: Avoid showing the
8461 	     * untranslated message rightleft. */
8462 	    vim_snprintf((char *)IObuff, IOSIZE, ":ot \"%.*s\" egnahC",
8463 						sug.su_badlen, sug.su_badptr);
8464 	}
8465 #endif
8466 	msg_puts(IObuff);
8467 	msg_clr_eos();
8468 	msg_putchar('\n');
8469 
8470 	msg_scroll = TRUE;
8471 	for (i = 0; i < sug.su_ga.ga_len; ++i)
8472 	{
8473 	    stp = &SUG(sug.su_ga, i);
8474 
8475 	    /* The suggested word may replace only part of the bad word, add
8476 	     * the not replaced part. */
8477 	    STRCPY(wcopy, stp->st_word);
8478 	    if (sug.su_badlen > stp->st_orglen)
8479 		vim_strncpy(wcopy + STRLEN(wcopy),
8480 					       sug.su_badptr + stp->st_orglen,
8481 					      sug.su_badlen - stp->st_orglen);
8482 	    vim_snprintf((char *)IObuff, IOSIZE, "%2d", i + 1);
8483 #ifdef FEAT_RIGHTLEFT
8484 	    if (cmdmsg_rl)
8485 		rl_mirror(IObuff);
8486 #endif
8487 	    msg_puts(IObuff);
8488 
8489 	    vim_snprintf((char *)IObuff, IOSIZE, " \"%s\"", wcopy);
8490 	    msg_puts(IObuff);
8491 
8492 	    /* The word may replace more than "su_badlen". */
8493 	    if (sug.su_badlen < stp->st_orglen)
8494 	    {
8495 		vim_snprintf((char *)IObuff, IOSIZE, _(" < \"%.*s\""),
8496 					       stp->st_orglen, sug.su_badptr);
8497 		msg_puts(IObuff);
8498 	    }
8499 
8500 	    if (p_verbose > 0)
8501 	    {
8502 		/* Add the score. */
8503 		if (sps_flags & (SPS_DOUBLE | SPS_BEST))
8504 		    vim_snprintf((char *)IObuff, IOSIZE, " (%s%d - %d)",
8505 			stp->st_salscore ? "s " : "",
8506 			stp->st_score, stp->st_altscore);
8507 		else
8508 		    vim_snprintf((char *)IObuff, IOSIZE, " (%d)",
8509 			    stp->st_score);
8510 #ifdef FEAT_RIGHTLEFT
8511 		if (cmdmsg_rl)
8512 		    /* Mirror the numbers, but keep the leading space. */
8513 		    rl_mirror(IObuff + 1);
8514 #endif
8515 		msg_advance(30);
8516 		msg_puts(IObuff);
8517 	    }
8518 	    msg_putchar('\n');
8519 	}
8520 
8521 #ifdef FEAT_RIGHTLEFT
8522 	cmdmsg_rl = FALSE;
8523 	msg_col = 0;
8524 #endif
8525 	/* Ask for choice. */
8526 	selected = prompt_for_number(&mouse_used);
8527 	if (mouse_used)
8528 	    selected -= lines_left;
8529     }
8530 
8531     if (selected > 0 && selected <= sug.su_ga.ga_len && u_save_cursor() == OK)
8532     {
8533 	/* Save the from and to text for :spellrepall. */
8534 	stp = &SUG(sug.su_ga, selected - 1);
8535 	repl_from = vim_strnsave(sug.su_badptr, stp->st_orglen);
8536 	repl_to = vim_strsave(stp->st_word);
8537 
8538 	/* Replace the word. */
8539 	p = alloc(STRLEN(line) - stp->st_orglen + STRLEN(stp->st_word) + 1);
8540 	if (p != NULL)
8541 	{
8542 	    c = sug.su_badptr - line;
8543 	    mch_memmove(p, line, c);
8544 	    STRCPY(p + c, stp->st_word);
8545 	    STRCAT(p, sug.su_badptr + stp->st_orglen);
8546 	    ml_replace(curwin->w_cursor.lnum, p, FALSE);
8547 	    curwin->w_cursor.col = c;
8548 	    changed_bytes(curwin->w_cursor.lnum, c);
8549 
8550 	    /* For redo we use a change-word command. */
8551 	    ResetRedobuff();
8552 	    AppendToRedobuff((char_u *)"ciw");
8553 	    AppendToRedobuff(stp->st_word);
8554 	    AppendCharToRedobuff(ESC);
8555 	}
8556     }
8557     else
8558 	curwin->w_cursor = prev_cursor;
8559 
8560     spell_find_cleanup(&sug);
8561 }
8562 
8563 /*
8564  * Check if the word at line "lnum" column "col" is required to start with a
8565  * capital.  This uses 'spellcapcheck' of the current buffer.
8566  */
8567     static int
8568 check_need_cap(lnum, col)
8569     linenr_T	lnum;
8570     colnr_T	col;
8571 {
8572     int		need_cap = FALSE;
8573     char_u	*line;
8574     char_u	*line_copy = NULL;
8575     char_u	*p;
8576     colnr_T	endcol;
8577     regmatch_T	regmatch;
8578 
8579     if (curbuf->b_cap_prog == NULL)
8580 	return FALSE;
8581 
8582     line = ml_get_curline();
8583     endcol = 0;
8584     if ((int)(skipwhite(line) - line) >= (int)col)
8585     {
8586 	/* At start of line, check if previous line is empty or sentence
8587 	 * ends there. */
8588 	if (lnum == 1)
8589 	    need_cap = TRUE;
8590 	else
8591 	{
8592 	    line = ml_get(lnum - 1);
8593 	    if (*skipwhite(line) == NUL)
8594 		need_cap = TRUE;
8595 	    else
8596 	    {
8597 		/* Append a space in place of the line break. */
8598 		line_copy = concat_str(line, (char_u *)" ");
8599 		line = line_copy;
8600 		endcol = STRLEN(line);
8601 	    }
8602 	}
8603     }
8604     else
8605 	endcol = col;
8606 
8607     if (endcol > 0)
8608     {
8609 	/* Check if sentence ends before the bad word. */
8610 	regmatch.regprog = curbuf->b_cap_prog;
8611 	regmatch.rm_ic = FALSE;
8612 	p = line + endcol;
8613 	for (;;)
8614 	{
8615 	    mb_ptr_back(line, p);
8616 	    if (p == line || spell_iswordp_nmw(p))
8617 		break;
8618 	    if (vim_regexec(&regmatch, p, 0)
8619 					 && regmatch.endp[0] == line + endcol)
8620 	    {
8621 		need_cap = TRUE;
8622 		break;
8623 	    }
8624 	}
8625     }
8626 
8627     vim_free(line_copy);
8628 
8629     return need_cap;
8630 }
8631 
8632 
8633 /*
8634  * ":spellrepall"
8635  */
8636 /*ARGSUSED*/
8637     void
8638 ex_spellrepall(eap)
8639     exarg_T *eap;
8640 {
8641     pos_T	pos = curwin->w_cursor;
8642     char_u	*frompat;
8643     int		addlen;
8644     char_u	*line;
8645     char_u	*p;
8646     int		save_ws = p_ws;
8647     linenr_T	prev_lnum = 0;
8648 
8649     if (repl_from == NULL || repl_to == NULL)
8650     {
8651 	EMSG(_("E752: No previous spell replacement"));
8652 	return;
8653     }
8654     addlen = STRLEN(repl_to) - STRLEN(repl_from);
8655 
8656     frompat = alloc(STRLEN(repl_from) + 7);
8657     if (frompat == NULL)
8658 	return;
8659     sprintf((char *)frompat, "\\V\\<%s\\>", repl_from);
8660     p_ws = FALSE;
8661 
8662     sub_nsubs = 0;
8663     sub_nlines = 0;
8664     curwin->w_cursor.lnum = 0;
8665     while (!got_int)
8666     {
8667 	if (do_search(NULL, '/', frompat, 1L, SEARCH_KEEP) == 0
8668 						   || u_save_cursor() == FAIL)
8669 	    break;
8670 
8671 	/* Only replace when the right word isn't there yet.  This happens
8672 	 * when changing "etc" to "etc.". */
8673 	line = ml_get_curline();
8674 	if (addlen <= 0 || STRNCMP(line + curwin->w_cursor.col,
8675 					       repl_to, STRLEN(repl_to)) != 0)
8676 	{
8677 	    p = alloc(STRLEN(line) + addlen + 1);
8678 	    if (p == NULL)
8679 		break;
8680 	    mch_memmove(p, line, curwin->w_cursor.col);
8681 	    STRCPY(p + curwin->w_cursor.col, repl_to);
8682 	    STRCAT(p, line + curwin->w_cursor.col + STRLEN(repl_from));
8683 	    ml_replace(curwin->w_cursor.lnum, p, FALSE);
8684 	    changed_bytes(curwin->w_cursor.lnum, curwin->w_cursor.col);
8685 
8686 	    if (curwin->w_cursor.lnum != prev_lnum)
8687 	    {
8688 		++sub_nlines;
8689 		prev_lnum = curwin->w_cursor.lnum;
8690 	    }
8691 	    ++sub_nsubs;
8692 	}
8693 	curwin->w_cursor.col += STRLEN(repl_to);
8694     }
8695 
8696     p_ws = save_ws;
8697     curwin->w_cursor = pos;
8698     vim_free(frompat);
8699 
8700     if (sub_nsubs == 0)
8701 	EMSG2(_("E753: Not found: %s"), repl_from);
8702     else
8703 	do_sub_msg(FALSE);
8704 }
8705 
8706 /*
8707  * Find spell suggestions for "word".  Return them in the growarray "*gap" as
8708  * a list of allocated strings.
8709  */
8710     void
8711 spell_suggest_list(gap, word, maxcount, need_cap)
8712     garray_T	*gap;
8713     char_u	*word;
8714     int		maxcount;	/* maximum nr of suggestions */
8715     int		need_cap;	/* 'spellcapcheck' matched */
8716 {
8717     suginfo_T	sug;
8718     int		i;
8719     suggest_T	*stp;
8720     char_u	*wcopy;
8721 
8722     spell_find_suggest(word, &sug, maxcount, FALSE, need_cap);
8723 
8724     /* Make room in "gap". */
8725     ga_init2(gap, sizeof(char_u *), sug.su_ga.ga_len + 1);
8726     if (ga_grow(gap, sug.su_ga.ga_len) == FAIL)
8727 	return;
8728 
8729     for (i = 0; i < sug.su_ga.ga_len; ++i)
8730     {
8731 	stp = &SUG(sug.su_ga, i);
8732 
8733 	/* The suggested word may replace only part of "word", add the not
8734 	 * replaced part. */
8735 	wcopy = alloc(STRLEN(stp->st_word)
8736 				+ STRLEN(sug.su_badptr + stp->st_orglen) + 1);
8737 	if (wcopy == NULL)
8738 	    break;
8739 	STRCPY(wcopy, stp->st_word);
8740 	STRCAT(wcopy, sug.su_badptr + stp->st_orglen);
8741 	((char_u **)gap->ga_data)[gap->ga_len++] = wcopy;
8742     }
8743 
8744     spell_find_cleanup(&sug);
8745 }
8746 
8747 /*
8748  * Find spell suggestions for the word at the start of "badptr".
8749  * Return the suggestions in "su->su_ga".
8750  * The maximum number of suggestions is "maxcount".
8751  * Note: does use info for the current window.
8752  * This is based on the mechanisms of Aspell, but completely reimplemented.
8753  */
8754     static void
8755 spell_find_suggest(badptr, su, maxcount, banbadword, need_cap)
8756     char_u	*badptr;
8757     suginfo_T	*su;
8758     int		maxcount;
8759     int		banbadword;	/* don't include badword in suggestions */
8760     int		need_cap;	/* word should start with capital */
8761 {
8762     int		attr = 0;
8763     char_u	buf[MAXPATHL];
8764     char_u	*p;
8765     int		do_combine = FALSE;
8766     char_u	*sps_copy;
8767 #ifdef FEAT_EVAL
8768     static int	expr_busy = FALSE;
8769 #endif
8770     int		c;
8771     int		i;
8772     langp_T	*lp;
8773 
8774     /*
8775      * Set the info in "*su".
8776      */
8777     vim_memset(su, 0, sizeof(suginfo_T));
8778     ga_init2(&su->su_ga, (int)sizeof(suggest_T), 10);
8779     ga_init2(&su->su_sga, (int)sizeof(suggest_T), 10);
8780     if (*badptr == NUL)
8781 	return;
8782     hash_init(&su->su_banned);
8783 
8784     su->su_badptr = badptr;
8785     su->su_badlen = spell_check(curwin, su->su_badptr, &attr, NULL);
8786     su->su_maxcount = maxcount;
8787     su->su_maxscore = SCORE_MAXINIT;
8788 
8789     if (su->su_badlen >= MAXWLEN)
8790 	su->su_badlen = MAXWLEN - 1;	/* just in case */
8791     vim_strncpy(su->su_badword, su->su_badptr, su->su_badlen);
8792     (void)spell_casefold(su->su_badptr, su->su_badlen,
8793 						    su->su_fbadword, MAXWLEN);
8794     /* get caps flags for bad word */
8795     su->su_badflags = badword_captype(su->su_badptr,
8796 					       su->su_badptr + su->su_badlen);
8797     if (need_cap)
8798 	su->su_badflags |= WF_ONECAP;
8799 
8800     /* Find the default language for sound folding.  We simply use the first
8801      * one in 'spelllang' that supports sound folding.  That's good for when
8802      * using multiple files for one language, it's not that bad when mixing
8803      * languages (e.g., "pl,en"). */
8804     for (i = 0; i < curbuf->b_langp.ga_len; ++i)
8805     {
8806 	lp = LANGP_ENTRY(curbuf->b_langp, i);
8807 	if (lp->lp_sallang != NULL)
8808 	{
8809 	    su->su_sallang = lp->lp_sallang;
8810 	    break;
8811 	}
8812     }
8813 
8814     /* If the word is not capitalised and spell_check() doesn't consider the
8815      * word to be bad then it might need to be capitalised.  Add a suggestion
8816      * for that. */
8817     c = PTR2CHAR(su->su_badptr);
8818     if (!SPELL_ISUPPER(c) && attr == 0)
8819     {
8820 	make_case_word(su->su_badword, buf, WF_ONECAP);
8821 	add_suggestion(su, &su->su_ga, buf, su->su_badlen, SCORE_ICASE,
8822 						     0, TRUE, su->su_sallang);
8823     }
8824 
8825     /* Ban the bad word itself.  It may appear in another region. */
8826     if (banbadword)
8827 	add_banned(su, su->su_badword);
8828 
8829     /* Make a copy of 'spellsuggest', because the expression may change it. */
8830     sps_copy = vim_strsave(p_sps);
8831     if (sps_copy == NULL)
8832 	return;
8833 
8834     /* Loop over the items in 'spellsuggest'. */
8835     for (p = sps_copy; *p != NUL; )
8836     {
8837 	copy_option_part(&p, buf, MAXPATHL, ",");
8838 
8839 	if (STRNCMP(buf, "expr:", 5) == 0)
8840 	{
8841 #ifdef FEAT_EVAL
8842 	    /* Evaluate an expression.  Skip this when called recursively,
8843 	     * when using spellsuggest() in the expression. */
8844 	    if (!expr_busy)
8845 	    {
8846 		expr_busy = TRUE;
8847 		spell_suggest_expr(su, buf + 5);
8848 		expr_busy = FALSE;
8849 	    }
8850 #endif
8851 	}
8852 	else if (STRNCMP(buf, "file:", 5) == 0)
8853 	    /* Use list of suggestions in a file. */
8854 	    spell_suggest_file(su, buf + 5);
8855 	else
8856 	{
8857 	    /* Use internal method. */
8858 	    spell_suggest_intern(su);
8859 	    if (sps_flags & SPS_DOUBLE)
8860 		do_combine = TRUE;
8861 	}
8862     }
8863 
8864     vim_free(sps_copy);
8865 
8866     if (do_combine)
8867 	/* Combine the two list of suggestions.  This must be done last,
8868 	 * because sorting changes the order again. */
8869 	score_combine(su);
8870 }
8871 
#ifdef FEAT_EVAL
/*
 * Find suggestions by evaluating expression "expr".
 * Used for "expr:" in 'spellsuggest'.  The expression is evaluated with the
 * bad word; every item in the resulting list that is a list itself is
 * passed to get_spellword() to obtain a word and a score.  Items for which
 * the score is negative are ignored.
 */
    static void
spell_suggest_expr(su, expr)
    suginfo_T	*su;
    char_u	*expr;
{
    list_T	*list;
    listitem_T	*item;
    char_u	*word;
    int		score;

    /* The work is split up in a few parts to avoid having to export
     * suginfo_T.
     * First evaluate the expression and get the resulting list. */
    list = eval_spell_expr(su->su_badword, expr);
    if (list != NULL)
    {
	/* Loop over the items in the list, skip items that are not a list. */
	for (item = list->lv_first; item != NULL; item = item->li_next)
	{
	    if (item->li_tv.v_type != VAR_LIST)
		continue;

	    /* Get the word and the score from the items. */
	    score = get_spellword(item->li_tv.vval.v_list, &word);
	    if (score >= 0)
		add_suggestion(su, &su->su_ga, word,
			       su->su_badlen, score, 0, TRUE, su->su_sallang);
	}
	list_unref(list);
    }

    /* Sort the suggestions and truncate at "maxcount". */
    (void)cleanup_suggestions(&su->su_ga, su->su_maxscore, su->su_maxcount);
}
#endif
8909 
8910 /*
8911  * Find suggestions in file "fname".  Used for "file:" in 'spellsuggest'.
8912  */
8913     static void
8914 spell_suggest_file(su, fname)
8915     suginfo_T	*su;
8916     char_u	*fname;
8917 {
8918     FILE	*fd;
8919     char_u	line[MAXWLEN * 2];
8920     char_u	*p;
8921     int		len;
8922     char_u	cword[MAXWLEN];
8923 
8924     /* Open the file. */
8925     fd = mch_fopen((char *)fname, "r");
8926     if (fd == NULL)
8927     {
8928 	EMSG2(_(e_notopen), fname);
8929 	return;
8930     }
8931 
8932     /* Read it line by line. */
8933     while (!vim_fgets(line, MAXWLEN * 2, fd) && !got_int)
8934     {
8935 	line_breakcheck();
8936 
8937 	p = vim_strchr(line, '/');
8938 	if (p == NULL)
8939 	    continue;	    /* No Tab found, just skip the line. */
8940 	*p++ = NUL;
8941 	if (STRICMP(su->su_badword, line) == 0)
8942 	{
8943 	    /* Match!  Isolate the good word, until CR or NL. */
8944 	    for (len = 0; p[len] >= ' '; ++len)
8945 		;
8946 	    p[len] = NUL;
8947 
8948 	    /* If the suggestion doesn't have specific case duplicate the case
8949 	     * of the bad word. */
8950 	    if (captype(p, NULL) == 0)
8951 	    {
8952 		make_case_word(p, cword, su->su_badflags);
8953 		p = cword;
8954 	    }
8955 
8956 	    add_suggestion(su, &su->su_ga, p, su->su_badlen,
8957 					 SCORE_FILE, 0, TRUE, su->su_sallang);
8958 	}
8959     }
8960 
8961     fclose(fd);
8962 
8963     /* Sort the suggestions and truncate at "maxcount". */
8964     (void)cleanup_suggestions(&su->su_ga, su->su_maxscore, su->su_maxcount);
8965 }
8966 
8967 /*
8968  * Find suggestions for the internal method indicated by "sps_flags".
8969  */
8970     static void
8971 spell_suggest_intern(su)
8972     suginfo_T	*su;
8973 {
8974     /*
8975      * 1. Try special cases, such as repeating a word: "the the" -> "the".
8976      *
8977      * Set a maximum score to limit the combination of operations that is
8978      * tried.
8979      */
8980     suggest_try_special(su);
8981 
8982     /*
8983      * 2. Try inserting/deleting/swapping/changing a letter, use REP entries
8984      *    from the .aff file and inserting a space (split the word).
8985      */
8986     suggest_try_change(su);
8987 
8988     /* For the resulting top-scorers compute the sound-a-like score. */
8989     if (sps_flags & SPS_DOUBLE)
8990 	score_comp_sal(su);
8991 
8992     /*
8993      * 3. Try finding sound-a-like words.
8994      *
8995      * Only do this when we don't have a lot of suggestions yet, because it's
8996      * very slow and often doesn't find new suggestions.
8997      */
8998     if ((sps_flags & SPS_DOUBLE)
8999 	    || (!(sps_flags & SPS_FAST)
9000 				    && su->su_ga.ga_len < SUG_CLEAN_COUNT(su)))
9001     {
9002 	/* Allow a higher score now. */
9003 	su->su_maxscore = SCORE_MAXMAX;
9004 	suggest_try_soundalike(su);
9005     }
9006 
9007     /* When CTRL-C was hit while searching do show the results. */
9008     ui_breakcheck();
9009     if (got_int)
9010     {
9011 	(void)vgetc();
9012 	got_int = FALSE;
9013     }
9014 
9015     if ((sps_flags & SPS_DOUBLE) == 0 && su->su_ga.ga_len != 0)
9016     {
9017 	if (sps_flags & SPS_BEST)
9018 	    /* Adjust the word score for how it sounds like. */
9019 	    rescore_suggestions(su);
9020 
9021 	/* Sort the suggestions and truncate at "maxcount". */
9022 	(void)cleanup_suggestions(&su->su_ga, su->su_maxscore, su->su_maxcount);
9023     }
9024 }
9025 
9026 /*
9027  * Free the info put in "*su" by spell_find_suggest().
9028  */
9029     static void
9030 spell_find_cleanup(su)
9031     suginfo_T	*su;
9032 {
9033     int		i;
9034 
9035     /* Free the suggestions. */
9036     for (i = 0; i < su->su_ga.ga_len; ++i)
9037 	vim_free(SUG(su->su_ga, i).st_word);
9038     ga_clear(&su->su_ga);
9039     for (i = 0; i < su->su_sga.ga_len; ++i)
9040 	vim_free(SUG(su->su_sga, i).st_word);
9041     ga_clear(&su->su_sga);
9042 
9043     /* Free the banned words. */
9044     free_banned(su);
9045 }
9046 
9047 /*
9048  * Make a copy of "word", with the first letter upper or lower cased, to
9049  * "wcopy[MAXWLEN]".  "word" must not be empty.
9050  * The result is NUL terminated.
9051  */
9052     static void
9053 onecap_copy(word, wcopy, upper)
9054     char_u	*word;
9055     char_u	*wcopy;
9056     int		upper;	    /* TRUE: first letter made upper case */
9057 {
9058     char_u	*p;
9059     int		c;
9060     int		l;
9061 
9062     p = word;
9063 #ifdef FEAT_MBYTE
9064     if (has_mbyte)
9065 	c = mb_cptr2char_adv(&p);
9066     else
9067 #endif
9068 	c = *p++;
9069     if (upper)
9070 	c = SPELL_TOUPPER(c);
9071     else
9072 	c = SPELL_TOFOLD(c);
9073 #ifdef FEAT_MBYTE
9074     if (has_mbyte)
9075 	l = mb_char2bytes(c, wcopy);
9076     else
9077 #endif
9078     {
9079 	l = 1;
9080 	wcopy[0] = c;
9081     }
9082     vim_strncpy(wcopy + l, p, MAXWLEN - l - 1);
9083 }
9084 
9085 /*
9086  * Make a copy of "word" with all the letters upper cased into
9087  * "wcopy[MAXWLEN]".  The result is NUL terminated.
9088  */
9089     static void
9090 allcap_copy(word, wcopy)
9091     char_u	*word;
9092     char_u	*wcopy;
9093 {
9094     char_u	*s;
9095     char_u	*d;
9096     int		c;
9097 
9098     d = wcopy;
9099     for (s = word; *s != NUL; )
9100     {
9101 #ifdef FEAT_MBYTE
9102 	if (has_mbyte)
9103 	    c = mb_cptr2char_adv(&s);
9104 	else
9105 #endif
9106 	    c = *s++;
9107 
9108 #ifdef FEAT_MBYTE
9109 	/* We only change � to SS when we are certain latin1 is used.  It
9110 	 * would cause weird errors in other 8-bit encodings. */
9111 	if (enc_latin1like && c == 0xdf)
9112 	{
9113 	    c = 'S';
9114 	    if (d - wcopy >= MAXWLEN - 1)
9115 		break;
9116 	    *d++ = c;
9117 	}
9118 	else
9119 #endif
9120 	    c = SPELL_TOUPPER(c);
9121 
9122 #ifdef FEAT_MBYTE
9123 	if (has_mbyte)
9124 	{
9125 	    if (d - wcopy >= MAXWLEN - MB_MAXBYTES)
9126 		break;
9127 	    d += mb_char2bytes(c, d);
9128 	}
9129 	else
9130 #endif
9131 	{
9132 	    if (d - wcopy >= MAXWLEN - 1)
9133 		break;
9134 	    *d++ = c;
9135 	}
9136     }
9137     *d = NUL;
9138 }
9139 
9140 /*
9141  * Try finding suggestions by recognizing specific situations.
9142  */
9143     static void
9144 suggest_try_special(su)
9145     suginfo_T	*su;
9146 {
9147     char_u	*p;
9148     size_t	len;
9149     int		c;
9150     char_u	word[MAXWLEN];
9151 
9152     /*
9153      * Recognize a word that is repeated: "the the".
9154      */
9155     p = skiptowhite(su->su_fbadword);
9156     len = p - su->su_fbadword;
9157     p = skipwhite(p);
9158     if (STRLEN(p) == len && STRNCMP(su->su_fbadword, p, len) == 0)
9159     {
9160 	/* Include badflags: if the badword is onecap or allcap
9161 	 * use that for the goodword too: "The the" -> "The". */
9162 	c = su->su_fbadword[len];
9163 	su->su_fbadword[len] = NUL;
9164 	make_case_word(su->su_fbadword, word, su->su_badflags);
9165 	su->su_fbadword[len] = c;
9166 	add_suggestion(su, &su->su_ga, word, su->su_badlen, SCORE_DEL,
9167 						     0, TRUE, su->su_sallang);
9168     }
9169 }
9170 
9171 /*
9172  * Try finding suggestions by adding/removing/swapping letters.
9173  *
9174  * This uses a state machine.  At each node in the tree we try various
 * operations.  When trying if an operation works "depth" is increased and
 * the stack[] is used to store info.  This allows combinations, thus insert one
9177  * character, replace one and delete another.  The number of changes is
9178  * limited by su->su_maxscore, checked in try_deeper().
9179  *
9180  * After implementing this I noticed an article by Kemal Oflazer that
9181  * describes something similar: "Error-tolerant Finite State Recognition with
9182  * Applications to Morphological Analysis and Spelling Correction" (1996).
9183  * The implementation in the article is simplified and requires a stack of
9184  * unknown depth.  The implementation here only needs a stack depth of the
9185  * length of the word.
9186  */
9187     static void
9188 suggest_try_change(su)
9189     suginfo_T	*su;
9190 {
9191     char_u	fword[MAXWLEN];	    /* copy of the bad word, case-folded */
9192     char_u	tword[MAXWLEN];	    /* good word collected so far */
9193     trystate_T	stack[MAXWLEN];
9194     char_u	preword[MAXWLEN * 3]; /* word found with proper case;
9195 				       * concatanation of prefix compound
9196 				       * words and split word.  NUL terminated
9197 				       * when going deeper but not when coming
9198 				       * back. */
9199     char_u	compflags[MAXWLEN];	/* compound flags, one for each word */
9200     trystate_T	*sp;
9201     int		newscore;
9202     langp_T	*lp;
9203     char_u	*byts, *fbyts, *pbyts;
9204     idx_T	*idxs, *fidxs, *pidxs;
9205     int		depth;
9206     int		c, c2, c3;
9207     int		n;
9208     int		flags;
9209     garray_T	*gap;
9210     idx_T	arridx;
9211     int		len;
9212     char_u	*p;
9213     fromto_T	*ftp;
9214     int		fl = 0, tl;
9215     int		repextra = 0;	    /* extra bytes in fword[] from REP item */
9216     slang_T	*slang;
9217     int		fword_ends;
9218     int		lpi;
9219 
9220     /* We make a copy of the case-folded bad word, so that we can modify it
9221      * to find matches (esp. REP items).  Append some more text, changing
9222      * chars after the bad word may help. */
9223     STRCPY(fword, su->su_fbadword);
9224     n = STRLEN(fword);
9225     p = su->su_badptr + su->su_badlen;
9226     (void)spell_casefold(p, STRLEN(p), fword + n, MAXWLEN - n);
9227 
9228     for (lpi = 0; lpi < curbuf->b_langp.ga_len; ++lpi)
9229     {
9230 	lp = LANGP_ENTRY(curbuf->b_langp, lpi);
9231 	slang = lp->lp_slang;
9232 
9233 	/* If reloading a spell file fails it's still in the list but
9234 	 * everything has been cleared. */
9235 	if (slang->sl_fbyts == NULL)
9236 	    continue;
9237 
9238 	/*
9239 	 * Go through the whole case-fold tree, try changes at each node.
9240 	 * "tword[]" contains the word collected from nodes in the tree.
9241 	 * "fword[]" the word we are trying to match with (initially the bad
9242 	 * word).
9243 	 */
9244 	depth = 0;
9245 	sp = &stack[0];
9246 	vim_memset(sp, 0, sizeof(trystate_T));
9247 	sp->ts_curi = 1;
9248 
9249 	/*
9250 	 * When there are postponed prefixes we need to use these first.  At
9251 	 * the end of the prefix we continue in the case-fold tree.
9252 	 */
9253 	fbyts = slang->sl_fbyts;
9254 	fidxs = slang->sl_fidxs;
9255 	pbyts = slang->sl_pbyts;
9256 	pidxs = slang->sl_pidxs;
9257 	if (pbyts != NULL)
9258 	{
9259 	    byts = pbyts;
9260 	    idxs = pidxs;
9261 	    sp->ts_prefixdepth = PFD_PREFIXTREE;
9262 	    sp->ts_state = STATE_NOPREFIX;	/* try without prefix first */
9263 	}
9264 	else
9265 	{
9266 	    byts = fbyts;
9267 	    idxs = fidxs;
9268 	    sp->ts_prefixdepth = PFD_NOPREFIX;
9269 	    sp->ts_state = STATE_START;
9270 	}
9271 
9272 	/*
9273 	 * Loop to find all suggestions.  At each round we either:
9274 	 * - For the current state try one operation, advance "ts_curi",
9275 	 *   increase "depth".
9276 	 * - When a state is done go to the next, set "ts_state".
9277 	 * - When all states are tried decrease "depth".
9278 	 */
9279 	while (depth >= 0 && !got_int)
9280 	{
9281 	    sp = &stack[depth];
9282 	    switch (sp->ts_state)
9283 	    {
9284 	    case STATE_START:
9285 	    case STATE_NOPREFIX:
9286 		/*
9287 		 * Start of node: Deal with NUL bytes, which means
9288 		 * tword[] may end here.
9289 		 */
9290 		arridx = sp->ts_arridx;	    /* current node in the tree */
9291 		len = byts[arridx];	    /* bytes in this node */
9292 		arridx += sp->ts_curi;	    /* index of current byte */
9293 
9294 		if (sp->ts_prefixdepth == PFD_PREFIXTREE)
9295 		{
9296 		    /* Skip over the NUL bytes, we use them later. */
9297 		    for (n = 0; n < len && byts[arridx + n] == 0; ++n)
9298 			;
9299 		    sp->ts_curi += n;
9300 
9301 		    /* Always past NUL bytes now. */
9302 		    n = (int)sp->ts_state;
9303 		    sp->ts_state = STATE_ENDNUL;
9304 		    sp->ts_save_badflags = su->su_badflags;
9305 
9306 		    /* At end of a prefix or at start of prefixtree: check for
9307 		     * following word. */
9308 		    if (byts[arridx] == 0 || n == (int)STATE_NOPREFIX)
9309 		    {
9310 			/* Set su->su_badflags to the caps type at this
9311 			 * position.  Use the caps type until here for the
9312 			 * prefix itself. */
9313 #ifdef FEAT_MBYTE
9314 			if (has_mbyte)
9315 			    n = nofold_len(fword, sp->ts_fidx, su->su_badptr);
9316 			else
9317 #endif
9318 			    n = sp->ts_fidx;
9319 			flags = badword_captype(su->su_badptr,
9320 							   su->su_badptr + n);
9321 			su->su_badflags = badword_captype(su->su_badptr + n,
9322 					       su->su_badptr + su->su_badlen);
9323 			++depth;
9324 			stack[depth] = stack[depth - 1];
9325 			sp = &stack[depth];
9326 			sp->ts_prefixdepth = depth - 1;
9327 			byts = fbyts;
9328 			idxs = fidxs;
9329 			sp->ts_state = STATE_START;
9330 			sp->ts_curi = 1;   /* start just after length byte */
9331 			sp->ts_arridx = 0;
9332 
9333 			/* Move the prefix to preword[] with the right case
9334 			 * and make find_keepcap_word() works. */
9335 			tword[sp->ts_twordlen] = NUL;
9336 			make_case_word(tword + sp->ts_splitoff,
9337 						  preword + sp->ts_prewordlen,
9338 								       flags);
9339 			sp->ts_prewordlen = STRLEN(preword);
9340 			sp->ts_splitoff = sp->ts_twordlen;
9341 		    }
9342 		    break;
9343 		}
9344 
9345 		if (sp->ts_curi > len || byts[arridx] != 0)
9346 		{
9347 		    /* Past bytes in node and/or past NUL bytes. */
9348 		    sp->ts_state = STATE_ENDNUL;
9349 		    sp->ts_save_badflags = su->su_badflags;
9350 		    break;
9351 		}
9352 
9353 		/*
9354 		 * End of word in tree.
9355 		 */
9356 		++sp->ts_curi;		/* eat one NUL byte */
9357 
9358 		flags = (int)idxs[arridx];
9359 		fword_ends = (fword[sp->ts_fidx] == NUL
9360 			       || !spell_iswordp(fword + sp->ts_fidx, curbuf));
9361 		tword[sp->ts_twordlen] = NUL;
9362 
9363 		if (sp->ts_prefixdepth <= PFD_NOTSPECIAL
9364 					&& (sp->ts_flags & TSF_PREFIXOK) == 0)
9365 		{
9366 		    /* There was a prefix before the word.  Check that the
9367 		     * prefix can be used with this word. */
9368 		    /* Count the length of the NULs in the prefix.  If there
9369 		     * are none this must be the first try without a prefix.
9370 		     */
9371 		    n = stack[sp->ts_prefixdepth].ts_arridx;
9372 		    len = pbyts[n++];
9373 		    for (c = 0; c < len && pbyts[n + c] == 0; ++c)
9374 			;
9375 		    if (c > 0)
9376 		    {
9377 			c = valid_word_prefix(c, n, flags,
9378 				       tword + sp->ts_splitoff, slang, FALSE);
9379 			if (c == 0)
9380 			    break;
9381 
9382 			/* Use the WF_RARE flag for a rare prefix. */
9383 			if (c & WF_RAREPFX)
9384 			    flags |= WF_RARE;
9385 
9386 			/* Tricky: when checking for both prefix and
9387 			 * compounding we run into the prefix flag first.
9388 			 * Remember that it's OK, so that we accept the prefix
9389 			 * when arriving at a compound flag. */
9390 			sp->ts_flags |= TSF_PREFIXOK;
9391 		    }
9392 		}
9393 
9394 		/* Check NEEDCOMPOUND: can't use word without compounding. */
9395 		if (sp->ts_complen == sp->ts_compsplit && fword_ends
9396 						     && (flags & WF_NEEDCOMP))
9397 		    break;
9398 
9399 		if (sp->ts_complen > sp->ts_compsplit)
9400 		{
9401 		    if (slang->sl_nobreak)
9402 		    {
9403 			/* There was a word before this word.  When there was
9404 			 * no change in this word (it was correct) add the
9405 			 * first word as a suggestion.  If this word was
9406 			 * corrected too, we need to check if a correct word
9407 			 * follows. */
9408 			if (sp->ts_fidx - sp->ts_splitfidx
9409 					  == sp->ts_twordlen - sp->ts_splitoff
9410 				&& STRNCMP(fword + sp->ts_splitfidx,
9411 					    tword + sp->ts_splitoff,
9412 					 sp->ts_fidx - sp->ts_splitfidx) == 0)
9413 			{
9414 			    preword[sp->ts_prewordlen] = NUL;
9415 			    add_suggestion(su, &su->su_ga, preword,
9416 				    sp->ts_splitfidx - repextra,
9417 				    sp->ts_score, 0, FALSE,
9418 				    lp->lp_sallang);
9419 			    break;
9420 			}
9421 		    }
9422 		    else
9423 		    {
9424 			/* There was a compound word before this word.  If
9425 			 * this word does not support compounding then give up
9426 			 * (splitting is tried for the word without compound
9427 			 * flag). */
9428 			if (((unsigned)flags >> 24) == 0
9429 				|| sp->ts_twordlen - sp->ts_splitoff
9430 						       < slang->sl_compminlen)
9431 			    break;
9432 #ifdef FEAT_MBYTE
9433 			/* For multi-byte chars check character length against
9434 			 * COMPOUNDMIN. */
9435 			if (has_mbyte
9436 				&& slang->sl_compminlen > 0
9437 				&& mb_charlen(tword + sp->ts_splitoff)
9438 						       < slang->sl_compminlen)
9439 			    break;
9440 #endif
9441 
9442 			compflags[sp->ts_complen] = ((unsigned)flags >> 24);
9443 			compflags[sp->ts_complen + 1] = NUL;
9444 			vim_strncpy(preword + sp->ts_prewordlen,
9445 				tword + sp->ts_splitoff,
9446 				sp->ts_twordlen - sp->ts_splitoff);
9447 			p = preword;
9448 			while (*skiptowhite(p) != NUL)
9449 			    p = skipwhite(skiptowhite(p));
9450 			if (fword_ends && !can_compound(slang, p,
9451 						compflags + sp->ts_compsplit))
9452 			    break;
9453 
9454 			/* Get pointer to last char of previous word. */
9455 			p = preword + sp->ts_prewordlen;
9456 			mb_ptr_back(preword, p);
9457 		    }
9458 		}
9459 		else
9460 		    p = NULL;
9461 
9462 		/*
9463 		 * Form the word with proper case in preword.
9464 		 * If there is a word from a previous split, append.
9465 		 */
9466 		if (flags & WF_KEEPCAP)
9467 		    /* Must find the word in the keep-case tree. */
9468 		    find_keepcap_word(slang, tword + sp->ts_splitoff,
9469 						 preword + sp->ts_prewordlen);
9470 		else
9471 		{
9472 		    /* Include badflags: if the badword is onecap or allcap
9473 		     * use that for the goodword too.  But if the badword is
9474 		     * allcap and it's only one char long use onecap. */
9475 		    c = su->su_badflags;
9476 		    if ((c & WF_ALLCAP)
9477 #ifdef FEAT_MBYTE
9478 			    && su->su_badlen == (*mb_ptr2len)(su->su_badptr)
9479 #else
9480 			    && su->su_badlen == 1
9481 #endif
9482 			    )
9483 			c = WF_ONECAP;
9484 		    c |= flags;
9485 
9486 		    /* When appending a compound word after a word character
9487 		     * don't use Onecap. */
9488 		    if (p != NULL && spell_iswordp_nmw(p))
9489 			c &= ~WF_ONECAP;
9490 		    make_case_word(tword + sp->ts_splitoff,
9491 					      preword + sp->ts_prewordlen, c);
9492 		}
9493 
9494 		/* Don't use a banned word.  It may appear again as a good
9495 		 * word, thus remember it. */
9496 		if (flags & WF_BANNED)
9497 		{
9498 		    add_banned(su, preword + sp->ts_prewordlen);
9499 		    break;
9500 		}
9501 		if (was_banned(su, preword + sp->ts_prewordlen)
9502 						   || was_banned(su, preword))
9503 		    break;
9504 
9505 		newscore = 0;
9506 		if ((flags & WF_REGION)
9507 			    && (((unsigned)flags >> 16) & lp->lp_region) == 0)
9508 		    newscore += SCORE_REGION;
9509 		if (flags & WF_RARE)
9510 		    newscore += SCORE_RARE;
9511 
9512 		if (!spell_valid_case(su->su_badflags,
9513 				  captype(preword + sp->ts_prewordlen, NULL)))
9514 		    newscore += SCORE_ICASE;
9515 
9516 		if (fword_ends && sp->ts_fidx >= sp->ts_fidxtry)
9517 		{
9518 		    /* The badword also ends: add suggestions.  Give a penalty
9519 		     * when changing non-word char to word char, e.g., "thes,"
9520 		     * -> "these". */
9521 		    p = fword + sp->ts_fidx;
9522 #ifdef FEAT_MBYTE
9523 		    if (has_mbyte)
9524 			mb_ptr_back(fword, p);
9525 		    else
9526 #endif
9527 			--p;
9528 		    if (!spell_iswordp(p, curbuf))
9529 		    {
9530 			p = preword + STRLEN(preword);
9531 #ifdef FEAT_MBYTE
9532 			if (has_mbyte)
9533 			    mb_ptr_back(preword, p);
9534 			else
9535 #endif
9536 			    --p;
9537 			if (spell_iswordp(p, curbuf))
9538 			    newscore += SCORE_NONWORD;
9539 		    }
9540 
9541 		    add_suggestion(su, &su->su_ga, preword,
9542 			    sp->ts_fidx - repextra,
9543 				     sp->ts_score + newscore, 0, FALSE,
9544 				     lp->lp_sallang);
9545 		}
9546 		else if ((sp->ts_fidx >= sp->ts_fidxtry || fword_ends)
9547 #ifdef FEAT_MBYTE
9548 			/* Don't split halfway a character. */
9549 			&& (!has_mbyte || sp->ts_tcharlen == 0)
9550 #endif
9551 			)
9552 		{
9553 		    int	    try_compound;
9554 
9555 		    /* Get here in two situations:
9556 		     * 1. The word in the tree ends but the badword continues:
9557 		     *    If the word allows compounding try that.  Otherwise
9558 		     *    try a split by inserting a space.  For both check
9559 		     *    that a valid words starts at fword[sp->ts_fidx].
9560 		     *    For NOBREAK do like compounding to be able to check
9561 		     *    if the next word is valid.
9562 		     * 2. The badword does end, but it was due to a change
9563 		     *    (e.g., a swap).  No need to split, but do check that
9564 		     *    the following word is valid.
9565 		     */
9566 		    try_compound = FALSE;
9567 		    if (!fword_ends
9568 			    && slang->sl_compprog != NULL
9569 			    && ((unsigned)flags >> 24) != 0
9570 			    && sp->ts_twordlen - sp->ts_splitoff
9571 						      >= slang->sl_compminlen
9572 #ifdef FEAT_MBYTE
9573 			    && (!has_mbyte
9574 				|| slang->sl_compminlen == 0
9575 				|| mb_charlen(tword + sp->ts_splitoff)
9576 						      >= slang->sl_compminlen)
9577 #endif
9578 			    && (slang->sl_compsylmax < MAXWLEN
9579 				|| sp->ts_complen + 1 - sp->ts_compsplit
9580 							   < slang->sl_compmax)
9581 			    && (byte_in_str(sp->ts_complen == sp->ts_compsplit
9582 						? slang->sl_compstartflags
9583 						: slang->sl_compallflags,
9584 						    ((unsigned)flags >> 24))))
9585 		    {
9586 			try_compound = TRUE;
9587 			compflags[sp->ts_complen] = ((unsigned)flags >> 24);
9588 			compflags[sp->ts_complen + 1] = NUL;
9589 		    }
9590 
9591 		    /* For NOBREAK we never try splitting, it won't make any
9592 		     * word valid. */
9593 		    if (slang->sl_nobreak)
9594 			try_compound = TRUE;
9595 
9596 		    /* If we could add a compound word, and it's also possible
9597 		     * to split at this point, do the split first and set
9598 		     * TSF_DIDSPLIT to avoid doing it again. */
9599 		    else if (!fword_ends
9600 			    && try_compound
9601 			    && (sp->ts_flags & TSF_DIDSPLIT) == 0)
9602 		    {
9603 			try_compound = FALSE;
9604 			sp->ts_flags |= TSF_DIDSPLIT;
9605 			--sp->ts_curi;	    /* do the same NUL again */
9606 			compflags[sp->ts_complen] = NUL;
9607 		    }
9608 		    else
9609 			sp->ts_flags &= ~TSF_DIDSPLIT;
9610 
9611 		    if (!try_compound && !fword_ends)
9612 		    {
9613 			/* If we're going to split need to check that the
9614 			 * words so far are valid for compounding.  If there
9615 			 * is only one word it must not have the NEEDCOMPOUND
9616 			 * flag. */
9617 			if (sp->ts_complen == sp->ts_compsplit
9618 						     && (flags & WF_NEEDCOMP))
9619 			    break;
9620 			p = preword;
9621 			while (*skiptowhite(p) != NUL)
9622 			    p = skipwhite(skiptowhite(p));
9623 			if (sp->ts_complen > sp->ts_compsplit
9624 				&& !can_compound(slang, p,
9625 						compflags + sp->ts_compsplit))
9626 			    break;
9627 			newscore += SCORE_SPLIT;
9628 		    }
9629 
9630 		    if (try_deeper(su, stack, depth, newscore))
9631 		    {
9632 			/* Save things to be restored at STATE_SPLITUNDO. */
9633 			sp->ts_save_badflags = su->su_badflags;
9634 			sp->ts_state = STATE_SPLITUNDO;
9635 
9636 			++depth;
9637 			sp = &stack[depth];
9638 
9639 			/* Append a space to preword when splitting. */
9640 			if (!try_compound && !fword_ends)
9641 			    STRCAT(preword, " ");
9642 			sp->ts_prewordlen = STRLEN(preword);
9643 			sp->ts_splitoff = sp->ts_twordlen;
9644 			sp->ts_splitfidx = sp->ts_fidx;
9645 
9646 			/* If the badword has a non-word character at this
9647 			 * position skip it.  That means replacing the
9648 			 * non-word character with a space.  Always skip a
9649 			 * character when the word ends. */
9650 			if ((!try_compound
9651 				   && !spell_iswordp_nmw(fword + sp->ts_fidx))
9652 				|| fword_ends)
9653 			{
9654 			    int	    l;
9655 
9656 #ifdef FEAT_MBYTE
9657 			    if (has_mbyte)
9658 				l = MB_BYTE2LEN(fword[sp->ts_fidx]);
9659 			    else
9660 #endif
9661 				l = 1;
9662 			    if (fword_ends)
9663 			    {
9664 				/* Copy the skipped character to preword. */
9665 				mch_memmove(preword + sp->ts_prewordlen,
9666 						      fword + sp->ts_fidx, l);
9667 				sp->ts_prewordlen += l;
9668 				preword[sp->ts_prewordlen] = NUL;
9669 			    }
9670 			    else
9671 				sp->ts_score -= SCORE_SPLIT - SCORE_SUBST;
9672 			    sp->ts_fidx += l;
9673 			}
9674 
9675 			/* When compounding include compound flag in
9676 			 * compflags[] (already set above).  When splitting we
9677 			 * may start compounding over again.  */
9678 			if (try_compound)
9679 			    ++sp->ts_complen;
9680 			else
9681 			    sp->ts_compsplit = sp->ts_complen;
9682 			sp->ts_prefixdepth = PFD_NOPREFIX;
9683 
9684 			/* set su->su_badflags to the caps type at this
9685 			 * position */
9686 #ifdef FEAT_MBYTE
9687 			if (has_mbyte)
9688 			    n = nofold_len(fword, sp->ts_fidx, su->su_badptr);
9689 			else
9690 #endif
9691 			    n = sp->ts_fidx;
9692 			su->su_badflags = badword_captype(su->su_badptr + n,
9693 					       su->su_badptr + su->su_badlen);
9694 
9695 			/* Restart at top of the tree. */
9696 			sp->ts_arridx = 0;
9697 
9698 			/* If there are postponed prefixes, try these too. */
9699 			if (pbyts != NULL)
9700 			{
9701 			    byts = pbyts;
9702 			    idxs = pidxs;
9703 			    sp->ts_prefixdepth = PFD_PREFIXTREE;
9704 			    sp->ts_state = STATE_NOPREFIX;
9705 			}
9706 		    }
9707 		}
9708 		break;
9709 
9710 	    case STATE_SPLITUNDO:
9711 		/* Undo the changes done for word split or compound word. */
9712 		su->su_badflags = sp->ts_save_badflags;
9713 
9714 		/* Continue looking for NUL bytes. */
9715 		sp->ts_state = STATE_START;
9716 
9717 		/* In case we went into the prefix tree. */
9718 		byts = fbyts;
9719 		idxs = fidxs;
9720 		break;
9721 
9722 	    case STATE_ENDNUL:
9723 		/* Past the NUL bytes in the node. */
9724 		su->su_badflags = sp->ts_save_badflags;
9725 		if (fword[sp->ts_fidx] == NUL
9726 #ifdef FEAT_MBYTE
9727 			&& sp->ts_tcharlen == 0
9728 #endif
9729 		   )
9730 		{
9731 		    /* The badword ends, can't use the bytes in this node. */
9732 		    sp->ts_state = STATE_DEL;
9733 		    break;
9734 		}
9735 		sp->ts_state = STATE_PLAIN;
9736 		/*FALLTHROUGH*/
9737 
9738 	    case STATE_PLAIN:
9739 		/*
9740 		 * Go over all possible bytes at this node, add each to
9741 		 * tword[] and use child node.  "ts_curi" is the index.
9742 		 */
9743 		arridx = sp->ts_arridx;
9744 		if (sp->ts_curi > byts[arridx])
9745 		{
9746 		    /* Done all bytes at this node, do next state.  When still
9747 		     * at already changed bytes skip the other tricks. */
9748 		    if (sp->ts_fidx >= sp->ts_fidxtry)
9749 			sp->ts_state = STATE_DEL;
9750 		    else
9751 			sp->ts_state = STATE_FINAL;
9752 		}
9753 		else
9754 		{
9755 		    arridx += sp->ts_curi++;
9756 		    c = byts[arridx];
9757 
9758 		    /* Normal byte, go one level deeper.  If it's not equal to
9759 		     * the byte in the bad word adjust the score.  But don't
9760 		     * even try when the byte was already changed. */
9761 		    if (c == fword[sp->ts_fidx]
9762 #ifdef FEAT_MBYTE
9763 			    || (sp->ts_tcharlen > 0
9764 						&& sp->ts_isdiff != DIFF_NONE)
9765 #endif
9766 			    )
9767 			newscore = 0;
9768 		    else
9769 			newscore = SCORE_SUBST;
9770 		    if ((newscore == 0 || sp->ts_fidx >= sp->ts_fidxtry)
9771 				    && try_deeper(su, stack, depth, newscore))
9772 		    {
9773 			++depth;
9774 			sp = &stack[depth];
9775 			++sp->ts_fidx;
9776 			tword[sp->ts_twordlen++] = c;
9777 			sp->ts_arridx = idxs[arridx];
9778 #ifdef FEAT_MBYTE
9779 			if (newscore == SCORE_SUBST)
9780 			    sp->ts_isdiff = DIFF_YES;
9781 			if (has_mbyte)
9782 			{
9783 			    /* Multi-byte characters are a bit complicated to
9784 			     * handle: They differ when any of the bytes
9785 			     * differ and then their length may also differ. */
9786 			    if (sp->ts_tcharlen == 0)
9787 			    {
9788 				/* First byte. */
9789 				sp->ts_tcharidx = 0;
9790 				sp->ts_tcharlen = MB_BYTE2LEN(c);
9791 				sp->ts_fcharstart = sp->ts_fidx - 1;
9792 				sp->ts_isdiff = (newscore != 0)
9793 						       ? DIFF_YES : DIFF_NONE;
9794 			    }
9795 			    else if (sp->ts_isdiff == DIFF_INSERT)
9796 				/* When inserting trail bytes don't advance in
9797 				 * the bad word. */
9798 				--sp->ts_fidx;
9799 			    if (++sp->ts_tcharidx == sp->ts_tcharlen)
9800 			    {
9801 				/* Last byte of character. */
9802 				if (sp->ts_isdiff == DIFF_YES)
9803 				{
9804 				    /* Correct ts_fidx for the byte length of
9805 				     * the character (we didn't check that
9806 				     * before). */
9807 				    sp->ts_fidx = sp->ts_fcharstart
9808 						+ MB_BYTE2LEN(
9809 						    fword[sp->ts_fcharstart]);
9810 
9811 				    /* For changing a composing character
9812 				     * adjust the score from SCORE_SUBST to
9813 				     * SCORE_SUBCOMP. */
9814 				    if (enc_utf8
9815 					    && utf_iscomposing(
9816 						mb_ptr2char(tword
9817 						    + sp->ts_twordlen
9818 							   - sp->ts_tcharlen))
9819 					    && utf_iscomposing(
9820 						mb_ptr2char(fword
9821 							+ sp->ts_fcharstart)))
9822 					sp->ts_score -=
9823 						  SCORE_SUBST - SCORE_SUBCOMP;
9824 
9825 				    /* For a similar character adjust score
9826 				     * from SCORE_SUBST to SCORE_SIMILAR. */
9827 				    else if (slang->sl_has_map
9828 					    && similar_chars(slang,
9829 						mb_ptr2char(tword
9830 						    + sp->ts_twordlen
9831 							   - sp->ts_tcharlen),
9832 						mb_ptr2char(fword
9833 							+ sp->ts_fcharstart)))
9834 					sp->ts_score -=
9835 						  SCORE_SUBST - SCORE_SIMILAR;
9836 				}
9837 				else if (sp->ts_isdiff == DIFF_INSERT
9838 					&& sp->ts_twordlen > sp->ts_tcharlen)
9839 				{
9840 				    p = tword + sp->ts_twordlen
9841 							    - sp->ts_tcharlen;
9842 				    c = mb_ptr2char(p);
9843 				    if (enc_utf8 && utf_iscomposing(c))
9844 				    {
9845 					/* Inserting a composing char doesn't
9846 					 * count that much. */
9847 					sp->ts_score -= SCORE_INS
9848 							      - SCORE_INSCOMP;
9849 				    }
9850 				    else
9851 				    {
9852 					/* If the previous character was the
9853 					 * same, thus doubling a character,
9854 					 * give a bonus to the score. */
9855 					mb_ptr_back(tword, p);
9856 					if (c == mb_ptr2char(p))
9857 					    sp->ts_score -= SCORE_INS
9858 							       - SCORE_INSDUP;
9859 				    }
9860 				}
9861 
9862 				/* Starting a new char, reset the length. */
9863 				sp->ts_tcharlen = 0;
9864 			    }
9865 			}
9866 			else
9867 #endif
9868 			{
9869 			    /* If we found a similar char adjust the score.
9870 			     * We do this after calling try_deeper() because
9871 			     * it's slow. */
9872 			    if (newscore != 0
9873 				    && slang->sl_has_map
9874 				    && similar_chars(slang,
9875 						   c, fword[sp->ts_fidx - 1]))
9876 				sp->ts_score -= SCORE_SUBST - SCORE_SIMILAR;
9877 			}
9878 		    }
9879 		}
9880 		break;
9881 
9882 	    case STATE_DEL:
9883 #ifdef FEAT_MBYTE
9884 		/* When past the first byte of a multi-byte char don't try
9885 		 * delete/insert/swap a character. */
9886 		if (has_mbyte && sp->ts_tcharlen > 0)
9887 		{
9888 		    sp->ts_state = STATE_FINAL;
9889 		    break;
9890 		}
9891 #endif
9892 		/*
9893 		 * Try skipping one character in the bad word (delete it).
9894 		 */
9895 		sp->ts_state = STATE_INS;
9896 		sp->ts_curi = 1;
9897 		if (fword[sp->ts_fidx] != NUL
9898 			&& try_deeper(su, stack, depth, SCORE_DEL))
9899 		{
9900 		    ++depth;
9901 
9902 		    /* Advance over the character in fword[]. Give a bonus to
9903 		     * the score if the same character is following "nn" ->
9904 		     * "n". */
9905 #ifdef FEAT_MBYTE
9906 		    if (has_mbyte)
9907 		    {
9908 			c = mb_ptr2char(fword + sp->ts_fidx);
9909 			stack[depth].ts_fidx += MB_BYTE2LEN(fword[sp->ts_fidx]);
9910 			if (enc_utf8 && utf_iscomposing(c))
9911 			    stack[depth].ts_score -= SCORE_DEL - SCORE_DELCOMP;
9912 			else if (c == mb_ptr2char(fword + stack[depth].ts_fidx))
9913 			    stack[depth].ts_score -= SCORE_DEL - SCORE_DELDUP;
9914 		    }
9915 		    else
9916 #endif
9917 		    {
9918 			++stack[depth].ts_fidx;
9919 			if (fword[sp->ts_fidx] == fword[sp->ts_fidx + 1])
9920 			    stack[depth].ts_score -= SCORE_DEL - SCORE_DELDUP;
9921 		    }
9922 		    break;
9923 		}
9924 		/*FALLTHROUGH*/
9925 
9926 	    case STATE_INS:
9927 		/* Insert one byte.  Do this for each possible byte at this
9928 		 * node. */
9929 		n = sp->ts_arridx;
9930 		if (sp->ts_curi > byts[n])
9931 		{
9932 		    /* Done all bytes at this node, do next state. */
9933 		    sp->ts_state = STATE_SWAP;
9934 		}
9935 		else
9936 		{
9937 		    /* Do one more byte at this node.  Skip NUL bytes. */
9938 		    n += sp->ts_curi++;
9939 		    c = byts[n];
9940 		    if (c != 0 && try_deeper(su, stack, depth, SCORE_INS))
9941 		    {
9942 			++depth;
9943 			sp = &stack[depth];
9944 			tword[sp->ts_twordlen++] = c;
9945 			sp->ts_arridx = idxs[n];
9946 #ifdef FEAT_MBYTE
9947 			if (has_mbyte)
9948 			{
9949 			    fl = MB_BYTE2LEN(c);
9950 			    if (fl > 1)
9951 			    {
9952 				/* There are following bytes for the same
9953 				 * character.  We must find all bytes before
9954 				 * trying delete/insert/swap/etc. */
9955 				sp->ts_tcharlen = fl;
9956 				sp->ts_tcharidx = 1;
9957 				sp->ts_isdiff = DIFF_INSERT;
9958 			    }
9959 			}
9960 			else
9961 			    fl = 1;
9962 			if (fl == 1)
9963 #endif
9964 			{
9965 			    /* If the previous character was the same, thus
9966 			     * doubling a character, give a bonus to the
9967 			     * score. */
9968 			    if (sp->ts_twordlen >= 2
9969 					   && tword[sp->ts_twordlen - 2] == c)
9970 				sp->ts_score -= SCORE_INS - SCORE_INSDUP;
9971 			}
9972 		    }
9973 		}
9974 		break;
9975 
9976 	    case STATE_SWAP:
9977 		/*
9978 		 * Swap two bytes in the bad word: "12" -> "21".
9979 		 * We change "fword" here, it's changed back afterwards.
9980 		 */
9981 		p = fword + sp->ts_fidx;
9982 		c = *p;
9983 		if (c == NUL)
9984 		{
9985 		    /* End of word, can't swap or replace. */
9986 		    sp->ts_state = STATE_FINAL;
9987 		    break;
9988 		}
9989 #ifdef FEAT_MBYTE
9990 		if (has_mbyte)
9991 		{
9992 		    n = mb_cptr2len(p);
9993 		    c = mb_ptr2char(p);
9994 		    c2 = mb_ptr2char(p + n);
9995 		}
9996 		else
9997 #endif
9998 		    c2 = p[1];
9999 		if (c == c2)
10000 		{
10001 		    /* Characters are identical, swap won't do anything. */
10002 		    sp->ts_state = STATE_SWAP3;
10003 		    break;
10004 		}
10005 		if (c2 != NUL && try_deeper(su, stack, depth, SCORE_SWAP))
10006 		{
10007 		    sp->ts_state = STATE_UNSWAP;
10008 		    ++depth;
10009 #ifdef FEAT_MBYTE
10010 		    if (has_mbyte)
10011 		    {
10012 			fl = mb_char2len(c2);
10013 			mch_memmove(p, p + n, fl);
10014 			mb_char2bytes(c, p + fl);
10015 			stack[depth].ts_fidxtry = sp->ts_fidx + n + fl;
10016 		    }
10017 		    else
10018 #endif
10019 		    {
10020 			p[0] = c2;
10021 			p[1] = c;
10022 			stack[depth].ts_fidxtry = sp->ts_fidx + 2;
10023 		    }
10024 		}
10025 		else
10026 		    /* If this swap doesn't work then SWAP3 won't either. */
10027 		    sp->ts_state = STATE_REP_INI;
10028 		break;
10029 
10030 	    case STATE_UNSWAP:
10031 		/* Undo the STATE_SWAP swap: "21" -> "12". */
10032 		p = fword + sp->ts_fidx;
10033 #ifdef FEAT_MBYTE
10034 		if (has_mbyte)
10035 		{
10036 		    n = MB_BYTE2LEN(*p);
10037 		    c = mb_ptr2char(p + n);
10038 		    mch_memmove(p + MB_BYTE2LEN(p[n]), p, n);
10039 		    mb_char2bytes(c, p);
10040 		}
10041 		else
10042 #endif
10043 		{
10044 		    c = *p;
10045 		    *p = p[1];
10046 		    p[1] = c;
10047 		}
10048 		/*FALLTHROUGH*/
10049 
10050 	    case STATE_SWAP3:
10051 		/* Swap two bytes, skipping one: "123" -> "321".  We change
10052 		 * "fword" here, it's changed back afterwards. */
10053 		p = fword + sp->ts_fidx;
10054 #ifdef FEAT_MBYTE
10055 		if (has_mbyte)
10056 		{
10057 		    n = mb_cptr2len(p);
10058 		    c = mb_ptr2char(p);
10059 		    fl = mb_cptr2len(p + n);
10060 		    c2 = mb_ptr2char(p + n);
10061 		    c3 = mb_ptr2char(p + n + fl);
10062 		}
10063 		else
10064 #endif
10065 		{
10066 		    c = *p;
10067 		    c2 = p[1];
10068 		    c3 = p[2];
10069 		}
10070 
10071 		/* When characters are identical: "121" then SWAP3 result is
10072 		 * identical, ROT3L result is same as SWAP: "211", ROT3L
10073 		 * result is same as SWAP on next char: "112".  Thus skip all
10074 		 * swapping.  Also skip when c3 is NUL.  */
10075 		if (c == c3 || c3 == NUL)
10076 		{
10077 		    sp->ts_state = STATE_REP_INI;
10078 		    break;
10079 		}
10080 		if (try_deeper(su, stack, depth, SCORE_SWAP3))
10081 		{
10082 		    sp->ts_state = STATE_UNSWAP3;
10083 		    ++depth;
10084 #ifdef FEAT_MBYTE
10085 		    if (has_mbyte)
10086 		    {
10087 			tl = mb_char2len(c3);
10088 			mch_memmove(p, p + n + fl, tl);
10089 			mb_char2bytes(c2, p + tl);
10090 			mb_char2bytes(c, p + fl + tl);
10091 			stack[depth].ts_fidxtry = sp->ts_fidx + n + fl + tl;
10092 		    }
10093 		    else
10094 #endif
10095 		    {
10096 			p[0] = p[2];
10097 			p[2] = c;
10098 			stack[depth].ts_fidxtry = sp->ts_fidx + 3;
10099 		    }
10100 		}
10101 		else
10102 		    sp->ts_state = STATE_REP_INI;
10103 		break;
10104 
10105 	    case STATE_UNSWAP3:
10106 		/* Undo STATE_SWAP3: "321" -> "123" */
10107 		p = fword + sp->ts_fidx;
10108 #ifdef FEAT_MBYTE
10109 		if (has_mbyte)
10110 		{
10111 		    n = MB_BYTE2LEN(*p);
10112 		    c2 = mb_ptr2char(p + n);
10113 		    fl = MB_BYTE2LEN(p[n]);
10114 		    c = mb_ptr2char(p + n + fl);
10115 		    tl = MB_BYTE2LEN(p[n + fl]);
10116 		    mch_memmove(p + fl + tl, p, n);
10117 		    mb_char2bytes(c, p);
10118 		    mb_char2bytes(c2, p + tl);
10119 		}
10120 		else
10121 #endif
10122 		{
10123 		    c = *p;
10124 		    *p = p[2];
10125 		    p[2] = c;
10126 		}
10127 
10128 		/* Rotate three characters left: "123" -> "231".  We change
10129 		 * "fword" here, it's changed back afterwards. */
10130 		if (try_deeper(su, stack, depth, SCORE_SWAP3))
10131 		{
10132 		    sp->ts_state = STATE_UNROT3L;
10133 		    ++depth;
10134 		    p = fword + sp->ts_fidx;
10135 #ifdef FEAT_MBYTE
10136 		    if (has_mbyte)
10137 		    {
10138 			n = mb_cptr2len(p);
10139 			c = mb_ptr2char(p);
10140 			fl = mb_cptr2len(p + n);
10141 			fl += mb_cptr2len(p + n + fl);
10142 			mch_memmove(p, p + n, fl);
10143 			mb_char2bytes(c, p + fl);
10144 			stack[depth].ts_fidxtry = sp->ts_fidx + n + fl;
10145 		    }
10146 		    else
10147 #endif
10148 		    {
10149 			c = *p;
10150 			*p = p[1];
10151 			p[1] = p[2];
10152 			p[2] = c;
10153 			stack[depth].ts_fidxtry = sp->ts_fidx + 3;
10154 		    }
10155 		}
10156 		else
10157 		    sp->ts_state = STATE_REP_INI;
10158 		break;
10159 
10160 	    case STATE_UNROT3L:
10161 		/* Undo ROT3L: "231" -> "123" */
10162 		p = fword + sp->ts_fidx;
10163 #ifdef FEAT_MBYTE
10164 		if (has_mbyte)
10165 		{
10166 		    n = MB_BYTE2LEN(*p);
10167 		    n += MB_BYTE2LEN(p[n]);
10168 		    c = mb_ptr2char(p + n);
10169 		    tl = MB_BYTE2LEN(p[n]);
10170 		    mch_memmove(p + tl, p, n);
10171 		    mb_char2bytes(c, p);
10172 		}
10173 		else
10174 #endif
10175 		{
10176 		    c = p[2];
10177 		    p[2] = p[1];
10178 		    p[1] = *p;
10179 		    *p = c;
10180 		}
10181 
10182 		/* Rotate three bytes right: "123" -> "312".  We change
10183 		 * "fword" here, it's changed back afterwards. */
10184 		if (try_deeper(su, stack, depth, SCORE_SWAP3))
10185 		{
10186 		    sp->ts_state = STATE_UNROT3R;
10187 		    ++depth;
10188 		    p = fword + sp->ts_fidx;
10189 #ifdef FEAT_MBYTE
10190 		    if (has_mbyte)
10191 		    {
10192 			n = mb_cptr2len(p);
10193 			n += mb_cptr2len(p + n);
10194 			c = mb_ptr2char(p + n);
10195 			tl = mb_cptr2len(p + n);
10196 			mch_memmove(p + tl, p, n);
10197 			mb_char2bytes(c, p);
10198 			stack[depth].ts_fidxtry = sp->ts_fidx + n + tl;
10199 		    }
10200 		    else
10201 #endif
10202 		    {
10203 			c = p[2];
10204 			p[2] = p[1];
10205 			p[1] = *p;
10206 			*p = c;
10207 			stack[depth].ts_fidxtry = sp->ts_fidx + 3;
10208 		    }
10209 		}
10210 		else
10211 		    sp->ts_state = STATE_REP_INI;
10212 		break;
10213 
10214 	    case STATE_UNROT3R:
10215 		/* Undo ROT3R: "312" -> "123" */
10216 		p = fword + sp->ts_fidx;
10217 #ifdef FEAT_MBYTE
10218 		if (has_mbyte)
10219 		{
10220 		    c = mb_ptr2char(p);
10221 		    tl = MB_BYTE2LEN(*p);
10222 		    n = MB_BYTE2LEN(p[tl]);
10223 		    n += MB_BYTE2LEN(p[tl + n]);
10224 		    mch_memmove(p, p + tl, n);
10225 		    mb_char2bytes(c, p + n);
10226 		}
10227 		else
10228 #endif
10229 		{
10230 		    c = *p;
10231 		    *p = p[1];
10232 		    p[1] = p[2];
10233 		    p[2] = c;
10234 		}
10235 		/*FALLTHROUGH*/
10236 
10237 	    case STATE_REP_INI:
10238 		/* Check if matching with REP items from the .aff file would
10239 		 * work.  Quickly skip if:
10240 		 * - there are no REP items
10241 		 * - the score is going to be too high anyway
10242 		 * - already applied a REP item or swapped here  */
10243 		if (lp->lp_replang == NULL
10244 			|| sp->ts_score + SCORE_REP >= su->su_maxscore
10245 			|| sp->ts_fidx < sp->ts_fidxtry)
10246 		{
10247 		    sp->ts_state = STATE_FINAL;
10248 		    break;
10249 		}
10250 		gap = &lp->lp_replang->sl_rep;
10251 
10252 		/* Use the first byte to quickly find the first entry that
10253 		 * may match.  If the index is -1 there is none. */
10254 		sp->ts_curi = lp->lp_replang->sl_rep_first[fword[sp->ts_fidx]];
10255 		if (sp->ts_curi < 0)
10256 		{
10257 		    sp->ts_state = STATE_FINAL;
10258 		    break;
10259 		}
10260 
10261 		sp->ts_state = STATE_REP;
10262 		/*FALLTHROUGH*/
10263 
10264 	    case STATE_REP:
10265 		/* Try matching with REP items from the .aff file.  For each
10266 		 * match replace the characters and check if the resulting
10267 		 * word is valid. */
10268 		p = fword + sp->ts_fidx;
10269 
10270 		gap = &lp->lp_replang->sl_rep;
10271 		while (sp->ts_curi < gap->ga_len)
10272 		{
10273 		    ftp = (fromto_T *)gap->ga_data + sp->ts_curi++;
10274 		    if (*ftp->ft_from != *p)
10275 		    {
10276 			/* past possible matching entries */
10277 			sp->ts_curi = gap->ga_len;
10278 			break;
10279 		    }
10280 		    if (STRNCMP(ftp->ft_from, p, STRLEN(ftp->ft_from)) == 0
10281 			    && try_deeper(su, stack, depth, SCORE_REP))
10282 		    {
10283 			/* Need to undo this afterwards. */
10284 			sp->ts_state = STATE_REP_UNDO;
10285 
10286 			/* Change the "from" to the "to" string. */
10287 			++depth;
10288 			fl = STRLEN(ftp->ft_from);
10289 			tl = STRLEN(ftp->ft_to);
10290 			if (fl != tl)
10291 			{
10292 			    mch_memmove(p + tl, p + fl, STRLEN(p + fl) + 1);
10293 			    repextra += tl - fl;
10294 			}
10295 			mch_memmove(p, ftp->ft_to, tl);
10296 			stack[depth].ts_fidxtry = sp->ts_fidx + tl;
10297 #ifdef FEAT_MBYTE
10298 			stack[depth].ts_tcharlen = 0;
10299 #endif
10300 			break;
10301 		    }
10302 		}
10303 
10304 		if (sp->ts_curi >= gap->ga_len && sp->ts_state == STATE_REP)
10305 		    /* No (more) matches. */
10306 		    sp->ts_state = STATE_FINAL;
10307 
10308 		break;
10309 
10310 	    case STATE_REP_UNDO:
10311 		/* Undo a REP replacement and continue with the next one. */
10312 		ftp = (fromto_T *)lp->lp_replang->sl_rep.ga_data
10313 							    + sp->ts_curi - 1;
10314 		fl = STRLEN(ftp->ft_from);
10315 		tl = STRLEN(ftp->ft_to);
10316 		p = fword + sp->ts_fidx;
10317 		if (fl != tl)
10318 		{
10319 		    mch_memmove(p + fl, p + tl, STRLEN(p + tl) + 1);
10320 		    repextra -= tl - fl;
10321 		}
10322 		mch_memmove(p, ftp->ft_from, fl);
10323 		sp->ts_state = STATE_REP;
10324 		break;
10325 
10326 	    default:
10327 		/* Did all possible states at this level, go up one level. */
10328 		--depth;
10329 
10330 		if (depth >= 0 && stack[depth].ts_prefixdepth == PFD_PREFIXTREE)
10331 		{
10332 		    /* Continue in or go back to the prefix tree. */
10333 		    byts = pbyts;
10334 		    idxs = pidxs;
10335 		}
10336 
10337 		/* Don't check for CTRL-C too often, it takes time. */
10338 		line_breakcheck();
10339 	    }
10340 	}
10341     }
10342 }
10343 
10344 /*
10345  * Try going one level deeper in the tree.
10346  */
10347     static int
10348 try_deeper(su, stack, depth, score_add)
10349     suginfo_T	*su;
10350     trystate_T	*stack;
10351     int		depth;
10352     int		score_add;
10353 {
10354     int		newscore;
10355 
10356     /* Refuse to go deeper if the scrore is getting too big. */
10357     newscore = stack[depth].ts_score + score_add;
10358     if (newscore >= su->su_maxscore)
10359 	return FALSE;
10360 
10361     stack[depth + 1] = stack[depth];
10362     stack[depth + 1].ts_state = STATE_START;
10363     stack[depth + 1].ts_score = newscore;
10364     stack[depth + 1].ts_curi = 1;	/* start just after length byte */
10365     stack[depth + 1].ts_flags = 0;
10366     return TRUE;
10367 }
10368 
#ifdef FEAT_MBYTE
/*
 * Case-folding may change the number of bytes: count the characters in the
 * first "flen" bytes of "fword" and return the byte length of that many
 * characters in "word".
 */
    static int
nofold_len(fword, flen, word)
    char_u	*fword;
    int		flen;
    char_u	*word;
{
    char_u	*fend = fword + flen;
    char_u	*s;
    int		nchars = 0;

    /* Count the characters in the case-folded text. */
    s = fword;
    while (s < fend)
    {
	++nchars;
	mb_ptr_adv(s);
    }

    /* Skip the same number of characters in the text that was not folded. */
    s = word;
    while (nchars > 0)
    {
	--nchars;
	mb_ptr_adv(s);
    }
    return (int)(s - word);
}
#endif
10390 
10391 /*
10392  * "fword" is a good word with case folded.  Find the matching keep-case
10393  * words and put it in "kword".
10394  * Theoretically there could be several keep-case words that result in the
10395  * same case-folded word, but we only find one...
10396  */
10397     static void
10398 find_keepcap_word(slang, fword, kword)
10399     slang_T	*slang;
10400     char_u	*fword;
10401     char_u	*kword;
10402 {
10403     char_u	uword[MAXWLEN];		/* "fword" in upper-case */
10404     int		depth;
10405     idx_T	tryidx;
10406 
10407     /* The following arrays are used at each depth in the tree. */
10408     idx_T	arridx[MAXWLEN];
10409     int		round[MAXWLEN];
10410     int		fwordidx[MAXWLEN];
10411     int		uwordidx[MAXWLEN];
10412     int		kwordlen[MAXWLEN];
10413 
10414     int		flen, ulen;
10415     int		l;
10416     int		len;
10417     int		c;
10418     idx_T	lo, hi, m;
10419     char_u	*p;
10420     char_u	*byts = slang->sl_kbyts;    /* array with bytes of the words */
10421     idx_T	*idxs = slang->sl_kidxs;    /* array with indexes */
10422 
10423     if (byts == NULL)
10424     {
10425 	/* array is empty: "cannot happen" */
10426 	*kword = NUL;
10427 	return;
10428     }
10429 
10430     /* Make an all-cap version of "fword". */
10431     allcap_copy(fword, uword);
10432 
10433     /*
10434      * Each character needs to be tried both case-folded and upper-case.
10435      * All this gets very complicated if we keep in mind that changing case
10436      * may change the byte length of a multi-byte character...
10437      */
10438     depth = 0;
10439     arridx[0] = 0;
10440     round[0] = 0;
10441     fwordidx[0] = 0;
10442     uwordidx[0] = 0;
10443     kwordlen[0] = 0;
10444     while (depth >= 0)
10445     {
10446 	if (fword[fwordidx[depth]] == NUL)
10447 	{
10448 	    /* We are at the end of "fword".  If the tree allows a word to end
10449 	     * here we have found a match. */
10450 	    if (byts[arridx[depth] + 1] == 0)
10451 	    {
10452 		kword[kwordlen[depth]] = NUL;
10453 		return;
10454 	    }
10455 
10456 	    /* kword is getting too long, continue one level up */
10457 	    --depth;
10458 	}
10459 	else if (++round[depth] > 2)
10460 	{
10461 	    /* tried both fold-case and upper-case character, continue one
10462 	     * level up */
10463 	    --depth;
10464 	}
10465 	else
10466 	{
10467 	    /*
10468 	     * round[depth] == 1: Try using the folded-case character.
10469 	     * round[depth] == 2: Try using the upper-case character.
10470 	     */
10471 #ifdef FEAT_MBYTE
10472 	    if (has_mbyte)
10473 	    {
10474 		flen = mb_cptr2len(fword + fwordidx[depth]);
10475 		ulen = mb_cptr2len(uword + uwordidx[depth]);
10476 	    }
10477 	    else
10478 #endif
10479 		ulen = flen = 1;
10480 	    if (round[depth] == 1)
10481 	    {
10482 		p = fword + fwordidx[depth];
10483 		l = flen;
10484 	    }
10485 	    else
10486 	    {
10487 		p = uword + uwordidx[depth];
10488 		l = ulen;
10489 	    }
10490 
10491 	    for (tryidx = arridx[depth]; l > 0; --l)
10492 	    {
10493 		/* Perform a binary search in the list of accepted bytes. */
10494 		len = byts[tryidx++];
10495 		c = *p++;
10496 		lo = tryidx;
10497 		hi = tryidx + len - 1;
10498 		while (lo < hi)
10499 		{
10500 		    m = (lo + hi) / 2;
10501 		    if (byts[m] > c)
10502 			hi = m - 1;
10503 		    else if (byts[m] < c)
10504 			lo = m + 1;
10505 		    else
10506 		    {
10507 			lo = hi = m;
10508 			break;
10509 		    }
10510 		}
10511 
10512 		/* Stop if there is no matching byte. */
10513 		if (hi < lo || byts[lo] != c)
10514 		    break;
10515 
10516 		/* Continue at the child (if there is one). */
10517 		tryidx = idxs[lo];
10518 	    }
10519 
10520 	    if (l == 0)
10521 	    {
10522 		/*
10523 		 * Found the matching char.  Copy it to "kword" and go a
10524 		 * level deeper.
10525 		 */
10526 		if (round[depth] == 1)
10527 		{
10528 		    STRNCPY(kword + kwordlen[depth], fword + fwordidx[depth],
10529 									flen);
10530 		    kwordlen[depth + 1] = kwordlen[depth] + flen;
10531 		}
10532 		else
10533 		{
10534 		    STRNCPY(kword + kwordlen[depth], uword + uwordidx[depth],
10535 									ulen);
10536 		    kwordlen[depth + 1] = kwordlen[depth] + ulen;
10537 		}
10538 		fwordidx[depth + 1] = fwordidx[depth] + flen;
10539 		uwordidx[depth + 1] = uwordidx[depth] + ulen;
10540 
10541 		++depth;
10542 		arridx[depth] = tryidx;
10543 		round[depth] = 0;
10544 	    }
10545 	}
10546     }
10547 
10548     /* Didn't find it: "cannot happen". */
10549     *kword = NUL;
10550 }
10551 
10552 /*
10553  * Compute the sound-a-like score for suggestions in su->su_ga and add them to
10554  * su->su_sga.
10555  */
10556     static void
10557 score_comp_sal(su)
10558     suginfo_T	*su;
10559 {
10560     langp_T	*lp;
10561     char_u	badsound[MAXWLEN];
10562     int		i;
10563     suggest_T   *stp;
10564     suggest_T   *sstp;
10565     int		score;
10566     int		lpi;
10567 
10568     if (ga_grow(&su->su_sga, su->su_ga.ga_len) == FAIL)
10569 	return;
10570 
10571     /*	Use the sound-folding of the first language that supports it. */
10572     for (lpi = 0; lpi < curbuf->b_langp.ga_len; ++lpi)
10573     {
10574 	lp = LANGP_ENTRY(curbuf->b_langp, lpi);
10575 	if (lp->lp_slang->sl_sal.ga_len > 0)
10576 	{
10577 	    /* soundfold the bad word */
10578 	    spell_soundfold(lp->lp_slang, su->su_fbadword, TRUE, badsound);
10579 
10580 	    for (i = 0; i < su->su_ga.ga_len; ++i)
10581 	    {
10582 		stp = &SUG(su->su_ga, i);
10583 
10584 		/* Case-fold the suggested word, sound-fold it and compute the
10585 		 * sound-a-like score. */
10586 		score = stp_sal_score(stp, su, lp->lp_slang, badsound);
10587 		if (score < SCORE_MAXMAX)
10588 		{
10589 		    /* Add the suggestion. */
10590 		    sstp = &SUG(su->su_sga, su->su_sga.ga_len);
10591 		    sstp->st_word = vim_strsave(stp->st_word);
10592 		    if (sstp->st_word != NULL)
10593 		    {
10594 			sstp->st_score = score;
10595 			sstp->st_altscore = 0;
10596 			sstp->st_orglen = stp->st_orglen;
10597 			++su->su_sga.ga_len;
10598 		    }
10599 		}
10600 	    }
10601 	    break;
10602 	}
10603     }
10604 }
10605 
10606 /*
10607  * Combine the list of suggestions in su->su_ga and su->su_sga.
10608  * They are intwined.
10609  */
10610     static void
10611 score_combine(su)
10612     suginfo_T	*su;
10613 {
10614     int		i;
10615     int		j;
10616     garray_T	ga;
10617     garray_T	*gap;
10618     langp_T	*lp;
10619     suggest_T	*stp;
10620     char_u	*p;
10621     char_u	badsound[MAXWLEN];
10622     int		round;
10623     int		lpi;
10624 
10625     /* Add the alternate score to su_ga. */
10626     for (lpi = 0; lpi < curbuf->b_langp.ga_len; ++lpi)
10627     {
10628 	lp = LANGP_ENTRY(curbuf->b_langp, lpi);
10629 	if (lp->lp_slang->sl_sal.ga_len > 0)
10630 	{
10631 	    /* soundfold the bad word */
10632 	    spell_soundfold(lp->lp_slang, su->su_fbadword, TRUE, badsound);
10633 
10634 	    for (i = 0; i < su->su_ga.ga_len; ++i)
10635 	    {
10636 		stp = &SUG(su->su_ga, i);
10637 		stp->st_altscore = stp_sal_score(stp, su, lp->lp_slang,
10638 								    badsound);
10639 		if (stp->st_altscore == SCORE_MAXMAX)
10640 		    stp->st_score = (stp->st_score * 3 + SCORE_BIG) / 4;
10641 		else
10642 		    stp->st_score = (stp->st_score * 3
10643 						  + stp->st_altscore) / 4;
10644 		stp->st_salscore = FALSE;
10645 	    }
10646 	    break;
10647 	}
10648     }
10649 
10650     /* Add the alternate score to su_sga. */
10651     for (i = 0; i < su->su_sga.ga_len; ++i)
10652     {
10653 	stp = &SUG(su->su_sga, i);
10654 	stp->st_altscore = spell_edit_score(su->su_badword, stp->st_word);
10655 	if (stp->st_score == SCORE_MAXMAX)
10656 	    stp->st_score = (SCORE_BIG * 7 + stp->st_altscore) / 8;
10657 	else
10658 	    stp->st_score = (stp->st_score * 7 + stp->st_altscore) / 8;
10659 	stp->st_salscore = TRUE;
10660     }
10661 
10662     /* Sort the suggestions and truncate at "maxcount" for both lists. */
10663     (void)cleanup_suggestions(&su->su_ga, su->su_maxscore, su->su_maxcount);
10664     (void)cleanup_suggestions(&su->su_sga, su->su_maxscore, su->su_maxcount);
10665 
10666     ga_init2(&ga, (int)sizeof(suginfo_T), 1);
10667     if (ga_grow(&ga, su->su_ga.ga_len + su->su_sga.ga_len) == FAIL)
10668 	return;
10669 
10670     stp = &SUG(ga, 0);
10671     for (i = 0; i < su->su_ga.ga_len || i < su->su_sga.ga_len; ++i)
10672     {
10673 	/* round 1: get a suggestion from su_ga
10674 	 * round 2: get a suggestion from su_sga */
10675 	for (round = 1; round <= 2; ++round)
10676 	{
10677 	    gap = round == 1 ? &su->su_ga : &su->su_sga;
10678 	    if (i < gap->ga_len)
10679 	    {
10680 		/* Don't add a word if it's already there. */
10681 		p = SUG(*gap, i).st_word;
10682 		for (j = 0; j < ga.ga_len; ++j)
10683 		    if (STRCMP(stp[j].st_word, p) == 0)
10684 			break;
10685 		if (j == ga.ga_len)
10686 		    stp[ga.ga_len++] = SUG(*gap, i);
10687 		else
10688 		    vim_free(p);
10689 	    }
10690 	}
10691     }
10692 
10693     ga_clear(&su->su_ga);
10694     ga_clear(&su->su_sga);
10695 
10696     /* Truncate the list to the number of suggestions that will be displayed. */
10697     if (ga.ga_len > su->su_maxcount)
10698     {
10699 	for (i = su->su_maxcount; i < ga.ga_len; ++i)
10700 	    vim_free(stp[i].st_word);
10701 	ga.ga_len = su->su_maxcount;
10702     }
10703 
10704     su->su_ga = ga;
10705 }
10706 
10707 /*
10708  * For the goodword in "stp" compute the soundalike score compared to the
10709  * badword.
10710  */
10711     static int
10712 stp_sal_score(stp, su, slang, badsound)
10713     suggest_T	*stp;
10714     suginfo_T	*su;
10715     slang_T	*slang;
10716     char_u	*badsound;	/* sound-folded badword */
10717 {
10718     char_u	*p;
10719     char_u	badsound2[MAXWLEN];
10720     char_u	fword[MAXWLEN];
10721     char_u	goodsound[MAXWLEN];
10722 
10723     if (stp->st_orglen <= su->su_badlen)
10724 	p = badsound;
10725     else
10726     {
10727 	/* soundfold the bad word with more characters following */
10728 	(void)spell_casefold(su->su_badptr, stp->st_orglen, fword, MAXWLEN);
10729 
10730 	/* When joining two words the sound often changes a lot.  E.g., "t he"
10731 	 * sounds like "t h" while "the" sounds like "@".  Avoid that by
10732 	 * removing the space.  Don't do it when the good word also contains a
10733 	 * space. */
10734 	if (vim_iswhite(su->su_badptr[su->su_badlen])
10735 					 && *skiptowhite(stp->st_word) == NUL)
10736 	    for (p = fword; *(p = skiptowhite(p)) != NUL; )
10737 		mch_memmove(p, p + 1, STRLEN(p));
10738 
10739 	spell_soundfold(slang, fword, TRUE, badsound2);
10740 	p = badsound2;
10741     }
10742 
10743     /* Sound-fold the word and compute the score for the difference. */
10744     spell_soundfold(slang, stp->st_word, FALSE, goodsound);
10745 
10746     return soundalike_score(goodsound, p);
10747 }
10748 
10749 /*
10750  * Find suggestions by comparing the word in a sound-a-like form.
10751  * Note: This doesn't support postponed prefixes.
10752  */
10753     static void
10754 suggest_try_soundalike(su)
10755     suginfo_T	*su;
10756 {
10757     char_u	salword[MAXWLEN];
10758     char_u	tword[MAXWLEN];
10759     char_u	tsalword[MAXWLEN];
10760     idx_T	arridx[MAXWLEN];
10761     int		curi[MAXWLEN];
10762     langp_T	*lp;
10763     char_u	*byts;
10764     idx_T	*idxs;
10765     int		depth;
10766     int		c;
10767     idx_T	n;
10768     int		round;
10769     int		flags;
10770     int		sound_score;
10771     int		local_score;
10772     int		lpi;
10773     slang_T	*slang;
10774 
10775     /* Do this for all languages that support sound folding. */
10776     for (lpi = 0; lpi < curbuf->b_langp.ga_len; ++lpi)
10777     {
10778 	lp = LANGP_ENTRY(curbuf->b_langp, lpi);
10779 	slang = lp->lp_slang;
10780 	if (slang->sl_sal.ga_len > 0)
10781 	{
10782 	    /* soundfold the bad word */
10783 	    spell_soundfold(slang, su->su_fbadword, TRUE, salword);
10784 
10785 	    /*
10786 	     * Go through the whole tree, soundfold each word and compare.
10787 	     * round 1: use the case-folded tree.
10788 	     * round 2: use the keep-case tree.
10789 	     */
10790 	    for (round = 1; round <= 2; ++round)
10791 	    {
10792 		if (round == 1)
10793 		{
10794 		    byts = slang->sl_fbyts;
10795 		    idxs = slang->sl_fidxs;
10796 		}
10797 		else
10798 		{
10799 		    byts = slang->sl_kbyts;
10800 		    idxs = slang->sl_kidxs;
10801 		    if (byts == NULL)	    /* no keep-case words */
10802 			continue;
10803 		}
10804 
10805 		depth = 0;
10806 		arridx[0] = 0;
10807 		curi[0] = 1;
10808 		while (depth >= 0 && !got_int)
10809 		{
10810 		    if (curi[depth] > byts[arridx[depth]])
10811 		    {
10812 			/* Done all bytes at this node, go up one level. */
10813 			--depth;
10814 			line_breakcheck();
10815 		    }
10816 		    else
10817 		    {
10818 			/* Do one more byte at this node. */
10819 			n = arridx[depth] + curi[depth];
10820 			++curi[depth];
10821 			c = byts[n];
10822 			if (c == 0)
10823 			{
10824 			    /* End of word, deal with the word. */
10825 			    flags = (int)idxs[n];
10826 			    if (round == 2 || (flags & WF_KEEPCAP) == 0)
10827 			    {
10828 				tword[depth] = NUL;
10829 				/* Sound-fold.  Only in keep-case tree need to
10830 				 * case-fold the word. */
10831 				spell_soundfold(slang, tword,
10832 							round == 1, tsalword);
10833 
10834 				/* Compute the edit distance between the
10835 				 * sound-a-like words. */
10836 				sound_score = soundalike_score(salword,
10837 								    tsalword);
10838 
10839 				/* Add a penalty for words in another region. */
10840 				if ((flags & WF_REGION) && (((unsigned)flags
10841 						 >> 16) & lp->lp_region) == 0)
10842 				    local_score = SCORE_REGION;
10843 				else
10844 				    local_score = 0;
10845 				sound_score += local_score;
10846 
10847 				if (sound_score < SCORE_MAXMAX)
10848 				{
10849 				    char_u	cword[MAXWLEN];
10850 				    char_u	*p;
10851 				    int		score;
10852 
10853 				    flags |= su->su_badflags;
10854 				    if (round == 1 && (flags & WF_CAPMASK) != 0)
10855 				    {
10856 					/* Need to fix case according to
10857 					 * "flags". */
10858 					make_case_word(tword, cword, flags);
10859 					p = cword;
10860 				    }
10861 				    else
10862 					p = tword;
10863 
10864 				    if (sps_flags & SPS_DOUBLE)
10865 					add_suggestion(su, &su->su_sga, p,
10866 						su->su_badlen,
10867 						sound_score, 0, FALSE,
10868 						lp->lp_sallang);
10869 				    else
10870 				    {
10871 					/* Compute the score. */
10872 					score = spell_edit_score(
10873 							   su->su_badword, p)
10874 						    + local_score;
10875 					if (sps_flags & SPS_BEST)
10876 					    /* give a bonus for the good word
10877 					     * sounding the same as the bad
10878 					     * word */
10879 					    add_suggestion(su, &su->su_ga, p,
10880 						    su->su_badlen,
10881 						  RESCORE(score, sound_score),
10882 						    sound_score, TRUE,
10883 						    lp->lp_sallang);
10884 					else
10885 					    add_suggestion(su, &su->su_ga, p,
10886 						    su->su_badlen,
10887 						    score + sound_score,
10888 						    0, FALSE,
10889 						    lp->lp_sallang);
10890 				    }
10891 				}
10892 			    }
10893 
10894 			    /* Skip over other NUL bytes. */
10895 			    while (byts[n + 1] == 0)
10896 			    {
10897 				++n;
10898 				++curi[depth];
10899 			    }
10900 			}
10901 			else
10902 			{
10903 			    /* Normal char, go one level deeper. */
10904 			    tword[depth++] = c;
10905 			    arridx[depth] = idxs[n];
10906 			    curi[depth] = 1;
10907 			}
10908 		    }
10909 		}
10910 	    }
10911 	}
10912     }
10913 }
10914 
10915 /*
10916  * Copy "fword" to "cword", fixing case according to "flags".
10917  */
10918     static void
10919 make_case_word(fword, cword, flags)
10920     char_u	*fword;
10921     char_u	*cword;
10922     int		flags;
10923 {
10924     if (flags & WF_ALLCAP)
10925 	/* Make it all upper-case */
10926 	allcap_copy(fword, cword);
10927     else if (flags & WF_ONECAP)
10928 	/* Make the first letter upper-case */
10929 	onecap_copy(fword, cword, TRUE);
10930     else
10931 	/* Use goodword as-is. */
10932 	STRCPY(cword, fword);
10933 }
10934 
10935 /*
10936  * Use map string "map" for languages "lp".
10937  */
10938     static void
10939 set_map_str(lp, map)
10940     slang_T	*lp;
10941     char_u	*map;
10942 {
10943     char_u	*p;
10944     int		headc = 0;
10945     int		c;
10946     int		i;
10947 
10948     if (*map == NUL)
10949     {
10950 	lp->sl_has_map = FALSE;
10951 	return;
10952     }
10953     lp->sl_has_map = TRUE;
10954 
10955     /* Init the array and hash table empty. */
10956     for (i = 0; i < 256; ++i)
10957 	lp->sl_map_array[i] = 0;
10958 #ifdef FEAT_MBYTE
10959     hash_init(&lp->sl_map_hash);
10960 #endif
10961 
10962     /*
10963      * The similar characters are stored separated with slashes:
10964      * "aaa/bbb/ccc/".  Fill sl_map_array[c] with the character before c and
10965      * before the same slash.  For characters above 255 sl_map_hash is used.
10966      */
10967     for (p = map; *p != NUL; )
10968     {
10969 #ifdef FEAT_MBYTE
10970 	c = mb_cptr2char_adv(&p);
10971 #else
10972 	c = *p++;
10973 #endif
10974 	if (c == '/')
10975 	    headc = 0;
10976 	else
10977 	{
10978 	    if (headc == 0)
10979 		 headc = c;
10980 
10981 #ifdef FEAT_MBYTE
10982 	    /* Characters above 255 don't fit in sl_map_array[], put them in
10983 	     * the hash table.  Each entry is the char, a NUL the headchar and
10984 	     * a NUL. */
10985 	    if (c >= 256)
10986 	    {
10987 		int	    cl = mb_char2len(c);
10988 		int	    headcl = mb_char2len(headc);
10989 		char_u	    *b;
10990 		hash_T	    hash;
10991 		hashitem_T  *hi;
10992 
10993 		b = alloc((unsigned)(cl + headcl + 2));
10994 		if (b == NULL)
10995 		    return;
10996 		mb_char2bytes(c, b);
10997 		b[cl] = NUL;
10998 		mb_char2bytes(headc, b + cl + 1);
10999 		b[cl + 1 + headcl] = NUL;
11000 		hash = hash_hash(b);
11001 		hi = hash_lookup(&lp->sl_map_hash, b, hash);
11002 		if (HASHITEM_EMPTY(hi))
11003 		    hash_add_item(&lp->sl_map_hash, hi, b, hash);
11004 		else
11005 		{
11006 		    /* This should have been checked when generating the .spl
11007 		     * file. */
11008 		    EMSG(_("E999: duplicate char in MAP entry"));
11009 		    vim_free(b);
11010 		}
11011 	    }
11012 	    else
11013 #endif
11014 		lp->sl_map_array[c] = headc;
11015 	}
11016     }
11017 }
11018 
11019 /*
11020  * Return TRUE if "c1" and "c2" are similar characters according to the MAP
11021  * lines in the .aff file.
11022  */
11023     static int
11024 similar_chars(slang, c1, c2)
11025     slang_T	*slang;
11026     int		c1;
11027     int		c2;
11028 {
11029     int		m1, m2;
11030 #ifdef FEAT_MBYTE
11031     char_u	buf[MB_MAXBYTES];
11032     hashitem_T  *hi;
11033 
11034     if (c1 >= 256)
11035     {
11036 	buf[mb_char2bytes(c1, buf)] = 0;
11037 	hi = hash_find(&slang->sl_map_hash, buf);
11038 	if (HASHITEM_EMPTY(hi))
11039 	    m1 = 0;
11040 	else
11041 	    m1 = mb_ptr2char(hi->hi_key + STRLEN(hi->hi_key) + 1);
11042     }
11043     else
11044 #endif
11045 	m1 = slang->sl_map_array[c1];
11046     if (m1 == 0)
11047 	return FALSE;
11048 
11049 
11050 #ifdef FEAT_MBYTE
11051     if (c2 >= 256)
11052     {
11053 	buf[mb_char2bytes(c2, buf)] = 0;
11054 	hi = hash_find(&slang->sl_map_hash, buf);
11055 	if (HASHITEM_EMPTY(hi))
11056 	    m2 = 0;
11057 	else
11058 	    m2 = mb_ptr2char(hi->hi_key + STRLEN(hi->hi_key) + 1);
11059     }
11060     else
11061 #endif
11062 	m2 = slang->sl_map_array[c2];
11063 
11064     return m1 == m2;
11065 }
11066 
11067 /*
11068  * Add a suggestion to the list of suggestions.
11069  * Do not add a duplicate suggestion or suggestions with a bad score.
11070  * When "use_score" is not zero it's used, otherwise the score is computed
11071  * with spell_edit_score().
11072  */
11073     static void
11074 add_suggestion(su, gap, goodword, badlen, score, altscore, had_bonus, slang)
11075     suginfo_T	*su;
11076     garray_T	*gap;
11077     char_u	*goodword;
11078     int		badlen;		/* length of bad word used */
11079     int		score;
11080     int		altscore;
11081     int		had_bonus;	/* value for st_had_bonus */
11082     slang_T	*slang;		/* language for sound folding */
11083 {
11084     suggest_T   *stp;
11085     int		i;
11086     char_u	*p = NULL;
11087     int		c = 0;
11088 
11089     /* Check that the word wasn't banned. */
11090     if (was_banned(su, goodword))
11091 	return;
11092 
11093     /* If past "su_badlen" and the rest is identical stop at "su_badlen".
11094      * Remove the common part from "goodword". */
11095     i = badlen - su->su_badlen;
11096     if (i > 0)
11097     {
11098 	/* This assumes there was no case folding or it didn't change the
11099 	 * length... */
11100 	p = goodword + STRLEN(goodword) - i;
11101 	if (p > goodword && STRNICMP(su->su_badptr + su->su_badlen, p, i) == 0)
11102 	{
11103 	    badlen = su->su_badlen;
11104 	    c = *p;
11105 	    *p = NUL;
11106 	}
11107 	else
11108 	    p = NULL;
11109     }
11110     else if (i < 0)
11111     {
11112 	/* When replacing part of the word check that we actually change
11113 	 * something.  For "the the" a suggestion can be replacing the first
11114 	 * "the" with itself, since "the" wasn't banned. */
11115 	if (badlen == (int)STRLEN(goodword)
11116 			    && STRNCMP(su->su_badword, goodword, badlen) == 0)
11117 	    return;
11118     }
11119 
11120 
11121     if (score <= su->su_maxscore)
11122     {
11123 	/* Check if the word is already there.  Also check the length that is
11124 	 * being replaced "thes," -> "these" is a different suggestion from
11125 	 * "thes" -> "these". */
11126 	stp = &SUG(*gap, 0);
11127 	for (i = gap->ga_len - 1; i >= 0; --i)
11128 	    if (STRCMP(stp[i].st_word, goodword) == 0
11129 						&& stp[i].st_orglen == badlen)
11130 	    {
11131 		/* Found it.  Remember the lowest score. */
11132 		if (stp[i].st_score > score)
11133 		{
11134 		    stp[i].st_score = score;
11135 		    stp[i].st_altscore = altscore;
11136 		    stp[i].st_had_bonus = had_bonus;
11137 		}
11138 		if (stp[i].st_slang == NULL)
11139 		    stp[i].st_slang = slang;
11140 		break;
11141 	    }
11142 
11143 	if (i < 0 && ga_grow(gap, 1) == OK)
11144 	{
11145 	    /* Add a suggestion. */
11146 	    stp = &SUG(*gap, gap->ga_len);
11147 	    stp->st_word = vim_strsave(goodword);
11148 	    if (stp->st_word != NULL)
11149 	    {
11150 		stp->st_score = score;
11151 		stp->st_altscore = altscore;
11152 		stp->st_had_bonus = had_bonus;
11153 		stp->st_orglen = badlen;
11154 		stp->st_slang = slang;
11155 		++gap->ga_len;
11156 
11157 		/* If we have too many suggestions now, sort the list and keep
11158 		 * the best suggestions. */
11159 		if (gap->ga_len > SUG_MAX_COUNT(su))
11160 		    su->su_maxscore = cleanup_suggestions(gap, su->su_maxscore,
11161 							 SUG_CLEAN_COUNT(su));
11162 	    }
11163 	}
11164     }
11165 
11166     if (p != NULL)
11167 	*p = c;		/* restore "goodword" */
11168 }
11169 
11170 /*
11171  * Add a word to be banned.
11172  */
11173     static void
11174 add_banned(su, word)
11175     suginfo_T	*su;
11176     char_u	*word;
11177 {
11178     char_u	*s = vim_strsave(word);
11179     hash_T	hash;
11180     hashitem_T	*hi;
11181 
11182     if (s != NULL)
11183     {
11184 	hash = hash_hash(s);
11185 	hi = hash_lookup(&su->su_banned, s, hash);
11186 	if (HASHITEM_EMPTY(hi))
11187 	    hash_add_item(&su->su_banned, hi, s, hash);
11188 	else
11189 	    vim_free(s);
11190     }
11191 }
11192 
11193 /*
11194  * Return TRUE if a word appears in the list of banned words.
11195  */
11196     static int
11197 was_banned(su, word)
11198     suginfo_T	*su;
11199     char_u	*word;
11200 {
11201     hashitem_T	*hi = hash_find(&su->su_banned, word);
11202 
11203     return !HASHITEM_EMPTY(hi);
11204 }
11205 
11206 /*
11207  * Free the banned words in "su".
11208  */
11209     static void
11210 free_banned(su)
11211     suginfo_T	*su;
11212 {
11213     int		todo;
11214     hashitem_T	*hi;
11215 
11216     todo = su->su_banned.ht_used;
11217     for (hi = su->su_banned.ht_array; todo > 0; ++hi)
11218     {
11219 	if (!HASHITEM_EMPTY(hi))
11220 	{
11221 	    vim_free(hi->hi_key);
11222 	    --todo;
11223 	}
11224     }
11225     hash_clear(&su->su_banned);
11226 }
11227 
11228 /*
11229  * Recompute the score if sound-folding is possible.  This is slow,
11230  * thus only done for the final results.
11231  */
11232     static void
11233 rescore_suggestions(su)
11234     suginfo_T	*su;
11235 {
11236     langp_T	*lp;
11237     suggest_T	*stp;
11238     char_u	sal_badword[MAXWLEN];
11239     char_u	sal_badword2[MAXWLEN];
11240     int		i;
11241     int		lpi;
11242     slang_T	*slang_first = NULL;
11243     slang_T	*slang;
11244 
11245     for (lpi = 0; lpi < curbuf->b_langp.ga_len; ++lpi)
11246     {
11247 	lp = LANGP_ENTRY(curbuf->b_langp, lpi);
11248 	if (lp->lp_slang->sl_sal.ga_len > 0)
11249 	{
11250 	    /* soundfold the bad word */
11251 	    slang_first = lp->lp_slang;
11252 	    spell_soundfold(slang_first, su->su_fbadword, TRUE, sal_badword);
11253 	    break;
11254 	}
11255     }
11256 
11257     if (slang_first != NULL)
11258     {
11259 	for (i = 0; i < su->su_ga.ga_len; ++i)
11260 	{
11261 	    /* Only rescore suggestions that have no sal score yet and do have
11262 	     * a language. */
11263 	    stp = &SUG(su->su_ga, i);
11264 	    if (!stp->st_had_bonus && stp->st_slang != NULL)
11265 	    {
11266 		slang = stp->st_slang;
11267 		if (slang->sl_sal.ga_len > 0)
11268 		{
11269 		    if (slang == slang_first)
11270 			stp->st_altscore = stp_sal_score(stp, su,
11271 							  slang, sal_badword);
11272 		    else
11273 		    {
11274 			spell_soundfold(slang, su->su_fbadword,
11275 							  TRUE, sal_badword2);
11276 			stp->st_altscore = stp_sal_score(stp, su,
11277 							 slang, sal_badword2);
11278 		    }
11279 		    if (stp->st_altscore == SCORE_MAXMAX)
11280 			stp->st_altscore = SCORE_BIG;
11281 		    stp->st_score = RESCORE(stp->st_score, stp->st_altscore);
11282 		}
11283 	    }
11284 	}
11285     }
11286 }
11287 
11288 static int
11289 #ifdef __BORLANDC__
11290 _RTLENTRYF
11291 #endif
11292 sug_compare __ARGS((const void *s1, const void *s2));
11293 
11294 /*
11295  * Function given to qsort() to sort the suggestions on st_score.
11296  */
11297     static int
11298 #ifdef __BORLANDC__
11299 _RTLENTRYF
11300 #endif
11301 sug_compare(s1, s2)
11302     const void	*s1;
11303     const void	*s2;
11304 {
11305     suggest_T	*p1 = (suggest_T *)s1;
11306     suggest_T	*p2 = (suggest_T *)s2;
11307     int		n = p1->st_score - p2->st_score;
11308 
11309     if (n == 0)
11310 	return p1->st_altscore - p2->st_altscore;
11311     return n;
11312 }
11313 
11314 /*
11315  * Cleanup the suggestions:
11316  * - Sort on score.
11317  * - Remove words that won't be displayed.
11318  * Returns the maximum score in the list or "maxscore" unmodified.
11319  */
11320     static int
11321 cleanup_suggestions(gap, maxscore, keep)
11322     garray_T	*gap;
11323     int		maxscore;
11324     int		keep;		/* nr of suggestions to keep */
11325 {
11326     suggest_T   *stp = &SUG(*gap, 0);
11327     int		i;
11328 
11329     /* Sort the list. */
11330     qsort(gap->ga_data, (size_t)gap->ga_len, sizeof(suggest_T), sug_compare);
11331 
11332     /* Truncate the list to the number of suggestions that will be displayed. */
11333     if (gap->ga_len > keep)
11334     {
11335 	for (i = keep; i < gap->ga_len; ++i)
11336 	    vim_free(stp[i].st_word);
11337 	gap->ga_len = keep;
11338 	return stp[keep - 1].st_score;
11339     }
11340     return maxscore;
11341 }
11342 
#if defined(FEAT_EVAL) || defined(PROTO)
/*
 * Soundfold "word", for soundfold().
 * Uses the first language of the current buffer that has sound-folding
 * items.  When there is none, or when "w_p_spell" is off for the current
 * window or "b_p_spl" is empty, a copy of "word" itself is returned.
 * Result is in allocated memory, NULL for an error.
 */
    char_u *
eval_soundfold(word)
    char_u	*word;
{
    char_u	folded[MAXWLEN];
    langp_T	*entry;
    int		idx;

    /* Spell checking not active: return word as-is. */
    if (!curwin->w_p_spell || *curbuf->b_p_spl == NUL)
	return vim_strsave(word);

    /* Use the sound-folding of the first language that supports it. */
    for (idx = 0; idx < curbuf->b_langp.ga_len; ++idx)
    {
	entry = LANGP_ENTRY(curbuf->b_langp, idx);
	if (entry->lp_slang->sl_sal.ga_len <= 0)
	    continue;

	/* "word" is passed as not case-folded yet. */
	spell_soundfold(entry->lp_slang, word, FALSE, folded);
	return vim_strsave(folded);
    }

    /* No language with sound folding, return word as-is. */
    return vim_strsave(word);
}
#endif
11373 
11374 /*
11375  * Turn "inword" into its sound-a-like equivalent in "res[MAXWLEN]".
11376  *
11377  * There are many ways to turn a word into a sound-a-like representation.  The
11378  * oldest is Soundex (1918!).   A nice overview can be found in "Approximate
11379  * swedish name matching - survey and test of different algorithms" by Klas
11380  * Erikson.
11381  *
11382  * We support two methods:
11383  * 1. SOFOFROM/SOFOTO do a simple character mapping.
11384  * 2. SAL items define a more advanced sound-folding (and much slower).
11385  */
11386     static void
11387 spell_soundfold(slang, inword, folded, res)
11388     slang_T	*slang;
11389     char_u	*inword;
11390     int		folded;	    /* "inword" is already case-folded */
11391     char_u	*res;
11392 {
11393     char_u	fword[MAXWLEN];
11394     char_u	*word;
11395 
11396     if (slang->sl_sofo)
11397 	/* SOFOFROM and SOFOTO used */
11398 	spell_soundfold_sofo(slang, inword, res);
11399     else
11400     {
11401 	/* SAL items used.  Requires the word to be case-folded. */
11402 	if (folded)
11403 	    word = inword;
11404 	else
11405 	{
11406 	    (void)spell_casefold(inword, STRLEN(inword), fword, MAXWLEN);
11407 	    word = fword;
11408 	}
11409 
11410 #ifdef FEAT_MBYTE
11411 	if (has_mbyte)
11412 	    spell_soundfold_wsal(slang, word, res);
11413 	else
11414 #endif
11415 	    spell_soundfold_sal(slang, word, res);
11416     }
11417 }
11418 
11419 /*
11420  * Perform sound folding of "inword" into "res" according to SOFOFROM and
11421  * SOFOTO lines.
11422  */
11423     static void
11424 spell_soundfold_sofo(slang, inword, res)
11425     slang_T	*slang;
11426     char_u	*inword;
11427     char_u	*res;
11428 {
11429     char_u	*s;
11430     int		ri = 0;
11431     int		c;
11432 
11433 #ifdef FEAT_MBYTE
11434     if (has_mbyte)
11435     {
11436 	int	prevc = 0;
11437 	int	*ip;
11438 
11439 	/* The sl_sal_first[] table contains the translation for chars up to
11440 	 * 255, sl_sal the rest. */
11441 	for (s = inword; *s != NUL; )
11442 	{
11443 	    c = mb_cptr2char_adv(&s);
11444 	    if (enc_utf8 ? utf_class(c) == 0 : vim_iswhite(c))
11445 		c = ' ';
11446 	    else if (c < 256)
11447 		c = slang->sl_sal_first[c];
11448 	    else
11449 	    {
11450 		ip = ((int **)slang->sl_sal.ga_data)[c & 0xff];
11451 		if (ip == NULL)		/* empty list, can't match */
11452 		    c = NUL;
11453 		else
11454 		    for (;;)		/* find "c" in the list */
11455 		    {
11456 			if (*ip == 0)	/* not found */
11457 			{
11458 			    c = NUL;
11459 			    break;
11460 			}
11461 			if (*ip == c)	/* match! */
11462 			{
11463 			    c = ip[1];
11464 			    break;
11465 			}
11466 			ip += 2;
11467 		    }
11468 	    }
11469 
11470 	    if (c != NUL && c != prevc)
11471 	    {
11472 		ri += mb_char2bytes(c, res + ri);
11473 		if (ri + MB_MAXBYTES > MAXWLEN)
11474 		    break;
11475 		prevc = c;
11476 	    }
11477 	}
11478     }
11479     else
11480 #endif
11481     {
11482 	/* The sl_sal_first[] table contains the translation. */
11483 	for (s = inword; (c = *s) != NUL; ++s)
11484 	{
11485 	    if (vim_iswhite(c))
11486 		c = ' ';
11487 	    else
11488 		c = slang->sl_sal_first[c];
11489 	    if (c != NUL && (ri == 0 || res[ri - 1] != c))
11490 		res[ri++] = c;
11491 	}
11492     }
11493 
11494     res[ri] = NUL;
11495 }
11496 
11497     static void
11498 spell_soundfold_sal(slang, inword, res)
11499     slang_T	*slang;
11500     char_u	*inword;
11501     char_u	*res;
11502 {
11503     salitem_T	*smp;
11504     char_u	word[MAXWLEN];
11505     char_u	*s = inword;
11506     char_u	*t;
11507     char_u	*pf;
11508     int		i, j, z;
11509     int		reslen;
11510     int		n, k = 0;
11511     int		z0;
11512     int		k0;
11513     int		n0;
11514     int		c;
11515     int		pri;
11516     int		p0 = -333;
11517     int		c0;
11518 
11519     /* Remove accents, if wanted.  We actually remove all non-word characters.
11520      * But keep white space.  We need a copy, the word may be changed here. */
11521     if (slang->sl_rem_accents)
11522     {
11523 	t = word;
11524 	while (*s != NUL)
11525 	{
11526 	    if (vim_iswhite(*s))
11527 	    {
11528 		*t++ = ' ';
11529 		s = skipwhite(s);
11530 	    }
11531 	    else
11532 	    {
11533 		if (spell_iswordp_nmw(s))
11534 		    *t++ = *s;
11535 		++s;
11536 	    }
11537 	}
11538 	*t = NUL;
11539     }
11540     else
11541 	STRCPY(word, s);
11542 
11543     smp = (salitem_T *)slang->sl_sal.ga_data;
11544 
11545     /*
11546      * This comes from Aspell phonet.cpp.  Converted from C++ to C.
11547      * Changed to keep spaces.
11548      */
11549     i = reslen = z = 0;
11550     while ((c = word[i]) != NUL)
11551     {
11552 	/* Start with the first rule that has the character in the word. */
11553 	n = slang->sl_sal_first[c];
11554 	z0 = 0;
11555 
11556 	if (n >= 0)
11557 	{
11558 	    /* check all rules for the same letter */
11559 	    for (; (s = smp[n].sm_lead)[0] == c; ++n)
11560 	    {
11561 		/* Quickly skip entries that don't match the word.  Most
11562 		 * entries are less then three chars, optimize for that. */
11563 		k = smp[n].sm_leadlen;
11564 		if (k > 1)
11565 		{
11566 		    if (word[i + 1] != s[1])
11567 			continue;
11568 		    if (k > 2)
11569 		    {
11570 			for (j = 2; j < k; ++j)
11571 			    if (word[i + j] != s[j])
11572 				break;
11573 			if (j < k)
11574 			    continue;
11575 		    }
11576 		}
11577 
11578 		if ((pf = smp[n].sm_oneof) != NULL)
11579 		{
11580 		    /* Check for match with one of the chars in "sm_oneof". */
11581 		    while (*pf != NUL && *pf != word[i + k])
11582 			++pf;
11583 		    if (*pf == NUL)
11584 			continue;
11585 		    ++k;
11586 		}
11587 		s = smp[n].sm_rules;
11588 		pri = 5;    /* default priority */
11589 
11590 		p0 = *s;
11591 		k0 = k;
11592 		while (*s == '-' && k > 1)
11593 		{
11594 		    k--;
11595 		    s++;
11596 		}
11597 		if (*s == '<')
11598 		    s++;
11599 		if (VIM_ISDIGIT(*s))
11600 		{
11601 		    /* determine priority */
11602 		    pri = *s - '0';
11603 		    s++;
11604 		}
11605 		if (*s == '^' && *(s + 1) == '^')
11606 		    s++;
11607 
11608 		if (*s == NUL
11609 			|| (*s == '^'
11610 			    && (i == 0 || !(word[i - 1] == ' '
11611 				      || spell_iswordp(word + i - 1, curbuf)))
11612 			    && (*(s + 1) != '$'
11613 				|| (!spell_iswordp(word + i + k0, curbuf))))
11614 			|| (*s == '$' && i > 0
11615 			    && spell_iswordp(word + i - 1, curbuf)
11616 			    && (!spell_iswordp(word + i + k0, curbuf))))
11617 		{
11618 		    /* search for followup rules, if:    */
11619 		    /* followup and k > 1  and  NO '-' in searchstring */
11620 		    c0 = word[i + k - 1];
11621 		    n0 = slang->sl_sal_first[c0];
11622 
11623 		    if (slang->sl_followup && k > 1 && n0 >= 0
11624 					   && p0 != '-' && word[i + k] != NUL)
11625 		    {
11626 			/* test follow-up rule for "word[i + k]" */
11627 			for ( ; (s = smp[n0].sm_lead)[0] == c0; ++n0)
11628 			{
11629 			    /* Quickly skip entries that don't match the word.
11630 			     * */
11631 			    k0 = smp[n0].sm_leadlen;
11632 			    if (k0 > 1)
11633 			    {
11634 				if (word[i + k] != s[1])
11635 				    continue;
11636 				if (k0 > 2)
11637 				{
11638 				    pf = word + i + k + 1;
11639 				    for (j = 2; j < k0; ++j)
11640 					if (*pf++ != s[j])
11641 					    break;
11642 				    if (j < k0)
11643 					continue;
11644 				}
11645 			    }
11646 			    k0 += k - 1;
11647 
11648 			    if ((pf = smp[n0].sm_oneof) != NULL)
11649 			    {
11650 				/* Check for match with one of the chars in
11651 				 * "sm_oneof". */
11652 				while (*pf != NUL && *pf != word[i + k0])
11653 				    ++pf;
11654 				if (*pf == NUL)
11655 				    continue;
11656 				++k0;
11657 			    }
11658 
11659 			    p0 = 5;
11660 			    s = smp[n0].sm_rules;
11661 			    while (*s == '-')
11662 			    {
11663 				/* "k0" gets NOT reduced because
11664 				 * "if (k0 == k)" */
11665 				s++;
11666 			    }
11667 			    if (*s == '<')
11668 				s++;
11669 			    if (VIM_ISDIGIT(*s))
11670 			    {
11671 				p0 = *s - '0';
11672 				s++;
11673 			    }
11674 
11675 			    if (*s == NUL
11676 				    /* *s == '^' cuts */
11677 				    || (*s == '$'
11678 					    && !spell_iswordp(word + i + k0,
11679 								     curbuf)))
11680 			    {
11681 				if (k0 == k)
11682 				    /* this is just a piece of the string */
11683 				    continue;
11684 
11685 				if (p0 < pri)
11686 				    /* priority too low */
11687 				    continue;
11688 				/* rule fits; stop search */
11689 				break;
11690 			    }
11691 			}
11692 
11693 			if (p0 >= pri && smp[n0].sm_lead[0] == c0)
11694 			    continue;
11695 		    }
11696 
11697 		    /* replace string */
11698 		    s = smp[n].sm_to;
11699 		    if (s == NULL)
11700 			s = (char_u *)"";
11701 		    pf = smp[n].sm_rules;
11702 		    p0 = (vim_strchr(pf, '<') != NULL) ? 1 : 0;
11703 		    if (p0 == 1 && z == 0)
11704 		    {
11705 			/* rule with '<' is used */
11706 			if (reslen > 0 && *s != NUL && (res[reslen - 1] == c
11707 						    || res[reslen - 1] == *s))
11708 			    reslen--;
11709 			z0 = 1;
11710 			z = 1;
11711 			k0 = 0;
11712 			while (*s != NUL && word[i + k0] != NUL)
11713 			{
11714 			    word[i + k0] = *s;
11715 			    k0++;
11716 			    s++;
11717 			}
11718 			if (k > k0)
11719 			    mch_memmove(word + i + k0, word + i + k,
11720 						    STRLEN(word + i + k) + 1);
11721 
11722 			/* new "actual letter" */
11723 			c = word[i];
11724 		    }
11725 		    else
11726 		    {
11727 			/* no '<' rule used */
11728 			i += k - 1;
11729 			z = 0;
11730 			while (*s != NUL && s[1] != NUL && reslen < MAXWLEN)
11731 			{
11732 			    if (reslen == 0 || res[reslen - 1] != *s)
11733 				res[reslen++] = *s;
11734 			    s++;
11735 			}
11736 			/* new "actual letter" */
11737 			c = *s;
11738 			if (strstr((char *)pf, "^^") != NULL)
11739 			{
11740 			    if (c != NUL)
11741 				res[reslen++] = c;
11742 			    mch_memmove(word, word + i + 1,
11743 						    STRLEN(word + i + 1) + 1);
11744 			    i = 0;
11745 			    z0 = 1;
11746 			}
11747 		    }
11748 		    break;
11749 		}
11750 	    }
11751 	}
11752 	else if (vim_iswhite(c))
11753 	{
11754 	    c = ' ';
11755 	    k = 1;
11756 	}
11757 
11758 	if (z0 == 0)
11759 	{
11760 	    if (k && !p0 && reslen < MAXWLEN && c != NUL
11761 		    && (!slang->sl_collapse || reslen == 0
11762 						     || res[reslen - 1] != c))
11763 		/* condense only double letters */
11764 		res[reslen++] = c;
11765 
11766 	    i++;
11767 	    z = 0;
11768 	    k = 0;
11769 	}
11770     }
11771 
11772     res[reslen] = NUL;
11773 }
11774 
#ifdef FEAT_MBYTE
/*
 * Turn "inword" into its sound-a-like equivalent in "res[MAXWLEN]".
 * Multi-byte version of spell_soundfold_sal().
 *
 * "inword" is first converted to an array of characters, the SAL items are
 * applied to that array with the result in another array of characters,
 * which is converted back to a multi-byte string at the end.
 *
 * "sl_sal_first[]" and the order of the items are based on the low byte of
 * the first character of the lead only, thus items for other characters with
 * the same low byte must be skipped while searching.
 * The wide variants of the item fields are used: "sm_lead_w", "sm_oneof_w"
 * and "sm_to_w".  "sm_rules" is parsed like in spell_soundfold_sal().
 */
    static void
spell_soundfold_wsal(slang, inword, res)
    slang_T	*slang;
    char_u	*inword;
    char_u	*res;
{
    salitem_T	*smp = (salitem_T *)slang->sl_sal.ga_data;
    int		word[MAXWLEN];
    int		wres[MAXWLEN];
    int		l;
    char_u	*s;
    int		*ws;
    char_u	*t;
    int		*pf;
    int		i, j, z;
    int		reslen;
    int		n, k = 0;
    int		z0;
    int		k0;
    int		n0;
    int		c;
    int		pri;
    int		p0 = -333;
    int		c0;
    int		did_white = FALSE;
    int		taillen;	/* nr of characters in the rest of "word" */

    /*
     * Convert the multi-byte string to a wide-character string.
     * Remove accents, if wanted.  We actually remove all non-word characters.
     * But keep white space.
     */
    n = 0;
    for (s = inword; *s != NUL; )
    {
	t = s;
	c = mb_cptr2char_adv(&s);
	if (slang->sl_rem_accents)
	{
	    if (enc_utf8 ? utf_class(c) == 0 : vim_iswhite(c))
	    {
		/* a sequence of white space becomes one space */
		if (did_white)
		    continue;
		c = ' ';
		did_white = TRUE;
	    }
	    else
	    {
		did_white = FALSE;
		if (!spell_iswordp_nmw(t))
		    continue;
	    }
	}
	word[n++] = c;
    }
    word[n] = NUL;

    /*
     * This comes from Aspell phonet.cpp.
     * Converted from C++ to C.  Added support for multi-byte chars.
     * Changed to keep spaces.
     *
     * "i" is the index in "word", "reslen" the number of items in "wres".
     * "k" is the number of characters the current item consumes.
     * "z" is set when an item with '<' was applied, it prevents applying
     * another one right away.  "z0" is set when the item already did all the
     * work for this round, so that "i" is not advanced below.
     */
    i = reslen = z = 0;
    while ((c = word[i]) != NUL)
    {
	/* Start with the first rule that has the character in the word. */
	n = slang->sl_sal_first[c & 0xff];
	z0 = 0;

	if (n >= 0)
	{
	    /* check all rules for the same index byte */
	    for (; ((ws = smp[n].sm_lead_w)[0] & 0xff) == (c & 0xff); ++n)
	    {
		/* Quickly skip entries that don't match the word.  Most
		 * entries are less then three chars, optimize for that. */
		if (c != ws[0])
		    continue;
		k = smp[n].sm_leadlen;
		if (k > 1)
		{
		    if (word[i + 1] != ws[1])
			continue;
		    if (k > 2)
		    {
			for (j = 2; j < k; ++j)
			    if (word[i + j] != ws[j])
				break;
			if (j < k)
			    continue;
		    }
		}

		if ((pf = smp[n].sm_oneof_w) != NULL)
		{
		    /* Check for match with one of the chars in "sm_oneof". */
		    while (*pf != NUL && *pf != word[i + k])
			++pf;
		    if (*pf == NUL)
			continue;
		    ++k;
		}
		s = smp[n].sm_rules;
		pri = 5;    /* default priority */

		/* Remember the first control character in "p0" and the full
		 * match length in "k0", then let every '-' reduce the number
		 * of characters that will be consumed. */
		p0 = *s;
		k0 = k;
		while (*s == '-' && k > 1)
		{
		    k--;
		    s++;
		}
		if (*s == '<')
		    s++;
		if (VIM_ISDIGIT(*s))
		{
		    /* determine priority */
		    pri = *s - '0';
		    s++;
		}
		if (*s == '^' && *(s + 1) == '^')
		    s++;

		/* Same position conditions as in spell_soundfold_sal(): no
		 * condition left, '^' at the start (optionally with '$') or
		 * '$' at the end of a word. */
		if (*s == NUL
			|| (*s == '^'
			    && (i == 0 || !(word[i - 1] == ' '
				    || spell_iswordp_w(word + i - 1, curbuf)))
			    && (*(s + 1) != '$'
				|| (!spell_iswordp_w(word + i + k0, curbuf))))
			|| (*s == '$' && i > 0
			    && spell_iswordp_w(word + i - 1, curbuf)
			    && (!spell_iswordp_w(word + i + k0, curbuf))))
		{
		    /* search for followup rules, if:    */
		    /* followup and k > 1  and  NO '-' in searchstring */
		    c0 = word[i + k - 1];
		    n0 = slang->sl_sal_first[c0 & 0xff];

		    if (slang->sl_followup && k > 1 && n0 >= 0
					   && p0 != '-' && word[i + k] != NUL)
		    {
			/* Test follow-up rule for "word[i + k]"; loop over
			 * all entries with the same index byte. */
			for ( ; ((ws = smp[n0].sm_lead_w)[0] & 0xff)
							 == (c0 & 0xff); ++n0)
			{
			    /* Quickly skip entries that don't match the word.
			     */
			    if (c0 != ws[0])
				continue;
			    k0 = smp[n0].sm_leadlen;
			    if (k0 > 1)
			    {
				if (word[i + k] != ws[1])
				    continue;
				if (k0 > 2)
				{
				    pf = word + i + k + 1;
				    for (j = 2; j < k0; ++j)
					if (*pf++ != ws[j])
					    break;
				    if (j < k0)
					continue;
				}
			    }
			    k0 += k - 1;

			    if ((pf = smp[n0].sm_oneof_w) != NULL)
			    {
				/* Check for match with one of the chars in
				 * "sm_oneof". */
				while (*pf != NUL && *pf != word[i + k0])
				    ++pf;
				if (*pf == NUL)
				    continue;
				++k0;
			    }

			    /* "p0" now is the priority of the follow-up
			     * item. */
			    p0 = 5;
			    s = smp[n0].sm_rules;
			    while (*s == '-')
			    {
				/* "k0" gets NOT reduced because
				 * "if (k0 == k)" */
				s++;
			    }
			    if (*s == '<')
				s++;
			    if (VIM_ISDIGIT(*s))
			    {
				p0 = *s - '0';
				s++;
			    }

			    if (*s == NUL
				    /* *s == '^' cuts */
				    || (*s == '$'
					 && !spell_iswordp_w(word + i + k0,
								     curbuf)))
			    {
				if (k0 == k)
				    /* this is just a piece of the string */
				    continue;

				if (p0 < pri)
				    /* priority too low */
				    continue;
				/* rule fits; stop search */
				break;
			    }
			}

			/* A follow-up item with sufficient priority was
			 * found: skip the current item. */
			if (p0 >= pri && (smp[n0].sm_lead_w[0] & 0xff)
							       == (c0 & 0xff))
			    continue;
		    }

		    /* replace string */
		    ws = smp[n].sm_to_w;
		    s = smp[n].sm_rules;
		    p0 = (vim_strchr(s, '<') != NULL) ? 1 : 0;
		    if (p0 == 1 && z == 0)
		    {
			/* rule with '<' is used */
			if (reslen > 0 && ws != NULL && *ws != NUL
				&& (wres[reslen - 1] == c
						    || wres[reslen - 1] == *ws))
			    reslen--;
			z0 = 1;
			z = 1;
			k0 = 0;
			/* Put the replacement in the word, it is scanned
			 * again in the next round. */
			if (ws != NULL)
			    while (*ws != NUL && word[i + k0] != NUL)
			    {
				word[i + k0] = *ws;
				k0++;
				ws++;
			    }
			if (k > k0)
			{
			    /* Replacement shorter than the match: close the
			     * gap.  "word" is an array of int, thus the
			     * characters must be counted here, STRLEN() would
			     * count bytes up to the first zero byte. */
			    for (taillen = 0; word[i + k + taillen] != NUL;
								    ++taillen)
				;
			    mch_memmove(word + i + k0, word + i + k,
						sizeof(int) * (taillen + 1));
			}

			/* new "actual letter" */
			c = word[i];
		    }
		    else
		    {
			/* no '<' rule used */
			i += k - 1;
			z = 0;
			/* Store all but the last character of the
			 * replacement. */
			if (ws != NULL)
			    while (*ws != NUL && ws[1] != NUL
							  && reslen < MAXWLEN)
			    {
				if (reslen == 0 || wres[reslen - 1] != *ws)
				    wres[reslen++] = *ws;
				ws++;
			    }
			/* new "actual letter" */
			if (ws == NULL)
			    c = NUL;
			else
			    c = *ws;
			if (strstr((char *)s, "^^") != NULL)
			{
			    /* This store must not run past the end of "wres"
			     * either. */
			    if (c != NUL && reslen < MAXWLEN)
				wres[reslen++] = c;
			    /* Drop the part of the word handled so far and
			     * start again at the front.  Count characters,
			     * STRLEN() can't be used on an array of int. */
			    for (taillen = 0; word[i + 1 + taillen] != NUL;
								    ++taillen)
				;
			    mch_memmove(word, word + i + 1,
						sizeof(int) * (taillen + 1));
			    i = 0;
			    z0 = 1;
			}
		    }
		    break;
		}
	    }
	}
	else if (vim_iswhite(c))
	{
	    c = ' ';
	    k = 1;
	}

	if (z0 == 0)
	{
	    if (k && !p0 && reslen < MAXWLEN && c != NUL
		    && (!slang->sl_collapse || reslen == 0
						     || wres[reslen - 1] != c))
		/* condense only double letters */
		wres[reslen++] = c;

	    i++;
	    z = 0;
	    k = 0;
	}
    }

    /* Convert wide characters in "wres" to a multi-byte string in "res".
     * Stop when another character might not fit. */
    l = 0;
    for (n = 0; n < reslen; ++n)
    {
	l += mb_char2bytes(wres[n], res + l);
	if (l + MB_MAXBYTES > MAXWLEN)
	    break;
    }
    res[l] = NUL;
}
#endif
12087 
12088 /*
12089  * Compute a score for two sound-a-like words.
12090  * This permits up to two inserts/deletes/swaps/etc. to keep things fast.
12091  * Instead of a generic loop we write out the code.  That keeps it fast by
12092  * avoiding checks that will not be possible.
12093  */
12094     static int
12095 soundalike_score(goodstart, badstart)
12096     char_u	*goodstart;	/* sound-folded good word */
12097     char_u	*badstart;	/* sound-folded bad word */
12098 {
12099     char_u	*goodsound = goodstart;
12100     char_u	*badsound = badstart;
12101     int		goodlen;
12102     int		badlen;
12103     int		n;
12104     char_u	*pl, *ps;
12105     char_u	*pl2, *ps2;
12106     int		score = 0;
12107 
12108     /* adding/inserting "*" at the start (word starts with vowel) shouldn't be
12109      * counted so much, vowels halfway the word aren't counted at all. */
12110     if ((*badsound == '*' || *goodsound == '*') && *badsound != *goodsound)
12111     {
12112 	score = SCORE_DEL / 2;
12113 	if (*badsound == '*')
12114 	    ++badsound;
12115 	else
12116 	    ++goodsound;
12117     }
12118 
12119     goodlen = STRLEN(goodsound);
12120     badlen = STRLEN(badsound);
12121 
12122     /* Return quickly if the lenghts are too different to be fixed by two
12123      * changes. */
12124     n = goodlen - badlen;
12125     if (n < -2 || n > 2)
12126 	return SCORE_MAXMAX;
12127 
12128     if (n > 0)
12129     {
12130 	pl = goodsound;	    /* goodsound is longest */
12131 	ps = badsound;
12132     }
12133     else
12134     {
12135 	pl = badsound;	    /* badsound is longest */
12136 	ps = goodsound;
12137     }
12138 
12139     /* Skip over the identical part. */
12140     while (*pl == *ps && *pl != NUL)
12141     {
12142 	++pl;
12143 	++ps;
12144     }
12145 
12146     switch (n)
12147     {
12148 	case -2:
12149 	case 2:
12150 	    /*
12151 	     * Must delete two characters from "pl".
12152 	     */
12153 	    ++pl;	/* first delete */
12154 	    while (*pl == *ps)
12155 	    {
12156 		++pl;
12157 		++ps;
12158 	    }
12159 	    /* strings must be equal after second delete */
12160 	    if (STRCMP(pl + 1, ps) == 0)
12161 		return score + SCORE_DEL * 2;
12162 
12163 	    /* Failed to compare. */
12164 	    break;
12165 
12166 	case -1:
12167 	case 1:
12168 	    /*
12169 	     * Minimal one delete from "pl" required.
12170 	     */
12171 
12172 	    /* 1: delete */
12173 	    pl2 = pl + 1;
12174 	    ps2 = ps;
12175 	    while (*pl2 == *ps2)
12176 	    {
12177 		if (*pl2 == NUL)	/* reached the end */
12178 		    return score + SCORE_DEL;
12179 		++pl2;
12180 		++ps2;
12181 	    }
12182 
12183 	    /* 2: delete then swap, then rest must be equal */
12184 	    if (pl2[0] == ps2[1] && pl2[1] == ps2[0]
12185 					     && STRCMP(pl2 + 2, ps2 + 2) == 0)
12186 		return score + SCORE_DEL + SCORE_SWAP;
12187 
12188 	    /* 3: delete then substitute, then the rest must be equal */
12189 	    if (STRCMP(pl2 + 1, ps2 + 1) == 0)
12190 		return score + SCORE_DEL + SCORE_SUBST;
12191 
12192 	    /* 4: first swap then delete */
12193 	    if (pl[0] == ps[1] && pl[1] == ps[0])
12194 	    {
12195 		pl2 = pl + 2;	    /* swap, skip two chars */
12196 		ps2 = ps + 2;
12197 		while (*pl2 == *ps2)
12198 		{
12199 		    ++pl2;
12200 		    ++ps2;
12201 		}
12202 		/* delete a char and then strings must be equal */
12203 		if (STRCMP(pl2 + 1, ps2) == 0)
12204 		    return score + SCORE_SWAP + SCORE_DEL;
12205 	    }
12206 
12207 	    /* 5: first substitute then delete */
12208 	    pl2 = pl + 1;	    /* substitute, skip one char */
12209 	    ps2 = ps + 1;
12210 	    while (*pl2 == *ps2)
12211 	    {
12212 		++pl2;
12213 		++ps2;
12214 	    }
12215 	    /* delete a char and then strings must be equal */
12216 	    if (STRCMP(pl2 + 1, ps2) == 0)
12217 		return score + SCORE_SUBST + SCORE_DEL;
12218 
12219 	    /* Failed to compare. */
12220 	    break;
12221 
12222 	case 0:
12223 	    /*
12224 	     * Lenghts are equal, thus changes must result in same length: An
12225 	     * insert is only possible in combination with a delete.
12226 	     * 1: check if for identical strings
12227 	     */
12228 	    if (*pl == NUL)
12229 		return score;
12230 
12231 	    /* 2: swap */
12232 	    if (pl[0] == ps[1] && pl[1] == ps[0])
12233 	    {
12234 		pl2 = pl + 2;	    /* swap, skip two chars */
12235 		ps2 = ps + 2;
12236 		while (*pl2 == *ps2)
12237 		{
12238 		    if (*pl2 == NUL)	/* reached the end */
12239 			return score + SCORE_SWAP;
12240 		    ++pl2;
12241 		    ++ps2;
12242 		}
12243 		/* 3: swap and swap again */
12244 		if (pl2[0] == ps2[1] && pl2[1] == ps2[0]
12245 					     && STRCMP(pl2 + 2, ps2 + 2) == 0)
12246 		    return score + SCORE_SWAP + SCORE_SWAP;
12247 
12248 		/* 4: swap and substitute */
12249 		if (STRCMP(pl2 + 1, ps2 + 1) == 0)
12250 		    return score + SCORE_SWAP + SCORE_SUBST;
12251 	    }
12252 
12253 	    /* 5: substitute */
12254 	    pl2 = pl + 1;
12255 	    ps2 = ps + 1;
12256 	    while (*pl2 == *ps2)
12257 	    {
12258 		if (*pl2 == NUL)	/* reached the end */
12259 		    return score + SCORE_SUBST;
12260 		++pl2;
12261 		++ps2;
12262 	    }
12263 
12264 	    /* 6: substitute and swap */
12265 	    if (pl2[0] == ps2[1] && pl2[1] == ps2[0]
12266 					     && STRCMP(pl2 + 2, ps2 + 2) == 0)
12267 		return score + SCORE_SUBST + SCORE_SWAP;
12268 
12269 	    /* 7: substitute and substitute */
12270 	    if (STRCMP(pl2 + 1, ps2 + 1) == 0)
12271 		return score + SCORE_SUBST + SCORE_SUBST;
12272 
12273 	    /* 8: insert then delete */
12274 	    pl2 = pl;
12275 	    ps2 = ps + 1;
12276 	    while (*pl2 == *ps2)
12277 	    {
12278 		++pl2;
12279 		++ps2;
12280 	    }
12281 	    if (STRCMP(pl2 + 1, ps2) == 0)
12282 		return score + SCORE_INS + SCORE_DEL;
12283 
12284 	    /* 9: delete then insert */
12285 	    pl2 = pl + 1;
12286 	    ps2 = ps;
12287 	    while (*pl2 == *ps2)
12288 	    {
12289 		++pl2;
12290 		++ps2;
12291 	    }
12292 	    if (STRCMP(pl2, ps2 + 1) == 0)
12293 		return score + SCORE_INS + SCORE_DEL;
12294 
12295 	    /* Failed to compare. */
12296 	    break;
12297     }
12298 
12299     return SCORE_MAXMAX;
12300 }
12301 
12302 /*
12303  * Compute the "edit distance" to turn "badword" into "goodword".  The less
12304  * deletes/inserts/substitutes/swaps are required the lower the score.
12305  *
12306  * The algorithm is described by Du and Chang, 1992.
12307  * The implementation of the algorithm comes from Aspell editdist.cpp,
12308  * edit_distance().  It has been converted from C++ to C and modified to
12309  * support multi-byte characters.
12310  */
12311     static int
12312 spell_edit_score(badword, goodword)
12313     char_u	*badword;
12314     char_u	*goodword;
12315 {
12316     int		*cnt;
12317     int		badlen, goodlen;	/* lenghts including NUL */
12318     int		j, i;
12319     int		t;
12320     int		bc, gc;
12321     int		pbc, pgc;
12322 #ifdef FEAT_MBYTE
12323     char_u	*p;
12324     int		wbadword[MAXWLEN];
12325     int		wgoodword[MAXWLEN];
12326 
12327     if (has_mbyte)
12328     {
12329 	/* Get the characters from the multi-byte strings and put them in an
12330 	 * int array for easy access. */
12331 	for (p = badword, badlen = 0; *p != NUL; )
12332 	    wbadword[badlen++] = mb_cptr2char_adv(&p);
12333 	wbadword[badlen++] = 0;
12334 	for (p = goodword, goodlen = 0; *p != NUL; )
12335 	    wgoodword[goodlen++] = mb_cptr2char_adv(&p);
12336 	wgoodword[goodlen++] = 0;
12337     }
12338     else
12339 #endif
12340     {
12341 	badlen = STRLEN(badword) + 1;
12342 	goodlen = STRLEN(goodword) + 1;
12343     }
12344 
12345     /* We use "cnt" as an array: CNT(badword_idx, goodword_idx). */
12346 #define CNT(a, b)   cnt[(a) + (b) * (badlen + 1)]
12347     cnt = (int *)lalloc((long_u)(sizeof(int) * (badlen + 1) * (goodlen + 1)),
12348 									TRUE);
12349     if (cnt == NULL)
12350 	return 0;	/* out of memory */
12351 
12352     CNT(0, 0) = 0;
12353     for (j = 1; j <= goodlen; ++j)
12354 	CNT(0, j) = CNT(0, j - 1) + SCORE_DEL;
12355 
12356     for (i = 1; i <= badlen; ++i)
12357     {
12358 	CNT(i, 0) = CNT(i - 1, 0) + SCORE_INS;
12359 	for (j = 1; j <= goodlen; ++j)
12360 	{
12361 #ifdef FEAT_MBYTE
12362 	    if (has_mbyte)
12363 	    {
12364 		bc = wbadword[i - 1];
12365 		gc = wgoodword[j - 1];
12366 	    }
12367 	    else
12368 #endif
12369 	    {
12370 		bc = badword[i - 1];
12371 		gc = goodword[j - 1];
12372 	    }
12373 	    if (bc == gc)
12374 		CNT(i, j) = CNT(i - 1, j - 1);
12375 	    else
12376 	    {
12377 		/* Use a better score when there is only a case difference. */
12378 		if (SPELL_TOFOLD(bc) == SPELL_TOFOLD(gc))
12379 		    CNT(i, j) = SCORE_ICASE + CNT(i - 1, j - 1);
12380 		else
12381 		    CNT(i, j) = SCORE_SUBST + CNT(i - 1, j - 1);
12382 
12383 		if (i > 1 && j > 1)
12384 		{
12385 #ifdef FEAT_MBYTE
12386 		    if (has_mbyte)
12387 		    {
12388 			pbc = wbadword[i - 2];
12389 			pgc = wgoodword[j - 2];
12390 		    }
12391 		    else
12392 #endif
12393 		    {
12394 			pbc = badword[i - 2];
12395 			pgc = goodword[j - 2];
12396 		    }
12397 		    if (bc == pgc && pbc == gc)
12398 		    {
12399 			t = SCORE_SWAP + CNT(i - 2, j - 2);
12400 			if (t < CNT(i, j))
12401 			    CNT(i, j) = t;
12402 		    }
12403 		}
12404 		t = SCORE_DEL + CNT(i - 1, j);
12405 		if (t < CNT(i, j))
12406 		    CNT(i, j) = t;
12407 		t = SCORE_INS + CNT(i, j - 1);
12408 		if (t < CNT(i, j))
12409 		    CNT(i, j) = t;
12410 	    }
12411 	}
12412     }
12413 
12414     i = CNT(badlen - 1, goodlen - 1);
12415     vim_free(cnt);
12416     return i;
12417 }
12418 
12419 /*
12420  * ":spelldump"
12421  */
12422 /*ARGSUSED*/
12423     void
12424 ex_spelldump(eap)
12425     exarg_T *eap;
12426 {
12427     buf_T	*buf = curbuf;
12428     langp_T	*lp;
12429     slang_T	*slang;
12430     idx_T	arridx[MAXWLEN];
12431     int		curi[MAXWLEN];
12432     char_u	word[MAXWLEN];
12433     int		c;
12434     char_u	*byts;
12435     idx_T	*idxs;
12436     linenr_T	lnum = 0;
12437     int		round;
12438     int		depth;
12439     int		n;
12440     int		flags;
12441     char_u	*region_names = NULL;	    /* region names being used */
12442     int		do_region = TRUE;	    /* dump region names and numbers */
12443     char_u	*p;
12444     int		lpi;
12445 
12446     if (no_spell_checking(curwin))
12447 	return;
12448 
12449     /* Create a new empty buffer by splitting the window. */
12450     do_cmdline_cmd((char_u *)"new");
12451     if (!bufempty() || !buf_valid(buf))
12452 	return;
12453 
12454     /* Find out if we can support regions: All languages must support the same
12455      * regions or none at all. */
12456     for (lpi = 0; lpi < buf->b_langp.ga_len; ++lpi)
12457     {
12458 	lp = LANGP_ENTRY(buf->b_langp, lpi);
12459 	p = lp->lp_slang->sl_regions;
12460 	if (p[0] != 0)
12461 	{
12462 	    if (region_names == NULL)	    /* first language with regions */
12463 		region_names = p;
12464 	    else if (STRCMP(region_names, p) != 0)
12465 	    {
12466 		do_region = FALSE;	    /* region names are different */
12467 		break;
12468 	    }
12469 	}
12470     }
12471 
12472     if (do_region && region_names != NULL)
12473     {
12474 	vim_snprintf((char *)IObuff, IOSIZE, "/regions=%s", region_names);
12475 	ml_append(lnum++, IObuff, (colnr_T)0, FALSE);
12476     }
12477     else
12478 	do_region = FALSE;
12479 
12480     /*
12481      * Loop over all files loaded for the entries in 'spelllang'.
12482      */
12483     for (lpi = 0; lpi < buf->b_langp.ga_len; ++lpi)
12484     {
12485 	lp = LANGP_ENTRY(buf->b_langp, lpi);
12486 	slang = lp->lp_slang;
12487 	if (slang->sl_fbyts == NULL)	    /* reloading failed */
12488 	    continue;
12489 
12490 	vim_snprintf((char *)IObuff, IOSIZE, "# file: %s", slang->sl_fname);
12491 	ml_append(lnum++, IObuff, (colnr_T)0, FALSE);
12492 
12493 	/* round 1: case-folded tree
12494 	 * round 2: keep-case tree */
12495 	for (round = 1; round <= 2; ++round)
12496 	{
12497 	    if (round == 1)
12498 	    {
12499 		byts = slang->sl_fbyts;
12500 		idxs = slang->sl_fidxs;
12501 	    }
12502 	    else
12503 	    {
12504 		byts = slang->sl_kbyts;
12505 		idxs = slang->sl_kidxs;
12506 	    }
12507 	    if (byts == NULL)
12508 		continue;		/* array is empty */
12509 
12510 	    depth = 0;
12511 	    arridx[0] = 0;
12512 	    curi[0] = 1;
12513 	    while (depth >= 0 && !got_int)
12514 	    {
12515 		if (curi[depth] > byts[arridx[depth]])
12516 		{
12517 		    /* Done all bytes at this node, go up one level. */
12518 		    --depth;
12519 		    line_breakcheck();
12520 		}
12521 		else
12522 		{
12523 		    /* Do one more byte at this node. */
12524 		    n = arridx[depth] + curi[depth];
12525 		    ++curi[depth];
12526 		    c = byts[n];
12527 		    if (c == 0)
12528 		    {
12529 			/* End of word, deal with the word.
12530 			 * Don't use keep-case words in the fold-case tree,
12531 			 * they will appear in the keep-case tree.
12532 			 * Only use the word when the region matches. */
12533 			flags = (int)idxs[n];
12534 			if ((round == 2 || (flags & WF_KEEPCAP) == 0)
12535 				&& (flags & WF_NEEDCOMP) == 0
12536 				&& (do_region
12537 				    || (flags & WF_REGION) == 0
12538 				    || (((unsigned)flags >> 16)
12539 						       & lp->lp_region) != 0))
12540 			{
12541 			    word[depth] = NUL;
12542 			    if (!do_region)
12543 				flags &= ~WF_REGION;
12544 
12545 			    /* Dump the basic word if there is no prefix or
12546 			     * when it's the first one. */
12547 			    c = (unsigned)flags >> 24;
12548 			    if (c == 0 || curi[depth] == 2)
12549 				dump_word(word, round, flags, lnum++);
12550 
12551 			    /* Apply the prefix, if there is one. */
12552 			    if (c != 0)
12553 				lnum = dump_prefixes(slang, word, round,
12554 								 flags, lnum);
12555 			}
12556 		    }
12557 		    else
12558 		    {
12559 			/* Normal char, go one level deeper. */
12560 			word[depth++] = c;
12561 			arridx[depth] = idxs[n];
12562 			curi[depth] = 1;
12563 		    }
12564 		}
12565 	    }
12566 	}
12567     }
12568 
12569     /* Delete the empty line that we started with. */
12570     if (curbuf->b_ml.ml_line_count > 1)
12571 	ml_delete(curbuf->b_ml.ml_line_count, FALSE);
12572 
12573     redraw_later(NOT_VALID);
12574 }
12575 
12576 /*
12577  * Dump one word: apply case modifications and append a line to the buffer.
12578  */
12579     static void
12580 dump_word(word, round, flags, lnum)
12581     char_u	*word;
12582     int		round;
12583     int		flags;
12584     linenr_T	lnum;
12585 {
12586     int		keepcap = FALSE;
12587     char_u	*p;
12588     char_u	cword[MAXWLEN];
12589     char_u	badword[MAXWLEN + 10];
12590     int		i;
12591 
12592     if (round == 1 && (flags & WF_CAPMASK) != 0)
12593     {
12594 	/* Need to fix case according to "flags". */
12595 	make_case_word(word, cword, flags);
12596 	p = cword;
12597     }
12598     else
12599     {
12600 	p = word;
12601 	if (round == 2 && ((captype(word, NULL) & WF_KEEPCAP) == 0
12602 						 || (flags & WF_FIXCAP) != 0))
12603 	    keepcap = TRUE;
12604     }
12605 
12606     /* Add flags and regions after a slash. */
12607     if ((flags & (WF_BANNED | WF_RARE | WF_REGION)) || keepcap)
12608     {
12609 	STRCPY(badword, p);
12610 	STRCAT(badword, "/");
12611 	if (keepcap)
12612 	    STRCAT(badword, "=");
12613 	if (flags & WF_BANNED)
12614 	    STRCAT(badword, "!");
12615 	else if (flags & WF_RARE)
12616 	    STRCAT(badword, "?");
12617 	if (flags & WF_REGION)
12618 	    for (i = 0; i < 7; ++i)
12619 		if (flags & (0x10000 << i))
12620 		    sprintf((char *)badword + STRLEN(badword), "%d", i + 1);
12621 	p = badword;
12622     }
12623 
12624     ml_append(lnum, p, (colnr_T)0, FALSE);
12625 }
12626 
12627 /*
12628  * For ":spelldump": Find matching prefixes for "word".  Prepend each to
12629  * "word" and append a line to the buffer.
12630  * Return the updated line number.
12631  */
12632     static linenr_T
12633 dump_prefixes(slang, word, round, flags, startlnum)
12634     slang_T	*slang;
12635     char_u	*word;	    /* case-folded word */
12636     int		round;
12637     int		flags;	    /* flags with prefix ID */
12638     linenr_T	startlnum;
12639 {
12640     idx_T	arridx[MAXWLEN];
12641     int		curi[MAXWLEN];
12642     char_u	prefix[MAXWLEN];
12643     char_u	word_up[MAXWLEN];
12644     int		has_word_up = FALSE;
12645     int		c;
12646     char_u	*byts;
12647     idx_T	*idxs;
12648     linenr_T	lnum = startlnum;
12649     int		depth;
12650     int		n;
12651     int		len;
12652     int		i;
12653 
12654     /* if the word starts with a lower-case letter make the word with an
12655      * upper-case letter in word_up[]. */
12656     c = PTR2CHAR(word);
12657     if (SPELL_TOUPPER(c) != c)
12658     {
12659 	onecap_copy(word, word_up, TRUE);
12660 	has_word_up = TRUE;
12661     }
12662 
12663     byts = slang->sl_pbyts;
12664     idxs = slang->sl_pidxs;
12665     if (byts != NULL)		/* array not is empty */
12666     {
12667 	/*
12668 	 * Loop over all prefixes, building them byte-by-byte in prefix[].
12669 	 * When at the end of a prefix check that it supports "flags".
12670 	 */
12671 	depth = 0;
12672 	arridx[0] = 0;
12673 	curi[0] = 1;
12674 	while (depth >= 0 && !got_int)
12675 	{
12676 	    n = arridx[depth];
12677 	    len = byts[n];
12678 	    if (curi[depth] > len)
12679 	    {
12680 		/* Done all bytes at this node, go up one level. */
12681 		--depth;
12682 		line_breakcheck();
12683 	    }
12684 	    else
12685 	    {
12686 		/* Do one more byte at this node. */
12687 		n += curi[depth];
12688 		++curi[depth];
12689 		c = byts[n];
12690 		if (c == 0)
12691 		{
12692 		    /* End of prefix, find out how many IDs there are. */
12693 		    for (i = 1; i < len; ++i)
12694 			if (byts[n + i] != 0)
12695 			    break;
12696 		    curi[depth] += i - 1;
12697 
12698 		    c = valid_word_prefix(i, n, flags, word, slang, FALSE);
12699 		    if (c != 0)
12700 		    {
12701 			vim_strncpy(prefix + depth, word, MAXWLEN - depth - 1);
12702 			dump_word(prefix, round,
12703 				(c & WF_RAREPFX) ? (flags | WF_RARE)
12704 							     : flags, lnum++);
12705 		    }
12706 
12707 		    /* Check for prefix that matches the word when the
12708 		     * first letter is upper-case, but only if the prefix has
12709 		     * a condition. */
12710 		    if (has_word_up)
12711 		    {
12712 			c = valid_word_prefix(i, n, flags, word_up, slang,
12713 									TRUE);
12714 			if (c != 0)
12715 			{
12716 			    vim_strncpy(prefix + depth, word_up,
12717 							 MAXWLEN - depth - 1);
12718 			    dump_word(prefix, round,
12719 				    (c & WF_RAREPFX) ? (flags | WF_RARE)
12720 							     : flags, lnum++);
12721 			}
12722 		    }
12723 		}
12724 		else
12725 		{
12726 		    /* Normal char, go one level deeper. */
12727 		    prefix[depth++] = c;
12728 		    arridx[depth] = idxs[n];
12729 		    curi[depth] = 1;
12730 		}
12731 	    }
12732 	}
12733     }
12734 
12735     return lnum;
12736 }
12737 
12738 /*
12739  * Move "p" to end of word.
12740  */
12741     char_u *
12742 spell_to_word_end(start, buf)
12743     char_u  *start;
12744     buf_T   *buf;
12745 {
12746     char_u  *p = start;
12747 
12748     while (*p != NUL && spell_iswordp(p, buf))
12749 	mb_ptr_adv(p);
12750     return p;
12751 }
12752 
12753 #if defined(FEAT_INS_EXPAND) || defined(PROTO)
12754 static int spell_expand_need_cap;
12755 
12756 /*
12757  * Find start of the word in front of the cursor.  We don't check if it is
12758  * badly spelled, with completion we can only change the word in front of the
12759  * cursor.
12760  * Used for Insert mode completion CTRL-X ?.
12761  * Returns the column number of the word.
12762  */
12763     int
12764 spell_word_start(startcol)
12765     int		startcol;
12766 {
12767     char_u	*line;
12768     char_u	*p;
12769     int		col = 0;
12770 
12771     if (no_spell_checking(curwin))
12772 	return startcol;
12773 
12774     /* Find a word character before "startcol". */
12775     line = ml_get_curline();
12776     for (p = line + startcol; p > line; )
12777     {
12778 	mb_ptr_back(line, p);
12779 	if (spell_iswordp_nmw(p))
12780 	    break;
12781     }
12782 
12783     /* Go back to start of the word. */
12784     while (p > line)
12785     {
12786 	col = p - line;
12787 	mb_ptr_back(line, p);
12788 	if (!spell_iswordp(p, curbuf))
12789 	    break;
12790 	col = 0;
12791     }
12792 
12793     /* Need to check for 'spellcapcheck' now, the word is removed before
12794      * expand_spelling() is called.  Therefore the ugly global variable. */
12795     spell_expand_need_cap = check_need_cap(curwin->w_cursor.lnum, col);
12796 
12797     return col;
12798 }
12799 
12800 /*
12801  * Get list of spelling suggestions.
12802  * Used for Insert mode completion CTRL-X ?.
12803  * Returns the number of matches.  The matches are in "matchp[]", array of
12804  * allocated strings.
12805  */
12806 /*ARGSUSED*/
12807     int
12808 expand_spelling(lnum, col, pat, matchp)
12809     linenr_T	lnum;
12810     int		col;
12811     char_u	*pat;
12812     char_u	***matchp;
12813 {
12814     garray_T	ga;
12815 
12816     spell_suggest_list(&ga, pat, 100, spell_expand_need_cap);
12817     *matchp = ga.ga_data;
12818     return ga.ga_len;
12819 }
12820 #endif
12821 
12822 #endif  /* FEAT_SYN_HL */
12823