xref: /vim-8.2.3635/src/spell.c (revision 577fadfc)
1 /* vi:set ts=8 sts=4 sw=4 noet:
2  *
3  * VIM - Vi IMproved	by Bram Moolenaar
4  *
5  * Do ":help uganda"  in Vim to read copying and usage conditions.
6  * Do ":help credits" in Vim to see a list of people who contributed.
7  * See README.txt for an overview of the Vim source code.
8  */
9 
10 /*
11  * spell.c: code for spell checking
12  *
13  * See spellfile.c for the Vim spell file format.
14  *
15  * The spell checking mechanism uses a tree (aka trie).  Each node in the tree
16  * has a list of bytes that can appear (siblings).  For each byte there is a
17  * pointer to the node with the byte that follows in the word (child).
18  *
19  * A NUL byte is used where the word may end.  The bytes are sorted, so that
20  * binary searching can be used and the NUL bytes are at the start.  The
21  * number of possible bytes is stored before the list of bytes.
22  *
23  * The tree uses two arrays: "byts" stores the characters, "idxs" stores
24  * either the next index or flags.  The tree starts at index 0.  For example,
25  * to look up "vi" this sequence is followed:
26  *	i = 0
27  *	len = byts[i]
28  *	n = where "v" appears in byts[i + 1] to byts[i + len]
29  *	i = idxs[n]
30  *	len = byts[i]
31  *	n = where "i" appears in byts[i + 1] to byts[i + len]
32  *	i = idxs[n]
33  *	len = byts[i]
34  *	find that byts[i + 1] is 0, idxs[i + 1] has flags for "vi".
35  *
36  * There are two word trees: one with case-folded words and one with words in
37  * original case.  The second one is only used for keep-case words and is
38  * usually small.
39  *
40  * There is one additional tree for when not all prefixes are applied when
41  * generating the .spl file.  This tree stores all the possible prefixes, as
42  * if they were words.  At each word (prefix) end the prefix nr is stored, the
43  * following word must support this prefix nr.  And the condition nr is
44  * stored, used to lookup the condition that the word must match with.
45  *
46  * Thanks to Olaf Seibert for providing an example implementation of this tree
47  * and the compression mechanism.
48  * LZ trie ideas:
49  *	http://www.irb.hr/hr/home/ristov/papers/RistovLZtrieRevision1.pdf
50  * More papers: http://www-igm.univ-mlv.fr/~laporte/publi_en.html
51  *
52  * Matching involves checking the caps type: Onecap ALLCAP KeepCap.
53  *
54  * Why doesn't Vim use aspell/ispell/myspell/etc.?
55  * See ":help develop-spell".
56  */
57 
/*
 * Use this to adjust the score after finding suggestions, based on the
 * suggested word sounding like the bad word.  This is much faster than doing
 * it for every possible suggestion.
 * Disadvantage: When "the" is typed as "hte" it sounds quite different ("@"
 * vs "ht") and goes down in the list.
 * Used when 'spellsuggest' is set to "best".
 *
 * The result is the weighted average (3 * word_score + sound_score) / 4,
 * computed with integer division.  Both arguments are parenthesized in the
 * expansion, so an expression such as "a + b" or "x ? y : z" can be passed
 * safely.  Each argument is evaluated exactly once.
 */
#define RESCORE(word_score, sound_score) \
	((3 * (word_score) + (sound_score)) / 4)
67 
/*
 * Do the opposite of RESCORE(): based on a maximum end score (passed as
 * "word_score") and a known sound score, compute the maximum word score that
 * can be used: (4 * word_score - sound_score) / 3, with integer division.
 *
 * Both arguments are parenthesized in the expansion, so an expression such as
 * "a + b" can be passed safely.  Each argument is evaluated exactly once.
 */
#define MAXSCORE(word_score, sound_score) \
	((4 * (word_score) - (sound_score)) / 3)
73 
74 #define IN_SPELL_C
75 #include "vim.h"
76 
77 #if defined(FEAT_SPELL) || defined(PROTO)
78 
79 #ifndef UNIX		/* it's in os_unix.h for Unix */
80 # include <time.h>	/* for time_t */
81 #endif
82 
83 /* only used for su_badflags */
84 #define WF_MIXCAP   0x20	/* mix of upper and lower case: macaRONI */
85 
/* Mask with all the WF_ flags that describe the caps type of a word. */
86 #define WF_CAPMASK (WF_ONECAP | WF_ALLCAP | WF_KEEPCAP | WF_FIXCAP)
87 
88 #define REGION_ALL 0xff		/* word valid in all regions */
89 
90 #define VIMSUGMAGIC "VIMsug"	/* string at start of Vim .sug file */
91 #define VIMSUGMAGICL 6		/* length of VIMSUGMAGIC in bytes */
92 #define VIMSUGVERSION 1		/* presumably the .sug format version; confirm
				   where the file is read/written */
93 
94 /* Result values.  Lower number is accepted over higher one.
 * spell_check() turns a result other than SP_OK into a highlight group:
 * SP_BAD and SP_BANNED give HLF_SPB, SP_RARE gives HLF_SPR and anything else
 * (SP_LOCAL) gives HLF_SPL. */
95 #define SP_BANNED	-1
96 #define SP_OK		0
97 #define SP_RARE		1
98 #define SP_LOCAL	2
99 #define SP_BAD		3
100 
/*
 * Counter for how often a word was seen.  "wc_word" is declared with a single
 * byte but holds the whole word ("actually longer"), so an item must be
 * allocated with room for the full word.
 * NOTE(review): the allocation site is not in this part of the file.
 */
101 typedef struct wordcount_S
102 {
103     short_u	wc_count;	    /* nr of times word was seen */
104     char_u	wc_word[1];	    /* word, actually longer */
105 } wordcount_T;
106 
/* Byte offset of the word inside a wordcount_T. */
107 #define WC_KEY_OFF  offsetof(wordcount_T, wc_word)
/* Get the wordcount_T from a hash item whose "hi_key" points at its wc_word:
 * stepping back WC_KEY_OFF bytes gives the start of the struct. */
108 #define HI2WC(hi)     ((wordcount_T *)((hi)->hi_key - WC_KEY_OFF))
/* Presumably the highest value stored in wc_count (a short_u) -- confirm at
 * the place where the count is incremented. */
109 #define MAXWORDCOUNT 0xffff
110 
111 /*
112  * Information used when looking for suggestions.
 * One suginfo_T is passed to all the functions that find, score and clean up
 * suggestions (see the "suginfo_T *su" arguments of the static functions
 * declared further down).
113  */
114 typedef struct suginfo_S
115 {
116     garray_T	su_ga;		    /* suggestions, contains "suggest_T" */
117     int		su_maxcount;	    /* max. number of suggestions displayed */
118     int		su_maxscore;	    /* maximum score for adding to su_ga */
119     int		su_sfmaxscore;	    /* idem, for when doing soundfold words */
120     garray_T	su_sga;		    /* like su_ga, sound-folded scoring */
121     char_u	*su_badptr;	    /* start of bad word in line */
122     int		su_badlen;	    /* length of detected bad word in line */
123     int		su_badflags;	    /* caps flags for bad word; WF_MIXCAP is
				       only used here */
124     char_u	su_badword[MAXWLEN]; /* bad word truncated at su_badlen */
125     char_u	su_fbadword[MAXWLEN]; /* su_badword case-folded */
126     char_u	su_sal_badword[MAXWLEN]; /* su_badword soundfolded */
127     hashtab_T	su_banned;	    /* table with banned words, looked up with
				       WAS_BANNED() */
128     slang_T	*su_sallang;	    /* default language for sound folding */
129 } suginfo_T;
130 
131 /* One word suggestion.  Used in "su_ga" and "su_sga" of suginfo_T. */
132 typedef struct suggest_S
133 {
134     char_u	*st_word;	/* suggested word, allocated string */
135     int		st_wordlen;	/* STRLEN(st_word) */
136     int		st_orglen;	/* length of replaced text */
137     int		st_score;	/* lower is better */
138     int		st_altscore;	/* used when st_score compares equal */
139     int		st_salscore;	/* st_score is for soundalike */
140     int		st_had_bonus;	/* bonus already included in score */
141     slang_T	*st_slang;	/* language used for sound folding */
142 } suggest_T;
143 
/* Access suggestion number "i" in growarray "ga" (a garray_T, not a pointer),
 * treating ga_data as an array of suggest_T. */
144 #define SUG(ga, i) (((suggest_T *)(ga).ga_data)[i])
145 
/* TRUE if "word" appears in the table of banned words of "su".
 * "su" is a pointer to a suginfo_T.  It is parenthesized in the expansion so
 * that any pointer expression (e.g. "base + 1") can be passed, not only a
 * plain identifier. */
#define WAS_BANNED(su, word) \
	(!HASHITEM_EMPTY(hash_find(&(su)->su_banned, (word))))

/* Number of suggestions kept when cleaning up.  We need to keep more than
 * what is displayed, because when rescore_suggestions() is called the score
 * may change and wrong suggestions may be removed later.
 * Gives 150 when su_maxcount is below 130, otherwise su_maxcount + 20.
 * "su" is a pointer to a suginfo_T and is evaluated more than once. */
#define SUG_CLEAN_COUNT(su)    ((su)->su_maxcount < 130 ? 150 : (su)->su_maxcount + 20)

/* Threshold for sorting and cleaning up suggestions.  Don't want to keep lots
 * of suggestions that are not going to be displayed.
 * Always 50 more than SUG_CLEAN_COUNT(). */
#define SUG_MAX_COUNT(su)	(SUG_CLEAN_COUNT(su) + 50)
157 
/* score for various changes; a lower score means a better suggestion */
#define SCORE_SPLIT	149	/* split bad word */
#define SCORE_SPLIT_NO	249	/* split bad word with NOSPLITSUGS */
#define SCORE_ICASE	52	/* slightly different case */
#define SCORE_REGION	200	/* word is for different region */
#define SCORE_RARE	180	/* rare word */
#define SCORE_SWAP	75	/* swap two characters */
#define SCORE_SWAP3	110	/* swap two characters in three */
#define SCORE_REP	65	/* REP replacement */
#define SCORE_SUBST	93	/* substitute a character */
#define SCORE_SIMILAR	33	/* substitute a similar character */
#define SCORE_SUBCOMP	33	/* substitute a composing character */
#define SCORE_DEL	94	/* delete a character */
#define SCORE_DELDUP	66	/* delete a duplicated character */
#define SCORE_DELCOMP	28	/* delete a composing character */
#define SCORE_INS	96	/* insert a character */
#define SCORE_INSDUP	67	/* insert a duplicate character */
#define SCORE_INSCOMP	30	/* insert a composing character */
#define SCORE_NONWORD	103	/* change non-word to word char */

#define SCORE_FILE	30	/* suggestion from a file */
#define SCORE_MAXINIT	350	/* Initial maximum score: higher == slower.
				 * 350 allows for about three changes. */

#define SCORE_COMMON1	30	/* subtracted for words seen before */
#define SCORE_COMMON2	40	/* subtracted for words often seen */
#define SCORE_COMMON3	50	/* subtracted for words very often seen */
#define SCORE_THRES2	10	/* word count threshold for COMMON2 */
#define SCORE_THRES3	100	/* word count threshold for COMMON3 */

/* When trying changed soundfold words it becomes slow when trying more than
 * two changes.  With fewer than two changes it's slightly faster but we miss
 * a few good suggestions.  In rare cases we need to try three or four
 * changes.
 */
#define SCORE_SFMAX1	200	/* maximum score for first try */
#define SCORE_SFMAX2	300	/* maximum score for second try */
#define SCORE_SFMAX3	400	/* maximum score for third try */

/* The expansion is parenthesized so that SCORE_BIG is a single value in any
 * expression, e.g. "x / SCORE_BIG" or "x % SCORE_BIG". */
#define SCORE_BIG	(SCORE_INS * 3)	/* big difference */
#define SCORE_MAXMAX	999999		/* accept any score */
#define SCORE_LIMITMAX	350		/* for spell_edit_score_limit() */

/* for spell_edit_score_limit() we need to know the minimum value of
 * SCORE_ICASE, SCORE_SWAP, SCORE_DEL, SCORE_SIMILAR and SCORE_INS */
#define SCORE_EDIT_MIN	SCORE_SIMILAR
203 
204 /*
205  * Structure to store info for word matching.
 * spell_check() clears one of these, fills in the word pointers and the
 * case-folded word and then passes it to find_word() and find_prefix(), which
 * update "mi_result" and "mi_end" for a match.
206  */
207 typedef struct matchinf_S
208 {
209     langp_T	*mi_lp;			/* info for language and region */
210 
211     /* pointers to original text to be checked */
212     char_u	*mi_word;		/* start of word being checked */
213     char_u	*mi_end;		/* end of matching word so far */
214     char_u	*mi_fend;		/* next char to be added to mi_fword */
215     char_u	*mi_cend;		/* char after what was used for
216 					   mi_capflags */
217 
218     /* case-folded text */
219     char_u	mi_fword[MAXWLEN + 1];	/* mi_word case-folded */
220     int		mi_fwordlen;		/* nr of valid bytes in mi_fword */
221 
222     /* for when checking word after a prefix */
223     int		mi_prefarridx;		/* index in sl_pidxs with list of
224 					   affixID/condition */
225     int		mi_prefcnt;		/* number of entries at mi_prefarridx */
226     int		mi_prefixlen;		/* byte length of prefix */
227     int		mi_cprefixlen;		/* byte length of prefix in original
228 					   case */
229 
230     /* for when checking a compound word */
231     int		mi_compoff;		/* start of following word offset */
232     char_u	mi_compflags[MAXWLEN];	/* flags for compound words used */
233     int		mi_complen;		/* nr of compound words used */
234     int		mi_compextra;		/* nr of COMPOUNDROOT words */
235 
236     /* others */
237     int		mi_result;		/* result so far: SP_BAD, SP_OK, etc. */
238     int		mi_capflags;		/* WF_ONECAP WF_ALLCAP WF_KEEPCAP */
239     win_T	*mi_win;		/* window whose buffer is being checked */
240 
241     /* for NOBREAK */
242     int		mi_result2;		/* "mi_result" without following word */
243     char_u	*mi_end2;		/* "mi_end" without following word */
244 } matchinf_T;
245 
246 
/* Forward declarations of two file-local functions.  spell_iswordp() is used
 * by spell_check() and find_word() to test whether a position is at a word
 * character; spell_mb_isword_class() is not called in this part of the file. */
247 static int spell_iswordp(char_u *p, win_T *wp);
248 static int spell_mb_isword_class(int cl, win_T *wp);
249 
250 /*
251  * For finding suggestions: At each node in the tree these states are tried:
 * The current state of a level is kept in the "ts_state" field of trystate_T.
 * STATE_START is explicitly zero, the other values follow in the order
 * listed here.
252  */
253 typedef enum
254 {
255     STATE_START = 0,	/* At start of node check for NUL bytes (goodword
256 			 * ends); if badword ends there is a match, otherwise
257 			 * try splitting word. */
258     STATE_NOPREFIX,	/* try without prefix */
259     STATE_SPLITUNDO,	/* Undo splitting. */
260     STATE_ENDNUL,	/* Past NUL bytes at start of the node. */
261     STATE_PLAIN,	/* Use each byte of the node. */
262     STATE_DEL,		/* Delete a byte from the bad word. */
263     STATE_INS_PREP,	/* Prepare for inserting bytes. */
264     STATE_INS,		/* Insert a byte in the bad word. */
265     STATE_SWAP,		/* Swap two bytes. */
266     STATE_UNSWAP,	/* Undo swap two characters. */
267     STATE_SWAP3,	/* Swap two characters over three. */
268     STATE_UNSWAP3,	/* Undo Swap two characters over three. */
269     STATE_UNROT3L,	/* Undo rotate three characters left */
270     STATE_UNROT3R,	/* Undo rotate three characters right */
271     STATE_REP_INI,	/* Prepare for using REP items. */
272     STATE_REP,		/* Use matching REP items from the .aff file. */
273     STATE_REP_UNDO,	/* Undo a REP item replacement. */
274     STATE_FINAL		/* End of this node. */
275 } state_T;
276 
277 /*
278  * Struct to keep the state at each level in suggest_try_change().
 * Most fields are a single byte (char_u): they hold indexes and lengths in
 * the word buffers, small flag sets (TSF_, DIFF_) or the special PFD_ values.
279  */
280 typedef struct trystate_S
281 {
282     state_T	ts_state;	/* state at this level, STATE_ */
283     int		ts_score;	/* score */
284     idx_T	ts_arridx;	/* index in tree array, start of node */
285     short	ts_curi;	/* index in list of child nodes */
286     char_u	ts_fidx;	/* index in fword[], case-folded bad word */
287     char_u	ts_fidxtry;	/* ts_fidx at which bytes may be changed */
288     char_u	ts_twordlen;	/* valid length of tword[] */
289     char_u	ts_prefixdepth;	/* stack depth for end of prefix or
290 				 * PFD_PREFIXTREE or PFD_NOPREFIX */
291     char_u	ts_flags;	/* TSF_ flags */
292     char_u	ts_tcharlen;	/* number of bytes in tword character */
293     char_u	ts_tcharidx;	/* current byte index in tword character */
294     char_u	ts_isdiff;	/* DIFF_ values */
295     char_u	ts_fcharstart;	/* index in fword where badword char started */
296     char_u	ts_prewordlen;	/* length of word in "preword[]" */
297     char_u	ts_splitoff;	/* index in "tword" after last split */
298     char_u	ts_splitfidx;	/* "ts_fidx" at word split */
299     char_u	ts_complen;	/* nr of compound words used */
300     char_u	ts_compsplit;	/* index for "compflags" where word was split */
301     char_u	ts_save_badflags;   /* su_badflags saved here */
302     char_u	ts_delidx;	/* index in fword for char that was deleted,
303 				   valid when "ts_flags" has TSF_DIDDEL */
304 } trystate_T;
305 
306 /* values for ts_isdiff */
307 #define DIFF_NONE	0	/* no different byte (yet) */
308 #define DIFF_YES	1	/* different byte found */
309 #define DIFF_INSERT	2	/* inserting character */
310 
311 /* values for ts_flags; each value is a separate bit */
312 #define TSF_PREFIXOK	1	/* already checked that prefix is OK */
313 #define TSF_DIDSPLIT	2	/* tried split at this point */
314 #define TSF_DIDDEL	4	/* did a delete, "ts_delidx" has index */
315 
316 /* special values ts_prefixdepth; they use the top of the char_u range, any
 * value up to PFD_NOTSPECIAL is not special */
317 #define PFD_NOPREFIX	0xff	/* not using prefixes */
318 #define PFD_PREFIXTREE	0xfe	/* walking through the prefix tree */
319 #define PFD_NOTSPECIAL	0xfd	/* highest value that's not special */
320 
321 /* mode values for find_word(): the KEEP modes search the keep-case tree with
 * the original text, the other modes search the case-folded tree with the
 * case-folded word */
322 #define FIND_FOLDWORD	    0	/* find word case-folded */
323 #define FIND_KEEPWORD	    1	/* find keep-case word */
324 #define FIND_PREFIX	    2	/* find word after prefix */
325 #define FIND_COMPOUND	    3	/* find case-folded compound word */
326 #define FIND_KEEPCOMPOUND   4	/* find keep-case compound word */
327 
/*
 * Forward declarations of the functions that are local to this file (all are
 * static).  find_word() and find_prefix() do the word matching for
 * spell_check(); the functions taking a "suginfo_T *su" work on suggestions.
 */
328 static void find_word(matchinf_T *mip, int mode);
329 static int match_checkcompoundpattern(char_u *ptr, int wlen, garray_T *gap);
330 static int can_compound(slang_T *slang, char_u *word, char_u *flags);
331 static int match_compoundrule(slang_T *slang, char_u *compflags);
332 static int valid_word_prefix(int totprefcnt, int arridx, int flags, char_u *word, slang_T *slang, int cond_req);
333 static void find_prefix(matchinf_T *mip, int mode);
334 static int fold_more(matchinf_T *mip);
335 static int spell_valid_case(int wordflags, int treeflags);
336 static void spell_load_cb(char_u *fname, void *cookie);
337 static int count_syllables(slang_T *slang, char_u *word);
338 static void clear_midword(win_T *buf);
339 static void use_midword(slang_T *lp, win_T *buf);
340 static int find_region(char_u *rp, char_u *region);
341 static int check_need_cap(linenr_T lnum, colnr_T col);
342 static void spell_find_suggest(char_u *badptr, int badlen, suginfo_T *su, int maxcount, int banbadword, int need_cap, int interactive);
343 #ifdef FEAT_EVAL
344 static void spell_suggest_expr(suginfo_T *su, char_u *expr);
345 #endif
346 static void spell_suggest_file(suginfo_T *su, char_u *fname);
347 static void spell_suggest_intern(suginfo_T *su, int interactive);
348 static void spell_find_cleanup(suginfo_T *su);
349 static void suggest_try_special(suginfo_T *su);
350 static void suggest_try_change(suginfo_T *su);
351 static void suggest_trie_walk(suginfo_T *su, langp_T *lp, char_u *fword, int soundfold);
352 static void go_deeper(trystate_T *stack, int depth, int score_add);
353 static int nofold_len(char_u *fword, int flen, char_u *word);
354 static void find_keepcap_word(slang_T *slang, char_u *fword, char_u *kword);
355 static void score_comp_sal(suginfo_T *su);
356 static void score_combine(suginfo_T *su);
357 static int stp_sal_score(suggest_T *stp, suginfo_T *su, slang_T *slang, char_u *badsound);
358 static void suggest_try_soundalike_prep(void);
359 static void suggest_try_soundalike(suginfo_T *su);
360 static void suggest_try_soundalike_finish(void);
361 static void add_sound_suggest(suginfo_T *su, char_u *goodword, int score, langp_T *lp);
362 static int soundfold_find(slang_T *slang, char_u *word);
363 static void make_case_word(char_u *fword, char_u *cword, int flags);
364 static int similar_chars(slang_T *slang, int c1, int c2);
365 static void add_suggestion(suginfo_T *su, garray_T *gap, char_u *goodword, int badlen, int score, int altscore, int had_bonus, slang_T *slang, int maxsf);
366 static void check_suggestions(suginfo_T *su, garray_T *gap);
367 static void add_banned(suginfo_T *su, char_u *word);
368 static void rescore_suggestions(suginfo_T *su);
369 static void rescore_one(suginfo_T *su, suggest_T *stp);
370 static int cleanup_suggestions(garray_T *gap, int maxscore, int keep);
371 static void spell_soundfold_sofo(slang_T *slang, char_u *inword, char_u *res);
372 static void spell_soundfold_sal(slang_T *slang, char_u *inword, char_u *res);
373 static void spell_soundfold_wsal(slang_T *slang, char_u *inword, char_u *res);
374 static int soundalike_score(char_u *goodsound, char_u *badsound);
375 static int spell_edit_score(slang_T *slang, char_u *badword, char_u *goodword);
376 static int spell_edit_score_limit(slang_T *slang, char_u *badword, char_u *goodword, int limit);
377 static int spell_edit_score_limit_w(slang_T *slang, char_u *badword, char_u *goodword, int limit);
378 static void dump_word(slang_T *slang, char_u *word, char_u *pat, int *dir, int round, int flags, linenr_T lnum);
379 static linenr_T dump_prefixes(slang_T *slang, char_u *word, char_u *pat, int *dir, int round, int flags, linenr_T startlnum);
380 
381 
382 /* Remember what "z?" replaced.
 * Both pointers start out as NULL; the code that sets and uses them is not in
 * this part of the file. */
383 static char_u	*repl_from = NULL;
384 static char_u	*repl_to = NULL;
385 
386 /*
387  * Main spell-checking function.
388  * "ptr" points to a character that could be the start of a word.
389  * "*attrp" is set to the highlight index for a badly spelled word.  For a
390  * non-word or when it's OK it remains unchanged.
 * The values used are HLF_SPB (bad or banned word), HLF_SPR (rare word),
 * HLF_SPL (any other result that is not OK) and HLF_SPC (word is OK or rare
 * but should have started with a capital).
391  * This must only be called when 'spelllang' is not empty.
392  *
393  * "capcol" is used to check for a Capitalised word after the end of a
394  * sentence.  If "*capcol" is zero then perform the check.  "*capcol" is set
395  * to the offset from "ptr" where to check next, or -1 when no sentence end
 * was found.  If "capcol" is NULL then don't
396  * worry.
397  *
 * When "docount" is TRUE count_common_word() is called for the word, in the
 * first language where it is found to be OK.
 *
398  * Returns the length of the word in bytes, also when it's OK, so that the
399  * caller can skip over the word.
 * Returns 1 without looking further when "ptr" is at a space or control
 * character, or when no languages are loaded.
400  */
401     int
402 spell_check(
403     win_T	*wp,		/* current window */
404     char_u	*ptr,
405     hlf_T	*attrp,
406     int		*capcol,	/* column to check for Capital */
407     int		docount)	/* count good words */
408 {
409     matchinf_T	mi;		/* Most things are put in "mi" so that it can
410 				   be passed to functions quickly. */
411     int		nrlen = 0;	/* found a number first */
412     int		c;
413     int		wrongcaplen = 0;
414     int		lpi;
415     int		count_word = docount;
416 
417     /* A word never starts at a space or a control character.  Return quickly
418      * then, skipping over the character. */
419     if (*ptr <= ' ')
420 	return 1;
421 
422     /* Return here when loading language files failed. */
423     if (wp->w_s->b_langp.ga_len == 0)
424 	return 1;
425 
426     vim_memset(&mi, 0, sizeof(matchinf_T));
427 
428     /* A number is always OK.  Also skip hexadecimal numbers 0xFF99 and
429      * 0X99FF.  But always do check spelling to find "3GPP" and "11
430      * julifeest".
     * "mi.mi_end" is only used here to compute "nrlen", it is set again
     * below. */
431     if (*ptr >= '0' && *ptr <= '9')
432     {
433 	if (*ptr == '0' && (ptr[1] == 'b' || ptr[1] == 'B'))
434 	    mi.mi_end = skipbin(ptr + 2);
435 	else if (*ptr == '0' && (ptr[1] == 'x' || ptr[1] == 'X'))
436 	    mi.mi_end = skiphex(ptr + 2);
437 	else
438 	    mi.mi_end = skipdigits(ptr);
439 	nrlen = (int)(mi.mi_end - ptr);
440     }
441 
442     /* Find the normal end of the word (until the next non-word character). */
443     mi.mi_word = ptr;
444     mi.mi_fend = ptr;
445     if (spell_iswordp(mi.mi_fend, wp))
446     {
447 	do
448 	    MB_PTR_ADV(mi.mi_fend);
449 	while (*mi.mi_fend != NUL && spell_iswordp(mi.mi_fend, wp));
450 
451 	if (capcol != NULL && *capcol == 0 && wp->w_s->b_cap_prog != NULL)
452 	{
453 	    /* Check word starting with capital letter.  When it doesn't,
	     * remember the length of the word for reporting HLF_SPC. */
454 	    c = PTR2CHAR(ptr);
455 	    if (!SPELL_ISUPPER(c))
456 		wrongcaplen = (int)(mi.mi_fend - ptr);
457 	}
458     }
    /* Assume no sentence end is found; changed below when the regexp
     * matches. */
459     if (capcol != NULL)
460 	*capcol = -1;
461 
462     /* We always use the characters up to the next non-word character,
463      * also for bad words. */
464     mi.mi_end = mi.mi_fend;
465 
466     /* Check caps type later. */
467     mi.mi_capflags = 0;
468     mi.mi_cend = NULL;
469     mi.mi_win = wp;
470 
471     /* case-fold the word with one non-word character, so that we can check
472      * for the word end. */
473     if (*mi.mi_fend != NUL)
474 	MB_PTR_ADV(mi.mi_fend);
475 
476     (void)spell_casefold(ptr, (int)(mi.mi_fend - ptr), mi.mi_fword,
477 							     MAXWLEN + 1);
478     mi.mi_fwordlen = (int)STRLEN(mi.mi_fword);
479 
480     /* The word is bad unless we recognize it. */
481     mi.mi_result = SP_BAD;
482     mi.mi_result2 = SP_BAD;
483 
484     /*
485      * Loop over the languages specified in 'spelllang'.
486      * We check them all, because a word may be matched longer in another
487      * language.
488      */
489     for (lpi = 0; lpi < wp->w_s->b_langp.ga_len; ++lpi)
490     {
491 	mi.mi_lp = LANGP_ENTRY(wp->w_s->b_langp, lpi);
492 
493 	/* If reloading fails the language is still in the list but everything
494 	 * has been cleared. */
495 	if (mi.mi_lp->lp_slang->sl_fidxs == NULL)
496 	    continue;
497 
498 	/* Check for a matching word in case-folded words. */
499 	find_word(&mi, FIND_FOLDWORD);
500 
501 	/* Check for a matching word in keep-case words. */
502 	find_word(&mi, FIND_KEEPWORD);
503 
504 	/* Check for matching prefixes. */
505 	find_prefix(&mi, FIND_FOLDWORD);
506 
507 	/* For a NOBREAK language, may want to use a word without a following
508 	 * word as a backup. */
509 	if (mi.mi_lp->lp_slang->sl_nobreak && mi.mi_result == SP_BAD
510 						   && mi.mi_result2 != SP_BAD)
511 	{
512 	    mi.mi_result = mi.mi_result2;
513 	    mi.mi_end = mi.mi_end2;
514 	}
515 
516 	/* Count the word in the first language where it's found to be OK.
	 * "count_word" is reset so that the word is counted only once. */
517 	if (count_word && mi.mi_result == SP_OK)
518 	{
519 	    count_common_word(mi.mi_lp->lp_slang, ptr,
520 						   (int)(mi.mi_end - ptr), 1);
521 	    count_word = FALSE;
522 	}
523     }
524 
525     if (mi.mi_result != SP_OK)
526     {
527 	/* If we found a number skip over it.  Allows for "42nd".  Do flag
528 	 * rare and local words, e.g., "3GPP".
	 * Note that the "else if" branches below belong to this
	 * "if (nrlen > 0)": they are only tried when no number was found. */
529 	if (nrlen > 0)
530 	{
531 	    if (mi.mi_result == SP_BAD || mi.mi_result == SP_BANNED)
532 		return nrlen;
533 	}
534 
535 	/* When we are at a non-word character there is no error, just
536 	 * skip over the character (try looking for a word after it). */
537 	else if (!spell_iswordp_nmw(ptr, wp))
538 	{
539 	    if (capcol != NULL && wp->w_s->b_cap_prog != NULL)
540 	    {
541 		regmatch_T	regmatch;
542 		int		r;
543 
544 		/* Check for end of sentence.  The program pointer is stored
		 * back afterwards, presumably because vim_regexec() may
		 * replace it -- confirm in the regexp code. */
545 		regmatch.regprog = wp->w_s->b_cap_prog;
546 		regmatch.rm_ic = FALSE;
547 		r = vim_regexec(&regmatch, ptr, 0);
548 		wp->w_s->b_cap_prog = regmatch.regprog;
549 		if (r)
550 		    *capcol = (int)(regmatch.endp[0] - ptr);
551 	    }
552 
	    /* Skip over one character: its byte length for multi-byte. */
553 	    if (has_mbyte)
554 		return (*mb_ptr2len)(ptr);
555 	    return 1;
556 	}
557 	else if (mi.mi_end == ptr)
558 	    /* Always include at least one character.  Required for when there
559 	     * is a mixup in "midword". */
560 	    MB_PTR_ADV(mi.mi_end);
561 	else if (mi.mi_result == SP_BAD
562 		&& LANGP_ENTRY(wp->w_s->b_langp, 0)->lp_slang->sl_nobreak)
563 	{
564 	    char_u	*p, *fp;
565 	    int		save_result = mi.mi_result;
566 
567 	    /* First language in 'spelllang' is NOBREAK.  Find first position
568 	     * at which any word would be valid. */
569 	    mi.mi_lp = LANGP_ENTRY(wp->w_s->b_langp, 0);
570 	    if (mi.mi_lp->lp_slang->sl_fidxs != NULL)
571 	    {
		/* "p" walks over the original word and "fp" over the
		 * case-folded word, one character at a time. */
572 		p = mi.mi_word;
573 		fp = mi.mi_fword;
574 		for (;;)
575 		{
576 		    MB_PTR_ADV(p);
577 		    MB_PTR_ADV(fp);
578 		    if (p >= mi.mi_end)
579 			break;
580 		    mi.mi_compoff = (int)(fp - mi.mi_fword);
581 		    find_word(&mi, FIND_COMPOUND);
582 		    if (mi.mi_result != SP_BAD)
583 		    {
584 			mi.mi_end = p;
585 			break;
586 		    }
587 		}
		/* The search was only done to find a shorter "mi_end", the
		 * word is still reported with the saved result. */
588 		mi.mi_result = save_result;
589 	    }
590 	}
591 
	/* Select the highlighting for the result. */
592 	if (mi.mi_result == SP_BAD || mi.mi_result == SP_BANNED)
593 	    *attrp = HLF_SPB;
594 	else if (mi.mi_result == SP_RARE)
595 	    *attrp = HLF_SPR;
596 	else
597 	    *attrp = HLF_SPL;
598     }
599 
600     if (wrongcaplen > 0 && (mi.mi_result == SP_OK || mi.mi_result == SP_RARE))
601     {
602 	/* Report SpellCap only when the word isn't badly spelled.  This
	 * overrules HLF_SPR that was set above for a rare word. */
603 	*attrp = HLF_SPC;
604 	return wrongcaplen;
605     }
606 
607     return (int)(mi.mi_end - ptr);
608 }
609 
610 /*
611  * Check if the word at "mip->mi_word" is in the tree.
612  * When "mode" is FIND_FOLDWORD check in fold-case word tree.
613  * When "mode" is FIND_KEEPWORD check in keep-case word tree.
614  * When "mode" is FIND_PREFIX check for word after prefix in fold-case word
615  * tree.
616  *
617  * For a match mip->mi_result is updated.
618  */
619     static void
620 find_word(matchinf_T *mip, int mode)
621 {
622     idx_T	arridx = 0;
623     int		endlen[MAXWLEN];    /* length at possible word endings */
624     idx_T	endidx[MAXWLEN];    /* possible word endings */
625     int		endidxcnt = 0;
626     int		len;
627     int		wlen = 0;
628     int		flen;
629     int		c;
630     char_u	*ptr;
631     idx_T	lo, hi, m;
632     char_u	*s;
633     char_u	*p;
634     int		res = SP_BAD;
635     slang_T	*slang = mip->mi_lp->lp_slang;
636     unsigned	flags;
637     char_u	*byts;
638     idx_T	*idxs;
639     int		word_ends;
640     int		prefix_found;
641     int		nobreak_result;
642 
643     if (mode == FIND_KEEPWORD || mode == FIND_KEEPCOMPOUND)
644     {
645 	/* Check for word with matching case in keep-case tree. */
646 	ptr = mip->mi_word;
647 	flen = 9999;		    /* no case folding, always enough bytes */
648 	byts = slang->sl_kbyts;
649 	idxs = slang->sl_kidxs;
650 
651 	if (mode == FIND_KEEPCOMPOUND)
652 	    /* Skip over the previously found word(s). */
653 	    wlen += mip->mi_compoff;
654     }
655     else
656     {
657 	/* Check for case-folded in case-folded tree. */
658 	ptr = mip->mi_fword;
659 	flen = mip->mi_fwordlen;    /* available case-folded bytes */
660 	byts = slang->sl_fbyts;
661 	idxs = slang->sl_fidxs;
662 
663 	if (mode == FIND_PREFIX)
664 	{
665 	    /* Skip over the prefix. */
666 	    wlen = mip->mi_prefixlen;
667 	    flen -= mip->mi_prefixlen;
668 	}
669 	else if (mode == FIND_COMPOUND)
670 	{
671 	    /* Skip over the previously found word(s). */
672 	    wlen = mip->mi_compoff;
673 	    flen -= mip->mi_compoff;
674 	}
675 
676     }
677 
678     if (byts == NULL)
679 	return;			/* array is empty */
680 
681     /*
682      * Repeat advancing in the tree until:
683      * - there is a byte that doesn't match,
684      * - we reach the end of the tree,
685      * - or we reach the end of the line.
686      */
687     for (;;)
688     {
689 	if (flen <= 0 && *mip->mi_fend != NUL)
690 	    flen = fold_more(mip);
691 
692 	len = byts[arridx++];
693 
694 	/* If the first possible byte is a zero the word could end here.
695 	 * Remember this index, we first check for the longest word. */
696 	if (byts[arridx] == 0)
697 	{
698 	    if (endidxcnt == MAXWLEN)
699 	    {
700 		/* Must be a corrupted spell file. */
701 		emsg(_(e_format));
702 		return;
703 	    }
704 	    endlen[endidxcnt] = wlen;
705 	    endidx[endidxcnt++] = arridx++;
706 	    --len;
707 
708 	    /* Skip over the zeros, there can be several flag/region
709 	     * combinations. */
710 	    while (len > 0 && byts[arridx] == 0)
711 	    {
712 		++arridx;
713 		--len;
714 	    }
715 	    if (len == 0)
716 		break;	    /* no children, word must end here */
717 	}
718 
719 	/* Stop looking at end of the line. */
720 	if (ptr[wlen] == NUL)
721 	    break;
722 
723 	/* Perform a binary search in the list of accepted bytes. */
724 	c = ptr[wlen];
725 	if (c == TAB)	    /* <Tab> is handled like <Space> */
726 	    c = ' ';
727 	lo = arridx;
728 	hi = arridx + len - 1;
729 	while (lo < hi)
730 	{
731 	    m = (lo + hi) / 2;
732 	    if (byts[m] > c)
733 		hi = m - 1;
734 	    else if (byts[m] < c)
735 		lo = m + 1;
736 	    else
737 	    {
738 		lo = hi = m;
739 		break;
740 	    }
741 	}
742 
743 	/* Stop if there is no matching byte. */
744 	if (hi < lo || byts[lo] != c)
745 	    break;
746 
747 	/* Continue at the child (if there is one). */
748 	arridx = idxs[lo];
749 	++wlen;
750 	--flen;
751 
752 	/* One space in the good word may stand for several spaces in the
753 	 * checked word. */
754 	if (c == ' ')
755 	{
756 	    for (;;)
757 	    {
758 		if (flen <= 0 && *mip->mi_fend != NUL)
759 		    flen = fold_more(mip);
760 		if (ptr[wlen] != ' ' && ptr[wlen] != TAB)
761 		    break;
762 		++wlen;
763 		--flen;
764 	    }
765 	}
766     }
767 
768     /*
769      * Verify that one of the possible endings is valid.  Try the longest
770      * first.
771      */
772     while (endidxcnt > 0)
773     {
774 	--endidxcnt;
775 	arridx = endidx[endidxcnt];
776 	wlen = endlen[endidxcnt];
777 
778 	if ((*mb_head_off)(ptr, ptr + wlen) > 0)
779 	    continue;	    /* not at first byte of character */
780 	if (spell_iswordp(ptr + wlen, mip->mi_win))
781 	{
782 	    if (slang->sl_compprog == NULL && !slang->sl_nobreak)
783 		continue;	    /* next char is a word character */
784 	    word_ends = FALSE;
785 	}
786 	else
787 	    word_ends = TRUE;
788 	/* The prefix flag is before compound flags.  Once a valid prefix flag
789 	 * has been found we try compound flags. */
790 	prefix_found = FALSE;
791 
792 	if (mode != FIND_KEEPWORD && has_mbyte)
793 	{
794 	    /* Compute byte length in original word, length may change
795 	     * when folding case.  This can be slow, take a shortcut when the
796 	     * case-folded word is equal to the keep-case word. */
797 	    p = mip->mi_word;
798 	    if (STRNCMP(ptr, p, wlen) != 0)
799 	    {
800 		for (s = ptr; s < ptr + wlen; MB_PTR_ADV(s))
801 		    MB_PTR_ADV(p);
802 		wlen = (int)(p - mip->mi_word);
803 	    }
804 	}
805 
806 	/* Check flags and region.  For FIND_PREFIX check the condition and
807 	 * prefix ID.
808 	 * Repeat this if there are more flags/region alternatives until there
809 	 * is a match. */
810 	res = SP_BAD;
811 	for (len = byts[arridx - 1]; len > 0 && byts[arridx] == 0;
812 							      --len, ++arridx)
813 	{
814 	    flags = idxs[arridx];
815 
816 	    /* For the fold-case tree check that the case of the checked word
817 	     * matches with what the word in the tree requires.
818 	     * For keep-case tree the case is always right.  For prefixes we
819 	     * don't bother to check. */
820 	    if (mode == FIND_FOLDWORD)
821 	    {
822 		if (mip->mi_cend != mip->mi_word + wlen)
823 		{
824 		    /* mi_capflags was set for a different word length, need
825 		     * to do it again. */
826 		    mip->mi_cend = mip->mi_word + wlen;
827 		    mip->mi_capflags = captype(mip->mi_word, mip->mi_cend);
828 		}
829 
830 		if (mip->mi_capflags == WF_KEEPCAP
831 				|| !spell_valid_case(mip->mi_capflags, flags))
832 		    continue;
833 	    }
834 
835 	    /* When mode is FIND_PREFIX the word must support the prefix:
836 	     * check the prefix ID and the condition.  Do that for the list at
837 	     * mip->mi_prefarridx that find_prefix() filled. */
838 	    else if (mode == FIND_PREFIX && !prefix_found)
839 	    {
840 		c = valid_word_prefix(mip->mi_prefcnt, mip->mi_prefarridx,
841 				    flags,
842 				    mip->mi_word + mip->mi_cprefixlen, slang,
843 				    FALSE);
844 		if (c == 0)
845 		    continue;
846 
847 		/* Use the WF_RARE flag for a rare prefix. */
848 		if (c & WF_RAREPFX)
849 		    flags |= WF_RARE;
850 		prefix_found = TRUE;
851 	    }
852 
853 	    if (slang->sl_nobreak)
854 	    {
855 		if ((mode == FIND_COMPOUND || mode == FIND_KEEPCOMPOUND)
856 			&& (flags & WF_BANNED) == 0)
857 		{
858 		    /* NOBREAK: found a valid following word.  That's all we
859 		     * need to know, so return. */
860 		    mip->mi_result = SP_OK;
861 		    break;
862 		}
863 	    }
864 
865 	    else if ((mode == FIND_COMPOUND || mode == FIND_KEEPCOMPOUND
866 								|| !word_ends))
867 	    {
868 		/* If there is no compound flag or the word is shorter than
869 		 * COMPOUNDMIN reject it quickly.
870 		 * Makes you wonder why someone puts a compound flag on a word
871 		 * that's too short...  Myspell compatibility requires this
872 		 * anyway. */
873 		if (((unsigned)flags >> 24) == 0
874 			     || wlen - mip->mi_compoff < slang->sl_compminlen)
875 		    continue;
876 		/* For multi-byte chars check character length against
877 		 * COMPOUNDMIN. */
878 		if (has_mbyte
879 			&& slang->sl_compminlen > 0
880 			&& mb_charlen_len(mip->mi_word + mip->mi_compoff,
881 				wlen - mip->mi_compoff) < slang->sl_compminlen)
882 			continue;
883 
884 		/* Limit the number of compound words to COMPOUNDWORDMAX if no
885 		 * maximum for syllables is specified. */
886 		if (!word_ends && mip->mi_complen + mip->mi_compextra + 2
887 							   > slang->sl_compmax
888 					   && slang->sl_compsylmax == MAXWLEN)
889 		    continue;
890 
891 		/* Don't allow compounding on a side where an affix was added,
892 		 * unless COMPOUNDPERMITFLAG was used. */
893 		if (mip->mi_complen > 0 && (flags & WF_NOCOMPBEF))
894 		    continue;
895 		if (!word_ends && (flags & WF_NOCOMPAFT))
896 		    continue;
897 
898 		/* Quickly check if compounding is possible with this flag. */
899 		if (!byte_in_str(mip->mi_complen == 0
900 					? slang->sl_compstartflags
901 					: slang->sl_compallflags,
902 					    ((unsigned)flags >> 24)))
903 		    continue;
904 
905 		/* If there is a match with a CHECKCOMPOUNDPATTERN rule
906 		 * discard the compound word. */
907 		if (match_checkcompoundpattern(ptr, wlen, &slang->sl_comppat))
908 		    continue;
909 
910 		if (mode == FIND_COMPOUND)
911 		{
912 		    int	    capflags;
913 
914 		    /* Need to check the caps type of the appended compound
915 		     * word. */
916 		    if (has_mbyte && STRNCMP(ptr, mip->mi_word,
917 							mip->mi_compoff) != 0)
918 		    {
919 			/* case folding may have changed the length */
920 			p = mip->mi_word;
921 			for (s = ptr; s < ptr + mip->mi_compoff; MB_PTR_ADV(s))
922 			    MB_PTR_ADV(p);
923 		    }
924 		    else
925 			p = mip->mi_word + mip->mi_compoff;
926 		    capflags = captype(p, mip->mi_word + wlen);
927 		    if (capflags == WF_KEEPCAP || (capflags == WF_ALLCAP
928 						 && (flags & WF_FIXCAP) != 0))
929 			continue;
930 
931 		    if (capflags != WF_ALLCAP)
932 		    {
933 			/* When the character before the word is a word
934 			 * character we do not accept a Onecap word.  We do
935 			 * accept a no-caps word, even when the dictionary
936 			 * word specifies ONECAP. */
937 			MB_PTR_BACK(mip->mi_word, p);
938 			if (spell_iswordp_nmw(p, mip->mi_win)
939 				? capflags == WF_ONECAP
940 				: (flags & WF_ONECAP) != 0
941 						     && capflags != WF_ONECAP)
942 			    continue;
943 		    }
944 		}
945 
946 		/* If the word ends the sequence of compound flags of the
947 		 * words must match with one of the COMPOUNDRULE items and
948 		 * the number of syllables must not be too large. */
949 		mip->mi_compflags[mip->mi_complen] = ((unsigned)flags >> 24);
950 		mip->mi_compflags[mip->mi_complen + 1] = NUL;
951 		if (word_ends)
952 		{
953 		    char_u	fword[MAXWLEN];
954 
955 		    if (slang->sl_compsylmax < MAXWLEN)
956 		    {
957 			/* "fword" is only needed for checking syllables. */
958 			if (ptr == mip->mi_word)
959 			    (void)spell_casefold(ptr, wlen, fword, MAXWLEN);
960 			else
961 			    vim_strncpy(fword, ptr, endlen[endidxcnt]);
962 		    }
963 		    if (!can_compound(slang, fword, mip->mi_compflags))
964 			continue;
965 		}
966 		else if (slang->sl_comprules != NULL
967 			     && !match_compoundrule(slang, mip->mi_compflags))
968 		    /* The compound flags collected so far do not match any
969 		     * COMPOUNDRULE, discard the compounded word. */
970 		    continue;
971 	    }
972 
973 	    /* Check NEEDCOMPOUND: can't use word without compounding. */
974 	    else if (flags & WF_NEEDCOMP)
975 		continue;
976 
977 	    nobreak_result = SP_OK;
978 
979 	    if (!word_ends)
980 	    {
981 		int	save_result = mip->mi_result;
982 		char_u	*save_end = mip->mi_end;
983 		langp_T	*save_lp = mip->mi_lp;
984 		int	lpi;
985 
986 		/* Check that a valid word follows.  If there is one and we
987 		 * are compounding, it will set "mi_result", thus we are
988 		 * always finished here.  For NOBREAK we only check that a
989 		 * valid word follows.
990 		 * Recursive! */
991 		if (slang->sl_nobreak)
992 		    mip->mi_result = SP_BAD;
993 
994 		/* Find following word in case-folded tree. */
995 		mip->mi_compoff = endlen[endidxcnt];
996 		if (has_mbyte && mode == FIND_KEEPWORD)
997 		{
998 		    /* Compute byte length in case-folded word from "wlen":
999 		     * byte length in keep-case word.  Length may change when
1000 		     * folding case.  This can be slow, take a shortcut when
1001 		     * the case-folded word is equal to the keep-case word. */
1002 		    p = mip->mi_fword;
1003 		    if (STRNCMP(ptr, p, wlen) != 0)
1004 		    {
1005 			for (s = ptr; s < ptr + wlen; MB_PTR_ADV(s))
1006 			    MB_PTR_ADV(p);
1007 			mip->mi_compoff = (int)(p - mip->mi_fword);
1008 		    }
1009 		}
1010 #if 0 /* Disabled, see below */
1011 		c = mip->mi_compoff;
1012 #endif
1013 		++mip->mi_complen;
1014 		if (flags & WF_COMPROOT)
1015 		    ++mip->mi_compextra;
1016 
1017 		/* For NOBREAK we need to try all NOBREAK languages, at least
1018 		 * to find the ".add" file(s). */
1019 		for (lpi = 0; lpi < mip->mi_win->w_s->b_langp.ga_len; ++lpi)
1020 		{
1021 		    if (slang->sl_nobreak)
1022 		    {
1023 			mip->mi_lp = LANGP_ENTRY(mip->mi_win->w_s->b_langp, lpi);
1024 			if (mip->mi_lp->lp_slang->sl_fidxs == NULL
1025 					 || !mip->mi_lp->lp_slang->sl_nobreak)
1026 			    continue;
1027 		    }
1028 
1029 		    find_word(mip, FIND_COMPOUND);
1030 
1031 		    /* When NOBREAK any word that matches is OK.  Otherwise we
1032 		     * need to find the longest match, thus try with keep-case
1033 		     * and prefix too. */
1034 		    if (!slang->sl_nobreak || mip->mi_result == SP_BAD)
1035 		    {
1036 			/* Find following word in keep-case tree. */
1037 			mip->mi_compoff = wlen;
1038 			find_word(mip, FIND_KEEPCOMPOUND);
1039 
1040 #if 0	    /* Disabled, a prefix must not appear halfway a compound word,
1041 	       unless the COMPOUNDPERMITFLAG is used and then it can't be a
1042 	       postponed prefix. */
1043 			if (!slang->sl_nobreak || mip->mi_result == SP_BAD)
1044 			{
1045 			    /* Check for following word with prefix. */
1046 			    mip->mi_compoff = c;
1047 			    find_prefix(mip, FIND_COMPOUND);
1048 			}
1049 #endif
1050 		    }
1051 
1052 		    if (!slang->sl_nobreak)
1053 			break;
1054 		}
1055 		--mip->mi_complen;
1056 		if (flags & WF_COMPROOT)
1057 		    --mip->mi_compextra;
1058 		mip->mi_lp = save_lp;
1059 
1060 		if (slang->sl_nobreak)
1061 		{
1062 		    nobreak_result = mip->mi_result;
1063 		    mip->mi_result = save_result;
1064 		    mip->mi_end = save_end;
1065 		}
1066 		else
1067 		{
1068 		    if (mip->mi_result == SP_OK)
1069 			break;
1070 		    continue;
1071 		}
1072 	    }
1073 
1074 	    if (flags & WF_BANNED)
1075 		res = SP_BANNED;
1076 	    else if (flags & WF_REGION)
1077 	    {
1078 		/* Check region. */
1079 		if ((mip->mi_lp->lp_region & (flags >> 16)) != 0)
1080 		    res = SP_OK;
1081 		else
1082 		    res = SP_LOCAL;
1083 	    }
1084 	    else if (flags & WF_RARE)
1085 		res = SP_RARE;
1086 	    else
1087 		res = SP_OK;
1088 
1089 	    /* Always use the longest match and the best result.  For NOBREAK
1090 	     * we separately keep the longest match without a following good
1091 	     * word as a fall-back. */
1092 	    if (nobreak_result == SP_BAD)
1093 	    {
1094 		if (mip->mi_result2 > res)
1095 		{
1096 		    mip->mi_result2 = res;
1097 		    mip->mi_end2 = mip->mi_word + wlen;
1098 		}
1099 		else if (mip->mi_result2 == res
1100 					&& mip->mi_end2 < mip->mi_word + wlen)
1101 		    mip->mi_end2 = mip->mi_word + wlen;
1102 	    }
1103 	    else if (mip->mi_result > res)
1104 	    {
1105 		mip->mi_result = res;
1106 		mip->mi_end = mip->mi_word + wlen;
1107 	    }
1108 	    else if (mip->mi_result == res && mip->mi_end < mip->mi_word + wlen)
1109 		mip->mi_end = mip->mi_word + wlen;
1110 
1111 	    if (mip->mi_result == SP_OK)
1112 		break;
1113 	}
1114 
1115 	if (mip->mi_result == SP_OK)
1116 	    break;
1117     }
1118 }
1119 
1120 /*
1121  * Return TRUE if there is a match between the word ptr[wlen] and
1122  * CHECKCOMPOUNDPATTERN rules, assuming that we will concatenate with another
1123  * word.
1124  * A match means that the first part of CHECKCOMPOUNDPATTERN matches at the
1125  * end of ptr[wlen] and the second part matches after it.
1126  */
1127     static int
1128 match_checkcompoundpattern(
1129     char_u	*ptr,
1130     int		wlen,
1131     garray_T	*gap)  /* &sl_comppat */
1132 {
1133     int		i;
1134     char_u	*p;
1135     int		len;
1136 
1137     for (i = 0; i + 1 < gap->ga_len; i += 2)
1138     {
1139 	p = ((char_u **)gap->ga_data)[i + 1];
1140 	if (STRNCMP(ptr + wlen, p, STRLEN(p)) == 0)
1141 	{
1142 	    /* Second part matches at start of following compound word, now
1143 	     * check if first part matches at end of previous word. */
1144 	    p = ((char_u **)gap->ga_data)[i];
1145 	    len = (int)STRLEN(p);
1146 	    if (len <= wlen && STRNCMP(ptr + wlen - len, p, len) == 0)
1147 		return TRUE;
1148 	}
1149     }
1150     return FALSE;
1151 }
1152 
1153 /*
1154  * Return TRUE if "flags" is a valid sequence of compound flags and "word"
1155  * does not have too many syllables.
1156  */
1157     static int
1158 can_compound(slang_T *slang, char_u *word, char_u *flags)
1159 {
1160     char_u	uflags[MAXWLEN * 2];
1161     int		i;
1162     char_u	*p;
1163 
1164     if (slang->sl_compprog == NULL)
1165 	return FALSE;
1166     if (enc_utf8)
1167     {
1168 	/* Need to convert the single byte flags to utf8 characters. */
1169 	p = uflags;
1170 	for (i = 0; flags[i] != NUL; ++i)
1171 	    p += utf_char2bytes(flags[i], p);
1172 	*p = NUL;
1173 	p = uflags;
1174     }
1175     else
1176 	p = flags;
1177     if (!vim_regexec_prog(&slang->sl_compprog, FALSE, p, 0))
1178 	return FALSE;
1179 
1180     /* Count the number of syllables.  This may be slow, do it last.  If there
1181      * are too many syllables AND the number of compound words is above
1182      * COMPOUNDWORDMAX then compounding is not allowed. */
1183     if (slang->sl_compsylmax < MAXWLEN
1184 		       && count_syllables(slang, word) > slang->sl_compsylmax)
1185 	return (int)STRLEN(flags) < slang->sl_compmax;
1186     return TRUE;
1187 }
1188 
1189 /*
1190  * Return TRUE when the sequence of flags in "compflags" plus "flag" can
1191  * possibly form a valid compounded word.  This also checks the COMPOUNDRULE
1192  * lines if they don't contain wildcards.
1193  */
1194     static int
1195 can_be_compound(
1196     trystate_T	*sp,
1197     slang_T	*slang,
1198     char_u	*compflags,
1199     int		flag)
1200 {
1201     /* If the flag doesn't appear in sl_compstartflags or sl_compallflags
1202      * then it can't possibly compound. */
1203     if (!byte_in_str(sp->ts_complen == sp->ts_compsplit
1204 		? slang->sl_compstartflags : slang->sl_compallflags, flag))
1205 	return FALSE;
1206 
1207     /* If there are no wildcards, we can check if the flags collected so far
1208      * possibly can form a match with COMPOUNDRULE patterns.  This only
1209      * makes sense when we have two or more words. */
1210     if (slang->sl_comprules != NULL && sp->ts_complen > sp->ts_compsplit)
1211     {
1212 	int v;
1213 
1214 	compflags[sp->ts_complen] = flag;
1215 	compflags[sp->ts_complen + 1] = NUL;
1216 	v = match_compoundrule(slang, compflags + sp->ts_compsplit);
1217 	compflags[sp->ts_complen] = NUL;
1218 	return v;
1219     }
1220 
1221     return TRUE;
1222 }
1223 
1224 
1225 /*
1226  * Return TRUE if the compound flags in compflags[] match the start of any
1227  * compound rule.  This is used to stop trying a compound if the flags
1228  * collected so far can't possibly match any compound rule.
1229  * Caller must check that slang->sl_comprules is not NULL.
1230  */
1231     static int
1232 match_compoundrule(slang_T *slang, char_u *compflags)
1233 {
1234     char_u	*p;
1235     int		i;
1236     int		c;
1237 
1238     /* loop over all the COMPOUNDRULE entries */
1239     for (p = slang->sl_comprules; *p != NUL; ++p)
1240     {
1241 	/* loop over the flags in the compound word we have made, match
1242 	 * them against the current rule entry */
1243 	for (i = 0; ; ++i)
1244 	{
1245 	    c = compflags[i];
1246 	    if (c == NUL)
1247 		/* found a rule that matches for the flags we have so far */
1248 		return TRUE;
1249 	    if (*p == '/' || *p == NUL)
1250 		break;  /* end of rule, it's too short */
1251 	    if (*p == '[')
1252 	    {
1253 		int match = FALSE;
1254 
1255 		/* compare against all the flags in [] */
1256 		++p;
1257 		while (*p != ']' && *p != NUL)
1258 		    if (*p++ == c)
1259 			match = TRUE;
1260 		if (!match)
1261 		    break;  /* none matches */
1262 	    }
1263 	    else if (*p != c)
1264 		break;  /* flag of word doesn't match flag in pattern */
1265 	    ++p;
1266 	}
1267 
1268 	/* Skip to the next "/", where the next pattern starts. */
1269 	p = vim_strchr(p, '/');
1270 	if (p == NULL)
1271 	    break;
1272     }
1273 
1274     /* Checked all the rules and none of them match the flags, so there
1275      * can't possibly be a compound starting with these flags. */
1276     return FALSE;
1277 }
1278 
1279 /*
1280  * Return non-zero if the prefix indicated by "arridx" matches with the prefix
1281  * ID in "flags" for the word "word".
1282  * The WF_RAREPFX flag is included in the return value for a rare prefix.
1283  */
1284     static int
1285 valid_word_prefix(
1286     int		totprefcnt,	/* nr of prefix IDs */
1287     int		arridx,		/* idx in sl_pidxs[] */
1288     int		flags,
1289     char_u	*word,
1290     slang_T	*slang,
1291     int		cond_req)	/* only use prefixes with a condition */
1292 {
1293     int		prefcnt;
1294     int		pidx;
1295     regprog_T	**rp;
1296     int		prefid;
1297 
1298     prefid = (unsigned)flags >> 24;
1299     for (prefcnt = totprefcnt - 1; prefcnt >= 0; --prefcnt)
1300     {
1301 	pidx = slang->sl_pidxs[arridx + prefcnt];
1302 
1303 	/* Check the prefix ID. */
1304 	if (prefid != (pidx & 0xff))
1305 	    continue;
1306 
1307 	/* Check if the prefix doesn't combine and the word already has a
1308 	 * suffix. */
1309 	if ((flags & WF_HAS_AFF) && (pidx & WF_PFX_NC))
1310 	    continue;
1311 
1312 	/* Check the condition, if there is one.  The condition index is
1313 	 * stored in the two bytes above the prefix ID byte.  */
1314 	rp = &slang->sl_prefprog[((unsigned)pidx >> 8) & 0xffff];
1315 	if (*rp != NULL)
1316 	{
1317 	    if (!vim_regexec_prog(rp, FALSE, word, 0))
1318 		continue;
1319 	}
1320 	else if (cond_req)
1321 	    continue;
1322 
1323 	/* It's a match!  Return the WF_ flags. */
1324 	return pidx;
1325     }
1326     return 0;
1327 }
1328 
1329 /*
1330  * Check if the word at "mip->mi_word" has a matching prefix.
1331  * If it does, then check the following word.
1332  *
1333  * If "mode" is "FIND_COMPOUND" then do the same after another word, find a
1334  * prefix in a compound word.
1335  *
1336  * For a match mip->mi_result is updated.
1337  */
1338     static void
1339 find_prefix(matchinf_T *mip, int mode)
1340 {
1341     idx_T	arridx = 0;
1342     int		len;
1343     int		wlen = 0;
1344     int		flen;
1345     int		c;
1346     char_u	*ptr;
1347     idx_T	lo, hi, m;
1348     slang_T	*slang = mip->mi_lp->lp_slang;
1349     char_u	*byts;
1350     idx_T	*idxs;
1351 
1352     byts = slang->sl_pbyts;
1353     if (byts == NULL)
1354 	return;			/* array is empty */
1355 
1356     /* We use the case-folded word here, since prefixes are always
1357      * case-folded. */
1358     ptr = mip->mi_fword;
1359     flen = mip->mi_fwordlen;    /* available case-folded bytes */
1360     if (mode == FIND_COMPOUND)
1361     {
1362 	/* Skip over the previously found word(s). */
1363 	ptr += mip->mi_compoff;
1364 	flen -= mip->mi_compoff;
1365     }
1366     idxs = slang->sl_pidxs;
1367 
1368     /*
1369      * Repeat advancing in the tree until:
1370      * - there is a byte that doesn't match,
1371      * - we reach the end of the tree,
1372      * - or we reach the end of the line.
1373      */
1374     for (;;)
1375     {
1376 	if (flen == 0 && *mip->mi_fend != NUL)
1377 	    flen = fold_more(mip);
1378 
1379 	len = byts[arridx++];
1380 
1381 	/* If the first possible byte is a zero the prefix could end here.
1382 	 * Check if the following word matches and supports the prefix. */
1383 	if (byts[arridx] == 0)
1384 	{
1385 	    /* There can be several prefixes with different conditions.  We
1386 	     * try them all, since we don't know which one will give the
1387 	     * longest match.  The word is the same each time, pass the list
1388 	     * of possible prefixes to find_word(). */
1389 	    mip->mi_prefarridx = arridx;
1390 	    mip->mi_prefcnt = len;
1391 	    while (len > 0 && byts[arridx] == 0)
1392 	    {
1393 		++arridx;
1394 		--len;
1395 	    }
1396 	    mip->mi_prefcnt -= len;
1397 
1398 	    /* Find the word that comes after the prefix. */
1399 	    mip->mi_prefixlen = wlen;
1400 	    if (mode == FIND_COMPOUND)
1401 		/* Skip over the previously found word(s). */
1402 		mip->mi_prefixlen += mip->mi_compoff;
1403 
1404 	    if (has_mbyte)
1405 	    {
1406 		/* Case-folded length may differ from original length. */
1407 		mip->mi_cprefixlen = nofold_len(mip->mi_fword,
1408 					     mip->mi_prefixlen, mip->mi_word);
1409 	    }
1410 	    else
1411 		mip->mi_cprefixlen = mip->mi_prefixlen;
1412 	    find_word(mip, FIND_PREFIX);
1413 
1414 
1415 	    if (len == 0)
1416 		break;	    /* no children, word must end here */
1417 	}
1418 
1419 	/* Stop looking at end of the line. */
1420 	if (ptr[wlen] == NUL)
1421 	    break;
1422 
1423 	/* Perform a binary search in the list of accepted bytes. */
1424 	c = ptr[wlen];
1425 	lo = arridx;
1426 	hi = arridx + len - 1;
1427 	while (lo < hi)
1428 	{
1429 	    m = (lo + hi) / 2;
1430 	    if (byts[m] > c)
1431 		hi = m - 1;
1432 	    else if (byts[m] < c)
1433 		lo = m + 1;
1434 	    else
1435 	    {
1436 		lo = hi = m;
1437 		break;
1438 	    }
1439 	}
1440 
1441 	/* Stop if there is no matching byte. */
1442 	if (hi < lo || byts[lo] != c)
1443 	    break;
1444 
1445 	/* Continue at the child (if there is one). */
1446 	arridx = idxs[lo];
1447 	++wlen;
1448 	--flen;
1449     }
1450 }
1451 
1452 /*
1453  * Need to fold at least one more character.  Do until next non-word character
1454  * for efficiency.  Include the non-word character too.
1455  * Return the length of the folded chars in bytes.
1456  */
1457     static int
1458 fold_more(matchinf_T *mip)
1459 {
1460     int		flen;
1461     char_u	*p;
1462 
1463     p = mip->mi_fend;
1464     do
1465 	MB_PTR_ADV(mip->mi_fend);
1466     while (*mip->mi_fend != NUL && spell_iswordp(mip->mi_fend, mip->mi_win));
1467 
1468     /* Include the non-word character so that we can check for the word end. */
1469     if (*mip->mi_fend != NUL)
1470 	MB_PTR_ADV(mip->mi_fend);
1471 
1472     (void)spell_casefold(p, (int)(mip->mi_fend - p),
1473 			     mip->mi_fword + mip->mi_fwordlen,
1474 			     MAXWLEN - mip->mi_fwordlen);
1475     flen = (int)STRLEN(mip->mi_fword + mip->mi_fwordlen);
1476     mip->mi_fwordlen += flen;
1477     return flen;
1478 }
1479 
1480 /*
1481  * Check case flags for a word.  Return TRUE if the word has the requested
1482  * case.
1483  */
1484     static int
1485 spell_valid_case(
1486     int	    wordflags,	    /* flags for the checked word. */
1487     int	    treeflags)	    /* flags for the word in the spell tree */
1488 {
1489     return ((wordflags == WF_ALLCAP && (treeflags & WF_FIXCAP) == 0)
1490 	    || ((treeflags & (WF_ALLCAP | WF_KEEPCAP)) == 0
1491 		&& ((treeflags & WF_ONECAP) == 0
1492 					   || (wordflags & WF_ONECAP) != 0)));
1493 }
1494 
1495 /*
1496  * Return TRUE if spell checking is not enabled.
1497  */
1498     static int
1499 no_spell_checking(win_T *wp)
1500 {
1501     if (!wp->w_p_spell || *wp->w_s->b_p_spl == NUL
1502 					 || wp->w_s->b_langp.ga_len == 0)
1503     {
1504 	emsg(_("E756: Spell checking is not enabled"));
1505 	return TRUE;
1506     }
1507     return FALSE;
1508 }
1509 
1510 /*
1511  * Move to next spell error.
1512  * "curline" is FALSE for "[s", "]s", "[S" and "]S".
1513  * "curline" is TRUE to find word under/after cursor in the same line.
1514  * For Insert mode completion "dir" is BACKWARD and "curline" is TRUE: move
1515  * to after badly spelled word before the cursor.
1516  * Return 0 if not found, length of the badly spelled word otherwise.
1517  */
1518     int
1519 spell_move_to(
1520     win_T	*wp,
1521     int		dir,		/* FORWARD or BACKWARD */
1522     int		allwords,	/* TRUE for "[s"/"]s", FALSE for "[S"/"]S" */
1523     int		curline,
1524     hlf_T	*attrp)		/* return: attributes of bad word or NULL
1525 				   (only when "dir" is FORWARD) */
1526 {
1527     linenr_T	lnum;
1528     pos_T	found_pos;
1529     int		found_len = 0;
1530     char_u	*line;
1531     char_u	*p;
1532     char_u	*endp;
1533     hlf_T	attr;
1534     int		len;
1535 #ifdef FEAT_SYN_HL
1536     int		has_syntax = syntax_present(wp);
1537 #endif
1538     int		col;
1539     int		can_spell;
1540     char_u	*buf = NULL;
1541     int		buflen = 0;
1542     int		skip = 0;
1543     int		capcol = -1;
1544     int		found_one = FALSE;
1545     int		wrapped = FALSE;
1546 
1547     if (no_spell_checking(wp))
1548 	return 0;
1549 
1550     /*
1551      * Start looking for bad word at the start of the line, because we can't
1552      * start halfway a word, we don't know where it starts or ends.
1553      *
1554      * When searching backwards, we continue in the line to find the last
1555      * bad word (in the cursor line: before the cursor).
1556      *
1557      * We concatenate the start of the next line, so that wrapped words work
1558      * (e.g. "et<line-break>cetera").  Doesn't work when searching backwards
1559      * though...
1560      */
1561     lnum = wp->w_cursor.lnum;
1562     CLEAR_POS(&found_pos);
1563 
1564     while (!got_int)
1565     {
1566 	line = ml_get_buf(wp->w_buffer, lnum, FALSE);
1567 
1568 	len = (int)STRLEN(line);
1569 	if (buflen < len + MAXWLEN + 2)
1570 	{
1571 	    vim_free(buf);
1572 	    buflen = len + MAXWLEN + 2;
1573 	    buf = alloc(buflen);
1574 	    if (buf == NULL)
1575 		break;
1576 	}
1577 
1578 	/* In first line check first word for Capital. */
1579 	if (lnum == 1)
1580 	    capcol = 0;
1581 
1582 	/* For checking first word with a capital skip white space. */
1583 	if (capcol == 0)
1584 	    capcol = getwhitecols(line);
1585 	else if (curline && wp == curwin)
1586 	{
1587 	    /* For spellbadword(): check if first word needs a capital. */
1588 	    col = getwhitecols(line);
1589 	    if (check_need_cap(lnum, col))
1590 		capcol = col;
1591 
1592 	    /* Need to get the line again, may have looked at the previous
1593 	     * one. */
1594 	    line = ml_get_buf(wp->w_buffer, lnum, FALSE);
1595 	}
1596 
1597 	/* Copy the line into "buf" and append the start of the next line if
1598 	 * possible. */
1599 	STRCPY(buf, line);
1600 	if (lnum < wp->w_buffer->b_ml.ml_line_count)
1601 	    spell_cat_line(buf + STRLEN(buf),
1602 			  ml_get_buf(wp->w_buffer, lnum + 1, FALSE), MAXWLEN);
1603 
1604 	p = buf + skip;
1605 	endp = buf + len;
1606 	while (p < endp)
1607 	{
1608 	    /* When searching backward don't search after the cursor.  Unless
1609 	     * we wrapped around the end of the buffer. */
1610 	    if (dir == BACKWARD
1611 		    && lnum == wp->w_cursor.lnum
1612 		    && !wrapped
1613 		    && (colnr_T)(p - buf) >= wp->w_cursor.col)
1614 		break;
1615 
1616 	    /* start of word */
1617 	    attr = HLF_COUNT;
1618 	    len = spell_check(wp, p, &attr, &capcol, FALSE);
1619 
1620 	    if (attr != HLF_COUNT)
1621 	    {
1622 		/* We found a bad word.  Check the attribute. */
1623 		if (allwords || attr == HLF_SPB)
1624 		{
1625 		    /* When searching forward only accept a bad word after
1626 		     * the cursor. */
1627 		    if (dir == BACKWARD
1628 			    || lnum != wp->w_cursor.lnum
1629 			    || (lnum == wp->w_cursor.lnum
1630 				&& (wrapped
1631 				    || (colnr_T)(curline ? p - buf + len
1632 						     : p - buf)
1633 						  > wp->w_cursor.col)))
1634 		    {
1635 #ifdef FEAT_SYN_HL
1636 			if (has_syntax)
1637 			{
1638 			    col = (int)(p - buf);
1639 			    (void)syn_get_id(wp, lnum, (colnr_T)col,
1640 						    FALSE, &can_spell, FALSE);
1641 			    if (!can_spell)
1642 				attr = HLF_COUNT;
1643 			}
1644 			else
1645 #endif
1646 			    can_spell = TRUE;
1647 
1648 			if (can_spell)
1649 			{
1650 			    found_one = TRUE;
1651 			    found_pos.lnum = lnum;
1652 			    found_pos.col = (int)(p - buf);
1653 			    found_pos.coladd = 0;
1654 			    if (dir == FORWARD)
1655 			    {
1656 				/* No need to search further. */
1657 				wp->w_cursor = found_pos;
1658 				vim_free(buf);
1659 				if (attrp != NULL)
1660 				    *attrp = attr;
1661 				return len;
1662 			    }
1663 			    else if (curline)
1664 				/* Insert mode completion: put cursor after
1665 				 * the bad word. */
1666 				found_pos.col += len;
1667 			    found_len = len;
1668 			}
1669 		    }
1670 		    else
1671 			found_one = TRUE;
1672 		}
1673 	    }
1674 
1675 	    /* advance to character after the word */
1676 	    p += len;
1677 	    capcol -= len;
1678 	}
1679 
1680 	if (dir == BACKWARD && found_pos.lnum != 0)
1681 	{
1682 	    /* Use the last match in the line (before the cursor). */
1683 	    wp->w_cursor = found_pos;
1684 	    vim_free(buf);
1685 	    return found_len;
1686 	}
1687 
1688 	if (curline)
1689 	    break;	/* only check cursor line */
1690 
1691 	/* If we are back at the starting line and searched it again there
1692 	 * is no match, give up. */
1693 	if (lnum == wp->w_cursor.lnum && wrapped)
1694 	    break;
1695 
1696 	/* Advance to next line. */
1697 	if (dir == BACKWARD)
1698 	{
1699 	    if (lnum > 1)
1700 		--lnum;
1701 	    else if (!p_ws)
1702 		break;	    /* at first line and 'nowrapscan' */
1703 	    else
1704 	    {
1705 		/* Wrap around to the end of the buffer.  May search the
1706 		 * starting line again and accept the last match. */
1707 		lnum = wp->w_buffer->b_ml.ml_line_count;
1708 		wrapped = TRUE;
1709 		if (!shortmess(SHM_SEARCH))
1710 		    give_warning((char_u *)_(top_bot_msg), TRUE);
1711 	    }
1712 	    capcol = -1;
1713 	}
1714 	else
1715 	{
1716 	    if (lnum < wp->w_buffer->b_ml.ml_line_count)
1717 		++lnum;
1718 	    else if (!p_ws)
1719 		break;	    /* at first line and 'nowrapscan' */
1720 	    else
1721 	    {
1722 		/* Wrap around to the start of the buffer.  May search the
1723 		 * starting line again and accept the first match. */
1724 		lnum = 1;
1725 		wrapped = TRUE;
1726 		if (!shortmess(SHM_SEARCH))
1727 		    give_warning((char_u *)_(bot_top_msg), TRUE);
1728 	    }
1729 
1730 	    /* If we are back at the starting line and there is no match then
1731 	     * give up. */
1732 	    if (lnum == wp->w_cursor.lnum && !found_one)
1733 		break;
1734 
1735 	    /* Skip the characters at the start of the next line that were
1736 	     * included in a match crossing line boundaries. */
1737 	    if (attr == HLF_COUNT)
1738 		skip = (int)(p - endp);
1739 	    else
1740 		skip = 0;
1741 
1742 	    /* Capcol skips over the inserted space. */
1743 	    --capcol;
1744 
1745 	    /* But after empty line check first word in next line */
1746 	    if (*skipwhite(line) == NUL)
1747 		capcol = 0;
1748 	}
1749 
1750 	line_breakcheck();
1751     }
1752 
1753     vim_free(buf);
1754     return 0;
1755 }
1756 
1757 /*
1758  * For spell checking: concatenate the start of the following line "line" into
1759  * "buf", blanking-out special characters.  Copy less then "maxlen" bytes.
1760  * Keep the blanks at the start of the next line, this is used in win_line()
1761  * to skip those bytes if the word was OK.
1762  */
1763     void
1764 spell_cat_line(char_u *buf, char_u *line, int maxlen)
1765 {
1766     char_u	*p;
1767     int		n;
1768 
1769     p = skipwhite(line);
1770     while (vim_strchr((char_u *)"*#/\"\t", *p) != NULL)
1771 	p = skipwhite(p + 1);
1772 
1773     if (*p != NUL)
1774     {
1775 	/* Only worth concatenating if there is something else than spaces to
1776 	 * concatenate. */
1777 	n = (int)(p - line) + 1;
1778 	if (n < maxlen - 1)
1779 	{
1780 	    vim_memset(buf, ' ', n);
1781 	    vim_strncpy(buf +  n, p, maxlen - 1 - n);
1782 	}
1783     }
1784 }
1785 
/*
 * Structure used for the cookie argument of do_in_runtimepath().
 * spell_load_lang() fills it in and passes its address together with the
 * spell_load_cb callback.
 */
typedef struct spelload_S
{
    char_u  sl_lang[MAXWLEN + 1];	/* language name; spell_load_lang()
					   checks for it having become empty
					   before trying another file */
    slang_T *sl_slang;			/* resulting slang_T struct, NULL
					   when nothing was loaded */
    int	    sl_nobreak;			/* NOBREAK language found */
} spelload_T;
1795 
1796 /*
1797  * Load word list(s) for "lang" from Vim spell file(s).
1798  * "lang" must be the language without the region: e.g., "en".
1799  */
1800     static void
1801 spell_load_lang(char_u *lang)
1802 {
1803     char_u	fname_enc[85];
1804     int		r;
1805     spelload_T	sl;
1806     int		round;
1807 
1808     /* Copy the language name to pass it to spell_load_cb() as a cookie.
1809      * It's truncated when an error is detected. */
1810     STRCPY(sl.sl_lang, lang);
1811     sl.sl_slang = NULL;
1812     sl.sl_nobreak = FALSE;
1813 
1814     /* We may retry when no spell file is found for the language, an
1815      * autocommand may load it then. */
1816     for (round = 1; round <= 2; ++round)
1817     {
1818 	/*
1819 	 * Find the first spell file for "lang" in 'runtimepath' and load it.
1820 	 */
1821 	vim_snprintf((char *)fname_enc, sizeof(fname_enc) - 5,
1822 #ifdef VMS
1823 					"spell/%s_%s.spl",
1824 #else
1825 					"spell/%s.%s.spl",
1826 #endif
1827 							   lang, spell_enc());
1828 	r = do_in_runtimepath(fname_enc, 0, spell_load_cb, &sl);
1829 
1830 	if (r == FAIL && *sl.sl_lang != NUL)
1831 	{
1832 	    /* Try loading the ASCII version. */
1833 	    vim_snprintf((char *)fname_enc, sizeof(fname_enc) - 5,
1834 #ifdef VMS
1835 						  "spell/%s_ascii.spl",
1836 #else
1837 						  "spell/%s.ascii.spl",
1838 #endif
1839 									lang);
1840 	    r = do_in_runtimepath(fname_enc, 0, spell_load_cb, &sl);
1841 
1842 	    if (r == FAIL && *sl.sl_lang != NUL && round == 1
1843 		    && apply_autocmds(EVENT_SPELLFILEMISSING, lang,
1844 					      curbuf->b_fname, FALSE, curbuf))
1845 		continue;
1846 	    break;
1847 	}
1848 	break;
1849     }
1850 
1851     if (r == FAIL)
1852     {
1853 	smsg(
1854 #ifdef VMS
1855 	_("Warning: Cannot find word list \"%s_%s.spl\" or \"%s_ascii.spl\""),
1856 #else
1857 	_("Warning: Cannot find word list \"%s.%s.spl\" or \"%s.ascii.spl\""),
1858 #endif
1859 						     lang, spell_enc(), lang);
1860     }
1861     else if (sl.sl_slang != NULL)
1862     {
1863 	/* At least one file was loaded, now load ALL the additions. */
1864 	STRCPY(fname_enc + STRLEN(fname_enc) - 3, "add.spl");
1865 	do_in_runtimepath(fname_enc, DIP_ALL, spell_load_cb, &sl);
1866     }
1867 }
1868 
1869 /*
1870  * Return the encoding used for spell checking: Use 'encoding', except that we
1871  * use "latin1" for "latin9".  And limit to 60 characters (just in case).
1872  */
1873     char_u *
1874 spell_enc(void)
1875 {
1876 
1877     if (STRLEN(p_enc) < 60 && STRCMP(p_enc, "iso-8859-15") != 0)
1878 	return p_enc;
1879     return (char_u *)"latin1";
1880 }
1881 
1882 /*
1883  * Get the name of the .spl file for the internal wordlist into
1884  * "fname[MAXPATHL]".
1885  */
1886     static void
1887 int_wordlist_spl(char_u *fname)
1888 {
1889     vim_snprintf((char *)fname, MAXPATHL, SPL_FNAME_TMPL,
1890 						  int_wordlist, spell_enc());
1891 }
1892 
1893 /*
1894  * Allocate a new slang_T for language "lang".  "lang" can be NULL.
1895  * Caller must fill "sl_next".
1896  */
1897     slang_T *
1898 slang_alloc(char_u *lang)
1899 {
1900     slang_T *lp;
1901 
1902     lp = (slang_T *)alloc_clear(sizeof(slang_T));
1903     if (lp != NULL)
1904     {
1905 	if (lang != NULL)
1906 	    lp->sl_name = vim_strsave(lang);
1907 	ga_init2(&lp->sl_rep, sizeof(fromto_T), 10);
1908 	ga_init2(&lp->sl_repsal, sizeof(fromto_T), 10);
1909 	lp->sl_compmax = MAXWLEN;
1910 	lp->sl_compsylmax = MAXWLEN;
1911 	hash_init(&lp->sl_wordcount);
1912     }
1913 
1914     return lp;
1915 }
1916 
1917 /*
1918  * Free the contents of an slang_T and the structure itself.
1919  */
1920     void
1921 slang_free(slang_T *lp)
1922 {
1923     vim_free(lp->sl_name);
1924     vim_free(lp->sl_fname);
1925     slang_clear(lp);
1926     vim_free(lp);
1927 }
1928 
1929 /*
1930  * Clear an slang_T so that the file can be reloaded.
1931  */
1932     void
1933 slang_clear(slang_T *lp)
1934 {
1935     garray_T	*gap;
1936     fromto_T	*ftp;
1937     salitem_T	*smp;
1938     int		i;
1939     int		round;
1940 
1941     VIM_CLEAR(lp->sl_fbyts);
1942     VIM_CLEAR(lp->sl_kbyts);
1943     VIM_CLEAR(lp->sl_pbyts);
1944 
1945     VIM_CLEAR(lp->sl_fidxs);
1946     VIM_CLEAR(lp->sl_kidxs);
1947     VIM_CLEAR(lp->sl_pidxs);
1948 
1949     for (round = 1; round <= 2; ++round)
1950     {
1951 	gap = round == 1 ? &lp->sl_rep : &lp->sl_repsal;
1952 	while (gap->ga_len > 0)
1953 	{
1954 	    ftp = &((fromto_T *)gap->ga_data)[--gap->ga_len];
1955 	    vim_free(ftp->ft_from);
1956 	    vim_free(ftp->ft_to);
1957 	}
1958 	ga_clear(gap);
1959     }
1960 
1961     gap = &lp->sl_sal;
1962     if (lp->sl_sofo)
1963     {
1964 	/* "ga_len" is set to 1 without adding an item for latin1 */
1965 	if (gap->ga_data != NULL)
1966 	    /* SOFOFROM and SOFOTO items: free lists of wide characters. */
1967 	    for (i = 0; i < gap->ga_len; ++i)
1968 		vim_free(((int **)gap->ga_data)[i]);
1969     }
1970     else
1971 	/* SAL items: free salitem_T items */
1972 	while (gap->ga_len > 0)
1973 	{
1974 	    smp = &((salitem_T *)gap->ga_data)[--gap->ga_len];
1975 	    vim_free(smp->sm_lead);
1976 	    /* Don't free sm_oneof and sm_rules, they point into sm_lead. */
1977 	    vim_free(smp->sm_to);
1978 	    vim_free(smp->sm_lead_w);
1979 	    vim_free(smp->sm_oneof_w);
1980 	    vim_free(smp->sm_to_w);
1981 	}
1982     ga_clear(gap);
1983 
1984     for (i = 0; i < lp->sl_prefixcnt; ++i)
1985 	vim_regfree(lp->sl_prefprog[i]);
1986     lp->sl_prefixcnt = 0;
1987     VIM_CLEAR(lp->sl_prefprog);
1988 
1989     VIM_CLEAR(lp->sl_info);
1990 
1991     VIM_CLEAR(lp->sl_midword);
1992 
1993     vim_regfree(lp->sl_compprog);
1994     lp->sl_compprog = NULL;
1995     VIM_CLEAR(lp->sl_comprules);
1996     VIM_CLEAR(lp->sl_compstartflags);
1997     VIM_CLEAR(lp->sl_compallflags);
1998 
1999     VIM_CLEAR(lp->sl_syllable);
2000     ga_clear(&lp->sl_syl_items);
2001 
2002     ga_clear_strings(&lp->sl_comppat);
2003 
2004     hash_clear_all(&lp->sl_wordcount, WC_KEY_OFF);
2005     hash_init(&lp->sl_wordcount);
2006 
2007     hash_clear_all(&lp->sl_map_hash, 0);
2008 
2009     /* Clear info from .sug file. */
2010     slang_clear_sug(lp);
2011 
2012     lp->sl_compmax = MAXWLEN;
2013     lp->sl_compminlen = 0;
2014     lp->sl_compsylmax = MAXWLEN;
2015     lp->sl_regions[0] = NUL;
2016 }
2017 
2018 /*
2019  * Clear the info from the .sug file in "lp".
2020  */
2021     void
2022 slang_clear_sug(slang_T *lp)
2023 {
2024     VIM_CLEAR(lp->sl_sbyts);
2025     VIM_CLEAR(lp->sl_sidxs);
2026     close_spellbuf(lp->sl_sugbuf);
2027     lp->sl_sugbuf = NULL;
2028     lp->sl_sugloaded = FALSE;
2029     lp->sl_sugtime = 0;
2030 }
2031 
2032 /*
2033  * Load one spell file and store the info into a slang_T.
2034  * Invoked through do_in_runtimepath().
2035  */
2036     static void
2037 spell_load_cb(char_u *fname, void *cookie)
2038 {
2039     spelload_T	*slp = (spelload_T *)cookie;
2040     slang_T	*slang;
2041 
2042     slang = spell_load_file(fname, slp->sl_lang, NULL, FALSE);
2043     if (slang != NULL)
2044     {
2045 	/* When a previously loaded file has NOBREAK also use it for the
2046 	 * ".add" files. */
2047 	if (slp->sl_nobreak && slang->sl_add)
2048 	    slang->sl_nobreak = TRUE;
2049 	else if (slang->sl_nobreak)
2050 	    slp->sl_nobreak = TRUE;
2051 
2052 	slp->sl_slang = slang;
2053     }
2054 }
2055 
2056 
2057 /*
2058  * Add a word to the hashtable of common words.
2059  * If it's already there then the counter is increased.
2060  */
2061     void
2062 count_common_word(
2063     slang_T	*lp,
2064     char_u	*word,
2065     int		len,	    /* word length, -1 for upto NUL */
2066     int		count)	    /* 1 to count once, 10 to init */
2067 {
2068     hash_T	hash;
2069     hashitem_T	*hi;
2070     wordcount_T	*wc;
2071     char_u	buf[MAXWLEN];
2072     char_u	*p;
2073 
2074     if (len == -1)
2075 	p = word;
2076     else
2077     {
2078 	vim_strncpy(buf, word, len);
2079 	p = buf;
2080     }
2081 
2082     hash = hash_hash(p);
2083     hi = hash_lookup(&lp->sl_wordcount, p, hash);
2084     if (HASHITEM_EMPTY(hi))
2085     {
2086 	wc = (wordcount_T *)alloc((unsigned)(sizeof(wordcount_T) + STRLEN(p)));
2087 	if (wc == NULL)
2088 	    return;
2089 	STRCPY(wc->wc_word, p);
2090 	wc->wc_count = count;
2091 	hash_add_item(&lp->sl_wordcount, hi, wc->wc_word, hash);
2092     }
2093     else
2094     {
2095 	wc = HI2WC(hi);
2096 	if ((wc->wc_count += count) < (unsigned)count)	/* check for overflow */
2097 	    wc->wc_count = MAXWORDCOUNT;
2098     }
2099 }
2100 
2101 /*
2102  * Adjust the score of common words.
2103  */
2104     static int
2105 score_wordcount_adj(
2106     slang_T	*slang,
2107     int		score,
2108     char_u	*word,
2109     int		split)	    /* word was split, less bonus */
2110 {
2111     hashitem_T	*hi;
2112     wordcount_T	*wc;
2113     int		bonus;
2114     int		newscore;
2115 
2116     hi = hash_find(&slang->sl_wordcount, word);
2117     if (!HASHITEM_EMPTY(hi))
2118     {
2119 	wc = HI2WC(hi);
2120 	if (wc->wc_count < SCORE_THRES2)
2121 	    bonus = SCORE_COMMON1;
2122 	else if (wc->wc_count < SCORE_THRES3)
2123 	    bonus = SCORE_COMMON2;
2124 	else
2125 	    bonus = SCORE_COMMON3;
2126 	if (split)
2127 	    newscore = score - bonus / 2;
2128 	else
2129 	    newscore = score - bonus;
2130 	if (newscore < 0)
2131 	    return 0;
2132 	return newscore;
2133     }
2134     return score;
2135 }
2136 
2137 
2138 /*
2139  * Return TRUE if byte "n" appears in "str".
2140  * Like strchr() but independent of locale.
2141  */
2142     int
2143 byte_in_str(char_u *str, int n)
2144 {
2145     char_u	*p;
2146 
2147     for (p = str; *p != NUL; ++p)
2148 	if (*p == n)
2149 	    return TRUE;
2150     return FALSE;
2151 }
2152 
2153 #define SY_MAXLEN   30
2154 typedef struct syl_item_S
2155 {
2156     char_u	sy_chars[SY_MAXLEN];	    /* the sequence of chars */
2157     int		sy_len;
2158 } syl_item_T;
2159 
2160 /*
2161  * Truncate "slang->sl_syllable" at the first slash and put the following items
2162  * in "slang->sl_syl_items".
2163  */
2164     int
2165 init_syl_tab(slang_T *slang)
2166 {
2167     char_u	*p;
2168     char_u	*s;
2169     int		l;
2170     syl_item_T	*syl;
2171 
2172     ga_init2(&slang->sl_syl_items, sizeof(syl_item_T), 4);
2173     p = vim_strchr(slang->sl_syllable, '/');
2174     while (p != NULL)
2175     {
2176 	*p++ = NUL;
2177 	if (*p == NUL)	    /* trailing slash */
2178 	    break;
2179 	s = p;
2180 	p = vim_strchr(p, '/');
2181 	if (p == NULL)
2182 	    l = (int)STRLEN(s);
2183 	else
2184 	    l = (int)(p - s);
2185 	if (l >= SY_MAXLEN)
2186 	    return SP_FORMERROR;
2187 	if (ga_grow(&slang->sl_syl_items, 1) == FAIL)
2188 	    return SP_OTHERERROR;
2189 	syl = ((syl_item_T *)slang->sl_syl_items.ga_data)
2190 					       + slang->sl_syl_items.ga_len++;
2191 	vim_strncpy(syl->sy_chars, s, l);
2192 	syl->sy_len = l;
2193     }
2194     return OK;
2195 }
2196 
2197 /*
2198  * Count the number of syllables in "word".
2199  * When "word" contains spaces the syllables after the last space are counted.
2200  * Returns zero if syllables are not defines.
2201  */
2202     static int
2203 count_syllables(slang_T *slang, char_u *word)
2204 {
2205     int		cnt = 0;
2206     int		skip = FALSE;
2207     char_u	*p;
2208     int		len;
2209     int		i;
2210     syl_item_T	*syl;
2211     int		c;
2212 
2213     if (slang->sl_syllable == NULL)
2214 	return 0;
2215 
2216     for (p = word; *p != NUL; p += len)
2217     {
2218 	/* When running into a space reset counter. */
2219 	if (*p == ' ')
2220 	{
2221 	    len = 1;
2222 	    cnt = 0;
2223 	    continue;
2224 	}
2225 
2226 	/* Find longest match of syllable items. */
2227 	len = 0;
2228 	for (i = 0; i < slang->sl_syl_items.ga_len; ++i)
2229 	{
2230 	    syl = ((syl_item_T *)slang->sl_syl_items.ga_data) + i;
2231 	    if (syl->sy_len > len
2232 			       && STRNCMP(p, syl->sy_chars, syl->sy_len) == 0)
2233 		len = syl->sy_len;
2234 	}
2235 	if (len != 0)	/* found a match, count syllable  */
2236 	{
2237 	    ++cnt;
2238 	    skip = FALSE;
2239 	}
2240 	else
2241 	{
2242 	    /* No recognized syllable item, at least a syllable char then? */
2243 	    c = mb_ptr2char(p);
2244 	    len = (*mb_ptr2len)(p);
2245 	    if (vim_strchr(slang->sl_syllable, c) == NULL)
2246 		skip = FALSE;	    /* No, search for next syllable */
2247 	    else if (!skip)
2248 	    {
2249 		++cnt;		    /* Yes, count it */
2250 		skip = TRUE;	    /* don't count following syllable chars */
2251 	    }
2252 	}
2253     }
2254     return cnt;
2255 }
2256 
2257 /*
2258  * Parse 'spelllang' and set w_s->b_langp accordingly.
2259  * Returns NULL if it's OK, an error message otherwise.
2260  */
2261     char *
2262 did_set_spelllang(win_T *wp)
2263 {
2264     garray_T	ga;
2265     char_u	*splp;
2266     char_u	*region;
2267     char_u	region_cp[3];
2268     int		filename;
2269     int		region_mask;
2270     slang_T	*slang;
2271     int		c;
2272     char_u	lang[MAXWLEN + 1];
2273     char_u	spf_name[MAXPATHL];
2274     int		len;
2275     char_u	*p;
2276     int		round;
2277     char_u	*spf;
2278     char_u	*use_region = NULL;
2279     int		dont_use_region = FALSE;
2280     int		nobreak = FALSE;
2281     int		i, j;
2282     langp_T	*lp, *lp2;
2283     static int	recursive = FALSE;
2284     char	*ret_msg = NULL;
2285     char_u	*spl_copy;
2286     bufref_T	bufref;
2287 
2288     set_bufref(&bufref, wp->w_buffer);
2289 
2290     /* We don't want to do this recursively.  May happen when a language is
2291      * not available and the SpellFileMissing autocommand opens a new buffer
2292      * in which 'spell' is set. */
2293     if (recursive)
2294 	return NULL;
2295     recursive = TRUE;
2296 
2297     ga_init2(&ga, sizeof(langp_T), 2);
2298     clear_midword(wp);
2299 
2300     /* Make a copy of 'spelllang', the SpellFileMissing autocommands may change
2301      * it under our fingers. */
2302     spl_copy = vim_strsave(wp->w_s->b_p_spl);
2303     if (spl_copy == NULL)
2304 	goto theend;
2305 
2306     wp->w_s->b_cjk = 0;
2307 
2308     /* Loop over comma separated language names. */
2309     for (splp = spl_copy; *splp != NUL; )
2310     {
2311 	/* Get one language name. */
2312 	copy_option_part(&splp, lang, MAXWLEN, ",");
2313 	region = NULL;
2314 	len = (int)STRLEN(lang);
2315 
2316 	if (STRCMP(lang, "cjk") == 0)
2317 	{
2318 	    wp->w_s->b_cjk = 1;
2319 	    continue;
2320 	}
2321 
2322 	/* If the name ends in ".spl" use it as the name of the spell file.
2323 	 * If there is a region name let "region" point to it and remove it
2324 	 * from the name. */
2325 	if (len > 4 && fnamecmp(lang + len - 4, ".spl") == 0)
2326 	{
2327 	    filename = TRUE;
2328 
2329 	    /* Locate a region and remove it from the file name. */
2330 	    p = vim_strchr(gettail(lang), '_');
2331 	    if (p != NULL && ASCII_ISALPHA(p[1]) && ASCII_ISALPHA(p[2])
2332 						      && !ASCII_ISALPHA(p[3]))
2333 	    {
2334 		vim_strncpy(region_cp, p + 1, 2);
2335 		mch_memmove(p, p + 3, len - (p - lang) - 2);
2336 		region = region_cp;
2337 	    }
2338 	    else
2339 		dont_use_region = TRUE;
2340 
2341 	    /* Check if we loaded this language before. */
2342 	    for (slang = first_lang; slang != NULL; slang = slang->sl_next)
2343 		if (fullpathcmp(lang, slang->sl_fname, FALSE) == FPC_SAME)
2344 		    break;
2345 	}
2346 	else
2347 	{
2348 	    filename = FALSE;
2349 	    if (len > 3 && lang[len - 3] == '_')
2350 	    {
2351 		region = lang + len - 2;
2352 		len -= 3;
2353 		lang[len] = NUL;
2354 	    }
2355 	    else
2356 		dont_use_region = TRUE;
2357 
2358 	    /* Check if we loaded this language before. */
2359 	    for (slang = first_lang; slang != NULL; slang = slang->sl_next)
2360 		if (STRICMP(lang, slang->sl_name) == 0)
2361 		    break;
2362 	}
2363 
2364 	if (region != NULL)
2365 	{
2366 	    /* If the region differs from what was used before then don't
2367 	     * use it for 'spellfile'. */
2368 	    if (use_region != NULL && STRCMP(region, use_region) != 0)
2369 		dont_use_region = TRUE;
2370 	    use_region = region;
2371 	}
2372 
2373 	/* If not found try loading the language now. */
2374 	if (slang == NULL)
2375 	{
2376 	    if (filename)
2377 		(void)spell_load_file(lang, lang, NULL, FALSE);
2378 	    else
2379 	    {
2380 		spell_load_lang(lang);
2381 		/* SpellFileMissing autocommands may do anything, including
2382 		 * destroying the buffer we are using... */
2383 		if (!bufref_valid(&bufref))
2384 		{
2385 		    ret_msg = N_("E797: SpellFileMissing autocommand deleted buffer");
2386 		    goto theend;
2387 		}
2388 	    }
2389 	}
2390 
2391 	/*
2392 	 * Loop over the languages, there can be several files for "lang".
2393 	 */
2394 	for (slang = first_lang; slang != NULL; slang = slang->sl_next)
2395 	    if (filename ? fullpathcmp(lang, slang->sl_fname, FALSE) == FPC_SAME
2396 			 : STRICMP(lang, slang->sl_name) == 0)
2397 	    {
2398 		region_mask = REGION_ALL;
2399 		if (!filename && region != NULL)
2400 		{
2401 		    /* find region in sl_regions */
2402 		    c = find_region(slang->sl_regions, region);
2403 		    if (c == REGION_ALL)
2404 		    {
2405 			if (slang->sl_add)
2406 			{
2407 			    if (*slang->sl_regions != NUL)
2408 				/* This addition file is for other regions. */
2409 				region_mask = 0;
2410 			}
2411 			else
2412 			    /* This is probably an error.  Give a warning and
2413 			     * accept the words anyway. */
2414 			    smsg(_("Warning: region %s not supported"),
2415 								      region);
2416 		    }
2417 		    else
2418 			region_mask = 1 << c;
2419 		}
2420 
2421 		if (region_mask != 0)
2422 		{
2423 		    if (ga_grow(&ga, 1) == FAIL)
2424 		    {
2425 			ga_clear(&ga);
2426 			ret_msg = e_outofmem;
2427 			goto theend;
2428 		    }
2429 		    LANGP_ENTRY(ga, ga.ga_len)->lp_slang = slang;
2430 		    LANGP_ENTRY(ga, ga.ga_len)->lp_region = region_mask;
2431 		    ++ga.ga_len;
2432 		    use_midword(slang, wp);
2433 		    if (slang->sl_nobreak)
2434 			nobreak = TRUE;
2435 		}
2436 	    }
2437     }
2438 
2439     /* round 0: load int_wordlist, if possible.
2440      * round 1: load first name in 'spellfile'.
2441      * round 2: load second name in 'spellfile.
2442      * etc. */
2443     spf = curwin->w_s->b_p_spf;
2444     for (round = 0; round == 0 || *spf != NUL; ++round)
2445     {
2446 	if (round == 0)
2447 	{
2448 	    /* Internal wordlist, if there is one. */
2449 	    if (int_wordlist == NULL)
2450 		continue;
2451 	    int_wordlist_spl(spf_name);
2452 	}
2453 	else
2454 	{
2455 	    /* One entry in 'spellfile'. */
2456 	    copy_option_part(&spf, spf_name, MAXPATHL - 5, ",");
2457 	    STRCAT(spf_name, ".spl");
2458 
2459 	    /* If it was already found above then skip it. */
2460 	    for (c = 0; c < ga.ga_len; ++c)
2461 	    {
2462 		p = LANGP_ENTRY(ga, c)->lp_slang->sl_fname;
2463 		if (p != NULL && fullpathcmp(spf_name, p, FALSE) == FPC_SAME)
2464 		    break;
2465 	    }
2466 	    if (c < ga.ga_len)
2467 		continue;
2468 	}
2469 
2470 	/* Check if it was loaded already. */
2471 	for (slang = first_lang; slang != NULL; slang = slang->sl_next)
2472 	    if (fullpathcmp(spf_name, slang->sl_fname, FALSE) == FPC_SAME)
2473 		break;
2474 	if (slang == NULL)
2475 	{
2476 	    /* Not loaded, try loading it now.  The language name includes the
2477 	     * region name, the region is ignored otherwise.  for int_wordlist
2478 	     * use an arbitrary name. */
2479 	    if (round == 0)
2480 		STRCPY(lang, "internal wordlist");
2481 	    else
2482 	    {
2483 		vim_strncpy(lang, gettail(spf_name), MAXWLEN);
2484 		p = vim_strchr(lang, '.');
2485 		if (p != NULL)
2486 		    *p = NUL;	/* truncate at ".encoding.add" */
2487 	    }
2488 	    slang = spell_load_file(spf_name, lang, NULL, TRUE);
2489 
2490 	    /* If one of the languages has NOBREAK we assume the addition
2491 	     * files also have this. */
2492 	    if (slang != NULL && nobreak)
2493 		slang->sl_nobreak = TRUE;
2494 	}
2495 	if (slang != NULL && ga_grow(&ga, 1) == OK)
2496 	{
2497 	    region_mask = REGION_ALL;
2498 	    if (use_region != NULL && !dont_use_region)
2499 	    {
2500 		/* find region in sl_regions */
2501 		c = find_region(slang->sl_regions, use_region);
2502 		if (c != REGION_ALL)
2503 		    region_mask = 1 << c;
2504 		else if (*slang->sl_regions != NUL)
2505 		    /* This spell file is for other regions. */
2506 		    region_mask = 0;
2507 	    }
2508 
2509 	    if (region_mask != 0)
2510 	    {
2511 		LANGP_ENTRY(ga, ga.ga_len)->lp_slang = slang;
2512 		LANGP_ENTRY(ga, ga.ga_len)->lp_sallang = NULL;
2513 		LANGP_ENTRY(ga, ga.ga_len)->lp_replang = NULL;
2514 		LANGP_ENTRY(ga, ga.ga_len)->lp_region = region_mask;
2515 		++ga.ga_len;
2516 		use_midword(slang, wp);
2517 	    }
2518 	}
2519     }
2520 
2521     /* Everything is fine, store the new b_langp value. */
2522     ga_clear(&wp->w_s->b_langp);
2523     wp->w_s->b_langp = ga;
2524 
2525     /* For each language figure out what language to use for sound folding and
2526      * REP items.  If the language doesn't support it itself use another one
2527      * with the same name.  E.g. for "en-math" use "en". */
2528     for (i = 0; i < ga.ga_len; ++i)
2529     {
2530 	lp = LANGP_ENTRY(ga, i);
2531 
2532 	/* sound folding */
2533 	if (lp->lp_slang->sl_sal.ga_len > 0)
2534 	    /* language does sound folding itself */
2535 	    lp->lp_sallang = lp->lp_slang;
2536 	else
2537 	    /* find first similar language that does sound folding */
2538 	    for (j = 0; j < ga.ga_len; ++j)
2539 	    {
2540 		lp2 = LANGP_ENTRY(ga, j);
2541 		if (lp2->lp_slang->sl_sal.ga_len > 0
2542 			&& STRNCMP(lp->lp_slang->sl_name,
2543 					      lp2->lp_slang->sl_name, 2) == 0)
2544 		{
2545 		    lp->lp_sallang = lp2->lp_slang;
2546 		    break;
2547 		}
2548 	    }
2549 
2550 	/* REP items */
2551 	if (lp->lp_slang->sl_rep.ga_len > 0)
2552 	    /* language has REP items itself */
2553 	    lp->lp_replang = lp->lp_slang;
2554 	else
2555 	    /* find first similar language that has REP items */
2556 	    for (j = 0; j < ga.ga_len; ++j)
2557 	    {
2558 		lp2 = LANGP_ENTRY(ga, j);
2559 		if (lp2->lp_slang->sl_rep.ga_len > 0
2560 			&& STRNCMP(lp->lp_slang->sl_name,
2561 					      lp2->lp_slang->sl_name, 2) == 0)
2562 		{
2563 		    lp->lp_replang = lp2->lp_slang;
2564 		    break;
2565 		}
2566 	    }
2567     }
2568 
2569 theend:
2570     vim_free(spl_copy);
2571     recursive = FALSE;
2572     redraw_win_later(wp, NOT_VALID);
2573     return ret_msg;
2574 }
2575 
2576 /*
2577  * Clear the midword characters for buffer "buf".
2578  */
2579     static void
2580 clear_midword(win_T *wp)
2581 {
2582     vim_memset(wp->w_s->b_spell_ismw, 0, 256);
2583     VIM_CLEAR(wp->w_s->b_spell_ismw_mb);
2584 }
2585 
2586 /*
2587  * Use the "sl_midword" field of language "lp" for buffer "buf".
2588  * They add up to any currently used midword characters.
2589  */
2590     static void
2591 use_midword(slang_T *lp, win_T *wp)
2592 {
2593     char_u	*p;
2594 
2595     if (lp->sl_midword == NULL)	    /* there aren't any */
2596 	return;
2597 
2598     for (p = lp->sl_midword; *p != NUL; )
2599 	if (has_mbyte)
2600 	{
2601 	    int	    c, l, n;
2602 	    char_u  *bp;
2603 
2604 	    c = mb_ptr2char(p);
2605 	    l = (*mb_ptr2len)(p);
2606 	    if (c < 256 && l <= 2)
2607 		wp->w_s->b_spell_ismw[c] = TRUE;
2608 	    else if (wp->w_s->b_spell_ismw_mb == NULL)
2609 		/* First multi-byte char in "b_spell_ismw_mb". */
2610 		wp->w_s->b_spell_ismw_mb = vim_strnsave(p, l);
2611 	    else
2612 	    {
2613 		/* Append multi-byte chars to "b_spell_ismw_mb". */
2614 		n = (int)STRLEN(wp->w_s->b_spell_ismw_mb);
2615 		bp = vim_strnsave(wp->w_s->b_spell_ismw_mb, n + l);
2616 		if (bp != NULL)
2617 		{
2618 		    vim_free(wp->w_s->b_spell_ismw_mb);
2619 		    wp->w_s->b_spell_ismw_mb = bp;
2620 		    vim_strncpy(bp + n, p, l);
2621 		}
2622 	    }
2623 	    p += l;
2624 	}
2625 	else
2626 	    wp->w_s->b_spell_ismw[*p++] = TRUE;
2627 }
2628 
2629 /*
2630  * Find the region "region[2]" in "rp" (points to "sl_regions").
2631  * Each region is simply stored as the two characters of its name.
2632  * Returns the index if found (first is 0), REGION_ALL if not found.
2633  */
2634     static int
2635 find_region(char_u *rp, char_u *region)
2636 {
2637     int		i;
2638 
2639     for (i = 0; ; i += 2)
2640     {
2641 	if (rp[i] == NUL)
2642 	    return REGION_ALL;
2643 	if (rp[i] == region[0] && rp[i + 1] == region[1])
2644 	    break;
2645     }
2646     return i / 2;
2647 }
2648 
2649 /*
2650  * Return case type of word:
2651  * w word	0
2652  * Word		WF_ONECAP
2653  * W WORD	WF_ALLCAP
2654  * WoRd	wOrd	WF_KEEPCAP
2655  */
2656     int
2657 captype(
2658     char_u	*word,
2659     char_u	*end)	    /* When NULL use up to NUL byte. */
2660 {
2661     char_u	*p;
2662     int		c;
2663     int		firstcap;
2664     int		allcap;
2665     int		past_second = FALSE;	/* past second word char */
2666 
2667     /* find first letter */
2668     for (p = word; !spell_iswordp_nmw(p, curwin); MB_PTR_ADV(p))
2669 	if (end == NULL ? *p == NUL : p >= end)
2670 	    return 0;	    /* only non-word characters, illegal word */
2671     if (has_mbyte)
2672 	c = mb_ptr2char_adv(&p);
2673     else
2674 	c = *p++;
2675     firstcap = allcap = SPELL_ISUPPER(c);
2676 
2677     /*
2678      * Need to check all letters to find a word with mixed upper/lower.
2679      * But a word with an upper char only at start is a ONECAP.
2680      */
2681     for ( ; end == NULL ? *p != NUL : p < end; MB_PTR_ADV(p))
2682 	if (spell_iswordp_nmw(p, curwin))
2683 	{
2684 	    c = PTR2CHAR(p);
2685 	    if (!SPELL_ISUPPER(c))
2686 	    {
2687 		/* UUl -> KEEPCAP */
2688 		if (past_second && allcap)
2689 		    return WF_KEEPCAP;
2690 		allcap = FALSE;
2691 	    }
2692 	    else if (!allcap)
2693 		/* UlU -> KEEPCAP */
2694 		return WF_KEEPCAP;
2695 	    past_second = TRUE;
2696 	}
2697 
2698     if (allcap)
2699 	return WF_ALLCAP;
2700     if (firstcap)
2701 	return WF_ONECAP;
2702     return 0;
2703 }
2704 
2705 /*
2706  * Like captype() but for a KEEPCAP word add ONECAP if the word starts with a
2707  * capital.  So that make_case_word() can turn WOrd into Word.
2708  * Add ALLCAP for "WOrD".
2709  */
2710     static int
2711 badword_captype(char_u *word, char_u *end)
2712 {
2713     int		flags = captype(word, end);
2714     int		c;
2715     int		l, u;
2716     int		first;
2717     char_u	*p;
2718 
2719     if (flags & WF_KEEPCAP)
2720     {
2721 	/* Count the number of UPPER and lower case letters. */
2722 	l = u = 0;
2723 	first = FALSE;
2724 	for (p = word; p < end; MB_PTR_ADV(p))
2725 	{
2726 	    c = PTR2CHAR(p);
2727 	    if (SPELL_ISUPPER(c))
2728 	    {
2729 		++u;
2730 		if (p == word)
2731 		    first = TRUE;
2732 	    }
2733 	    else
2734 		++l;
2735 	}
2736 
2737 	/* If there are more UPPER than lower case letters suggest an
2738 	 * ALLCAP word.  Otherwise, if the first letter is UPPER then
2739 	 * suggest ONECAP.  Exception: "ALl" most likely should be "All",
2740 	 * require three upper case letters. */
2741 	if (u > l && u > 2)
2742 	    flags |= WF_ALLCAP;
2743 	else if (first)
2744 	    flags |= WF_ONECAP;
2745 
2746 	if (u >= 2 && l >= 2)	/* maCARONI maCAroni */
2747 	    flags |= WF_MIXCAP;
2748     }
2749     return flags;
2750 }
2751 
2752 /*
2753  * Delete the internal wordlist and its .spl file.
2754  */
2755     void
2756 spell_delete_wordlist(void)
2757 {
2758     char_u	fname[MAXPATHL];
2759 
2760     if (int_wordlist != NULL)
2761     {
2762 	mch_remove(int_wordlist);
2763 	int_wordlist_spl(fname);
2764 	mch_remove(fname);
2765 	VIM_CLEAR(int_wordlist);
2766     }
2767 }
2768 
2769 /*
2770  * Free all languages.
2771  */
2772     void
2773 spell_free_all(void)
2774 {
2775     slang_T	*slang;
2776     buf_T	*buf;
2777 
2778     /* Go through all buffers and handle 'spelllang'. <VN> */
2779     FOR_ALL_BUFFERS(buf)
2780 	ga_clear(&buf->b_s.b_langp);
2781 
2782     while (first_lang != NULL)
2783     {
2784 	slang = first_lang;
2785 	first_lang = slang->sl_next;
2786 	slang_free(slang);
2787     }
2788 
2789     spell_delete_wordlist();
2790 
2791     VIM_CLEAR(repl_to);
2792     VIM_CLEAR(repl_from);
2793 }
2794 
2795 /*
2796  * Clear all spelling tables and reload them.
2797  * Used after 'encoding' is set and when ":mkspell" was used.
2798  */
2799     void
2800 spell_reload(void)
2801 {
2802     win_T	*wp;
2803 
2804     /* Initialize the table for spell_iswordp(). */
2805     init_spell_chartab();
2806 
2807     /* Unload all allocated memory. */
2808     spell_free_all();
2809 
2810     /* Go through all buffers and handle 'spelllang'. */
2811     FOR_ALL_WINDOWS(wp)
2812     {
2813 	/* Only load the wordlists when 'spelllang' is set and there is a
2814 	 * window for this buffer in which 'spell' is set. */
2815 	if (*wp->w_s->b_p_spl != NUL)
2816 	{
2817 		if (wp->w_p_spell)
2818 		{
2819 		    (void)did_set_spelllang(wp);
2820 		    break;
2821 		}
2822 	}
2823     }
2824 }
2825 
2826 /*
2827  * Opposite of offset2bytes().
2828  * "pp" points to the bytes and is advanced over it.
2829  * Returns the offset.
2830  */
2831     static int
2832 bytes2offset(char_u **pp)
2833 {
2834     char_u	*p = *pp;
2835     int		nr;
2836     int		c;
2837 
2838     c = *p++;
2839     if ((c & 0x80) == 0x00)		/* 1 byte */
2840     {
2841 	nr = c - 1;
2842     }
2843     else if ((c & 0xc0) == 0x80)	/* 2 bytes */
2844     {
2845 	nr = (c & 0x3f) - 1;
2846 	nr = nr * 255 + (*p++ - 1);
2847     }
2848     else if ((c & 0xe0) == 0xc0)	/* 3 bytes */
2849     {
2850 	nr = (c & 0x1f) - 1;
2851 	nr = nr * 255 + (*p++ - 1);
2852 	nr = nr * 255 + (*p++ - 1);
2853     }
2854     else				/* 4 bytes */
2855     {
2856 	nr = (c & 0x0f) - 1;
2857 	nr = nr * 255 + (*p++ - 1);
2858 	nr = nr * 255 + (*p++ - 1);
2859 	nr = nr * 255 + (*p++ - 1);
2860     }
2861 
2862     *pp = p;
2863     return nr;
2864 }
2865 
2866 
2867 /*
2868  * Open a spell buffer.  This is a nameless buffer that is not in the buffer
2869  * list and only contains text lines.  Can use a swapfile to reduce memory
2870  * use.
2871  * Most other fields are invalid!  Esp. watch out for string options being
2872  * NULL and there is no undo info.
2873  * Returns NULL when out of memory.
2874  */
2875     buf_T *
2876 open_spellbuf(void)
2877 {
2878     buf_T	*buf;
2879 
2880     buf = (buf_T *)alloc_clear(sizeof(buf_T));
2881     if (buf != NULL)
2882     {
2883 	buf->b_spell = TRUE;
2884 	buf->b_p_swf = TRUE;	/* may create a swap file */
2885 #ifdef FEAT_CRYPT
2886 	buf->b_p_key = empty_option;
2887 #endif
2888 	ml_open(buf);
2889 	ml_open_file(buf);	/* create swap file now */
2890     }
2891     return buf;
2892 }
2893 
2894 /*
2895  * Close the buffer used for spell info.
2896  */
2897     void
2898 close_spellbuf(buf_T *buf)
2899 {
2900     if (buf != NULL)
2901     {
2902 	ml_close(buf, TRUE);
2903 	vim_free(buf);
2904     }
2905 }
2906 
2907 /*
2908  * Init the chartab used for spelling for ASCII.
2909  * EBCDIC is not supported!
2910  */
2911     void
2912 clear_spell_chartab(spelltab_T *sp)
2913 {
2914     int		i;
2915 
2916     /* Init everything to FALSE. */
2917     vim_memset(sp->st_isw, FALSE, sizeof(sp->st_isw));
2918     vim_memset(sp->st_isu, FALSE, sizeof(sp->st_isu));
2919     for (i = 0; i < 256; ++i)
2920     {
2921 	sp->st_fold[i] = i;
2922 	sp->st_upper[i] = i;
2923     }
2924 
2925     /* We include digits.  A word shouldn't start with a digit, but handling
2926      * that is done separately. */
2927     for (i = '0'; i <= '9'; ++i)
2928 	sp->st_isw[i] = TRUE;
2929     for (i = 'A'; i <= 'Z'; ++i)
2930     {
2931 	sp->st_isw[i] = TRUE;
2932 	sp->st_isu[i] = TRUE;
2933 	sp->st_fold[i] = i + 0x20;
2934     }
2935     for (i = 'a'; i <= 'z'; ++i)
2936     {
2937 	sp->st_isw[i] = TRUE;
2938 	sp->st_upper[i] = i - 0x20;
2939     }
2940 }
2941 
2942 /*
2943  * Init the chartab used for spelling.  Only depends on 'encoding'.
2944  * Called once while starting up and when 'encoding' changes.
2945  * The default is to use isalpha(), but the spell file should define the word
2946  * characters to make it possible that 'encoding' differs from the current
2947  * locale.  For utf-8 we don't use isalpha() but our own functions.
2948  */
2949     void
2950 init_spell_chartab(void)
2951 {
2952     int	    i;
2953 
2954     did_set_spelltab = FALSE;
2955     clear_spell_chartab(&spelltab);
2956     if (enc_dbcs)
2957     {
2958 	/* DBCS: assume double-wide characters are word characters. */
2959 	for (i = 128; i <= 255; ++i)
2960 	    if (MB_BYTE2LEN(i) == 2)
2961 		spelltab.st_isw[i] = TRUE;
2962     }
2963     else if (enc_utf8)
2964     {
2965 	for (i = 128; i < 256; ++i)
2966 	{
2967 	    int f = utf_fold(i);
2968 	    int u = utf_toupper(i);
2969 
2970 	    spelltab.st_isu[i] = utf_isupper(i);
2971 	    spelltab.st_isw[i] = spelltab.st_isu[i] || utf_islower(i);
2972 	    /* The folded/upper-cased value is different between latin1 and
2973 	     * utf8 for 0xb5, causing E763 for no good reason.  Use the latin1
2974 	     * value for utf-8 to avoid this. */
2975 	    spelltab.st_fold[i] = (f < 256) ? f : i;
2976 	    spelltab.st_upper[i] = (u < 256) ? u : i;
2977 	}
2978     }
2979     else
2980     {
2981 	/* Rough guess: use locale-dependent library functions. */
2982 	for (i = 128; i < 256; ++i)
2983 	{
2984 	    if (MB_ISUPPER(i))
2985 	    {
2986 		spelltab.st_isw[i] = TRUE;
2987 		spelltab.st_isu[i] = TRUE;
2988 		spelltab.st_fold[i] = MB_TOLOWER(i);
2989 	    }
2990 	    else if (MB_ISLOWER(i))
2991 	    {
2992 		spelltab.st_isw[i] = TRUE;
2993 		spelltab.st_upper[i] = MB_TOUPPER(i);
2994 	    }
2995 	}
2996     }
2997 }
2998 
2999 
3000 /*
3001  * Return TRUE if "p" points to a word character.
3002  * As a special case we see "midword" characters as word character when it is
3003  * followed by a word character.  This finds they'there but not 'they there'.
3004  * Thus this only works properly when past the first character of the word.
3005  */
3006     static int
3007 spell_iswordp(
3008     char_u	*p,
3009     win_T	*wp)	    /* buffer used */
3010 {
3011     char_u	*s;
3012     int		l;
3013     int		c;
3014 
3015     if (has_mbyte)
3016     {
3017 	l = MB_PTR2LEN(p);
3018 	s = p;
3019 	if (l == 1)
3020 	{
3021 	    /* be quick for ASCII */
3022 	    if (wp->w_s->b_spell_ismw[*p])
3023 		s = p + 1;		/* skip a mid-word character */
3024 	}
3025 	else
3026 	{
3027 	    c = mb_ptr2char(p);
3028 	    if (c < 256 ? wp->w_s->b_spell_ismw[c]
3029 		    : (wp->w_s->b_spell_ismw_mb != NULL
3030 			   && vim_strchr(wp->w_s->b_spell_ismw_mb, c) != NULL))
3031 		s = p + l;
3032 	}
3033 
3034 	c = mb_ptr2char(s);
3035 	if (c > 255)
3036 	    return spell_mb_isword_class(mb_get_class(s), wp);
3037 	return spelltab.st_isw[c];
3038     }
3039 
3040     return spelltab.st_isw[wp->w_s->b_spell_ismw[*p] ? p[1] : p[0]];
3041 }
3042 
3043 /*
3044  * Return TRUE if "p" points to a word character.
3045  * Unlike spell_iswordp() this doesn't check for "midword" characters.
3046  */
3047     int
3048 spell_iswordp_nmw(char_u *p, win_T *wp)
3049 {
3050     int		c;
3051 
3052     if (has_mbyte)
3053     {
3054 	c = mb_ptr2char(p);
3055 	if (c > 255)
3056 	    return spell_mb_isword_class(mb_get_class(p), wp);
3057 	return spelltab.st_isw[c];
3058     }
3059     return spelltab.st_isw[*p];
3060 }
3061 
3062 /*
3063  * Return TRUE if word class indicates a word character.
3064  * Only for characters above 255.
3065  * Unicode subscript and superscript are not considered word characters.
3066  * See also dbcs_class() and utf_class() in mbyte.c.
3067  */
3068     static int
3069 spell_mb_isword_class(int cl, win_T *wp)
3070 {
3071     if (wp->w_s->b_cjk)
3072 	/* East Asian characters are not considered word characters. */
3073 	return cl == 2 || cl == 0x2800;
3074     return cl >= 2 && cl != 0x2070 && cl != 0x2080;
3075 }
3076 
3077 /*
3078  * Return TRUE if "p" points to a word character.
3079  * Wide version of spell_iswordp().
3080  */
3081     static int
3082 spell_iswordp_w(int *p, win_T *wp)
3083 {
3084     int		*s;
3085 
3086     if (*p < 256 ? wp->w_s->b_spell_ismw[*p]
3087 		 : (wp->w_s->b_spell_ismw_mb != NULL
3088 			     && vim_strchr(wp->w_s->b_spell_ismw_mb, *p) != NULL))
3089 	s = p + 1;
3090     else
3091 	s = p;
3092 
3093     if (*s > 255)
3094     {
3095 	if (enc_utf8)
3096 	    return spell_mb_isword_class(utf_class(*s), wp);
3097 	if (enc_dbcs)
3098 	    return spell_mb_isword_class(
3099 				dbcs_class((unsigned)*s >> 8, *s & 0xff), wp);
3100 	return 0;
3101     }
3102     return spelltab.st_isw[*s];
3103 }
3104 
3105 /*
3106  * Case-fold "str[len]" into "buf[buflen]".  The result is NUL terminated.
3107  * Uses the character definitions from the .spl file.
3108  * When using a multi-byte 'encoding' the length may change!
3109  * Returns FAIL when something wrong.
3110  */
3111     int
3112 spell_casefold(
3113     char_u	*str,
3114     int		len,
3115     char_u	*buf,
3116     int		buflen)
3117 {
3118     int		i;
3119 
3120     if (len >= buflen)
3121     {
3122 	buf[0] = NUL;
3123 	return FAIL;		/* result will not fit */
3124     }
3125 
3126     if (has_mbyte)
3127     {
3128 	int	outi = 0;
3129 	char_u	*p;
3130 	int	c;
3131 
3132 	/* Fold one character at a time. */
3133 	for (p = str; p < str + len; )
3134 	{
3135 	    if (outi + MB_MAXBYTES > buflen)
3136 	    {
3137 		buf[outi] = NUL;
3138 		return FAIL;
3139 	    }
3140 	    c = mb_cptr2char_adv(&p);
3141 	    outi += mb_char2bytes(SPELL_TOFOLD(c), buf + outi);
3142 	}
3143 	buf[outi] = NUL;
3144     }
3145     else
3146     {
3147 	/* Be quick for non-multibyte encodings. */
3148 	for (i = 0; i < len; ++i)
3149 	    buf[i] = spelltab.st_fold[str[i]];
3150 	buf[i] = NUL;
3151     }
3152 
3153     return OK;
3154 }
3155 
/*
 * Values for "sps_flags": the suggestion method selected with the "best",
 * "fast" or "double" item in 'spellsuggest'.  spell_check_sps() stores at
 * most one of them and falls back to SPS_BEST when none is given.
 */
#define SPS_BEST    1
#define SPS_FAST    2
#define SPS_DOUBLE  4

/* Both are set by spell_check_sps() from the value of 'spellsuggest'. */
static int sps_flags = SPS_BEST;	/* flags from 'spellsuggest' */
static int sps_limit = 9999;		/* max nr of suggestions given */
3163 
3164 /*
3165  * Check the 'spellsuggest' option.  Return FAIL if it's wrong.
3166  * Sets "sps_flags" and "sps_limit".
3167  */
3168     int
3169 spell_check_sps(void)
3170 {
3171     char_u	*p;
3172     char_u	*s;
3173     char_u	buf[MAXPATHL];
3174     int		f;
3175 
3176     sps_flags = 0;
3177     sps_limit = 9999;
3178 
3179     for (p = p_sps; *p != NUL; )
3180     {
3181 	copy_option_part(&p, buf, MAXPATHL, ",");
3182 
3183 	f = 0;
3184 	if (VIM_ISDIGIT(*buf))
3185 	{
3186 	    s = buf;
3187 	    sps_limit = getdigits(&s);
3188 	    if (*s != NUL && !VIM_ISDIGIT(*s))
3189 		f = -1;
3190 	}
3191 	else if (STRCMP(buf, "best") == 0)
3192 	    f = SPS_BEST;
3193 	else if (STRCMP(buf, "fast") == 0)
3194 	    f = SPS_FAST;
3195 	else if (STRCMP(buf, "double") == 0)
3196 	    f = SPS_DOUBLE;
3197 	else if (STRNCMP(buf, "expr:", 5) != 0
3198 		&& STRNCMP(buf, "file:", 5) != 0)
3199 	    f = -1;
3200 
3201 	if (f == -1 || (sps_flags != 0 && f != 0))
3202 	{
3203 	    sps_flags = SPS_BEST;
3204 	    sps_limit = 9999;
3205 	    return FAIL;
3206 	}
3207 	if (f != 0)
3208 	    sps_flags = f;
3209     }
3210 
3211     if (sps_flags == 0)
3212 	sps_flags = SPS_BEST;
3213 
3214     return OK;
3215 }
3216 
3217 /*
3218  * "z=": Find badly spelled word under or after the cursor.
3219  * Give suggestions for the properly spelled word.
3220  * In Visual mode use the highlighted word as the bad word.
3221  * When "count" is non-zero use that suggestion.
3222  */
3223     void
3224 spell_suggest(int count)
3225 {
3226     char_u	*line;
3227     pos_T	prev_cursor = curwin->w_cursor;
3228     char_u	wcopy[MAXWLEN + 2];
3229     char_u	*p;
3230     int		i;
3231     int		c;
3232     suginfo_T	sug;
3233     suggest_T	*stp;
3234     int		mouse_used;
3235     int		need_cap;
3236     int		limit;
3237     int		selected = count;
3238     int		badlen = 0;
3239     int		msg_scroll_save = msg_scroll;
3240 
3241     if (no_spell_checking(curwin))
3242 	return;
3243 
3244     if (VIsual_active)
3245     {
3246 	/* Use the Visually selected text as the bad word.  But reject
3247 	 * a multi-line selection. */
3248 	if (curwin->w_cursor.lnum != VIsual.lnum)
3249 	{
3250 	    vim_beep(BO_SPELL);
3251 	    return;
3252 	}
3253 	badlen = (int)curwin->w_cursor.col - (int)VIsual.col;
3254 	if (badlen < 0)
3255 	    badlen = -badlen;
3256 	else
3257 	    curwin->w_cursor.col = VIsual.col;
3258 	++badlen;
3259 	end_visual_mode();
3260     }
3261     /* Find the start of the badly spelled word. */
3262     else if (spell_move_to(curwin, FORWARD, TRUE, TRUE, NULL) == 0
3263 	    || curwin->w_cursor.col > prev_cursor.col)
3264     {
3265 	/* No bad word or it starts after the cursor: use the word under the
3266 	 * cursor. */
3267 	curwin->w_cursor = prev_cursor;
3268 	line = ml_get_curline();
3269 	p = line + curwin->w_cursor.col;
3270 	/* Backup to before start of word. */
3271 	while (p > line && spell_iswordp_nmw(p, curwin))
3272 	    MB_PTR_BACK(line, p);
3273 	/* Forward to start of word. */
3274 	while (*p != NUL && !spell_iswordp_nmw(p, curwin))
3275 	    MB_PTR_ADV(p);
3276 
3277 	if (!spell_iswordp_nmw(p, curwin))		/* No word found. */
3278 	{
3279 	    beep_flush();
3280 	    return;
3281 	}
3282 	curwin->w_cursor.col = (colnr_T)(p - line);
3283     }
3284 
3285     /* Get the word and its length. */
3286 
3287     /* Figure out if the word should be capitalised. */
3288     need_cap = check_need_cap(curwin->w_cursor.lnum, curwin->w_cursor.col);
3289 
3290     /* Make a copy of current line since autocommands may free the line. */
3291     line = vim_strsave(ml_get_curline());
3292     if (line == NULL)
3293 	goto skip;
3294 
3295     /* Get the list of suggestions.  Limit to 'lines' - 2 or the number in
3296      * 'spellsuggest', whatever is smaller. */
3297     if (sps_limit > (int)Rows - 2)
3298 	limit = (int)Rows - 2;
3299     else
3300 	limit = sps_limit;
3301     spell_find_suggest(line + curwin->w_cursor.col, badlen, &sug, limit,
3302 							TRUE, need_cap, TRUE);
3303 
3304     if (sug.su_ga.ga_len == 0)
3305 	msg(_("Sorry, no suggestions"));
3306     else if (count > 0)
3307     {
3308 	if (count > sug.su_ga.ga_len)
3309 	    smsg(_("Sorry, only %ld suggestions"),
3310 						      (long)sug.su_ga.ga_len);
3311     }
3312     else
3313     {
3314 	VIM_CLEAR(repl_from);
3315 	VIM_CLEAR(repl_to);
3316 
3317 #ifdef FEAT_RIGHTLEFT
3318 	/* When 'rightleft' is set the list is drawn right-left. */
3319 	cmdmsg_rl = curwin->w_p_rl;
3320 	if (cmdmsg_rl)
3321 	    msg_col = Columns - 1;
3322 #endif
3323 
3324 	/* List the suggestions. */
3325 	msg_start();
3326 	msg_row = Rows - 1;	/* for when 'cmdheight' > 1 */
3327 	lines_left = Rows;	/* avoid more prompt */
3328 	vim_snprintf((char *)IObuff, IOSIZE, _("Change \"%.*s\" to:"),
3329 						sug.su_badlen, sug.su_badptr);
3330 #ifdef FEAT_RIGHTLEFT
3331 	if (cmdmsg_rl && STRNCMP(IObuff, "Change", 6) == 0)
3332 	{
3333 	    /* And now the rabbit from the high hat: Avoid showing the
3334 	     * untranslated message rightleft. */
3335 	    vim_snprintf((char *)IObuff, IOSIZE, ":ot \"%.*s\" egnahC",
3336 						sug.su_badlen, sug.su_badptr);
3337 	}
3338 #endif
3339 	msg_puts((char *)IObuff);
3340 	msg_clr_eos();
3341 	msg_putchar('\n');
3342 
3343 	msg_scroll = TRUE;
3344 	for (i = 0; i < sug.su_ga.ga_len; ++i)
3345 	{
3346 	    stp = &SUG(sug.su_ga, i);
3347 
3348 	    /* The suggested word may replace only part of the bad word, add
3349 	     * the not replaced part. */
3350 	    vim_strncpy(wcopy, stp->st_word, MAXWLEN);
3351 	    if (sug.su_badlen > stp->st_orglen)
3352 		vim_strncpy(wcopy + stp->st_wordlen,
3353 					       sug.su_badptr + stp->st_orglen,
3354 					      sug.su_badlen - stp->st_orglen);
3355 	    vim_snprintf((char *)IObuff, IOSIZE, "%2d", i + 1);
3356 #ifdef FEAT_RIGHTLEFT
3357 	    if (cmdmsg_rl)
3358 		rl_mirror(IObuff);
3359 #endif
3360 	    msg_puts((char *)IObuff);
3361 
3362 	    vim_snprintf((char *)IObuff, IOSIZE, " \"%s\"", wcopy);
3363 	    msg_puts((char *)IObuff);
3364 
3365 	    /* The word may replace more than "su_badlen". */
3366 	    if (sug.su_badlen < stp->st_orglen)
3367 	    {
3368 		vim_snprintf((char *)IObuff, IOSIZE, _(" < \"%.*s\""),
3369 					       stp->st_orglen, sug.su_badptr);
3370 		msg_puts((char *)IObuff);
3371 	    }
3372 
3373 	    if (p_verbose > 0)
3374 	    {
3375 		/* Add the score. */
3376 		if (sps_flags & (SPS_DOUBLE | SPS_BEST))
3377 		    vim_snprintf((char *)IObuff, IOSIZE, " (%s%d - %d)",
3378 			stp->st_salscore ? "s " : "",
3379 			stp->st_score, stp->st_altscore);
3380 		else
3381 		    vim_snprintf((char *)IObuff, IOSIZE, " (%d)",
3382 			    stp->st_score);
3383 #ifdef FEAT_RIGHTLEFT
3384 		if (cmdmsg_rl)
3385 		    /* Mirror the numbers, but keep the leading space. */
3386 		    rl_mirror(IObuff + 1);
3387 #endif
3388 		msg_advance(30);
3389 		msg_puts((char *)IObuff);
3390 	    }
3391 	    msg_putchar('\n');
3392 	}
3393 
3394 #ifdef FEAT_RIGHTLEFT
3395 	cmdmsg_rl = FALSE;
3396 	msg_col = 0;
3397 #endif
3398 	/* Ask for choice. */
3399 	selected = prompt_for_number(&mouse_used);
3400 	if (mouse_used)
3401 	    selected -= lines_left;
3402 	lines_left = Rows;		/* avoid more prompt */
3403 	/* don't delay for 'smd' in normal_cmd() */
3404 	msg_scroll = msg_scroll_save;
3405     }
3406 
3407     if (selected > 0 && selected <= sug.su_ga.ga_len && u_save_cursor() == OK)
3408     {
3409 	/* Save the from and to text for :spellrepall. */
3410 	stp = &SUG(sug.su_ga, selected - 1);
3411 	if (sug.su_badlen > stp->st_orglen)
3412 	{
3413 	    /* Replacing less than "su_badlen", append the remainder to
3414 	     * repl_to. */
3415 	    repl_from = vim_strnsave(sug.su_badptr, sug.su_badlen);
3416 	    vim_snprintf((char *)IObuff, IOSIZE, "%s%.*s", stp->st_word,
3417 		    sug.su_badlen - stp->st_orglen,
3418 					      sug.su_badptr + stp->st_orglen);
3419 	    repl_to = vim_strsave(IObuff);
3420 	}
3421 	else
3422 	{
3423 	    /* Replacing su_badlen or more, use the whole word. */
3424 	    repl_from = vim_strnsave(sug.su_badptr, stp->st_orglen);
3425 	    repl_to = vim_strsave(stp->st_word);
3426 	}
3427 
3428 	/* Replace the word. */
3429 	p = alloc((unsigned)STRLEN(line) - stp->st_orglen
3430 						       + stp->st_wordlen + 1);
3431 	if (p != NULL)
3432 	{
3433 	    c = (int)(sug.su_badptr - line);
3434 	    mch_memmove(p, line, c);
3435 	    STRCPY(p + c, stp->st_word);
3436 	    STRCAT(p, sug.su_badptr + stp->st_orglen);
3437 	    ml_replace(curwin->w_cursor.lnum, p, FALSE);
3438 	    curwin->w_cursor.col = c;
3439 
3440 	    /* For redo we use a change-word command. */
3441 	    ResetRedobuff();
3442 	    AppendToRedobuff((char_u *)"ciw");
3443 	    AppendToRedobuffLit(p + c,
3444 			    stp->st_wordlen + sug.su_badlen - stp->st_orglen);
3445 	    AppendCharToRedobuff(ESC);
3446 
3447 	    /* After this "p" may be invalid. */
3448 	    changed_bytes(curwin->w_cursor.lnum, c);
3449 	}
3450     }
3451     else
3452 	curwin->w_cursor = prev_cursor;
3453 
3454     spell_find_cleanup(&sug);
3455 skip:
3456     vim_free(line);
3457 }
3458 
3459 /*
3460  * Check if the word at line "lnum" column "col" is required to start with a
3461  * capital.  This uses 'spellcapcheck' of the current buffer.
3462  */
3463     static int
3464 check_need_cap(linenr_T lnum, colnr_T col)
3465 {
3466     int		need_cap = FALSE;
3467     char_u	*line;
3468     char_u	*line_copy = NULL;
3469     char_u	*p;
3470     colnr_T	endcol;
3471     regmatch_T	regmatch;
3472 
3473     if (curwin->w_s->b_cap_prog == NULL)
3474 	return FALSE;
3475 
3476     line = ml_get_curline();
3477     endcol = 0;
3478     if (getwhitecols(line) >= (int)col)
3479     {
3480 	/* At start of line, check if previous line is empty or sentence
3481 	 * ends there. */
3482 	if (lnum == 1)
3483 	    need_cap = TRUE;
3484 	else
3485 	{
3486 	    line = ml_get(lnum - 1);
3487 	    if (*skipwhite(line) == NUL)
3488 		need_cap = TRUE;
3489 	    else
3490 	    {
3491 		/* Append a space in place of the line break. */
3492 		line_copy = concat_str(line, (char_u *)" ");
3493 		line = line_copy;
3494 		endcol = (colnr_T)STRLEN(line);
3495 	    }
3496 	}
3497     }
3498     else
3499 	endcol = col;
3500 
3501     if (endcol > 0)
3502     {
3503 	/* Check if sentence ends before the bad word. */
3504 	regmatch.regprog = curwin->w_s->b_cap_prog;
3505 	regmatch.rm_ic = FALSE;
3506 	p = line + endcol;
3507 	for (;;)
3508 	{
3509 	    MB_PTR_BACK(line, p);
3510 	    if (p == line || spell_iswordp_nmw(p, curwin))
3511 		break;
3512 	    if (vim_regexec(&regmatch, p, 0)
3513 					 && regmatch.endp[0] == line + endcol)
3514 	    {
3515 		need_cap = TRUE;
3516 		break;
3517 	    }
3518 	}
3519 	curwin->w_s->b_cap_prog = regmatch.regprog;
3520     }
3521 
3522     vim_free(line_copy);
3523 
3524     return need_cap;
3525 }
3526 
3527 
3528 /*
3529  * ":spellrepall"
3530  */
3531     void
3532 ex_spellrepall(exarg_T *eap UNUSED)
3533 {
3534     pos_T	pos = curwin->w_cursor;
3535     char_u	*frompat;
3536     int		addlen;
3537     char_u	*line;
3538     char_u	*p;
3539     int		save_ws = p_ws;
3540     linenr_T	prev_lnum = 0;
3541 
3542     if (repl_from == NULL || repl_to == NULL)
3543     {
3544 	emsg(_("E752: No previous spell replacement"));
3545 	return;
3546     }
3547     addlen = (int)(STRLEN(repl_to) - STRLEN(repl_from));
3548 
3549     frompat = alloc((unsigned)STRLEN(repl_from) + 7);
3550     if (frompat == NULL)
3551 	return;
3552     sprintf((char *)frompat, "\\V\\<%s\\>", repl_from);
3553     p_ws = FALSE;
3554 
3555     sub_nsubs = 0;
3556     sub_nlines = 0;
3557     curwin->w_cursor.lnum = 0;
3558     while (!got_int)
3559     {
3560 	if (do_search(NULL, '/', frompat, 1L, SEARCH_KEEP, NULL, NULL) == 0
3561 						   || u_save_cursor() == FAIL)
3562 	    break;
3563 
3564 	/* Only replace when the right word isn't there yet.  This happens
3565 	 * when changing "etc" to "etc.". */
3566 	line = ml_get_curline();
3567 	if (addlen <= 0 || STRNCMP(line + curwin->w_cursor.col,
3568 					       repl_to, STRLEN(repl_to)) != 0)
3569 	{
3570 	    p = alloc((unsigned)STRLEN(line) + addlen + 1);
3571 	    if (p == NULL)
3572 		break;
3573 	    mch_memmove(p, line, curwin->w_cursor.col);
3574 	    STRCPY(p + curwin->w_cursor.col, repl_to);
3575 	    STRCAT(p, line + curwin->w_cursor.col + STRLEN(repl_from));
3576 	    ml_replace(curwin->w_cursor.lnum, p, FALSE);
3577 	    changed_bytes(curwin->w_cursor.lnum, curwin->w_cursor.col);
3578 
3579 	    if (curwin->w_cursor.lnum != prev_lnum)
3580 	    {
3581 		++sub_nlines;
3582 		prev_lnum = curwin->w_cursor.lnum;
3583 	    }
3584 	    ++sub_nsubs;
3585 	}
3586 	curwin->w_cursor.col += (colnr_T)STRLEN(repl_to);
3587     }
3588 
3589     p_ws = save_ws;
3590     curwin->w_cursor = pos;
3591     vim_free(frompat);
3592 
3593     if (sub_nsubs == 0)
3594 	semsg(_("E753: Not found: %s"), repl_from);
3595     else
3596 	do_sub_msg(FALSE);
3597 }
3598 
3599 /*
3600  * Find spell suggestions for "word".  Return them in the growarray "*gap" as
3601  * a list of allocated strings.
3602  */
3603     void
3604 spell_suggest_list(
3605     garray_T	*gap,
3606     char_u	*word,
3607     int		maxcount,	/* maximum nr of suggestions */
3608     int		need_cap,	/* 'spellcapcheck' matched */
3609     int		interactive)
3610 {
3611     suginfo_T	sug;
3612     int		i;
3613     suggest_T	*stp;
3614     char_u	*wcopy;
3615 
3616     spell_find_suggest(word, 0, &sug, maxcount, FALSE, need_cap, interactive);
3617 
3618     /* Make room in "gap". */
3619     ga_init2(gap, sizeof(char_u *), sug.su_ga.ga_len + 1);
3620     if (ga_grow(gap, sug.su_ga.ga_len) == OK)
3621     {
3622 	for (i = 0; i < sug.su_ga.ga_len; ++i)
3623 	{
3624 	    stp = &SUG(sug.su_ga, i);
3625 
3626 	    /* The suggested word may replace only part of "word", add the not
3627 	     * replaced part. */
3628 	    wcopy = alloc(stp->st_wordlen
3629 		      + (unsigned)STRLEN(sug.su_badptr + stp->st_orglen) + 1);
3630 	    if (wcopy == NULL)
3631 		break;
3632 	    STRCPY(wcopy, stp->st_word);
3633 	    STRCPY(wcopy + stp->st_wordlen, sug.su_badptr + stp->st_orglen);
3634 	    ((char_u **)gap->ga_data)[gap->ga_len++] = wcopy;
3635 	}
3636     }
3637 
3638     spell_find_cleanup(&sug);
3639 }
3640 
3641 /*
3642  * Find spell suggestions for the word at the start of "badptr".
3643  * Return the suggestions in "su->su_ga".
3644  * The maximum number of suggestions is "maxcount".
3645  * Note: does use info for the current window.
3646  * This is based on the mechanisms of Aspell, but completely reimplemented.
3647  */
3648     static void
3649 spell_find_suggest(
3650     char_u	*badptr,
3651     int		badlen,		/* length of bad word or 0 if unknown */
3652     suginfo_T	*su,
3653     int		maxcount,
3654     int		banbadword,	/* don't include badword in suggestions */
3655     int		need_cap,	/* word should start with capital */
3656     int		interactive)
3657 {
3658     hlf_T	attr = HLF_COUNT;
3659     char_u	buf[MAXPATHL];
3660     char_u	*p;
3661     int		do_combine = FALSE;
3662     char_u	*sps_copy;
3663 #ifdef FEAT_EVAL
3664     static int	expr_busy = FALSE;
3665 #endif
3666     int		c;
3667     int		i;
3668     langp_T	*lp;
3669 
3670     /*
3671      * Set the info in "*su".
3672      */
3673     vim_memset(su, 0, sizeof(suginfo_T));
3674     ga_init2(&su->su_ga, (int)sizeof(suggest_T), 10);
3675     ga_init2(&su->su_sga, (int)sizeof(suggest_T), 10);
3676     if (*badptr == NUL)
3677 	return;
3678     hash_init(&su->su_banned);
3679 
3680     su->su_badptr = badptr;
3681     if (badlen != 0)
3682 	su->su_badlen = badlen;
3683     else
3684 	su->su_badlen = spell_check(curwin, su->su_badptr, &attr, NULL, FALSE);
3685     su->su_maxcount = maxcount;
3686     su->su_maxscore = SCORE_MAXINIT;
3687 
3688     if (su->su_badlen >= MAXWLEN)
3689 	su->su_badlen = MAXWLEN - 1;	/* just in case */
3690     vim_strncpy(su->su_badword, su->su_badptr, su->su_badlen);
3691     (void)spell_casefold(su->su_badptr, su->su_badlen,
3692 						    su->su_fbadword, MAXWLEN);
3693     /* TODO: make this work if the case-folded text is longer than the original
3694      * text. Currently an illegal byte causes wrong pointer computations. */
3695     su->su_fbadword[su->su_badlen] = NUL;
3696 
3697     /* get caps flags for bad word */
3698     su->su_badflags = badword_captype(su->su_badptr,
3699 					       su->su_badptr + su->su_badlen);
3700     if (need_cap)
3701 	su->su_badflags |= WF_ONECAP;
3702 
3703     /* Find the default language for sound folding.  We simply use the first
3704      * one in 'spelllang' that supports sound folding.  That's good for when
3705      * using multiple files for one language, it's not that bad when mixing
3706      * languages (e.g., "pl,en"). */
3707     for (i = 0; i < curbuf->b_s.b_langp.ga_len; ++i)
3708     {
3709 	lp = LANGP_ENTRY(curbuf->b_s.b_langp, i);
3710 	if (lp->lp_sallang != NULL)
3711 	{
3712 	    su->su_sallang = lp->lp_sallang;
3713 	    break;
3714 	}
3715     }
3716 
3717     /* Soundfold the bad word with the default sound folding, so that we don't
3718      * have to do this many times. */
3719     if (su->su_sallang != NULL)
3720 	spell_soundfold(su->su_sallang, su->su_fbadword, TRUE,
3721 							  su->su_sal_badword);
3722 
3723     /* If the word is not capitalised and spell_check() doesn't consider the
3724      * word to be bad then it might need to be capitalised.  Add a suggestion
3725      * for that. */
3726     c = PTR2CHAR(su->su_badptr);
3727     if (!SPELL_ISUPPER(c) && attr == HLF_COUNT)
3728     {
3729 	make_case_word(su->su_badword, buf, WF_ONECAP);
3730 	add_suggestion(su, &su->su_ga, buf, su->su_badlen, SCORE_ICASE,
3731 					      0, TRUE, su->su_sallang, FALSE);
3732     }
3733 
3734     /* Ban the bad word itself.  It may appear in another region. */
3735     if (banbadword)
3736 	add_banned(su, su->su_badword);
3737 
3738     /* Make a copy of 'spellsuggest', because the expression may change it. */
3739     sps_copy = vim_strsave(p_sps);
3740     if (sps_copy == NULL)
3741 	return;
3742 
3743     /* Loop over the items in 'spellsuggest'. */
3744     for (p = sps_copy; *p != NUL; )
3745     {
3746 	copy_option_part(&p, buf, MAXPATHL, ",");
3747 
3748 	if (STRNCMP(buf, "expr:", 5) == 0)
3749 	{
3750 #ifdef FEAT_EVAL
3751 	    /* Evaluate an expression.  Skip this when called recursively,
3752 	     * when using spellsuggest() in the expression. */
3753 	    if (!expr_busy)
3754 	    {
3755 		expr_busy = TRUE;
3756 		spell_suggest_expr(su, buf + 5);
3757 		expr_busy = FALSE;
3758 	    }
3759 #endif
3760 	}
3761 	else if (STRNCMP(buf, "file:", 5) == 0)
3762 	    /* Use list of suggestions in a file. */
3763 	    spell_suggest_file(su, buf + 5);
3764 	else
3765 	{
3766 	    /* Use internal method. */
3767 	    spell_suggest_intern(su, interactive);
3768 	    if (sps_flags & SPS_DOUBLE)
3769 		do_combine = TRUE;
3770 	}
3771     }
3772 
3773     vim_free(sps_copy);
3774 
3775     if (do_combine)
3776 	/* Combine the two list of suggestions.  This must be done last,
3777 	 * because sorting changes the order again. */
3778 	score_combine(su);
3779 }
3780 
#ifdef FEAT_EVAL
/*
 * Find suggestions by evaluating expression "expr" for the bad word in "su".
 * The expression is expected to result in a list; every item that is itself
 * a list supplies a word and a score.  Items with a negative score or a
 * score above "su_maxscore" are ignored.
 */
    static void
spell_suggest_expr(suginfo_T *su, char_u *expr)
{
    list_T	*result;
    listitem_T	*item;
    char_u	*word;
    int		score;

    /* The work is split up in a few parts to avoid having to export
     * suginfo_T.
     * First evaluate the expression and get the resulting list. */
    result = eval_spell_expr(su->su_badword, expr);
    if (result != NULL)
    {
	/* Loop over the items in the list. */
	for (item = result->lv_first; item != NULL; item = item->li_next)
	{
	    if (item->li_tv.v_type != VAR_LIST)
		continue;

	    /* Get the word and the score from the items. */
	    score = get_spellword(item->li_tv.vval.v_list, &word);
	    if (score < 0 || score > su->su_maxscore)
		continue;
	    add_suggestion(su, &su->su_ga, word, su->su_badlen,
				       score, 0, TRUE, su->su_sallang, FALSE);
	}
	list_unref(result);
    }

    /* Remove bogus suggestions, sort and truncate at "maxcount". */
    check_suggestions(su, &su->su_ga);
    (void)cleanup_suggestions(&su->su_ga, su->su_maxscore, su->su_maxcount);
}
#endif
3817 
3818 /*
3819  * Find suggestions in file "fname".  Used for "file:" in 'spellsuggest'.
3820  */
3821     static void
3822 spell_suggest_file(suginfo_T *su, char_u *fname)
3823 {
3824     FILE	*fd;
3825     char_u	line[MAXWLEN * 2];
3826     char_u	*p;
3827     int		len;
3828     char_u	cword[MAXWLEN];
3829 
3830     /* Open the file. */
3831     fd = mch_fopen((char *)fname, "r");
3832     if (fd == NULL)
3833     {
3834 	semsg(_(e_notopen), fname);
3835 	return;
3836     }
3837 
3838     /* Read it line by line. */
3839     while (!vim_fgets(line, MAXWLEN * 2, fd) && !got_int)
3840     {
3841 	line_breakcheck();
3842 
3843 	p = vim_strchr(line, '/');
3844 	if (p == NULL)
3845 	    continue;	    /* No Tab found, just skip the line. */
3846 	*p++ = NUL;
3847 	if (STRICMP(su->su_badword, line) == 0)
3848 	{
3849 	    /* Match!  Isolate the good word, until CR or NL. */
3850 	    for (len = 0; p[len] >= ' '; ++len)
3851 		;
3852 	    p[len] = NUL;
3853 
3854 	    /* If the suggestion doesn't have specific case duplicate the case
3855 	     * of the bad word. */
3856 	    if (captype(p, NULL) == 0)
3857 	    {
3858 		make_case_word(p, cword, su->su_badflags);
3859 		p = cword;
3860 	    }
3861 
3862 	    add_suggestion(su, &su->su_ga, p, su->su_badlen,
3863 				  SCORE_FILE, 0, TRUE, su->su_sallang, FALSE);
3864 	}
3865     }
3866 
3867     fclose(fd);
3868 
3869     /* Remove bogus suggestions, sort and truncate at "maxcount". */
3870     check_suggestions(su, &su->su_ga);
3871     (void)cleanup_suggestions(&su->su_ga, su->su_maxscore, su->su_maxcount);
3872 }
3873 
3874 /*
3875  * Find suggestions for the internal method indicated by "sps_flags".
3876  */
3877     static void
3878 spell_suggest_intern(suginfo_T *su, int interactive)
3879 {
3880     /*
3881      * Load the .sug file(s) that are available and not done yet.
3882      */
3883     suggest_load_files();
3884 
3885     /*
3886      * 1. Try special cases, such as repeating a word: "the the" -> "the".
3887      *
3888      * Set a maximum score to limit the combination of operations that is
3889      * tried.
3890      */
3891     suggest_try_special(su);
3892 
3893     /*
3894      * 2. Try inserting/deleting/swapping/changing a letter, use REP entries
3895      *    from the .aff file and inserting a space (split the word).
3896      */
3897     suggest_try_change(su);
3898 
3899     /* For the resulting top-scorers compute the sound-a-like score. */
3900     if (sps_flags & SPS_DOUBLE)
3901 	score_comp_sal(su);
3902 
3903     /*
3904      * 3. Try finding sound-a-like words.
3905      */
3906     if ((sps_flags & SPS_FAST) == 0)
3907     {
3908 	if (sps_flags & SPS_BEST)
3909 	    /* Adjust the word score for the suggestions found so far for how
3910 	     * they sounds like. */
3911 	    rescore_suggestions(su);
3912 
3913 	/*
3914 	 * While going through the soundfold tree "su_maxscore" is the score
3915 	 * for the soundfold word, limits the changes that are being tried,
3916 	 * and "su_sfmaxscore" the rescored score, which is set by
3917 	 * cleanup_suggestions().
3918 	 * First find words with a small edit distance, because this is much
3919 	 * faster and often already finds the top-N suggestions.  If we didn't
3920 	 * find many suggestions try again with a higher edit distance.
3921 	 * "sl_sounddone" is used to avoid doing the same word twice.
3922 	 */
3923 	suggest_try_soundalike_prep();
3924 	su->su_maxscore = SCORE_SFMAX1;
3925 	su->su_sfmaxscore = SCORE_MAXINIT * 3;
3926 	suggest_try_soundalike(su);
3927 	if (su->su_ga.ga_len < SUG_CLEAN_COUNT(su))
3928 	{
3929 	    /* We didn't find enough matches, try again, allowing more
3930 	     * changes to the soundfold word. */
3931 	    su->su_maxscore = SCORE_SFMAX2;
3932 	    suggest_try_soundalike(su);
3933 	    if (su->su_ga.ga_len < SUG_CLEAN_COUNT(su))
3934 	    {
3935 		/* Still didn't find enough matches, try again, allowing even
3936 		 * more changes to the soundfold word. */
3937 		su->su_maxscore = SCORE_SFMAX3;
3938 		suggest_try_soundalike(su);
3939 	    }
3940 	}
3941 	su->su_maxscore = su->su_sfmaxscore;
3942 	suggest_try_soundalike_finish();
3943     }
3944 
3945     /* When CTRL-C was hit while searching do show the results.  Only clear
3946      * got_int when using a command, not for spellsuggest(). */
3947     ui_breakcheck();
3948     if (interactive && got_int)
3949     {
3950 	(void)vgetc();
3951 	got_int = FALSE;
3952     }
3953 
3954     if ((sps_flags & SPS_DOUBLE) == 0 && su->su_ga.ga_len != 0)
3955     {
3956 	if (sps_flags & SPS_BEST)
3957 	    /* Adjust the word score for how it sounds like. */
3958 	    rescore_suggestions(su);
3959 
3960 	/* Remove bogus suggestions, sort and truncate at "maxcount". */
3961 	check_suggestions(su, &su->su_ga);
3962 	(void)cleanup_suggestions(&su->su_ga, su->su_maxscore, su->su_maxcount);
3963     }
3964 }
3965 
3966 /*
3967  * Free the info put in "*su" by spell_find_suggest().
3968  */
3969     static void
3970 spell_find_cleanup(suginfo_T *su)
3971 {
3972     int		i;
3973 
3974     /* Free the suggestions. */
3975     for (i = 0; i < su->su_ga.ga_len; ++i)
3976 	vim_free(SUG(su->su_ga, i).st_word);
3977     ga_clear(&su->su_ga);
3978     for (i = 0; i < su->su_sga.ga_len; ++i)
3979 	vim_free(SUG(su->su_sga, i).st_word);
3980     ga_clear(&su->su_sga);
3981 
3982     /* Free the banned words. */
3983     hash_clear_all(&su->su_banned, 0);
3984 }
3985 
3986 /*
3987  * Make a copy of "word", with the first letter upper or lower cased, to
3988  * "wcopy[MAXWLEN]".  "word" must not be empty.
3989  * The result is NUL terminated.
3990  */
3991     void
3992 onecap_copy(
3993     char_u	*word,
3994     char_u	*wcopy,
3995     int		upper)	    /* TRUE: first letter made upper case */
3996 {
3997     char_u	*p;
3998     int		c;
3999     int		l;
4000 
4001     p = word;
4002     if (has_mbyte)
4003 	c = mb_cptr2char_adv(&p);
4004     else
4005 	c = *p++;
4006     if (upper)
4007 	c = SPELL_TOUPPER(c);
4008     else
4009 	c = SPELL_TOFOLD(c);
4010     if (has_mbyte)
4011 	l = mb_char2bytes(c, wcopy);
4012     else
4013     {
4014 	l = 1;
4015 	wcopy[0] = c;
4016     }
4017     vim_strncpy(wcopy + l, p, MAXWLEN - l - 1);
4018 }
4019 
4020 /*
4021  * Make a copy of "word" with all the letters upper cased into
4022  * "wcopy[MAXWLEN]".  The result is NUL terminated.
4023  */
4024     static void
4025 allcap_copy(char_u *word, char_u *wcopy)
4026 {
4027     char_u	*s;
4028     char_u	*d;
4029     int		c;
4030 
4031     d = wcopy;
4032     for (s = word; *s != NUL; )
4033     {
4034 	if (has_mbyte)
4035 	    c = mb_cptr2char_adv(&s);
4036 	else
4037 	    c = *s++;
4038 
4039 	/* We only change 0xdf to SS when we are certain latin1 is used.  It
4040 	 * would cause weird errors in other 8-bit encodings. */
4041 	if (enc_latin1like && c == 0xdf)
4042 	{
4043 	    c = 'S';
4044 	    if (d - wcopy >= MAXWLEN - 1)
4045 		break;
4046 	    *d++ = c;
4047 	}
4048 	else
4049 	    c = SPELL_TOUPPER(c);
4050 
4051 	if (has_mbyte)
4052 	{
4053 	    if (d - wcopy >= MAXWLEN - MB_MAXBYTES)
4054 		break;
4055 	    d += mb_char2bytes(c, d);
4056 	}
4057 	else
4058 	{
4059 	    if (d - wcopy >= MAXWLEN - 1)
4060 		break;
4061 	    *d++ = c;
4062 	}
4063     }
4064     *d = NUL;
4065 }
4066 
4067 /*
4068  * Try finding suggestions by recognizing specific situations.
4069  */
4070     static void
4071 suggest_try_special(suginfo_T *su)
4072 {
4073     char_u	*p;
4074     size_t	len;
4075     int		c;
4076     char_u	word[MAXWLEN];
4077 
4078     /*
4079      * Recognize a word that is repeated: "the the".
4080      */
4081     p = skiptowhite(su->su_fbadword);
4082     len = p - su->su_fbadword;
4083     p = skipwhite(p);
4084     if (STRLEN(p) == len && STRNCMP(su->su_fbadword, p, len) == 0)
4085     {
4086 	/* Include badflags: if the badword is onecap or allcap
4087 	 * use that for the goodword too: "The the" -> "The". */
4088 	c = su->su_fbadword[len];
4089 	su->su_fbadword[len] = NUL;
4090 	make_case_word(su->su_fbadword, word, su->su_badflags);
4091 	su->su_fbadword[len] = c;
4092 
4093 	/* Give a soundalike score of 0, compute the score as if deleting one
4094 	 * character. */
4095 	add_suggestion(su, &su->su_ga, word, su->su_badlen,
4096 		       RESCORE(SCORE_REP, 0), 0, TRUE, su->su_sallang, FALSE);
4097     }
4098 }
4099 
/*
 * Change the 0 to 1 to measure how much time is spent in each state.
 * Output is appended to the file "suggestprof".
 * When disabled PROF_STORE() expands to nothing.
 */
#if 0
# define SUGGEST_PROFILE
proftime_T current;			/* timer for the state being measured */
proftime_T total;			/* timer for the whole measurement */
proftime_T times[STATE_FINAL + 1];	/* time added up for each state */
long counts[STATE_FINAL + 1];		/* prof_store() calls for each state */

/*
 * Reset all the counters and start measuring.
 */
    static void
prof_init(void)
{
    for (int i = 0; i <= STATE_FINAL; ++i)
    {
	profile_zero(&times[i]);
	counts[i] = 0;
    }
    profile_start(&current);
    profile_start(&total);
}

/*
 * Add the time passed since the previous call to the entry for "state".
 * Call before changing state.
 */
    static void
prof_store(state_T state)
{
    profile_end(&current);
    profile_add(&times[state], &current);
    ++counts[state];
    profile_start(&current);
}
# define PROF_STORE(state) prof_store(state);

/*
 * Append the measurements to the file "suggestprof", using "name" as the
 * title.  Nothing is written when the file cannot be opened.
 */
    static void
prof_report(char *name)
{
    FILE *fd = fopen("suggestprof", "a");

    profile_end(&total);

    /* Profiling is a debugging aid: when the file cannot be opened silently
     * skip the report instead of passing a NULL stream to fprintf(). */
    if (fd == NULL)
	return;

    fprintf(fd, "-----------------------\n");
    fprintf(fd, "%s: %s\n", name, profile_msg(&total));
    for (int i = 0; i <= STATE_FINAL; ++i)
	fprintf(fd, "%d: %s (%ld)\n", i, profile_msg(&times[i]), counts[i]);
    fclose(fd);
}
#else
# define PROF_STORE(state)
#endif
4149 
4150 /*
4151  * Try finding suggestions by adding/removing/swapping letters.
4152  */
4153     static void
4154 suggest_try_change(suginfo_T *su)
4155 {
4156     char_u	fword[MAXWLEN];	    /* copy of the bad word, case-folded */
4157     int		n;
4158     char_u	*p;
4159     int		lpi;
4160     langp_T	*lp;
4161 
4162     /* We make a copy of the case-folded bad word, so that we can modify it
4163      * to find matches (esp. REP items).  Append some more text, changing
4164      * chars after the bad word may help. */
4165     STRCPY(fword, su->su_fbadword);
4166     n = (int)STRLEN(fword);
4167     p = su->su_badptr + su->su_badlen;
4168     (void)spell_casefold(p, (int)STRLEN(p), fword + n, MAXWLEN - n);
4169 
4170     for (lpi = 0; lpi < curwin->w_s->b_langp.ga_len; ++lpi)
4171     {
4172 	lp = LANGP_ENTRY(curwin->w_s->b_langp, lpi);
4173 
4174 	/* If reloading a spell file fails it's still in the list but
4175 	 * everything has been cleared. */
4176 	if (lp->lp_slang->sl_fbyts == NULL)
4177 	    continue;
4178 
4179 	/* Try it for this language.  Will add possible suggestions. */
4180 #ifdef SUGGEST_PROFILE
4181 	prof_init();
4182 #endif
4183 	suggest_trie_walk(su, lp, fword, FALSE);
4184 #ifdef SUGGEST_PROFILE
4185 	prof_report("try_change");
4186 #endif
4187     }
4188 }
4189 
/*
 * Check if a change that costs "add" can be tried at "depth":
 * - The maximum score must not be reached, if we go over it we won't try
 *   this change.
 * - There must be room for one more level in "stack", which is expected to
 *   have MAXWLEN entries.
 * The arguments are in parentheses so that expressions can be passed.
 */
#define TRY_DEEPER(su, stack, depth, add) \
		((depth) < MAXWLEN - 1 \
		 && (stack)[depth].ts_score + (add) < (su)->su_maxscore)
4193 
4194 /*
4195  * Try finding suggestions by adding/removing/swapping letters.
4196  *
4197  * This uses a state machine.  At each node in the tree we try various
4198  * operations.  When trying if an operation works "depth" is increased and the
4199  * stack[] is used to store info.  This allows combinations, thus insert one
4200  * character, replace one and delete another.  The number of changes is
4201  * limited by su->su_maxscore.
4202  *
4203  * After implementing this I noticed an article by Kemal Oflazer that
4204  * describes something similar: "Error-tolerant Finite State Recognition with
4205  * Applications to Morphological Analysis and Spelling Correction" (1996).
4206  * The implementation in the article is simplified and requires a stack of
4207  * unknown depth.  The implementation here only needs a stack depth equal to
4208  * the length of the word.
4209  *
4210  * This is also used for the sound-folded word, "soundfold" is TRUE then.
4211  * The mechanism is the same, but we find a match with a sound-folded word
4212  * that comes from one or more original words.  Each of these words may be
4213  * added, this is done by add_sound_suggest().
4214  * Don't use:
4215  *	the prefix tree or the keep-case tree
4216  *	"su->su_badlen"
4217  *	anything to do with upper and lower case
4218  *	anything to do with word or non-word characters ("spell_iswordp()")
4219  *	banned words
4220  *	word flags (rare, region, compounding)
4221  *	word splitting for now
4222  *	"similar_chars()"
4223  *	use "slang->sl_repsal" instead of "lp->lp_replang->sl_rep"
4224  */
4225     static void
4226 suggest_trie_walk(
4227     suginfo_T	*su,
4228     langp_T	*lp,
4229     char_u	*fword,
4230     int		soundfold)
4231 {
4232     char_u	tword[MAXWLEN];	    /* good word collected so far */
4233     trystate_T	stack[MAXWLEN];
4234     char_u	preword[MAXWLEN * 3]; /* word found with proper case;
4235 				       * concatenation of prefix compound
4236 				       * words and split word.  NUL terminated
4237 				       * when going deeper but not when coming
4238 				       * back. */
4239     char_u	compflags[MAXWLEN];	/* compound flags, one for each word */
4240     trystate_T	*sp;
4241     int		newscore;
4242     int		score;
4243     char_u	*byts, *fbyts, *pbyts;
4244     idx_T	*idxs, *fidxs, *pidxs;
4245     int		depth;
4246     int		c, c2, c3;
4247     int		n = 0;
4248     int		flags;
4249     garray_T	*gap;
4250     idx_T	arridx;
4251     int		len;
4252     char_u	*p;
4253     fromto_T	*ftp;
4254     int		fl = 0, tl;
4255     int		repextra = 0;	    /* extra bytes in fword[] from REP item */
4256     slang_T	*slang = lp->lp_slang;
4257     int		fword_ends;
4258     int		goodword_ends;
4259 #ifdef DEBUG_TRIEWALK
4260     /* Stores the name of the change made at each level. */
4261     char_u	changename[MAXWLEN][80];
4262 #endif
4263     int		breakcheckcount = 1000;
4264     int		compound_ok;
4265 
4266     /*
4267      * Go through the whole case-fold tree, try changes at each node.
4268      * "tword[]" contains the word collected from nodes in the tree.
4269      * "fword[]" the word we are trying to match with (initially the bad
4270      * word).
4271      */
4272     depth = 0;
4273     sp = &stack[0];
4274     vim_memset(sp, 0, sizeof(trystate_T));
4275     sp->ts_curi = 1;
4276 
4277     if (soundfold)
4278     {
4279 	/* Going through the soundfold tree. */
4280 	byts = fbyts = slang->sl_sbyts;
4281 	idxs = fidxs = slang->sl_sidxs;
4282 	pbyts = NULL;
4283 	pidxs = NULL;
4284 	sp->ts_prefixdepth = PFD_NOPREFIX;
4285 	sp->ts_state = STATE_START;
4286     }
4287     else
4288     {
4289 	/*
4290 	 * When there are postponed prefixes we need to use these first.  At
4291 	 * the end of the prefix we continue in the case-fold tree.
4292 	 */
4293 	fbyts = slang->sl_fbyts;
4294 	fidxs = slang->sl_fidxs;
4295 	pbyts = slang->sl_pbyts;
4296 	pidxs = slang->sl_pidxs;
4297 	if (pbyts != NULL)
4298 	{
4299 	    byts = pbyts;
4300 	    idxs = pidxs;
4301 	    sp->ts_prefixdepth = PFD_PREFIXTREE;
4302 	    sp->ts_state = STATE_NOPREFIX;	/* try without prefix first */
4303 	}
4304 	else
4305 	{
4306 	    byts = fbyts;
4307 	    idxs = fidxs;
4308 	    sp->ts_prefixdepth = PFD_NOPREFIX;
4309 	    sp->ts_state = STATE_START;
4310 	}
4311     }
4312 
4313     /*
4314      * Loop to find all suggestions.  At each round we either:
4315      * - For the current state try one operation, advance "ts_curi",
4316      *   increase "depth".
4317      * - When a state is done go to the next, set "ts_state".
4318      * - When all states are tried decrease "depth".
4319      */
4320     while (depth >= 0 && !got_int)
4321     {
4322 	sp = &stack[depth];
4323 	switch (sp->ts_state)
4324 	{
4325 	case STATE_START:
4326 	case STATE_NOPREFIX:
4327 	    /*
4328 	     * Start of node: Deal with NUL bytes, which means
4329 	     * tword[] may end here.
4330 	     */
4331 	    arridx = sp->ts_arridx;	    /* current node in the tree */
4332 	    len = byts[arridx];		    /* bytes in this node */
4333 	    arridx += sp->ts_curi;	    /* index of current byte */
4334 
4335 	    if (sp->ts_prefixdepth == PFD_PREFIXTREE)
4336 	    {
4337 		/* Skip over the NUL bytes, we use them later. */
4338 		for (n = 0; n < len && byts[arridx + n] == 0; ++n)
4339 		    ;
4340 		sp->ts_curi += n;
4341 
4342 		/* Always past NUL bytes now. */
4343 		n = (int)sp->ts_state;
4344 		PROF_STORE(sp->ts_state)
4345 		sp->ts_state = STATE_ENDNUL;
4346 		sp->ts_save_badflags = su->su_badflags;
4347 
4348 		/* At end of a prefix or at start of prefixtree: check for
4349 		 * following word. */
4350 		if (byts[arridx] == 0 || n == (int)STATE_NOPREFIX)
4351 		{
4352 		    /* Set su->su_badflags to the caps type at this position.
4353 		     * Use the caps type until here for the prefix itself. */
4354 		    if (has_mbyte)
4355 			n = nofold_len(fword, sp->ts_fidx, su->su_badptr);
4356 		    else
4357 			n = sp->ts_fidx;
4358 		    flags = badword_captype(su->su_badptr, su->su_badptr + n);
4359 		    su->su_badflags = badword_captype(su->su_badptr + n,
4360 					       su->su_badptr + su->su_badlen);
4361 #ifdef DEBUG_TRIEWALK
4362 		    sprintf(changename[depth], "prefix");
4363 #endif
4364 		    go_deeper(stack, depth, 0);
4365 		    ++depth;
4366 		    sp = &stack[depth];
4367 		    sp->ts_prefixdepth = depth - 1;
4368 		    byts = fbyts;
4369 		    idxs = fidxs;
4370 		    sp->ts_arridx = 0;
4371 
4372 		    /* Move the prefix to preword[] with the right case
4373 		     * and make find_keepcap_word() works. */
4374 		    tword[sp->ts_twordlen] = NUL;
4375 		    make_case_word(tword + sp->ts_splitoff,
4376 					  preword + sp->ts_prewordlen, flags);
4377 		    sp->ts_prewordlen = (char_u)STRLEN(preword);
4378 		    sp->ts_splitoff = sp->ts_twordlen;
4379 		}
4380 		break;
4381 	    }
4382 
4383 	    if (sp->ts_curi > len || byts[arridx] != 0)
4384 	    {
4385 		/* Past bytes in node and/or past NUL bytes. */
4386 		PROF_STORE(sp->ts_state)
4387 		sp->ts_state = STATE_ENDNUL;
4388 		sp->ts_save_badflags = su->su_badflags;
4389 		break;
4390 	    }
4391 
4392 	    /*
4393 	     * End of word in tree.
4394 	     */
4395 	    ++sp->ts_curi;		/* eat one NUL byte */
4396 
4397 	    flags = (int)idxs[arridx];
4398 
4399 	    /* Skip words with the NOSUGGEST flag. */
4400 	    if (flags & WF_NOSUGGEST)
4401 		break;
4402 
4403 	    fword_ends = (fword[sp->ts_fidx] == NUL
4404 			   || (soundfold
4405 			       ? VIM_ISWHITE(fword[sp->ts_fidx])
4406 			       : !spell_iswordp(fword + sp->ts_fidx, curwin)));
4407 	    tword[sp->ts_twordlen] = NUL;
4408 
4409 	    if (sp->ts_prefixdepth <= PFD_NOTSPECIAL
4410 					&& (sp->ts_flags & TSF_PREFIXOK) == 0)
4411 	    {
4412 		/* There was a prefix before the word.  Check that the prefix
4413 		 * can be used with this word. */
4414 		/* Count the length of the NULs in the prefix.  If there are
4415 		 * none this must be the first try without a prefix.  */
4416 		n = stack[sp->ts_prefixdepth].ts_arridx;
4417 		len = pbyts[n++];
4418 		for (c = 0; c < len && pbyts[n + c] == 0; ++c)
4419 		    ;
4420 		if (c > 0)
4421 		{
4422 		    c = valid_word_prefix(c, n, flags,
4423 				       tword + sp->ts_splitoff, slang, FALSE);
4424 		    if (c == 0)
4425 			break;
4426 
4427 		    /* Use the WF_RARE flag for a rare prefix. */
4428 		    if (c & WF_RAREPFX)
4429 			flags |= WF_RARE;
4430 
4431 		    /* Tricky: when checking for both prefix and compounding
4432 		     * we run into the prefix flag first.
4433 		     * Remember that it's OK, so that we accept the prefix
4434 		     * when arriving at a compound flag. */
4435 		    sp->ts_flags |= TSF_PREFIXOK;
4436 		}
4437 	    }
4438 
4439 	    /* Check NEEDCOMPOUND: can't use word without compounding.  Do try
4440 	     * appending another compound word below. */
4441 	    if (sp->ts_complen == sp->ts_compsplit && fword_ends
4442 						     && (flags & WF_NEEDCOMP))
4443 		goodword_ends = FALSE;
4444 	    else
4445 		goodword_ends = TRUE;
4446 
4447 	    p = NULL;
4448 	    compound_ok = TRUE;
4449 	    if (sp->ts_complen > sp->ts_compsplit)
4450 	    {
4451 		if (slang->sl_nobreak)
4452 		{
4453 		    /* There was a word before this word.  When there was no
4454 		     * change in this word (it was correct) add the first word
4455 		     * as a suggestion.  If this word was corrected too, we
4456 		     * need to check if a correct word follows. */
4457 		    if (sp->ts_fidx - sp->ts_splitfidx
4458 					  == sp->ts_twordlen - sp->ts_splitoff
4459 			    && STRNCMP(fword + sp->ts_splitfidx,
4460 					tword + sp->ts_splitoff,
4461 					 sp->ts_fidx - sp->ts_splitfidx) == 0)
4462 		    {
4463 			preword[sp->ts_prewordlen] = NUL;
4464 			newscore = score_wordcount_adj(slang, sp->ts_score,
4465 						 preword + sp->ts_prewordlen,
4466 						 sp->ts_prewordlen > 0);
4467 			/* Add the suggestion if the score isn't too bad. */
4468 			if (newscore <= su->su_maxscore)
4469 			    add_suggestion(su, &su->su_ga, preword,
4470 				    sp->ts_splitfidx - repextra,
4471 				    newscore, 0, FALSE,
4472 				    lp->lp_sallang, FALSE);
4473 			break;
4474 		    }
4475 		}
4476 		else
4477 		{
4478 		    /* There was a compound word before this word.  If this
4479 		     * word does not support compounding then give up
4480 		     * (splitting is tried for the word without compound
4481 		     * flag). */
4482 		    if (((unsigned)flags >> 24) == 0
4483 			    || sp->ts_twordlen - sp->ts_splitoff
4484 						       < slang->sl_compminlen)
4485 			break;
4486 		    /* For multi-byte chars check character length against
4487 		     * COMPOUNDMIN. */
4488 		    if (has_mbyte
4489 			    && slang->sl_compminlen > 0
4490 			    && mb_charlen(tword + sp->ts_splitoff)
4491 						       < slang->sl_compminlen)
4492 			break;
4493 
4494 		    compflags[sp->ts_complen] = ((unsigned)flags >> 24);
4495 		    compflags[sp->ts_complen + 1] = NUL;
4496 		    vim_strncpy(preword + sp->ts_prewordlen,
4497 			    tword + sp->ts_splitoff,
4498 			    sp->ts_twordlen - sp->ts_splitoff);
4499 
4500 		    /* Verify CHECKCOMPOUNDPATTERN  rules. */
4501 		    if (match_checkcompoundpattern(preword,  sp->ts_prewordlen,
4502 							  &slang->sl_comppat))
4503 			compound_ok = FALSE;
4504 
4505 		    if (compound_ok)
4506 		    {
4507 			p = preword;
4508 			while (*skiptowhite(p) != NUL)
4509 			    p = skipwhite(skiptowhite(p));
4510 			if (fword_ends && !can_compound(slang, p,
4511 						compflags + sp->ts_compsplit))
4512 			    /* Compound is not allowed.  But it may still be
4513 			     * possible if we add another (short) word. */
4514 			    compound_ok = FALSE;
4515 		    }
4516 
4517 		    /* Get pointer to last char of previous word. */
4518 		    p = preword + sp->ts_prewordlen;
4519 		    MB_PTR_BACK(preword, p);
4520 		}
4521 	    }
4522 
4523 	    /*
4524 	     * Form the word with proper case in preword.
4525 	     * If there is a word from a previous split, append.
4526 	     * For the soundfold tree don't change the case, simply append.
4527 	     */
4528 	    if (soundfold)
4529 		STRCPY(preword + sp->ts_prewordlen, tword + sp->ts_splitoff);
4530 	    else if (flags & WF_KEEPCAP)
4531 		/* Must find the word in the keep-case tree. */
4532 		find_keepcap_word(slang, tword + sp->ts_splitoff,
4533 						 preword + sp->ts_prewordlen);
4534 	    else
4535 	    {
4536 		/* Include badflags: If the badword is onecap or allcap
4537 		 * use that for the goodword too.  But if the badword is
4538 		 * allcap and it's only one char long use onecap. */
4539 		c = su->su_badflags;
4540 		if ((c & WF_ALLCAP)
4541 			&& su->su_badlen == (*mb_ptr2len)(su->su_badptr))
4542 		    c = WF_ONECAP;
4543 		c |= flags;
4544 
4545 		/* When appending a compound word after a word character don't
4546 		 * use Onecap. */
4547 		if (p != NULL && spell_iswordp_nmw(p, curwin))
4548 		    c &= ~WF_ONECAP;
4549 		make_case_word(tword + sp->ts_splitoff,
4550 					      preword + sp->ts_prewordlen, c);
4551 	    }
4552 
4553 	    if (!soundfold)
4554 	    {
4555 		/* Don't use a banned word.  It may appear again as a good
4556 		 * word, thus remember it. */
4557 		if (flags & WF_BANNED)
4558 		{
4559 		    add_banned(su, preword + sp->ts_prewordlen);
4560 		    break;
4561 		}
4562 		if ((sp->ts_complen == sp->ts_compsplit
4563 			    && WAS_BANNED(su, preword + sp->ts_prewordlen))
4564 						   || WAS_BANNED(su, preword))
4565 		{
4566 		    if (slang->sl_compprog == NULL)
4567 			break;
4568 		    /* the word so far was banned but we may try compounding */
4569 		    goodword_ends = FALSE;
4570 		}
4571 	    }
4572 
4573 	    newscore = 0;
4574 	    if (!soundfold)	/* soundfold words don't have flags */
4575 	    {
4576 		if ((flags & WF_REGION)
4577 			    && (((unsigned)flags >> 16) & lp->lp_region) == 0)
4578 		    newscore += SCORE_REGION;
4579 		if (flags & WF_RARE)
4580 		    newscore += SCORE_RARE;
4581 
4582 		if (!spell_valid_case(su->su_badflags,
4583 				  captype(preword + sp->ts_prewordlen, NULL)))
4584 		    newscore += SCORE_ICASE;
4585 	    }
4586 
4587 	    /* TODO: how about splitting in the soundfold tree? */
4588 	    if (fword_ends
4589 		    && goodword_ends
4590 		    && sp->ts_fidx >= sp->ts_fidxtry
4591 		    && compound_ok)
4592 	    {
4593 		/* The badword also ends: add suggestions. */
4594 #ifdef DEBUG_TRIEWALK
4595 		if (soundfold && STRCMP(preword, "smwrd") == 0)
4596 		{
4597 		    int	    j;
4598 
4599 		    /* print the stack of changes that brought us here */
4600 		    smsg("------ %s -------", fword);
4601 		    for (j = 0; j < depth; ++j)
4602 			smsg("%s", changename[j]);
4603 		}
4604 #endif
4605 		if (soundfold)
4606 		{
4607 		    /* For soundfolded words we need to find the original
4608 		     * words, the edit distance and then add them. */
4609 		    add_sound_suggest(su, preword, sp->ts_score, lp);
4610 		}
4611 		else if (sp->ts_fidx > 0)
4612 		{
4613 		    /* Give a penalty when changing non-word char to word
4614 		     * char, e.g., "thes," -> "these". */
4615 		    p = fword + sp->ts_fidx;
4616 		    MB_PTR_BACK(fword, p);
4617 		    if (!spell_iswordp(p, curwin))
4618 		    {
4619 			p = preword + STRLEN(preword);
4620 			MB_PTR_BACK(preword, p);
4621 			if (spell_iswordp(p, curwin))
4622 			    newscore += SCORE_NONWORD;
4623 		    }
4624 
4625 		    /* Give a bonus to words seen before. */
4626 		    score = score_wordcount_adj(slang,
4627 						sp->ts_score + newscore,
4628 						preword + sp->ts_prewordlen,
4629 						sp->ts_prewordlen > 0);
4630 
4631 		    /* Add the suggestion if the score isn't too bad. */
4632 		    if (score <= su->su_maxscore)
4633 		    {
4634 			add_suggestion(su, &su->su_ga, preword,
4635 				    sp->ts_fidx - repextra,
4636 				    score, 0, FALSE, lp->lp_sallang, FALSE);
4637 
4638 			if (su->su_badflags & WF_MIXCAP)
4639 			{
4640 			    /* We really don't know if the word should be
4641 			     * upper or lower case, add both. */
4642 			    c = captype(preword, NULL);
4643 			    if (c == 0 || c == WF_ALLCAP)
4644 			    {
4645 				make_case_word(tword + sp->ts_splitoff,
4646 					      preword + sp->ts_prewordlen,
4647 						      c == 0 ? WF_ALLCAP : 0);
4648 
4649 				add_suggestion(su, &su->su_ga, preword,
4650 					sp->ts_fidx - repextra,
4651 					score + SCORE_ICASE, 0, FALSE,
4652 					lp->lp_sallang, FALSE);
4653 			    }
4654 			}
4655 		    }
4656 		}
4657 	    }
4658 
4659 	    /*
4660 	     * Try word split and/or compounding.
4661 	     */
4662 	    if ((sp->ts_fidx >= sp->ts_fidxtry || fword_ends)
4663 		    /* Don't split halfway a character. */
4664 		    && (!has_mbyte || sp->ts_tcharlen == 0))
4665 	    {
4666 		int	try_compound;
4667 		int	try_split;
4668 
4669 		/* If past the end of the bad word don't try a split.
4670 		 * Otherwise try changing the next word.  E.g., find
4671 		 * suggestions for "the the" where the second "the" is
4672 		 * different.  It's done like a split.
4673 		 * TODO: word split for soundfold words */
4674 		try_split = (sp->ts_fidx - repextra < su->su_badlen)
4675 								&& !soundfold;
4676 
4677 		/* Get here in several situations:
4678 		 * 1. The word in the tree ends:
4679 		 *    If the word allows compounding try that.  Otherwise try
4680 		 *    a split by inserting a space.  For both check that a
4681 		 *    valid words starts at fword[sp->ts_fidx].
4682 		 *    For NOBREAK do like compounding to be able to check if
4683 		 *    the next word is valid.
4684 		 * 2. The badword does end, but it was due to a change (e.g.,
4685 		 *    a swap).  No need to split, but do check that the
4686 		 *    following word is valid.
4687 		 * 3. The badword and the word in the tree end.  It may still
4688 		 *    be possible to compound another (short) word.
4689 		 */
4690 		try_compound = FALSE;
4691 		if (!soundfold
4692 			&& !slang->sl_nocompoundsugs
4693 			&& slang->sl_compprog != NULL
4694 			&& ((unsigned)flags >> 24) != 0
4695 			&& sp->ts_twordlen - sp->ts_splitoff
4696 						       >= slang->sl_compminlen
4697 			&& (!has_mbyte
4698 			    || slang->sl_compminlen == 0
4699 			    || mb_charlen(tword + sp->ts_splitoff)
4700 						      >= slang->sl_compminlen)
4701 			&& (slang->sl_compsylmax < MAXWLEN
4702 			    || sp->ts_complen + 1 - sp->ts_compsplit
4703 							  < slang->sl_compmax)
4704 			&& (can_be_compound(sp, slang,
4705 					 compflags, ((unsigned)flags >> 24))))
4706 
4707 		{
4708 		    try_compound = TRUE;
4709 		    compflags[sp->ts_complen] = ((unsigned)flags >> 24);
4710 		    compflags[sp->ts_complen + 1] = NUL;
4711 		}
4712 
4713 		/* For NOBREAK we never try splitting, it won't make any word
4714 		 * valid. */
4715 		if (slang->sl_nobreak && !slang->sl_nocompoundsugs)
4716 		    try_compound = TRUE;
4717 
4718 		/* If we could add a compound word, and it's also possible to
4719 		 * split at this point, do the split first and set
4720 		 * TSF_DIDSPLIT to avoid doing it again. */
4721 		else if (!fword_ends
4722 			&& try_compound
4723 			&& (sp->ts_flags & TSF_DIDSPLIT) == 0)
4724 		{
4725 		    try_compound = FALSE;
4726 		    sp->ts_flags |= TSF_DIDSPLIT;
4727 		    --sp->ts_curi;	    /* do the same NUL again */
4728 		    compflags[sp->ts_complen] = NUL;
4729 		}
4730 		else
4731 		    sp->ts_flags &= ~TSF_DIDSPLIT;
4732 
4733 		if (try_split || try_compound)
4734 		{
4735 		    if (!try_compound && (!fword_ends || !goodword_ends))
4736 		    {
4737 			/* If we're going to split need to check that the
4738 			 * words so far are valid for compounding.  If there
4739 			 * is only one word it must not have the NEEDCOMPOUND
4740 			 * flag. */
4741 			if (sp->ts_complen == sp->ts_compsplit
4742 						     && (flags & WF_NEEDCOMP))
4743 			    break;
4744 			p = preword;
4745 			while (*skiptowhite(p) != NUL)
4746 			    p = skipwhite(skiptowhite(p));
4747 			if (sp->ts_complen > sp->ts_compsplit
4748 				&& !can_compound(slang, p,
4749 						compflags + sp->ts_compsplit))
4750 			    break;
4751 
4752 			if (slang->sl_nosplitsugs)
4753 			    newscore += SCORE_SPLIT_NO;
4754 			else
4755 			    newscore += SCORE_SPLIT;
4756 
4757 			/* Give a bonus to words seen before. */
4758 			newscore = score_wordcount_adj(slang, newscore,
4759 					   preword + sp->ts_prewordlen, TRUE);
4760 		    }
4761 
4762 		    if (TRY_DEEPER(su, stack, depth, newscore))
4763 		    {
4764 			go_deeper(stack, depth, newscore);
4765 #ifdef DEBUG_TRIEWALK
4766 			if (!try_compound && !fword_ends)
4767 			    sprintf(changename[depth], "%.*s-%s: split",
4768 				 sp->ts_twordlen, tword, fword + sp->ts_fidx);
4769 			else
4770 			    sprintf(changename[depth], "%.*s-%s: compound",
4771 				 sp->ts_twordlen, tword, fword + sp->ts_fidx);
4772 #endif
4773 			/* Save things to be restored at STATE_SPLITUNDO. */
4774 			sp->ts_save_badflags = su->su_badflags;
4775 			PROF_STORE(sp->ts_state)
4776 			sp->ts_state = STATE_SPLITUNDO;
4777 
4778 			++depth;
4779 			sp = &stack[depth];
4780 
4781 			/* Append a space to preword when splitting. */
4782 			if (!try_compound && !fword_ends)
4783 			    STRCAT(preword, " ");
4784 			sp->ts_prewordlen = (char_u)STRLEN(preword);
4785 			sp->ts_splitoff = sp->ts_twordlen;
4786 			sp->ts_splitfidx = sp->ts_fidx;
4787 
4788 			/* If the badword has a non-word character at this
4789 			 * position skip it.  That means replacing the
4790 			 * non-word character with a space.  Always skip a
4791 			 * character when the word ends.  But only when the
4792 			 * good word can end. */
4793 			if (((!try_compound && !spell_iswordp_nmw(fword
4794 							       + sp->ts_fidx,
4795 							       curwin))
4796 				    || fword_ends)
4797 				&& fword[sp->ts_fidx] != NUL
4798 				&& goodword_ends)
4799 			{
4800 			    int	    l;
4801 
4802 			    l = MB_PTR2LEN(fword + sp->ts_fidx);
4803 			    if (fword_ends)
4804 			    {
4805 				/* Copy the skipped character to preword. */
4806 				mch_memmove(preword + sp->ts_prewordlen,
4807 						      fword + sp->ts_fidx, l);
4808 				sp->ts_prewordlen += l;
4809 				preword[sp->ts_prewordlen] = NUL;
4810 			    }
4811 			    else
4812 				sp->ts_score -= SCORE_SPLIT - SCORE_SUBST;
4813 			    sp->ts_fidx += l;
4814 			}
4815 
4816 			/* When compounding include compound flag in
4817 			 * compflags[] (already set above).  When splitting we
4818 			 * may start compounding over again.  */
4819 			if (try_compound)
4820 			    ++sp->ts_complen;
4821 			else
4822 			    sp->ts_compsplit = sp->ts_complen;
4823 			sp->ts_prefixdepth = PFD_NOPREFIX;
4824 
4825 			/* set su->su_badflags to the caps type at this
4826 			 * position */
4827 			if (has_mbyte)
4828 			    n = nofold_len(fword, sp->ts_fidx, su->su_badptr);
4829 			else
4830 			    n = sp->ts_fidx;
4831 			su->su_badflags = badword_captype(su->su_badptr + n,
4832 					       su->su_badptr + su->su_badlen);
4833 
4834 			/* Restart at top of the tree. */
4835 			sp->ts_arridx = 0;
4836 
4837 			/* If there are postponed prefixes, try these too. */
4838 			if (pbyts != NULL)
4839 			{
4840 			    byts = pbyts;
4841 			    idxs = pidxs;
4842 			    sp->ts_prefixdepth = PFD_PREFIXTREE;
4843 			    PROF_STORE(sp->ts_state)
4844 			    sp->ts_state = STATE_NOPREFIX;
4845 			}
4846 		    }
4847 		}
4848 	    }
4849 	    break;
4850 
4851 	case STATE_SPLITUNDO:
4852 	    /* Undo the changes done for word split or compound word. */
4853 	    su->su_badflags = sp->ts_save_badflags;
4854 
4855 	    /* Continue looking for NUL bytes. */
4856 	    PROF_STORE(sp->ts_state)
4857 	    sp->ts_state = STATE_START;
4858 
4859 	    /* In case we went into the prefix tree. */
4860 	    byts = fbyts;
4861 	    idxs = fidxs;
4862 	    break;
4863 
4864 	case STATE_ENDNUL:
4865 	    /* Past the NUL bytes in the node. */
4866 	    su->su_badflags = sp->ts_save_badflags;
4867 	    if (fword[sp->ts_fidx] == NUL && sp->ts_tcharlen == 0)
4868 	    {
4869 		/* The badword ends, can't use STATE_PLAIN. */
4870 		PROF_STORE(sp->ts_state)
4871 		sp->ts_state = STATE_DEL;
4872 		break;
4873 	    }
4874 	    PROF_STORE(sp->ts_state)
4875 	    sp->ts_state = STATE_PLAIN;
4876 	    /* FALLTHROUGH */
4877 
4878 	case STATE_PLAIN:
4879 	    /*
4880 	     * Go over all possible bytes at this node, add each to tword[]
4881 	     * and use child node.  "ts_curi" is the index.
4882 	     */
4883 	    arridx = sp->ts_arridx;
4884 	    if (sp->ts_curi > byts[arridx])
4885 	    {
4886 		/* Done all bytes at this node, do next state.  When still at
4887 		 * already changed bytes skip the other tricks. */
4888 		PROF_STORE(sp->ts_state)
4889 		if (sp->ts_fidx >= sp->ts_fidxtry)
4890 		    sp->ts_state = STATE_DEL;
4891 		else
4892 		    sp->ts_state = STATE_FINAL;
4893 	    }
4894 	    else
4895 	    {
4896 		arridx += sp->ts_curi++;
4897 		c = byts[arridx];
4898 
4899 		/* Normal byte, go one level deeper.  If it's not equal to the
4900 		 * byte in the bad word adjust the score.  But don't even try
4901 		 * when the byte was already changed.  And don't try when we
4902 		 * just deleted this byte, accepting it is always cheaper than
4903 		 * delete + substitute. */
4904 		if (c == fword[sp->ts_fidx]
4905 			|| (sp->ts_tcharlen > 0 && sp->ts_isdiff != DIFF_NONE))
4906 		    newscore = 0;
4907 		else
4908 		    newscore = SCORE_SUBST;
4909 		if ((newscore == 0
4910 			    || (sp->ts_fidx >= sp->ts_fidxtry
4911 				&& ((sp->ts_flags & TSF_DIDDEL) == 0
4912 				    || c != fword[sp->ts_delidx])))
4913 			&& TRY_DEEPER(su, stack, depth, newscore))
4914 		{
4915 		    go_deeper(stack, depth, newscore);
4916 #ifdef DEBUG_TRIEWALK
4917 		    if (newscore > 0)
4918 			sprintf(changename[depth], "%.*s-%s: subst %c to %c",
4919 				sp->ts_twordlen, tword, fword + sp->ts_fidx,
4920 				fword[sp->ts_fidx], c);
4921 		    else
4922 			sprintf(changename[depth], "%.*s-%s: accept %c",
4923 				sp->ts_twordlen, tword, fword + sp->ts_fidx,
4924 				fword[sp->ts_fidx]);
4925 #endif
4926 		    ++depth;
4927 		    sp = &stack[depth];
4928 		    ++sp->ts_fidx;
4929 		    tword[sp->ts_twordlen++] = c;
4930 		    sp->ts_arridx = idxs[arridx];
4931 		    if (newscore == SCORE_SUBST)
4932 			sp->ts_isdiff = DIFF_YES;
4933 		    if (has_mbyte)
4934 		    {
4935 			/* Multi-byte characters are a bit complicated to
4936 			 * handle: They differ when any of the bytes differ
4937 			 * and then their length may also differ. */
4938 			if (sp->ts_tcharlen == 0)
4939 			{
4940 			    /* First byte. */
4941 			    sp->ts_tcharidx = 0;
4942 			    sp->ts_tcharlen = MB_BYTE2LEN(c);
4943 			    sp->ts_fcharstart = sp->ts_fidx - 1;
4944 			    sp->ts_isdiff = (newscore != 0)
4945 						       ? DIFF_YES : DIFF_NONE;
4946 			}
4947 			else if (sp->ts_isdiff == DIFF_INSERT)
4948 			    /* When inserting trail bytes don't advance in the
4949 			     * bad word. */
4950 			    --sp->ts_fidx;
4951 			if (++sp->ts_tcharidx == sp->ts_tcharlen)
4952 			{
4953 			    /* Last byte of character. */
4954 			    if (sp->ts_isdiff == DIFF_YES)
4955 			    {
4956 				/* Correct ts_fidx for the byte length of the
4957 				 * character (we didn't check that before). */
4958 				sp->ts_fidx = sp->ts_fcharstart
4959 					    + MB_PTR2LEN(
4960 						    fword + sp->ts_fcharstart);
4961 				/* For changing a composing character adjust
4962 				 * the score from SCORE_SUBST to
4963 				 * SCORE_SUBCOMP. */
4964 				if (enc_utf8
4965 					&& utf_iscomposing(
4966 					    utf_ptr2char(tword
4967 						+ sp->ts_twordlen
4968 							   - sp->ts_tcharlen))
4969 					&& utf_iscomposing(
4970 					    utf_ptr2char(fword
4971 							+ sp->ts_fcharstart)))
4972 				    sp->ts_score -=
4973 						  SCORE_SUBST - SCORE_SUBCOMP;
4974 
4975 				/* For a similar character adjust score from
4976 				 * SCORE_SUBST to SCORE_SIMILAR. */
4977 				else if (!soundfold
4978 					&& slang->sl_has_map
4979 					&& similar_chars(slang,
4980 					    mb_ptr2char(tword
4981 						+ sp->ts_twordlen
4982 							   - sp->ts_tcharlen),
4983 					    mb_ptr2char(fword
4984 							+ sp->ts_fcharstart)))
4985 				    sp->ts_score -=
4986 						  SCORE_SUBST - SCORE_SIMILAR;
4987 			    }
4988 			    else if (sp->ts_isdiff == DIFF_INSERT
4989 					 && sp->ts_twordlen > sp->ts_tcharlen)
4990 			    {
4991 				p = tword + sp->ts_twordlen - sp->ts_tcharlen;
4992 				c = mb_ptr2char(p);
4993 				if (enc_utf8 && utf_iscomposing(c))
4994 				{
4995 				    /* Inserting a composing char doesn't
4996 				     * count that much. */
4997 				    sp->ts_score -= SCORE_INS - SCORE_INSCOMP;
4998 				}
4999 				else
5000 				{
5001 				    /* If the previous character was the same,
5002 				     * thus doubling a character, give a bonus
5003 				     * to the score.  Also for the soundfold
5004 				     * tree (might seem illogical but does
5005 				     * give better scores). */
5006 				    MB_PTR_BACK(tword, p);
5007 				    if (c == mb_ptr2char(p))
5008 					sp->ts_score -= SCORE_INS
5009 							       - SCORE_INSDUP;
5010 				}
5011 			    }
5012 
5013 			    /* Starting a new char, reset the length. */
5014 			    sp->ts_tcharlen = 0;
5015 			}
5016 		    }
5017 		    else
5018 		    {
5019 			/* If we found a similar char adjust the score.
5020 			 * We do this after calling go_deeper() because
5021 			 * it's slow. */
5022 			if (newscore != 0
5023 				&& !soundfold
5024 				&& slang->sl_has_map
5025 				&& similar_chars(slang,
5026 						   c, fword[sp->ts_fidx - 1]))
5027 			    sp->ts_score -= SCORE_SUBST - SCORE_SIMILAR;
5028 		    }
5029 		}
5030 	    }
5031 	    break;
5032 
5033 	case STATE_DEL:
5034 	    /* When past the first byte of a multi-byte char don't try
5035 	     * delete/insert/swap a character. */
5036 	    if (has_mbyte && sp->ts_tcharlen > 0)
5037 	    {
5038 		PROF_STORE(sp->ts_state)
5039 		sp->ts_state = STATE_FINAL;
5040 		break;
5041 	    }
5042 	    /*
5043 	     * Try skipping one character in the bad word (delete it).
5044 	     */
5045 	    PROF_STORE(sp->ts_state)
5046 	    sp->ts_state = STATE_INS_PREP;
5047 	    sp->ts_curi = 1;
5048 	    if (soundfold && sp->ts_fidx == 0 && fword[sp->ts_fidx] == '*')
5049 		/* Deleting a vowel at the start of a word counts less, see
5050 		 * soundalike_score(). */
5051 		newscore = 2 * SCORE_DEL / 3;
5052 	    else
5053 		newscore = SCORE_DEL;
5054 	    if (fword[sp->ts_fidx] != NUL
5055 				    && TRY_DEEPER(su, stack, depth, newscore))
5056 	    {
5057 		go_deeper(stack, depth, newscore);
5058 #ifdef DEBUG_TRIEWALK
5059 		sprintf(changename[depth], "%.*s-%s: delete %c",
5060 			sp->ts_twordlen, tword, fword + sp->ts_fidx,
5061 			fword[sp->ts_fidx]);
5062 #endif
5063 		++depth;
5064 
5065 		/* Remember what character we deleted, so that we can avoid
5066 		 * inserting it again. */
5067 		stack[depth].ts_flags |= TSF_DIDDEL;
5068 		stack[depth].ts_delidx = sp->ts_fidx;
5069 
5070 		/* Advance over the character in fword[].  Give a bonus to the
5071 		 * score if the same character is following "nn" -> "n".  It's
5072 		 * a bit illogical for soundfold tree but it does give better
5073 		 * results. */
5074 		if (has_mbyte)
5075 		{
5076 		    c = mb_ptr2char(fword + sp->ts_fidx);
5077 		    stack[depth].ts_fidx += MB_PTR2LEN(fword + sp->ts_fidx);
5078 		    if (enc_utf8 && utf_iscomposing(c))
5079 			stack[depth].ts_score -= SCORE_DEL - SCORE_DELCOMP;
5080 		    else if (c == mb_ptr2char(fword + stack[depth].ts_fidx))
5081 			stack[depth].ts_score -= SCORE_DEL - SCORE_DELDUP;
5082 		}
5083 		else
5084 		{
5085 		    ++stack[depth].ts_fidx;
5086 		    if (fword[sp->ts_fidx] == fword[sp->ts_fidx + 1])
5087 			stack[depth].ts_score -= SCORE_DEL - SCORE_DELDUP;
5088 		}
5089 		break;
5090 	    }
5091 	    /* FALLTHROUGH */
5092 
5093 	case STATE_INS_PREP:
5094 	    if (sp->ts_flags & TSF_DIDDEL)
5095 	    {
5096 		/* If we just deleted a byte then inserting won't make sense,
5097 		 * a substitute is always cheaper. */
5098 		PROF_STORE(sp->ts_state)
5099 		sp->ts_state = STATE_SWAP;
5100 		break;
5101 	    }
5102 
5103 	    /* skip over NUL bytes */
5104 	    n = sp->ts_arridx;
5105 	    for (;;)
5106 	    {
5107 		if (sp->ts_curi > byts[n])
5108 		{
5109 		    /* Only NUL bytes at this node, go to next state. */
5110 		    PROF_STORE(sp->ts_state)
5111 		    sp->ts_state = STATE_SWAP;
5112 		    break;
5113 		}
5114 		if (byts[n + sp->ts_curi] != NUL)
5115 		{
5116 		    /* Found a byte to insert. */
5117 		    PROF_STORE(sp->ts_state)
5118 		    sp->ts_state = STATE_INS;
5119 		    break;
5120 		}
5121 		++sp->ts_curi;
5122 	    }
5123 	    break;
5124 
5125 	    /* FALLTHROUGH */
5126 
5127 	case STATE_INS:
5128 	    /* Insert one byte.  Repeat this for each possible byte at this
5129 	     * node. */
5130 	    n = sp->ts_arridx;
5131 	    if (sp->ts_curi > byts[n])
5132 	    {
5133 		/* Done all bytes at this node, go to next state. */
5134 		PROF_STORE(sp->ts_state)
5135 		sp->ts_state = STATE_SWAP;
5136 		break;
5137 	    }
5138 
5139 	    /* Do one more byte at this node, but:
5140 	     * - Skip NUL bytes.
5141 	     * - Skip the byte if it's equal to the byte in the word,
5142 	     *   accepting that byte is always better.
5143 	     */
5144 	    n += sp->ts_curi++;
5145 	    c = byts[n];
5146 	    if (soundfold && sp->ts_twordlen == 0 && c == '*')
5147 		/* Inserting a vowel at the start of a word counts less,
5148 		 * see soundalike_score(). */
5149 		newscore = 2 * SCORE_INS / 3;
5150 	    else
5151 		newscore = SCORE_INS;
5152 	    if (c != fword[sp->ts_fidx]
5153 				    && TRY_DEEPER(su, stack, depth, newscore))
5154 	    {
5155 		go_deeper(stack, depth, newscore);
5156 #ifdef DEBUG_TRIEWALK
5157 		sprintf(changename[depth], "%.*s-%s: insert %c",
5158 			sp->ts_twordlen, tword, fword + sp->ts_fidx,
5159 			c);
5160 #endif
5161 		++depth;
5162 		sp = &stack[depth];
5163 		tword[sp->ts_twordlen++] = c;
5164 		sp->ts_arridx = idxs[n];
5165 		if (has_mbyte)
5166 		{
5167 		    fl = MB_BYTE2LEN(c);
5168 		    if (fl > 1)
5169 		    {
5170 			/* There are following bytes for the same character.
5171 			 * We must find all bytes before trying
5172 			 * delete/insert/swap/etc. */
5173 			sp->ts_tcharlen = fl;
5174 			sp->ts_tcharidx = 1;
5175 			sp->ts_isdiff = DIFF_INSERT;
5176 		    }
5177 		}
5178 		else
5179 		    fl = 1;
5180 		if (fl == 1)
5181 		{
5182 		    /* If the previous character was the same, thus doubling a
5183 		     * character, give a bonus to the score.  Also for
5184 		     * soundfold words (illogical but does give a better
5185 		     * score). */
5186 		    if (sp->ts_twordlen >= 2
5187 					   && tword[sp->ts_twordlen - 2] == c)
5188 			sp->ts_score -= SCORE_INS - SCORE_INSDUP;
5189 		}
5190 	    }
5191 	    break;
5192 
5193 	case STATE_SWAP:
5194 	    /*
5195 	     * Swap two bytes in the bad word: "12" -> "21".
5196 	     * We change "fword" here, it's changed back afterwards at
5197 	     * STATE_UNSWAP.
5198 	     */
5199 	    p = fword + sp->ts_fidx;
5200 	    c = *p;
5201 	    if (c == NUL)
5202 	    {
5203 		/* End of word, can't swap or replace. */
5204 		PROF_STORE(sp->ts_state)
5205 		sp->ts_state = STATE_FINAL;
5206 		break;
5207 	    }
5208 
5209 	    /* Don't swap if the first character is not a word character.
5210 	     * SWAP3 etc. also don't make sense then. */
5211 	    if (!soundfold && !spell_iswordp(p, curwin))
5212 	    {
5213 		PROF_STORE(sp->ts_state)
5214 		sp->ts_state = STATE_REP_INI;
5215 		break;
5216 	    }
5217 
5218 	    if (has_mbyte)
5219 	    {
5220 		n = MB_CPTR2LEN(p);
5221 		c = mb_ptr2char(p);
5222 		if (p[n] == NUL)
5223 		    c2 = NUL;
5224 		else if (!soundfold && !spell_iswordp(p + n, curwin))
5225 		    c2 = c; /* don't swap non-word char */
5226 		else
5227 		    c2 = mb_ptr2char(p + n);
5228 	    }
5229 	    else
5230 	    {
5231 		if (p[1] == NUL)
5232 		    c2 = NUL;
5233 		else if (!soundfold && !spell_iswordp(p + 1, curwin))
5234 		    c2 = c; /* don't swap non-word char */
5235 		else
5236 		    c2 = p[1];
5237 	    }
5238 
5239 	    /* When the second character is NUL we can't swap. */
5240 	    if (c2 == NUL)
5241 	    {
5242 		PROF_STORE(sp->ts_state)
5243 		sp->ts_state = STATE_REP_INI;
5244 		break;
5245 	    }
5246 
5247 	    /* When characters are identical, swap won't do anything.
5248 	     * Also get here if the second char is not a word character. */
5249 	    if (c == c2)
5250 	    {
5251 		PROF_STORE(sp->ts_state)
5252 		sp->ts_state = STATE_SWAP3;
5253 		break;
5254 	    }
5255 	    if (c2 != NUL && TRY_DEEPER(su, stack, depth, SCORE_SWAP))
5256 	    {
5257 		go_deeper(stack, depth, SCORE_SWAP);
5258 #ifdef DEBUG_TRIEWALK
5259 		sprintf(changename[depth], "%.*s-%s: swap %c and %c",
5260 			sp->ts_twordlen, tword, fword + sp->ts_fidx,
5261 			c, c2);
5262 #endif
5263 		PROF_STORE(sp->ts_state)
5264 		sp->ts_state = STATE_UNSWAP;
5265 		++depth;
5266 		if (has_mbyte)
5267 		{
5268 		    fl = mb_char2len(c2);
5269 		    mch_memmove(p, p + n, fl);
5270 		    mb_char2bytes(c, p + fl);
5271 		    stack[depth].ts_fidxtry = sp->ts_fidx + n + fl;
5272 		}
5273 		else
5274 		{
5275 		    p[0] = c2;
5276 		    p[1] = c;
5277 		    stack[depth].ts_fidxtry = sp->ts_fidx + 2;
5278 		}
5279 	    }
5280 	    else
5281 	    {
5282 		/* If this swap doesn't work then SWAP3 won't either. */
5283 		PROF_STORE(sp->ts_state)
5284 		sp->ts_state = STATE_REP_INI;
5285 	    }
5286 	    break;
5287 
5288 	case STATE_UNSWAP:
5289 	    /* Undo the STATE_SWAP swap: "21" -> "12". */
5290 	    p = fword + sp->ts_fidx;
5291 	    if (has_mbyte)
5292 	    {
5293 		n = MB_PTR2LEN(p);
5294 		c = mb_ptr2char(p + n);
5295 		mch_memmove(p + MB_PTR2LEN(p + n), p, n);
5296 		mb_char2bytes(c, p);
5297 	    }
5298 	    else
5299 	    {
5300 		c = *p;
5301 		*p = p[1];
5302 		p[1] = c;
5303 	    }
5304 	    /* FALLTHROUGH */
5305 
5306 	case STATE_SWAP3:
5307 	    /* Swap two bytes, skipping one: "123" -> "321".  We change
5308 	     * "fword" here, it's changed back afterwards at STATE_UNSWAP3. */
5309 	    p = fword + sp->ts_fidx;
5310 	    if (has_mbyte)
5311 	    {
5312 		n = MB_CPTR2LEN(p);
5313 		c = mb_ptr2char(p);
5314 		fl = MB_CPTR2LEN(p + n);
5315 		c2 = mb_ptr2char(p + n);
5316 		if (!soundfold && !spell_iswordp(p + n + fl, curwin))
5317 		    c3 = c;	/* don't swap non-word char */
5318 		else
5319 		    c3 = mb_ptr2char(p + n + fl);
5320 	    }
5321 	    else
5322 	    {
5323 		c = *p;
5324 		c2 = p[1];
5325 		if (!soundfold && !spell_iswordp(p + 2, curwin))
5326 		    c3 = c;	/* don't swap non-word char */
5327 		else
5328 		    c3 = p[2];
5329 	    }
5330 
	    /* When characters are identical: "121" then SWAP3 result is
	     * identical, ROT3L result is same as SWAP: "211", ROT3R result is
	     * same as SWAP on next char: "112".  Thus skip all swapping.
	     * Also skip when c3 is NUL.
	     * Also get here when the third character is not a word character.
	     * Second character may be any char: "a.b" -> "b.a" */
5337 	    if (c == c3 || c3 == NUL)
5338 	    {
5339 		PROF_STORE(sp->ts_state)
5340 		sp->ts_state = STATE_REP_INI;
5341 		break;
5342 	    }
5343 	    if (TRY_DEEPER(su, stack, depth, SCORE_SWAP3))
5344 	    {
5345 		go_deeper(stack, depth, SCORE_SWAP3);
5346 #ifdef DEBUG_TRIEWALK
5347 		sprintf(changename[depth], "%.*s-%s: swap3 %c and %c",
5348 			sp->ts_twordlen, tword, fword + sp->ts_fidx,
5349 			c, c3);
5350 #endif
5351 		PROF_STORE(sp->ts_state)
5352 		sp->ts_state = STATE_UNSWAP3;
5353 		++depth;
5354 		if (has_mbyte)
5355 		{
5356 		    tl = mb_char2len(c3);
5357 		    mch_memmove(p, p + n + fl, tl);
5358 		    mb_char2bytes(c2, p + tl);
5359 		    mb_char2bytes(c, p + fl + tl);
5360 		    stack[depth].ts_fidxtry = sp->ts_fidx + n + fl + tl;
5361 		}
5362 		else
5363 		{
5364 		    p[0] = p[2];
5365 		    p[2] = c;
5366 		    stack[depth].ts_fidxtry = sp->ts_fidx + 3;
5367 		}
5368 	    }
5369 	    else
5370 	    {
5371 		PROF_STORE(sp->ts_state)
5372 		sp->ts_state = STATE_REP_INI;
5373 	    }
5374 	    break;
5375 
5376 	case STATE_UNSWAP3:
5377 	    /* Undo STATE_SWAP3: "321" -> "123" */
5378 	    p = fword + sp->ts_fidx;
5379 	    if (has_mbyte)
5380 	    {
5381 		n = MB_PTR2LEN(p);
5382 		c2 = mb_ptr2char(p + n);
5383 		fl = MB_PTR2LEN(p + n);
5384 		c = mb_ptr2char(p + n + fl);
5385 		tl = MB_PTR2LEN(p + n + fl);
5386 		mch_memmove(p + fl + tl, p, n);
5387 		mb_char2bytes(c, p);
5388 		mb_char2bytes(c2, p + tl);
5389 		p = p + tl;
5390 	    }
5391 	    else
5392 	    {
5393 		c = *p;
5394 		*p = p[2];
5395 		p[2] = c;
5396 		++p;
5397 	    }
5398 
5399 	    if (!soundfold && !spell_iswordp(p, curwin))
5400 	    {
5401 		/* Middle char is not a word char, skip the rotate.  First and
5402 		 * third char were already checked at swap and swap3. */
5403 		PROF_STORE(sp->ts_state)
5404 		sp->ts_state = STATE_REP_INI;
5405 		break;
5406 	    }
5407 
5408 	    /* Rotate three characters left: "123" -> "231".  We change
5409 	     * "fword" here, it's changed back afterwards at STATE_UNROT3L. */
5410 	    if (TRY_DEEPER(su, stack, depth, SCORE_SWAP3))
5411 	    {
5412 		go_deeper(stack, depth, SCORE_SWAP3);
5413 #ifdef DEBUG_TRIEWALK
5414 		p = fword + sp->ts_fidx;
5415 		sprintf(changename[depth], "%.*s-%s: rotate left %c%c%c",
5416 			sp->ts_twordlen, tword, fword + sp->ts_fidx,
5417 			p[0], p[1], p[2]);
5418 #endif
5419 		PROF_STORE(sp->ts_state)
5420 		sp->ts_state = STATE_UNROT3L;
5421 		++depth;
5422 		p = fword + sp->ts_fidx;
5423 		if (has_mbyte)
5424 		{
5425 		    n = MB_CPTR2LEN(p);
5426 		    c = mb_ptr2char(p);
5427 		    fl = MB_CPTR2LEN(p + n);
5428 		    fl += MB_CPTR2LEN(p + n + fl);
5429 		    mch_memmove(p, p + n, fl);
5430 		    mb_char2bytes(c, p + fl);
5431 		    stack[depth].ts_fidxtry = sp->ts_fidx + n + fl;
5432 		}
5433 		else
5434 		{
5435 		    c = *p;
5436 		    *p = p[1];
5437 		    p[1] = p[2];
5438 		    p[2] = c;
5439 		    stack[depth].ts_fidxtry = sp->ts_fidx + 3;
5440 		}
5441 	    }
5442 	    else
5443 	    {
5444 		PROF_STORE(sp->ts_state)
5445 		sp->ts_state = STATE_REP_INI;
5446 	    }
5447 	    break;
5448 
5449 	case STATE_UNROT3L:
5450 	    /* Undo ROT3L: "231" -> "123" */
5451 	    p = fword + sp->ts_fidx;
5452 	    if (has_mbyte)
5453 	    {
5454 		n = MB_PTR2LEN(p);
5455 		n += MB_PTR2LEN(p + n);
5456 		c = mb_ptr2char(p + n);
5457 		tl = MB_PTR2LEN(p + n);
5458 		mch_memmove(p + tl, p, n);
5459 		mb_char2bytes(c, p);
5460 	    }
5461 	    else
5462 	    {
5463 		c = p[2];
5464 		p[2] = p[1];
5465 		p[1] = *p;
5466 		*p = c;
5467 	    }
5468 
5469 	    /* Rotate three bytes right: "123" -> "312".  We change "fword"
5470 	     * here, it's changed back afterwards at STATE_UNROT3R. */
5471 	    if (TRY_DEEPER(su, stack, depth, SCORE_SWAP3))
5472 	    {
5473 		go_deeper(stack, depth, SCORE_SWAP3);
5474 #ifdef DEBUG_TRIEWALK
5475 		p = fword + sp->ts_fidx;
5476 		sprintf(changename[depth], "%.*s-%s: rotate right %c%c%c",
5477 			sp->ts_twordlen, tword, fword + sp->ts_fidx,
5478 			p[0], p[1], p[2]);
5479 #endif
5480 		PROF_STORE(sp->ts_state)
5481 		sp->ts_state = STATE_UNROT3R;
5482 		++depth;
5483 		p = fword + sp->ts_fidx;
5484 		if (has_mbyte)
5485 		{
5486 		    n = MB_CPTR2LEN(p);
5487 		    n += MB_CPTR2LEN(p + n);
5488 		    c = mb_ptr2char(p + n);
5489 		    tl = MB_CPTR2LEN(p + n);
5490 		    mch_memmove(p + tl, p, n);
5491 		    mb_char2bytes(c, p);
5492 		    stack[depth].ts_fidxtry = sp->ts_fidx + n + tl;
5493 		}
5494 		else
5495 		{
5496 		    c = p[2];
5497 		    p[2] = p[1];
5498 		    p[1] = *p;
5499 		    *p = c;
5500 		    stack[depth].ts_fidxtry = sp->ts_fidx + 3;
5501 		}
5502 	    }
5503 	    else
5504 	    {
5505 		PROF_STORE(sp->ts_state)
5506 		sp->ts_state = STATE_REP_INI;
5507 	    }
5508 	    break;
5509 
5510 	case STATE_UNROT3R:
5511 	    /* Undo ROT3R: "312" -> "123" */
5512 	    p = fword + sp->ts_fidx;
5513 	    if (has_mbyte)
5514 	    {
5515 		c = mb_ptr2char(p);
5516 		tl = MB_PTR2LEN(p);
5517 		n = MB_PTR2LEN(p + tl);
5518 		n += MB_PTR2LEN(p + tl + n);
5519 		mch_memmove(p, p + tl, n);
5520 		mb_char2bytes(c, p + n);
5521 	    }
5522 	    else
5523 	    {
5524 		c = *p;
5525 		*p = p[1];
5526 		p[1] = p[2];
5527 		p[2] = c;
5528 	    }
5529 	    /* FALLTHROUGH */
5530 
5531 	case STATE_REP_INI:
5532 	    /* Check if matching with REP items from the .aff file would work.
5533 	     * Quickly skip if:
5534 	     * - there are no REP items and we are not in the soundfold trie
5535 	     * - the score is going to be too high anyway
5536 	     * - already applied a REP item or swapped here  */
5537 	    if ((lp->lp_replang == NULL && !soundfold)
5538 		    || sp->ts_score + SCORE_REP >= su->su_maxscore
5539 		    || sp->ts_fidx < sp->ts_fidxtry)
5540 	    {
5541 		PROF_STORE(sp->ts_state)
5542 		sp->ts_state = STATE_FINAL;
5543 		break;
5544 	    }
5545 
5546 	    /* Use the first byte to quickly find the first entry that may
5547 	     * match.  If the index is -1 there is none. */
5548 	    if (soundfold)
5549 		sp->ts_curi = slang->sl_repsal_first[fword[sp->ts_fidx]];
5550 	    else
5551 		sp->ts_curi = lp->lp_replang->sl_rep_first[fword[sp->ts_fidx]];
5552 
5553 	    if (sp->ts_curi < 0)
5554 	    {
5555 		PROF_STORE(sp->ts_state)
5556 		sp->ts_state = STATE_FINAL;
5557 		break;
5558 	    }
5559 
5560 	    PROF_STORE(sp->ts_state)
5561 	    sp->ts_state = STATE_REP;
5562 	    /* FALLTHROUGH */
5563 
5564 	case STATE_REP:
5565 	    /* Try matching with REP items from the .aff file.  For each match
5566 	     * replace the characters and check if the resulting word is
5567 	     * valid. */
5568 	    p = fword + sp->ts_fidx;
5569 
5570 	    if (soundfold)
5571 		gap = &slang->sl_repsal;
5572 	    else
5573 		gap = &lp->lp_replang->sl_rep;
5574 	    while (sp->ts_curi < gap->ga_len)
5575 	    {
5576 		ftp = (fromto_T *)gap->ga_data + sp->ts_curi++;
5577 		if (*ftp->ft_from != *p)
5578 		{
5579 		    /* past possible matching entries */
5580 		    sp->ts_curi = gap->ga_len;
5581 		    break;
5582 		}
5583 		if (STRNCMP(ftp->ft_from, p, STRLEN(ftp->ft_from)) == 0
5584 			&& TRY_DEEPER(su, stack, depth, SCORE_REP))
5585 		{
5586 		    go_deeper(stack, depth, SCORE_REP);
5587 #ifdef DEBUG_TRIEWALK
5588 		    sprintf(changename[depth], "%.*s-%s: replace %s with %s",
5589 			    sp->ts_twordlen, tword, fword + sp->ts_fidx,
5590 			    ftp->ft_from, ftp->ft_to);
5591 #endif
5592 		    /* Need to undo this afterwards. */
5593 		    PROF_STORE(sp->ts_state)
5594 		    sp->ts_state = STATE_REP_UNDO;
5595 
5596 		    /* Change the "from" to the "to" string. */
5597 		    ++depth;
5598 		    fl = (int)STRLEN(ftp->ft_from);
5599 		    tl = (int)STRLEN(ftp->ft_to);
5600 		    if (fl != tl)
5601 		    {
5602 			STRMOVE(p + tl, p + fl);
5603 			repextra += tl - fl;
5604 		    }
5605 		    mch_memmove(p, ftp->ft_to, tl);
5606 		    stack[depth].ts_fidxtry = sp->ts_fidx + tl;
5607 		    stack[depth].ts_tcharlen = 0;
5608 		    break;
5609 		}
5610 	    }
5611 
5612 	    if (sp->ts_curi >= gap->ga_len && sp->ts_state == STATE_REP)
5613 	    {
5614 		/* No (more) matches. */
5615 		PROF_STORE(sp->ts_state)
5616 		sp->ts_state = STATE_FINAL;
5617 	    }
5618 
5619 	    break;
5620 
5621 	case STATE_REP_UNDO:
5622 	    /* Undo a REP replacement and continue with the next one. */
5623 	    if (soundfold)
5624 		gap = &slang->sl_repsal;
5625 	    else
5626 		gap = &lp->lp_replang->sl_rep;
5627 	    ftp = (fromto_T *)gap->ga_data + sp->ts_curi - 1;
5628 	    fl = (int)STRLEN(ftp->ft_from);
5629 	    tl = (int)STRLEN(ftp->ft_to);
5630 	    p = fword + sp->ts_fidx;
5631 	    if (fl != tl)
5632 	    {
5633 		STRMOVE(p + fl, p + tl);
5634 		repextra -= tl - fl;
5635 	    }
5636 	    mch_memmove(p, ftp->ft_from, fl);
5637 	    PROF_STORE(sp->ts_state)
5638 	    sp->ts_state = STATE_REP;
5639 	    break;
5640 
5641 	default:
5642 	    /* Did all possible states at this level, go up one level. */
5643 	    --depth;
5644 
5645 	    if (depth >= 0 && stack[depth].ts_prefixdepth == PFD_PREFIXTREE)
5646 	    {
5647 		/* Continue in or go back to the prefix tree. */
5648 		byts = pbyts;
5649 		idxs = pidxs;
5650 	    }
5651 
5652 	    /* Don't check for CTRL-C too often, it takes time. */
5653 	    if (--breakcheckcount == 0)
5654 	    {
5655 		ui_breakcheck();
5656 		breakcheckcount = 1000;
5657 	    }
5658 	}
5659     }
5660 }
5661 
5662 
5663 /*
5664  * Go one level deeper in the tree.
5665  */
5666     static void
5667 go_deeper(trystate_T *stack, int depth, int score_add)
5668 {
5669     stack[depth + 1] = stack[depth];
5670     stack[depth + 1].ts_state = STATE_START;
5671     stack[depth + 1].ts_score = stack[depth].ts_score + score_add;
5672     stack[depth + 1].ts_curi = 1;	/* start just after length byte */
5673     stack[depth + 1].ts_flags = 0;
5674 }
5675 
5676 /*
5677  * Case-folding may change the number of bytes: Count nr of chars in
5678  * fword[flen] and return the byte length of that many chars in "word".
5679  */
5680     static int
5681 nofold_len(char_u *fword, int flen, char_u *word)
5682 {
5683     char_u	*p;
5684     int		i = 0;
5685 
5686     for (p = fword; p < fword + flen; MB_PTR_ADV(p))
5687 	++i;
5688     for (p = word; i > 0; MB_PTR_ADV(p))
5689 	--i;
5690     return (int)(p - word);
5691 }
5692 
5693 /*
5694  * "fword" is a good word with case folded.  Find the matching keep-case
5695  * words and put it in "kword".
5696  * Theoretically there could be several keep-case words that result in the
5697  * same case-folded word, but we only find one...
5698  */
5699     static void
5700 find_keepcap_word(slang_T *slang, char_u *fword, char_u *kword)
5701 {
5702     char_u	uword[MAXWLEN];		/* "fword" in upper-case */
5703     int		depth;
5704     idx_T	tryidx;
5705 
5706     /* The following arrays are used at each depth in the tree. */
5707     idx_T	arridx[MAXWLEN];
5708     int		round[MAXWLEN];
5709     int		fwordidx[MAXWLEN];
5710     int		uwordidx[MAXWLEN];
5711     int		kwordlen[MAXWLEN];
5712 
5713     int		flen, ulen;
5714     int		l;
5715     int		len;
5716     int		c;
5717     idx_T	lo, hi, m;
5718     char_u	*p;
5719     char_u	*byts = slang->sl_kbyts;    /* array with bytes of the words */
5720     idx_T	*idxs = slang->sl_kidxs;    /* array with indexes */
5721 
5722     if (byts == NULL)
5723     {
5724 	/* array is empty: "cannot happen" */
5725 	*kword = NUL;
5726 	return;
5727     }
5728 
5729     /* Make an all-cap version of "fword". */
5730     allcap_copy(fword, uword);
5731 
5732     /*
5733      * Each character needs to be tried both case-folded and upper-case.
5734      * All this gets very complicated if we keep in mind that changing case
5735      * may change the byte length of a multi-byte character...
5736      */
5737     depth = 0;
5738     arridx[0] = 0;
5739     round[0] = 0;
5740     fwordidx[0] = 0;
5741     uwordidx[0] = 0;
5742     kwordlen[0] = 0;
5743     while (depth >= 0)
5744     {
5745 	if (fword[fwordidx[depth]] == NUL)
5746 	{
5747 	    /* We are at the end of "fword".  If the tree allows a word to end
5748 	     * here we have found a match. */
5749 	    if (byts[arridx[depth] + 1] == 0)
5750 	    {
5751 		kword[kwordlen[depth]] = NUL;
5752 		return;
5753 	    }
5754 
5755 	    /* kword is getting too long, continue one level up */
5756 	    --depth;
5757 	}
5758 	else if (++round[depth] > 2)
5759 	{
5760 	    /* tried both fold-case and upper-case character, continue one
5761 	     * level up */
5762 	    --depth;
5763 	}
5764 	else
5765 	{
5766 	    /*
5767 	     * round[depth] == 1: Try using the folded-case character.
5768 	     * round[depth] == 2: Try using the upper-case character.
5769 	     */
5770 	    if (has_mbyte)
5771 	    {
5772 		flen = MB_CPTR2LEN(fword + fwordidx[depth]);
5773 		ulen = MB_CPTR2LEN(uword + uwordidx[depth]);
5774 	    }
5775 	    else
5776 		ulen = flen = 1;
5777 	    if (round[depth] == 1)
5778 	    {
5779 		p = fword + fwordidx[depth];
5780 		l = flen;
5781 	    }
5782 	    else
5783 	    {
5784 		p = uword + uwordidx[depth];
5785 		l = ulen;
5786 	    }
5787 
5788 	    for (tryidx = arridx[depth]; l > 0; --l)
5789 	    {
5790 		/* Perform a binary search in the list of accepted bytes. */
5791 		len = byts[tryidx++];
5792 		c = *p++;
5793 		lo = tryidx;
5794 		hi = tryidx + len - 1;
5795 		while (lo < hi)
5796 		{
5797 		    m = (lo + hi) / 2;
5798 		    if (byts[m] > c)
5799 			hi = m - 1;
5800 		    else if (byts[m] < c)
5801 			lo = m + 1;
5802 		    else
5803 		    {
5804 			lo = hi = m;
5805 			break;
5806 		    }
5807 		}
5808 
5809 		/* Stop if there is no matching byte. */
5810 		if (hi < lo || byts[lo] != c)
5811 		    break;
5812 
5813 		/* Continue at the child (if there is one). */
5814 		tryidx = idxs[lo];
5815 	    }
5816 
5817 	    if (l == 0)
5818 	    {
5819 		/*
5820 		 * Found the matching char.  Copy it to "kword" and go a
5821 		 * level deeper.
5822 		 */
5823 		if (round[depth] == 1)
5824 		{
5825 		    STRNCPY(kword + kwordlen[depth], fword + fwordidx[depth],
5826 									flen);
5827 		    kwordlen[depth + 1] = kwordlen[depth] + flen;
5828 		}
5829 		else
5830 		{
5831 		    STRNCPY(kword + kwordlen[depth], uword + uwordidx[depth],
5832 									ulen);
5833 		    kwordlen[depth + 1] = kwordlen[depth] + ulen;
5834 		}
5835 		fwordidx[depth + 1] = fwordidx[depth] + flen;
5836 		uwordidx[depth + 1] = uwordidx[depth] + ulen;
5837 
5838 		++depth;
5839 		arridx[depth] = tryidx;
5840 		round[depth] = 0;
5841 	    }
5842 	}
5843     }
5844 
5845     /* Didn't find it: "cannot happen". */
5846     *kword = NUL;
5847 }
5848 
5849 /*
5850  * Compute the sound-a-like score for suggestions in su->su_ga and add them to
5851  * su->su_sga.
5852  */
5853     static void
5854 score_comp_sal(suginfo_T *su)
5855 {
5856     langp_T	*lp;
5857     char_u	badsound[MAXWLEN];
5858     int		i;
5859     suggest_T   *stp;
5860     suggest_T   *sstp;
5861     int		score;
5862     int		lpi;
5863 
5864     if (ga_grow(&su->su_sga, su->su_ga.ga_len) == FAIL)
5865 	return;
5866 
5867     /*	Use the sound-folding of the first language that supports it. */
5868     for (lpi = 0; lpi < curwin->w_s->b_langp.ga_len; ++lpi)
5869     {
5870 	lp = LANGP_ENTRY(curwin->w_s->b_langp, lpi);
5871 	if (lp->lp_slang->sl_sal.ga_len > 0)
5872 	{
5873 	    /* soundfold the bad word */
5874 	    spell_soundfold(lp->lp_slang, su->su_fbadword, TRUE, badsound);
5875 
5876 	    for (i = 0; i < su->su_ga.ga_len; ++i)
5877 	    {
5878 		stp = &SUG(su->su_ga, i);
5879 
5880 		/* Case-fold the suggested word, sound-fold it and compute the
5881 		 * sound-a-like score. */
5882 		score = stp_sal_score(stp, su, lp->lp_slang, badsound);
5883 		if (score < SCORE_MAXMAX)
5884 		{
5885 		    /* Add the suggestion. */
5886 		    sstp = &SUG(su->su_sga, su->su_sga.ga_len);
5887 		    sstp->st_word = vim_strsave(stp->st_word);
5888 		    if (sstp->st_word != NULL)
5889 		    {
5890 			sstp->st_wordlen = stp->st_wordlen;
5891 			sstp->st_score = score;
5892 			sstp->st_altscore = 0;
5893 			sstp->st_orglen = stp->st_orglen;
5894 			++su->su_sga.ga_len;
5895 		    }
5896 		}
5897 	    }
5898 	    break;
5899 	}
5900     }
5901 }
5902 
5903 /*
5904  * Combine the list of suggestions in su->su_ga and su->su_sga.
5905  * They are entwined.
5906  */
5907     static void
5908 score_combine(suginfo_T *su)
5909 {
5910     int		i;
5911     int		j;
5912     garray_T	ga;
5913     garray_T	*gap;
5914     langp_T	*lp;
5915     suggest_T	*stp;
5916     char_u	*p;
5917     char_u	badsound[MAXWLEN];
5918     int		round;
5919     int		lpi;
5920     slang_T	*slang = NULL;
5921 
5922     /* Add the alternate score to su_ga. */
5923     for (lpi = 0; lpi < curwin->w_s->b_langp.ga_len; ++lpi)
5924     {
5925 	lp = LANGP_ENTRY(curwin->w_s->b_langp, lpi);
5926 	if (lp->lp_slang->sl_sal.ga_len > 0)
5927 	{
5928 	    /* soundfold the bad word */
5929 	    slang = lp->lp_slang;
5930 	    spell_soundfold(slang, su->su_fbadword, TRUE, badsound);
5931 
5932 	    for (i = 0; i < su->su_ga.ga_len; ++i)
5933 	    {
5934 		stp = &SUG(su->su_ga, i);
5935 		stp->st_altscore = stp_sal_score(stp, su, slang, badsound);
5936 		if (stp->st_altscore == SCORE_MAXMAX)
5937 		    stp->st_score = (stp->st_score * 3 + SCORE_BIG) / 4;
5938 		else
5939 		    stp->st_score = (stp->st_score * 3
5940 						  + stp->st_altscore) / 4;
5941 		stp->st_salscore = FALSE;
5942 	    }
5943 	    break;
5944 	}
5945     }
5946 
5947     if (slang == NULL)	/* Using "double" without sound folding. */
5948     {
5949 	(void)cleanup_suggestions(&su->su_ga, su->su_maxscore,
5950 							     su->su_maxcount);
5951 	return;
5952     }
5953 
5954     /* Add the alternate score to su_sga. */
5955     for (i = 0; i < su->su_sga.ga_len; ++i)
5956     {
5957 	stp = &SUG(su->su_sga, i);
5958 	stp->st_altscore = spell_edit_score(slang,
5959 						su->su_badword, stp->st_word);
5960 	if (stp->st_score == SCORE_MAXMAX)
5961 	    stp->st_score = (SCORE_BIG * 7 + stp->st_altscore) / 8;
5962 	else
5963 	    stp->st_score = (stp->st_score * 7 + stp->st_altscore) / 8;
5964 	stp->st_salscore = TRUE;
5965     }
5966 
5967     /* Remove bad suggestions, sort the suggestions and truncate at "maxcount"
5968      * for both lists. */
5969     check_suggestions(su, &su->su_ga);
5970     (void)cleanup_suggestions(&su->su_ga, su->su_maxscore, su->su_maxcount);
5971     check_suggestions(su, &su->su_sga);
5972     (void)cleanup_suggestions(&su->su_sga, su->su_maxscore, su->su_maxcount);
5973 
5974     ga_init2(&ga, (int)sizeof(suginfo_T), 1);
5975     if (ga_grow(&ga, su->su_ga.ga_len + su->su_sga.ga_len) == FAIL)
5976 	return;
5977 
5978     stp = &SUG(ga, 0);
5979     for (i = 0; i < su->su_ga.ga_len || i < su->su_sga.ga_len; ++i)
5980     {
5981 	/* round 1: get a suggestion from su_ga
5982 	 * round 2: get a suggestion from su_sga */
5983 	for (round = 1; round <= 2; ++round)
5984 	{
5985 	    gap = round == 1 ? &su->su_ga : &su->su_sga;
5986 	    if (i < gap->ga_len)
5987 	    {
5988 		/* Don't add a word if it's already there. */
5989 		p = SUG(*gap, i).st_word;
5990 		for (j = 0; j < ga.ga_len; ++j)
5991 		    if (STRCMP(stp[j].st_word, p) == 0)
5992 			break;
5993 		if (j == ga.ga_len)
5994 		    stp[ga.ga_len++] = SUG(*gap, i);
5995 		else
5996 		    vim_free(p);
5997 	    }
5998 	}
5999     }
6000 
6001     ga_clear(&su->su_ga);
6002     ga_clear(&su->su_sga);
6003 
6004     /* Truncate the list to the number of suggestions that will be displayed. */
6005     if (ga.ga_len > su->su_maxcount)
6006     {
6007 	for (i = su->su_maxcount; i < ga.ga_len; ++i)
6008 	    vim_free(stp[i].st_word);
6009 	ga.ga_len = su->su_maxcount;
6010     }
6011 
6012     su->su_ga = ga;
6013 }
6014 
6015 /*
6016  * For the goodword in "stp" compute the soundalike score compared to the
6017  * badword.
6018  */
6019     static int
6020 stp_sal_score(
6021     suggest_T	*stp,
6022     suginfo_T	*su,
6023     slang_T	*slang,
6024     char_u	*badsound)	/* sound-folded badword */
6025 {
6026     char_u	*p;
6027     char_u	*pbad;
6028     char_u	*pgood;
6029     char_u	badsound2[MAXWLEN];
6030     char_u	fword[MAXWLEN];
6031     char_u	goodsound[MAXWLEN];
6032     char_u	goodword[MAXWLEN];
6033     int		lendiff;
6034 
6035     lendiff = (int)(su->su_badlen - stp->st_orglen);
6036     if (lendiff >= 0)
6037 	pbad = badsound;
6038     else
6039     {
6040 	/* soundfold the bad word with more characters following */
6041 	(void)spell_casefold(su->su_badptr, stp->st_orglen, fword, MAXWLEN);
6042 
6043 	/* When joining two words the sound often changes a lot.  E.g., "t he"
6044 	 * sounds like "t h" while "the" sounds like "@".  Avoid that by
6045 	 * removing the space.  Don't do it when the good word also contains a
6046 	 * space. */
6047 	if (VIM_ISWHITE(su->su_badptr[su->su_badlen])
6048 					 && *skiptowhite(stp->st_word) == NUL)
6049 	    for (p = fword; *(p = skiptowhite(p)) != NUL; )
6050 		STRMOVE(p, p + 1);
6051 
6052 	spell_soundfold(slang, fword, TRUE, badsound2);
6053 	pbad = badsound2;
6054     }
6055 
6056     if (lendiff > 0 && stp->st_wordlen + lendiff < MAXWLEN)
6057     {
6058 	/* Add part of the bad word to the good word, so that we soundfold
6059 	 * what replaces the bad word. */
6060 	STRCPY(goodword, stp->st_word);
6061 	vim_strncpy(goodword + stp->st_wordlen,
6062 			    su->su_badptr + su->su_badlen - lendiff, lendiff);
6063 	pgood = goodword;
6064     }
6065     else
6066 	pgood = stp->st_word;
6067 
6068     /* Sound-fold the word and compute the score for the difference. */
6069     spell_soundfold(slang, pgood, FALSE, goodsound);
6070 
6071     return soundalike_score(goodsound, pbad);
6072 }
6073 
6074 /* structure used to store soundfolded words that add_sound_suggest() has
6075  * handled already. */
6076 typedef struct
6077 {
6078     short	sft_score;	/* lowest score used */
6079     char_u	sft_word[1];    /* soundfolded word, actually longer */
6080 } sftword_T;
6081 
6082 static sftword_T dumsft;
6083 #define HIKEY2SFT(p)  ((sftword_T *)(p - (dumsft.sft_word - (char_u *)&dumsft)))
6084 #define HI2SFT(hi)     HIKEY2SFT((hi)->hi_key)
6085 
6086 /*
6087  * Prepare for calling suggest_try_soundalike().
6088  */
6089     static void
6090 suggest_try_soundalike_prep(void)
6091 {
6092     langp_T	*lp;
6093     int		lpi;
6094     slang_T	*slang;
6095 
6096     /* Do this for all languages that support sound folding and for which a
6097      * .sug file has been loaded. */
6098     for (lpi = 0; lpi < curwin->w_s->b_langp.ga_len; ++lpi)
6099     {
6100 	lp = LANGP_ENTRY(curwin->w_s->b_langp, lpi);
6101 	slang = lp->lp_slang;
6102 	if (slang->sl_sal.ga_len > 0 && slang->sl_sbyts != NULL)
6103 	    /* prepare the hashtable used by add_sound_suggest() */
6104 	    hash_init(&slang->sl_sounddone);
6105     }
6106 }
6107 
6108 /*
6109  * Find suggestions by comparing the word in a sound-a-like form.
6110  * Note: This doesn't support postponed prefixes.
6111  */
6112     static void
6113 suggest_try_soundalike(suginfo_T *su)
6114 {
6115     char_u	salword[MAXWLEN];
6116     langp_T	*lp;
6117     int		lpi;
6118     slang_T	*slang;
6119 
6120     /* Do this for all languages that support sound folding and for which a
6121      * .sug file has been loaded. */
6122     for (lpi = 0; lpi < curwin->w_s->b_langp.ga_len; ++lpi)
6123     {
6124 	lp = LANGP_ENTRY(curwin->w_s->b_langp, lpi);
6125 	slang = lp->lp_slang;
6126 	if (slang->sl_sal.ga_len > 0 && slang->sl_sbyts != NULL)
6127 	{
6128 	    /* soundfold the bad word */
6129 	    spell_soundfold(slang, su->su_fbadword, TRUE, salword);
6130 
6131 	    /* try all kinds of inserts/deletes/swaps/etc. */
6132 	    /* TODO: also soundfold the next words, so that we can try joining
6133 	     * and splitting */
6134 #ifdef SUGGEST_PROFILE
6135 	prof_init();
6136 #endif
6137 	    suggest_trie_walk(su, lp, salword, TRUE);
6138 #ifdef SUGGEST_PROFILE
6139 	prof_report("soundalike");
6140 #endif
6141 	}
6142     }
6143 }
6144 
6145 /*
6146  * Finish up after calling suggest_try_soundalike().
6147  */
6148     static void
6149 suggest_try_soundalike_finish(void)
6150 {
6151     langp_T	*lp;
6152     int		lpi;
6153     slang_T	*slang;
6154     int		todo;
6155     hashitem_T	*hi;
6156 
6157     /* Do this for all languages that support sound folding and for which a
6158      * .sug file has been loaded. */
6159     for (lpi = 0; lpi < curwin->w_s->b_langp.ga_len; ++lpi)
6160     {
6161 	lp = LANGP_ENTRY(curwin->w_s->b_langp, lpi);
6162 	slang = lp->lp_slang;
6163 	if (slang->sl_sal.ga_len > 0 && slang->sl_sbyts != NULL)
6164 	{
6165 	    /* Free the info about handled words. */
6166 	    todo = (int)slang->sl_sounddone.ht_used;
6167 	    for (hi = slang->sl_sounddone.ht_array; todo > 0; ++hi)
6168 		if (!HASHITEM_EMPTY(hi))
6169 		{
6170 		    vim_free(HI2SFT(hi));
6171 		    --todo;
6172 		}
6173 
6174 	    /* Clear the hashtable, it may also be used by another region. */
6175 	    hash_clear(&slang->sl_sounddone);
6176 	    hash_init(&slang->sl_sounddone);
6177 	}
6178     }
6179 }
6180 
6181 /*
6182  * A match with a soundfolded word is found.  Add the good word(s) that
6183  * produce this soundfolded word.
6184  */
6185     static void
6186 add_sound_suggest(
6187     suginfo_T	*su,
6188     char_u	*goodword,
6189     int		score,		/* soundfold score  */
6190     langp_T	*lp)
6191 {
6192     slang_T	*slang = lp->lp_slang;	/* language for sound folding */
6193     int		sfwordnr;
6194     char_u	*nrline;
6195     int		orgnr;
6196     char_u	theword[MAXWLEN];
6197     int		i;
6198     int		wlen;
6199     char_u	*byts;
6200     idx_T	*idxs;
6201     int		n;
6202     int		wordcount;
6203     int		wc;
6204     int		goodscore;
6205     hash_T	hash;
6206     hashitem_T  *hi;
6207     sftword_T	*sft;
6208     int		bc, gc;
6209     int		limit;
6210 
6211     /*
6212      * It's very well possible that the same soundfold word is found several
6213      * times with different scores.  Since the following is quite slow only do
6214      * the words that have a better score than before.  Use a hashtable to
6215      * remember the words that have been done.
6216      */
6217     hash = hash_hash(goodword);
6218     hi = hash_lookup(&slang->sl_sounddone, goodword, hash);
6219     if (HASHITEM_EMPTY(hi))
6220     {
6221 	sft = (sftword_T *)alloc((unsigned)(sizeof(sftword_T)
6222 							 + STRLEN(goodword)));
6223 	if (sft != NULL)
6224 	{
6225 	    sft->sft_score = score;
6226 	    STRCPY(sft->sft_word, goodword);
6227 	    hash_add_item(&slang->sl_sounddone, hi, sft->sft_word, hash);
6228 	}
6229     }
6230     else
6231     {
6232 	sft = HI2SFT(hi);
6233 	if (score >= sft->sft_score)
6234 	    return;
6235 	sft->sft_score = score;
6236     }
6237 
6238     /*
6239      * Find the word nr in the soundfold tree.
6240      */
6241     sfwordnr = soundfold_find(slang, goodword);
6242     if (sfwordnr < 0)
6243     {
6244 	internal_error("add_sound_suggest()");
6245 	return;
6246     }
6247 
6248     /*
6249      * go over the list of good words that produce this soundfold word
6250      */
6251     nrline = ml_get_buf(slang->sl_sugbuf, (linenr_T)(sfwordnr + 1), FALSE);
6252     orgnr = 0;
6253     while (*nrline != NUL)
6254     {
6255 	/* The wordnr was stored in a minimal nr of bytes as an offset to the
6256 	 * previous wordnr. */
6257 	orgnr += bytes2offset(&nrline);
6258 
6259 	byts = slang->sl_fbyts;
6260 	idxs = slang->sl_fidxs;
6261 
6262 	/* Lookup the word "orgnr" one of the two tries. */
6263 	n = 0;
6264 	wordcount = 0;
6265 	for (wlen = 0; wlen < MAXWLEN - 3; ++wlen)
6266 	{
6267 	    i = 1;
6268 	    if (wordcount == orgnr && byts[n + 1] == NUL)
6269 		break;	/* found end of word */
6270 
6271 	    if (byts[n + 1] == NUL)
6272 		++wordcount;
6273 
6274 	    /* skip over the NUL bytes */
6275 	    for ( ; byts[n + i] == NUL; ++i)
6276 		if (i > byts[n])	/* safety check */
6277 		{
6278 		    STRCPY(theword + wlen, "BAD");
6279 		    wlen += 3;
6280 		    goto badword;
6281 		}
6282 
6283 	    /* One of the siblings must have the word. */
6284 	    for ( ; i < byts[n]; ++i)
6285 	    {
6286 		wc = idxs[idxs[n + i]];	/* nr of words under this byte */
6287 		if (wordcount + wc > orgnr)
6288 		    break;
6289 		wordcount += wc;
6290 	    }
6291 
6292 	    theword[wlen] = byts[n + i];
6293 	    n = idxs[n + i];
6294 	}
6295 badword:
6296 	theword[wlen] = NUL;
6297 
6298 	/* Go over the possible flags and regions. */
6299 	for (; i <= byts[n] && byts[n + i] == NUL; ++i)
6300 	{
6301 	    char_u	cword[MAXWLEN];
6302 	    char_u	*p;
6303 	    int		flags = (int)idxs[n + i];
6304 
6305 	    /* Skip words with the NOSUGGEST flag */
6306 	    if (flags & WF_NOSUGGEST)
6307 		continue;
6308 
6309 	    if (flags & WF_KEEPCAP)
6310 	    {
6311 		/* Must find the word in the keep-case tree. */
6312 		find_keepcap_word(slang, theword, cword);
6313 		p = cword;
6314 	    }
6315 	    else
6316 	    {
6317 		flags |= su->su_badflags;
6318 		if ((flags & WF_CAPMASK) != 0)
6319 		{
6320 		    /* Need to fix case according to "flags". */
6321 		    make_case_word(theword, cword, flags);
6322 		    p = cword;
6323 		}
6324 		else
6325 		    p = theword;
6326 	    }
6327 
6328 	    /* Add the suggestion. */
6329 	    if (sps_flags & SPS_DOUBLE)
6330 	    {
6331 		/* Add the suggestion if the score isn't too bad. */
6332 		if (score <= su->su_maxscore)
6333 		    add_suggestion(su, &su->su_sga, p, su->su_badlen,
6334 					       score, 0, FALSE, slang, FALSE);
6335 	    }
6336 	    else
6337 	    {
6338 		/* Add a penalty for words in another region. */
6339 		if ((flags & WF_REGION)
6340 			    && (((unsigned)flags >> 16) & lp->lp_region) == 0)
6341 		    goodscore = SCORE_REGION;
6342 		else
6343 		    goodscore = 0;
6344 
6345 		/* Add a small penalty for changing the first letter from
6346 		 * lower to upper case.  Helps for "tath" -> "Kath", which is
6347 		 * less common than "tath" -> "path".  Don't do it when the
6348 		 * letter is the same, that has already been counted. */
6349 		gc = PTR2CHAR(p);
6350 		if (SPELL_ISUPPER(gc))
6351 		{
6352 		    bc = PTR2CHAR(su->su_badword);
6353 		    if (!SPELL_ISUPPER(bc)
6354 				      && SPELL_TOFOLD(bc) != SPELL_TOFOLD(gc))
6355 			goodscore += SCORE_ICASE / 2;
6356 		}
6357 
6358 		/* Compute the score for the good word.  This only does letter
6359 		 * insert/delete/swap/replace.  REP items are not considered,
6360 		 * which may make the score a bit higher.
6361 		 * Use a limit for the score to make it work faster.  Use
6362 		 * MAXSCORE(), because RESCORE() will change the score.
6363 		 * If the limit is very high then the iterative method is
6364 		 * inefficient, using an array is quicker. */
6365 		limit = MAXSCORE(su->su_sfmaxscore - goodscore, score);
6366 		if (limit > SCORE_LIMITMAX)
6367 		    goodscore += spell_edit_score(slang, su->su_badword, p);
6368 		else
6369 		    goodscore += spell_edit_score_limit(slang, su->su_badword,
6370 								    p, limit);
6371 
6372 		/* When going over the limit don't bother to do the rest. */
6373 		if (goodscore < SCORE_MAXMAX)
6374 		{
6375 		    /* Give a bonus to words seen before. */
6376 		    goodscore = score_wordcount_adj(slang, goodscore, p, FALSE);
6377 
6378 		    /* Add the suggestion if the score isn't too bad. */
6379 		    goodscore = RESCORE(goodscore, score);
6380 		    if (goodscore <= su->su_sfmaxscore)
6381 			add_suggestion(su, &su->su_ga, p, su->su_badlen,
6382 					 goodscore, score, TRUE, slang, TRUE);
6383 		}
6384 	    }
6385 	}
6386 	/* smsg("word %s (%d): %s (%d)", sftword, sftnr, theword, orgnr); */
6387     }
6388 }
6389 
6390 /*
6391  * Find word "word" in fold-case tree for "slang" and return the word number.
6392  */
6393     static int
6394 soundfold_find(slang_T *slang, char_u *word)
6395 {
6396     idx_T	arridx = 0;
6397     int		len;
6398     int		wlen = 0;
6399     int		c;
6400     char_u	*ptr = word;
6401     char_u	*byts;
6402     idx_T	*idxs;
6403     int		wordnr = 0;
6404 
6405     byts = slang->sl_sbyts;
6406     idxs = slang->sl_sidxs;
6407 
6408     for (;;)
6409     {
6410 	/* First byte is the number of possible bytes. */
6411 	len = byts[arridx++];
6412 
6413 	/* If the first possible byte is a zero the word could end here.
6414 	 * If the word ends we found the word.  If not skip the NUL bytes. */
6415 	c = ptr[wlen];
6416 	if (byts[arridx] == NUL)
6417 	{
6418 	    if (c == NUL)
6419 		break;
6420 
6421 	    /* Skip over the zeros, there can be several. */
6422 	    while (len > 0 && byts[arridx] == NUL)
6423 	    {
6424 		++arridx;
6425 		--len;
6426 	    }
6427 	    if (len == 0)
6428 		return -1;    /* no children, word should have ended here */
6429 	    ++wordnr;
6430 	}
6431 
6432 	/* If the word ends we didn't find it. */
6433 	if (c == NUL)
6434 	    return -1;
6435 
6436 	/* Perform a binary search in the list of accepted bytes. */
6437 	if (c == TAB)	    /* <Tab> is handled like <Space> */
6438 	    c = ' ';
6439 	while (byts[arridx] < c)
6440 	{
6441 	    /* The word count is in the first idxs[] entry of the child. */
6442 	    wordnr += idxs[idxs[arridx]];
6443 	    ++arridx;
6444 	    if (--len == 0)	/* end of the bytes, didn't find it */
6445 		return -1;
6446 	}
6447 	if (byts[arridx] != c)	/* didn't find the byte */
6448 	    return -1;
6449 
6450 	/* Continue at the child (if there is one). */
6451 	arridx = idxs[arridx];
6452 	++wlen;
6453 
6454 	/* One space in the good word may stand for several spaces in the
6455 	 * checked word. */
6456 	if (c == ' ')
6457 	    while (ptr[wlen] == ' ' || ptr[wlen] == TAB)
6458 		++wlen;
6459     }
6460 
6461     return wordnr;
6462 }
6463 
6464 /*
6465  * Copy "fword" to "cword", fixing case according to "flags".
6466  */
6467     static void
6468 make_case_word(char_u *fword, char_u *cword, int flags)
6469 {
6470     if (flags & WF_ALLCAP)
6471 	/* Make it all upper-case */
6472 	allcap_copy(fword, cword);
6473     else if (flags & WF_ONECAP)
6474 	/* Make the first letter upper-case */
6475 	onecap_copy(fword, cword, TRUE);
6476     else
6477 	/* Use goodword as-is. */
6478 	STRCPY(cword, fword);
6479 }
6480 
6481 
6482 /*
6483  * Return TRUE if "c1" and "c2" are similar characters according to the MAP
6484  * lines in the .aff file.
6485  */
6486     static int
6487 similar_chars(slang_T *slang, int c1, int c2)
6488 {
6489     int		m1, m2;
6490     char_u	buf[MB_MAXBYTES + 1];
6491     hashitem_T  *hi;
6492 
6493     if (c1 >= 256)
6494     {
6495 	buf[mb_char2bytes(c1, buf)] = 0;
6496 	hi = hash_find(&slang->sl_map_hash, buf);
6497 	if (HASHITEM_EMPTY(hi))
6498 	    m1 = 0;
6499 	else
6500 	    m1 = mb_ptr2char(hi->hi_key + STRLEN(hi->hi_key) + 1);
6501     }
6502     else
6503 	m1 = slang->sl_map_array[c1];
6504     if (m1 == 0)
6505 	return FALSE;
6506 
6507 
6508     if (c2 >= 256)
6509     {
6510 	buf[mb_char2bytes(c2, buf)] = 0;
6511 	hi = hash_find(&slang->sl_map_hash, buf);
6512 	if (HASHITEM_EMPTY(hi))
6513 	    m2 = 0;
6514 	else
6515 	    m2 = mb_ptr2char(hi->hi_key + STRLEN(hi->hi_key) + 1);
6516     }
6517     else
6518 	m2 = slang->sl_map_array[c2];
6519 
6520     return m1 == m2;
6521 }
6522 
6523 /*
6524  * Add a suggestion to the list of suggestions.
6525  * For a suggestion that is already in the list the lowest score is remembered.
6526  */
6527     static void
6528 add_suggestion(
6529     suginfo_T	*su,
6530     garray_T	*gap,		/* either su_ga or su_sga */
6531     char_u	*goodword,
6532     int		badlenarg,	/* len of bad word replaced with "goodword" */
6533     int		score,
6534     int		altscore,
6535     int		had_bonus,	/* value for st_had_bonus */
6536     slang_T	*slang,		/* language for sound folding */
6537     int		maxsf)		/* su_maxscore applies to soundfold score,
6538 				   su_sfmaxscore to the total score. */
6539 {
6540     int		goodlen;	/* len of goodword changed */
6541     int		badlen;		/* len of bad word changed */
6542     suggest_T   *stp;
6543     suggest_T   new_sug;
6544     int		i;
6545     char_u	*pgood, *pbad;
6546 
6547     /* Minimize "badlen" for consistency.  Avoids that changing "the the" to
6548      * "thee the" is added next to changing the first "the" the "thee".  */
6549     pgood = goodword + STRLEN(goodword);
6550     pbad = su->su_badptr + badlenarg;
6551     for (;;)
6552     {
6553 	goodlen = (int)(pgood - goodword);
6554 	badlen = (int)(pbad - su->su_badptr);
6555 	if (goodlen <= 0 || badlen <= 0)
6556 	    break;
6557 	MB_PTR_BACK(goodword, pgood);
6558 	MB_PTR_BACK(su->su_badptr, pbad);
6559 	if (has_mbyte)
6560 	{
6561 	    if (mb_ptr2char(pgood) != mb_ptr2char(pbad))
6562 		break;
6563 	}
6564 	else if (*pgood != *pbad)
6565 		break;
6566     }
6567 
6568     if (badlen == 0 && goodlen == 0)
6569 	/* goodword doesn't change anything; may happen for "the the" changing
6570 	 * the first "the" to itself. */
6571 	return;
6572 
6573     if (gap->ga_len == 0)
6574 	i = -1;
6575     else
6576     {
6577 	/* Check if the word is already there.  Also check the length that is
6578 	 * being replaced "thes," -> "these" is a different suggestion from
6579 	 * "thes" -> "these". */
6580 	stp = &SUG(*gap, 0);
6581 	for (i = gap->ga_len; --i >= 0; ++stp)
6582 	    if (stp->st_wordlen == goodlen
6583 		    && stp->st_orglen == badlen
6584 		    && STRNCMP(stp->st_word, goodword, goodlen) == 0)
6585 	    {
6586 		/*
6587 		 * Found it.  Remember the word with the lowest score.
6588 		 */
6589 		if (stp->st_slang == NULL)
6590 		    stp->st_slang = slang;
6591 
6592 		new_sug.st_score = score;
6593 		new_sug.st_altscore = altscore;
6594 		new_sug.st_had_bonus = had_bonus;
6595 
6596 		if (stp->st_had_bonus != had_bonus)
6597 		{
6598 		    /* Only one of the two had the soundalike score computed.
6599 		     * Need to do that for the other one now, otherwise the
6600 		     * scores can't be compared.  This happens because
6601 		     * suggest_try_change() doesn't compute the soundalike
6602 		     * word to keep it fast, while some special methods set
6603 		     * the soundalike score to zero. */
6604 		    if (had_bonus)
6605 			rescore_one(su, stp);
6606 		    else
6607 		    {
6608 			new_sug.st_word = stp->st_word;
6609 			new_sug.st_wordlen = stp->st_wordlen;
6610 			new_sug.st_slang = stp->st_slang;
6611 			new_sug.st_orglen = badlen;
6612 			rescore_one(su, &new_sug);
6613 		    }
6614 		}
6615 
6616 		if (stp->st_score > new_sug.st_score)
6617 		{
6618 		    stp->st_score = new_sug.st_score;
6619 		    stp->st_altscore = new_sug.st_altscore;
6620 		    stp->st_had_bonus = new_sug.st_had_bonus;
6621 		}
6622 		break;
6623 	    }
6624     }
6625 
6626     if (i < 0 && ga_grow(gap, 1) == OK)
6627     {
6628 	/* Add a suggestion. */
6629 	stp = &SUG(*gap, gap->ga_len);
6630 	stp->st_word = vim_strnsave(goodword, goodlen);
6631 	if (stp->st_word != NULL)
6632 	{
6633 	    stp->st_wordlen = goodlen;
6634 	    stp->st_score = score;
6635 	    stp->st_altscore = altscore;
6636 	    stp->st_had_bonus = had_bonus;
6637 	    stp->st_orglen = badlen;
6638 	    stp->st_slang = slang;
6639 	    ++gap->ga_len;
6640 
6641 	    /* If we have too many suggestions now, sort the list and keep
6642 	     * the best suggestions. */
6643 	    if (gap->ga_len > SUG_MAX_COUNT(su))
6644 	    {
6645 		if (maxsf)
6646 		    su->su_sfmaxscore = cleanup_suggestions(gap,
6647 				      su->su_sfmaxscore, SUG_CLEAN_COUNT(su));
6648 		else
6649 		    su->su_maxscore = cleanup_suggestions(gap,
6650 					su->su_maxscore, SUG_CLEAN_COUNT(su));
6651 	    }
6652 	}
6653     }
6654 }
6655 
6656 /*
6657  * Suggestions may in fact be flagged as errors.  Esp. for banned words and
6658  * for split words, such as "the the".  Remove these from the list here.
6659  */
6660     static void
6661 check_suggestions(
6662     suginfo_T	*su,
6663     garray_T	*gap)		    /* either su_ga or su_sga */
6664 {
6665     suggest_T   *stp;
6666     int		i;
6667     char_u	longword[MAXWLEN + 1];
6668     int		len;
6669     hlf_T	attr;
6670 
6671     stp = &SUG(*gap, 0);
6672     for (i = gap->ga_len - 1; i >= 0; --i)
6673     {
6674 	/* Need to append what follows to check for "the the". */
6675 	vim_strncpy(longword, stp[i].st_word, MAXWLEN);
6676 	len = stp[i].st_wordlen;
6677 	vim_strncpy(longword + len, su->su_badptr + stp[i].st_orglen,
6678 							       MAXWLEN - len);
6679 	attr = HLF_COUNT;
6680 	(void)spell_check(curwin, longword, &attr, NULL, FALSE);
6681 	if (attr != HLF_COUNT)
6682 	{
6683 	    /* Remove this entry. */
6684 	    vim_free(stp[i].st_word);
6685 	    --gap->ga_len;
6686 	    if (i < gap->ga_len)
6687 		mch_memmove(stp + i, stp + i + 1,
6688 				       sizeof(suggest_T) * (gap->ga_len - i));
6689 	}
6690     }
6691 }
6692 
6693 
6694 /*
6695  * Add a word to be banned.
6696  */
6697     static void
6698 add_banned(
6699     suginfo_T	*su,
6700     char_u	*word)
6701 {
6702     char_u	*s;
6703     hash_T	hash;
6704     hashitem_T	*hi;
6705 
6706     hash = hash_hash(word);
6707     hi = hash_lookup(&su->su_banned, word, hash);
6708     if (HASHITEM_EMPTY(hi))
6709     {
6710 	s = vim_strsave(word);
6711 	if (s != NULL)
6712 	    hash_add_item(&su->su_banned, hi, s, hash);
6713     }
6714 }
6715 
6716 /*
6717  * Recompute the score for all suggestions if sound-folding is possible.  This
6718  * is slow, thus only done for the final results.
6719  */
6720     static void
6721 rescore_suggestions(suginfo_T *su)
6722 {
6723     int		i;
6724 
6725     if (su->su_sallang != NULL)
6726 	for (i = 0; i < su->su_ga.ga_len; ++i)
6727 	    rescore_one(su, &SUG(su->su_ga, i));
6728 }
6729 
6730 /*
6731  * Recompute the score for one suggestion if sound-folding is possible.
6732  */
6733     static void
6734 rescore_one(suginfo_T *su, suggest_T *stp)
6735 {
6736     slang_T	*slang = stp->st_slang;
6737     char_u	sal_badword[MAXWLEN];
6738     char_u	*p;
6739 
6740     /* Only rescore suggestions that have no sal score yet and do have a
6741      * language. */
6742     if (slang != NULL && slang->sl_sal.ga_len > 0 && !stp->st_had_bonus)
6743     {
6744 	if (slang == su->su_sallang)
6745 	    p = su->su_sal_badword;
6746 	else
6747 	{
6748 	    spell_soundfold(slang, su->su_fbadword, TRUE, sal_badword);
6749 	    p = sal_badword;
6750 	}
6751 
6752 	stp->st_altscore = stp_sal_score(stp, su, slang, p);
6753 	if (stp->st_altscore == SCORE_MAXMAX)
6754 	    stp->st_altscore = SCORE_BIG;
6755 	stp->st_score = RESCORE(stp->st_score, stp->st_altscore);
6756 	stp->st_had_bonus = TRUE;
6757     }
6758 }
6759 
6760 static int
6761 #ifdef __BORLANDC__
6762 _RTLENTRYF
6763 #endif
6764 sug_compare(const void *s1, const void *s2);
6765 
6766 /*
6767  * Function given to qsort() to sort the suggestions on st_score.
6768  * First on "st_score", then "st_altscore" then alphabetically.
6769  */
6770     static int
6771 #ifdef __BORLANDC__
6772 _RTLENTRYF
6773 #endif
6774 sug_compare(const void *s1, const void *s2)
6775 {
6776     suggest_T	*p1 = (suggest_T *)s1;
6777     suggest_T	*p2 = (suggest_T *)s2;
6778     int		n = p1->st_score - p2->st_score;
6779 
6780     if (n == 0)
6781     {
6782 	n = p1->st_altscore - p2->st_altscore;
6783 	if (n == 0)
6784 	    n = STRICMP(p1->st_word, p2->st_word);
6785     }
6786     return n;
6787 }
6788 
6789 /*
6790  * Cleanup the suggestions:
6791  * - Sort on score.
6792  * - Remove words that won't be displayed.
6793  * Returns the maximum score in the list or "maxscore" unmodified.
6794  */
6795     static int
6796 cleanup_suggestions(
6797     garray_T	*gap,
6798     int		maxscore,
6799     int		keep)		/* nr of suggestions to keep */
6800 {
6801     suggest_T   *stp = &SUG(*gap, 0);
6802     int		i;
6803 
6804     /* Sort the list. */
6805     qsort(gap->ga_data, (size_t)gap->ga_len, sizeof(suggest_T), sug_compare);
6806 
6807     /* Truncate the list to the number of suggestions that will be displayed. */
6808     if (gap->ga_len > keep)
6809     {
6810 	for (i = keep; i < gap->ga_len; ++i)
6811 	    vim_free(stp[i].st_word);
6812 	gap->ga_len = keep;
6813 	return stp[keep - 1].st_score;
6814     }
6815     return maxscore;
6816 }
6817 
#if defined(FEAT_EVAL) || defined(PROTO)
/*
 * Soundfold a string, for soundfold().
 * When 'spell' is set and 'spelllang' is not empty the first language of the
 * current window that has SAL items is used.  Otherwise "word" is returned
 * as-is.
 * Result is in allocated memory, NULL for an error.
 */
    char_u *
eval_soundfold(char_u *word)
{
    char_u	sound[MAXWLEN];
    langp_T	*lp;
    int		idx;

    if (curwin->w_p_spell && *curwin->w_s->b_p_spl != NUL)
    {
	for (idx = 0; idx < curwin->w_s->b_langp.ga_len; ++idx)
	{
	    lp = LANGP_ENTRY(curwin->w_s->b_langp, idx);
	    if (lp->lp_slang->sl_sal.ga_len <= 0)
		continue;	/* no sound folding for this language */

	    /* soundfold the word */
	    spell_soundfold(lp->lp_slang, word, FALSE, sound);
	    return vim_strsave(sound);
	}
    }

    /* No language with sound folding, return word as-is. */
    return vim_strsave(word);
}
#endif
6847 
6848 /*
6849  * Turn "inword" into its sound-a-like equivalent in "res[MAXWLEN]".
6850  *
6851  * There are many ways to turn a word into a sound-a-like representation.  The
6852  * oldest is Soundex (1918!).   A nice overview can be found in "Approximate
6853  * swedish name matching - survey and test of different algorithms" by Klas
6854  * Erikson.
6855  *
6856  * We support two methods:
6857  * 1. SOFOFROM/SOFOTO do a simple character mapping.
6858  * 2. SAL items define a more advanced sound-folding (and much slower).
6859  */
6860     void
6861 spell_soundfold(
6862     slang_T	*slang,
6863     char_u	*inword,
6864     int		folded,	    /* "inword" is already case-folded */
6865     char_u	*res)
6866 {
6867     char_u	fword[MAXWLEN];
6868     char_u	*word;
6869 
6870     if (slang->sl_sofo)
6871 	/* SOFOFROM and SOFOTO used */
6872 	spell_soundfold_sofo(slang, inword, res);
6873     else
6874     {
6875 	/* SAL items used.  Requires the word to be case-folded. */
6876 	if (folded)
6877 	    word = inword;
6878 	else
6879 	{
6880 	    (void)spell_casefold(inword, (int)STRLEN(inword), fword, MAXWLEN);
6881 	    word = fword;
6882 	}
6883 
6884 	if (has_mbyte)
6885 	    spell_soundfold_wsal(slang, word, res);
6886 	else
6887 	    spell_soundfold_sal(slang, word, res);
6888     }
6889 }
6890 
6891 /*
6892  * Perform sound folding of "inword" into "res" according to SOFOFROM and
6893  * SOFOTO lines.
6894  */
6895     static void
6896 spell_soundfold_sofo(slang_T *slang, char_u *inword, char_u *res)
6897 {
6898     char_u	*s;
6899     int		ri = 0;
6900     int		c;
6901 
6902     if (has_mbyte)
6903     {
6904 	int	prevc = 0;
6905 	int	*ip;
6906 
6907 	/* The sl_sal_first[] table contains the translation for chars up to
6908 	 * 255, sl_sal the rest. */
6909 	for (s = inword; *s != NUL; )
6910 	{
6911 	    c = mb_cptr2char_adv(&s);
6912 	    if (enc_utf8 ? utf_class(c) == 0 : VIM_ISWHITE(c))
6913 		c = ' ';
6914 	    else if (c < 256)
6915 		c = slang->sl_sal_first[c];
6916 	    else
6917 	    {
6918 		ip = ((int **)slang->sl_sal.ga_data)[c & 0xff];
6919 		if (ip == NULL)		/* empty list, can't match */
6920 		    c = NUL;
6921 		else
6922 		    for (;;)		/* find "c" in the list */
6923 		    {
6924 			if (*ip == 0)	/* not found */
6925 			{
6926 			    c = NUL;
6927 			    break;
6928 			}
6929 			if (*ip == c)	/* match! */
6930 			{
6931 			    c = ip[1];
6932 			    break;
6933 			}
6934 			ip += 2;
6935 		    }
6936 	    }
6937 
6938 	    if (c != NUL && c != prevc)
6939 	    {
6940 		ri += mb_char2bytes(c, res + ri);
6941 		if (ri + MB_MAXBYTES > MAXWLEN)
6942 		    break;
6943 		prevc = c;
6944 	    }
6945 	}
6946     }
6947     else
6948     {
6949 	/* The sl_sal_first[] table contains the translation. */
6950 	for (s = inword; (c = *s) != NUL; ++s)
6951 	{
6952 	    if (VIM_ISWHITE(c))
6953 		c = ' ';
6954 	    else
6955 		c = slang->sl_sal_first[c];
6956 	    if (c != NUL && (ri == 0 || res[ri - 1] != c))
6957 		res[ri++] = c;
6958 	}
6959     }
6960 
6961     res[ri] = NUL;
6962 }
6963 
6964     static void
6965 spell_soundfold_sal(slang_T *slang, char_u *inword, char_u *res)
6966 {
6967     salitem_T	*smp;
6968     char_u	word[MAXWLEN];
6969     char_u	*s = inword;
6970     char_u	*t;
6971     char_u	*pf;
6972     int		i, j, z;
6973     int		reslen;
6974     int		n, k = 0;
6975     int		z0;
6976     int		k0;
6977     int		n0;
6978     int		c;
6979     int		pri;
6980     int		p0 = -333;
6981     int		c0;
6982 
6983     /* Remove accents, if wanted.  We actually remove all non-word characters.
6984      * But keep white space.  We need a copy, the word may be changed here. */
6985     if (slang->sl_rem_accents)
6986     {
6987 	t = word;
6988 	while (*s != NUL)
6989 	{
6990 	    if (VIM_ISWHITE(*s))
6991 	    {
6992 		*t++ = ' ';
6993 		s = skipwhite(s);
6994 	    }
6995 	    else
6996 	    {
6997 		if (spell_iswordp_nmw(s, curwin))
6998 		    *t++ = *s;
6999 		++s;
7000 	    }
7001 	}
7002 	*t = NUL;
7003     }
7004     else
7005 	vim_strncpy(word, s, MAXWLEN - 1);
7006 
7007     smp = (salitem_T *)slang->sl_sal.ga_data;
7008 
7009     /*
7010      * This comes from Aspell phonet.cpp.  Converted from C++ to C.
7011      * Changed to keep spaces.
7012      */
7013     i = reslen = z = 0;
7014     while ((c = word[i]) != NUL)
7015     {
7016 	/* Start with the first rule that has the character in the word. */
7017 	n = slang->sl_sal_first[c];
7018 	z0 = 0;
7019 
7020 	if (n >= 0)
7021 	{
7022 	    /* check all rules for the same letter */
7023 	    for (; (s = smp[n].sm_lead)[0] == c; ++n)
7024 	    {
7025 		/* Quickly skip entries that don't match the word.  Most
7026 		 * entries are less then three chars, optimize for that. */
7027 		k = smp[n].sm_leadlen;
7028 		if (k > 1)
7029 		{
7030 		    if (word[i + 1] != s[1])
7031 			continue;
7032 		    if (k > 2)
7033 		    {
7034 			for (j = 2; j < k; ++j)
7035 			    if (word[i + j] != s[j])
7036 				break;
7037 			if (j < k)
7038 			    continue;
7039 		    }
7040 		}
7041 
7042 		if ((pf = smp[n].sm_oneof) != NULL)
7043 		{
7044 		    /* Check for match with one of the chars in "sm_oneof". */
7045 		    while (*pf != NUL && *pf != word[i + k])
7046 			++pf;
7047 		    if (*pf == NUL)
7048 			continue;
7049 		    ++k;
7050 		}
7051 		s = smp[n].sm_rules;
7052 		pri = 5;    /* default priority */
7053 
7054 		p0 = *s;
7055 		k0 = k;
7056 		while (*s == '-' && k > 1)
7057 		{
7058 		    k--;
7059 		    s++;
7060 		}
7061 		if (*s == '<')
7062 		    s++;
7063 		if (VIM_ISDIGIT(*s))
7064 		{
7065 		    /* determine priority */
7066 		    pri = *s - '0';
7067 		    s++;
7068 		}
7069 		if (*s == '^' && *(s + 1) == '^')
7070 		    s++;
7071 
7072 		if (*s == NUL
7073 			|| (*s == '^'
7074 			    && (i == 0 || !(word[i - 1] == ' '
7075 				      || spell_iswordp(word + i - 1, curwin)))
7076 			    && (*(s + 1) != '$'
7077 				|| (!spell_iswordp(word + i + k0, curwin))))
7078 			|| (*s == '$' && i > 0
7079 			    && spell_iswordp(word + i - 1, curwin)
7080 			    && (!spell_iswordp(word + i + k0, curwin))))
7081 		{
7082 		    /* search for followup rules, if:    */
7083 		    /* followup and k > 1  and  NO '-' in searchstring */
7084 		    c0 = word[i + k - 1];
7085 		    n0 = slang->sl_sal_first[c0];
7086 
7087 		    if (slang->sl_followup && k > 1 && n0 >= 0
7088 					   && p0 != '-' && word[i + k] != NUL)
7089 		    {
7090 			/* test follow-up rule for "word[i + k]" */
7091 			for ( ; (s = smp[n0].sm_lead)[0] == c0; ++n0)
7092 			{
7093 			    /* Quickly skip entries that don't match the word.
7094 			     * */
7095 			    k0 = smp[n0].sm_leadlen;
7096 			    if (k0 > 1)
7097 			    {
7098 				if (word[i + k] != s[1])
7099 				    continue;
7100 				if (k0 > 2)
7101 				{
7102 				    pf = word + i + k + 1;
7103 				    for (j = 2; j < k0; ++j)
7104 					if (*pf++ != s[j])
7105 					    break;
7106 				    if (j < k0)
7107 					continue;
7108 				}
7109 			    }
7110 			    k0 += k - 1;
7111 
7112 			    if ((pf = smp[n0].sm_oneof) != NULL)
7113 			    {
7114 				/* Check for match with one of the chars in
7115 				 * "sm_oneof". */
7116 				while (*pf != NUL && *pf != word[i + k0])
7117 				    ++pf;
7118 				if (*pf == NUL)
7119 				    continue;
7120 				++k0;
7121 			    }
7122 
7123 			    p0 = 5;
7124 			    s = smp[n0].sm_rules;
7125 			    while (*s == '-')
7126 			    {
7127 				/* "k0" gets NOT reduced because
7128 				 * "if (k0 == k)" */
7129 				s++;
7130 			    }
7131 			    if (*s == '<')
7132 				s++;
7133 			    if (VIM_ISDIGIT(*s))
7134 			    {
7135 				p0 = *s - '0';
7136 				s++;
7137 			    }
7138 
7139 			    if (*s == NUL
7140 				    /* *s == '^' cuts */
7141 				    || (*s == '$'
7142 					    && !spell_iswordp(word + i + k0,
7143 								     curwin)))
7144 			    {
7145 				if (k0 == k)
7146 				    /* this is just a piece of the string */
7147 				    continue;
7148 
7149 				if (p0 < pri)
7150 				    /* priority too low */
7151 				    continue;
7152 				/* rule fits; stop search */
7153 				break;
7154 			    }
7155 			}
7156 
7157 			if (p0 >= pri && smp[n0].sm_lead[0] == c0)
7158 			    continue;
7159 		    }
7160 
7161 		    /* replace string */
7162 		    s = smp[n].sm_to;
7163 		    if (s == NULL)
7164 			s = (char_u *)"";
7165 		    pf = smp[n].sm_rules;
7166 		    p0 = (vim_strchr(pf, '<') != NULL) ? 1 : 0;
7167 		    if (p0 == 1 && z == 0)
7168 		    {
7169 			/* rule with '<' is used */
7170 			if (reslen > 0 && *s != NUL && (res[reslen - 1] == c
7171 						    || res[reslen - 1] == *s))
7172 			    reslen--;
7173 			z0 = 1;
7174 			z = 1;
7175 			k0 = 0;
7176 			while (*s != NUL && word[i + k0] != NUL)
7177 			{
7178 			    word[i + k0] = *s;
7179 			    k0++;
7180 			    s++;
7181 			}
7182 			if (k > k0)
7183 			    STRMOVE(word + i + k0, word + i + k);
7184 
7185 			/* new "actual letter" */
7186 			c = word[i];
7187 		    }
7188 		    else
7189 		    {
7190 			/* no '<' rule used */
7191 			i += k - 1;
7192 			z = 0;
7193 			while (*s != NUL && s[1] != NUL && reslen < MAXWLEN)
7194 			{
7195 			    if (reslen == 0 || res[reslen - 1] != *s)
7196 				res[reslen++] = *s;
7197 			    s++;
7198 			}
7199 			/* new "actual letter" */
7200 			c = *s;
7201 			if (strstr((char *)pf, "^^") != NULL)
7202 			{
7203 			    if (c != NUL)
7204 				res[reslen++] = c;
7205 			    STRMOVE(word, word + i + 1);
7206 			    i = 0;
7207 			    z0 = 1;
7208 			}
7209 		    }
7210 		    break;
7211 		}
7212 	    }
7213 	}
7214 	else if (VIM_ISWHITE(c))
7215 	{
7216 	    c = ' ';
7217 	    k = 1;
7218 	}
7219 
7220 	if (z0 == 0)
7221 	{
7222 	    if (k && !p0 && reslen < MAXWLEN && c != NUL
7223 		    && (!slang->sl_collapse || reslen == 0
7224 						     || res[reslen - 1] != c))
7225 		/* condense only double letters */
7226 		res[reslen++] = c;
7227 
7228 	    i++;
7229 	    z = 0;
7230 	    k = 0;
7231 	}
7232     }
7233 
7234     res[reslen] = NUL;
7235 }
7236 
7237 /*
7238  * Turn "inword" into its sound-a-like equivalent in "res[MAXWLEN]".
7239  * Multi-byte version of spell_soundfold().
7240  */
7241     static void
7242 spell_soundfold_wsal(slang_T *slang, char_u *inword, char_u *res)
7243 {
7244     salitem_T	*smp = (salitem_T *)slang->sl_sal.ga_data;
7245     int		word[MAXWLEN];
7246     int		wres[MAXWLEN];
7247     int		l;
7248     char_u	*s;
7249     int		*ws;
7250     char_u	*t;
7251     int		*pf;
7252     int		i, j, z;
7253     int		reslen;
7254     int		n, k = 0;
7255     int		z0;
7256     int		k0;
7257     int		n0;
7258     int		c;
7259     int		pri;
7260     int		p0 = -333;
7261     int		c0;
7262     int		did_white = FALSE;
7263     int		wordlen;
7264 
7265 
7266     /*
7267      * Convert the multi-byte string to a wide-character string.
7268      * Remove accents, if wanted.  We actually remove all non-word characters.
7269      * But keep white space.
7270      */
7271     wordlen = 0;
7272     for (s = inword; *s != NUL; )
7273     {
7274 	t = s;
7275 	c = mb_cptr2char_adv(&s);
7276 	if (slang->sl_rem_accents)
7277 	{
7278 	    if (enc_utf8 ? utf_class(c) == 0 : VIM_ISWHITE(c))
7279 	    {
7280 		if (did_white)
7281 		    continue;
7282 		c = ' ';
7283 		did_white = TRUE;
7284 	    }
7285 	    else
7286 	    {
7287 		did_white = FALSE;
7288 		if (!spell_iswordp_nmw(t, curwin))
7289 		    continue;
7290 	    }
7291 	}
7292 	word[wordlen++] = c;
7293     }
7294     word[wordlen] = NUL;
7295 
7296     /*
7297      * This algorithm comes from Aspell phonet.cpp.
7298      * Converted from C++ to C.  Added support for multi-byte chars.
7299      * Changed to keep spaces.
7300      */
7301     i = reslen = z = 0;
7302     while ((c = word[i]) != NUL)
7303     {
7304 	/* Start with the first rule that has the character in the word. */
7305 	n = slang->sl_sal_first[c & 0xff];
7306 	z0 = 0;
7307 
7308 	if (n >= 0)
7309 	{
7310 	    /* Check all rules for the same index byte.
7311 	     * If c is 0x300 need extra check for the end of the array, as
7312 	     * (c & 0xff) is NUL. */
7313 	    for (; ((ws = smp[n].sm_lead_w)[0] & 0xff) == (c & 0xff)
7314 							 && ws[0] != NUL; ++n)
7315 	    {
7316 		/* Quickly skip entries that don't match the word.  Most
7317 		 * entries are less then three chars, optimize for that. */
7318 		if (c != ws[0])
7319 		    continue;
7320 		k = smp[n].sm_leadlen;
7321 		if (k > 1)
7322 		{
7323 		    if (word[i + 1] != ws[1])
7324 			continue;
7325 		    if (k > 2)
7326 		    {
7327 			for (j = 2; j < k; ++j)
7328 			    if (word[i + j] != ws[j])
7329 				break;
7330 			if (j < k)
7331 			    continue;
7332 		    }
7333 		}
7334 
7335 		if ((pf = smp[n].sm_oneof_w) != NULL)
7336 		{
7337 		    /* Check for match with one of the chars in "sm_oneof". */
7338 		    while (*pf != NUL && *pf != word[i + k])
7339 			++pf;
7340 		    if (*pf == NUL)
7341 			continue;
7342 		    ++k;
7343 		}
7344 		s = smp[n].sm_rules;
7345 		pri = 5;    /* default priority */
7346 
7347 		p0 = *s;
7348 		k0 = k;
7349 		while (*s == '-' && k > 1)
7350 		{
7351 		    k--;
7352 		    s++;
7353 		}
7354 		if (*s == '<')
7355 		    s++;
7356 		if (VIM_ISDIGIT(*s))
7357 		{
7358 		    /* determine priority */
7359 		    pri = *s - '0';
7360 		    s++;
7361 		}
7362 		if (*s == '^' && *(s + 1) == '^')
7363 		    s++;
7364 
7365 		if (*s == NUL
7366 			|| (*s == '^'
7367 			    && (i == 0 || !(word[i - 1] == ' '
7368 				    || spell_iswordp_w(word + i - 1, curwin)))
7369 			    && (*(s + 1) != '$'
7370 				|| (!spell_iswordp_w(word + i + k0, curwin))))
7371 			|| (*s == '$' && i > 0
7372 			    && spell_iswordp_w(word + i - 1, curwin)
7373 			    && (!spell_iswordp_w(word + i + k0, curwin))))
7374 		{
7375 		    /* search for followup rules, if:    */
7376 		    /* followup and k > 1  and  NO '-' in searchstring */
7377 		    c0 = word[i + k - 1];
7378 		    n0 = slang->sl_sal_first[c0 & 0xff];
7379 
7380 		    if (slang->sl_followup && k > 1 && n0 >= 0
7381 					   && p0 != '-' && word[i + k] != NUL)
7382 		    {
7383 			/* Test follow-up rule for "word[i + k]"; loop over
7384 			 * all entries with the same index byte. */
7385 			for ( ; ((ws = smp[n0].sm_lead_w)[0] & 0xff)
7386 							 == (c0 & 0xff); ++n0)
7387 			{
7388 			    /* Quickly skip entries that don't match the word.
7389 			     */
7390 			    if (c0 != ws[0])
7391 				continue;
7392 			    k0 = smp[n0].sm_leadlen;
7393 			    if (k0 > 1)
7394 			    {
7395 				if (word[i + k] != ws[1])
7396 				    continue;
7397 				if (k0 > 2)
7398 				{
7399 				    pf = word + i + k + 1;
7400 				    for (j = 2; j < k0; ++j)
7401 					if (*pf++ != ws[j])
7402 					    break;
7403 				    if (j < k0)
7404 					continue;
7405 				}
7406 			    }
7407 			    k0 += k - 1;
7408 
7409 			    if ((pf = smp[n0].sm_oneof_w) != NULL)
7410 			    {
7411 				/* Check for match with one of the chars in
7412 				 * "sm_oneof". */
7413 				while (*pf != NUL && *pf != word[i + k0])
7414 				    ++pf;
7415 				if (*pf == NUL)
7416 				    continue;
7417 				++k0;
7418 			    }
7419 
7420 			    p0 = 5;
7421 			    s = smp[n0].sm_rules;
7422 			    while (*s == '-')
7423 			    {
7424 				/* "k0" gets NOT reduced because
7425 				 * "if (k0 == k)" */
7426 				s++;
7427 			    }
7428 			    if (*s == '<')
7429 				s++;
7430 			    if (VIM_ISDIGIT(*s))
7431 			    {
7432 				p0 = *s - '0';
7433 				s++;
7434 			    }
7435 
7436 			    if (*s == NUL
7437 				    /* *s == '^' cuts */
7438 				    || (*s == '$'
7439 					 && !spell_iswordp_w(word + i + k0,
7440 								     curwin)))
7441 			    {
7442 				if (k0 == k)
7443 				    /* this is just a piece of the string */
7444 				    continue;
7445 
7446 				if (p0 < pri)
7447 				    /* priority too low */
7448 				    continue;
7449 				/* rule fits; stop search */
7450 				break;
7451 			    }
7452 			}
7453 
7454 			if (p0 >= pri && (smp[n0].sm_lead_w[0] & 0xff)
7455 							       == (c0 & 0xff))
7456 			    continue;
7457 		    }
7458 
7459 		    /* replace string */
7460 		    ws = smp[n].sm_to_w;
7461 		    s = smp[n].sm_rules;
7462 		    p0 = (vim_strchr(s, '<') != NULL) ? 1 : 0;
7463 		    if (p0 == 1 && z == 0)
7464 		    {
7465 			/* rule with '<' is used */
7466 			if (reslen > 0 && ws != NULL && *ws != NUL
7467 				&& (wres[reslen - 1] == c
7468 						    || wres[reslen - 1] == *ws))
7469 			    reslen--;
7470 			z0 = 1;
7471 			z = 1;
7472 			k0 = 0;
7473 			if (ws != NULL)
7474 			    while (*ws != NUL && word[i + k0] != NUL)
7475 			    {
7476 				word[i + k0] = *ws;
7477 				k0++;
7478 				ws++;
7479 			    }
7480 			if (k > k0)
7481 			    mch_memmove(word + i + k0, word + i + k,
7482 				    sizeof(int) * (wordlen - (i + k) + 1));
7483 
7484 			/* new "actual letter" */
7485 			c = word[i];
7486 		    }
7487 		    else
7488 		    {
7489 			/* no '<' rule used */
7490 			i += k - 1;
7491 			z = 0;
7492 			if (ws != NULL)
7493 			    while (*ws != NUL && ws[1] != NUL
7494 							  && reslen < MAXWLEN)
7495 			    {
7496 				if (reslen == 0 || wres[reslen - 1] != *ws)
7497 				    wres[reslen++] = *ws;
7498 				ws++;
7499 			    }
7500 			/* new "actual letter" */
7501 			if (ws == NULL)
7502 			    c = NUL;
7503 			else
7504 			    c = *ws;
7505 			if (strstr((char *)s, "^^") != NULL)
7506 			{
7507 			    if (c != NUL)
7508 				wres[reslen++] = c;
7509 			    mch_memmove(word, word + i + 1,
7510 				       sizeof(int) * (wordlen - (i + 1) + 1));
7511 			    i = 0;
7512 			    z0 = 1;
7513 			}
7514 		    }
7515 		    break;
7516 		}
7517 	    }
7518 	}
7519 	else if (VIM_ISWHITE(c))
7520 	{
7521 	    c = ' ';
7522 	    k = 1;
7523 	}
7524 
7525 	if (z0 == 0)
7526 	{
7527 	    if (k && !p0 && reslen < MAXWLEN && c != NUL
7528 		    && (!slang->sl_collapse || reslen == 0
7529 						     || wres[reslen - 1] != c))
7530 		/* condense only double letters */
7531 		wres[reslen++] = c;
7532 
7533 	    i++;
7534 	    z = 0;
7535 	    k = 0;
7536 	}
7537     }
7538 
7539     /* Convert wide characters in "wres" to a multi-byte string in "res". */
7540     l = 0;
7541     for (n = 0; n < reslen; ++n)
7542     {
7543 	l += mb_char2bytes(wres[n], res + l);
7544 	if (l + MB_MAXBYTES > MAXWLEN)
7545 	    break;
7546     }
7547     res[l] = NUL;
7548 }
7549 
7550 /*
7551  * Compute a score for two sound-a-like words.
7552  * This permits up to two inserts/deletes/swaps/etc. to keep things fast.
7553  * Instead of a generic loop we write out the code.  That keeps it fast by
7554  * avoiding checks that will not be possible.
7555  */
7556     static int
7557 soundalike_score(
7558     char_u	*goodstart,	/* sound-folded good word */
7559     char_u	*badstart)	/* sound-folded bad word */
7560 {
7561     char_u	*goodsound = goodstart;
7562     char_u	*badsound = badstart;
7563     int		goodlen;
7564     int		badlen;
7565     int		n;
7566     char_u	*pl, *ps;
7567     char_u	*pl2, *ps2;
7568     int		score = 0;
7569 
7570     /* Adding/inserting "*" at the start (word starts with vowel) shouldn't be
7571      * counted so much, vowels halfway the word aren't counted at all. */
7572     if ((*badsound == '*' || *goodsound == '*') && *badsound != *goodsound)
7573     {
7574 	if ((badsound[0] == NUL && goodsound[1] == NUL)
7575 	    || (goodsound[0] == NUL && badsound[1] == NUL))
7576 	    /* changing word with vowel to word without a sound */
7577 	    return SCORE_DEL;
7578 	if (badsound[0] == NUL || goodsound[0] == NUL)
7579 	    /* more than two changes */
7580 	    return SCORE_MAXMAX;
7581 
7582 	if (badsound[1] == goodsound[1]
7583 		|| (badsound[1] != NUL
7584 		    && goodsound[1] != NUL
7585 		    && badsound[2] == goodsound[2]))
7586 	{
7587 	    /* handle like a substitute */
7588 	}
7589 	else
7590 	{
7591 	    score = 2 * SCORE_DEL / 3;
7592 	    if (*badsound == '*')
7593 		++badsound;
7594 	    else
7595 		++goodsound;
7596 	}
7597     }
7598 
7599     goodlen = (int)STRLEN(goodsound);
7600     badlen = (int)STRLEN(badsound);
7601 
7602     /* Return quickly if the lengths are too different to be fixed by two
7603      * changes. */
7604     n = goodlen - badlen;
7605     if (n < -2 || n > 2)
7606 	return SCORE_MAXMAX;
7607 
7608     if (n > 0)
7609     {
7610 	pl = goodsound;	    /* goodsound is longest */
7611 	ps = badsound;
7612     }
7613     else
7614     {
7615 	pl = badsound;	    /* badsound is longest */
7616 	ps = goodsound;
7617     }
7618 
7619     /* Skip over the identical part. */
7620     while (*pl == *ps && *pl != NUL)
7621     {
7622 	++pl;
7623 	++ps;
7624     }
7625 
7626     switch (n)
7627     {
7628 	case -2:
7629 	case 2:
7630 	    /*
7631 	     * Must delete two characters from "pl".
7632 	     */
7633 	    ++pl;	/* first delete */
7634 	    while (*pl == *ps)
7635 	    {
7636 		++pl;
7637 		++ps;
7638 	    }
7639 	    /* strings must be equal after second delete */
7640 	    if (STRCMP(pl + 1, ps) == 0)
7641 		return score + SCORE_DEL * 2;
7642 
7643 	    /* Failed to compare. */
7644 	    break;
7645 
7646 	case -1:
7647 	case 1:
7648 	    /*
7649 	     * Minimal one delete from "pl" required.
7650 	     */
7651 
7652 	    /* 1: delete */
7653 	    pl2 = pl + 1;
7654 	    ps2 = ps;
7655 	    while (*pl2 == *ps2)
7656 	    {
7657 		if (*pl2 == NUL)	/* reached the end */
7658 		    return score + SCORE_DEL;
7659 		++pl2;
7660 		++ps2;
7661 	    }
7662 
7663 	    /* 2: delete then swap, then rest must be equal */
7664 	    if (pl2[0] == ps2[1] && pl2[1] == ps2[0]
7665 					     && STRCMP(pl2 + 2, ps2 + 2) == 0)
7666 		return score + SCORE_DEL + SCORE_SWAP;
7667 
7668 	    /* 3: delete then substitute, then the rest must be equal */
7669 	    if (STRCMP(pl2 + 1, ps2 + 1) == 0)
7670 		return score + SCORE_DEL + SCORE_SUBST;
7671 
7672 	    /* 4: first swap then delete */
7673 	    if (pl[0] == ps[1] && pl[1] == ps[0])
7674 	    {
7675 		pl2 = pl + 2;	    /* swap, skip two chars */
7676 		ps2 = ps + 2;
7677 		while (*pl2 == *ps2)
7678 		{
7679 		    ++pl2;
7680 		    ++ps2;
7681 		}
7682 		/* delete a char and then strings must be equal */
7683 		if (STRCMP(pl2 + 1, ps2) == 0)
7684 		    return score + SCORE_SWAP + SCORE_DEL;
7685 	    }
7686 
7687 	    /* 5: first substitute then delete */
7688 	    pl2 = pl + 1;	    /* substitute, skip one char */
7689 	    ps2 = ps + 1;
7690 	    while (*pl2 == *ps2)
7691 	    {
7692 		++pl2;
7693 		++ps2;
7694 	    }
7695 	    /* delete a char and then strings must be equal */
7696 	    if (STRCMP(pl2 + 1, ps2) == 0)
7697 		return score + SCORE_SUBST + SCORE_DEL;
7698 
7699 	    /* Failed to compare. */
7700 	    break;
7701 
7702 	case 0:
7703 	    /*
7704 	     * Lengths are equal, thus changes must result in same length: An
7705 	     * insert is only possible in combination with a delete.
7706 	     * 1: check if for identical strings
7707 	     */
7708 	    if (*pl == NUL)
7709 		return score;
7710 
7711 	    /* 2: swap */
7712 	    if (pl[0] == ps[1] && pl[1] == ps[0])
7713 	    {
7714 		pl2 = pl + 2;	    /* swap, skip two chars */
7715 		ps2 = ps + 2;
7716 		while (*pl2 == *ps2)
7717 		{
7718 		    if (*pl2 == NUL)	/* reached the end */
7719 			return score + SCORE_SWAP;
7720 		    ++pl2;
7721 		    ++ps2;
7722 		}
7723 		/* 3: swap and swap again */
7724 		if (pl2[0] == ps2[1] && pl2[1] == ps2[0]
7725 					     && STRCMP(pl2 + 2, ps2 + 2) == 0)
7726 		    return score + SCORE_SWAP + SCORE_SWAP;
7727 
7728 		/* 4: swap and substitute */
7729 		if (STRCMP(pl2 + 1, ps2 + 1) == 0)
7730 		    return score + SCORE_SWAP + SCORE_SUBST;
7731 	    }
7732 
7733 	    /* 5: substitute */
7734 	    pl2 = pl + 1;
7735 	    ps2 = ps + 1;
7736 	    while (*pl2 == *ps2)
7737 	    {
7738 		if (*pl2 == NUL)	/* reached the end */
7739 		    return score + SCORE_SUBST;
7740 		++pl2;
7741 		++ps2;
7742 	    }
7743 
7744 	    /* 6: substitute and swap */
7745 	    if (pl2[0] == ps2[1] && pl2[1] == ps2[0]
7746 					     && STRCMP(pl2 + 2, ps2 + 2) == 0)
7747 		return score + SCORE_SUBST + SCORE_SWAP;
7748 
7749 	    /* 7: substitute and substitute */
7750 	    if (STRCMP(pl2 + 1, ps2 + 1) == 0)
7751 		return score + SCORE_SUBST + SCORE_SUBST;
7752 
7753 	    /* 8: insert then delete */
7754 	    pl2 = pl;
7755 	    ps2 = ps + 1;
7756 	    while (*pl2 == *ps2)
7757 	    {
7758 		++pl2;
7759 		++ps2;
7760 	    }
7761 	    if (STRCMP(pl2 + 1, ps2) == 0)
7762 		return score + SCORE_INS + SCORE_DEL;
7763 
7764 	    /* 9: delete then insert */
7765 	    pl2 = pl + 1;
7766 	    ps2 = ps;
7767 	    while (*pl2 == *ps2)
7768 	    {
7769 		++pl2;
7770 		++ps2;
7771 	    }
7772 	    if (STRCMP(pl2, ps2 + 1) == 0)
7773 		return score + SCORE_INS + SCORE_DEL;
7774 
7775 	    /* Failed to compare. */
7776 	    break;
7777     }
7778 
7779     return SCORE_MAXMAX;
7780 }
7781 
7782 /*
7783  * Compute the "edit distance" to turn "badword" into "goodword".  The less
7784  * deletes/inserts/substitutes/swaps are required the lower the score.
7785  *
7786  * The algorithm is described by Du and Chang, 1992.
7787  * The implementation of the algorithm comes from Aspell editdist.cpp,
7788  * edit_distance().  It has been converted from C++ to C and modified to
7789  * support multi-byte characters.
7790  */
7791     static int
7792 spell_edit_score(
7793     slang_T	*slang,
7794     char_u	*badword,
7795     char_u	*goodword)
7796 {
7797     int		*cnt;
7798     int		badlen, goodlen;	/* lengths including NUL */
7799     int		j, i;
7800     int		t;
7801     int		bc, gc;
7802     int		pbc, pgc;
7803     char_u	*p;
7804     int		wbadword[MAXWLEN];
7805     int		wgoodword[MAXWLEN];
7806 
7807     if (has_mbyte)
7808     {
7809 	/* Get the characters from the multi-byte strings and put them in an
7810 	 * int array for easy access. */
7811 	for (p = badword, badlen = 0; *p != NUL; )
7812 	    wbadword[badlen++] = mb_cptr2char_adv(&p);
7813 	wbadword[badlen++] = 0;
7814 	for (p = goodword, goodlen = 0; *p != NUL; )
7815 	    wgoodword[goodlen++] = mb_cptr2char_adv(&p);
7816 	wgoodword[goodlen++] = 0;
7817     }
7818     else
7819     {
7820 	badlen = (int)STRLEN(badword) + 1;
7821 	goodlen = (int)STRLEN(goodword) + 1;
7822     }
7823 
7824     /* We use "cnt" as an array: CNT(badword_idx, goodword_idx). */
7825 #define CNT(a, b)   cnt[(a) + (b) * (badlen + 1)]
7826     cnt = (int *)lalloc((long_u)(sizeof(int) * (badlen + 1) * (goodlen + 1)),
7827 									TRUE);
7828     if (cnt == NULL)
7829 	return 0;	/* out of memory */
7830 
7831     CNT(0, 0) = 0;
7832     for (j = 1; j <= goodlen; ++j)
7833 	CNT(0, j) = CNT(0, j - 1) + SCORE_INS;
7834 
7835     for (i = 1; i <= badlen; ++i)
7836     {
7837 	CNT(i, 0) = CNT(i - 1, 0) + SCORE_DEL;
7838 	for (j = 1; j <= goodlen; ++j)
7839 	{
7840 	    if (has_mbyte)
7841 	    {
7842 		bc = wbadword[i - 1];
7843 		gc = wgoodword[j - 1];
7844 	    }
7845 	    else
7846 	    {
7847 		bc = badword[i - 1];
7848 		gc = goodword[j - 1];
7849 	    }
7850 	    if (bc == gc)
7851 		CNT(i, j) = CNT(i - 1, j - 1);
7852 	    else
7853 	    {
7854 		/* Use a better score when there is only a case difference. */
7855 		if (SPELL_TOFOLD(bc) == SPELL_TOFOLD(gc))
7856 		    CNT(i, j) = SCORE_ICASE + CNT(i - 1, j - 1);
7857 		else
7858 		{
7859 		    /* For a similar character use SCORE_SIMILAR. */
7860 		    if (slang != NULL
7861 			    && slang->sl_has_map
7862 			    && similar_chars(slang, gc, bc))
7863 			CNT(i, j) = SCORE_SIMILAR + CNT(i - 1, j - 1);
7864 		    else
7865 			CNT(i, j) = SCORE_SUBST + CNT(i - 1, j - 1);
7866 		}
7867 
7868 		if (i > 1 && j > 1)
7869 		{
7870 		    if (has_mbyte)
7871 		    {
7872 			pbc = wbadword[i - 2];
7873 			pgc = wgoodword[j - 2];
7874 		    }
7875 		    else
7876 		    {
7877 			pbc = badword[i - 2];
7878 			pgc = goodword[j - 2];
7879 		    }
7880 		    if (bc == pgc && pbc == gc)
7881 		    {
7882 			t = SCORE_SWAP + CNT(i - 2, j - 2);
7883 			if (t < CNT(i, j))
7884 			    CNT(i, j) = t;
7885 		    }
7886 		}
7887 		t = SCORE_DEL + CNT(i - 1, j);
7888 		if (t < CNT(i, j))
7889 		    CNT(i, j) = t;
7890 		t = SCORE_INS + CNT(i, j - 1);
7891 		if (t < CNT(i, j))
7892 		    CNT(i, j) = t;
7893 	    }
7894 	}
7895     }
7896 
7897     i = CNT(badlen - 1, goodlen - 1);
7898     vim_free(cnt);
7899     return i;
7900 }
7901 
7902 typedef struct
7903 {
7904     int		badi;
7905     int		goodi;
7906     int		score;
7907 } limitscore_T;
7908 
7909 /*
7910  * Like spell_edit_score(), but with a limit on the score to make it faster.
7911  * May return SCORE_MAXMAX when the score is higher than "limit".
7912  *
7913  * This uses a stack for the edits still to be tried.
7914  * The idea comes from Aspell leditdist.cpp.  Rewritten in C and added support
7915  * for multi-byte characters.
7916  */
7917     static int
7918 spell_edit_score_limit(
7919     slang_T	*slang,
7920     char_u	*badword,
7921     char_u	*goodword,
7922     int		limit)
7923 {
7924     limitscore_T    stack[10];		/* allow for over 3 * 2 edits */
7925     int		    stackidx;
7926     int		    bi, gi;
7927     int		    bi2, gi2;
7928     int		    bc, gc;
7929     int		    score;
7930     int		    score_off;
7931     int		    minscore;
7932     int		    round;
7933 
7934     /* Multi-byte characters require a bit more work, use a different function
7935      * to avoid testing "has_mbyte" quite often. */
7936     if (has_mbyte)
7937 	return spell_edit_score_limit_w(slang, badword, goodword, limit);
7938 
7939     /*
7940      * The idea is to go from start to end over the words.  So long as
7941      * characters are equal just continue, this always gives the lowest score.
7942      * When there is a difference try several alternatives.  Each alternative
7943      * increases "score" for the edit distance.  Some of the alternatives are
7944      * pushed unto a stack and tried later, some are tried right away.  At the
7945      * end of the word the score for one alternative is known.  The lowest
7946      * possible score is stored in "minscore".
7947      */
7948     stackidx = 0;
7949     bi = 0;
7950     gi = 0;
7951     score = 0;
7952     minscore = limit + 1;
7953 
7954     for (;;)
7955     {
7956 	/* Skip over an equal part, score remains the same. */
7957 	for (;;)
7958 	{
7959 	    bc = badword[bi];
7960 	    gc = goodword[gi];
7961 	    if (bc != gc)	/* stop at a char that's different */
7962 		break;
7963 	    if (bc == NUL)	/* both words end */
7964 	    {
7965 		if (score < minscore)
7966 		    minscore = score;
7967 		goto pop;	/* do next alternative */
7968 	    }
7969 	    ++bi;
7970 	    ++gi;
7971 	}
7972 
7973 	if (gc == NUL)    /* goodword ends, delete badword chars */
7974 	{
7975 	    do
7976 	    {
7977 		if ((score += SCORE_DEL) >= minscore)
7978 		    goto pop;	    /* do next alternative */
7979 	    } while (badword[++bi] != NUL);
7980 	    minscore = score;
7981 	}
7982 	else if (bc == NUL) /* badword ends, insert badword chars */
7983 	{
7984 	    do
7985 	    {
7986 		if ((score += SCORE_INS) >= minscore)
7987 		    goto pop;	    /* do next alternative */
7988 	    } while (goodword[++gi] != NUL);
7989 	    minscore = score;
7990 	}
7991 	else			/* both words continue */
7992 	{
7993 	    /* If not close to the limit, perform a change.  Only try changes
7994 	     * that may lead to a lower score than "minscore".
7995 	     * round 0: try deleting a char from badword
7996 	     * round 1: try inserting a char in badword */
7997 	    for (round = 0; round <= 1; ++round)
7998 	    {
7999 		score_off = score + (round == 0 ? SCORE_DEL : SCORE_INS);
8000 		if (score_off < minscore)
8001 		{
8002 		    if (score_off + SCORE_EDIT_MIN >= minscore)
8003 		    {
8004 			/* Near the limit, rest of the words must match.  We
8005 			 * can check that right now, no need to push an item
8006 			 * onto the stack. */
8007 			bi2 = bi + 1 - round;
8008 			gi2 = gi + round;
8009 			while (goodword[gi2] == badword[bi2])
8010 			{
8011 			    if (goodword[gi2] == NUL)
8012 			    {
8013 				minscore = score_off;
8014 				break;
8015 			    }
8016 			    ++bi2;
8017 			    ++gi2;
8018 			}
8019 		    }
8020 		    else
8021 		    {
8022 			/* try deleting/inserting a character later */
8023 			stack[stackidx].badi = bi + 1 - round;
8024 			stack[stackidx].goodi = gi + round;
8025 			stack[stackidx].score = score_off;
8026 			++stackidx;
8027 		    }
8028 		}
8029 	    }
8030 
8031 	    if (score + SCORE_SWAP < minscore)
8032 	    {
8033 		/* If swapping two characters makes a match then the
8034 		 * substitution is more expensive, thus there is no need to
8035 		 * try both. */
8036 		if (gc == badword[bi + 1] && bc == goodword[gi + 1])
8037 		{
8038 		    /* Swap two characters, that is: skip them. */
8039 		    gi += 2;
8040 		    bi += 2;
8041 		    score += SCORE_SWAP;
8042 		    continue;
8043 		}
8044 	    }
8045 
8046 	    /* Substitute one character for another which is the same
8047 	     * thing as deleting a character from both goodword and badword.
8048 	     * Use a better score when there is only a case difference. */
8049 	    if (SPELL_TOFOLD(bc) == SPELL_TOFOLD(gc))
8050 		score += SCORE_ICASE;
8051 	    else
8052 	    {
8053 		/* For a similar character use SCORE_SIMILAR. */
8054 		if (slang != NULL
8055 			&& slang->sl_has_map
8056 			&& similar_chars(slang, gc, bc))
8057 		    score += SCORE_SIMILAR;
8058 		else
8059 		    score += SCORE_SUBST;
8060 	    }
8061 
8062 	    if (score < minscore)
8063 	    {
8064 		/* Do the substitution. */
8065 		++gi;
8066 		++bi;
8067 		continue;
8068 	    }
8069 	}
8070 pop:
8071 	/*
8072 	 * Get here to try the next alternative, pop it from the stack.
8073 	 */
8074 	if (stackidx == 0)		/* stack is empty, finished */
8075 	    break;
8076 
8077 	/* pop an item from the stack */
8078 	--stackidx;
8079 	gi = stack[stackidx].goodi;
8080 	bi = stack[stackidx].badi;
8081 	score = stack[stackidx].score;
8082     }
8083 
8084     /* When the score goes over "limit" it may actually be much higher.
8085      * Return a very large number to avoid going below the limit when giving a
8086      * bonus. */
8087     if (minscore > limit)
8088 	return SCORE_MAXMAX;
8089     return minscore;
8090 }
8091 
8092 /*
8093  * Multi-byte version of spell_edit_score_limit().
8094  * Keep it in sync with the above!
8095  */
8096     static int
8097 spell_edit_score_limit_w(
8098     slang_T	*slang,
8099     char_u	*badword,
8100     char_u	*goodword,
8101     int		limit)
8102 {
8103     limitscore_T    stack[10];		/* allow for over 3 * 2 edits */
8104     int		    stackidx;
8105     int		    bi, gi;
8106     int		    bi2, gi2;
8107     int		    bc, gc;
8108     int		    score;
8109     int		    score_off;
8110     int		    minscore;
8111     int		    round;
8112     char_u	    *p;
8113     int		    wbadword[MAXWLEN];
8114     int		    wgoodword[MAXWLEN];
8115 
8116     /* Get the characters from the multi-byte strings and put them in an
8117      * int array for easy access. */
8118     bi = 0;
8119     for (p = badword; *p != NUL; )
8120 	wbadword[bi++] = mb_cptr2char_adv(&p);
8121     wbadword[bi++] = 0;
8122     gi = 0;
8123     for (p = goodword; *p != NUL; )
8124 	wgoodword[gi++] = mb_cptr2char_adv(&p);
8125     wgoodword[gi++] = 0;
8126 
8127     /*
8128      * The idea is to go from start to end over the words.  So long as
8129      * characters are equal just continue, this always gives the lowest score.
8130      * When there is a difference try several alternatives.  Each alternative
8131      * increases "score" for the edit distance.  Some of the alternatives are
8132      * pushed unto a stack and tried later, some are tried right away.  At the
8133      * end of the word the score for one alternative is known.  The lowest
8134      * possible score is stored in "minscore".
8135      */
8136     stackidx = 0;
8137     bi = 0;
8138     gi = 0;
8139     score = 0;
8140     minscore = limit + 1;
8141 
8142     for (;;)
8143     {
8144 	/* Skip over an equal part, score remains the same. */
8145 	for (;;)
8146 	{
8147 	    bc = wbadword[bi];
8148 	    gc = wgoodword[gi];
8149 
8150 	    if (bc != gc)	/* stop at a char that's different */
8151 		break;
8152 	    if (bc == NUL)	/* both words end */
8153 	    {
8154 		if (score < minscore)
8155 		    minscore = score;
8156 		goto pop;	/* do next alternative */
8157 	    }
8158 	    ++bi;
8159 	    ++gi;
8160 	}
8161 
8162 	if (gc == NUL)    /* goodword ends, delete badword chars */
8163 	{
8164 	    do
8165 	    {
8166 		if ((score += SCORE_DEL) >= minscore)
8167 		    goto pop;	    /* do next alternative */
8168 	    } while (wbadword[++bi] != NUL);
8169 	    minscore = score;
8170 	}
8171 	else if (bc == NUL) /* badword ends, insert badword chars */
8172 	{
8173 	    do
8174 	    {
8175 		if ((score += SCORE_INS) >= minscore)
8176 		    goto pop;	    /* do next alternative */
8177 	    } while (wgoodword[++gi] != NUL);
8178 	    minscore = score;
8179 	}
8180 	else			/* both words continue */
8181 	{
8182 	    /* If not close to the limit, perform a change.  Only try changes
8183 	     * that may lead to a lower score than "minscore".
8184 	     * round 0: try deleting a char from badword
8185 	     * round 1: try inserting a char in badword */
8186 	    for (round = 0; round <= 1; ++round)
8187 	    {
8188 		score_off = score + (round == 0 ? SCORE_DEL : SCORE_INS);
8189 		if (score_off < minscore)
8190 		{
8191 		    if (score_off + SCORE_EDIT_MIN >= minscore)
8192 		    {
8193 			/* Near the limit, rest of the words must match.  We
8194 			 * can check that right now, no need to push an item
8195 			 * onto the stack. */
8196 			bi2 = bi + 1 - round;
8197 			gi2 = gi + round;
8198 			while (wgoodword[gi2] == wbadword[bi2])
8199 			{
8200 			    if (wgoodword[gi2] == NUL)
8201 			    {
8202 				minscore = score_off;
8203 				break;
8204 			    }
8205 			    ++bi2;
8206 			    ++gi2;
8207 			}
8208 		    }
8209 		    else
8210 		    {
8211 			/* try deleting a character from badword later */
8212 			stack[stackidx].badi = bi + 1 - round;
8213 			stack[stackidx].goodi = gi + round;
8214 			stack[stackidx].score = score_off;
8215 			++stackidx;
8216 		    }
8217 		}
8218 	    }
8219 
8220 	    if (score + SCORE_SWAP < minscore)
8221 	    {
8222 		/* If swapping two characters makes a match then the
8223 		 * substitution is more expensive, thus there is no need to
8224 		 * try both. */
8225 		if (gc == wbadword[bi + 1] && bc == wgoodword[gi + 1])
8226 		{
8227 		    /* Swap two characters, that is: skip them. */
8228 		    gi += 2;
8229 		    bi += 2;
8230 		    score += SCORE_SWAP;
8231 		    continue;
8232 		}
8233 	    }
8234 
8235 	    /* Substitute one character for another which is the same
8236 	     * thing as deleting a character from both goodword and badword.
8237 	     * Use a better score when there is only a case difference. */
8238 	    if (SPELL_TOFOLD(bc) == SPELL_TOFOLD(gc))
8239 		score += SCORE_ICASE;
8240 	    else
8241 	    {
8242 		/* For a similar character use SCORE_SIMILAR. */
8243 		if (slang != NULL
8244 			&& slang->sl_has_map
8245 			&& similar_chars(slang, gc, bc))
8246 		    score += SCORE_SIMILAR;
8247 		else
8248 		    score += SCORE_SUBST;
8249 	    }
8250 
8251 	    if (score < minscore)
8252 	    {
8253 		/* Do the substitution. */
8254 		++gi;
8255 		++bi;
8256 		continue;
8257 	    }
8258 	}
8259 pop:
8260 	/*
8261 	 * Get here to try the next alternative, pop it from the stack.
8262 	 */
8263 	if (stackidx == 0)		/* stack is empty, finished */
8264 	    break;
8265 
8266 	/* pop an item from the stack */
8267 	--stackidx;
8268 	gi = stack[stackidx].goodi;
8269 	bi = stack[stackidx].badi;
8270 	score = stack[stackidx].score;
8271     }
8272 
8273     /* When the score goes over "limit" it may actually be much higher.
8274      * Return a very large number to avoid going below the limit when giving a
8275      * bonus. */
8276     if (minscore > limit)
8277 	return SCORE_MAXMAX;
8278     return minscore;
8279 }
8280 
8281 /*
8282  * ":spellinfo"
8283  */
8284     void
8285 ex_spellinfo(exarg_T *eap UNUSED)
8286 {
8287     int		lpi;
8288     langp_T	*lp;
8289     char_u	*p;
8290 
8291     if (no_spell_checking(curwin))
8292 	return;
8293 
8294     msg_start();
8295     for (lpi = 0; lpi < curwin->w_s->b_langp.ga_len && !got_int; ++lpi)
8296     {
8297 	lp = LANGP_ENTRY(curwin->w_s->b_langp, lpi);
8298 	msg_puts("file: ");
8299 	msg_puts((char *)lp->lp_slang->sl_fname);
8300 	msg_putchar('\n');
8301 	p = lp->lp_slang->sl_info;
8302 	if (p != NULL)
8303 	{
8304 	    msg_puts((char *)p);
8305 	    msg_putchar('\n');
8306 	}
8307     }
8308     msg_end();
8309 }
8310 
/* Bit flags passed to spell_dump_compl(), dump_word() and dump_prefixes(). */
#define DUMPFLAG_KEEPCASE   0x01    /* round 2: keep-case tree */
#define DUMPFLAG_COUNT	    0x02    /* include word count */
#define DUMPFLAG_ICASE	    0x04    /* ignore case when finding matches */
#define DUMPFLAG_ONECAP	    0x08    /* pattern starts with capital */
#define DUMPFLAG_ALLCAP	    0x10    /* pattern is all capitals */
8316 
8317 /*
8318  * ":spelldump"
8319  */
8320     void
8321 ex_spelldump(exarg_T *eap)
8322 {
8323     char_u  *spl;
8324     long    dummy;
8325 
8326     if (no_spell_checking(curwin))
8327 	return;
8328     get_option_value((char_u*)"spl", &dummy, &spl, OPT_LOCAL);
8329 
8330     /* Create a new empty buffer in a new window. */
8331     do_cmdline_cmd((char_u *)"new");
8332 
8333     /* enable spelling locally in the new window */
8334     set_option_value((char_u*)"spell", TRUE, (char_u*)"", OPT_LOCAL);
8335     set_option_value((char_u*)"spl",  dummy, spl, OPT_LOCAL);
8336     vim_free(spl);
8337 
8338     if (!BUFEMPTY())
8339 	return;
8340 
8341     spell_dump_compl(NULL, 0, NULL, eap->forceit ? DUMPFLAG_COUNT : 0);
8342 
8343     /* Delete the empty line that we started with. */
8344     if (curbuf->b_ml.ml_line_count > 1)
8345 	ml_delete(curbuf->b_ml.ml_line_count, FALSE);
8346 
8347     redraw_later(NOT_VALID);
8348 }
8349 
8350 /*
8351  * Go through all possible words and:
8352  * 1. When "pat" is NULL: dump a list of all words in the current buffer.
8353  *	"ic" and "dir" are not used.
8354  * 2. When "pat" is not NULL: add matching words to insert mode completion.
8355  */
8356     void
8357 spell_dump_compl(
8358     char_u	*pat,	    /* leading part of the word */
8359     int		ic,	    /* ignore case */
8360     int		*dir,	    /* direction for adding matches */
8361     int		dumpflags_arg)	/* DUMPFLAG_* */
8362 {
8363     langp_T	*lp;
8364     slang_T	*slang;
8365     idx_T	arridx[MAXWLEN];
8366     int		curi[MAXWLEN];
8367     char_u	word[MAXWLEN];
8368     int		c;
8369     char_u	*byts;
8370     idx_T	*idxs;
8371     linenr_T	lnum = 0;
8372     int		round;
8373     int		depth;
8374     int		n;
8375     int		flags;
8376     char_u	*region_names = NULL;	    /* region names being used */
8377     int		do_region = TRUE;	    /* dump region names and numbers */
8378     char_u	*p;
8379     int		lpi;
8380     int		dumpflags = dumpflags_arg;
8381     int		patlen;
8382 
8383     /* When ignoring case or when the pattern starts with capital pass this on
8384      * to dump_word(). */
8385     if (pat != NULL)
8386     {
8387 	if (ic)
8388 	    dumpflags |= DUMPFLAG_ICASE;
8389 	else
8390 	{
8391 	    n = captype(pat, NULL);
8392 	    if (n == WF_ONECAP)
8393 		dumpflags |= DUMPFLAG_ONECAP;
8394 	    else if (n == WF_ALLCAP && (int)STRLEN(pat) > mb_ptr2len(pat))
8395 		dumpflags |= DUMPFLAG_ALLCAP;
8396 	}
8397     }
8398 
8399     /* Find out if we can support regions: All languages must support the same
8400      * regions or none at all. */
8401     for (lpi = 0; lpi < curwin->w_s->b_langp.ga_len; ++lpi)
8402     {
8403 	lp = LANGP_ENTRY(curwin->w_s->b_langp, lpi);
8404 	p = lp->lp_slang->sl_regions;
8405 	if (p[0] != 0)
8406 	{
8407 	    if (region_names == NULL)	    /* first language with regions */
8408 		region_names = p;
8409 	    else if (STRCMP(region_names, p) != 0)
8410 	    {
8411 		do_region = FALSE;	    /* region names are different */
8412 		break;
8413 	    }
8414 	}
8415     }
8416 
8417     if (do_region && region_names != NULL)
8418     {
8419 	if (pat == NULL)
8420 	{
8421 	    vim_snprintf((char *)IObuff, IOSIZE, "/regions=%s", region_names);
8422 	    ml_append(lnum++, IObuff, (colnr_T)0, FALSE);
8423 	}
8424     }
8425     else
8426 	do_region = FALSE;
8427 
8428     /*
8429      * Loop over all files loaded for the entries in 'spelllang'.
8430      */
8431     for (lpi = 0; lpi < curwin->w_s->b_langp.ga_len; ++lpi)
8432     {
8433 	lp = LANGP_ENTRY(curwin->w_s->b_langp, lpi);
8434 	slang = lp->lp_slang;
8435 	if (slang->sl_fbyts == NULL)	    /* reloading failed */
8436 	    continue;
8437 
8438 	if (pat == NULL)
8439 	{
8440 	    vim_snprintf((char *)IObuff, IOSIZE, "# file: %s", slang->sl_fname);
8441 	    ml_append(lnum++, IObuff, (colnr_T)0, FALSE);
8442 	}
8443 
8444 	/* When matching with a pattern and there are no prefixes only use
8445 	 * parts of the tree that match "pat". */
8446 	if (pat != NULL && slang->sl_pbyts == NULL)
8447 	    patlen = (int)STRLEN(pat);
8448 	else
8449 	    patlen = -1;
8450 
8451 	/* round 1: case-folded tree
8452 	 * round 2: keep-case tree */
8453 	for (round = 1; round <= 2; ++round)
8454 	{
8455 	    if (round == 1)
8456 	    {
8457 		dumpflags &= ~DUMPFLAG_KEEPCASE;
8458 		byts = slang->sl_fbyts;
8459 		idxs = slang->sl_fidxs;
8460 	    }
8461 	    else
8462 	    {
8463 		dumpflags |= DUMPFLAG_KEEPCASE;
8464 		byts = slang->sl_kbyts;
8465 		idxs = slang->sl_kidxs;
8466 	    }
8467 	    if (byts == NULL)
8468 		continue;		/* array is empty */
8469 
8470 	    depth = 0;
8471 	    arridx[0] = 0;
8472 	    curi[0] = 1;
8473 	    while (depth >= 0 && !got_int
8474 				  && (pat == NULL || !ins_compl_interrupted()))
8475 	    {
8476 		if (curi[depth] > byts[arridx[depth]])
8477 		{
8478 		    /* Done all bytes at this node, go up one level. */
8479 		    --depth;
8480 		    line_breakcheck();
8481 		    ins_compl_check_keys(50, FALSE);
8482 		}
8483 		else
8484 		{
8485 		    /* Do one more byte at this node. */
8486 		    n = arridx[depth] + curi[depth];
8487 		    ++curi[depth];
8488 		    c = byts[n];
8489 		    if (c == 0)
8490 		    {
8491 			/* End of word, deal with the word.
8492 			 * Don't use keep-case words in the fold-case tree,
8493 			 * they will appear in the keep-case tree.
8494 			 * Only use the word when the region matches. */
8495 			flags = (int)idxs[n];
8496 			if ((round == 2 || (flags & WF_KEEPCAP) == 0)
8497 				&& (flags & WF_NEEDCOMP) == 0
8498 				&& (do_region
8499 				    || (flags & WF_REGION) == 0
8500 				    || (((unsigned)flags >> 16)
8501 						       & lp->lp_region) != 0))
8502 			{
8503 			    word[depth] = NUL;
8504 			    if (!do_region)
8505 				flags &= ~WF_REGION;
8506 
8507 			    /* Dump the basic word if there is no prefix or
8508 			     * when it's the first one. */
8509 			    c = (unsigned)flags >> 24;
8510 			    if (c == 0 || curi[depth] == 2)
8511 			    {
8512 				dump_word(slang, word, pat, dir,
8513 						      dumpflags, flags, lnum);
8514 				if (pat == NULL)
8515 				    ++lnum;
8516 			    }
8517 
8518 			    /* Apply the prefix, if there is one. */
8519 			    if (c != 0)
8520 				lnum = dump_prefixes(slang, word, pat, dir,
8521 						      dumpflags, flags, lnum);
8522 			}
8523 		    }
8524 		    else
8525 		    {
8526 			/* Normal char, go one level deeper. */
8527 			word[depth++] = c;
8528 			arridx[depth] = idxs[n];
8529 			curi[depth] = 1;
8530 
8531 			/* Check if this characters matches with the pattern.
8532 			 * If not skip the whole tree below it.
8533 			 * Always ignore case here, dump_word() will check
8534 			 * proper case later.  This isn't exactly right when
8535 			 * length changes for multi-byte characters with
8536 			 * ignore case... */
8537 			if (depth <= patlen
8538 					&& MB_STRNICMP(word, pat, depth) != 0)
8539 			    --depth;
8540 		    }
8541 		}
8542 	    }
8543 	}
8544     }
8545 }
8546 
8547 /*
8548  * Dump one word: apply case modifications and append a line to the buffer.
8549  * When "lnum" is zero add insert mode completion.
8550  */
8551     static void
8552 dump_word(
8553     slang_T	*slang,
8554     char_u	*word,
8555     char_u	*pat,
8556     int		*dir,
8557     int		dumpflags,
8558     int		wordflags,
8559     linenr_T	lnum)
8560 {
8561     int		keepcap = FALSE;
8562     char_u	*p;
8563     char_u	*tw;
8564     char_u	cword[MAXWLEN];
8565     char_u	badword[MAXWLEN + 10];
8566     int		i;
8567     int		flags = wordflags;
8568 
8569     if (dumpflags & DUMPFLAG_ONECAP)
8570 	flags |= WF_ONECAP;
8571     if (dumpflags & DUMPFLAG_ALLCAP)
8572 	flags |= WF_ALLCAP;
8573 
8574     if ((dumpflags & DUMPFLAG_KEEPCASE) == 0 && (flags & WF_CAPMASK) != 0)
8575     {
8576 	/* Need to fix case according to "flags". */
8577 	make_case_word(word, cword, flags);
8578 	p = cword;
8579     }
8580     else
8581     {
8582 	p = word;
8583 	if ((dumpflags & DUMPFLAG_KEEPCASE)
8584 		&& ((captype(word, NULL) & WF_KEEPCAP) == 0
8585 						 || (flags & WF_FIXCAP) != 0))
8586 	    keepcap = TRUE;
8587     }
8588     tw = p;
8589 
8590     if (pat == NULL)
8591     {
8592 	/* Add flags and regions after a slash. */
8593 	if ((flags & (WF_BANNED | WF_RARE | WF_REGION)) || keepcap)
8594 	{
8595 	    STRCPY(badword, p);
8596 	    STRCAT(badword, "/");
8597 	    if (keepcap)
8598 		STRCAT(badword, "=");
8599 	    if (flags & WF_BANNED)
8600 		STRCAT(badword, "!");
8601 	    else if (flags & WF_RARE)
8602 		STRCAT(badword, "?");
8603 	    if (flags & WF_REGION)
8604 		for (i = 0; i < 7; ++i)
8605 		    if (flags & (0x10000 << i))
8606 			sprintf((char *)badword + STRLEN(badword), "%d", i + 1);
8607 	    p = badword;
8608 	}
8609 
8610 	if (dumpflags & DUMPFLAG_COUNT)
8611 	{
8612 	    hashitem_T  *hi;
8613 
8614 	    /* Include the word count for ":spelldump!". */
8615 	    hi = hash_find(&slang->sl_wordcount, tw);
8616 	    if (!HASHITEM_EMPTY(hi))
8617 	    {
8618 		vim_snprintf((char *)IObuff, IOSIZE, "%s\t%d",
8619 						     tw, HI2WC(hi)->wc_count);
8620 		p = IObuff;
8621 	    }
8622 	}
8623 
8624 	ml_append(lnum, p, (colnr_T)0, FALSE);
8625     }
8626     else if (((dumpflags & DUMPFLAG_ICASE)
8627 		    ? MB_STRNICMP(p, pat, STRLEN(pat)) == 0
8628 		    : STRNCMP(p, pat, STRLEN(pat)) == 0)
8629 		&& ins_compl_add_infercase(p, (int)STRLEN(p),
8630 					  p_ic, NULL, *dir, 0) == OK)
8631 	/* if dir was BACKWARD then honor it just once */
8632 	*dir = FORWARD;
8633 }
8634 
8635 /*
8636  * For ":spelldump": Find matching prefixes for "word".  Prepend each to
8637  * "word" and append a line to the buffer.
8638  * When "lnum" is zero add insert mode completion.
8639  * Return the updated line number.
8640  */
8641     static linenr_T
8642 dump_prefixes(
8643     slang_T	*slang,
8644     char_u	*word,	    /* case-folded word */
8645     char_u	*pat,
8646     int		*dir,
8647     int		dumpflags,
8648     int		flags,	    /* flags with prefix ID */
8649     linenr_T	startlnum)
8650 {
8651     idx_T	arridx[MAXWLEN];
8652     int		curi[MAXWLEN];
8653     char_u	prefix[MAXWLEN];
8654     char_u	word_up[MAXWLEN];
8655     int		has_word_up = FALSE;
8656     int		c;
8657     char_u	*byts;
8658     idx_T	*idxs;
8659     linenr_T	lnum = startlnum;
8660     int		depth;
8661     int		n;
8662     int		len;
8663     int		i;
8664 
8665     /* If the word starts with a lower-case letter make the word with an
8666      * upper-case letter in word_up[]. */
8667     c = PTR2CHAR(word);
8668     if (SPELL_TOUPPER(c) != c)
8669     {
8670 	onecap_copy(word, word_up, TRUE);
8671 	has_word_up = TRUE;
8672     }
8673 
8674     byts = slang->sl_pbyts;
8675     idxs = slang->sl_pidxs;
8676     if (byts != NULL)		/* array not is empty */
8677     {
8678 	/*
8679 	 * Loop over all prefixes, building them byte-by-byte in prefix[].
8680 	 * When at the end of a prefix check that it supports "flags".
8681 	 */
8682 	depth = 0;
8683 	arridx[0] = 0;
8684 	curi[0] = 1;
8685 	while (depth >= 0 && !got_int)
8686 	{
8687 	    n = arridx[depth];
8688 	    len = byts[n];
8689 	    if (curi[depth] > len)
8690 	    {
8691 		/* Done all bytes at this node, go up one level. */
8692 		--depth;
8693 		line_breakcheck();
8694 	    }
8695 	    else
8696 	    {
8697 		/* Do one more byte at this node. */
8698 		n += curi[depth];
8699 		++curi[depth];
8700 		c = byts[n];
8701 		if (c == 0)
8702 		{
8703 		    /* End of prefix, find out how many IDs there are. */
8704 		    for (i = 1; i < len; ++i)
8705 			if (byts[n + i] != 0)
8706 			    break;
8707 		    curi[depth] += i - 1;
8708 
8709 		    c = valid_word_prefix(i, n, flags, word, slang, FALSE);
8710 		    if (c != 0)
8711 		    {
8712 			vim_strncpy(prefix + depth, word, MAXWLEN - depth - 1);
8713 			dump_word(slang, prefix, pat, dir, dumpflags,
8714 				(c & WF_RAREPFX) ? (flags | WF_RARE)
8715 							       : flags, lnum);
8716 			if (lnum != 0)
8717 			    ++lnum;
8718 		    }
8719 
8720 		    /* Check for prefix that matches the word when the
8721 		     * first letter is upper-case, but only if the prefix has
8722 		     * a condition. */
8723 		    if (has_word_up)
8724 		    {
8725 			c = valid_word_prefix(i, n, flags, word_up, slang,
8726 									TRUE);
8727 			if (c != 0)
8728 			{
8729 			    vim_strncpy(prefix + depth, word_up,
8730 							 MAXWLEN - depth - 1);
8731 			    dump_word(slang, prefix, pat, dir, dumpflags,
8732 				    (c & WF_RAREPFX) ? (flags | WF_RARE)
8733 							       : flags, lnum);
8734 			    if (lnum != 0)
8735 				++lnum;
8736 			}
8737 		    }
8738 		}
8739 		else
8740 		{
8741 		    /* Normal char, go one level deeper. */
8742 		    prefix[depth++] = c;
8743 		    arridx[depth] = idxs[n];
8744 		    curi[depth] = 1;
8745 		}
8746 	    }
8747 	}
8748     }
8749 
8750     return lnum;
8751 }
8752 
8753 /*
8754  * Move "p" to the end of word "start".
8755  * Uses the spell-checking word characters.
8756  */
8757     char_u *
8758 spell_to_word_end(char_u *start, win_T *win)
8759 {
8760     char_u  *p = start;
8761 
8762     while (*p != NUL && spell_iswordp(p, win))
8763 	MB_PTR_ADV(p);
8764     return p;
8765 }
8766 
8767 #if defined(FEAT_INS_EXPAND) || defined(PROTO)
8768 /*
8769  * For Insert mode completion CTRL-X s:
8770  * Find start of the word in front of column "startcol".
8771  * We don't check if it is badly spelled, with completion we can only change
8772  * the word in front of the cursor.
8773  * Returns the column number of the word.
8774  */
8775     int
8776 spell_word_start(int startcol)
8777 {
8778     char_u	*line;
8779     char_u	*p;
8780     int		col = 0;
8781 
8782     if (no_spell_checking(curwin))
8783 	return startcol;
8784 
8785     /* Find a word character before "startcol". */
8786     line = ml_get_curline();
8787     for (p = line + startcol; p > line; )
8788     {
8789 	MB_PTR_BACK(line, p);
8790 	if (spell_iswordp_nmw(p, curwin))
8791 	    break;
8792     }
8793 
8794     /* Go back to start of the word. */
8795     while (p > line)
8796     {
8797 	col = (int)(p - line);
8798 	MB_PTR_BACK(line, p);
8799 	if (!spell_iswordp(p, curwin))
8800 	    break;
8801 	col = 0;
8802     }
8803 
8804     return col;
8805 }
8806 
8807 /*
8808  * Need to check for 'spellcapcheck' now, the word is removed before
8809  * expand_spelling() is called.  Therefore the ugly global variable.
8810  */
8811 static int spell_expand_need_cap;
8812 
8813     void
8814 spell_expand_check_cap(colnr_T col)
8815 {
8816     spell_expand_need_cap = check_need_cap(curwin->w_cursor.lnum, col);
8817 }
8818 
8819 /*
8820  * Get list of spelling suggestions.
8821  * Used for Insert mode completion CTRL-X ?.
8822  * Returns the number of matches.  The matches are in "matchp[]", array of
8823  * allocated strings.
8824  */
8825     int
8826 expand_spelling(
8827     linenr_T	lnum UNUSED,
8828     char_u	*pat,
8829     char_u	***matchp)
8830 {
8831     garray_T	ga;
8832 
8833     spell_suggest_list(&ga, pat, 100, spell_expand_need_cap, TRUE);
8834     *matchp = ga.ga_data;
8835     return ga.ga_len;
8836 }
8837 #endif
8838 
8839 #endif  /* FEAT_SPELL */
8840