xref: /vim-8.2.3635/src/spell.c (revision a6c27c47)
1 /* vi:set ts=8 sts=4 sw=4 noet:
2  *
3  * VIM - Vi IMproved	by Bram Moolenaar
4  *
5  * Do ":help uganda"  in Vim to read copying and usage conditions.
6  * Do ":help credits" in Vim to see a list of people who contributed.
7  * See README.txt for an overview of the Vim source code.
8  */
9 
10 /*
11  * spell.c: code for spell checking
12  *
13  * See spellfile.c for the Vim spell file format.
14  *
15  * The spell checking mechanism uses a tree (aka trie).  Each node in the tree
16  * has a list of bytes that can appear (siblings).  For each byte there is a
17  * pointer to the node with the byte that follows in the word (child).
18  *
19  * A NUL byte is used where the word may end.  The bytes are sorted, so that
20  * binary searching can be used and the NUL bytes are at the start.  The
21  * number of possible bytes is stored before the list of bytes.
22  *
23  * The tree uses two arrays: "byts" stores the characters, "idxs" stores
24  * either the next index or flags.  The tree starts at index 0.  For example,
25  * to lookup "vi" this sequence is followed:
26  *	i = 0
27  *	len = byts[i]
28  *	n = where "v" appears in byts[i + 1] to byts[i + len]
29  *	i = idxs[n]
30  *	len = byts[i]
31  *	n = where "i" appears in byts[i + 1] to byts[i + len]
32  *	i = idxs[n]
33  *	len = byts[i]
34  *	find that byts[i + 1] is 0, idxs[i + 1] has flags for "vi".
35  *
36  * There are two word trees: one with case-folded words and one with words in
37  * original case.  The second one is only used for keep-case words and is
38  * usually small.
39  *
40  * There is one additional tree for when not all prefixes are applied when
41  * generating the .spl file.  This tree stores all the possible prefixes, as
42  * if they were words.  At each word (prefix) end the prefix nr is stored, the
43  * following word must support this prefix nr.  And the condition nr is
44  * stored, used to lookup the condition that the word must match with.
45  *
46  * Thanks to Olaf Seibert for providing an example implementation of this tree
47  * and the compression mechanism.
48  * LZ trie ideas:
49  *	http://www.irb.hr/hr/home/ristov/papers/RistovLZtrieRevision1.pdf
50  * More papers: http://www-igm.univ-mlv.fr/~laporte/publi_en.html
51  *
52  * Matching involves checking the caps type: Onecap ALLCAP KeepCap.
53  *
54  * Why doesn't Vim use aspell/ispell/myspell/etc.?
55  * See ":help develop-spell".
56  */
57 
58 /*
59  * Use this to adjust the score after finding suggestions, based on the
60  * suggested word sounding like the bad word.  This is much faster than doing
61  * it for every possible suggestion.
62  * Disadvantage: When "the" is typed as "hte" it sounds quite different ("@"
63  * vs "ht") and goes down in the list.
64  * Used when 'spellsuggest' is set to "best".
65  */
/* Weighted average of the word score (weight 3) and the sound score
 * (weight 1).  Both parameters are parenthesized so that an expression
 * argument such as "a + b" is weighted as a whole. */
#define RESCORE(word_score, sound_score) ((3 * (word_score) + (sound_score)) / 4)
67 
68 /*
69  * Do the opposite: based on a maximum end score and a known sound score,
70  * compute the maximum word score that can be used.
71  */
/* Inverse of RESCORE(): the first argument is the maximum end score (the
 * parameter keeps its historical name "word_score"), the second the known
 * sound score.  Both parameters are parenthesized so that an expression
 * argument is used as a whole. */
#define MAXSCORE(word_score, sound_score) ((4 * (word_score) - (sound_score)) / 3)
73 
74 #define IN_SPELL_C
75 #include "vim.h"
76 
77 #if defined(FEAT_SPELL) || defined(PROTO)
78 
79 #ifndef UNIX		/* it's in os_unix.h for Unix */
80 # include <time.h>	/* for time_t */
81 #endif
82 
83 /* only used for su_badflags */
84 #define WF_MIXCAP   0x20	/* mix of upper and lower case: macaRONI */
85 
86 #define WF_CAPMASK (WF_ONECAP | WF_ALLCAP | WF_KEEPCAP | WF_FIXCAP)	/* the four caps flags OR-ed into one mask */
87 
88 #define REGION_ALL 0xff		/* word valid in all regions */
89 
90 #define VIMSUGMAGIC "VIMsug"	/* string at start of Vim .sug file */
91 #define VIMSUGMAGICL 6		/* number of bytes in VIMSUGMAGIC */
92 #define VIMSUGVERSION 1		/* NOTE(review): presumably the .sug file format version - confirm */
93 
94 /* Result values.  Lower number is accepted over higher one. */
95 #define SP_BANNED	-1	/* spell_check() highlights this with HLF_SPB */
96 #define SP_OK		0	/* no spelling highlight; HLF_SPC is still possible */
97 #define SP_RARE		1	/* spell_check() highlights this with HLF_SPR */
98 #define SP_LOCAL	2	/* spell_check() highlights this with HLF_SPL */
99 #define SP_BAD		3	/* spell_check() highlights this with HLF_SPB */
100 
101 typedef struct wordcount_S	/* a word together with a counter */
102 {
103     short_u	wc_count;	    /* nr of times word was seen */
104     char_u	wc_word[1];	    /* word, actually longer */
105 } wordcount_T;
106 
107 #define WC_KEY_OFF  offsetof(wordcount_T, wc_word)	/* byte offset of wc_word in wordcount_T */
108 #define HI2WC(hi)     ((wordcount_T *)((hi)->hi_key - WC_KEY_OFF))	/* step back from the key to the struct; assumes hi_key points at wc_word */
109 #define MAXWORDCOUNT 0xffff	/* NOTE(review): presumably the upper limit for wc_count - confirm */
110 
111 /*
112  * Information used when looking for suggestions for one bad word.
113  */
114 typedef struct suginfo_S
115 {
116     garray_T	su_ga;		    /* suggestions, contains "suggest_T" */
117     int		su_maxcount;	    /* max. number of suggestions displayed */
118     int		su_maxscore;	    /* maximum score for adding to su_ga */
119     int		su_sfmaxscore;	    /* idem, for when doing soundfold words */
120     garray_T	su_sga;		    /* like su_ga, sound-folded scoring */
121     char_u	*su_badptr;	    /* start of bad word in line */
122     int		su_badlen;	    /* length of detected bad word in line */
123     int		su_badflags;	    /* caps flags for bad word, WF_MIXCAP is
     				       only used here */
124     char_u	su_badword[MAXWLEN]; /* bad word truncated at su_badlen */
125     char_u	su_fbadword[MAXWLEN]; /* su_badword case-folded */
126     char_u	su_sal_badword[MAXWLEN]; /* su_badword soundfolded */
127     hashtab_T	su_banned;	    /* table with banned words, looked up by
     				       WAS_BANNED() */
128     slang_T	*su_sallang;	    /* default language for sound folding */
129 } suginfo_T;
130 
131 /* One word suggestion.  Used in "su_ga". */
132 typedef struct suggest_S
133 {
134     char_u	*st_word;	/* suggested word, allocated string */
135     int		st_wordlen;	/* STRLEN(st_word) */
136     int		st_orglen;	/* length of replaced text */
137     int		st_score;	/* lower is better */
138     int		st_altscore;	/* used when st_score compares equal */
139     int		st_salscore;	/* st_score is for soundalike */
140     int		st_had_bonus;	/* bonus already included in score */
141     slang_T	*st_slang;	/* language used for sound folding */
142 } suggest_T;
143 
144 #define SUG(ga, i) (((suggest_T *)(ga).ga_data)[i])	/* item "i" of growarray "ga", which holds suggest_T items */
145 
/* TRUE if "word" appears in the table of banned words of "su".  "su" is
 * parenthesized so that any pointer expression can be passed safely. */
#define WAS_BANNED(su, word) (!HASHITEM_EMPTY(hash_find(&(su)->su_banned, word)))
148 
149 /* Number of suggestions kept when cleaning up (at least 150).  We need to
150  * keep more than what is displayed, because when rescore_suggestions() is
151  * called the score may change and wrong suggestions may be removed later. */
152 #define SUG_CLEAN_COUNT(su)    ((su)->su_maxcount < 130 ? 150 : (su)->su_maxcount + 20)
153 
154 /* Threshold for sorting and cleaning up suggestions, 50 above the number
155  * kept.  Don't want to keep lots of suggestions that won't be displayed. */
156 #define SUG_MAX_COUNT(su)	(SUG_CLEAN_COUNT(su) + 50)
157 
158 /* score for various changes */
159 #define SCORE_SPLIT	149	/* split bad word */
160 #define SCORE_SPLIT_NO	249	/* split bad word with NOSPLITSUGS */
161 #define SCORE_ICASE	52	/* slightly different case */
162 #define SCORE_REGION	200	/* word is for different region */
163 #define SCORE_RARE	180	/* rare word */
164 #define SCORE_SWAP	75	/* swap two characters */
165 #define SCORE_SWAP3	110	/* swap two characters in three */
166 #define SCORE_REP	65	/* REP replacement */
167 #define SCORE_SUBST	93	/* substitute a character */
168 #define SCORE_SIMILAR	33	/* substitute a similar character */
169 #define SCORE_SUBCOMP	33	/* substitute a composing character */
170 #define SCORE_DEL	94	/* delete a character */
171 #define SCORE_DELDUP	66	/* delete a duplicated character */
172 #define SCORE_DELCOMP	28	/* delete a composing character */
173 #define SCORE_INS	96	/* insert a character */
174 #define SCORE_INSDUP	67	/* insert a duplicate character */
175 #define SCORE_INSCOMP	30	/* insert a composing character */
176 #define SCORE_NONWORD	103	/* change non-word to word char */
177 
178 #define SCORE_FILE	30	/* suggestion from a file */
179 #define SCORE_MAXINIT	350	/* Initial maximum score: higher == slower.
180 				 * 350 allows for about three changes. */
181 
182 #define SCORE_COMMON1	30	/* subtracted for words seen before */
183 #define SCORE_COMMON2	40	/* subtracted for words often seen */
184 #define SCORE_COMMON3	50	/* subtracted for words very often seen */
185 #define SCORE_THRES2	10	/* word count threshold for COMMON2 */
186 #define SCORE_THRES3	100	/* word count threshold for COMMON3 */
187 
188 /* When trying changed soundfold words it becomes slow when trying more than
189  * two changes.  With fewer than two changes it's slightly faster but we miss a
190  * few good suggestions.  In rare cases we need to try three or four changes.
191  */
192 #define SCORE_SFMAX1	200	/* maximum score for first try */
193 #define SCORE_SFMAX2	300	/* maximum score for second try */
194 #define SCORE_SFMAX3	400	/* maximum score for third try */
195 
/* Big difference: three times the score of an insert.  The replacement list
 * is parenthesized so that the product stays a single operand in any
 * expression, e.g. "x / SCORE_BIG". */
#define SCORE_BIG	(SCORE_INS * 3)
197 #define SCORE_MAXMAX	999999		/* accept any score */
198 #define SCORE_LIMITMAX	350		/* for spell_edit_score_limit() */
199 
200 /* for spell_edit_score_limit() we need to know the minimum value of
201  * SCORE_ICASE, SCORE_SWAP, SCORE_DEL, SCORE_SIMILAR and SCORE_INS */
202 #define SCORE_EDIT_MIN	SCORE_SIMILAR
203 
204 /*
205  * Structure to store info for word matching.
206  */
207 typedef struct matchinf_S
208 {
209     langp_T	*mi_lp;			/* info for language and region */
210 
211     /* pointers to original text to be checked */
212     char_u	*mi_word;		/* start of word being checked */
213     char_u	*mi_end;		/* end of matching word so far */
214     char_u	*mi_fend;		/* next char to be added to mi_fword */
215     char_u	*mi_cend;		/* char after what was used for
216 					   mi_capflags */
217 
218     /* case-folded text */
219     char_u	mi_fword[MAXWLEN + 1];	/* mi_word case-folded */
220     int		mi_fwordlen;		/* nr of valid bytes in mi_fword */
221 
222     /* for when checking word after a prefix */
223     int		mi_prefarridx;		/* index in sl_pidxs with list of
224 					   affixID/condition */
225     int		mi_prefcnt;		/* number of entries at mi_prefarridx */
226     int		mi_prefixlen;		/* byte length of prefix */
227     int		mi_cprefixlen;		/* byte length of prefix in original
228 					   case */
229 
230     /* for when checking a compound word */
231     int		mi_compoff;		/* start of following word offset */
232     char_u	mi_compflags[MAXWLEN];	/* flags for compound words used */
233     int		mi_complen;		/* nr of compound words used */
234     int		mi_compextra;		/* nr of COMPOUNDROOT words */
235 
236     /* others */
237     int		mi_result;		/* result so far: SP_BAD, SP_OK, etc. */
238     int		mi_capflags;		/* WF_ONECAP WF_ALLCAP WF_KEEPCAP */
239     win_T	*mi_win;		/* window for which the check is done */
240 
241     /* for NOBREAK */
242     int		mi_result2;		/* "mi_result" without following word */
243     char_u	*mi_end2;		/* "mi_end" without following word */
244 } matchinf_T;
245 
246 
247 static int spell_iswordp(char_u *p, win_T *wp);
248 static int spell_mb_isword_class(int cl, win_T *wp);
249 
250 /*
251  * For finding suggestions: At each node in the tree these states are tried:
252  */
253 typedef enum	/* stored in ts_state of trystate_T */
254 {
255     STATE_START = 0,	/* At start of node check for NUL bytes (goodword
256 			 * ends); if badword ends there is a match, otherwise
257 			 * try splitting word. */
258     STATE_NOPREFIX,	/* Try without prefix. */
259     STATE_SPLITUNDO,	/* Undo splitting. */
260     STATE_ENDNUL,	/* Past NUL bytes at start of the node. */
261     STATE_PLAIN,	/* Use each byte of the node. */
262     STATE_DEL,		/* Delete a byte from the bad word. */
263     STATE_INS_PREP,	/* Prepare for inserting bytes. */
264     STATE_INS,		/* Insert a byte in the bad word. */
265     STATE_SWAP,		/* Swap two bytes. */
266     STATE_UNSWAP,	/* Undo swap two characters. */
267     STATE_SWAP3,	/* Swap two characters over three. */
268     STATE_UNSWAP3,	/* Undo swap two characters over three. */
269     STATE_UNROT3L,	/* Undo rotate three characters left. */
270     STATE_UNROT3R,	/* Undo rotate three characters right. */
271     STATE_REP_INI,	/* Prepare for using REP items. */
272     STATE_REP,		/* Use matching REP items from the .aff file. */
273     STATE_REP_UNDO,	/* Undo a REP item replacement. */
274     STATE_FINAL		/* End of this node. */
275 } state_T;
276 
277 /*
278  * Struct to keep the state at each level in suggest_try_change().
279  */
280 typedef struct trystate_S
281 {
282     state_T	ts_state;	/* state at this level, STATE_ */
283     int		ts_score;	/* score */
284     idx_T	ts_arridx;	/* index in tree array, start of node */
285     short	ts_curi;	/* index in list of child nodes */
286     char_u	ts_fidx;	/* index in fword[], case-folded bad word */
287     char_u	ts_fidxtry;	/* ts_fidx at which bytes may be changed */
288     char_u	ts_twordlen;	/* valid length of tword[] */
289     char_u	ts_prefixdepth;	/* stack depth for end of prefix or
290 				 * PFD_PREFIXTREE or PFD_NOPREFIX */
291     char_u	ts_flags;	/* TSF_ flags */
292     char_u	ts_tcharlen;	/* number of bytes in tword character */
293     char_u	ts_tcharidx;	/* current byte index in tword character */
294     char_u	ts_isdiff;	/* DIFF_ values */
295     char_u	ts_fcharstart;	/* index in fword where badword char started */
296     char_u	ts_prewordlen;	/* length of word in "preword[]" */
297     char_u	ts_splitoff;	/* index in "tword" after last split */
298     char_u	ts_splitfidx;	/* "ts_fidx" at word split */
299     char_u	ts_complen;	/* nr of compound words used */
300     char_u	ts_compsplit;	/* index for "compflags" where word was split */
301     char_u	ts_save_badflags;   /* su_badflags saved here */
302     char_u	ts_delidx;	/* index in fword for char that was deleted,
303 				   valid when "ts_flags" has TSF_DIDDEL */
304 } trystate_T;
305 
306 /* values for ts_isdiff */
307 #define DIFF_NONE	0	/* no different byte (yet) */
308 #define DIFF_YES	1	/* different byte found */
309 #define DIFF_INSERT	2	/* inserting character */
310 
311 /* values for ts_flags, each one is a separate bit */
312 #define TSF_PREFIXOK	1	/* already checked that prefix is OK */
313 #define TSF_DIDSPLIT	2	/* tried split at this point */
314 #define TSF_DIDDEL	4	/* did a delete, "ts_delidx" has index */
315 
316 /* special values for ts_prefixdepth */
317 #define PFD_NOPREFIX	0xff	/* not using prefixes */
318 #define PFD_PREFIXTREE	0xfe	/* walking through the prefix tree */
319 #define PFD_NOTSPECIAL	0xfd	/* highest value that's not special */
320 
321 /* mode values for find_word(), also passed to find_prefix() */
322 #define FIND_FOLDWORD	    0	/* find word case-folded */
323 #define FIND_KEEPWORD	    1	/* find keep-case word */
324 #define FIND_PREFIX	    2	/* find word after prefix */
325 #define FIND_COMPOUND	    3	/* find case-folded compound word */
326 #define FIND_KEEPCOMPOUND   4	/* find keep-case compound word */
327 
328 static void find_word(matchinf_T *mip, int mode);
329 static int match_checkcompoundpattern(char_u *ptr, int wlen, garray_T *gap);
330 static int can_compound(slang_T *slang, char_u *word, char_u *flags);
331 static int match_compoundrule(slang_T *slang, char_u *compflags);
332 static int valid_word_prefix(int totprefcnt, int arridx, int flags, char_u *word, slang_T *slang, int cond_req);
333 static void find_prefix(matchinf_T *mip, int mode);
334 static int fold_more(matchinf_T *mip);
335 static int spell_valid_case(int wordflags, int treeflags);
336 static void spell_load_cb(char_u *fname, void *cookie);
337 static int count_syllables(slang_T *slang, char_u *word);
338 static void clear_midword(win_T *buf);
339 static void use_midword(slang_T *lp, win_T *buf);
340 static int find_region(char_u *rp, char_u *region);
341 static int check_need_cap(linenr_T lnum, colnr_T col);
342 static void spell_find_suggest(char_u *badptr, int badlen, suginfo_T *su, int maxcount, int banbadword, int need_cap, int interactive);
343 #ifdef FEAT_EVAL
344 static void spell_suggest_expr(suginfo_T *su, char_u *expr);
345 #endif
346 static void spell_suggest_file(suginfo_T *su, char_u *fname);
347 static void spell_suggest_intern(suginfo_T *su, int interactive);
348 static void spell_find_cleanup(suginfo_T *su);
349 static void suggest_try_special(suginfo_T *su);
350 static void suggest_try_change(suginfo_T *su);
351 static void suggest_trie_walk(suginfo_T *su, langp_T *lp, char_u *fword, int soundfold);
352 static void go_deeper(trystate_T *stack, int depth, int score_add);
353 static int nofold_len(char_u *fword, int flen, char_u *word);
354 static void find_keepcap_word(slang_T *slang, char_u *fword, char_u *kword);
355 static void score_comp_sal(suginfo_T *su);
356 static void score_combine(suginfo_T *su);
357 static int stp_sal_score(suggest_T *stp, suginfo_T *su, slang_T *slang, char_u *badsound);
358 static void suggest_try_soundalike_prep(void);
359 static void suggest_try_soundalike(suginfo_T *su);
360 static void suggest_try_soundalike_finish(void);
361 static void add_sound_suggest(suginfo_T *su, char_u *goodword, int score, langp_T *lp);
362 static int soundfold_find(slang_T *slang, char_u *word);
363 static void make_case_word(char_u *fword, char_u *cword, int flags);
364 static int similar_chars(slang_T *slang, int c1, int c2);
365 static void add_suggestion(suginfo_T *su, garray_T *gap, char_u *goodword, int badlen, int score, int altscore, int had_bonus, slang_T *slang, int maxsf);
366 static void check_suggestions(suginfo_T *su, garray_T *gap);
367 static void add_banned(suginfo_T *su, char_u *word);
368 static void rescore_suggestions(suginfo_T *su);
369 static void rescore_one(suginfo_T *su, suggest_T *stp);
370 static int cleanup_suggestions(garray_T *gap, int maxscore, int keep);
371 static void spell_soundfold_sofo(slang_T *slang, char_u *inword, char_u *res);
372 static void spell_soundfold_sal(slang_T *slang, char_u *inword, char_u *res);
373 static void spell_soundfold_wsal(slang_T *slang, char_u *inword, char_u *res);
374 static int soundalike_score(char_u *goodsound, char_u *badsound);
375 static int spell_edit_score(slang_T *slang, char_u *badword, char_u *goodword);
376 static int spell_edit_score_limit(slang_T *slang, char_u *badword, char_u *goodword, int limit);
377 static int spell_edit_score_limit_w(slang_T *slang, char_u *badword, char_u *goodword, int limit);
378 static void dump_word(slang_T *slang, char_u *word, char_u *pat, int *dir, int round, int flags, linenr_T lnum);
379 static linenr_T dump_prefixes(slang_T *slang, char_u *word, char_u *pat, int *dir, int round, int flags, linenr_T startlnum);
380 
381 
382 /* Remember what "z?" replaced. */
383 static char_u	*repl_from = NULL;
384 static char_u	*repl_to = NULL;
385 
386 /*
387  * Main spell-checking function.
388  * "ptr" points to a character that could be the start of a word.
389  * "*attrp" is set to HLF_SPB for a bad or banned word, HLF_SPR for a rare
390  * word, HLF_SPL for another non-OK result and HLF_SPC for a missing capital.
391  * For a non-word or when it's OK it remains unchanged.
392  * This must only be called when 'spelllang' is not empty.
393  * "capcol" is used to check for a Capitalised word after the end of a
394  * sentence.  If "*capcol" is zero then perform the check.  "*capcol" is set
395  * to the offset from "ptr" where to check next, or -1 when no sentence end
396  * was found.  If "capcol" is NULL then don't worry.
397  * When "docount" is TRUE the word is counted once when it is found to be OK.
398  * Returns the length of the word in bytes, also when it's OK, so that the
399  * caller can skip over the word.
400  */
401     int
402 spell_check(
403     win_T	*wp,		/* current window */
404     char_u	*ptr,		/* possible start of a word */
405     hlf_T	*attrp,		/* highlight value, only set when needed */
406     int		*capcol,	/* column to check for Capital */
407     int		docount)	/* count good words */
408 {
409     matchinf_T	mi;		/* Most things are put in "mi" so that it can
410 				   be passed to functions quickly. */
411     int		nrlen = 0;	/* length of the number found first, if any */
412     int		c;		/* first character of the word */
413     int		wrongcaplen = 0; /* length of word that lacks a capital */
414     int		lpi;		/* index in wp->w_s->b_langp */
415     int		count_word = docount; /* reset once the word was counted */
416 
417     /* A word never starts at a space or a control character.  Return quickly
418      * then, skipping over the character. */
419     if (*ptr <= ' ')
420 	return 1;
421 
422     /* Return here when loading language files failed. */
423     if (wp->w_s->b_langp.ga_len == 0)
424 	return 1;
425 
426     vim_memset(&mi, 0, sizeof(matchinf_T));
427 
428     /* A number is always OK.  Also skip hexadecimal numbers 0xFF99 and
429      * 0X99FF.  But always do check spelling to find "3GPP" and "11
430      * julifeest".  Binary numbers starting with 0b or 0B are skipped too. */
431     if (*ptr >= '0' && *ptr <= '9')
432     {
433 	if (*ptr == '0' && (ptr[1] == 'b' || ptr[1] == 'B'))
434 	    mi.mi_end = skipbin(ptr + 2);
435 	else if (*ptr == '0' && (ptr[1] == 'x' || ptr[1] == 'X'))
436 	    mi.mi_end = skiphex(ptr + 2);
437 	else
438 	    mi.mi_end = skipdigits(ptr);
439 	nrlen = (int)(mi.mi_end - ptr);
440     }
441 
442     /* Find the normal end of the word (until the next non-word character). */
443     mi.mi_word = ptr;
444     mi.mi_fend = ptr;
445     if (spell_iswordp(mi.mi_fend, wp))
446     {
447 	do
448 	    MB_PTR_ADV(mi.mi_fend);
449 	while (*mi.mi_fend != NUL && spell_iswordp(mi.mi_fend, wp));
450 
451 	if (capcol != NULL && *capcol == 0 && wp->w_s->b_cap_prog != NULL)
452 	{
453 	    /* Check word starting with capital letter. */
454 	    c = PTR2CHAR(ptr);
455 	    if (!SPELL_ISUPPER(c))
456 		wrongcaplen = (int)(mi.mi_fend - ptr);
457 	}
458     }
459     if (capcol != NULL)
460 	*capcol = -1;		/* no sentence end found (yet) */
461 
462     /* We always use the characters up to the next non-word character,
463      * also for bad words. */
464     mi.mi_end = mi.mi_fend;
465 
466     /* Check caps type later. */
467     mi.mi_capflags = 0;
468     mi.mi_cend = NULL;
469     mi.mi_win = wp;
470 
471     /* case-fold the word with one non-word character, so that we can check
472      * for the word end. */
473     if (*mi.mi_fend != NUL)
474 	MB_PTR_ADV(mi.mi_fend);
475 
476     (void)spell_casefold(ptr, (int)(mi.mi_fend - ptr), mi.mi_fword,
477 							     MAXWLEN + 1);
478     mi.mi_fwordlen = (int)STRLEN(mi.mi_fword);
479 
480     /* The word is bad unless we recognize it. */
481     mi.mi_result = SP_BAD;
482     mi.mi_result2 = SP_BAD;
483 
484     /*
485      * Loop over the languages specified in 'spelllang'.
486      * We check them all, because a word may be matched longer in another
487      * language.
488      */
489     for (lpi = 0; lpi < wp->w_s->b_langp.ga_len; ++lpi)
490     {
491 	mi.mi_lp = LANGP_ENTRY(wp->w_s->b_langp, lpi);
492 
493 	/* If reloading fails the language is still in the list but everything
494 	 * has been cleared. */
495 	if (mi.mi_lp->lp_slang->sl_fidxs == NULL)
496 	    continue;
497 
498 	/* Check for a matching word in case-folded words. */
499 	find_word(&mi, FIND_FOLDWORD);
500 
501 	/* Check for a matching word in keep-case words. */
502 	find_word(&mi, FIND_KEEPWORD);
503 
504 	/* Check for matching prefixes. */
505 	find_prefix(&mi, FIND_FOLDWORD);
506 
507 	/* For a NOBREAK language, may want to use a word without a following
508 	 * word as a backup. */
509 	if (mi.mi_lp->lp_slang->sl_nobreak && mi.mi_result == SP_BAD
510 						   && mi.mi_result2 != SP_BAD)
511 	{
512 	    mi.mi_result = mi.mi_result2;
513 	    mi.mi_end = mi.mi_end2;
514 	}
515 
516 	/* Count the word in the first language where it's found to be OK. */
517 	if (count_word && mi.mi_result == SP_OK)
518 	{
519 	    count_common_word(mi.mi_lp->lp_slang, ptr,
520 						   (int)(mi.mi_end - ptr), 1);
521 	    count_word = FALSE;
522 	}
523     }
524 
525     if (mi.mi_result != SP_OK)
526     {
527 	/* If we found a number skip over it.  Allows for "42nd".  Do flag
528 	 * rare and local words, e.g., "3GPP". */
529 	if (nrlen > 0)
530 	{
531 	    if (mi.mi_result == SP_BAD || mi.mi_result == SP_BANNED)
532 		return nrlen;
533 	}
534 
535 	/* When we are at a non-word character there is no error, just
536 	 * skip over the character (try looking for a word after it). */
537 	else if (!spell_iswordp_nmw(ptr, wp))
538 	{
539 	    if (capcol != NULL && wp->w_s->b_cap_prog != NULL)
540 	    {
541 		regmatch_T	regmatch;
542 		int		r;
543 
544 		/* Check for end of sentence. */
545 		regmatch.regprog = wp->w_s->b_cap_prog;
546 		regmatch.rm_ic = FALSE;
547 		r = vim_regexec(&regmatch, ptr, 0);
548 		wp->w_s->b_cap_prog = regmatch.regprog; /* store the program pointer back */
549 		if (r)
550 		    *capcol = (int)(regmatch.endp[0] - ptr);
551 	    }
552 
553 	    if (has_mbyte)
554 		return (*mb_ptr2len)(ptr);
555 	    return 1;
556 	}
557 	else if (mi.mi_end == ptr)
558 	    /* Always include at least one character.  Required for when there
559 	     * is a mixup in "midword". */
560 	    MB_PTR_ADV(mi.mi_end);
561 	else if (mi.mi_result == SP_BAD
562 		&& LANGP_ENTRY(wp->w_s->b_langp, 0)->lp_slang->sl_nobreak)
563 	{
564 	    char_u	*p, *fp;
565 	    int		save_result = mi.mi_result;
566 
567 	    /* First language in 'spelllang' is NOBREAK.  Find first position
568 	     * at which any word would be valid. */
569 	    mi.mi_lp = LANGP_ENTRY(wp->w_s->b_langp, 0);
570 	    if (mi.mi_lp->lp_slang->sl_fidxs != NULL)
571 	    {
572 		p = mi.mi_word;
573 		fp = mi.mi_fword;
574 		for (;;)
575 		{
576 		    MB_PTR_ADV(p);
577 		    MB_PTR_ADV(fp);
578 		    if (p >= mi.mi_end)
579 			break;
580 		    mi.mi_compoff = (int)(fp - mi.mi_fword);
581 		    find_word(&mi, FIND_COMPOUND);
582 		    if (mi.mi_result != SP_BAD)
583 		    {
584 			mi.mi_end = p;
585 			break;
586 		    }
587 		}
588 		mi.mi_result = save_result; /* find_word() may have changed it */
589 	    }
590 	}
591 
592 	if (mi.mi_result == SP_BAD || mi.mi_result == SP_BANNED)
593 	    *attrp = HLF_SPB;
594 	else if (mi.mi_result == SP_RARE)
595 	    *attrp = HLF_SPR;
596 	else
597 	    *attrp = HLF_SPL;
598     }
599 
600     if (wrongcaplen > 0 && (mi.mi_result == SP_OK || mi.mi_result == SP_RARE))
601     {
602 	/* Report SpellCap only when the word isn't badly spelled. */
603 	*attrp = HLF_SPC;
604 	return wrongcaplen;
605     }
606 
607     return (int)(mi.mi_end - ptr);
608 }
609 
610 /*
611  * Check if the word at "mip->mi_word" is in the tree.
612  * When "mode" is FIND_FOLDWORD check in fold-case word tree.
613  * When "mode" is FIND_KEEPWORD check in keep-case word tree.
614  * When "mode" is FIND_PREFIX check for word after prefix in fold-case word
615  * tree.
616  *
617  * For a match mip->mi_result is updated.
618  */
619     static void
620 find_word(matchinf_T *mip, int mode)
621 {
622     idx_T	arridx = 0;
623     int		endlen[MAXWLEN];    /* length at possible word endings */
624     idx_T	endidx[MAXWLEN];    /* possible word endings */
625     int		endidxcnt = 0;
626     int		len;
627     int		wlen = 0;
628     int		flen;
629     int		c;
630     char_u	*ptr;
631     idx_T	lo, hi, m;
632     char_u	*s;
633     char_u	*p;
634     int		res = SP_BAD;
635     slang_T	*slang = mip->mi_lp->lp_slang;
636     unsigned	flags;
637     char_u	*byts;
638     idx_T	*idxs;
639     int		word_ends;
640     int		prefix_found;
641     int		nobreak_result;
642 
643     if (mode == FIND_KEEPWORD || mode == FIND_KEEPCOMPOUND)
644     {
645 	/* Check for word with matching case in keep-case tree. */
646 	ptr = mip->mi_word;
647 	flen = 9999;		    /* no case folding, always enough bytes */
648 	byts = slang->sl_kbyts;
649 	idxs = slang->sl_kidxs;
650 
651 	if (mode == FIND_KEEPCOMPOUND)
652 	    /* Skip over the previously found word(s). */
653 	    wlen += mip->mi_compoff;
654     }
655     else
656     {
657 	/* Check for case-folded in case-folded tree. */
658 	ptr = mip->mi_fword;
659 	flen = mip->mi_fwordlen;    /* available case-folded bytes */
660 	byts = slang->sl_fbyts;
661 	idxs = slang->sl_fidxs;
662 
663 	if (mode == FIND_PREFIX)
664 	{
665 	    /* Skip over the prefix. */
666 	    wlen = mip->mi_prefixlen;
667 	    flen -= mip->mi_prefixlen;
668 	}
669 	else if (mode == FIND_COMPOUND)
670 	{
671 	    /* Skip over the previously found word(s). */
672 	    wlen = mip->mi_compoff;
673 	    flen -= mip->mi_compoff;
674 	}
675 
676     }
677 
678     if (byts == NULL)
679 	return;			/* array is empty */
680 
681     /*
682      * Repeat advancing in the tree until:
683      * - there is a byte that doesn't match,
684      * - we reach the end of the tree,
685      * - or we reach the end of the line.
686      */
687     for (;;)
688     {
689 	if (flen <= 0 && *mip->mi_fend != NUL)
690 	    flen = fold_more(mip);
691 
692 	len = byts[arridx++];
693 
694 	/* If the first possible byte is a zero the word could end here.
695 	 * Remember this index, we first check for the longest word. */
696 	if (byts[arridx] == 0)
697 	{
698 	    if (endidxcnt == MAXWLEN)
699 	    {
700 		/* Must be a corrupted spell file. */
701 		emsg(_(e_format));
702 		return;
703 	    }
704 	    endlen[endidxcnt] = wlen;
705 	    endidx[endidxcnt++] = arridx++;
706 	    --len;
707 
708 	    /* Skip over the zeros, there can be several flag/region
709 	     * combinations. */
710 	    while (len > 0 && byts[arridx] == 0)
711 	    {
712 		++arridx;
713 		--len;
714 	    }
715 	    if (len == 0)
716 		break;	    /* no children, word must end here */
717 	}
718 
719 	/* Stop looking at end of the line. */
720 	if (ptr[wlen] == NUL)
721 	    break;
722 
723 	/* Perform a binary search in the list of accepted bytes. */
724 	c = ptr[wlen];
725 	if (c == TAB)	    /* <Tab> is handled like <Space> */
726 	    c = ' ';
727 	lo = arridx;
728 	hi = arridx + len - 1;
729 	while (lo < hi)
730 	{
731 	    m = (lo + hi) / 2;
732 	    if (byts[m] > c)
733 		hi = m - 1;
734 	    else if (byts[m] < c)
735 		lo = m + 1;
736 	    else
737 	    {
738 		lo = hi = m;
739 		break;
740 	    }
741 	}
742 
743 	/* Stop if there is no matching byte. */
744 	if (hi < lo || byts[lo] != c)
745 	    break;
746 
747 	/* Continue at the child (if there is one). */
748 	arridx = idxs[lo];
749 	++wlen;
750 	--flen;
751 
752 	/* One space in the good word may stand for several spaces in the
753 	 * checked word. */
754 	if (c == ' ')
755 	{
756 	    for (;;)
757 	    {
758 		if (flen <= 0 && *mip->mi_fend != NUL)
759 		    flen = fold_more(mip);
760 		if (ptr[wlen] != ' ' && ptr[wlen] != TAB)
761 		    break;
762 		++wlen;
763 		--flen;
764 	    }
765 	}
766     }
767 
768     /*
769      * Verify that one of the possible endings is valid.  Try the longest
770      * first.
771      */
772     while (endidxcnt > 0)
773     {
774 	--endidxcnt;
775 	arridx = endidx[endidxcnt];
776 	wlen = endlen[endidxcnt];
777 
778 	if ((*mb_head_off)(ptr, ptr + wlen) > 0)
779 	    continue;	    /* not at first byte of character */
780 	if (spell_iswordp(ptr + wlen, mip->mi_win))
781 	{
782 	    if (slang->sl_compprog == NULL && !slang->sl_nobreak)
783 		continue;	    /* next char is a word character */
784 	    word_ends = FALSE;
785 	}
786 	else
787 	    word_ends = TRUE;
788 	/* The prefix flag is before compound flags.  Once a valid prefix flag
789 	 * has been found we try compound flags. */
790 	prefix_found = FALSE;
791 
792 	if (mode != FIND_KEEPWORD && has_mbyte)
793 	{
794 	    /* Compute byte length in original word, length may change
795 	     * when folding case.  This can be slow, take a shortcut when the
796 	     * case-folded word is equal to the keep-case word. */
797 	    p = mip->mi_word;
798 	    if (STRNCMP(ptr, p, wlen) != 0)
799 	    {
800 		for (s = ptr; s < ptr + wlen; MB_PTR_ADV(s))
801 		    MB_PTR_ADV(p);
802 		wlen = (int)(p - mip->mi_word);
803 	    }
804 	}
805 
806 	/* Check flags and region.  For FIND_PREFIX check the condition and
807 	 * prefix ID.
808 	 * Repeat this if there are more flags/region alternatives until there
809 	 * is a match. */
810 	res = SP_BAD;
811 	for (len = byts[arridx - 1]; len > 0 && byts[arridx] == 0;
812 							      --len, ++arridx)
813 	{
814 	    flags = idxs[arridx];
815 
816 	    /* For the fold-case tree check that the case of the checked word
817 	     * matches with what the word in the tree requires.
818 	     * For keep-case tree the case is always right.  For prefixes we
819 	     * don't bother to check. */
820 	    if (mode == FIND_FOLDWORD)
821 	    {
822 		if (mip->mi_cend != mip->mi_word + wlen)
823 		{
824 		    /* mi_capflags was set for a different word length, need
825 		     * to do it again. */
826 		    mip->mi_cend = mip->mi_word + wlen;
827 		    mip->mi_capflags = captype(mip->mi_word, mip->mi_cend);
828 		}
829 
830 		if (mip->mi_capflags == WF_KEEPCAP
831 				|| !spell_valid_case(mip->mi_capflags, flags))
832 		    continue;
833 	    }
834 
835 	    /* When mode is FIND_PREFIX the word must support the prefix:
836 	     * check the prefix ID and the condition.  Do that for the list at
837 	     * mip->mi_prefarridx that find_prefix() filled. */
838 	    else if (mode == FIND_PREFIX && !prefix_found)
839 	    {
840 		c = valid_word_prefix(mip->mi_prefcnt, mip->mi_prefarridx,
841 				    flags,
842 				    mip->mi_word + mip->mi_cprefixlen, slang,
843 				    FALSE);
844 		if (c == 0)
845 		    continue;
846 
847 		/* Use the WF_RARE flag for a rare prefix. */
848 		if (c & WF_RAREPFX)
849 		    flags |= WF_RARE;
850 		prefix_found = TRUE;
851 	    }
852 
853 	    if (slang->sl_nobreak)
854 	    {
855 		if ((mode == FIND_COMPOUND || mode == FIND_KEEPCOMPOUND)
856 			&& (flags & WF_BANNED) == 0)
857 		{
858 		    /* NOBREAK: found a valid following word.  That's all we
859 		     * need to know, so return. */
860 		    mip->mi_result = SP_OK;
861 		    break;
862 		}
863 	    }
864 
865 	    else if ((mode == FIND_COMPOUND || mode == FIND_KEEPCOMPOUND
866 								|| !word_ends))
867 	    {
868 		/* If there is no compound flag or the word is shorter than
869 		 * COMPOUNDMIN reject it quickly.
870 		 * Makes you wonder why someone puts a compound flag on a word
871 		 * that's too short...  Myspell compatibility requires this
872 		 * anyway. */
873 		if (((unsigned)flags >> 24) == 0
874 			     || wlen - mip->mi_compoff < slang->sl_compminlen)
875 		    continue;
876 		/* For multi-byte chars check character length against
877 		 * COMPOUNDMIN. */
878 		if (has_mbyte
879 			&& slang->sl_compminlen > 0
880 			&& mb_charlen_len(mip->mi_word + mip->mi_compoff,
881 				wlen - mip->mi_compoff) < slang->sl_compminlen)
882 			continue;
883 
884 		/* Limit the number of compound words to COMPOUNDWORDMAX if no
885 		 * maximum for syllables is specified. */
886 		if (!word_ends && mip->mi_complen + mip->mi_compextra + 2
887 							   > slang->sl_compmax
888 					   && slang->sl_compsylmax == MAXWLEN)
889 		    continue;
890 
891 		/* Don't allow compounding on a side where an affix was added,
892 		 * unless COMPOUNDPERMITFLAG was used. */
893 		if (mip->mi_complen > 0 && (flags & WF_NOCOMPBEF))
894 		    continue;
895 		if (!word_ends && (flags & WF_NOCOMPAFT))
896 		    continue;
897 
898 		/* Quickly check if compounding is possible with this flag. */
899 		if (!byte_in_str(mip->mi_complen == 0
900 					? slang->sl_compstartflags
901 					: slang->sl_compallflags,
902 					    ((unsigned)flags >> 24)))
903 		    continue;
904 
905 		/* If there is a match with a CHECKCOMPOUNDPATTERN rule
906 		 * discard the compound word. */
907 		if (match_checkcompoundpattern(ptr, wlen, &slang->sl_comppat))
908 		    continue;
909 
910 		if (mode == FIND_COMPOUND)
911 		{
912 		    int	    capflags;
913 
914 		    /* Need to check the caps type of the appended compound
915 		     * word. */
916 		    if (has_mbyte && STRNCMP(ptr, mip->mi_word,
917 							mip->mi_compoff) != 0)
918 		    {
919 			/* case folding may have changed the length */
920 			p = mip->mi_word;
921 			for (s = ptr; s < ptr + mip->mi_compoff; MB_PTR_ADV(s))
922 			    MB_PTR_ADV(p);
923 		    }
924 		    else
925 			p = mip->mi_word + mip->mi_compoff;
926 		    capflags = captype(p, mip->mi_word + wlen);
927 		    if (capflags == WF_KEEPCAP || (capflags == WF_ALLCAP
928 						 && (flags & WF_FIXCAP) != 0))
929 			continue;
930 
931 		    if (capflags != WF_ALLCAP)
932 		    {
933 			/* When the character before the word is a word
934 			 * character we do not accept a Onecap word.  We do
935 			 * accept a no-caps word, even when the dictionary
936 			 * word specifies ONECAP. */
937 			MB_PTR_BACK(mip->mi_word, p);
938 			if (spell_iswordp_nmw(p, mip->mi_win)
939 				? capflags == WF_ONECAP
940 				: (flags & WF_ONECAP) != 0
941 						     && capflags != WF_ONECAP)
942 			    continue;
943 		    }
944 		}
945 
946 		/* If the word ends the sequence of compound flags of the
947 		 * words must match with one of the COMPOUNDRULE items and
948 		 * the number of syllables must not be too large. */
949 		mip->mi_compflags[mip->mi_complen] = ((unsigned)flags >> 24);
950 		mip->mi_compflags[mip->mi_complen + 1] = NUL;
951 		if (word_ends)
952 		{
953 		    char_u	fword[MAXWLEN];
954 
955 		    if (slang->sl_compsylmax < MAXWLEN)
956 		    {
957 			/* "fword" is only needed for checking syllables. */
958 			if (ptr == mip->mi_word)
959 			    (void)spell_casefold(ptr, wlen, fword, MAXWLEN);
960 			else
961 			    vim_strncpy(fword, ptr, endlen[endidxcnt]);
962 		    }
963 		    if (!can_compound(slang, fword, mip->mi_compflags))
964 			continue;
965 		}
966 		else if (slang->sl_comprules != NULL
967 			     && !match_compoundrule(slang, mip->mi_compflags))
968 		    /* The compound flags collected so far do not match any
969 		     * COMPOUNDRULE, discard the compounded word. */
970 		    continue;
971 	    }
972 
973 	    /* Check NEEDCOMPOUND: can't use word without compounding. */
974 	    else if (flags & WF_NEEDCOMP)
975 		continue;
976 
977 	    nobreak_result = SP_OK;
978 
979 	    if (!word_ends)
980 	    {
981 		int	save_result = mip->mi_result;
982 		char_u	*save_end = mip->mi_end;
983 		langp_T	*save_lp = mip->mi_lp;
984 		int	lpi;
985 
986 		/* Check that a valid word follows.  If there is one and we
987 		 * are compounding, it will set "mi_result", thus we are
988 		 * always finished here.  For NOBREAK we only check that a
989 		 * valid word follows.
990 		 * Recursive! */
991 		if (slang->sl_nobreak)
992 		    mip->mi_result = SP_BAD;
993 
994 		/* Find following word in case-folded tree. */
995 		mip->mi_compoff = endlen[endidxcnt];
996 		if (has_mbyte && mode == FIND_KEEPWORD)
997 		{
998 		    /* Compute byte length in case-folded word from "wlen":
999 		     * byte length in keep-case word.  Length may change when
1000 		     * folding case.  This can be slow, take a shortcut when
1001 		     * the case-folded word is equal to the keep-case word. */
1002 		    p = mip->mi_fword;
1003 		    if (STRNCMP(ptr, p, wlen) != 0)
1004 		    {
1005 			for (s = ptr; s < ptr + wlen; MB_PTR_ADV(s))
1006 			    MB_PTR_ADV(p);
1007 			mip->mi_compoff = (int)(p - mip->mi_fword);
1008 		    }
1009 		}
1010 #if 0 /* Disabled, see below */
1011 		c = mip->mi_compoff;
1012 #endif
1013 		++mip->mi_complen;
1014 		if (flags & WF_COMPROOT)
1015 		    ++mip->mi_compextra;
1016 
1017 		/* For NOBREAK we need to try all NOBREAK languages, at least
1018 		 * to find the ".add" file(s). */
1019 		for (lpi = 0; lpi < mip->mi_win->w_s->b_langp.ga_len; ++lpi)
1020 		{
1021 		    if (slang->sl_nobreak)
1022 		    {
1023 			mip->mi_lp = LANGP_ENTRY(mip->mi_win->w_s->b_langp, lpi);
1024 			if (mip->mi_lp->lp_slang->sl_fidxs == NULL
1025 					 || !mip->mi_lp->lp_slang->sl_nobreak)
1026 			    continue;
1027 		    }
1028 
1029 		    find_word(mip, FIND_COMPOUND);
1030 
1031 		    /* When NOBREAK any word that matches is OK.  Otherwise we
1032 		     * need to find the longest match, thus try with keep-case
1033 		     * and prefix too. */
1034 		    if (!slang->sl_nobreak || mip->mi_result == SP_BAD)
1035 		    {
1036 			/* Find following word in keep-case tree. */
1037 			mip->mi_compoff = wlen;
1038 			find_word(mip, FIND_KEEPCOMPOUND);
1039 
1040 #if 0	    /* Disabled, a prefix must not appear halfway a compound word,
1041 	       unless the COMPOUNDPERMITFLAG is used and then it can't be a
1042 	       postponed prefix. */
1043 			if (!slang->sl_nobreak || mip->mi_result == SP_BAD)
1044 			{
1045 			    /* Check for following word with prefix. */
1046 			    mip->mi_compoff = c;
1047 			    find_prefix(mip, FIND_COMPOUND);
1048 			}
1049 #endif
1050 		    }
1051 
1052 		    if (!slang->sl_nobreak)
1053 			break;
1054 		}
1055 		--mip->mi_complen;
1056 		if (flags & WF_COMPROOT)
1057 		    --mip->mi_compextra;
1058 		mip->mi_lp = save_lp;
1059 
1060 		if (slang->sl_nobreak)
1061 		{
1062 		    nobreak_result = mip->mi_result;
1063 		    mip->mi_result = save_result;
1064 		    mip->mi_end = save_end;
1065 		}
1066 		else
1067 		{
1068 		    if (mip->mi_result == SP_OK)
1069 			break;
1070 		    continue;
1071 		}
1072 	    }
1073 
1074 	    if (flags & WF_BANNED)
1075 		res = SP_BANNED;
1076 	    else if (flags & WF_REGION)
1077 	    {
1078 		/* Check region. */
1079 		if ((mip->mi_lp->lp_region & (flags >> 16)) != 0)
1080 		    res = SP_OK;
1081 		else
1082 		    res = SP_LOCAL;
1083 	    }
1084 	    else if (flags & WF_RARE)
1085 		res = SP_RARE;
1086 	    else
1087 		res = SP_OK;
1088 
1089 	    /* Always use the longest match and the best result.  For NOBREAK
1090 	     * we separately keep the longest match without a following good
1091 	     * word as a fall-back. */
1092 	    if (nobreak_result == SP_BAD)
1093 	    {
1094 		if (mip->mi_result2 > res)
1095 		{
1096 		    mip->mi_result2 = res;
1097 		    mip->mi_end2 = mip->mi_word + wlen;
1098 		}
1099 		else if (mip->mi_result2 == res
1100 					&& mip->mi_end2 < mip->mi_word + wlen)
1101 		    mip->mi_end2 = mip->mi_word + wlen;
1102 	    }
1103 	    else if (mip->mi_result > res)
1104 	    {
1105 		mip->mi_result = res;
1106 		mip->mi_end = mip->mi_word + wlen;
1107 	    }
1108 	    else if (mip->mi_result == res && mip->mi_end < mip->mi_word + wlen)
1109 		mip->mi_end = mip->mi_word + wlen;
1110 
1111 	    if (mip->mi_result == SP_OK)
1112 		break;
1113 	}
1114 
1115 	if (mip->mi_result == SP_OK)
1116 	    break;
1117     }
1118 }
1119 
1120 /*
1121  * Return TRUE if there is a match between the word ptr[wlen] and
1122  * CHECKCOMPOUNDPATTERN rules, assuming that we will concatenate with another
1123  * word.
1124  * A match means that the first part of CHECKCOMPOUNDPATTERN matches at the
1125  * end of ptr[wlen] and the second part matches after it.
1126  */
1127     static int
1128 match_checkcompoundpattern(
1129     char_u	*ptr,
1130     int		wlen,
1131     garray_T	*gap)  /* &sl_comppat */
1132 {
1133     int		i;
1134     char_u	*p;
1135     int		len;
1136 
1137     for (i = 0; i + 1 < gap->ga_len; i += 2)
1138     {
1139 	p = ((char_u **)gap->ga_data)[i + 1];
1140 	if (STRNCMP(ptr + wlen, p, STRLEN(p)) == 0)
1141 	{
1142 	    /* Second part matches at start of following compound word, now
1143 	     * check if first part matches at end of previous word. */
1144 	    p = ((char_u **)gap->ga_data)[i];
1145 	    len = (int)STRLEN(p);
1146 	    if (len <= wlen && STRNCMP(ptr + wlen - len, p, len) == 0)
1147 		return TRUE;
1148 	}
1149     }
1150     return FALSE;
1151 }
1152 
1153 /*
1154  * Return TRUE if "flags" is a valid sequence of compound flags and "word"
1155  * does not have too many syllables.
1156  */
1157     static int
1158 can_compound(slang_T *slang, char_u *word, char_u *flags)
1159 {
1160     char_u	uflags[MAXWLEN * 2];
1161     int		i;
1162     char_u	*p;
1163 
1164     if (slang->sl_compprog == NULL)
1165 	return FALSE;
1166     if (enc_utf8)
1167     {
1168 	/* Need to convert the single byte flags to utf8 characters. */
1169 	p = uflags;
1170 	for (i = 0; flags[i] != NUL; ++i)
1171 	    p += utf_char2bytes(flags[i], p);
1172 	*p = NUL;
1173 	p = uflags;
1174     }
1175     else
1176 	p = flags;
1177     if (!vim_regexec_prog(&slang->sl_compprog, FALSE, p, 0))
1178 	return FALSE;
1179 
1180     /* Count the number of syllables.  This may be slow, do it last.  If there
1181      * are too many syllables AND the number of compound words is above
1182      * COMPOUNDWORDMAX then compounding is not allowed. */
1183     if (slang->sl_compsylmax < MAXWLEN
1184 		       && count_syllables(slang, word) > slang->sl_compsylmax)
1185 	return (int)STRLEN(flags) < slang->sl_compmax;
1186     return TRUE;
1187 }
1188 
1189 /*
1190  * Return TRUE when the sequence of flags in "compflags" plus "flag" can
1191  * possibly form a valid compounded word.  This also checks the COMPOUNDRULE
1192  * lines if they don't contain wildcards.
1193  */
1194     static int
1195 can_be_compound(
1196     trystate_T	*sp,
1197     slang_T	*slang,
1198     char_u	*compflags,
1199     int		flag)
1200 {
1201     /* If the flag doesn't appear in sl_compstartflags or sl_compallflags
1202      * then it can't possibly compound. */
1203     if (!byte_in_str(sp->ts_complen == sp->ts_compsplit
1204 		? slang->sl_compstartflags : slang->sl_compallflags, flag))
1205 	return FALSE;
1206 
1207     /* If there are no wildcards, we can check if the flags collected so far
1208      * possibly can form a match with COMPOUNDRULE patterns.  This only
1209      * makes sense when we have two or more words. */
1210     if (slang->sl_comprules != NULL && sp->ts_complen > sp->ts_compsplit)
1211     {
1212 	int v;
1213 
1214 	compflags[sp->ts_complen] = flag;
1215 	compflags[sp->ts_complen + 1] = NUL;
1216 	v = match_compoundrule(slang, compflags + sp->ts_compsplit);
1217 	compflags[sp->ts_complen] = NUL;
1218 	return v;
1219     }
1220 
1221     return TRUE;
1222 }
1223 
1224 
1225 /*
1226  * Return TRUE if the compound flags in compflags[] match the start of any
1227  * compound rule.  This is used to stop trying a compound if the flags
1228  * collected so far can't possibly match any compound rule.
1229  * Caller must check that slang->sl_comprules is not NULL.
1230  */
1231     static int
1232 match_compoundrule(slang_T *slang, char_u *compflags)
1233 {
1234     char_u	*p;
1235     int		i;
1236     int		c;
1237 
1238     /* loop over all the COMPOUNDRULE entries */
1239     for (p = slang->sl_comprules; *p != NUL; ++p)
1240     {
1241 	/* loop over the flags in the compound word we have made, match
1242 	 * them against the current rule entry */
1243 	for (i = 0; ; ++i)
1244 	{
1245 	    c = compflags[i];
1246 	    if (c == NUL)
1247 		/* found a rule that matches for the flags we have so far */
1248 		return TRUE;
1249 	    if (*p == '/' || *p == NUL)
1250 		break;  /* end of rule, it's too short */
1251 	    if (*p == '[')
1252 	    {
1253 		int match = FALSE;
1254 
1255 		/* compare against all the flags in [] */
1256 		++p;
1257 		while (*p != ']' && *p != NUL)
1258 		    if (*p++ == c)
1259 			match = TRUE;
1260 		if (!match)
1261 		    break;  /* none matches */
1262 	    }
1263 	    else if (*p != c)
1264 		break;  /* flag of word doesn't match flag in pattern */
1265 	    ++p;
1266 	}
1267 
1268 	/* Skip to the next "/", where the next pattern starts. */
1269 	p = vim_strchr(p, '/');
1270 	if (p == NULL)
1271 	    break;
1272     }
1273 
1274     /* Checked all the rules and none of them match the flags, so there
1275      * can't possibly be a compound starting with these flags. */
1276     return FALSE;
1277 }
1278 
1279 /*
1280  * Return non-zero if the prefix indicated by "arridx" matches with the prefix
1281  * ID in "flags" for the word "word".
1282  * The WF_RAREPFX flag is included in the return value for a rare prefix.
1283  */
1284     static int
1285 valid_word_prefix(
1286     int		totprefcnt,	/* nr of prefix IDs */
1287     int		arridx,		/* idx in sl_pidxs[] */
1288     int		flags,
1289     char_u	*word,
1290     slang_T	*slang,
1291     int		cond_req)	/* only use prefixes with a condition */
1292 {
1293     int		prefcnt;
1294     int		pidx;
1295     regprog_T	**rp;
1296     int		prefid;
1297 
1298     prefid = (unsigned)flags >> 24;
1299     for (prefcnt = totprefcnt - 1; prefcnt >= 0; --prefcnt)
1300     {
1301 	pidx = slang->sl_pidxs[arridx + prefcnt];
1302 
1303 	/* Check the prefix ID. */
1304 	if (prefid != (pidx & 0xff))
1305 	    continue;
1306 
1307 	/* Check if the prefix doesn't combine and the word already has a
1308 	 * suffix. */
1309 	if ((flags & WF_HAS_AFF) && (pidx & WF_PFX_NC))
1310 	    continue;
1311 
1312 	/* Check the condition, if there is one.  The condition index is
1313 	 * stored in the two bytes above the prefix ID byte.  */
1314 	rp = &slang->sl_prefprog[((unsigned)pidx >> 8) & 0xffff];
1315 	if (*rp != NULL)
1316 	{
1317 	    if (!vim_regexec_prog(rp, FALSE, word, 0))
1318 		continue;
1319 	}
1320 	else if (cond_req)
1321 	    continue;
1322 
1323 	/* It's a match!  Return the WF_ flags. */
1324 	return pidx;
1325     }
1326     return 0;
1327 }
1328 
1329 /*
1330  * Check if the word at "mip->mi_word" has a matching prefix.
1331  * If it does, then check the following word.
1332  *
1333  * If "mode" is "FIND_COMPOUND" then do the same after another word, find a
1334  * prefix in a compound word.
1335  *
1336  * For a match mip->mi_result is updated.
1337  */
1338     static void
1339 find_prefix(matchinf_T *mip, int mode)
1340 {
1341     idx_T	arridx = 0;
1342     int		len;
1343     int		wlen = 0;
1344     int		flen;
1345     int		c;
1346     char_u	*ptr;
1347     idx_T	lo, hi, m;
1348     slang_T	*slang = mip->mi_lp->lp_slang;
1349     char_u	*byts;
1350     idx_T	*idxs;
1351 
1352     byts = slang->sl_pbyts;
1353     if (byts == NULL)
1354 	return;			/* array is empty */
1355 
1356     /* We use the case-folded word here, since prefixes are always
1357      * case-folded. */
1358     ptr = mip->mi_fword;
1359     flen = mip->mi_fwordlen;    /* available case-folded bytes */
1360     if (mode == FIND_COMPOUND)
1361     {
1362 	/* Skip over the previously found word(s). */
1363 	ptr += mip->mi_compoff;
1364 	flen -= mip->mi_compoff;
1365     }
1366     idxs = slang->sl_pidxs;
1367 
1368     /*
1369      * Repeat advancing in the tree until:
1370      * - there is a byte that doesn't match,
1371      * - we reach the end of the tree,
1372      * - or we reach the end of the line.
1373      */
1374     for (;;)
1375     {
1376 	if (flen == 0 && *mip->mi_fend != NUL)
1377 	    flen = fold_more(mip);
1378 
1379 	len = byts[arridx++];
1380 
1381 	/* If the first possible byte is a zero the prefix could end here.
1382 	 * Check if the following word matches and supports the prefix. */
1383 	if (byts[arridx] == 0)
1384 	{
1385 	    /* There can be several prefixes with different conditions.  We
1386 	     * try them all, since we don't know which one will give the
1387 	     * longest match.  The word is the same each time, pass the list
1388 	     * of possible prefixes to find_word(). */
1389 	    mip->mi_prefarridx = arridx;
1390 	    mip->mi_prefcnt = len;
1391 	    while (len > 0 && byts[arridx] == 0)
1392 	    {
1393 		++arridx;
1394 		--len;
1395 	    }
1396 	    mip->mi_prefcnt -= len;
1397 
1398 	    /* Find the word that comes after the prefix. */
1399 	    mip->mi_prefixlen = wlen;
1400 	    if (mode == FIND_COMPOUND)
1401 		/* Skip over the previously found word(s). */
1402 		mip->mi_prefixlen += mip->mi_compoff;
1403 
1404 	    if (has_mbyte)
1405 	    {
1406 		/* Case-folded length may differ from original length. */
1407 		mip->mi_cprefixlen = nofold_len(mip->mi_fword,
1408 					     mip->mi_prefixlen, mip->mi_word);
1409 	    }
1410 	    else
1411 		mip->mi_cprefixlen = mip->mi_prefixlen;
1412 	    find_word(mip, FIND_PREFIX);
1413 
1414 
1415 	    if (len == 0)
1416 		break;	    /* no children, word must end here */
1417 	}
1418 
1419 	/* Stop looking at end of the line. */
1420 	if (ptr[wlen] == NUL)
1421 	    break;
1422 
1423 	/* Perform a binary search in the list of accepted bytes. */
1424 	c = ptr[wlen];
1425 	lo = arridx;
1426 	hi = arridx + len - 1;
1427 	while (lo < hi)
1428 	{
1429 	    m = (lo + hi) / 2;
1430 	    if (byts[m] > c)
1431 		hi = m - 1;
1432 	    else if (byts[m] < c)
1433 		lo = m + 1;
1434 	    else
1435 	    {
1436 		lo = hi = m;
1437 		break;
1438 	    }
1439 	}
1440 
1441 	/* Stop if there is no matching byte. */
1442 	if (hi < lo || byts[lo] != c)
1443 	    break;
1444 
1445 	/* Continue at the child (if there is one). */
1446 	arridx = idxs[lo];
1447 	++wlen;
1448 	--flen;
1449     }
1450 }
1451 
1452 /*
1453  * Need to fold at least one more character.  Do until next non-word character
1454  * for efficiency.  Include the non-word character too.
1455  * Return the length of the folded chars in bytes.
1456  */
1457     static int
1458 fold_more(matchinf_T *mip)
1459 {
1460     int		flen;
1461     char_u	*p;
1462 
1463     p = mip->mi_fend;
1464     do
1465 	MB_PTR_ADV(mip->mi_fend);
1466     while (*mip->mi_fend != NUL && spell_iswordp(mip->mi_fend, mip->mi_win));
1467 
1468     /* Include the non-word character so that we can check for the word end. */
1469     if (*mip->mi_fend != NUL)
1470 	MB_PTR_ADV(mip->mi_fend);
1471 
1472     (void)spell_casefold(p, (int)(mip->mi_fend - p),
1473 			     mip->mi_fword + mip->mi_fwordlen,
1474 			     MAXWLEN - mip->mi_fwordlen);
1475     flen = (int)STRLEN(mip->mi_fword + mip->mi_fwordlen);
1476     mip->mi_fwordlen += flen;
1477     return flen;
1478 }
1479 
1480 /*
1481  * Check case flags for a word.  Return TRUE if the word has the requested
1482  * case.
1483  */
1484     static int
1485 spell_valid_case(
1486     int	    wordflags,	    /* flags for the checked word. */
1487     int	    treeflags)	    /* flags for the word in the spell tree */
1488 {
1489     return ((wordflags == WF_ALLCAP && (treeflags & WF_FIXCAP) == 0)
1490 	    || ((treeflags & (WF_ALLCAP | WF_KEEPCAP)) == 0
1491 		&& ((treeflags & WF_ONECAP) == 0
1492 					   || (wordflags & WF_ONECAP) != 0)));
1493 }
1494 
1495 /*
1496  * Return TRUE if spell checking is not enabled.
1497  */
1498     static int
1499 no_spell_checking(win_T *wp)
1500 {
1501     if (!wp->w_p_spell || *wp->w_s->b_p_spl == NUL
1502 					 || wp->w_s->b_langp.ga_len == 0)
1503     {
1504 	emsg(_("E756: Spell checking is not enabled"));
1505 	return TRUE;
1506     }
1507     return FALSE;
1508 }
1509 
1510 /*
1511  * Move to next spell error.
1512  * "curline" is FALSE for "[s", "]s", "[S" and "]S".
1513  * "curline" is TRUE to find word under/after cursor in the same line.
1514  * For Insert mode completion "dir" is BACKWARD and "curline" is TRUE: move
1515  * to after badly spelled word before the cursor.
1516  * Return 0 if not found, length of the badly spelled word otherwise.
1517  */
1518     int
1519 spell_move_to(
1520     win_T	*wp,
1521     int		dir,		/* FORWARD or BACKWARD */
1522     int		allwords,	/* TRUE for "[s"/"]s", FALSE for "[S"/"]S" */
1523     int		curline,
1524     hlf_T	*attrp)		/* return: attributes of bad word or NULL
1525 				   (only when "dir" is FORWARD) */
1526 {
1527     linenr_T	lnum;
1528     pos_T	found_pos;
1529     int		found_len = 0;
1530     char_u	*line;
1531     char_u	*p;
1532     char_u	*endp;
1533     hlf_T	attr;
1534     int		len;
1535 #ifdef FEAT_SYN_HL
1536     int		has_syntax = syntax_present(wp);
1537 #endif
1538     int		col;
1539     int		can_spell;
1540     char_u	*buf = NULL;
1541     int		buflen = 0;
1542     int		skip = 0;
1543     int		capcol = -1;
1544     int		found_one = FALSE;
1545     int		wrapped = FALSE;
1546 
1547     if (no_spell_checking(wp))
1548 	return 0;
1549 
1550     /*
1551      * Start looking for bad word at the start of the line, because we can't
1552      * start halfway a word, we don't know where it starts or ends.
1553      *
1554      * When searching backwards, we continue in the line to find the last
1555      * bad word (in the cursor line: before the cursor).
1556      *
1557      * We concatenate the start of the next line, so that wrapped words work
1558      * (e.g. "et<line-break>cetera").  Doesn't work when searching backwards
1559      * though...
1560      */
1561     lnum = wp->w_cursor.lnum;
1562     CLEAR_POS(&found_pos);
1563 
1564     while (!got_int)
1565     {
1566 	line = ml_get_buf(wp->w_buffer, lnum, FALSE);
1567 
1568 	len = (int)STRLEN(line);
1569 	if (buflen < len + MAXWLEN + 2)
1570 	{
1571 	    vim_free(buf);
1572 	    buflen = len + MAXWLEN + 2;
1573 	    buf = alloc(buflen);
1574 	    if (buf == NULL)
1575 		break;
1576 	}
1577 
1578 	/* In first line check first word for Capital. */
1579 	if (lnum == 1)
1580 	    capcol = 0;
1581 
1582 	/* For checking first word with a capital skip white space. */
1583 	if (capcol == 0)
1584 	    capcol = getwhitecols(line);
1585 	else if (curline && wp == curwin)
1586 	{
1587 	    /* For spellbadword(): check if first word needs a capital. */
1588 	    col = getwhitecols(line);
1589 	    if (check_need_cap(lnum, col))
1590 		capcol = col;
1591 
1592 	    /* Need to get the line again, may have looked at the previous
1593 	     * one. */
1594 	    line = ml_get_buf(wp->w_buffer, lnum, FALSE);
1595 	}
1596 
1597 	/* Copy the line into "buf" and append the start of the next line if
1598 	 * possible. */
1599 	STRCPY(buf, line);
1600 	if (lnum < wp->w_buffer->b_ml.ml_line_count)
1601 	    spell_cat_line(buf + STRLEN(buf),
1602 			  ml_get_buf(wp->w_buffer, lnum + 1, FALSE), MAXWLEN);
1603 
1604 	p = buf + skip;
1605 	endp = buf + len;
1606 	while (p < endp)
1607 	{
1608 	    /* When searching backward don't search after the cursor.  Unless
1609 	     * we wrapped around the end of the buffer. */
1610 	    if (dir == BACKWARD
1611 		    && lnum == wp->w_cursor.lnum
1612 		    && !wrapped
1613 		    && (colnr_T)(p - buf) >= wp->w_cursor.col)
1614 		break;
1615 
1616 	    /* start of word */
1617 	    attr = HLF_COUNT;
1618 	    len = spell_check(wp, p, &attr, &capcol, FALSE);
1619 
1620 	    if (attr != HLF_COUNT)
1621 	    {
1622 		/* We found a bad word.  Check the attribute. */
1623 		if (allwords || attr == HLF_SPB)
1624 		{
1625 		    /* When searching forward only accept a bad word after
1626 		     * the cursor. */
1627 		    if (dir == BACKWARD
1628 			    || lnum != wp->w_cursor.lnum
1629 			    || (lnum == wp->w_cursor.lnum
1630 				&& (wrapped
1631 				    || (colnr_T)(curline ? p - buf + len
1632 						     : p - buf)
1633 						  > wp->w_cursor.col)))
1634 		    {
1635 #ifdef FEAT_SYN_HL
1636 			if (has_syntax)
1637 			{
1638 			    col = (int)(p - buf);
1639 			    (void)syn_get_id(wp, lnum, (colnr_T)col,
1640 						    FALSE, &can_spell, FALSE);
1641 			    if (!can_spell)
1642 				attr = HLF_COUNT;
1643 			}
1644 			else
1645 #endif
1646 			    can_spell = TRUE;
1647 
1648 			if (can_spell)
1649 			{
1650 			    found_one = TRUE;
1651 			    found_pos.lnum = lnum;
1652 			    found_pos.col = (int)(p - buf);
1653 			    found_pos.coladd = 0;
1654 			    if (dir == FORWARD)
1655 			    {
1656 				/* No need to search further. */
1657 				wp->w_cursor = found_pos;
1658 				vim_free(buf);
1659 				if (attrp != NULL)
1660 				    *attrp = attr;
1661 				return len;
1662 			    }
1663 			    else if (curline)
1664 				/* Insert mode completion: put cursor after
1665 				 * the bad word. */
1666 				found_pos.col += len;
1667 			    found_len = len;
1668 			}
1669 		    }
1670 		    else
1671 			found_one = TRUE;
1672 		}
1673 	    }
1674 
1675 	    /* advance to character after the word */
1676 	    p += len;
1677 	    capcol -= len;
1678 	}
1679 
1680 	if (dir == BACKWARD && found_pos.lnum != 0)
1681 	{
1682 	    /* Use the last match in the line (before the cursor). */
1683 	    wp->w_cursor = found_pos;
1684 	    vim_free(buf);
1685 	    return found_len;
1686 	}
1687 
1688 	if (curline)
1689 	    break;	/* only check cursor line */
1690 
1691 	/* If we are back at the starting line and searched it again there
1692 	 * is no match, give up. */
1693 	if (lnum == wp->w_cursor.lnum && wrapped)
1694 	    break;
1695 
1696 	/* Advance to next line. */
1697 	if (dir == BACKWARD)
1698 	{
1699 	    if (lnum > 1)
1700 		--lnum;
1701 	    else if (!p_ws)
1702 		break;	    /* at first line and 'nowrapscan' */
1703 	    else
1704 	    {
1705 		/* Wrap around to the end of the buffer.  May search the
1706 		 * starting line again and accept the last match. */
1707 		lnum = wp->w_buffer->b_ml.ml_line_count;
1708 		wrapped = TRUE;
1709 		if (!shortmess(SHM_SEARCH))
1710 		    give_warning((char_u *)_(top_bot_msg), TRUE);
1711 	    }
1712 	    capcol = -1;
1713 	}
1714 	else
1715 	{
1716 	    if (lnum < wp->w_buffer->b_ml.ml_line_count)
1717 		++lnum;
1718 	    else if (!p_ws)
1719 		break;	    /* at first line and 'nowrapscan' */
1720 	    else
1721 	    {
1722 		/* Wrap around to the start of the buffer.  May search the
1723 		 * starting line again and accept the first match. */
1724 		lnum = 1;
1725 		wrapped = TRUE;
1726 		if (!shortmess(SHM_SEARCH))
1727 		    give_warning((char_u *)_(bot_top_msg), TRUE);
1728 	    }
1729 
1730 	    /* If we are back at the starting line and there is no match then
1731 	     * give up. */
1732 	    if (lnum == wp->w_cursor.lnum && !found_one)
1733 		break;
1734 
1735 	    /* Skip the characters at the start of the next line that were
1736 	     * included in a match crossing line boundaries. */
1737 	    if (attr == HLF_COUNT)
1738 		skip = (int)(p - endp);
1739 	    else
1740 		skip = 0;
1741 
1742 	    /* Capcol skips over the inserted space. */
1743 	    --capcol;
1744 
1745 	    /* But after empty line check first word in next line */
1746 	    if (*skipwhite(line) == NUL)
1747 		capcol = 0;
1748 	}
1749 
1750 	line_breakcheck();
1751     }
1752 
1753     vim_free(buf);
1754     return 0;
1755 }
1756 
1757 /*
1758  * For spell checking: concatenate the start of the following line "line" into
1759  * "buf", blanking-out special characters.  Copy less then "maxlen" bytes.
1760  * Keep the blanks at the start of the next line, this is used in win_line()
1761  * to skip those bytes if the word was OK.
1762  */
1763     void
1764 spell_cat_line(char_u *buf, char_u *line, int maxlen)
1765 {
1766     char_u	*p;
1767     int		n;
1768 
1769     p = skipwhite(line);
1770     while (vim_strchr((char_u *)"*#/\"\t", *p) != NULL)
1771 	p = skipwhite(p + 1);
1772 
1773     if (*p != NUL)
1774     {
1775 	/* Only worth concatenating if there is something else than spaces to
1776 	 * concatenate. */
1777 	n = (int)(p - line) + 1;
1778 	if (n < maxlen - 1)
1779 	{
1780 	    vim_memset(buf, ' ', n);
1781 	    vim_strncpy(buf +  n, p, maxlen - 1 - n);
1782 	}
1783     }
1784 }
1785 
1786 /*
1787  * Structure used for the cookie argument of do_in_runtimepath().
1788  */
1789 typedef struct spelload_S
1790 {
1791     char_u  sl_lang[MAXWLEN + 1];	/* language name */
1792     slang_T *sl_slang;			/* resulting slang_T struct */
1793     int	    sl_nobreak;			/* NOBREAK language found */
1794 } spelload_T;
1795 
1796 /*
1797  * Load word list(s) for "lang" from Vim spell file(s).
1798  * "lang" must be the language without the region: e.g., "en".
1799  */
1800     static void
1801 spell_load_lang(char_u *lang)
1802 {
1803     char_u	fname_enc[85];
1804     int		r;
1805     spelload_T	sl;
1806     int		round;
1807 
1808     /* Copy the language name to pass it to spell_load_cb() as a cookie.
1809      * It's truncated when an error is detected. */
1810     STRCPY(sl.sl_lang, lang);
1811     sl.sl_slang = NULL;
1812     sl.sl_nobreak = FALSE;
1813 
1814     /* We may retry when no spell file is found for the language, an
1815      * autocommand may load it then. */
1816     for (round = 1; round <= 2; ++round)
1817     {
1818 	/*
1819 	 * Find the first spell file for "lang" in 'runtimepath' and load it.
1820 	 */
1821 	vim_snprintf((char *)fname_enc, sizeof(fname_enc) - 5,
1822 #ifdef VMS
1823 					"spell/%s_%s.spl",
1824 #else
1825 					"spell/%s.%s.spl",
1826 #endif
1827 							   lang, spell_enc());
1828 	r = do_in_runtimepath(fname_enc, 0, spell_load_cb, &sl);
1829 
1830 	if (r == FAIL && *sl.sl_lang != NUL)
1831 	{
1832 	    /* Try loading the ASCII version. */
1833 	    vim_snprintf((char *)fname_enc, sizeof(fname_enc) - 5,
1834 #ifdef VMS
1835 						  "spell/%s_ascii.spl",
1836 #else
1837 						  "spell/%s.ascii.spl",
1838 #endif
1839 									lang);
1840 	    r = do_in_runtimepath(fname_enc, 0, spell_load_cb, &sl);
1841 
1842 	    if (r == FAIL && *sl.sl_lang != NUL && round == 1
1843 		    && apply_autocmds(EVENT_SPELLFILEMISSING, lang,
1844 					      curbuf->b_fname, FALSE, curbuf))
1845 		continue;
1846 	    break;
1847 	}
1848 	break;
1849     }
1850 
1851     if (r == FAIL)
1852     {
1853 	smsg(
1854 #ifdef VMS
1855 	_("Warning: Cannot find word list \"%s_%s.spl\" or \"%s_ascii.spl\""),
1856 #else
1857 	_("Warning: Cannot find word list \"%s.%s.spl\" or \"%s.ascii.spl\""),
1858 #endif
1859 						     lang, spell_enc(), lang);
1860     }
1861     else if (sl.sl_slang != NULL)
1862     {
1863 	/* At least one file was loaded, now load ALL the additions. */
1864 	STRCPY(fname_enc + STRLEN(fname_enc) - 3, "add.spl");
1865 	do_in_runtimepath(fname_enc, DIP_ALL, spell_load_cb, &sl);
1866     }
1867 }
1868 
1869 /*
1870  * Return the encoding used for spell checking: Use 'encoding', except that we
1871  * use "latin1" for "latin9".  And limit to 60 characters (just in case).
1872  */
1873     char_u *
1874 spell_enc(void)
1875 {
1876 
1877     if (STRLEN(p_enc) < 60 && STRCMP(p_enc, "iso-8859-15") != 0)
1878 	return p_enc;
1879     return (char_u *)"latin1";
1880 }
1881 
/*
 * Get the name of the .spl file for the internal wordlist into
 * "fname[MAXPATHL]".
 * The name is formatted from SPL_FNAME_TMPL with "int_wordlist" and the
 * encoding name returned by spell_enc().
 * The callers in this file only use it when "int_wordlist" is not NULL.
 */
    static void
int_wordlist_spl(char_u *fname)
{
    vim_snprintf((char *)fname, MAXPATHL, SPL_FNAME_TMPL,
						  int_wordlist, spell_enc());
}
1892 
1893 /*
1894  * Allocate a new slang_T for language "lang".  "lang" can be NULL.
1895  * Caller must fill "sl_next".
1896  */
1897     slang_T *
1898 slang_alloc(char_u *lang)
1899 {
1900     slang_T *lp;
1901 
1902     lp = (slang_T *)alloc_clear(sizeof(slang_T));
1903     if (lp != NULL)
1904     {
1905 	if (lang != NULL)
1906 	    lp->sl_name = vim_strsave(lang);
1907 	ga_init2(&lp->sl_rep, sizeof(fromto_T), 10);
1908 	ga_init2(&lp->sl_repsal, sizeof(fromto_T), 10);
1909 	lp->sl_compmax = MAXWLEN;
1910 	lp->sl_compsylmax = MAXWLEN;
1911 	hash_init(&lp->sl_wordcount);
1912     }
1913 
1914     return lp;
1915 }
1916 
1917 /*
1918  * Free the contents of an slang_T and the structure itself.
1919  */
1920     void
1921 slang_free(slang_T *lp)
1922 {
1923     vim_free(lp->sl_name);
1924     vim_free(lp->sl_fname);
1925     slang_clear(lp);
1926     vim_free(lp);
1927 }
1928 
/*
 * Clear an slang_T so that the file can be reloaded.
 * Frees the word trees, REP/REPSAL/SAL items, prefix and compound info,
 * syllable info, the word count table, the map hashtable and the .sug info,
 * and resets the compound limits and regions to their initial values.
 * "sl_name", "sl_fname" and "sl_next" are not touched here.
 */
    void
slang_clear(slang_T *lp)
{
    garray_T	*gap;
    fromto_T	*ftp;
    salitem_T	*smp;
    int		i;
    int		round;

    /* The byte arrays of the three word trees. */
    VIM_CLEAR(lp->sl_fbyts);
    VIM_CLEAR(lp->sl_kbyts);
    VIM_CLEAR(lp->sl_pbyts);

    /* The matching index arrays. */
    VIM_CLEAR(lp->sl_fidxs);
    VIM_CLEAR(lp->sl_kidxs);
    VIM_CLEAR(lp->sl_pidxs);

    /* round 1: free the REP items, round 2: free the REPSAL items. */
    for (round = 1; round <= 2; ++round)
    {
	gap = round == 1 ? &lp->sl_rep : &lp->sl_repsal;
	while (gap->ga_len > 0)
	{
	    ftp = &((fromto_T *)gap->ga_data)[--gap->ga_len];
	    vim_free(ftp->ft_from);
	    vim_free(ftp->ft_to);
	}
	ga_clear(gap);
    }

    /* What "sl_sal" contains depends on "sl_sofo". */
    gap = &lp->sl_sal;
    if (lp->sl_sofo)
    {
	/* "ga_len" is set to 1 without adding an item for latin1 */
	if (gap->ga_data != NULL)
	    /* SOFOFROM and SOFOTO items: free lists of wide characters. */
	    for (i = 0; i < gap->ga_len; ++i)
		vim_free(((int **)gap->ga_data)[i]);
    }
    else
	/* SAL items: free salitem_T items */
	while (gap->ga_len > 0)
	{
	    smp = &((salitem_T *)gap->ga_data)[--gap->ga_len];
	    vim_free(smp->sm_lead);
	    /* Don't free sm_oneof and sm_rules, they point into sm_lead. */
	    vim_free(smp->sm_to);
	    vim_free(smp->sm_lead_w);
	    vim_free(smp->sm_oneof_w);
	    vim_free(smp->sm_to_w);
	}
    ga_clear(gap);

    /* Compiled prefix conditions: free each program, then the array. */
    for (i = 0; i < lp->sl_prefixcnt; ++i)
	vim_regfree(lp->sl_prefprog[i]);
    lp->sl_prefixcnt = 0;
    VIM_CLEAR(lp->sl_prefprog);

    VIM_CLEAR(lp->sl_info);

    VIM_CLEAR(lp->sl_midword);

    /* Compound word info. */
    vim_regfree(lp->sl_compprog);
    lp->sl_compprog = NULL;
    VIM_CLEAR(lp->sl_comprules);
    VIM_CLEAR(lp->sl_compstartflags);
    VIM_CLEAR(lp->sl_compallflags);

    /* Syllable info. */
    VIM_CLEAR(lp->sl_syllable);
    ga_clear(&lp->sl_syl_items);

    ga_clear_strings(&lp->sl_comppat);

    /* Free the word counts and leave an empty, usable table behind. */
    hash_clear_all(&lp->sl_wordcount, WC_KEY_OFF);
    hash_init(&lp->sl_wordcount);

    /* NOTE(review): unlike "sl_wordcount" this table is not re-initialized
     * here; presumably that is done before it is filled again - confirm. */
    hash_clear_all(&lp->sl_map_hash, 0);

    /* Clear info from .sug file. */
    slang_clear_sug(lp);

    /* Back to the values slang_alloc() sets for a new slang_T. */
    lp->sl_compmax = MAXWLEN;
    lp->sl_compminlen = 0;
    lp->sl_compsylmax = MAXWLEN;
    lp->sl_regions[0] = NUL;
}
2017 
2018 /*
2019  * Clear the info from the .sug file in "lp".
2020  */
2021     void
2022 slang_clear_sug(slang_T *lp)
2023 {
2024     VIM_CLEAR(lp->sl_sbyts);
2025     VIM_CLEAR(lp->sl_sidxs);
2026     close_spellbuf(lp->sl_sugbuf);
2027     lp->sl_sugbuf = NULL;
2028     lp->sl_sugloaded = FALSE;
2029     lp->sl_sugtime = 0;
2030 }
2031 
2032 /*
2033  * Load one spell file and store the info into a slang_T.
2034  * Invoked through do_in_runtimepath().
2035  */
2036     static void
2037 spell_load_cb(char_u *fname, void *cookie)
2038 {
2039     spelload_T	*slp = (spelload_T *)cookie;
2040     slang_T	*slang;
2041 
2042     slang = spell_load_file(fname, slp->sl_lang, NULL, FALSE);
2043     if (slang != NULL)
2044     {
2045 	/* When a previously loaded file has NOBREAK also use it for the
2046 	 * ".add" files. */
2047 	if (slp->sl_nobreak && slang->sl_add)
2048 	    slang->sl_nobreak = TRUE;
2049 	else if (slang->sl_nobreak)
2050 	    slp->sl_nobreak = TRUE;
2051 
2052 	slp->sl_slang = slang;
2053     }
2054 }
2055 
2056 
2057 /*
2058  * Add a word to the hashtable of common words.
2059  * If it's already there then the counter is increased.
2060  */
2061     void
2062 count_common_word(
2063     slang_T	*lp,
2064     char_u	*word,
2065     int		len,	    /* word length, -1 for upto NUL */
2066     int		count)	    /* 1 to count once, 10 to init */
2067 {
2068     hash_T	hash;
2069     hashitem_T	*hi;
2070     wordcount_T	*wc;
2071     char_u	buf[MAXWLEN];
2072     char_u	*p;
2073 
2074     if (len == -1)
2075 	p = word;
2076     else
2077     {
2078 	vim_strncpy(buf, word, len);
2079 	p = buf;
2080     }
2081 
2082     hash = hash_hash(p);
2083     hi = hash_lookup(&lp->sl_wordcount, p, hash);
2084     if (HASHITEM_EMPTY(hi))
2085     {
2086 	wc = (wordcount_T *)alloc((unsigned)(sizeof(wordcount_T) + STRLEN(p)));
2087 	if (wc == NULL)
2088 	    return;
2089 	STRCPY(wc->wc_word, p);
2090 	wc->wc_count = count;
2091 	hash_add_item(&lp->sl_wordcount, hi, wc->wc_word, hash);
2092     }
2093     else
2094     {
2095 	wc = HI2WC(hi);
2096 	if ((wc->wc_count += count) < (unsigned)count)	/* check for overflow */
2097 	    wc->wc_count = MAXWORDCOUNT;
2098     }
2099 }
2100 
2101 /*
2102  * Adjust the score of common words.
2103  */
2104     static int
2105 score_wordcount_adj(
2106     slang_T	*slang,
2107     int		score,
2108     char_u	*word,
2109     int		split)	    /* word was split, less bonus */
2110 {
2111     hashitem_T	*hi;
2112     wordcount_T	*wc;
2113     int		bonus;
2114     int		newscore;
2115 
2116     hi = hash_find(&slang->sl_wordcount, word);
2117     if (!HASHITEM_EMPTY(hi))
2118     {
2119 	wc = HI2WC(hi);
2120 	if (wc->wc_count < SCORE_THRES2)
2121 	    bonus = SCORE_COMMON1;
2122 	else if (wc->wc_count < SCORE_THRES3)
2123 	    bonus = SCORE_COMMON2;
2124 	else
2125 	    bonus = SCORE_COMMON3;
2126 	if (split)
2127 	    newscore = score - bonus / 2;
2128 	else
2129 	    newscore = score - bonus;
2130 	if (newscore < 0)
2131 	    return 0;
2132 	return newscore;
2133     }
2134     return score;
2135 }
2136 
2137 
2138 /*
2139  * Return TRUE if byte "n" appears in "str".
2140  * Like strchr() but independent of locale.
2141  */
2142     int
2143 byte_in_str(char_u *str, int n)
2144 {
2145     char_u	*p;
2146 
2147     for (p = str; *p != NUL; ++p)
2148 	if (*p == n)
2149 	    return TRUE;
2150     return FALSE;
2151 }
2152 
/* Size of "sy_chars"; init_syl_tab() rejects items of this length or longer,
 * thus there is always room for the terminating NUL. */
#define SY_MAXLEN   30
/*
 * One syllable item, stored in the "sl_syl_items" growarray of a slang_T by
 * init_syl_tab() and used by count_syllables().
 */
typedef struct syl_item_S
{
    char_u	sy_chars[SY_MAXLEN];	    /* the sequence of chars */
    int		sy_len;			    /* number of bytes in sy_chars */
} syl_item_T;
2159 
2160 /*
2161  * Truncate "slang->sl_syllable" at the first slash and put the following items
2162  * in "slang->sl_syl_items".
2163  */
2164     int
2165 init_syl_tab(slang_T *slang)
2166 {
2167     char_u	*p;
2168     char_u	*s;
2169     int		l;
2170     syl_item_T	*syl;
2171 
2172     ga_init2(&slang->sl_syl_items, sizeof(syl_item_T), 4);
2173     p = vim_strchr(slang->sl_syllable, '/');
2174     while (p != NULL)
2175     {
2176 	*p++ = NUL;
2177 	if (*p == NUL)	    /* trailing slash */
2178 	    break;
2179 	s = p;
2180 	p = vim_strchr(p, '/');
2181 	if (p == NULL)
2182 	    l = (int)STRLEN(s);
2183 	else
2184 	    l = (int)(p - s);
2185 	if (l >= SY_MAXLEN)
2186 	    return SP_FORMERROR;
2187 	if (ga_grow(&slang->sl_syl_items, 1) == FAIL)
2188 	    return SP_OTHERERROR;
2189 	syl = ((syl_item_T *)slang->sl_syl_items.ga_data)
2190 					       + slang->sl_syl_items.ga_len++;
2191 	vim_strncpy(syl->sy_chars, s, l);
2192 	syl->sy_len = l;
2193     }
2194     return OK;
2195 }
2196 
2197 /*
2198  * Count the number of syllables in "word".
2199  * When "word" contains spaces the syllables after the last space are counted.
2200  * Returns zero if syllables are not defines.
2201  */
2202     static int
2203 count_syllables(slang_T *slang, char_u *word)
2204 {
2205     int		cnt = 0;
2206     int		skip = FALSE;
2207     char_u	*p;
2208     int		len;
2209     int		i;
2210     syl_item_T	*syl;
2211     int		c;
2212 
2213     if (slang->sl_syllable == NULL)
2214 	return 0;
2215 
2216     for (p = word; *p != NUL; p += len)
2217     {
2218 	/* When running into a space reset counter. */
2219 	if (*p == ' ')
2220 	{
2221 	    len = 1;
2222 	    cnt = 0;
2223 	    continue;
2224 	}
2225 
2226 	/* Find longest match of syllable items. */
2227 	len = 0;
2228 	for (i = 0; i < slang->sl_syl_items.ga_len; ++i)
2229 	{
2230 	    syl = ((syl_item_T *)slang->sl_syl_items.ga_data) + i;
2231 	    if (syl->sy_len > len
2232 			       && STRNCMP(p, syl->sy_chars, syl->sy_len) == 0)
2233 		len = syl->sy_len;
2234 	}
2235 	if (len != 0)	/* found a match, count syllable  */
2236 	{
2237 	    ++cnt;
2238 	    skip = FALSE;
2239 	}
2240 	else
2241 	{
2242 	    /* No recognized syllable item, at least a syllable char then? */
2243 	    c = mb_ptr2char(p);
2244 	    len = (*mb_ptr2len)(p);
2245 	    if (vim_strchr(slang->sl_syllable, c) == NULL)
2246 		skip = FALSE;	    /* No, search for next syllable */
2247 	    else if (!skip)
2248 	    {
2249 		++cnt;		    /* Yes, count it */
2250 		skip = TRUE;	    /* don't count following syllable chars */
2251 	    }
2252 	}
2253     }
2254     return cnt;
2255 }
2256 
2257 /*
2258  * Parse 'spelllang' and set w_s->b_langp accordingly.
2259  * Returns NULL if it's OK, an error message otherwise.
2260  */
2261     char *
2262 did_set_spelllang(win_T *wp)
2263 {
2264     garray_T	ga;
2265     char_u	*splp;
2266     char_u	*region;
2267     char_u	region_cp[3];
2268     int		filename;
2269     int		region_mask;
2270     slang_T	*slang;
2271     int		c;
2272     char_u	lang[MAXWLEN + 1];
2273     char_u	spf_name[MAXPATHL];
2274     int		len;
2275     char_u	*p;
2276     int		round;
2277     char_u	*spf;
2278     char_u	*use_region = NULL;
2279     int		dont_use_region = FALSE;
2280     int		nobreak = FALSE;
2281     int		i, j;
2282     langp_T	*lp, *lp2;
2283     static int	recursive = FALSE;
2284     char	*ret_msg = NULL;
2285     char_u	*spl_copy;
2286     bufref_T	bufref;
2287 
2288     set_bufref(&bufref, wp->w_buffer);
2289 
2290     /* We don't want to do this recursively.  May happen when a language is
2291      * not available and the SpellFileMissing autocommand opens a new buffer
2292      * in which 'spell' is set. */
2293     if (recursive)
2294 	return NULL;
2295     recursive = TRUE;
2296 
2297     ga_init2(&ga, sizeof(langp_T), 2);
2298     clear_midword(wp);
2299 
2300     /* Make a copy of 'spelllang', the SpellFileMissing autocommands may change
2301      * it under our fingers. */
2302     spl_copy = vim_strsave(wp->w_s->b_p_spl);
2303     if (spl_copy == NULL)
2304 	goto theend;
2305 
2306     wp->w_s->b_cjk = 0;
2307 
2308     /* Loop over comma separated language names. */
2309     for (splp = spl_copy; *splp != NUL; )
2310     {
2311 	// Get one language name.
2312 	copy_option_part(&splp, lang, MAXWLEN, ",");
2313 	region = NULL;
2314 	len = (int)STRLEN(lang);
2315 
2316 	if (!valid_spellang(lang))
2317 	    continue;
2318 
2319 	if (STRCMP(lang, "cjk") == 0)
2320 	{
2321 	    wp->w_s->b_cjk = 1;
2322 	    continue;
2323 	}
2324 
2325 	/* If the name ends in ".spl" use it as the name of the spell file.
2326 	 * If there is a region name let "region" point to it and remove it
2327 	 * from the name. */
2328 	if (len > 4 && fnamecmp(lang + len - 4, ".spl") == 0)
2329 	{
2330 	    filename = TRUE;
2331 
2332 	    /* Locate a region and remove it from the file name. */
2333 	    p = vim_strchr(gettail(lang), '_');
2334 	    if (p != NULL && ASCII_ISALPHA(p[1]) && ASCII_ISALPHA(p[2])
2335 						      && !ASCII_ISALPHA(p[3]))
2336 	    {
2337 		vim_strncpy(region_cp, p + 1, 2);
2338 		mch_memmove(p, p + 3, len - (p - lang) - 2);
2339 		region = region_cp;
2340 	    }
2341 	    else
2342 		dont_use_region = TRUE;
2343 
2344 	    /* Check if we loaded this language before. */
2345 	    for (slang = first_lang; slang != NULL; slang = slang->sl_next)
2346 		if (fullpathcmp(lang, slang->sl_fname, FALSE) == FPC_SAME)
2347 		    break;
2348 	}
2349 	else
2350 	{
2351 	    filename = FALSE;
2352 	    if (len > 3 && lang[len - 3] == '_')
2353 	    {
2354 		region = lang + len - 2;
2355 		len -= 3;
2356 		lang[len] = NUL;
2357 	    }
2358 	    else
2359 		dont_use_region = TRUE;
2360 
2361 	    /* Check if we loaded this language before. */
2362 	    for (slang = first_lang; slang != NULL; slang = slang->sl_next)
2363 		if (STRICMP(lang, slang->sl_name) == 0)
2364 		    break;
2365 	}
2366 
2367 	if (region != NULL)
2368 	{
2369 	    /* If the region differs from what was used before then don't
2370 	     * use it for 'spellfile'. */
2371 	    if (use_region != NULL && STRCMP(region, use_region) != 0)
2372 		dont_use_region = TRUE;
2373 	    use_region = region;
2374 	}
2375 
2376 	/* If not found try loading the language now. */
2377 	if (slang == NULL)
2378 	{
2379 	    if (filename)
2380 		(void)spell_load_file(lang, lang, NULL, FALSE);
2381 	    else
2382 	    {
2383 		spell_load_lang(lang);
2384 		/* SpellFileMissing autocommands may do anything, including
2385 		 * destroying the buffer we are using... */
2386 		if (!bufref_valid(&bufref))
2387 		{
2388 		    ret_msg = N_("E797: SpellFileMissing autocommand deleted buffer");
2389 		    goto theend;
2390 		}
2391 	    }
2392 	}
2393 
2394 	/*
2395 	 * Loop over the languages, there can be several files for "lang".
2396 	 */
2397 	for (slang = first_lang; slang != NULL; slang = slang->sl_next)
2398 	    if (filename ? fullpathcmp(lang, slang->sl_fname, FALSE) == FPC_SAME
2399 			 : STRICMP(lang, slang->sl_name) == 0)
2400 	    {
2401 		region_mask = REGION_ALL;
2402 		if (!filename && region != NULL)
2403 		{
2404 		    /* find region in sl_regions */
2405 		    c = find_region(slang->sl_regions, region);
2406 		    if (c == REGION_ALL)
2407 		    {
2408 			if (slang->sl_add)
2409 			{
2410 			    if (*slang->sl_regions != NUL)
2411 				/* This addition file is for other regions. */
2412 				region_mask = 0;
2413 			}
2414 			else
2415 			    /* This is probably an error.  Give a warning and
2416 			     * accept the words anyway. */
2417 			    smsg(_("Warning: region %s not supported"),
2418 								      region);
2419 		    }
2420 		    else
2421 			region_mask = 1 << c;
2422 		}
2423 
2424 		if (region_mask != 0)
2425 		{
2426 		    if (ga_grow(&ga, 1) == FAIL)
2427 		    {
2428 			ga_clear(&ga);
2429 			ret_msg = e_outofmem;
2430 			goto theend;
2431 		    }
2432 		    LANGP_ENTRY(ga, ga.ga_len)->lp_slang = slang;
2433 		    LANGP_ENTRY(ga, ga.ga_len)->lp_region = region_mask;
2434 		    ++ga.ga_len;
2435 		    use_midword(slang, wp);
2436 		    if (slang->sl_nobreak)
2437 			nobreak = TRUE;
2438 		}
2439 	    }
2440     }
2441 
2442     /* round 0: load int_wordlist, if possible.
2443      * round 1: load first name in 'spellfile'.
2444      * round 2: load second name in 'spellfile.
2445      * etc. */
2446     spf = curwin->w_s->b_p_spf;
2447     for (round = 0; round == 0 || *spf != NUL; ++round)
2448     {
2449 	if (round == 0)
2450 	{
2451 	    /* Internal wordlist, if there is one. */
2452 	    if (int_wordlist == NULL)
2453 		continue;
2454 	    int_wordlist_spl(spf_name);
2455 	}
2456 	else
2457 	{
2458 	    /* One entry in 'spellfile'. */
2459 	    copy_option_part(&spf, spf_name, MAXPATHL - 5, ",");
2460 	    STRCAT(spf_name, ".spl");
2461 
2462 	    /* If it was already found above then skip it. */
2463 	    for (c = 0; c < ga.ga_len; ++c)
2464 	    {
2465 		p = LANGP_ENTRY(ga, c)->lp_slang->sl_fname;
2466 		if (p != NULL && fullpathcmp(spf_name, p, FALSE) == FPC_SAME)
2467 		    break;
2468 	    }
2469 	    if (c < ga.ga_len)
2470 		continue;
2471 	}
2472 
2473 	/* Check if it was loaded already. */
2474 	for (slang = first_lang; slang != NULL; slang = slang->sl_next)
2475 	    if (fullpathcmp(spf_name, slang->sl_fname, FALSE) == FPC_SAME)
2476 		break;
2477 	if (slang == NULL)
2478 	{
2479 	    /* Not loaded, try loading it now.  The language name includes the
2480 	     * region name, the region is ignored otherwise.  for int_wordlist
2481 	     * use an arbitrary name. */
2482 	    if (round == 0)
2483 		STRCPY(lang, "internal wordlist");
2484 	    else
2485 	    {
2486 		vim_strncpy(lang, gettail(spf_name), MAXWLEN);
2487 		p = vim_strchr(lang, '.');
2488 		if (p != NULL)
2489 		    *p = NUL;	/* truncate at ".encoding.add" */
2490 	    }
2491 	    slang = spell_load_file(spf_name, lang, NULL, TRUE);
2492 
2493 	    /* If one of the languages has NOBREAK we assume the addition
2494 	     * files also have this. */
2495 	    if (slang != NULL && nobreak)
2496 		slang->sl_nobreak = TRUE;
2497 	}
2498 	if (slang != NULL && ga_grow(&ga, 1) == OK)
2499 	{
2500 	    region_mask = REGION_ALL;
2501 	    if (use_region != NULL && !dont_use_region)
2502 	    {
2503 		/* find region in sl_regions */
2504 		c = find_region(slang->sl_regions, use_region);
2505 		if (c != REGION_ALL)
2506 		    region_mask = 1 << c;
2507 		else if (*slang->sl_regions != NUL)
2508 		    /* This spell file is for other regions. */
2509 		    region_mask = 0;
2510 	    }
2511 
2512 	    if (region_mask != 0)
2513 	    {
2514 		LANGP_ENTRY(ga, ga.ga_len)->lp_slang = slang;
2515 		LANGP_ENTRY(ga, ga.ga_len)->lp_sallang = NULL;
2516 		LANGP_ENTRY(ga, ga.ga_len)->lp_replang = NULL;
2517 		LANGP_ENTRY(ga, ga.ga_len)->lp_region = region_mask;
2518 		++ga.ga_len;
2519 		use_midword(slang, wp);
2520 	    }
2521 	}
2522     }
2523 
2524     /* Everything is fine, store the new b_langp value. */
2525     ga_clear(&wp->w_s->b_langp);
2526     wp->w_s->b_langp = ga;
2527 
2528     /* For each language figure out what language to use for sound folding and
2529      * REP items.  If the language doesn't support it itself use another one
2530      * with the same name.  E.g. for "en-math" use "en". */
2531     for (i = 0; i < ga.ga_len; ++i)
2532     {
2533 	lp = LANGP_ENTRY(ga, i);
2534 
2535 	/* sound folding */
2536 	if (lp->lp_slang->sl_sal.ga_len > 0)
2537 	    /* language does sound folding itself */
2538 	    lp->lp_sallang = lp->lp_slang;
2539 	else
2540 	    /* find first similar language that does sound folding */
2541 	    for (j = 0; j < ga.ga_len; ++j)
2542 	    {
2543 		lp2 = LANGP_ENTRY(ga, j);
2544 		if (lp2->lp_slang->sl_sal.ga_len > 0
2545 			&& STRNCMP(lp->lp_slang->sl_name,
2546 					      lp2->lp_slang->sl_name, 2) == 0)
2547 		{
2548 		    lp->lp_sallang = lp2->lp_slang;
2549 		    break;
2550 		}
2551 	    }
2552 
2553 	/* REP items */
2554 	if (lp->lp_slang->sl_rep.ga_len > 0)
2555 	    /* language has REP items itself */
2556 	    lp->lp_replang = lp->lp_slang;
2557 	else
2558 	    /* find first similar language that has REP items */
2559 	    for (j = 0; j < ga.ga_len; ++j)
2560 	    {
2561 		lp2 = LANGP_ENTRY(ga, j);
2562 		if (lp2->lp_slang->sl_rep.ga_len > 0
2563 			&& STRNCMP(lp->lp_slang->sl_name,
2564 					      lp2->lp_slang->sl_name, 2) == 0)
2565 		{
2566 		    lp->lp_replang = lp2->lp_slang;
2567 		    break;
2568 		}
2569 	    }
2570     }
2571 
2572 theend:
2573     vim_free(spl_copy);
2574     recursive = FALSE;
2575     redraw_win_later(wp, NOT_VALID);
2576     return ret_msg;
2577 }
2578 
/*
 * Clear the midword characters used for window "wp": zero all 256 entries of
 * the "b_spell_ismw" table and free the "b_spell_ismw_mb" string.
 */
    static void
clear_midword(win_T *wp)
{
    vim_memset(wp->w_s->b_spell_ismw, 0, 256);
    VIM_CLEAR(wp->w_s->b_spell_ismw_mb);
}
2588 
2589 /*
2590  * Use the "sl_midword" field of language "lp" for buffer "buf".
2591  * They add up to any currently used midword characters.
2592  */
2593     static void
2594 use_midword(slang_T *lp, win_T *wp)
2595 {
2596     char_u	*p;
2597 
2598     if (lp->sl_midword == NULL)	    /* there aren't any */
2599 	return;
2600 
2601     for (p = lp->sl_midword; *p != NUL; )
2602 	if (has_mbyte)
2603 	{
2604 	    int	    c, l, n;
2605 	    char_u  *bp;
2606 
2607 	    c = mb_ptr2char(p);
2608 	    l = (*mb_ptr2len)(p);
2609 	    if (c < 256 && l <= 2)
2610 		wp->w_s->b_spell_ismw[c] = TRUE;
2611 	    else if (wp->w_s->b_spell_ismw_mb == NULL)
2612 		/* First multi-byte char in "b_spell_ismw_mb". */
2613 		wp->w_s->b_spell_ismw_mb = vim_strnsave(p, l);
2614 	    else
2615 	    {
2616 		/* Append multi-byte chars to "b_spell_ismw_mb". */
2617 		n = (int)STRLEN(wp->w_s->b_spell_ismw_mb);
2618 		bp = vim_strnsave(wp->w_s->b_spell_ismw_mb, n + l);
2619 		if (bp != NULL)
2620 		{
2621 		    vim_free(wp->w_s->b_spell_ismw_mb);
2622 		    wp->w_s->b_spell_ismw_mb = bp;
2623 		    vim_strncpy(bp + n, p, l);
2624 		}
2625 	    }
2626 	    p += l;
2627 	}
2628 	else
2629 	    wp->w_s->b_spell_ismw[*p++] = TRUE;
2630 }
2631 
2632 /*
2633  * Find the region "region[2]" in "rp" (points to "sl_regions").
2634  * Each region is simply stored as the two characters of its name.
2635  * Returns the index if found (first is 0), REGION_ALL if not found.
2636  */
2637     static int
2638 find_region(char_u *rp, char_u *region)
2639 {
2640     int		i;
2641 
2642     for (i = 0; ; i += 2)
2643     {
2644 	if (rp[i] == NUL)
2645 	    return REGION_ALL;
2646 	if (rp[i] == region[0] && rp[i + 1] == region[1])
2647 	    break;
2648     }
2649     return i / 2;
2650 }
2651 
2652 /*
2653  * Return case type of word:
2654  * w word	0
2655  * Word		WF_ONECAP
2656  * W WORD	WF_ALLCAP
2657  * WoRd	wOrd	WF_KEEPCAP
2658  */
2659     int
2660 captype(
2661     char_u	*word,
2662     char_u	*end)	    /* When NULL use up to NUL byte. */
2663 {
2664     char_u	*p;
2665     int		c;
2666     int		firstcap;
2667     int		allcap;
2668     int		past_second = FALSE;	/* past second word char */
2669 
2670     /* find first letter */
2671     for (p = word; !spell_iswordp_nmw(p, curwin); MB_PTR_ADV(p))
2672 	if (end == NULL ? *p == NUL : p >= end)
2673 	    return 0;	    /* only non-word characters, illegal word */
2674     if (has_mbyte)
2675 	c = mb_ptr2char_adv(&p);
2676     else
2677 	c = *p++;
2678     firstcap = allcap = SPELL_ISUPPER(c);
2679 
2680     /*
2681      * Need to check all letters to find a word with mixed upper/lower.
2682      * But a word with an upper char only at start is a ONECAP.
2683      */
2684     for ( ; end == NULL ? *p != NUL : p < end; MB_PTR_ADV(p))
2685 	if (spell_iswordp_nmw(p, curwin))
2686 	{
2687 	    c = PTR2CHAR(p);
2688 	    if (!SPELL_ISUPPER(c))
2689 	    {
2690 		/* UUl -> KEEPCAP */
2691 		if (past_second && allcap)
2692 		    return WF_KEEPCAP;
2693 		allcap = FALSE;
2694 	    }
2695 	    else if (!allcap)
2696 		/* UlU -> KEEPCAP */
2697 		return WF_KEEPCAP;
2698 	    past_second = TRUE;
2699 	}
2700 
2701     if (allcap)
2702 	return WF_ALLCAP;
2703     if (firstcap)
2704 	return WF_ONECAP;
2705     return 0;
2706 }
2707 
2708 /*
2709  * Like captype() but for a KEEPCAP word add ONECAP if the word starts with a
2710  * capital.  So that make_case_word() can turn WOrd into Word.
2711  * Add ALLCAP for "WOrD".
2712  */
2713     static int
2714 badword_captype(char_u *word, char_u *end)
2715 {
2716     int		flags = captype(word, end);
2717     int		c;
2718     int		l, u;
2719     int		first;
2720     char_u	*p;
2721 
2722     if (flags & WF_KEEPCAP)
2723     {
2724 	/* Count the number of UPPER and lower case letters. */
2725 	l = u = 0;
2726 	first = FALSE;
2727 	for (p = word; p < end; MB_PTR_ADV(p))
2728 	{
2729 	    c = PTR2CHAR(p);
2730 	    if (SPELL_ISUPPER(c))
2731 	    {
2732 		++u;
2733 		if (p == word)
2734 		    first = TRUE;
2735 	    }
2736 	    else
2737 		++l;
2738 	}
2739 
2740 	/* If there are more UPPER than lower case letters suggest an
2741 	 * ALLCAP word.  Otherwise, if the first letter is UPPER then
2742 	 * suggest ONECAP.  Exception: "ALl" most likely should be "All",
2743 	 * require three upper case letters. */
2744 	if (u > l && u > 2)
2745 	    flags |= WF_ALLCAP;
2746 	else if (first)
2747 	    flags |= WF_ONECAP;
2748 
2749 	if (u >= 2 && l >= 2)	/* maCARONI maCAroni */
2750 	    flags |= WF_MIXCAP;
2751     }
2752     return flags;
2753 }
2754 
2755 /*
2756  * Delete the internal wordlist and its .spl file.
2757  */
2758     void
2759 spell_delete_wordlist(void)
2760 {
2761     char_u	fname[MAXPATHL];
2762 
2763     if (int_wordlist != NULL)
2764     {
2765 	mch_remove(int_wordlist);
2766 	int_wordlist_spl(fname);
2767 	mch_remove(fname);
2768 	VIM_CLEAR(int_wordlist);
2769     }
2770 }
2771 
2772 /*
2773  * Free all languages.
2774  */
2775     void
2776 spell_free_all(void)
2777 {
2778     slang_T	*slang;
2779     buf_T	*buf;
2780 
2781     /* Go through all buffers and handle 'spelllang'. <VN> */
2782     FOR_ALL_BUFFERS(buf)
2783 	ga_clear(&buf->b_s.b_langp);
2784 
2785     while (first_lang != NULL)
2786     {
2787 	slang = first_lang;
2788 	first_lang = slang->sl_next;
2789 	slang_free(slang);
2790     }
2791 
2792     spell_delete_wordlist();
2793 
2794     VIM_CLEAR(repl_to);
2795     VIM_CLEAR(repl_from);
2796 }
2797 
2798 /*
2799  * Clear all spelling tables and reload them.
2800  * Used after 'encoding' is set and when ":mkspell" was used.
2801  */
2802     void
2803 spell_reload(void)
2804 {
2805     win_T	*wp;
2806 
2807     /* Initialize the table for spell_iswordp(). */
2808     init_spell_chartab();
2809 
2810     /* Unload all allocated memory. */
2811     spell_free_all();
2812 
2813     /* Go through all buffers and handle 'spelllang'. */
2814     FOR_ALL_WINDOWS(wp)
2815     {
2816 	/* Only load the wordlists when 'spelllang' is set and there is a
2817 	 * window for this buffer in which 'spell' is set. */
2818 	if (*wp->w_s->b_p_spl != NUL)
2819 	{
2820 		if (wp->w_p_spell)
2821 		{
2822 		    (void)did_set_spelllang(wp);
2823 		    break;
2824 		}
2825 	}
2826     }
2827 }
2828 
2829 /*
2830  * Opposite of offset2bytes().
2831  * "pp" points to the bytes and is advanced over it.
2832  * Returns the offset.
2833  */
2834     static int
2835 bytes2offset(char_u **pp)
2836 {
2837     char_u	*p = *pp;
2838     int		nr;
2839     int		c;
2840 
2841     c = *p++;
2842     if ((c & 0x80) == 0x00)		/* 1 byte */
2843     {
2844 	nr = c - 1;
2845     }
2846     else if ((c & 0xc0) == 0x80)	/* 2 bytes */
2847     {
2848 	nr = (c & 0x3f) - 1;
2849 	nr = nr * 255 + (*p++ - 1);
2850     }
2851     else if ((c & 0xe0) == 0xc0)	/* 3 bytes */
2852     {
2853 	nr = (c & 0x1f) - 1;
2854 	nr = nr * 255 + (*p++ - 1);
2855 	nr = nr * 255 + (*p++ - 1);
2856     }
2857     else				/* 4 bytes */
2858     {
2859 	nr = (c & 0x0f) - 1;
2860 	nr = nr * 255 + (*p++ - 1);
2861 	nr = nr * 255 + (*p++ - 1);
2862 	nr = nr * 255 + (*p++ - 1);
2863     }
2864 
2865     *pp = p;
2866     return nr;
2867 }
2868 
2869 
2870 /*
2871  * Open a spell buffer.  This is a nameless buffer that is not in the buffer
2872  * list and only contains text lines.  Can use a swapfile to reduce memory
2873  * use.
2874  * Most other fields are invalid!  Esp. watch out for string options being
2875  * NULL and there is no undo info.
2876  * Returns NULL when out of memory.
2877  */
2878     buf_T *
2879 open_spellbuf(void)
2880 {
2881     buf_T	*buf;
2882 
2883     buf = (buf_T *)alloc_clear(sizeof(buf_T));
2884     if (buf != NULL)
2885     {
2886 	buf->b_spell = TRUE;
2887 	buf->b_p_swf = TRUE;	/* may create a swap file */
2888 #ifdef FEAT_CRYPT
2889 	buf->b_p_key = empty_option;
2890 #endif
2891 	ml_open(buf);
2892 	ml_open_file(buf);	/* create swap file now */
2893     }
2894     return buf;
2895 }
2896 
2897 /*
2898  * Close the buffer used for spell info.
2899  */
2900     void
2901 close_spellbuf(buf_T *buf)
2902 {
2903     if (buf != NULL)
2904     {
2905 	ml_close(buf, TRUE);
2906 	vim_free(buf);
2907     }
2908 }
2909 
2910 /*
2911  * Init the chartab used for spelling for ASCII.
2912  * EBCDIC is not supported!
2913  */
2914     void
2915 clear_spell_chartab(spelltab_T *sp)
2916 {
2917     int		i;
2918 
2919     /* Init everything to FALSE. */
2920     vim_memset(sp->st_isw, FALSE, sizeof(sp->st_isw));
2921     vim_memset(sp->st_isu, FALSE, sizeof(sp->st_isu));
2922     for (i = 0; i < 256; ++i)
2923     {
2924 	sp->st_fold[i] = i;
2925 	sp->st_upper[i] = i;
2926     }
2927 
2928     /* We include digits.  A word shouldn't start with a digit, but handling
2929      * that is done separately. */
2930     for (i = '0'; i <= '9'; ++i)
2931 	sp->st_isw[i] = TRUE;
2932     for (i = 'A'; i <= 'Z'; ++i)
2933     {
2934 	sp->st_isw[i] = TRUE;
2935 	sp->st_isu[i] = TRUE;
2936 	sp->st_fold[i] = i + 0x20;
2937     }
2938     for (i = 'a'; i <= 'z'; ++i)
2939     {
2940 	sp->st_isw[i] = TRUE;
2941 	sp->st_upper[i] = i - 0x20;
2942     }
2943 }
2944 
2945 /*
2946  * Init the chartab used for spelling.  Only depends on 'encoding'.
2947  * Called once while starting up and when 'encoding' changes.
2948  * The default is to use isalpha(), but the spell file should define the word
2949  * characters to make it possible that 'encoding' differs from the current
2950  * locale.  For utf-8 we don't use isalpha() but our own functions.
2951  */
2952     void
2953 init_spell_chartab(void)
2954 {
2955     int	    i;
2956 
2957     did_set_spelltab = FALSE;
2958     clear_spell_chartab(&spelltab);
2959     if (enc_dbcs)
2960     {
2961 	/* DBCS: assume double-wide characters are word characters. */
2962 	for (i = 128; i <= 255; ++i)
2963 	    if (MB_BYTE2LEN(i) == 2)
2964 		spelltab.st_isw[i] = TRUE;
2965     }
2966     else if (enc_utf8)
2967     {
2968 	for (i = 128; i < 256; ++i)
2969 	{
2970 	    int f = utf_fold(i);
2971 	    int u = utf_toupper(i);
2972 
2973 	    spelltab.st_isu[i] = utf_isupper(i);
2974 	    spelltab.st_isw[i] = spelltab.st_isu[i] || utf_islower(i);
2975 	    /* The folded/upper-cased value is different between latin1 and
2976 	     * utf8 for 0xb5, causing E763 for no good reason.  Use the latin1
2977 	     * value for utf-8 to avoid this. */
2978 	    spelltab.st_fold[i] = (f < 256) ? f : i;
2979 	    spelltab.st_upper[i] = (u < 256) ? u : i;
2980 	}
2981     }
2982     else
2983     {
2984 	/* Rough guess: use locale-dependent library functions. */
2985 	for (i = 128; i < 256; ++i)
2986 	{
2987 	    if (MB_ISUPPER(i))
2988 	    {
2989 		spelltab.st_isw[i] = TRUE;
2990 		spelltab.st_isu[i] = TRUE;
2991 		spelltab.st_fold[i] = MB_TOLOWER(i);
2992 	    }
2993 	    else if (MB_ISLOWER(i))
2994 	    {
2995 		spelltab.st_isw[i] = TRUE;
2996 		spelltab.st_upper[i] = MB_TOUPPER(i);
2997 	    }
2998 	}
2999     }
3000 }
3001 
3002 
3003 /*
3004  * Return TRUE if "p" points to a word character.
3005  * As a special case we see "midword" characters as word character when it is
3006  * followed by a word character.  This finds they'there but not 'they there'.
3007  * Thus this only works properly when past the first character of the word.
3008  */
3009     static int
3010 spell_iswordp(
3011     char_u	*p,
3012     win_T	*wp)	    /* buffer used */
3013 {
3014     char_u	*s;
3015     int		l;
3016     int		c;
3017 
3018     if (has_mbyte)
3019     {
3020 	l = MB_PTR2LEN(p);
3021 	s = p;
3022 	if (l == 1)
3023 	{
3024 	    /* be quick for ASCII */
3025 	    if (wp->w_s->b_spell_ismw[*p])
3026 		s = p + 1;		/* skip a mid-word character */
3027 	}
3028 	else
3029 	{
3030 	    c = mb_ptr2char(p);
3031 	    if (c < 256 ? wp->w_s->b_spell_ismw[c]
3032 		    : (wp->w_s->b_spell_ismw_mb != NULL
3033 			   && vim_strchr(wp->w_s->b_spell_ismw_mb, c) != NULL))
3034 		s = p + l;
3035 	}
3036 
3037 	c = mb_ptr2char(s);
3038 	if (c > 255)
3039 	    return spell_mb_isword_class(mb_get_class(s), wp);
3040 	return spelltab.st_isw[c];
3041     }
3042 
3043     return spelltab.st_isw[wp->w_s->b_spell_ismw[*p] ? p[1] : p[0]];
3044 }
3045 
3046 /*
3047  * Return TRUE if "p" points to a word character.
3048  * Unlike spell_iswordp() this doesn't check for "midword" characters.
3049  */
3050     int
3051 spell_iswordp_nmw(char_u *p, win_T *wp)
3052 {
3053     int		c;
3054 
3055     if (has_mbyte)
3056     {
3057 	c = mb_ptr2char(p);
3058 	if (c > 255)
3059 	    return spell_mb_isword_class(mb_get_class(p), wp);
3060 	return spelltab.st_isw[c];
3061     }
3062     return spelltab.st_isw[*p];
3063 }
3064 
3065 /*
3066  * Return TRUE if word class indicates a word character.
3067  * Only for characters above 255.
3068  * Unicode subscript and superscript are not considered word characters.
3069  * See also dbcs_class() and utf_class() in mbyte.c.
3070  */
3071     static int
3072 spell_mb_isword_class(int cl, win_T *wp)
3073 {
3074     if (wp->w_s->b_cjk)
3075 	/* East Asian characters are not considered word characters. */
3076 	return cl == 2 || cl == 0x2800;
3077     return cl >= 2 && cl != 0x2070 && cl != 0x2080;
3078 }
3079 
3080 /*
3081  * Return TRUE if "p" points to a word character.
3082  * Wide version of spell_iswordp().
3083  */
3084     static int
3085 spell_iswordp_w(int *p, win_T *wp)
3086 {
3087     int		*s;
3088 
3089     if (*p < 256 ? wp->w_s->b_spell_ismw[*p]
3090 		 : (wp->w_s->b_spell_ismw_mb != NULL
3091 			     && vim_strchr(wp->w_s->b_spell_ismw_mb, *p) != NULL))
3092 	s = p + 1;
3093     else
3094 	s = p;
3095 
3096     if (*s > 255)
3097     {
3098 	if (enc_utf8)
3099 	    return spell_mb_isword_class(utf_class(*s), wp);
3100 	if (enc_dbcs)
3101 	    return spell_mb_isword_class(
3102 				dbcs_class((unsigned)*s >> 8, *s & 0xff), wp);
3103 	return 0;
3104     }
3105     return spelltab.st_isw[*s];
3106 }
3107 
3108 /*
3109  * Case-fold "str[len]" into "buf[buflen]".  The result is NUL terminated.
3110  * Uses the character definitions from the .spl file.
3111  * When using a multi-byte 'encoding' the length may change!
3112  * Returns FAIL when something wrong.
3113  */
3114     int
3115 spell_casefold(
3116     char_u	*str,
3117     int		len,
3118     char_u	*buf,
3119     int		buflen)
3120 {
3121     int		i;
3122 
3123     if (len >= buflen)
3124     {
3125 	buf[0] = NUL;
3126 	return FAIL;		/* result will not fit */
3127     }
3128 
3129     if (has_mbyte)
3130     {
3131 	int	outi = 0;
3132 	char_u	*p;
3133 	int	c;
3134 
3135 	/* Fold one character at a time. */
3136 	for (p = str; p < str + len; )
3137 	{
3138 	    if (outi + MB_MAXBYTES > buflen)
3139 	    {
3140 		buf[outi] = NUL;
3141 		return FAIL;
3142 	    }
3143 	    c = mb_cptr2char_adv(&p);
3144 	    outi += mb_char2bytes(SPELL_TOFOLD(c), buf + outi);
3145 	}
3146 	buf[outi] = NUL;
3147     }
3148     else
3149     {
3150 	/* Be quick for non-multibyte encodings. */
3151 	for (i = 0; i < len; ++i)
3152 	    buf[i] = spelltab.st_fold[str[i]];
3153 	buf[i] = NUL;
3154     }
3155 
3156     return OK;
3157 }
3158 
/*
 * Values for "sps_flags".  spell_check_sps() stores one of them, depending
 * on the method name found in 'spellsuggest': "best", "fast" or "double".
 * The values are distinct bits, they are tested with "&" elsewhere.
 */
#define SPS_BEST    1
#define SPS_FAST    2
#define SPS_DOUBLE  4

static int sps_flags = SPS_BEST;	/* flags from 'spellsuggest' */
static int sps_limit = 9999;		/* max nr of suggestions given */
3166 
3167 /*
3168  * Check the 'spellsuggest' option.  Return FAIL if it's wrong.
3169  * Sets "sps_flags" and "sps_limit".
3170  */
3171     int
3172 spell_check_sps(void)
3173 {
3174     char_u	*p;
3175     char_u	*s;
3176     char_u	buf[MAXPATHL];
3177     int		f;
3178 
3179     sps_flags = 0;
3180     sps_limit = 9999;
3181 
3182     for (p = p_sps; *p != NUL; )
3183     {
3184 	copy_option_part(&p, buf, MAXPATHL, ",");
3185 
3186 	f = 0;
3187 	if (VIM_ISDIGIT(*buf))
3188 	{
3189 	    s = buf;
3190 	    sps_limit = getdigits(&s);
3191 	    if (*s != NUL && !VIM_ISDIGIT(*s))
3192 		f = -1;
3193 	}
3194 	else if (STRCMP(buf, "best") == 0)
3195 	    f = SPS_BEST;
3196 	else if (STRCMP(buf, "fast") == 0)
3197 	    f = SPS_FAST;
3198 	else if (STRCMP(buf, "double") == 0)
3199 	    f = SPS_DOUBLE;
3200 	else if (STRNCMP(buf, "expr:", 5) != 0
3201 		&& STRNCMP(buf, "file:", 5) != 0)
3202 	    f = -1;
3203 
3204 	if (f == -1 || (sps_flags != 0 && f != 0))
3205 	{
3206 	    sps_flags = SPS_BEST;
3207 	    sps_limit = 9999;
3208 	    return FAIL;
3209 	}
3210 	if (f != 0)
3211 	    sps_flags = f;
3212     }
3213 
3214     if (sps_flags == 0)
3215 	sps_flags = SPS_BEST;
3216 
3217     return OK;
3218 }
3219 
3220 /*
3221  * "z=": Find badly spelled word under or after the cursor.
3222  * Give suggestions for the properly spelled word.
3223  * In Visual mode use the highlighted word as the bad word.
3224  * When "count" is non-zero use that suggestion.
3225  */
3226     void
3227 spell_suggest(int count)
3228 {
3229     char_u	*line;
3230     pos_T	prev_cursor = curwin->w_cursor;
3231     char_u	wcopy[MAXWLEN + 2];
3232     char_u	*p;
3233     int		i;
3234     int		c;
3235     suginfo_T	sug;
3236     suggest_T	*stp;
3237     int		mouse_used;
3238     int		need_cap;
3239     int		limit;
3240     int		selected = count;
3241     int		badlen = 0;
3242     int		msg_scroll_save = msg_scroll;
3243 
3244     if (no_spell_checking(curwin))
3245 	return;
3246 
3247     if (VIsual_active)
3248     {
3249 	/* Use the Visually selected text as the bad word.  But reject
3250 	 * a multi-line selection. */
3251 	if (curwin->w_cursor.lnum != VIsual.lnum)
3252 	{
3253 	    vim_beep(BO_SPELL);
3254 	    return;
3255 	}
3256 	badlen = (int)curwin->w_cursor.col - (int)VIsual.col;
3257 	if (badlen < 0)
3258 	    badlen = -badlen;
3259 	else
3260 	    curwin->w_cursor.col = VIsual.col;
3261 	++badlen;
3262 	end_visual_mode();
3263     }
3264     /* Find the start of the badly spelled word. */
3265     else if (spell_move_to(curwin, FORWARD, TRUE, TRUE, NULL) == 0
3266 	    || curwin->w_cursor.col > prev_cursor.col)
3267     {
3268 	/* No bad word or it starts after the cursor: use the word under the
3269 	 * cursor. */
3270 	curwin->w_cursor = prev_cursor;
3271 	line = ml_get_curline();
3272 	p = line + curwin->w_cursor.col;
3273 	/* Backup to before start of word. */
3274 	while (p > line && spell_iswordp_nmw(p, curwin))
3275 	    MB_PTR_BACK(line, p);
3276 	/* Forward to start of word. */
3277 	while (*p != NUL && !spell_iswordp_nmw(p, curwin))
3278 	    MB_PTR_ADV(p);
3279 
3280 	if (!spell_iswordp_nmw(p, curwin))		/* No word found. */
3281 	{
3282 	    beep_flush();
3283 	    return;
3284 	}
3285 	curwin->w_cursor.col = (colnr_T)(p - line);
3286     }
3287 
3288     /* Get the word and its length. */
3289 
3290     /* Figure out if the word should be capitalised. */
3291     need_cap = check_need_cap(curwin->w_cursor.lnum, curwin->w_cursor.col);
3292 
3293     /* Make a copy of current line since autocommands may free the line. */
3294     line = vim_strsave(ml_get_curline());
3295     if (line == NULL)
3296 	goto skip;
3297 
3298     /* Get the list of suggestions.  Limit to 'lines' - 2 or the number in
3299      * 'spellsuggest', whatever is smaller. */
3300     if (sps_limit > (int)Rows - 2)
3301 	limit = (int)Rows - 2;
3302     else
3303 	limit = sps_limit;
3304     spell_find_suggest(line + curwin->w_cursor.col, badlen, &sug, limit,
3305 							TRUE, need_cap, TRUE);
3306 
3307     if (sug.su_ga.ga_len == 0)
3308 	msg(_("Sorry, no suggestions"));
3309     else if (count > 0)
3310     {
3311 	if (count > sug.su_ga.ga_len)
3312 	    smsg(_("Sorry, only %ld suggestions"),
3313 						      (long)sug.su_ga.ga_len);
3314     }
3315     else
3316     {
3317 	VIM_CLEAR(repl_from);
3318 	VIM_CLEAR(repl_to);
3319 
3320 #ifdef FEAT_RIGHTLEFT
3321 	/* When 'rightleft' is set the list is drawn right-left. */
3322 	cmdmsg_rl = curwin->w_p_rl;
3323 	if (cmdmsg_rl)
3324 	    msg_col = Columns - 1;
3325 #endif
3326 
3327 	/* List the suggestions. */
3328 	msg_start();
3329 	msg_row = Rows - 1;	/* for when 'cmdheight' > 1 */
3330 	lines_left = Rows;	/* avoid more prompt */
3331 	vim_snprintf((char *)IObuff, IOSIZE, _("Change \"%.*s\" to:"),
3332 						sug.su_badlen, sug.su_badptr);
3333 #ifdef FEAT_RIGHTLEFT
3334 	if (cmdmsg_rl && STRNCMP(IObuff, "Change", 6) == 0)
3335 	{
3336 	    /* And now the rabbit from the high hat: Avoid showing the
3337 	     * untranslated message rightleft. */
3338 	    vim_snprintf((char *)IObuff, IOSIZE, ":ot \"%.*s\" egnahC",
3339 						sug.su_badlen, sug.su_badptr);
3340 	}
3341 #endif
3342 	msg_puts((char *)IObuff);
3343 	msg_clr_eos();
3344 	msg_putchar('\n');
3345 
3346 	msg_scroll = TRUE;
3347 	for (i = 0; i < sug.su_ga.ga_len; ++i)
3348 	{
3349 	    stp = &SUG(sug.su_ga, i);
3350 
3351 	    /* The suggested word may replace only part of the bad word, add
3352 	     * the not replaced part. */
3353 	    vim_strncpy(wcopy, stp->st_word, MAXWLEN);
3354 	    if (sug.su_badlen > stp->st_orglen)
3355 		vim_strncpy(wcopy + stp->st_wordlen,
3356 					       sug.su_badptr + stp->st_orglen,
3357 					      sug.su_badlen - stp->st_orglen);
3358 	    vim_snprintf((char *)IObuff, IOSIZE, "%2d", i + 1);
3359 #ifdef FEAT_RIGHTLEFT
3360 	    if (cmdmsg_rl)
3361 		rl_mirror(IObuff);
3362 #endif
3363 	    msg_puts((char *)IObuff);
3364 
3365 	    vim_snprintf((char *)IObuff, IOSIZE, " \"%s\"", wcopy);
3366 	    msg_puts((char *)IObuff);
3367 
3368 	    /* The word may replace more than "su_badlen". */
3369 	    if (sug.su_badlen < stp->st_orglen)
3370 	    {
3371 		vim_snprintf((char *)IObuff, IOSIZE, _(" < \"%.*s\""),
3372 					       stp->st_orglen, sug.su_badptr);
3373 		msg_puts((char *)IObuff);
3374 	    }
3375 
3376 	    if (p_verbose > 0)
3377 	    {
3378 		/* Add the score. */
3379 		if (sps_flags & (SPS_DOUBLE | SPS_BEST))
3380 		    vim_snprintf((char *)IObuff, IOSIZE, " (%s%d - %d)",
3381 			stp->st_salscore ? "s " : "",
3382 			stp->st_score, stp->st_altscore);
3383 		else
3384 		    vim_snprintf((char *)IObuff, IOSIZE, " (%d)",
3385 			    stp->st_score);
3386 #ifdef FEAT_RIGHTLEFT
3387 		if (cmdmsg_rl)
3388 		    /* Mirror the numbers, but keep the leading space. */
3389 		    rl_mirror(IObuff + 1);
3390 #endif
3391 		msg_advance(30);
3392 		msg_puts((char *)IObuff);
3393 	    }
3394 	    msg_putchar('\n');
3395 	}
3396 
3397 #ifdef FEAT_RIGHTLEFT
3398 	cmdmsg_rl = FALSE;
3399 	msg_col = 0;
3400 #endif
3401 	/* Ask for choice. */
3402 	selected = prompt_for_number(&mouse_used);
3403 	if (mouse_used)
3404 	    selected -= lines_left;
3405 	lines_left = Rows;		/* avoid more prompt */
3406 	/* don't delay for 'smd' in normal_cmd() */
3407 	msg_scroll = msg_scroll_save;
3408     }
3409 
3410     if (selected > 0 && selected <= sug.su_ga.ga_len && u_save_cursor() == OK)
3411     {
3412 	/* Save the from and to text for :spellrepall. */
3413 	stp = &SUG(sug.su_ga, selected - 1);
3414 	if (sug.su_badlen > stp->st_orglen)
3415 	{
3416 	    /* Replacing less than "su_badlen", append the remainder to
3417 	     * repl_to. */
3418 	    repl_from = vim_strnsave(sug.su_badptr, sug.su_badlen);
3419 	    vim_snprintf((char *)IObuff, IOSIZE, "%s%.*s", stp->st_word,
3420 		    sug.su_badlen - stp->st_orglen,
3421 					      sug.su_badptr + stp->st_orglen);
3422 	    repl_to = vim_strsave(IObuff);
3423 	}
3424 	else
3425 	{
3426 	    /* Replacing su_badlen or more, use the whole word. */
3427 	    repl_from = vim_strnsave(sug.su_badptr, stp->st_orglen);
3428 	    repl_to = vim_strsave(stp->st_word);
3429 	}
3430 
3431 	/* Replace the word. */
3432 	p = alloc((unsigned)STRLEN(line) - stp->st_orglen
3433 						       + stp->st_wordlen + 1);
3434 	if (p != NULL)
3435 	{
3436 	    c = (int)(sug.su_badptr - line);
3437 	    mch_memmove(p, line, c);
3438 	    STRCPY(p + c, stp->st_word);
3439 	    STRCAT(p, sug.su_badptr + stp->st_orglen);
3440 	    ml_replace(curwin->w_cursor.lnum, p, FALSE);
3441 	    curwin->w_cursor.col = c;
3442 
3443 	    /* For redo we use a change-word command. */
3444 	    ResetRedobuff();
3445 	    AppendToRedobuff((char_u *)"ciw");
3446 	    AppendToRedobuffLit(p + c,
3447 			    stp->st_wordlen + sug.su_badlen - stp->st_orglen);
3448 	    AppendCharToRedobuff(ESC);
3449 
3450 	    /* After this "p" may be invalid. */
3451 	    changed_bytes(curwin->w_cursor.lnum, c);
3452 	}
3453     }
3454     else
3455 	curwin->w_cursor = prev_cursor;
3456 
3457     spell_find_cleanup(&sug);
3458 skip:
3459     vim_free(line);
3460 }
3461 
3462 /*
3463  * Check if the word at line "lnum" column "col" is required to start with a
3464  * capital.  This uses 'spellcapcheck' of the current buffer.
3465  */
3466     static int
3467 check_need_cap(linenr_T lnum, colnr_T col)
3468 {
3469     int		need_cap = FALSE;
3470     char_u	*line;
3471     char_u	*line_copy = NULL;
3472     char_u	*p;
3473     colnr_T	endcol;
3474     regmatch_T	regmatch;
3475 
3476     if (curwin->w_s->b_cap_prog == NULL)
3477 	return FALSE;
3478 
3479     line = ml_get_curline();
3480     endcol = 0;
3481     if (getwhitecols(line) >= (int)col)
3482     {
3483 	/* At start of line, check if previous line is empty or sentence
3484 	 * ends there. */
3485 	if (lnum == 1)
3486 	    need_cap = TRUE;
3487 	else
3488 	{
3489 	    line = ml_get(lnum - 1);
3490 	    if (*skipwhite(line) == NUL)
3491 		need_cap = TRUE;
3492 	    else
3493 	    {
3494 		/* Append a space in place of the line break. */
3495 		line_copy = concat_str(line, (char_u *)" ");
3496 		line = line_copy;
3497 		endcol = (colnr_T)STRLEN(line);
3498 	    }
3499 	}
3500     }
3501     else
3502 	endcol = col;
3503 
3504     if (endcol > 0)
3505     {
3506 	/* Check if sentence ends before the bad word. */
3507 	regmatch.regprog = curwin->w_s->b_cap_prog;
3508 	regmatch.rm_ic = FALSE;
3509 	p = line + endcol;
3510 	for (;;)
3511 	{
3512 	    MB_PTR_BACK(line, p);
3513 	    if (p == line || spell_iswordp_nmw(p, curwin))
3514 		break;
3515 	    if (vim_regexec(&regmatch, p, 0)
3516 					 && regmatch.endp[0] == line + endcol)
3517 	    {
3518 		need_cap = TRUE;
3519 		break;
3520 	    }
3521 	}
3522 	curwin->w_s->b_cap_prog = regmatch.regprog;
3523     }
3524 
3525     vim_free(line_copy);
3526 
3527     return need_cap;
3528 }
3529 
3530 
3531 /*
3532  * ":spellrepall"
3533  */
3534     void
3535 ex_spellrepall(exarg_T *eap UNUSED)
3536 {
3537     pos_T	pos = curwin->w_cursor;
3538     char_u	*frompat;
3539     int		addlen;
3540     char_u	*line;
3541     char_u	*p;
3542     int		save_ws = p_ws;
3543     linenr_T	prev_lnum = 0;
3544 
3545     if (repl_from == NULL || repl_to == NULL)
3546     {
3547 	emsg(_("E752: No previous spell replacement"));
3548 	return;
3549     }
3550     addlen = (int)(STRLEN(repl_to) - STRLEN(repl_from));
3551 
3552     frompat = alloc((unsigned)STRLEN(repl_from) + 7);
3553     if (frompat == NULL)
3554 	return;
3555     sprintf((char *)frompat, "\\V\\<%s\\>", repl_from);
3556     p_ws = FALSE;
3557 
3558     sub_nsubs = 0;
3559     sub_nlines = 0;
3560     curwin->w_cursor.lnum = 0;
3561     while (!got_int)
3562     {
3563 	if (do_search(NULL, '/', frompat, 1L, SEARCH_KEEP, NULL, NULL) == 0
3564 						   || u_save_cursor() == FAIL)
3565 	    break;
3566 
3567 	/* Only replace when the right word isn't there yet.  This happens
3568 	 * when changing "etc" to "etc.". */
3569 	line = ml_get_curline();
3570 	if (addlen <= 0 || STRNCMP(line + curwin->w_cursor.col,
3571 					       repl_to, STRLEN(repl_to)) != 0)
3572 	{
3573 	    p = alloc((unsigned)STRLEN(line) + addlen + 1);
3574 	    if (p == NULL)
3575 		break;
3576 	    mch_memmove(p, line, curwin->w_cursor.col);
3577 	    STRCPY(p + curwin->w_cursor.col, repl_to);
3578 	    STRCAT(p, line + curwin->w_cursor.col + STRLEN(repl_from));
3579 	    ml_replace(curwin->w_cursor.lnum, p, FALSE);
3580 	    changed_bytes(curwin->w_cursor.lnum, curwin->w_cursor.col);
3581 
3582 	    if (curwin->w_cursor.lnum != prev_lnum)
3583 	    {
3584 		++sub_nlines;
3585 		prev_lnum = curwin->w_cursor.lnum;
3586 	    }
3587 	    ++sub_nsubs;
3588 	}
3589 	curwin->w_cursor.col += (colnr_T)STRLEN(repl_to);
3590     }
3591 
3592     p_ws = save_ws;
3593     curwin->w_cursor = pos;
3594     vim_free(frompat);
3595 
3596     if (sub_nsubs == 0)
3597 	semsg(_("E753: Not found: %s"), repl_from);
3598     else
3599 	do_sub_msg(FALSE);
3600 }
3601 
3602 /*
3603  * Find spell suggestions for "word".  Return them in the growarray "*gap" as
3604  * a list of allocated strings.
3605  */
3606     void
3607 spell_suggest_list(
3608     garray_T	*gap,
3609     char_u	*word,
3610     int		maxcount,	/* maximum nr of suggestions */
3611     int		need_cap,	/* 'spellcapcheck' matched */
3612     int		interactive)
3613 {
3614     suginfo_T	sug;
3615     int		i;
3616     suggest_T	*stp;
3617     char_u	*wcopy;
3618 
3619     spell_find_suggest(word, 0, &sug, maxcount, FALSE, need_cap, interactive);
3620 
3621     /* Make room in "gap". */
3622     ga_init2(gap, sizeof(char_u *), sug.su_ga.ga_len + 1);
3623     if (ga_grow(gap, sug.su_ga.ga_len) == OK)
3624     {
3625 	for (i = 0; i < sug.su_ga.ga_len; ++i)
3626 	{
3627 	    stp = &SUG(sug.su_ga, i);
3628 
3629 	    /* The suggested word may replace only part of "word", add the not
3630 	     * replaced part. */
3631 	    wcopy = alloc(stp->st_wordlen
3632 		      + (unsigned)STRLEN(sug.su_badptr + stp->st_orglen) + 1);
3633 	    if (wcopy == NULL)
3634 		break;
3635 	    STRCPY(wcopy, stp->st_word);
3636 	    STRCPY(wcopy + stp->st_wordlen, sug.su_badptr + stp->st_orglen);
3637 	    ((char_u **)gap->ga_data)[gap->ga_len++] = wcopy;
3638 	}
3639     }
3640 
3641     spell_find_cleanup(&sug);
3642 }
3643 
/*
 * Find spell suggestions for the word at the start of "badptr".
 * Return the suggestions in "su->su_ga".
 * The maximum number of suggestions is "maxcount".
 * Note: does use info for the current window.
 * This is based on the mechanisms of Aspell, but completely reimplemented.
 *
 * "*su" is always initialized here, also when returning early (empty
 * "badptr" or out of memory), so that it can be passed to
 * spell_find_cleanup().
 * The items in 'spellsuggest' are handled in the order in which they appear:
 * "expr:" evaluates an expression, "file:" reads suggestions from a file,
 * every other item runs the internal method.
 */
    static void
spell_find_suggest(
    char_u	*badptr,
    int		badlen,		/* length of bad word or 0 if unknown */
    suginfo_T	*su,
    int		maxcount,
    int		banbadword,	/* don't include badword in suggestions */
    int		need_cap,	/* word should start with capital */
    int		interactive)
{
    hlf_T	attr = HLF_COUNT;
    char_u	buf[MAXPATHL];
    char_u	*p;
    int		do_combine = FALSE;
    char_u	*sps_copy;
#ifdef FEAT_EVAL
    static int	expr_busy = FALSE;
#endif
    int		c;
    int		i;
    langp_T	*lp;

    /*
     * Set the info in "*su".
     */
    vim_memset(su, 0, sizeof(suginfo_T));
    ga_init2(&su->su_ga, (int)sizeof(suggest_T), 10);
    ga_init2(&su->su_sga, (int)sizeof(suggest_T), 10);
    if (*badptr == NUL)
	return;
    hash_init(&su->su_banned);

    /* When the length was not given let spell_check() work it out.  This
     * also sets "attr", it remains HLF_COUNT when "badlen" was given. */
    su->su_badptr = badptr;
    if (badlen != 0)
	su->su_badlen = badlen;
    else
	su->su_badlen = spell_check(curwin, su->su_badptr, &attr, NULL, FALSE);
    su->su_maxcount = maxcount;
    su->su_maxscore = SCORE_MAXINIT;

    /* Keep a copy of the bad word and a case-folded copy. */
    if (su->su_badlen >= MAXWLEN)
	su->su_badlen = MAXWLEN - 1;	/* just in case */
    vim_strncpy(su->su_badword, su->su_badptr, su->su_badlen);
    (void)spell_casefold(su->su_badptr, su->su_badlen,
						    su->su_fbadword, MAXWLEN);
    /* TODO: make this work if the case-folded text is longer than the original
     * text. Currently an illegal byte causes wrong pointer computations. */
    su->su_fbadword[su->su_badlen] = NUL;

    /* get caps flags for bad word */
    su->su_badflags = badword_captype(su->su_badptr,
					       su->su_badptr + su->su_badlen);
    if (need_cap)
	su->su_badflags |= WF_ONECAP;

    /* Find the default language for sound folding.  We simply use the first
     * one in 'spelllang' that supports sound folding.  That's good for when
     * using multiple files for one language, it's not that bad when mixing
     * languages (e.g., "pl,en"). */
    for (i = 0; i < curbuf->b_s.b_langp.ga_len; ++i)
    {
	lp = LANGP_ENTRY(curbuf->b_s.b_langp, i);
	if (lp->lp_sallang != NULL)
	{
	    su->su_sallang = lp->lp_sallang;
	    break;
	}
    }

    /* Soundfold the bad word with the default sound folding, so that we don't
     * have to do this many times. */
    if (su->su_sallang != NULL)
	spell_soundfold(su->su_sallang, su->su_fbadword, TRUE,
							  su->su_sal_badword);

    /* If the word is not capitalised and spell_check() doesn't consider the
     * word to be bad then it might need to be capitalised.  Add a suggestion
     * for that. */
    c = PTR2CHAR(su->su_badptr);
    if (!SPELL_ISUPPER(c) && attr == HLF_COUNT)
    {
	make_case_word(su->su_badword, buf, WF_ONECAP);
	add_suggestion(su, &su->su_ga, buf, su->su_badlen, SCORE_ICASE,
					      0, TRUE, su->su_sallang, FALSE);
    }

    /* Ban the bad word itself.  It may appear in another region. */
    if (banbadword)
	add_banned(su, su->su_badword);

    /* Make a copy of 'spellsuggest', because the expression may change it. */
    sps_copy = vim_strsave(p_sps);
    if (sps_copy == NULL)
	return;

    /* Loop over the items in 'spellsuggest'. */
    for (p = sps_copy; *p != NUL; )
    {
	copy_option_part(&p, buf, MAXPATHL, ",");

	if (STRNCMP(buf, "expr:", 5) == 0)
	{
#ifdef FEAT_EVAL
	    /* Evaluate an expression.  Skip this when called recursively,
	     * when using spellsuggest() in the expression.  "expr_busy" is
	     * static, thus it is shared by the nested call. */
	    if (!expr_busy)
	    {
		expr_busy = TRUE;
		spell_suggest_expr(su, buf + 5);
		expr_busy = FALSE;
	    }
#endif
	}
	else if (STRNCMP(buf, "file:", 5) == 0)
	    /* Use list of suggestions in a file. */
	    spell_suggest_file(su, buf + 5);
	else
	{
	    /* Use internal method.  Note that this is done for every item
	     * that is not "expr:" or "file:", thus also for a number. */
	    spell_suggest_intern(su, interactive);
	    if (sps_flags & SPS_DOUBLE)
		do_combine = TRUE;
	}
    }

    vim_free(sps_copy);

    if (do_combine)
	/* Combine the two list of suggestions.  This must be done last,
	 * because sorting changes the order again. */
	score_combine(su);
}
3783 
#ifdef FEAT_EVAL
/*
 * Find suggestions by evaluating expression "expr".  Used for "expr:" in
 * 'spellsuggest'.
 * The expression is expected to result in a list of lists, each inner list
 * holding a word and a score.  Items that are not a list are skipped, and
 * so are words with a negative score or a score above "su_maxscore".
 */
    static void
spell_suggest_expr(suginfo_T *su, char_u *expr)
{
    list_T	*result;
    listitem_T	*item;
    char_u	*word;
    int		score;

    /* The work is split up in a few parts to avoid having to export
     * suginfo_T.
     * First evaluate the expression and get the resulting list. */
    result = eval_spell_expr(su->su_badword, expr);
    if (result != NULL)
    {
	/* Loop over the items in the list. */
	for (item = result->lv_first; item != NULL; item = item->li_next)
	{
	    if (item->li_tv.v_type != VAR_LIST)
		continue;

	    /* Get the word and the score from the items. */
	    score = get_spellword(item->li_tv.vval.v_list, &word);
	    if (score < 0 || score > su->su_maxscore)
		continue;
	    add_suggestion(su, &su->su_ga, word, su->su_badlen,
				       score, 0, TRUE, su->su_sallang, FALSE);
	}
	list_unref(result);
    }

    /* Remove bogus suggestions, sort and truncate at "maxcount". */
    check_suggestions(su, &su->su_ga);
    (void)cleanup_suggestions(&su->su_ga, su->su_maxscore, su->su_maxcount);
}
#endif
3820 
3821 /*
3822  * Find suggestions in file "fname".  Used for "file:" in 'spellsuggest'.
3823  */
3824     static void
3825 spell_suggest_file(suginfo_T *su, char_u *fname)
3826 {
3827     FILE	*fd;
3828     char_u	line[MAXWLEN * 2];
3829     char_u	*p;
3830     int		len;
3831     char_u	cword[MAXWLEN];
3832 
3833     /* Open the file. */
3834     fd = mch_fopen((char *)fname, "r");
3835     if (fd == NULL)
3836     {
3837 	semsg(_(e_notopen), fname);
3838 	return;
3839     }
3840 
3841     /* Read it line by line. */
3842     while (!vim_fgets(line, MAXWLEN * 2, fd) && !got_int)
3843     {
3844 	line_breakcheck();
3845 
3846 	p = vim_strchr(line, '/');
3847 	if (p == NULL)
3848 	    continue;	    /* No Tab found, just skip the line. */
3849 	*p++ = NUL;
3850 	if (STRICMP(su->su_badword, line) == 0)
3851 	{
3852 	    /* Match!  Isolate the good word, until CR or NL. */
3853 	    for (len = 0; p[len] >= ' '; ++len)
3854 		;
3855 	    p[len] = NUL;
3856 
3857 	    /* If the suggestion doesn't have specific case duplicate the case
3858 	     * of the bad word. */
3859 	    if (captype(p, NULL) == 0)
3860 	    {
3861 		make_case_word(p, cword, su->su_badflags);
3862 		p = cword;
3863 	    }
3864 
3865 	    add_suggestion(su, &su->su_ga, p, su->su_badlen,
3866 				  SCORE_FILE, 0, TRUE, su->su_sallang, FALSE);
3867 	}
3868     }
3869 
3870     fclose(fd);
3871 
3872     /* Remove bogus suggestions, sort and truncate at "maxcount". */
3873     check_suggestions(su, &su->su_ga);
3874     (void)cleanup_suggestions(&su->su_ga, su->su_maxscore, su->su_maxcount);
3875 }
3876 
3877 /*
3878  * Find suggestions for the internal method indicated by "sps_flags".
3879  */
3880     static void
3881 spell_suggest_intern(suginfo_T *su, int interactive)
3882 {
3883     /*
3884      * Load the .sug file(s) that are available and not done yet.
3885      */
3886     suggest_load_files();
3887 
3888     /*
3889      * 1. Try special cases, such as repeating a word: "the the" -> "the".
3890      *
3891      * Set a maximum score to limit the combination of operations that is
3892      * tried.
3893      */
3894     suggest_try_special(su);
3895 
3896     /*
3897      * 2. Try inserting/deleting/swapping/changing a letter, use REP entries
3898      *    from the .aff file and inserting a space (split the word).
3899      */
3900     suggest_try_change(su);
3901 
3902     /* For the resulting top-scorers compute the sound-a-like score. */
3903     if (sps_flags & SPS_DOUBLE)
3904 	score_comp_sal(su);
3905 
3906     /*
3907      * 3. Try finding sound-a-like words.
3908      */
3909     if ((sps_flags & SPS_FAST) == 0)
3910     {
3911 	if (sps_flags & SPS_BEST)
3912 	    /* Adjust the word score for the suggestions found so far for how
3913 	     * they sounds like. */
3914 	    rescore_suggestions(su);
3915 
3916 	/*
3917 	 * While going through the soundfold tree "su_maxscore" is the score
3918 	 * for the soundfold word, limits the changes that are being tried,
3919 	 * and "su_sfmaxscore" the rescored score, which is set by
3920 	 * cleanup_suggestions().
3921 	 * First find words with a small edit distance, because this is much
3922 	 * faster and often already finds the top-N suggestions.  If we didn't
3923 	 * find many suggestions try again with a higher edit distance.
3924 	 * "sl_sounddone" is used to avoid doing the same word twice.
3925 	 */
3926 	suggest_try_soundalike_prep();
3927 	su->su_maxscore = SCORE_SFMAX1;
3928 	su->su_sfmaxscore = SCORE_MAXINIT * 3;
3929 	suggest_try_soundalike(su);
3930 	if (su->su_ga.ga_len < SUG_CLEAN_COUNT(su))
3931 	{
3932 	    /* We didn't find enough matches, try again, allowing more
3933 	     * changes to the soundfold word. */
3934 	    su->su_maxscore = SCORE_SFMAX2;
3935 	    suggest_try_soundalike(su);
3936 	    if (su->su_ga.ga_len < SUG_CLEAN_COUNT(su))
3937 	    {
3938 		/* Still didn't find enough matches, try again, allowing even
3939 		 * more changes to the soundfold word. */
3940 		su->su_maxscore = SCORE_SFMAX3;
3941 		suggest_try_soundalike(su);
3942 	    }
3943 	}
3944 	su->su_maxscore = su->su_sfmaxscore;
3945 	suggest_try_soundalike_finish();
3946     }
3947 
3948     /* When CTRL-C was hit while searching do show the results.  Only clear
3949      * got_int when using a command, not for spellsuggest(). */
3950     ui_breakcheck();
3951     if (interactive && got_int)
3952     {
3953 	(void)vgetc();
3954 	got_int = FALSE;
3955     }
3956 
3957     if ((sps_flags & SPS_DOUBLE) == 0 && su->su_ga.ga_len != 0)
3958     {
3959 	if (sps_flags & SPS_BEST)
3960 	    /* Adjust the word score for how it sounds like. */
3961 	    rescore_suggestions(su);
3962 
3963 	/* Remove bogus suggestions, sort and truncate at "maxcount". */
3964 	check_suggestions(su, &su->su_ga);
3965 	(void)cleanup_suggestions(&su->su_ga, su->su_maxscore, su->su_maxcount);
3966     }
3967 }
3968 
3969 /*
3970  * Free the info put in "*su" by spell_find_suggest().
3971  */
3972     static void
3973 spell_find_cleanup(suginfo_T *su)
3974 {
3975     int		i;
3976 
3977     /* Free the suggestions. */
3978     for (i = 0; i < su->su_ga.ga_len; ++i)
3979 	vim_free(SUG(su->su_ga, i).st_word);
3980     ga_clear(&su->su_ga);
3981     for (i = 0; i < su->su_sga.ga_len; ++i)
3982 	vim_free(SUG(su->su_sga, i).st_word);
3983     ga_clear(&su->su_sga);
3984 
3985     /* Free the banned words. */
3986     hash_clear_all(&su->su_banned, 0);
3987 }
3988 
3989 /*
3990  * Make a copy of "word", with the first letter upper or lower cased, to
3991  * "wcopy[MAXWLEN]".  "word" must not be empty.
3992  * The result is NUL terminated.
3993  */
3994     void
3995 onecap_copy(
3996     char_u	*word,
3997     char_u	*wcopy,
3998     int		upper)	    /* TRUE: first letter made upper case */
3999 {
4000     char_u	*p;
4001     int		c;
4002     int		l;
4003 
4004     p = word;
4005     if (has_mbyte)
4006 	c = mb_cptr2char_adv(&p);
4007     else
4008 	c = *p++;
4009     if (upper)
4010 	c = SPELL_TOUPPER(c);
4011     else
4012 	c = SPELL_TOFOLD(c);
4013     if (has_mbyte)
4014 	l = mb_char2bytes(c, wcopy);
4015     else
4016     {
4017 	l = 1;
4018 	wcopy[0] = c;
4019     }
4020     vim_strncpy(wcopy + l, p, MAXWLEN - l - 1);
4021 }
4022 
4023 /*
4024  * Make a copy of "word" with all the letters upper cased into
4025  * "wcopy[MAXWLEN]".  The result is NUL terminated.
4026  */
4027     static void
4028 allcap_copy(char_u *word, char_u *wcopy)
4029 {
4030     char_u	*s;
4031     char_u	*d;
4032     int		c;
4033 
4034     d = wcopy;
4035     for (s = word; *s != NUL; )
4036     {
4037 	if (has_mbyte)
4038 	    c = mb_cptr2char_adv(&s);
4039 	else
4040 	    c = *s++;
4041 
4042 	/* We only change 0xdf to SS when we are certain latin1 is used.  It
4043 	 * would cause weird errors in other 8-bit encodings. */
4044 	if (enc_latin1like && c == 0xdf)
4045 	{
4046 	    c = 'S';
4047 	    if (d - wcopy >= MAXWLEN - 1)
4048 		break;
4049 	    *d++ = c;
4050 	}
4051 	else
4052 	    c = SPELL_TOUPPER(c);
4053 
4054 	if (has_mbyte)
4055 	{
4056 	    if (d - wcopy >= MAXWLEN - MB_MAXBYTES)
4057 		break;
4058 	    d += mb_char2bytes(c, d);
4059 	}
4060 	else
4061 	{
4062 	    if (d - wcopy >= MAXWLEN - 1)
4063 		break;
4064 	    *d++ = c;
4065 	}
4066     }
4067     *d = NUL;
4068 }
4069 
4070 /*
4071  * Try finding suggestions by recognizing specific situations.
4072  */
4073     static void
4074 suggest_try_special(suginfo_T *su)
4075 {
4076     char_u	*p;
4077     size_t	len;
4078     int		c;
4079     char_u	word[MAXWLEN];
4080 
4081     /*
4082      * Recognize a word that is repeated: "the the".
4083      */
4084     p = skiptowhite(su->su_fbadword);
4085     len = p - su->su_fbadword;
4086     p = skipwhite(p);
4087     if (STRLEN(p) == len && STRNCMP(su->su_fbadword, p, len) == 0)
4088     {
4089 	/* Include badflags: if the badword is onecap or allcap
4090 	 * use that for the goodword too: "The the" -> "The". */
4091 	c = su->su_fbadword[len];
4092 	su->su_fbadword[len] = NUL;
4093 	make_case_word(su->su_fbadword, word, su->su_badflags);
4094 	su->su_fbadword[len] = c;
4095 
4096 	/* Give a soundalike score of 0, compute the score as if deleting one
4097 	 * character. */
4098 	add_suggestion(su, &su->su_ga, word, su->su_badlen,
4099 		       RESCORE(SCORE_REP, 0), 0, TRUE, su->su_sallang, FALSE);
4100     }
4101 }
4102 
/*
 * Change the 0 to 1 to measure how much time is spent in each state.
 * Output is dumped in "suggestprof".
 *
 * When disabled only an empty PROF_STORE() is defined, so that its uses
 * expand to nothing.
 */
#if 0
# define SUGGEST_PROFILE
proftime_T current;			/* started at the last state change */
proftime_T total;			/* started by prof_init() */
proftime_T times[STATE_FINAL + 1];	/* time accumulated for each state */
long counts[STATE_FINAL + 1];		/* number of stores for each state */

/*
 * Reset the per-state times and counts and start the "current" and "total"
 * timers.
 */
    static void
prof_init(void)
{
    for (int i = 0; i <= STATE_FINAL; ++i)
    {
	profile_zero(&times[i]);
	counts[i] = 0;
    }
    profile_start(&current);
    profile_start(&total);
}

/*
 * Add the time measured since the previous call (or prof_init()) to
 * "state", count it and restart the "current" timer.
 * Call before changing state.
 */
    static void
prof_store(state_T state)
{
    profile_end(&current);
    profile_add(&times[state], &current);
    ++counts[state];
    profile_start(&current);
}
/* The ';' is part of the macro: PROF_STORE() is used without one. */
# define PROF_STORE(state) prof_store(state);

/*
 * Append a report to the file "suggestprof": the total time labelled with
 * "name", followed by the time and count for each state.
 * Nothing is written when the file cannot be opened.
 */
    static void
prof_report(char *name)
{
    FILE *fd = fopen("suggestprof", "a");

    profile_end(&total);

    /* This is only a debugging aid: skip the report when the file can't be
     * opened instead of passing a NULL pointer to fprintf() and fclose(). */
    if (fd == NULL)
	return;

    fprintf(fd, "-----------------------\n");
    fprintf(fd, "%s: %s\n", name, profile_msg(&total));
    for (int i = 0; i <= STATE_FINAL; ++i)
	fprintf(fd, "%d: %s (%ld)\n", i, profile_msg(&times[i]), counts[i]);
    fclose(fd);
}
#else
# define PROF_STORE(state)
#endif
4152 
4153 /*
4154  * Try finding suggestions by adding/removing/swapping letters.
4155  */
4156     static void
4157 suggest_try_change(suginfo_T *su)
4158 {
4159     char_u	fword[MAXWLEN];	    /* copy of the bad word, case-folded */
4160     int		n;
4161     char_u	*p;
4162     int		lpi;
4163     langp_T	*lp;
4164 
4165     /* We make a copy of the case-folded bad word, so that we can modify it
4166      * to find matches (esp. REP items).  Append some more text, changing
4167      * chars after the bad word may help. */
4168     STRCPY(fword, su->su_fbadword);
4169     n = (int)STRLEN(fword);
4170     p = su->su_badptr + su->su_badlen;
4171     (void)spell_casefold(p, (int)STRLEN(p), fword + n, MAXWLEN - n);
4172 
4173     for (lpi = 0; lpi < curwin->w_s->b_langp.ga_len; ++lpi)
4174     {
4175 	lp = LANGP_ENTRY(curwin->w_s->b_langp, lpi);
4176 
4177 	/* If reloading a spell file fails it's still in the list but
4178 	 * everything has been cleared. */
4179 	if (lp->lp_slang->sl_fbyts == NULL)
4180 	    continue;
4181 
4182 	/* Try it for this language.  Will add possible suggestions. */
4183 #ifdef SUGGEST_PROFILE
4184 	prof_init();
4185 #endif
4186 	suggest_trie_walk(su, lp, fword, FALSE);
4187 #ifdef SUGGEST_PROFILE
4188 	prof_report("try_change");
4189 #endif
4190     }
4191 }
4192 
/*
 * Check the maximum score, if we go over it we won't try this change.
 * Evaluates to non-zero when the score stored at "stack[depth]" plus "add"
 * is still below "su->su_maxscore".
 * The "su" and "stack" arguments are parenthesized in the expansion, so that
 * expressions such as "&info" or "base + 1" can be passed safely.
 */
#define TRY_DEEPER(su, stack, depth, add) \
		((stack)[depth].ts_score + (add) < (su)->su_maxscore)
4196 
4197 /*
4198  * Try finding suggestions by adding/removing/swapping letters.
4199  *
4200  * This uses a state machine.  At each node in the tree we try various
4201  * operations.  When trying if an operation works "depth" is increased and the
4202  * stack[] is used to store info.  This allows combinations, thus insert one
4203  * character, replace one and delete another.  The number of changes is
4204  * limited by su->su_maxscore.
4205  *
4206  * After implementing this I noticed an article by Kemal Oflazer that
4207  * describes something similar: "Error-tolerant Finite State Recognition with
4208  * Applications to Morphological Analysis and Spelling Correction" (1996).
4209  * The implementation in the article is simplified and requires a stack of
4210  * unknown depth.  The implementation here only needs a stack depth equal to
4211  * the length of the word.
4212  *
4213  * This is also used for the sound-folded word, "soundfold" is TRUE then.
4214  * The mechanism is the same, but we find a match with a sound-folded word
4215  * that comes from one or more original words.  Each of these words may be
4216  * added, this is done by add_sound_suggest().
4217  * Don't use:
4218  *	the prefix tree or the keep-case tree
4219  *	"su->su_badlen"
4220  *	anything to do with upper and lower case
4221  *	anything to do with word or non-word characters ("spell_iswordp()")
4222  *	banned words
4223  *	word flags (rare, region, compounding)
4224  *	word splitting for now
4225  *	"similar_chars()"
4226  *	use "slang->sl_repsal" instead of "lp->lp_replang->sl_rep"
4227  */
4228     static void
4229 suggest_trie_walk(
4230     suginfo_T	*su,
4231     langp_T	*lp,
4232     char_u	*fword,
4233     int		soundfold)
4234 {
4235     char_u	tword[MAXWLEN];	    /* good word collected so far */
4236     trystate_T	stack[MAXWLEN];
4237     char_u	preword[MAXWLEN * 3]; /* word found with proper case;
4238 				       * concatenation of prefix compound
4239 				       * words and split word.  NUL terminated
4240 				       * when going deeper but not when coming
4241 				       * back. */
4242     char_u	compflags[MAXWLEN];	/* compound flags, one for each word */
4243     trystate_T	*sp;
4244     int		newscore;
4245     int		score;
4246     char_u	*byts, *fbyts, *pbyts;
4247     idx_T	*idxs, *fidxs, *pidxs;
4248     int		depth;
4249     int		c, c2, c3;
4250     int		n = 0;
4251     int		flags;
4252     garray_T	*gap;
4253     idx_T	arridx;
4254     int		len;
4255     char_u	*p;
4256     fromto_T	*ftp;
4257     int		fl = 0, tl;
4258     int		repextra = 0;	    /* extra bytes in fword[] from REP item */
4259     slang_T	*slang = lp->lp_slang;
4260     int		fword_ends;
4261     int		goodword_ends;
4262 #ifdef DEBUG_TRIEWALK
4263     /* Stores the name of the change made at each level. */
4264     char_u	changename[MAXWLEN][80];
4265 #endif
4266     int		breakcheckcount = 1000;
4267     int		compound_ok;
4268 
4269     /*
4270      * Go through the whole case-fold tree, try changes at each node.
4271      * "tword[]" contains the word collected from nodes in the tree.
4272      * "fword[]" the word we are trying to match with (initially the bad
4273      * word).
4274      */
4275     depth = 0;
4276     sp = &stack[0];
4277     vim_memset(sp, 0, sizeof(trystate_T));
4278     sp->ts_curi = 1;
4279 
4280     if (soundfold)
4281     {
4282 	/* Going through the soundfold tree. */
4283 	byts = fbyts = slang->sl_sbyts;
4284 	idxs = fidxs = slang->sl_sidxs;
4285 	pbyts = NULL;
4286 	pidxs = NULL;
4287 	sp->ts_prefixdepth = PFD_NOPREFIX;
4288 	sp->ts_state = STATE_START;
4289     }
4290     else
4291     {
4292 	/*
4293 	 * When there are postponed prefixes we need to use these first.  At
4294 	 * the end of the prefix we continue in the case-fold tree.
4295 	 */
4296 	fbyts = slang->sl_fbyts;
4297 	fidxs = slang->sl_fidxs;
4298 	pbyts = slang->sl_pbyts;
4299 	pidxs = slang->sl_pidxs;
4300 	if (pbyts != NULL)
4301 	{
4302 	    byts = pbyts;
4303 	    idxs = pidxs;
4304 	    sp->ts_prefixdepth = PFD_PREFIXTREE;
4305 	    sp->ts_state = STATE_NOPREFIX;	/* try without prefix first */
4306 	}
4307 	else
4308 	{
4309 	    byts = fbyts;
4310 	    idxs = fidxs;
4311 	    sp->ts_prefixdepth = PFD_NOPREFIX;
4312 	    sp->ts_state = STATE_START;
4313 	}
4314     }
4315 
4316     /*
4317      * Loop to find all suggestions.  At each round we either:
4318      * - For the current state try one operation, advance "ts_curi",
4319      *   increase "depth".
4320      * - When a state is done go to the next, set "ts_state".
4321      * - When all states are tried decrease "depth".
4322      */
4323     while (depth >= 0 && !got_int)
4324     {
4325 	sp = &stack[depth];
4326 	switch (sp->ts_state)
4327 	{
4328 	case STATE_START:
4329 	case STATE_NOPREFIX:
4330 	    /*
4331 	     * Start of node: Deal with NUL bytes, which means
4332 	     * tword[] may end here.
4333 	     */
4334 	    arridx = sp->ts_arridx;	    /* current node in the tree */
4335 	    len = byts[arridx];		    /* bytes in this node */
4336 	    arridx += sp->ts_curi;	    /* index of current byte */
4337 
4338 	    if (sp->ts_prefixdepth == PFD_PREFIXTREE)
4339 	    {
4340 		/* Skip over the NUL bytes, we use them later. */
4341 		for (n = 0; n < len && byts[arridx + n] == 0; ++n)
4342 		    ;
4343 		sp->ts_curi += n;
4344 
4345 		/* Always past NUL bytes now. */
4346 		n = (int)sp->ts_state;
4347 		PROF_STORE(sp->ts_state)
4348 		sp->ts_state = STATE_ENDNUL;
4349 		sp->ts_save_badflags = su->su_badflags;
4350 
4351 		/* At end of a prefix or at start of prefixtree: check for
4352 		 * following word. */
4353 		if (byts[arridx] == 0 || n == (int)STATE_NOPREFIX)
4354 		{
4355 		    /* Set su->su_badflags to the caps type at this position.
4356 		     * Use the caps type until here for the prefix itself. */
4357 		    if (has_mbyte)
4358 			n = nofold_len(fword, sp->ts_fidx, su->su_badptr);
4359 		    else
4360 			n = sp->ts_fidx;
4361 		    flags = badword_captype(su->su_badptr, su->su_badptr + n);
4362 		    su->su_badflags = badword_captype(su->su_badptr + n,
4363 					       su->su_badptr + su->su_badlen);
4364 #ifdef DEBUG_TRIEWALK
4365 		    sprintf(changename[depth], "prefix");
4366 #endif
4367 		    go_deeper(stack, depth, 0);
4368 		    ++depth;
4369 		    sp = &stack[depth];
4370 		    sp->ts_prefixdepth = depth - 1;
4371 		    byts = fbyts;
4372 		    idxs = fidxs;
4373 		    sp->ts_arridx = 0;
4374 
4375 		    /* Move the prefix to preword[] with the right case
4376 		     * and make find_keepcap_word() works. */
4377 		    tword[sp->ts_twordlen] = NUL;
4378 		    make_case_word(tword + sp->ts_splitoff,
4379 					  preword + sp->ts_prewordlen, flags);
4380 		    sp->ts_prewordlen = (char_u)STRLEN(preword);
4381 		    sp->ts_splitoff = sp->ts_twordlen;
4382 		}
4383 		break;
4384 	    }
4385 
4386 	    if (sp->ts_curi > len || byts[arridx] != 0)
4387 	    {
4388 		/* Past bytes in node and/or past NUL bytes. */
4389 		PROF_STORE(sp->ts_state)
4390 		sp->ts_state = STATE_ENDNUL;
4391 		sp->ts_save_badflags = su->su_badflags;
4392 		break;
4393 	    }
4394 
4395 	    /*
4396 	     * End of word in tree.
4397 	     */
4398 	    ++sp->ts_curi;		/* eat one NUL byte */
4399 
4400 	    flags = (int)idxs[arridx];
4401 
4402 	    /* Skip words with the NOSUGGEST flag. */
4403 	    if (flags & WF_NOSUGGEST)
4404 		break;
4405 
4406 	    fword_ends = (fword[sp->ts_fidx] == NUL
4407 			   || (soundfold
4408 			       ? VIM_ISWHITE(fword[sp->ts_fidx])
4409 			       : !spell_iswordp(fword + sp->ts_fidx, curwin)));
4410 	    tword[sp->ts_twordlen] = NUL;
4411 
4412 	    if (sp->ts_prefixdepth <= PFD_NOTSPECIAL
4413 					&& (sp->ts_flags & TSF_PREFIXOK) == 0)
4414 	    {
4415 		/* There was a prefix before the word.  Check that the prefix
4416 		 * can be used with this word. */
4417 		/* Count the length of the NULs in the prefix.  If there are
4418 		 * none this must be the first try without a prefix.  */
4419 		n = stack[sp->ts_prefixdepth].ts_arridx;
4420 		len = pbyts[n++];
4421 		for (c = 0; c < len && pbyts[n + c] == 0; ++c)
4422 		    ;
4423 		if (c > 0)
4424 		{
4425 		    c = valid_word_prefix(c, n, flags,
4426 				       tword + sp->ts_splitoff, slang, FALSE);
4427 		    if (c == 0)
4428 			break;
4429 
4430 		    /* Use the WF_RARE flag for a rare prefix. */
4431 		    if (c & WF_RAREPFX)
4432 			flags |= WF_RARE;
4433 
4434 		    /* Tricky: when checking for both prefix and compounding
4435 		     * we run into the prefix flag first.
4436 		     * Remember that it's OK, so that we accept the prefix
4437 		     * when arriving at a compound flag. */
4438 		    sp->ts_flags |= TSF_PREFIXOK;
4439 		}
4440 	    }
4441 
4442 	    /* Check NEEDCOMPOUND: can't use word without compounding.  Do try
4443 	     * appending another compound word below. */
4444 	    if (sp->ts_complen == sp->ts_compsplit && fword_ends
4445 						     && (flags & WF_NEEDCOMP))
4446 		goodword_ends = FALSE;
4447 	    else
4448 		goodword_ends = TRUE;
4449 
4450 	    p = NULL;
4451 	    compound_ok = TRUE;
4452 	    if (sp->ts_complen > sp->ts_compsplit)
4453 	    {
4454 		if (slang->sl_nobreak)
4455 		{
4456 		    /* There was a word before this word.  When there was no
4457 		     * change in this word (it was correct) add the first word
4458 		     * as a suggestion.  If this word was corrected too, we
4459 		     * need to check if a correct word follows. */
4460 		    if (sp->ts_fidx - sp->ts_splitfidx
4461 					  == sp->ts_twordlen - sp->ts_splitoff
4462 			    && STRNCMP(fword + sp->ts_splitfidx,
4463 					tword + sp->ts_splitoff,
4464 					 sp->ts_fidx - sp->ts_splitfidx) == 0)
4465 		    {
4466 			preword[sp->ts_prewordlen] = NUL;
4467 			newscore = score_wordcount_adj(slang, sp->ts_score,
4468 						 preword + sp->ts_prewordlen,
4469 						 sp->ts_prewordlen > 0);
4470 			/* Add the suggestion if the score isn't too bad. */
4471 			if (newscore <= su->su_maxscore)
4472 			    add_suggestion(su, &su->su_ga, preword,
4473 				    sp->ts_splitfidx - repextra,
4474 				    newscore, 0, FALSE,
4475 				    lp->lp_sallang, FALSE);
4476 			break;
4477 		    }
4478 		}
4479 		else
4480 		{
4481 		    /* There was a compound word before this word.  If this
4482 		     * word does not support compounding then give up
4483 		     * (splitting is tried for the word without compound
4484 		     * flag). */
4485 		    if (((unsigned)flags >> 24) == 0
4486 			    || sp->ts_twordlen - sp->ts_splitoff
4487 						       < slang->sl_compminlen)
4488 			break;
4489 		    /* For multi-byte chars check character length against
4490 		     * COMPOUNDMIN. */
4491 		    if (has_mbyte
4492 			    && slang->sl_compminlen > 0
4493 			    && mb_charlen(tword + sp->ts_splitoff)
4494 						       < slang->sl_compminlen)
4495 			break;
4496 
4497 		    compflags[sp->ts_complen] = ((unsigned)flags >> 24);
4498 		    compflags[sp->ts_complen + 1] = NUL;
4499 		    vim_strncpy(preword + sp->ts_prewordlen,
4500 			    tword + sp->ts_splitoff,
4501 			    sp->ts_twordlen - sp->ts_splitoff);
4502 
4503 		    /* Verify CHECKCOMPOUNDPATTERN  rules. */
4504 		    if (match_checkcompoundpattern(preword,  sp->ts_prewordlen,
4505 							  &slang->sl_comppat))
4506 			compound_ok = FALSE;
4507 
4508 		    if (compound_ok)
4509 		    {
4510 			p = preword;
4511 			while (*skiptowhite(p) != NUL)
4512 			    p = skipwhite(skiptowhite(p));
4513 			if (fword_ends && !can_compound(slang, p,
4514 						compflags + sp->ts_compsplit))
4515 			    /* Compound is not allowed.  But it may still be
4516 			     * possible if we add another (short) word. */
4517 			    compound_ok = FALSE;
4518 		    }
4519 
4520 		    /* Get pointer to last char of previous word. */
4521 		    p = preword + sp->ts_prewordlen;
4522 		    MB_PTR_BACK(preword, p);
4523 		}
4524 	    }
4525 
4526 	    /*
4527 	     * Form the word with proper case in preword.
4528 	     * If there is a word from a previous split, append.
4529 	     * For the soundfold tree don't change the case, simply append.
4530 	     */
4531 	    if (soundfold)
4532 		STRCPY(preword + sp->ts_prewordlen, tword + sp->ts_splitoff);
4533 	    else if (flags & WF_KEEPCAP)
4534 		/* Must find the word in the keep-case tree. */
4535 		find_keepcap_word(slang, tword + sp->ts_splitoff,
4536 						 preword + sp->ts_prewordlen);
4537 	    else
4538 	    {
4539 		/* Include badflags: If the badword is onecap or allcap
4540 		 * use that for the goodword too.  But if the badword is
4541 		 * allcap and it's only one char long use onecap. */
4542 		c = su->su_badflags;
4543 		if ((c & WF_ALLCAP)
4544 			&& su->su_badlen == (*mb_ptr2len)(su->su_badptr))
4545 		    c = WF_ONECAP;
4546 		c |= flags;
4547 
4548 		/* When appending a compound word after a word character don't
4549 		 * use Onecap. */
4550 		if (p != NULL && spell_iswordp_nmw(p, curwin))
4551 		    c &= ~WF_ONECAP;
4552 		make_case_word(tword + sp->ts_splitoff,
4553 					      preword + sp->ts_prewordlen, c);
4554 	    }
4555 
4556 	    if (!soundfold)
4557 	    {
4558 		/* Don't use a banned word.  It may appear again as a good
4559 		 * word, thus remember it. */
4560 		if (flags & WF_BANNED)
4561 		{
4562 		    add_banned(su, preword + sp->ts_prewordlen);
4563 		    break;
4564 		}
4565 		if ((sp->ts_complen == sp->ts_compsplit
4566 			    && WAS_BANNED(su, preword + sp->ts_prewordlen))
4567 						   || WAS_BANNED(su, preword))
4568 		{
4569 		    if (slang->sl_compprog == NULL)
4570 			break;
4571 		    /* the word so far was banned but we may try compounding */
4572 		    goodword_ends = FALSE;
4573 		}
4574 	    }
4575 
4576 	    newscore = 0;
4577 	    if (!soundfold)	/* soundfold words don't have flags */
4578 	    {
4579 		if ((flags & WF_REGION)
4580 			    && (((unsigned)flags >> 16) & lp->lp_region) == 0)
4581 		    newscore += SCORE_REGION;
4582 		if (flags & WF_RARE)
4583 		    newscore += SCORE_RARE;
4584 
4585 		if (!spell_valid_case(su->su_badflags,
4586 				  captype(preword + sp->ts_prewordlen, NULL)))
4587 		    newscore += SCORE_ICASE;
4588 	    }
4589 
4590 	    /* TODO: how about splitting in the soundfold tree? */
4591 	    if (fword_ends
4592 		    && goodword_ends
4593 		    && sp->ts_fidx >= sp->ts_fidxtry
4594 		    && compound_ok)
4595 	    {
4596 		/* The badword also ends: add suggestions. */
4597 #ifdef DEBUG_TRIEWALK
4598 		if (soundfold && STRCMP(preword, "smwrd") == 0)
4599 		{
4600 		    int	    j;
4601 
4602 		    /* print the stack of changes that brought us here */
4603 		    smsg("------ %s -------", fword);
4604 		    for (j = 0; j < depth; ++j)
4605 			smsg("%s", changename[j]);
4606 		}
4607 #endif
4608 		if (soundfold)
4609 		{
4610 		    /* For soundfolded words we need to find the original
4611 		     * words, the edit distance and then add them. */
4612 		    add_sound_suggest(su, preword, sp->ts_score, lp);
4613 		}
4614 		else if (sp->ts_fidx > 0)
4615 		{
4616 		    /* Give a penalty when changing non-word char to word
4617 		     * char, e.g., "thes," -> "these". */
4618 		    p = fword + sp->ts_fidx;
4619 		    MB_PTR_BACK(fword, p);
4620 		    if (!spell_iswordp(p, curwin))
4621 		    {
4622 			p = preword + STRLEN(preword);
4623 			MB_PTR_BACK(preword, p);
4624 			if (spell_iswordp(p, curwin))
4625 			    newscore += SCORE_NONWORD;
4626 		    }
4627 
4628 		    /* Give a bonus to words seen before. */
4629 		    score = score_wordcount_adj(slang,
4630 						sp->ts_score + newscore,
4631 						preword + sp->ts_prewordlen,
4632 						sp->ts_prewordlen > 0);
4633 
4634 		    /* Add the suggestion if the score isn't too bad. */
4635 		    if (score <= su->su_maxscore)
4636 		    {
4637 			add_suggestion(su, &su->su_ga, preword,
4638 				    sp->ts_fidx - repextra,
4639 				    score, 0, FALSE, lp->lp_sallang, FALSE);
4640 
4641 			if (su->su_badflags & WF_MIXCAP)
4642 			{
4643 			    /* We really don't know if the word should be
4644 			     * upper or lower case, add both. */
4645 			    c = captype(preword, NULL);
4646 			    if (c == 0 || c == WF_ALLCAP)
4647 			    {
4648 				make_case_word(tword + sp->ts_splitoff,
4649 					      preword + sp->ts_prewordlen,
4650 						      c == 0 ? WF_ALLCAP : 0);
4651 
4652 				add_suggestion(su, &su->su_ga, preword,
4653 					sp->ts_fidx - repextra,
4654 					score + SCORE_ICASE, 0, FALSE,
4655 					lp->lp_sallang, FALSE);
4656 			    }
4657 			}
4658 		    }
4659 		}
4660 	    }
4661 
4662 	    /*
4663 	     * Try word split and/or compounding.
4664 	     */
4665 	    if ((sp->ts_fidx >= sp->ts_fidxtry || fword_ends)
4666 		    /* Don't split halfway a character. */
4667 		    && (!has_mbyte || sp->ts_tcharlen == 0))
4668 	    {
4669 		int	try_compound;
4670 		int	try_split;
4671 
4672 		/* If past the end of the bad word don't try a split.
4673 		 * Otherwise try changing the next word.  E.g., find
4674 		 * suggestions for "the the" where the second "the" is
4675 		 * different.  It's done like a split.
4676 		 * TODO: word split for soundfold words */
4677 		try_split = (sp->ts_fidx - repextra < su->su_badlen)
4678 								&& !soundfold;
4679 
4680 		/* Get here in several situations:
4681 		 * 1. The word in the tree ends:
4682 		 *    If the word allows compounding try that.  Otherwise try
4683 		 *    a split by inserting a space.  For both check that a
4684 		 *    valid words starts at fword[sp->ts_fidx].
4685 		 *    For NOBREAK do like compounding to be able to check if
4686 		 *    the next word is valid.
4687 		 * 2. The badword does end, but it was due to a change (e.g.,
4688 		 *    a swap).  No need to split, but do check that the
4689 		 *    following word is valid.
4690 		 * 3. The badword and the word in the tree end.  It may still
4691 		 *    be possible to compound another (short) word.
4692 		 */
4693 		try_compound = FALSE;
4694 		if (!soundfold
4695 			&& !slang->sl_nocompoundsugs
4696 			&& slang->sl_compprog != NULL
4697 			&& ((unsigned)flags >> 24) != 0
4698 			&& sp->ts_twordlen - sp->ts_splitoff
4699 						       >= slang->sl_compminlen
4700 			&& (!has_mbyte
4701 			    || slang->sl_compminlen == 0
4702 			    || mb_charlen(tword + sp->ts_splitoff)
4703 						      >= slang->sl_compminlen)
4704 			&& (slang->sl_compsylmax < MAXWLEN
4705 			    || sp->ts_complen + 1 - sp->ts_compsplit
4706 							  < slang->sl_compmax)
4707 			&& (can_be_compound(sp, slang,
4708 					 compflags, ((unsigned)flags >> 24))))
4709 
4710 		{
4711 		    try_compound = TRUE;
4712 		    compflags[sp->ts_complen] = ((unsigned)flags >> 24);
4713 		    compflags[sp->ts_complen + 1] = NUL;
4714 		}
4715 
4716 		/* For NOBREAK we never try splitting, it won't make any word
4717 		 * valid. */
4718 		if (slang->sl_nobreak && !slang->sl_nocompoundsugs)
4719 		    try_compound = TRUE;
4720 
4721 		/* If we could add a compound word, and it's also possible to
4722 		 * split at this point, do the split first and set
4723 		 * TSF_DIDSPLIT to avoid doing it again. */
4724 		else if (!fword_ends
4725 			&& try_compound
4726 			&& (sp->ts_flags & TSF_DIDSPLIT) == 0)
4727 		{
4728 		    try_compound = FALSE;
4729 		    sp->ts_flags |= TSF_DIDSPLIT;
4730 		    --sp->ts_curi;	    /* do the same NUL again */
4731 		    compflags[sp->ts_complen] = NUL;
4732 		}
4733 		else
4734 		    sp->ts_flags &= ~TSF_DIDSPLIT;
4735 
4736 		if (try_split || try_compound)
4737 		{
4738 		    if (!try_compound && (!fword_ends || !goodword_ends))
4739 		    {
4740 			/* If we're going to split need to check that the
4741 			 * words so far are valid for compounding.  If there
4742 			 * is only one word it must not have the NEEDCOMPOUND
4743 			 * flag. */
4744 			if (sp->ts_complen == sp->ts_compsplit
4745 						     && (flags & WF_NEEDCOMP))
4746 			    break;
4747 			p = preword;
4748 			while (*skiptowhite(p) != NUL)
4749 			    p = skipwhite(skiptowhite(p));
4750 			if (sp->ts_complen > sp->ts_compsplit
4751 				&& !can_compound(slang, p,
4752 						compflags + sp->ts_compsplit))
4753 			    break;
4754 
4755 			if (slang->sl_nosplitsugs)
4756 			    newscore += SCORE_SPLIT_NO;
4757 			else
4758 			    newscore += SCORE_SPLIT;
4759 
4760 			/* Give a bonus to words seen before. */
4761 			newscore = score_wordcount_adj(slang, newscore,
4762 					   preword + sp->ts_prewordlen, TRUE);
4763 		    }
4764 
4765 		    if (TRY_DEEPER(su, stack, depth, newscore))
4766 		    {
4767 			go_deeper(stack, depth, newscore);
4768 #ifdef DEBUG_TRIEWALK
4769 			if (!try_compound && !fword_ends)
4770 			    sprintf(changename[depth], "%.*s-%s: split",
4771 				 sp->ts_twordlen, tword, fword + sp->ts_fidx);
4772 			else
4773 			    sprintf(changename[depth], "%.*s-%s: compound",
4774 				 sp->ts_twordlen, tword, fword + sp->ts_fidx);
4775 #endif
4776 			/* Save things to be restored at STATE_SPLITUNDO. */
4777 			sp->ts_save_badflags = su->su_badflags;
4778 			PROF_STORE(sp->ts_state)
4779 			sp->ts_state = STATE_SPLITUNDO;
4780 
4781 			++depth;
4782 			sp = &stack[depth];
4783 
4784 			/* Append a space to preword when splitting. */
4785 			if (!try_compound && !fword_ends)
4786 			    STRCAT(preword, " ");
4787 			sp->ts_prewordlen = (char_u)STRLEN(preword);
4788 			sp->ts_splitoff = sp->ts_twordlen;
4789 			sp->ts_splitfidx = sp->ts_fidx;
4790 
4791 			/* If the badword has a non-word character at this
4792 			 * position skip it.  That means replacing the
4793 			 * non-word character with a space.  Always skip a
4794 			 * character when the word ends.  But only when the
4795 			 * good word can end. */
4796 			if (((!try_compound && !spell_iswordp_nmw(fword
4797 							       + sp->ts_fidx,
4798 							       curwin))
4799 				    || fword_ends)
4800 				&& fword[sp->ts_fidx] != NUL
4801 				&& goodword_ends)
4802 			{
4803 			    int	    l;
4804 
4805 			    l = MB_PTR2LEN(fword + sp->ts_fidx);
4806 			    if (fword_ends)
4807 			    {
4808 				/* Copy the skipped character to preword. */
4809 				mch_memmove(preword + sp->ts_prewordlen,
4810 						      fword + sp->ts_fidx, l);
4811 				sp->ts_prewordlen += l;
4812 				preword[sp->ts_prewordlen] = NUL;
4813 			    }
4814 			    else
4815 				sp->ts_score -= SCORE_SPLIT - SCORE_SUBST;
4816 			    sp->ts_fidx += l;
4817 			}
4818 
4819 			/* When compounding include compound flag in
4820 			 * compflags[] (already set above).  When splitting we
4821 			 * may start compounding over again.  */
4822 			if (try_compound)
4823 			    ++sp->ts_complen;
4824 			else
4825 			    sp->ts_compsplit = sp->ts_complen;
4826 			sp->ts_prefixdepth = PFD_NOPREFIX;
4827 
4828 			/* set su->su_badflags to the caps type at this
4829 			 * position */
4830 			if (has_mbyte)
4831 			    n = nofold_len(fword, sp->ts_fidx, su->su_badptr);
4832 			else
4833 			    n = sp->ts_fidx;
4834 			su->su_badflags = badword_captype(su->su_badptr + n,
4835 					       su->su_badptr + su->su_badlen);
4836 
4837 			/* Restart at top of the tree. */
4838 			sp->ts_arridx = 0;
4839 
4840 			/* If there are postponed prefixes, try these too. */
4841 			if (pbyts != NULL)
4842 			{
4843 			    byts = pbyts;
4844 			    idxs = pidxs;
4845 			    sp->ts_prefixdepth = PFD_PREFIXTREE;
4846 			    PROF_STORE(sp->ts_state)
4847 			    sp->ts_state = STATE_NOPREFIX;
4848 			}
4849 		    }
4850 		}
4851 	    }
4852 	    break;
4853 
4854 	case STATE_SPLITUNDO:
4855 	    /* Undo the changes done for word split or compound word. */
4856 	    su->su_badflags = sp->ts_save_badflags;
4857 
4858 	    /* Continue looking for NUL bytes. */
4859 	    PROF_STORE(sp->ts_state)
4860 	    sp->ts_state = STATE_START;
4861 
4862 	    /* In case we went into the prefix tree. */
4863 	    byts = fbyts;
4864 	    idxs = fidxs;
4865 	    break;
4866 
4867 	case STATE_ENDNUL:
4868 	    /* Past the NUL bytes in the node. */
4869 	    su->su_badflags = sp->ts_save_badflags;
4870 	    if (fword[sp->ts_fidx] == NUL && sp->ts_tcharlen == 0)
4871 	    {
4872 		/* The badword ends, can't use STATE_PLAIN. */
4873 		PROF_STORE(sp->ts_state)
4874 		sp->ts_state = STATE_DEL;
4875 		break;
4876 	    }
4877 	    PROF_STORE(sp->ts_state)
4878 	    sp->ts_state = STATE_PLAIN;
4879 	    /* FALLTHROUGH */
4880 
4881 	case STATE_PLAIN:
4882 	    /*
4883 	     * Go over all possible bytes at this node, add each to tword[]
4884 	     * and use child node.  "ts_curi" is the index.
4885 	     */
4886 	    arridx = sp->ts_arridx;
4887 	    if (sp->ts_curi > byts[arridx])
4888 	    {
4889 		/* Done all bytes at this node, do next state.  When still at
4890 		 * already changed bytes skip the other tricks. */
4891 		PROF_STORE(sp->ts_state)
4892 		if (sp->ts_fidx >= sp->ts_fidxtry)
4893 		    sp->ts_state = STATE_DEL;
4894 		else
4895 		    sp->ts_state = STATE_FINAL;
4896 	    }
4897 	    else
4898 	    {
4899 		arridx += sp->ts_curi++;
4900 		c = byts[arridx];
4901 
4902 		/* Normal byte, go one level deeper.  If it's not equal to the
4903 		 * byte in the bad word adjust the score.  But don't even try
4904 		 * when the byte was already changed.  And don't try when we
4905 		 * just deleted this byte, accepting it is always cheaper than
4906 		 * delete + substitute. */
4907 		if (c == fword[sp->ts_fidx]
4908 			|| (sp->ts_tcharlen > 0 && sp->ts_isdiff != DIFF_NONE))
4909 		    newscore = 0;
4910 		else
4911 		    newscore = SCORE_SUBST;
4912 		if ((newscore == 0
4913 			    || (sp->ts_fidx >= sp->ts_fidxtry
4914 				&& ((sp->ts_flags & TSF_DIDDEL) == 0
4915 				    || c != fword[sp->ts_delidx])))
4916 			&& TRY_DEEPER(su, stack, depth, newscore))
4917 		{
4918 		    go_deeper(stack, depth, newscore);
4919 #ifdef DEBUG_TRIEWALK
4920 		    if (newscore > 0)
4921 			sprintf(changename[depth], "%.*s-%s: subst %c to %c",
4922 				sp->ts_twordlen, tword, fword + sp->ts_fidx,
4923 				fword[sp->ts_fidx], c);
4924 		    else
4925 			sprintf(changename[depth], "%.*s-%s: accept %c",
4926 				sp->ts_twordlen, tword, fword + sp->ts_fidx,
4927 				fword[sp->ts_fidx]);
4928 #endif
4929 		    ++depth;
4930 		    sp = &stack[depth];
4931 		    ++sp->ts_fidx;
4932 		    tword[sp->ts_twordlen++] = c;
4933 		    sp->ts_arridx = idxs[arridx];
4934 		    if (newscore == SCORE_SUBST)
4935 			sp->ts_isdiff = DIFF_YES;
4936 		    if (has_mbyte)
4937 		    {
4938 			/* Multi-byte characters are a bit complicated to
4939 			 * handle: They differ when any of the bytes differ
4940 			 * and then their length may also differ. */
4941 			if (sp->ts_tcharlen == 0)
4942 			{
4943 			    /* First byte. */
4944 			    sp->ts_tcharidx = 0;
4945 			    sp->ts_tcharlen = MB_BYTE2LEN(c);
4946 			    sp->ts_fcharstart = sp->ts_fidx - 1;
4947 			    sp->ts_isdiff = (newscore != 0)
4948 						       ? DIFF_YES : DIFF_NONE;
4949 			}
4950 			else if (sp->ts_isdiff == DIFF_INSERT)
4951 			    /* When inserting trail bytes don't advance in the
4952 			     * bad word. */
4953 			    --sp->ts_fidx;
4954 			if (++sp->ts_tcharidx == sp->ts_tcharlen)
4955 			{
4956 			    /* Last byte of character. */
4957 			    if (sp->ts_isdiff == DIFF_YES)
4958 			    {
4959 				/* Correct ts_fidx for the byte length of the
4960 				 * character (we didn't check that before). */
4961 				sp->ts_fidx = sp->ts_fcharstart
4962 					    + MB_PTR2LEN(
4963 						    fword + sp->ts_fcharstart);
4964 				/* For changing a composing character adjust
4965 				 * the score from SCORE_SUBST to
4966 				 * SCORE_SUBCOMP. */
4967 				if (enc_utf8
4968 					&& utf_iscomposing(
4969 					    utf_ptr2char(tword
4970 						+ sp->ts_twordlen
4971 							   - sp->ts_tcharlen))
4972 					&& utf_iscomposing(
4973 					    utf_ptr2char(fword
4974 							+ sp->ts_fcharstart)))
4975 				    sp->ts_score -=
4976 						  SCORE_SUBST - SCORE_SUBCOMP;
4977 
4978 				/* For a similar character adjust score from
4979 				 * SCORE_SUBST to SCORE_SIMILAR. */
4980 				else if (!soundfold
4981 					&& slang->sl_has_map
4982 					&& similar_chars(slang,
4983 					    mb_ptr2char(tword
4984 						+ sp->ts_twordlen
4985 							   - sp->ts_tcharlen),
4986 					    mb_ptr2char(fword
4987 							+ sp->ts_fcharstart)))
4988 				    sp->ts_score -=
4989 						  SCORE_SUBST - SCORE_SIMILAR;
4990 			    }
4991 			    else if (sp->ts_isdiff == DIFF_INSERT
4992 					 && sp->ts_twordlen > sp->ts_tcharlen)
4993 			    {
4994 				p = tword + sp->ts_twordlen - sp->ts_tcharlen;
4995 				c = mb_ptr2char(p);
4996 				if (enc_utf8 && utf_iscomposing(c))
4997 				{
4998 				    /* Inserting a composing char doesn't
4999 				     * count that much. */
5000 				    sp->ts_score -= SCORE_INS - SCORE_INSCOMP;
5001 				}
5002 				else
5003 				{
5004 				    /* If the previous character was the same,
5005 				     * thus doubling a character, give a bonus
5006 				     * to the score.  Also for the soundfold
5007 				     * tree (might seem illogical but does
5008 				     * give better scores). */
5009 				    MB_PTR_BACK(tword, p);
5010 				    if (c == mb_ptr2char(p))
5011 					sp->ts_score -= SCORE_INS
5012 							       - SCORE_INSDUP;
5013 				}
5014 			    }
5015 
5016 			    /* Starting a new char, reset the length. */
5017 			    sp->ts_tcharlen = 0;
5018 			}
5019 		    }
5020 		    else
5021 		    {
5022 			/* If we found a similar char adjust the score.
5023 			 * We do this after calling go_deeper() because
5024 			 * it's slow. */
5025 			if (newscore != 0
5026 				&& !soundfold
5027 				&& slang->sl_has_map
5028 				&& similar_chars(slang,
5029 						   c, fword[sp->ts_fidx - 1]))
5030 			    sp->ts_score -= SCORE_SUBST - SCORE_SIMILAR;
5031 		    }
5032 		}
5033 	    }
5034 	    break;
5035 
5036 	case STATE_DEL:
5037 	    /* When past the first byte of a multi-byte char don't try
5038 	     * delete/insert/swap a character. */
5039 	    if (has_mbyte && sp->ts_tcharlen > 0)
5040 	    {
5041 		PROF_STORE(sp->ts_state)
5042 		sp->ts_state = STATE_FINAL;
5043 		break;
5044 	    }
5045 	    /*
5046 	     * Try skipping one character in the bad word (delete it).
5047 	     */
5048 	    PROF_STORE(sp->ts_state)
5049 	    sp->ts_state = STATE_INS_PREP;
5050 	    sp->ts_curi = 1;
5051 	    if (soundfold && sp->ts_fidx == 0 && fword[sp->ts_fidx] == '*')
5052 		/* Deleting a vowel at the start of a word counts less, see
5053 		 * soundalike_score(). */
5054 		newscore = 2 * SCORE_DEL / 3;
5055 	    else
5056 		newscore = SCORE_DEL;
5057 	    if (fword[sp->ts_fidx] != NUL
5058 				    && TRY_DEEPER(su, stack, depth, newscore))
5059 	    {
5060 		go_deeper(stack, depth, newscore);
5061 #ifdef DEBUG_TRIEWALK
5062 		sprintf(changename[depth], "%.*s-%s: delete %c",
5063 			sp->ts_twordlen, tword, fword + sp->ts_fidx,
5064 			fword[sp->ts_fidx]);
5065 #endif
5066 		++depth;
5067 
5068 		/* Remember what character we deleted, so that we can avoid
5069 		 * inserting it again. */
5070 		stack[depth].ts_flags |= TSF_DIDDEL;
5071 		stack[depth].ts_delidx = sp->ts_fidx;
5072 
5073 		/* Advance over the character in fword[].  Give a bonus to the
5074 		 * score if the same character is following "nn" -> "n".  It's
5075 		 * a bit illogical for soundfold tree but it does give better
5076 		 * results. */
5077 		if (has_mbyte)
5078 		{
5079 		    c = mb_ptr2char(fword + sp->ts_fidx);
5080 		    stack[depth].ts_fidx += MB_PTR2LEN(fword + sp->ts_fidx);
5081 		    if (enc_utf8 && utf_iscomposing(c))
5082 			stack[depth].ts_score -= SCORE_DEL - SCORE_DELCOMP;
5083 		    else if (c == mb_ptr2char(fword + stack[depth].ts_fidx))
5084 			stack[depth].ts_score -= SCORE_DEL - SCORE_DELDUP;
5085 		}
5086 		else
5087 		{
5088 		    ++stack[depth].ts_fidx;
5089 		    if (fword[sp->ts_fidx] == fword[sp->ts_fidx + 1])
5090 			stack[depth].ts_score -= SCORE_DEL - SCORE_DELDUP;
5091 		}
5092 		break;
5093 	    }
5094 	    /* FALLTHROUGH */
5095 
5096 	case STATE_INS_PREP:
5097 	    if (sp->ts_flags & TSF_DIDDEL)
5098 	    {
5099 		/* If we just deleted a byte then inserting won't make sense,
5100 		 * a substitute is always cheaper. */
5101 		PROF_STORE(sp->ts_state)
5102 		sp->ts_state = STATE_SWAP;
5103 		break;
5104 	    }
5105 
5106 	    /* skip over NUL bytes */
5107 	    n = sp->ts_arridx;
5108 	    for (;;)
5109 	    {
5110 		if (sp->ts_curi > byts[n])
5111 		{
5112 		    /* Only NUL bytes at this node, go to next state. */
5113 		    PROF_STORE(sp->ts_state)
5114 		    sp->ts_state = STATE_SWAP;
5115 		    break;
5116 		}
5117 		if (byts[n + sp->ts_curi] != NUL)
5118 		{
5119 		    /* Found a byte to insert. */
5120 		    PROF_STORE(sp->ts_state)
5121 		    sp->ts_state = STATE_INS;
5122 		    break;
5123 		}
5124 		++sp->ts_curi;
5125 	    }
5126 	    break;
5127 
5128 	    /* FALLTHROUGH */
5129 
5130 	case STATE_INS:
5131 	    /* Insert one byte.  Repeat this for each possible byte at this
5132 	     * node. */
5133 	    n = sp->ts_arridx;
5134 	    if (sp->ts_curi > byts[n])
5135 	    {
5136 		/* Done all bytes at this node, go to next state. */
5137 		PROF_STORE(sp->ts_state)
5138 		sp->ts_state = STATE_SWAP;
5139 		break;
5140 	    }
5141 
5142 	    /* Do one more byte at this node, but:
5143 	     * - Skip NUL bytes.
5144 	     * - Skip the byte if it's equal to the byte in the word,
5145 	     *   accepting that byte is always better.
5146 	     */
5147 	    n += sp->ts_curi++;
5148 	    c = byts[n];
5149 	    if (soundfold && sp->ts_twordlen == 0 && c == '*')
5150 		/* Inserting a vowel at the start of a word counts less,
5151 		 * see soundalike_score(). */
5152 		newscore = 2 * SCORE_INS / 3;
5153 	    else
5154 		newscore = SCORE_INS;
5155 	    if (c != fword[sp->ts_fidx]
5156 				    && TRY_DEEPER(su, stack, depth, newscore))
5157 	    {
5158 		go_deeper(stack, depth, newscore);
5159 #ifdef DEBUG_TRIEWALK
5160 		sprintf(changename[depth], "%.*s-%s: insert %c",
5161 			sp->ts_twordlen, tword, fword + sp->ts_fidx,
5162 			c);
5163 #endif
5164 		++depth;
5165 		sp = &stack[depth];
5166 		tword[sp->ts_twordlen++] = c;
5167 		sp->ts_arridx = idxs[n];
5168 		if (has_mbyte)
5169 		{
5170 		    fl = MB_BYTE2LEN(c);
5171 		    if (fl > 1)
5172 		    {
5173 			/* There are following bytes for the same character.
5174 			 * We must find all bytes before trying
5175 			 * delete/insert/swap/etc. */
5176 			sp->ts_tcharlen = fl;
5177 			sp->ts_tcharidx = 1;
5178 			sp->ts_isdiff = DIFF_INSERT;
5179 		    }
5180 		}
5181 		else
5182 		    fl = 1;
5183 		if (fl == 1)
5184 		{
5185 		    /* If the previous character was the same, thus doubling a
5186 		     * character, give a bonus to the score.  Also for
5187 		     * soundfold words (illogical but does give a better
5188 		     * score). */
5189 		    if (sp->ts_twordlen >= 2
5190 					   && tword[sp->ts_twordlen - 2] == c)
5191 			sp->ts_score -= SCORE_INS - SCORE_INSDUP;
5192 		}
5193 	    }
5194 	    break;
5195 
5196 	case STATE_SWAP:
5197 	    /*
5198 	     * Swap two bytes in the bad word: "12" -> "21".
5199 	     * We change "fword" here, it's changed back afterwards at
5200 	     * STATE_UNSWAP.
5201 	     */
5202 	    p = fword + sp->ts_fidx;
5203 	    c = *p;
5204 	    if (c == NUL)
5205 	    {
5206 		/* End of word, can't swap or replace. */
5207 		PROF_STORE(sp->ts_state)
5208 		sp->ts_state = STATE_FINAL;
5209 		break;
5210 	    }
5211 
5212 	    /* Don't swap if the first character is not a word character.
5213 	     * SWAP3 etc. also don't make sense then. */
5214 	    if (!soundfold && !spell_iswordp(p, curwin))
5215 	    {
5216 		PROF_STORE(sp->ts_state)
5217 		sp->ts_state = STATE_REP_INI;
5218 		break;
5219 	    }
5220 
5221 	    if (has_mbyte)
5222 	    {
5223 		n = MB_CPTR2LEN(p);
5224 		c = mb_ptr2char(p);
5225 		if (p[n] == NUL)
5226 		    c2 = NUL;
5227 		else if (!soundfold && !spell_iswordp(p + n, curwin))
5228 		    c2 = c; /* don't swap non-word char */
5229 		else
5230 		    c2 = mb_ptr2char(p + n);
5231 	    }
5232 	    else
5233 	    {
5234 		if (p[1] == NUL)
5235 		    c2 = NUL;
5236 		else if (!soundfold && !spell_iswordp(p + 1, curwin))
5237 		    c2 = c; /* don't swap non-word char */
5238 		else
5239 		    c2 = p[1];
5240 	    }
5241 
5242 	    /* When the second character is NUL we can't swap. */
5243 	    if (c2 == NUL)
5244 	    {
5245 		PROF_STORE(sp->ts_state)
5246 		sp->ts_state = STATE_REP_INI;
5247 		break;
5248 	    }
5249 
5250 	    /* When characters are identical, swap won't do anything.
5251 	     * Also get here if the second char is not a word character. */
5252 	    if (c == c2)
5253 	    {
5254 		PROF_STORE(sp->ts_state)
5255 		sp->ts_state = STATE_SWAP3;
5256 		break;
5257 	    }
5258 	    if (c2 != NUL && TRY_DEEPER(su, stack, depth, SCORE_SWAP))
5259 	    {
5260 		go_deeper(stack, depth, SCORE_SWAP);
5261 #ifdef DEBUG_TRIEWALK
5262 		sprintf(changename[depth], "%.*s-%s: swap %c and %c",
5263 			sp->ts_twordlen, tword, fword + sp->ts_fidx,
5264 			c, c2);
5265 #endif
5266 		PROF_STORE(sp->ts_state)
5267 		sp->ts_state = STATE_UNSWAP;
5268 		++depth;
5269 		if (has_mbyte)
5270 		{
5271 		    fl = mb_char2len(c2);
5272 		    mch_memmove(p, p + n, fl);
5273 		    mb_char2bytes(c, p + fl);
5274 		    stack[depth].ts_fidxtry = sp->ts_fidx + n + fl;
5275 		}
5276 		else
5277 		{
5278 		    p[0] = c2;
5279 		    p[1] = c;
5280 		    stack[depth].ts_fidxtry = sp->ts_fidx + 2;
5281 		}
5282 	    }
5283 	    else
5284 	    {
5285 		/* If this swap doesn't work then SWAP3 won't either. */
5286 		PROF_STORE(sp->ts_state)
5287 		sp->ts_state = STATE_REP_INI;
5288 	    }
5289 	    break;
5290 
5291 	case STATE_UNSWAP:
5292 	    /* Undo the STATE_SWAP swap: "21" -> "12". */
5293 	    p = fword + sp->ts_fidx;
5294 	    if (has_mbyte)
5295 	    {
5296 		n = MB_PTR2LEN(p);
5297 		c = mb_ptr2char(p + n);
5298 		mch_memmove(p + MB_PTR2LEN(p + n), p, n);
5299 		mb_char2bytes(c, p);
5300 	    }
5301 	    else
5302 	    {
5303 		c = *p;
5304 		*p = p[1];
5305 		p[1] = c;
5306 	    }
5307 	    /* FALLTHROUGH */
5308 
5309 	case STATE_SWAP3:
5310 	    /* Swap two bytes, skipping one: "123" -> "321".  We change
5311 	     * "fword" here, it's changed back afterwards at STATE_UNSWAP3. */
5312 	    p = fword + sp->ts_fidx;
5313 	    if (has_mbyte)
5314 	    {
5315 		n = MB_CPTR2LEN(p);
5316 		c = mb_ptr2char(p);
5317 		fl = MB_CPTR2LEN(p + n);
5318 		c2 = mb_ptr2char(p + n);
5319 		if (!soundfold && !spell_iswordp(p + n + fl, curwin))
5320 		    c3 = c;	/* don't swap non-word char */
5321 		else
5322 		    c3 = mb_ptr2char(p + n + fl);
5323 	    }
5324 	    else
5325 	    {
5326 		c = *p;
5327 		c2 = p[1];
5328 		if (!soundfold && !spell_iswordp(p + 2, curwin))
5329 		    c3 = c;	/* don't swap non-word char */
5330 		else
5331 		    c3 = p[2];
5332 	    }
5333 
5334 	    /* When characters are identical: "121" then SWAP3 result is
5335 	     * identical, ROT3L result is same as SWAP: "211", ROT3L result is
5336 	     * same as SWAP on next char: "112".  Thus skip all swapping.
5337 	     * Also skip when c3 is NUL.
5338 	     * Also get here when the third character is not a word character.
5339 	     * Second character may any char: "a.b" -> "b.a" */
5340 	    if (c == c3 || c3 == NUL)
5341 	    {
5342 		PROF_STORE(sp->ts_state)
5343 		sp->ts_state = STATE_REP_INI;
5344 		break;
5345 	    }
5346 	    if (TRY_DEEPER(su, stack, depth, SCORE_SWAP3))
5347 	    {
5348 		go_deeper(stack, depth, SCORE_SWAP3);
5349 #ifdef DEBUG_TRIEWALK
5350 		sprintf(changename[depth], "%.*s-%s: swap3 %c and %c",
5351 			sp->ts_twordlen, tword, fword + sp->ts_fidx,
5352 			c, c3);
5353 #endif
5354 		PROF_STORE(sp->ts_state)
5355 		sp->ts_state = STATE_UNSWAP3;
5356 		++depth;
5357 		if (has_mbyte)
5358 		{
5359 		    tl = mb_char2len(c3);
5360 		    mch_memmove(p, p + n + fl, tl);
5361 		    mb_char2bytes(c2, p + tl);
5362 		    mb_char2bytes(c, p + fl + tl);
5363 		    stack[depth].ts_fidxtry = sp->ts_fidx + n + fl + tl;
5364 		}
5365 		else
5366 		{
5367 		    p[0] = p[2];
5368 		    p[2] = c;
5369 		    stack[depth].ts_fidxtry = sp->ts_fidx + 3;
5370 		}
5371 	    }
5372 	    else
5373 	    {
5374 		PROF_STORE(sp->ts_state)
5375 		sp->ts_state = STATE_REP_INI;
5376 	    }
5377 	    break;
5378 
5379 	case STATE_UNSWAP3:
5380 	    /* Undo STATE_SWAP3: "321" -> "123" */
5381 	    p = fword + sp->ts_fidx;
5382 	    if (has_mbyte)
5383 	    {
5384 		n = MB_PTR2LEN(p);
5385 		c2 = mb_ptr2char(p + n);
5386 		fl = MB_PTR2LEN(p + n);
5387 		c = mb_ptr2char(p + n + fl);
5388 		tl = MB_PTR2LEN(p + n + fl);
5389 		mch_memmove(p + fl + tl, p, n);
5390 		mb_char2bytes(c, p);
5391 		mb_char2bytes(c2, p + tl);
5392 		p = p + tl;
5393 	    }
5394 	    else
5395 	    {
5396 		c = *p;
5397 		*p = p[2];
5398 		p[2] = c;
5399 		++p;
5400 	    }
5401 
5402 	    if (!soundfold && !spell_iswordp(p, curwin))
5403 	    {
5404 		/* Middle char is not a word char, skip the rotate.  First and
5405 		 * third char were already checked at swap and swap3. */
5406 		PROF_STORE(sp->ts_state)
5407 		sp->ts_state = STATE_REP_INI;
5408 		break;
5409 	    }
5410 
5411 	    /* Rotate three characters left: "123" -> "231".  We change
5412 	     * "fword" here, it's changed back afterwards at STATE_UNROT3L. */
5413 	    if (TRY_DEEPER(su, stack, depth, SCORE_SWAP3))
5414 	    {
5415 		go_deeper(stack, depth, SCORE_SWAP3);
5416 #ifdef DEBUG_TRIEWALK
5417 		p = fword + sp->ts_fidx;
5418 		sprintf(changename[depth], "%.*s-%s: rotate left %c%c%c",
5419 			sp->ts_twordlen, tword, fword + sp->ts_fidx,
5420 			p[0], p[1], p[2]);
5421 #endif
5422 		PROF_STORE(sp->ts_state)
5423 		sp->ts_state = STATE_UNROT3L;
5424 		++depth;
5425 		p = fword + sp->ts_fidx;
5426 		if (has_mbyte)
5427 		{
5428 		    n = MB_CPTR2LEN(p);
5429 		    c = mb_ptr2char(p);
5430 		    fl = MB_CPTR2LEN(p + n);
5431 		    fl += MB_CPTR2LEN(p + n + fl);
5432 		    mch_memmove(p, p + n, fl);
5433 		    mb_char2bytes(c, p + fl);
5434 		    stack[depth].ts_fidxtry = sp->ts_fidx + n + fl;
5435 		}
5436 		else
5437 		{
5438 		    c = *p;
5439 		    *p = p[1];
5440 		    p[1] = p[2];
5441 		    p[2] = c;
5442 		    stack[depth].ts_fidxtry = sp->ts_fidx + 3;
5443 		}
5444 	    }
5445 	    else
5446 	    {
5447 		PROF_STORE(sp->ts_state)
5448 		sp->ts_state = STATE_REP_INI;
5449 	    }
5450 	    break;
5451 
5452 	case STATE_UNROT3L:
5453 	    /* Undo ROT3L: "231" -> "123" */
5454 	    p = fword + sp->ts_fidx;
5455 	    if (has_mbyte)
5456 	    {
5457 		n = MB_PTR2LEN(p);
5458 		n += MB_PTR2LEN(p + n);
5459 		c = mb_ptr2char(p + n);
5460 		tl = MB_PTR2LEN(p + n);
5461 		mch_memmove(p + tl, p, n);
5462 		mb_char2bytes(c, p);
5463 	    }
5464 	    else
5465 	    {
5466 		c = p[2];
5467 		p[2] = p[1];
5468 		p[1] = *p;
5469 		*p = c;
5470 	    }
5471 
5472 	    /* Rotate three bytes right: "123" -> "312".  We change "fword"
5473 	     * here, it's changed back afterwards at STATE_UNROT3R. */
5474 	    if (TRY_DEEPER(su, stack, depth, SCORE_SWAP3))
5475 	    {
5476 		go_deeper(stack, depth, SCORE_SWAP3);
5477 #ifdef DEBUG_TRIEWALK
5478 		p = fword + sp->ts_fidx;
5479 		sprintf(changename[depth], "%.*s-%s: rotate right %c%c%c",
5480 			sp->ts_twordlen, tword, fword + sp->ts_fidx,
5481 			p[0], p[1], p[2]);
5482 #endif
5483 		PROF_STORE(sp->ts_state)
5484 		sp->ts_state = STATE_UNROT3R;
5485 		++depth;
5486 		p = fword + sp->ts_fidx;
5487 		if (has_mbyte)
5488 		{
5489 		    n = MB_CPTR2LEN(p);
5490 		    n += MB_CPTR2LEN(p + n);
5491 		    c = mb_ptr2char(p + n);
5492 		    tl = MB_CPTR2LEN(p + n);
5493 		    mch_memmove(p + tl, p, n);
5494 		    mb_char2bytes(c, p);
5495 		    stack[depth].ts_fidxtry = sp->ts_fidx + n + tl;
5496 		}
5497 		else
5498 		{
5499 		    c = p[2];
5500 		    p[2] = p[1];
5501 		    p[1] = *p;
5502 		    *p = c;
5503 		    stack[depth].ts_fidxtry = sp->ts_fidx + 3;
5504 		}
5505 	    }
5506 	    else
5507 	    {
5508 		PROF_STORE(sp->ts_state)
5509 		sp->ts_state = STATE_REP_INI;
5510 	    }
5511 	    break;
5512 
5513 	case STATE_UNROT3R:
5514 	    /* Undo ROT3R: "312" -> "123" */
5515 	    p = fword + sp->ts_fidx;
5516 	    if (has_mbyte)
5517 	    {
5518 		c = mb_ptr2char(p);
5519 		tl = MB_PTR2LEN(p);
5520 		n = MB_PTR2LEN(p + tl);
5521 		n += MB_PTR2LEN(p + tl + n);
5522 		mch_memmove(p, p + tl, n);
5523 		mb_char2bytes(c, p + n);
5524 	    }
5525 	    else
5526 	    {
5527 		c = *p;
5528 		*p = p[1];
5529 		p[1] = p[2];
5530 		p[2] = c;
5531 	    }
5532 	    /* FALLTHROUGH */
5533 
5534 	case STATE_REP_INI:
5535 	    /* Check if matching with REP items from the .aff file would work.
5536 	     * Quickly skip if:
5537 	     * - there are no REP items and we are not in the soundfold trie
5538 	     * - the score is going to be too high anyway
5539 	     * - already applied a REP item or swapped here  */
5540 	    if ((lp->lp_replang == NULL && !soundfold)
5541 		    || sp->ts_score + SCORE_REP >= su->su_maxscore
5542 		    || sp->ts_fidx < sp->ts_fidxtry)
5543 	    {
5544 		PROF_STORE(sp->ts_state)
5545 		sp->ts_state = STATE_FINAL;
5546 		break;
5547 	    }
5548 
5549 	    /* Use the first byte to quickly find the first entry that may
5550 	     * match.  If the index is -1 there is none. */
5551 	    if (soundfold)
5552 		sp->ts_curi = slang->sl_repsal_first[fword[sp->ts_fidx]];
5553 	    else
5554 		sp->ts_curi = lp->lp_replang->sl_rep_first[fword[sp->ts_fidx]];
5555 
5556 	    if (sp->ts_curi < 0)
5557 	    {
5558 		PROF_STORE(sp->ts_state)
5559 		sp->ts_state = STATE_FINAL;
5560 		break;
5561 	    }
5562 
5563 	    PROF_STORE(sp->ts_state)
5564 	    sp->ts_state = STATE_REP;
5565 	    /* FALLTHROUGH */
5566 
5567 	case STATE_REP:
5568 	    /* Try matching with REP items from the .aff file.  For each match
5569 	     * replace the characters and check if the resulting word is
5570 	     * valid. */
5571 	    p = fword + sp->ts_fidx;
5572 
5573 	    if (soundfold)
5574 		gap = &slang->sl_repsal;
5575 	    else
5576 		gap = &lp->lp_replang->sl_rep;
5577 	    while (sp->ts_curi < gap->ga_len)
5578 	    {
5579 		ftp = (fromto_T *)gap->ga_data + sp->ts_curi++;
5580 		if (*ftp->ft_from != *p)
5581 		{
5582 		    /* past possible matching entries */
5583 		    sp->ts_curi = gap->ga_len;
5584 		    break;
5585 		}
5586 		if (STRNCMP(ftp->ft_from, p, STRLEN(ftp->ft_from)) == 0
5587 			&& TRY_DEEPER(su, stack, depth, SCORE_REP))
5588 		{
5589 		    go_deeper(stack, depth, SCORE_REP);
5590 #ifdef DEBUG_TRIEWALK
5591 		    sprintf(changename[depth], "%.*s-%s: replace %s with %s",
5592 			    sp->ts_twordlen, tword, fword + sp->ts_fidx,
5593 			    ftp->ft_from, ftp->ft_to);
5594 #endif
5595 		    /* Need to undo this afterwards. */
5596 		    PROF_STORE(sp->ts_state)
5597 		    sp->ts_state = STATE_REP_UNDO;
5598 
5599 		    /* Change the "from" to the "to" string. */
5600 		    ++depth;
5601 		    fl = (int)STRLEN(ftp->ft_from);
5602 		    tl = (int)STRLEN(ftp->ft_to);
5603 		    if (fl != tl)
5604 		    {
5605 			STRMOVE(p + tl, p + fl);
5606 			repextra += tl - fl;
5607 		    }
5608 		    mch_memmove(p, ftp->ft_to, tl);
5609 		    stack[depth].ts_fidxtry = sp->ts_fidx + tl;
5610 		    stack[depth].ts_tcharlen = 0;
5611 		    break;
5612 		}
5613 	    }
5614 
5615 	    if (sp->ts_curi >= gap->ga_len && sp->ts_state == STATE_REP)
5616 	    {
5617 		/* No (more) matches. */
5618 		PROF_STORE(sp->ts_state)
5619 		sp->ts_state = STATE_FINAL;
5620 	    }
5621 
5622 	    break;
5623 
5624 	case STATE_REP_UNDO:
5625 	    /* Undo a REP replacement and continue with the next one. */
5626 	    if (soundfold)
5627 		gap = &slang->sl_repsal;
5628 	    else
5629 		gap = &lp->lp_replang->sl_rep;
5630 	    ftp = (fromto_T *)gap->ga_data + sp->ts_curi - 1;
5631 	    fl = (int)STRLEN(ftp->ft_from);
5632 	    tl = (int)STRLEN(ftp->ft_to);
5633 	    p = fword + sp->ts_fidx;
5634 	    if (fl != tl)
5635 	    {
5636 		STRMOVE(p + fl, p + tl);
5637 		repextra -= tl - fl;
5638 	    }
5639 	    mch_memmove(p, ftp->ft_from, fl);
5640 	    PROF_STORE(sp->ts_state)
5641 	    sp->ts_state = STATE_REP;
5642 	    break;
5643 
5644 	default:
5645 	    /* Did all possible states at this level, go up one level. */
5646 	    --depth;
5647 
5648 	    if (depth >= 0 && stack[depth].ts_prefixdepth == PFD_PREFIXTREE)
5649 	    {
5650 		/* Continue in or go back to the prefix tree. */
5651 		byts = pbyts;
5652 		idxs = pidxs;
5653 	    }
5654 
5655 	    /* Don't check for CTRL-C too often, it takes time. */
5656 	    if (--breakcheckcount == 0)
5657 	    {
5658 		ui_breakcheck();
5659 		breakcheckcount = 1000;
5660 	    }
5661 	}
5662     }
5663 }
5664 
5665 
5666 /*
5667  * Go one level deeper in the tree.
5668  */
5669     static void
5670 go_deeper(trystate_T *stack, int depth, int score_add)
5671 {
5672     stack[depth + 1] = stack[depth];
5673     stack[depth + 1].ts_state = STATE_START;
5674     stack[depth + 1].ts_score = stack[depth].ts_score + score_add;
5675     stack[depth + 1].ts_curi = 1;	/* start just after length byte */
5676     stack[depth + 1].ts_flags = 0;
5677 }
5678 
5679 /*
5680  * Case-folding may change the number of bytes: Count nr of chars in
5681  * fword[flen] and return the byte length of that many chars in "word".
5682  */
5683     static int
5684 nofold_len(char_u *fword, int flen, char_u *word)
5685 {
5686     char_u	*p;
5687     int		i = 0;
5688 
5689     for (p = fword; p < fword + flen; MB_PTR_ADV(p))
5690 	++i;
5691     for (p = word; i > 0; MB_PTR_ADV(p))
5692 	--i;
5693     return (int)(p - word);
5694 }
5695 
5696 /*
5697  * "fword" is a good word with case folded.  Find the matching keep-case
5698  * words and put it in "kword".
5699  * Theoretically there could be several keep-case words that result in the
5700  * same case-folded word, but we only find one...
5701  */
5702     static void
5703 find_keepcap_word(slang_T *slang, char_u *fword, char_u *kword)
5704 {
5705     char_u	uword[MAXWLEN];		/* "fword" in upper-case */
5706     int		depth;
5707     idx_T	tryidx;
5708 
5709     /* The following arrays are used at each depth in the tree. */
5710     idx_T	arridx[MAXWLEN];
5711     int		round[MAXWLEN];
5712     int		fwordidx[MAXWLEN];
5713     int		uwordidx[MAXWLEN];
5714     int		kwordlen[MAXWLEN];
5715 
5716     int		flen, ulen;
5717     int		l;
5718     int		len;
5719     int		c;
5720     idx_T	lo, hi, m;
5721     char_u	*p;
5722     char_u	*byts = slang->sl_kbyts;    /* array with bytes of the words */
5723     idx_T	*idxs = slang->sl_kidxs;    /* array with indexes */
5724 
5725     if (byts == NULL)
5726     {
5727 	/* array is empty: "cannot happen" */
5728 	*kword = NUL;
5729 	return;
5730     }
5731 
5732     /* Make an all-cap version of "fword". */
5733     allcap_copy(fword, uword);
5734 
5735     /*
5736      * Each character needs to be tried both case-folded and upper-case.
5737      * All this gets very complicated if we keep in mind that changing case
5738      * may change the byte length of a multi-byte character...
5739      */
5740     depth = 0;
5741     arridx[0] = 0;
5742     round[0] = 0;
5743     fwordidx[0] = 0;
5744     uwordidx[0] = 0;
5745     kwordlen[0] = 0;
5746     while (depth >= 0)
5747     {
5748 	if (fword[fwordidx[depth]] == NUL)
5749 	{
5750 	    /* We are at the end of "fword".  If the tree allows a word to end
5751 	     * here we have found a match. */
5752 	    if (byts[arridx[depth] + 1] == 0)
5753 	    {
5754 		kword[kwordlen[depth]] = NUL;
5755 		return;
5756 	    }
5757 
5758 	    /* kword is getting too long, continue one level up */
5759 	    --depth;
5760 	}
5761 	else if (++round[depth] > 2)
5762 	{
5763 	    /* tried both fold-case and upper-case character, continue one
5764 	     * level up */
5765 	    --depth;
5766 	}
5767 	else
5768 	{
5769 	    /*
5770 	     * round[depth] == 1: Try using the folded-case character.
5771 	     * round[depth] == 2: Try using the upper-case character.
5772 	     */
5773 	    if (has_mbyte)
5774 	    {
5775 		flen = MB_CPTR2LEN(fword + fwordidx[depth]);
5776 		ulen = MB_CPTR2LEN(uword + uwordidx[depth]);
5777 	    }
5778 	    else
5779 		ulen = flen = 1;
5780 	    if (round[depth] == 1)
5781 	    {
5782 		p = fword + fwordidx[depth];
5783 		l = flen;
5784 	    }
5785 	    else
5786 	    {
5787 		p = uword + uwordidx[depth];
5788 		l = ulen;
5789 	    }
5790 
5791 	    for (tryidx = arridx[depth]; l > 0; --l)
5792 	    {
5793 		/* Perform a binary search in the list of accepted bytes. */
5794 		len = byts[tryidx++];
5795 		c = *p++;
5796 		lo = tryidx;
5797 		hi = tryidx + len - 1;
5798 		while (lo < hi)
5799 		{
5800 		    m = (lo + hi) / 2;
5801 		    if (byts[m] > c)
5802 			hi = m - 1;
5803 		    else if (byts[m] < c)
5804 			lo = m + 1;
5805 		    else
5806 		    {
5807 			lo = hi = m;
5808 			break;
5809 		    }
5810 		}
5811 
5812 		/* Stop if there is no matching byte. */
5813 		if (hi < lo || byts[lo] != c)
5814 		    break;
5815 
5816 		/* Continue at the child (if there is one). */
5817 		tryidx = idxs[lo];
5818 	    }
5819 
5820 	    if (l == 0)
5821 	    {
5822 		/*
5823 		 * Found the matching char.  Copy it to "kword" and go a
5824 		 * level deeper.
5825 		 */
5826 		if (round[depth] == 1)
5827 		{
5828 		    STRNCPY(kword + kwordlen[depth], fword + fwordidx[depth],
5829 									flen);
5830 		    kwordlen[depth + 1] = kwordlen[depth] + flen;
5831 		}
5832 		else
5833 		{
5834 		    STRNCPY(kword + kwordlen[depth], uword + uwordidx[depth],
5835 									ulen);
5836 		    kwordlen[depth + 1] = kwordlen[depth] + ulen;
5837 		}
5838 		fwordidx[depth + 1] = fwordidx[depth] + flen;
5839 		uwordidx[depth + 1] = uwordidx[depth] + ulen;
5840 
5841 		++depth;
5842 		arridx[depth] = tryidx;
5843 		round[depth] = 0;
5844 	    }
5845 	}
5846     }
5847 
5848     /* Didn't find it: "cannot happen". */
5849     *kword = NUL;
5850 }
5851 
5852 /*
5853  * Compute the sound-a-like score for suggestions in su->su_ga and add them to
5854  * su->su_sga.
5855  */
5856     static void
5857 score_comp_sal(suginfo_T *su)
5858 {
5859     langp_T	*lp;
5860     char_u	badsound[MAXWLEN];
5861     int		i;
5862     suggest_T   *stp;
5863     suggest_T   *sstp;
5864     int		score;
5865     int		lpi;
5866 
5867     if (ga_grow(&su->su_sga, su->su_ga.ga_len) == FAIL)
5868 	return;
5869 
5870     /*	Use the sound-folding of the first language that supports it. */
5871     for (lpi = 0; lpi < curwin->w_s->b_langp.ga_len; ++lpi)
5872     {
5873 	lp = LANGP_ENTRY(curwin->w_s->b_langp, lpi);
5874 	if (lp->lp_slang->sl_sal.ga_len > 0)
5875 	{
5876 	    /* soundfold the bad word */
5877 	    spell_soundfold(lp->lp_slang, su->su_fbadword, TRUE, badsound);
5878 
5879 	    for (i = 0; i < su->su_ga.ga_len; ++i)
5880 	    {
5881 		stp = &SUG(su->su_ga, i);
5882 
5883 		/* Case-fold the suggested word, sound-fold it and compute the
5884 		 * sound-a-like score. */
5885 		score = stp_sal_score(stp, su, lp->lp_slang, badsound);
5886 		if (score < SCORE_MAXMAX)
5887 		{
5888 		    /* Add the suggestion. */
5889 		    sstp = &SUG(su->su_sga, su->su_sga.ga_len);
5890 		    sstp->st_word = vim_strsave(stp->st_word);
5891 		    if (sstp->st_word != NULL)
5892 		    {
5893 			sstp->st_wordlen = stp->st_wordlen;
5894 			sstp->st_score = score;
5895 			sstp->st_altscore = 0;
5896 			sstp->st_orglen = stp->st_orglen;
5897 			++su->su_sga.ga_len;
5898 		    }
5899 		}
5900 	    }
5901 	    break;
5902 	}
5903     }
5904 }
5905 
5906 /*
5907  * Combine the list of suggestions in su->su_ga and su->su_sga.
5908  * They are entwined.
5909  */
5910     static void
5911 score_combine(suginfo_T *su)
5912 {
5913     int		i;
5914     int		j;
5915     garray_T	ga;
5916     garray_T	*gap;
5917     langp_T	*lp;
5918     suggest_T	*stp;
5919     char_u	*p;
5920     char_u	badsound[MAXWLEN];
5921     int		round;
5922     int		lpi;
5923     slang_T	*slang = NULL;
5924 
5925     /* Add the alternate score to su_ga. */
5926     for (lpi = 0; lpi < curwin->w_s->b_langp.ga_len; ++lpi)
5927     {
5928 	lp = LANGP_ENTRY(curwin->w_s->b_langp, lpi);
5929 	if (lp->lp_slang->sl_sal.ga_len > 0)
5930 	{
5931 	    /* soundfold the bad word */
5932 	    slang = lp->lp_slang;
5933 	    spell_soundfold(slang, su->su_fbadword, TRUE, badsound);
5934 
5935 	    for (i = 0; i < su->su_ga.ga_len; ++i)
5936 	    {
5937 		stp = &SUG(su->su_ga, i);
5938 		stp->st_altscore = stp_sal_score(stp, su, slang, badsound);
5939 		if (stp->st_altscore == SCORE_MAXMAX)
5940 		    stp->st_score = (stp->st_score * 3 + SCORE_BIG) / 4;
5941 		else
5942 		    stp->st_score = (stp->st_score * 3
5943 						  + stp->st_altscore) / 4;
5944 		stp->st_salscore = FALSE;
5945 	    }
5946 	    break;
5947 	}
5948     }
5949 
5950     if (slang == NULL)	/* Using "double" without sound folding. */
5951     {
5952 	(void)cleanup_suggestions(&su->su_ga, su->su_maxscore,
5953 							     su->su_maxcount);
5954 	return;
5955     }
5956 
5957     /* Add the alternate score to su_sga. */
5958     for (i = 0; i < su->su_sga.ga_len; ++i)
5959     {
5960 	stp = &SUG(su->su_sga, i);
5961 	stp->st_altscore = spell_edit_score(slang,
5962 						su->su_badword, stp->st_word);
5963 	if (stp->st_score == SCORE_MAXMAX)
5964 	    stp->st_score = (SCORE_BIG * 7 + stp->st_altscore) / 8;
5965 	else
5966 	    stp->st_score = (stp->st_score * 7 + stp->st_altscore) / 8;
5967 	stp->st_salscore = TRUE;
5968     }
5969 
5970     /* Remove bad suggestions, sort the suggestions and truncate at "maxcount"
5971      * for both lists. */
5972     check_suggestions(su, &su->su_ga);
5973     (void)cleanup_suggestions(&su->su_ga, su->su_maxscore, su->su_maxcount);
5974     check_suggestions(su, &su->su_sga);
5975     (void)cleanup_suggestions(&su->su_sga, su->su_maxscore, su->su_maxcount);
5976 
5977     ga_init2(&ga, (int)sizeof(suginfo_T), 1);
5978     if (ga_grow(&ga, su->su_ga.ga_len + su->su_sga.ga_len) == FAIL)
5979 	return;
5980 
5981     stp = &SUG(ga, 0);
5982     for (i = 0; i < su->su_ga.ga_len || i < su->su_sga.ga_len; ++i)
5983     {
5984 	/* round 1: get a suggestion from su_ga
5985 	 * round 2: get a suggestion from su_sga */
5986 	for (round = 1; round <= 2; ++round)
5987 	{
5988 	    gap = round == 1 ? &su->su_ga : &su->su_sga;
5989 	    if (i < gap->ga_len)
5990 	    {
5991 		/* Don't add a word if it's already there. */
5992 		p = SUG(*gap, i).st_word;
5993 		for (j = 0; j < ga.ga_len; ++j)
5994 		    if (STRCMP(stp[j].st_word, p) == 0)
5995 			break;
5996 		if (j == ga.ga_len)
5997 		    stp[ga.ga_len++] = SUG(*gap, i);
5998 		else
5999 		    vim_free(p);
6000 	    }
6001 	}
6002     }
6003 
6004     ga_clear(&su->su_ga);
6005     ga_clear(&su->su_sga);
6006 
6007     /* Truncate the list to the number of suggestions that will be displayed. */
6008     if (ga.ga_len > su->su_maxcount)
6009     {
6010 	for (i = su->su_maxcount; i < ga.ga_len; ++i)
6011 	    vim_free(stp[i].st_word);
6012 	ga.ga_len = su->su_maxcount;
6013     }
6014 
6015     su->su_ga = ga;
6016 }
6017 
6018 /*
6019  * For the goodword in "stp" compute the soundalike score compared to the
6020  * badword.
6021  */
6022     static int
6023 stp_sal_score(
6024     suggest_T	*stp,
6025     suginfo_T	*su,
6026     slang_T	*slang,
6027     char_u	*badsound)	/* sound-folded badword */
6028 {
6029     char_u	*p;
6030     char_u	*pbad;
6031     char_u	*pgood;
6032     char_u	badsound2[MAXWLEN];
6033     char_u	fword[MAXWLEN];
6034     char_u	goodsound[MAXWLEN];
6035     char_u	goodword[MAXWLEN];
6036     int		lendiff;
6037 
6038     lendiff = (int)(su->su_badlen - stp->st_orglen);
6039     if (lendiff >= 0)
6040 	pbad = badsound;
6041     else
6042     {
6043 	/* soundfold the bad word with more characters following */
6044 	(void)spell_casefold(su->su_badptr, stp->st_orglen, fword, MAXWLEN);
6045 
6046 	/* When joining two words the sound often changes a lot.  E.g., "t he"
6047 	 * sounds like "t h" while "the" sounds like "@".  Avoid that by
6048 	 * removing the space.  Don't do it when the good word also contains a
6049 	 * space. */
6050 	if (VIM_ISWHITE(su->su_badptr[su->su_badlen])
6051 					 && *skiptowhite(stp->st_word) == NUL)
6052 	    for (p = fword; *(p = skiptowhite(p)) != NUL; )
6053 		STRMOVE(p, p + 1);
6054 
6055 	spell_soundfold(slang, fword, TRUE, badsound2);
6056 	pbad = badsound2;
6057     }
6058 
6059     if (lendiff > 0 && stp->st_wordlen + lendiff < MAXWLEN)
6060     {
6061 	/* Add part of the bad word to the good word, so that we soundfold
6062 	 * what replaces the bad word. */
6063 	STRCPY(goodword, stp->st_word);
6064 	vim_strncpy(goodword + stp->st_wordlen,
6065 			    su->su_badptr + su->su_badlen - lendiff, lendiff);
6066 	pgood = goodword;
6067     }
6068     else
6069 	pgood = stp->st_word;
6070 
6071     /* Sound-fold the word and compute the score for the difference. */
6072     spell_soundfold(slang, pgood, FALSE, goodsound);
6073 
6074     return soundalike_score(goodsound, pbad);
6075 }
6076 
6077 /* structure used to store soundfolded words that add_sound_suggest() has
6078  * handled already. */
6079 typedef struct
6080 {
6081     short	sft_score;	/* lowest score used */
6082     char_u	sft_word[1];    /* soundfolded word, actually longer */
6083 } sftword_T;
6084 
6085 static sftword_T dumsft;
6086 #define HIKEY2SFT(p)  ((sftword_T *)(p - (dumsft.sft_word - (char_u *)&dumsft)))
6087 #define HI2SFT(hi)     HIKEY2SFT((hi)->hi_key)
6088 
6089 /*
6090  * Prepare for calling suggest_try_soundalike().
6091  */
6092     static void
6093 suggest_try_soundalike_prep(void)
6094 {
6095     langp_T	*lp;
6096     int		lpi;
6097     slang_T	*slang;
6098 
6099     /* Do this for all languages that support sound folding and for which a
6100      * .sug file has been loaded. */
6101     for (lpi = 0; lpi < curwin->w_s->b_langp.ga_len; ++lpi)
6102     {
6103 	lp = LANGP_ENTRY(curwin->w_s->b_langp, lpi);
6104 	slang = lp->lp_slang;
6105 	if (slang->sl_sal.ga_len > 0 && slang->sl_sbyts != NULL)
6106 	    /* prepare the hashtable used by add_sound_suggest() */
6107 	    hash_init(&slang->sl_sounddone);
6108     }
6109 }
6110 
6111 /*
6112  * Find suggestions by comparing the word in a sound-a-like form.
6113  * Note: This doesn't support postponed prefixes.
6114  */
6115     static void
6116 suggest_try_soundalike(suginfo_T *su)
6117 {
6118     char_u	salword[MAXWLEN];
6119     langp_T	*lp;
6120     int		lpi;
6121     slang_T	*slang;
6122 
6123     /* Do this for all languages that support sound folding and for which a
6124      * .sug file has been loaded. */
6125     for (lpi = 0; lpi < curwin->w_s->b_langp.ga_len; ++lpi)
6126     {
6127 	lp = LANGP_ENTRY(curwin->w_s->b_langp, lpi);
6128 	slang = lp->lp_slang;
6129 	if (slang->sl_sal.ga_len > 0 && slang->sl_sbyts != NULL)
6130 	{
6131 	    /* soundfold the bad word */
6132 	    spell_soundfold(slang, su->su_fbadword, TRUE, salword);
6133 
6134 	    /* try all kinds of inserts/deletes/swaps/etc. */
6135 	    /* TODO: also soundfold the next words, so that we can try joining
6136 	     * and splitting */
6137 #ifdef SUGGEST_PROFILE
6138 	prof_init();
6139 #endif
6140 	    suggest_trie_walk(su, lp, salword, TRUE);
6141 #ifdef SUGGEST_PROFILE
6142 	prof_report("soundalike");
6143 #endif
6144 	}
6145     }
6146 }
6147 
6148 /*
6149  * Finish up after calling suggest_try_soundalike().
6150  */
6151     static void
6152 suggest_try_soundalike_finish(void)
6153 {
6154     langp_T	*lp;
6155     int		lpi;
6156     slang_T	*slang;
6157     int		todo;
6158     hashitem_T	*hi;
6159 
6160     /* Do this for all languages that support sound folding and for which a
6161      * .sug file has been loaded. */
6162     for (lpi = 0; lpi < curwin->w_s->b_langp.ga_len; ++lpi)
6163     {
6164 	lp = LANGP_ENTRY(curwin->w_s->b_langp, lpi);
6165 	slang = lp->lp_slang;
6166 	if (slang->sl_sal.ga_len > 0 && slang->sl_sbyts != NULL)
6167 	{
6168 	    /* Free the info about handled words. */
6169 	    todo = (int)slang->sl_sounddone.ht_used;
6170 	    for (hi = slang->sl_sounddone.ht_array; todo > 0; ++hi)
6171 		if (!HASHITEM_EMPTY(hi))
6172 		{
6173 		    vim_free(HI2SFT(hi));
6174 		    --todo;
6175 		}
6176 
6177 	    /* Clear the hashtable, it may also be used by another region. */
6178 	    hash_clear(&slang->sl_sounddone);
6179 	    hash_init(&slang->sl_sounddone);
6180 	}
6181     }
6182 }
6183 
6184 /*
6185  * A match with a soundfolded word is found.  Add the good word(s) that
6186  * produce this soundfolded word.
6187  */
6188     static void
6189 add_sound_suggest(
6190     suginfo_T	*su,
6191     char_u	*goodword,
6192     int		score,		/* soundfold score  */
6193     langp_T	*lp)
6194 {
6195     slang_T	*slang = lp->lp_slang;	/* language for sound folding */
6196     int		sfwordnr;
6197     char_u	*nrline;
6198     int		orgnr;
6199     char_u	theword[MAXWLEN];
6200     int		i;
6201     int		wlen;
6202     char_u	*byts;
6203     idx_T	*idxs;
6204     int		n;
6205     int		wordcount;
6206     int		wc;
6207     int		goodscore;
6208     hash_T	hash;
6209     hashitem_T  *hi;
6210     sftword_T	*sft;
6211     int		bc, gc;
6212     int		limit;
6213 
6214     /*
6215      * It's very well possible that the same soundfold word is found several
6216      * times with different scores.  Since the following is quite slow only do
6217      * the words that have a better score than before.  Use a hashtable to
6218      * remember the words that have been done.
6219      */
6220     hash = hash_hash(goodword);
6221     hi = hash_lookup(&slang->sl_sounddone, goodword, hash);
6222     if (HASHITEM_EMPTY(hi))
6223     {
6224 	sft = (sftword_T *)alloc((unsigned)(sizeof(sftword_T)
6225 							 + STRLEN(goodword)));
6226 	if (sft != NULL)
6227 	{
6228 	    sft->sft_score = score;
6229 	    STRCPY(sft->sft_word, goodword);
6230 	    hash_add_item(&slang->sl_sounddone, hi, sft->sft_word, hash);
6231 	}
6232     }
6233     else
6234     {
6235 	sft = HI2SFT(hi);
6236 	if (score >= sft->sft_score)
6237 	    return;
6238 	sft->sft_score = score;
6239     }
6240 
6241     /*
6242      * Find the word nr in the soundfold tree.
6243      */
6244     sfwordnr = soundfold_find(slang, goodword);
6245     if (sfwordnr < 0)
6246     {
6247 	internal_error("add_sound_suggest()");
6248 	return;
6249     }
6250 
6251     /*
6252      * go over the list of good words that produce this soundfold word
6253      */
6254     nrline = ml_get_buf(slang->sl_sugbuf, (linenr_T)(sfwordnr + 1), FALSE);
6255     orgnr = 0;
6256     while (*nrline != NUL)
6257     {
6258 	/* The wordnr was stored in a minimal nr of bytes as an offset to the
6259 	 * previous wordnr. */
6260 	orgnr += bytes2offset(&nrline);
6261 
6262 	byts = slang->sl_fbyts;
6263 	idxs = slang->sl_fidxs;
6264 
6265 	/* Lookup the word "orgnr" one of the two tries. */
6266 	n = 0;
6267 	wordcount = 0;
6268 	for (wlen = 0; wlen < MAXWLEN - 3; ++wlen)
6269 	{
6270 	    i = 1;
6271 	    if (wordcount == orgnr && byts[n + 1] == NUL)
6272 		break;	/* found end of word */
6273 
6274 	    if (byts[n + 1] == NUL)
6275 		++wordcount;
6276 
6277 	    /* skip over the NUL bytes */
6278 	    for ( ; byts[n + i] == NUL; ++i)
6279 		if (i > byts[n])	/* safety check */
6280 		{
6281 		    STRCPY(theword + wlen, "BAD");
6282 		    wlen += 3;
6283 		    goto badword;
6284 		}
6285 
6286 	    /* One of the siblings must have the word. */
6287 	    for ( ; i < byts[n]; ++i)
6288 	    {
6289 		wc = idxs[idxs[n + i]];	/* nr of words under this byte */
6290 		if (wordcount + wc > orgnr)
6291 		    break;
6292 		wordcount += wc;
6293 	    }
6294 
6295 	    theword[wlen] = byts[n + i];
6296 	    n = idxs[n + i];
6297 	}
6298 badword:
6299 	theword[wlen] = NUL;
6300 
6301 	/* Go over the possible flags and regions. */
6302 	for (; i <= byts[n] && byts[n + i] == NUL; ++i)
6303 	{
6304 	    char_u	cword[MAXWLEN];
6305 	    char_u	*p;
6306 	    int		flags = (int)idxs[n + i];
6307 
6308 	    /* Skip words with the NOSUGGEST flag */
6309 	    if (flags & WF_NOSUGGEST)
6310 		continue;
6311 
6312 	    if (flags & WF_KEEPCAP)
6313 	    {
6314 		/* Must find the word in the keep-case tree. */
6315 		find_keepcap_word(slang, theword, cword);
6316 		p = cword;
6317 	    }
6318 	    else
6319 	    {
6320 		flags |= su->su_badflags;
6321 		if ((flags & WF_CAPMASK) != 0)
6322 		{
6323 		    /* Need to fix case according to "flags". */
6324 		    make_case_word(theword, cword, flags);
6325 		    p = cword;
6326 		}
6327 		else
6328 		    p = theword;
6329 	    }
6330 
6331 	    /* Add the suggestion. */
6332 	    if (sps_flags & SPS_DOUBLE)
6333 	    {
6334 		/* Add the suggestion if the score isn't too bad. */
6335 		if (score <= su->su_maxscore)
6336 		    add_suggestion(su, &su->su_sga, p, su->su_badlen,
6337 					       score, 0, FALSE, slang, FALSE);
6338 	    }
6339 	    else
6340 	    {
6341 		/* Add a penalty for words in another region. */
6342 		if ((flags & WF_REGION)
6343 			    && (((unsigned)flags >> 16) & lp->lp_region) == 0)
6344 		    goodscore = SCORE_REGION;
6345 		else
6346 		    goodscore = 0;
6347 
6348 		/* Add a small penalty for changing the first letter from
6349 		 * lower to upper case.  Helps for "tath" -> "Kath", which is
6350 		 * less common than "tath" -> "path".  Don't do it when the
6351 		 * letter is the same, that has already been counted. */
6352 		gc = PTR2CHAR(p);
6353 		if (SPELL_ISUPPER(gc))
6354 		{
6355 		    bc = PTR2CHAR(su->su_badword);
6356 		    if (!SPELL_ISUPPER(bc)
6357 				      && SPELL_TOFOLD(bc) != SPELL_TOFOLD(gc))
6358 			goodscore += SCORE_ICASE / 2;
6359 		}
6360 
6361 		/* Compute the score for the good word.  This only does letter
6362 		 * insert/delete/swap/replace.  REP items are not considered,
6363 		 * which may make the score a bit higher.
6364 		 * Use a limit for the score to make it work faster.  Use
6365 		 * MAXSCORE(), because RESCORE() will change the score.
6366 		 * If the limit is very high then the iterative method is
6367 		 * inefficient, using an array is quicker. */
6368 		limit = MAXSCORE(su->su_sfmaxscore - goodscore, score);
6369 		if (limit > SCORE_LIMITMAX)
6370 		    goodscore += spell_edit_score(slang, su->su_badword, p);
6371 		else
6372 		    goodscore += spell_edit_score_limit(slang, su->su_badword,
6373 								    p, limit);
6374 
6375 		/* When going over the limit don't bother to do the rest. */
6376 		if (goodscore < SCORE_MAXMAX)
6377 		{
6378 		    /* Give a bonus to words seen before. */
6379 		    goodscore = score_wordcount_adj(slang, goodscore, p, FALSE);
6380 
6381 		    /* Add the suggestion if the score isn't too bad. */
6382 		    goodscore = RESCORE(goodscore, score);
6383 		    if (goodscore <= su->su_sfmaxscore)
6384 			add_suggestion(su, &su->su_ga, p, su->su_badlen,
6385 					 goodscore, score, TRUE, slang, TRUE);
6386 		}
6387 	    }
6388 	}
6389 	/* smsg("word %s (%d): %s (%d)", sftword, sftnr, theword, orgnr); */
6390     }
6391 }
6392 
6393 /*
6394  * Find word "word" in fold-case tree for "slang" and return the word number.
6395  */
6396     static int
6397 soundfold_find(slang_T *slang, char_u *word)
6398 {
6399     idx_T	arridx = 0;
6400     int		len;
6401     int		wlen = 0;
6402     int		c;
6403     char_u	*ptr = word;
6404     char_u	*byts;
6405     idx_T	*idxs;
6406     int		wordnr = 0;
6407 
6408     byts = slang->sl_sbyts;
6409     idxs = slang->sl_sidxs;
6410 
6411     for (;;)
6412     {
6413 	/* First byte is the number of possible bytes. */
6414 	len = byts[arridx++];
6415 
6416 	/* If the first possible byte is a zero the word could end here.
6417 	 * If the word ends we found the word.  If not skip the NUL bytes. */
6418 	c = ptr[wlen];
6419 	if (byts[arridx] == NUL)
6420 	{
6421 	    if (c == NUL)
6422 		break;
6423 
6424 	    /* Skip over the zeros, there can be several. */
6425 	    while (len > 0 && byts[arridx] == NUL)
6426 	    {
6427 		++arridx;
6428 		--len;
6429 	    }
6430 	    if (len == 0)
6431 		return -1;    /* no children, word should have ended here */
6432 	    ++wordnr;
6433 	}
6434 
6435 	/* If the word ends we didn't find it. */
6436 	if (c == NUL)
6437 	    return -1;
6438 
6439 	/* Perform a binary search in the list of accepted bytes. */
6440 	if (c == TAB)	    /* <Tab> is handled like <Space> */
6441 	    c = ' ';
6442 	while (byts[arridx] < c)
6443 	{
6444 	    /* The word count is in the first idxs[] entry of the child. */
6445 	    wordnr += idxs[idxs[arridx]];
6446 	    ++arridx;
6447 	    if (--len == 0)	/* end of the bytes, didn't find it */
6448 		return -1;
6449 	}
6450 	if (byts[arridx] != c)	/* didn't find the byte */
6451 	    return -1;
6452 
6453 	/* Continue at the child (if there is one). */
6454 	arridx = idxs[arridx];
6455 	++wlen;
6456 
6457 	/* One space in the good word may stand for several spaces in the
6458 	 * checked word. */
6459 	if (c == ' ')
6460 	    while (ptr[wlen] == ' ' || ptr[wlen] == TAB)
6461 		++wlen;
6462     }
6463 
6464     return wordnr;
6465 }
6466 
6467 /*
6468  * Copy "fword" to "cword", fixing case according to "flags".
6469  */
6470     static void
6471 make_case_word(char_u *fword, char_u *cword, int flags)
6472 {
6473     if (flags & WF_ALLCAP)
6474 	/* Make it all upper-case */
6475 	allcap_copy(fword, cword);
6476     else if (flags & WF_ONECAP)
6477 	/* Make the first letter upper-case */
6478 	onecap_copy(fword, cword, TRUE);
6479     else
6480 	/* Use goodword as-is. */
6481 	STRCPY(cword, fword);
6482 }
6483 
6484 
6485 /*
6486  * Return TRUE if "c1" and "c2" are similar characters according to the MAP
6487  * lines in the .aff file.
6488  */
6489     static int
6490 similar_chars(slang_T *slang, int c1, int c2)
6491 {
6492     int		m1, m2;
6493     char_u	buf[MB_MAXBYTES + 1];
6494     hashitem_T  *hi;
6495 
6496     if (c1 >= 256)
6497     {
6498 	buf[mb_char2bytes(c1, buf)] = 0;
6499 	hi = hash_find(&slang->sl_map_hash, buf);
6500 	if (HASHITEM_EMPTY(hi))
6501 	    m1 = 0;
6502 	else
6503 	    m1 = mb_ptr2char(hi->hi_key + STRLEN(hi->hi_key) + 1);
6504     }
6505     else
6506 	m1 = slang->sl_map_array[c1];
6507     if (m1 == 0)
6508 	return FALSE;
6509 
6510 
6511     if (c2 >= 256)
6512     {
6513 	buf[mb_char2bytes(c2, buf)] = 0;
6514 	hi = hash_find(&slang->sl_map_hash, buf);
6515 	if (HASHITEM_EMPTY(hi))
6516 	    m2 = 0;
6517 	else
6518 	    m2 = mb_ptr2char(hi->hi_key + STRLEN(hi->hi_key) + 1);
6519     }
6520     else
6521 	m2 = slang->sl_map_array[c2];
6522 
6523     return m1 == m2;
6524 }
6525 
6526 /*
6527  * Add a suggestion to the list of suggestions.
6528  * For a suggestion that is already in the list the lowest score is remembered.
6529  */
6530     static void
6531 add_suggestion(
6532     suginfo_T	*su,
6533     garray_T	*gap,		/* either su_ga or su_sga */
6534     char_u	*goodword,
6535     int		badlenarg,	/* len of bad word replaced with "goodword" */
6536     int		score,
6537     int		altscore,
6538     int		had_bonus,	/* value for st_had_bonus */
6539     slang_T	*slang,		/* language for sound folding */
6540     int		maxsf)		/* su_maxscore applies to soundfold score,
6541 				   su_sfmaxscore to the total score. */
6542 {
6543     int		goodlen;	/* len of goodword changed */
6544     int		badlen;		/* len of bad word changed */
6545     suggest_T   *stp;
6546     suggest_T   new_sug;
6547     int		i;
6548     char_u	*pgood, *pbad;
6549 
6550     /* Minimize "badlen" for consistency.  Avoids that changing "the the" to
6551      * "thee the" is added next to changing the first "the" the "thee".  */
6552     pgood = goodword + STRLEN(goodword);
6553     pbad = su->su_badptr + badlenarg;
6554     for (;;)
6555     {
6556 	goodlen = (int)(pgood - goodword);
6557 	badlen = (int)(pbad - su->su_badptr);
6558 	if (goodlen <= 0 || badlen <= 0)
6559 	    break;
6560 	MB_PTR_BACK(goodword, pgood);
6561 	MB_PTR_BACK(su->su_badptr, pbad);
6562 	if (has_mbyte)
6563 	{
6564 	    if (mb_ptr2char(pgood) != mb_ptr2char(pbad))
6565 		break;
6566 	}
6567 	else if (*pgood != *pbad)
6568 		break;
6569     }
6570 
6571     if (badlen == 0 && goodlen == 0)
6572 	/* goodword doesn't change anything; may happen for "the the" changing
6573 	 * the first "the" to itself. */
6574 	return;
6575 
6576     if (gap->ga_len == 0)
6577 	i = -1;
6578     else
6579     {
6580 	/* Check if the word is already there.  Also check the length that is
6581 	 * being replaced "thes," -> "these" is a different suggestion from
6582 	 * "thes" -> "these". */
6583 	stp = &SUG(*gap, 0);
6584 	for (i = gap->ga_len; --i >= 0; ++stp)
6585 	    if (stp->st_wordlen == goodlen
6586 		    && stp->st_orglen == badlen
6587 		    && STRNCMP(stp->st_word, goodword, goodlen) == 0)
6588 	    {
6589 		/*
6590 		 * Found it.  Remember the word with the lowest score.
6591 		 */
6592 		if (stp->st_slang == NULL)
6593 		    stp->st_slang = slang;
6594 
6595 		new_sug.st_score = score;
6596 		new_sug.st_altscore = altscore;
6597 		new_sug.st_had_bonus = had_bonus;
6598 
6599 		if (stp->st_had_bonus != had_bonus)
6600 		{
6601 		    /* Only one of the two had the soundalike score computed.
6602 		     * Need to do that for the other one now, otherwise the
6603 		     * scores can't be compared.  This happens because
6604 		     * suggest_try_change() doesn't compute the soundalike
6605 		     * word to keep it fast, while some special methods set
6606 		     * the soundalike score to zero. */
6607 		    if (had_bonus)
6608 			rescore_one(su, stp);
6609 		    else
6610 		    {
6611 			new_sug.st_word = stp->st_word;
6612 			new_sug.st_wordlen = stp->st_wordlen;
6613 			new_sug.st_slang = stp->st_slang;
6614 			new_sug.st_orglen = badlen;
6615 			rescore_one(su, &new_sug);
6616 		    }
6617 		}
6618 
6619 		if (stp->st_score > new_sug.st_score)
6620 		{
6621 		    stp->st_score = new_sug.st_score;
6622 		    stp->st_altscore = new_sug.st_altscore;
6623 		    stp->st_had_bonus = new_sug.st_had_bonus;
6624 		}
6625 		break;
6626 	    }
6627     }
6628 
6629     if (i < 0 && ga_grow(gap, 1) == OK)
6630     {
6631 	/* Add a suggestion. */
6632 	stp = &SUG(*gap, gap->ga_len);
6633 	stp->st_word = vim_strnsave(goodword, goodlen);
6634 	if (stp->st_word != NULL)
6635 	{
6636 	    stp->st_wordlen = goodlen;
6637 	    stp->st_score = score;
6638 	    stp->st_altscore = altscore;
6639 	    stp->st_had_bonus = had_bonus;
6640 	    stp->st_orglen = badlen;
6641 	    stp->st_slang = slang;
6642 	    ++gap->ga_len;
6643 
6644 	    /* If we have too many suggestions now, sort the list and keep
6645 	     * the best suggestions. */
6646 	    if (gap->ga_len > SUG_MAX_COUNT(su))
6647 	    {
6648 		if (maxsf)
6649 		    su->su_sfmaxscore = cleanup_suggestions(gap,
6650 				      su->su_sfmaxscore, SUG_CLEAN_COUNT(su));
6651 		else
6652 		    su->su_maxscore = cleanup_suggestions(gap,
6653 					su->su_maxscore, SUG_CLEAN_COUNT(su));
6654 	    }
6655 	}
6656     }
6657 }
6658 
6659 /*
6660  * Suggestions may in fact be flagged as errors.  Esp. for banned words and
6661  * for split words, such as "the the".  Remove these from the list here.
6662  */
6663     static void
6664 check_suggestions(
6665     suginfo_T	*su,
6666     garray_T	*gap)		    /* either su_ga or su_sga */
6667 {
6668     suggest_T   *stp;
6669     int		i;
6670     char_u	longword[MAXWLEN + 1];
6671     int		len;
6672     hlf_T	attr;
6673 
6674     stp = &SUG(*gap, 0);
6675     for (i = gap->ga_len - 1; i >= 0; --i)
6676     {
6677 	/* Need to append what follows to check for "the the". */
6678 	vim_strncpy(longword, stp[i].st_word, MAXWLEN);
6679 	len = stp[i].st_wordlen;
6680 	vim_strncpy(longword + len, su->su_badptr + stp[i].st_orglen,
6681 							       MAXWLEN - len);
6682 	attr = HLF_COUNT;
6683 	(void)spell_check(curwin, longword, &attr, NULL, FALSE);
6684 	if (attr != HLF_COUNT)
6685 	{
6686 	    /* Remove this entry. */
6687 	    vim_free(stp[i].st_word);
6688 	    --gap->ga_len;
6689 	    if (i < gap->ga_len)
6690 		mch_memmove(stp + i, stp + i + 1,
6691 				       sizeof(suggest_T) * (gap->ga_len - i));
6692 	}
6693     }
6694 }
6695 
6696 
6697 /*
6698  * Add a word to be banned.
6699  */
6700     static void
6701 add_banned(
6702     suginfo_T	*su,
6703     char_u	*word)
6704 {
6705     char_u	*s;
6706     hash_T	hash;
6707     hashitem_T	*hi;
6708 
6709     hash = hash_hash(word);
6710     hi = hash_lookup(&su->su_banned, word, hash);
6711     if (HASHITEM_EMPTY(hi))
6712     {
6713 	s = vim_strsave(word);
6714 	if (s != NULL)
6715 	    hash_add_item(&su->su_banned, hi, s, hash);
6716     }
6717 }
6718 
6719 /*
6720  * Recompute the score for all suggestions if sound-folding is possible.  This
6721  * is slow, thus only done for the final results.
6722  */
6723     static void
6724 rescore_suggestions(suginfo_T *su)
6725 {
6726     int		i;
6727 
6728     if (su->su_sallang != NULL)
6729 	for (i = 0; i < su->su_ga.ga_len; ++i)
6730 	    rescore_one(su, &SUG(su->su_ga, i));
6731 }
6732 
6733 /*
6734  * Recompute the score for one suggestion if sound-folding is possible.
6735  */
6736     static void
6737 rescore_one(suginfo_T *su, suggest_T *stp)
6738 {
6739     slang_T	*slang = stp->st_slang;
6740     char_u	sal_badword[MAXWLEN];
6741     char_u	*p;
6742 
6743     /* Only rescore suggestions that have no sal score yet and do have a
6744      * language. */
6745     if (slang != NULL && slang->sl_sal.ga_len > 0 && !stp->st_had_bonus)
6746     {
6747 	if (slang == su->su_sallang)
6748 	    p = su->su_sal_badword;
6749 	else
6750 	{
6751 	    spell_soundfold(slang, su->su_fbadword, TRUE, sal_badword);
6752 	    p = sal_badword;
6753 	}
6754 
6755 	stp->st_altscore = stp_sal_score(stp, su, slang, p);
6756 	if (stp->st_altscore == SCORE_MAXMAX)
6757 	    stp->st_altscore = SCORE_BIG;
6758 	stp->st_score = RESCORE(stp->st_score, stp->st_altscore);
6759 	stp->st_had_bonus = TRUE;
6760     }
6761 }
6762 
6763 static int sug_compare(const void *s1, const void *s2);
6764 
6765 /*
6766  * Function given to qsort() to sort the suggestions on st_score.
6767  * First on "st_score", then "st_altscore" then alphabetically.
6768  */
6769     static int
6770 sug_compare(const void *s1, const void *s2)
6771 {
6772     suggest_T	*p1 = (suggest_T *)s1;
6773     suggest_T	*p2 = (suggest_T *)s2;
6774     int		n = p1->st_score - p2->st_score;
6775 
6776     if (n == 0)
6777     {
6778 	n = p1->st_altscore - p2->st_altscore;
6779 	if (n == 0)
6780 	    n = STRICMP(p1->st_word, p2->st_word);
6781     }
6782     return n;
6783 }
6784 
6785 /*
6786  * Cleanup the suggestions:
6787  * - Sort on score.
6788  * - Remove words that won't be displayed.
6789  * Returns the maximum score in the list or "maxscore" unmodified.
6790  */
6791     static int
6792 cleanup_suggestions(
6793     garray_T	*gap,
6794     int		maxscore,
6795     int		keep)		/* nr of suggestions to keep */
6796 {
6797     suggest_T   *stp = &SUG(*gap, 0);
6798     int		i;
6799 
6800     /* Sort the list. */
6801     qsort(gap->ga_data, (size_t)gap->ga_len, sizeof(suggest_T), sug_compare);
6802 
6803     /* Truncate the list to the number of suggestions that will be displayed. */
6804     if (gap->ga_len > keep)
6805     {
6806 	for (i = keep; i < gap->ga_len; ++i)
6807 	    vim_free(stp[i].st_word);
6808 	gap->ga_len = keep;
6809 	return stp[keep - 1].st_score;
6810     }
6811     return maxscore;
6812 }
6813 
#if defined(FEAT_EVAL) || defined(PROTO)
/*
 * Soundfold a string, for soundfold().
 * Uses the first language of the current window that has SAL items; when
 * 'spell' is off, 'spelllang' is empty or no such language exists a copy of
 * "word" is returned.
 * Result is in allocated memory, NULL for an error.
 */
    char_u *
eval_soundfold(char_u *word)
{
    char_u	folded[MAXWLEN];
    int		idx;

    if (!curwin->w_p_spell || *curwin->w_s->b_p_spl == NUL)
	return vim_strsave(word);

    /* Use the sound-folding of the first language that supports it. */
    for (idx = 0; idx < curwin->w_s->b_langp.ga_len; ++idx)
    {
	langp_T	*lp = LANGP_ENTRY(curwin->w_s->b_langp, idx);

	if (lp->lp_slang->sl_sal.ga_len <= 0)
	    continue;

	/* soundfold the word */
	spell_soundfold(lp->lp_slang, word, FALSE, folded);
	return vim_strsave(folded);
    }

    /* No language with sound folding, return word as-is. */
    return vim_strsave(word);
}
#endif
6843 
6844 /*
6845  * Turn "inword" into its sound-a-like equivalent in "res[MAXWLEN]".
6846  *
6847  * There are many ways to turn a word into a sound-a-like representation.  The
6848  * oldest is Soundex (1918!).   A nice overview can be found in "Approximate
6849  * swedish name matching - survey and test of different algorithms" by Klas
6850  * Erikson.
6851  *
6852  * We support two methods:
6853  * 1. SOFOFROM/SOFOTO do a simple character mapping.
6854  * 2. SAL items define a more advanced sound-folding (and much slower).
6855  */
6856     void
6857 spell_soundfold(
6858     slang_T	*slang,
6859     char_u	*inword,
6860     int		folded,	    /* "inword" is already case-folded */
6861     char_u	*res)
6862 {
6863     char_u	fword[MAXWLEN];
6864     char_u	*word;
6865 
6866     if (slang->sl_sofo)
6867 	/* SOFOFROM and SOFOTO used */
6868 	spell_soundfold_sofo(slang, inword, res);
6869     else
6870     {
6871 	/* SAL items used.  Requires the word to be case-folded. */
6872 	if (folded)
6873 	    word = inword;
6874 	else
6875 	{
6876 	    (void)spell_casefold(inword, (int)STRLEN(inword), fword, MAXWLEN);
6877 	    word = fword;
6878 	}
6879 
6880 	if (has_mbyte)
6881 	    spell_soundfold_wsal(slang, word, res);
6882 	else
6883 	    spell_soundfold_sal(slang, word, res);
6884     }
6885 }
6886 
6887 /*
6888  * Perform sound folding of "inword" into "res" according to SOFOFROM and
6889  * SOFOTO lines.
6890  */
6891     static void
6892 spell_soundfold_sofo(slang_T *slang, char_u *inword, char_u *res)
6893 {
6894     char_u	*s;
6895     int		ri = 0;
6896     int		c;
6897 
6898     if (has_mbyte)
6899     {
6900 	int	prevc = 0;
6901 	int	*ip;
6902 
6903 	/* The sl_sal_first[] table contains the translation for chars up to
6904 	 * 255, sl_sal the rest. */
6905 	for (s = inword; *s != NUL; )
6906 	{
6907 	    c = mb_cptr2char_adv(&s);
6908 	    if (enc_utf8 ? utf_class(c) == 0 : VIM_ISWHITE(c))
6909 		c = ' ';
6910 	    else if (c < 256)
6911 		c = slang->sl_sal_first[c];
6912 	    else
6913 	    {
6914 		ip = ((int **)slang->sl_sal.ga_data)[c & 0xff];
6915 		if (ip == NULL)		/* empty list, can't match */
6916 		    c = NUL;
6917 		else
6918 		    for (;;)		/* find "c" in the list */
6919 		    {
6920 			if (*ip == 0)	/* not found */
6921 			{
6922 			    c = NUL;
6923 			    break;
6924 			}
6925 			if (*ip == c)	/* match! */
6926 			{
6927 			    c = ip[1];
6928 			    break;
6929 			}
6930 			ip += 2;
6931 		    }
6932 	    }
6933 
6934 	    if (c != NUL && c != prevc)
6935 	    {
6936 		ri += mb_char2bytes(c, res + ri);
6937 		if (ri + MB_MAXBYTES > MAXWLEN)
6938 		    break;
6939 		prevc = c;
6940 	    }
6941 	}
6942     }
6943     else
6944     {
6945 	/* The sl_sal_first[] table contains the translation. */
6946 	for (s = inword; (c = *s) != NUL; ++s)
6947 	{
6948 	    if (VIM_ISWHITE(c))
6949 		c = ' ';
6950 	    else
6951 		c = slang->sl_sal_first[c];
6952 	    if (c != NUL && (ri == 0 || res[ri - 1] != c))
6953 		res[ri++] = c;
6954 	}
6955     }
6956 
6957     res[ri] = NUL;
6958 }
6959 
6960     static void
6961 spell_soundfold_sal(slang_T *slang, char_u *inword, char_u *res)
6962 {
6963     salitem_T	*smp;
6964     char_u	word[MAXWLEN];
6965     char_u	*s = inword;
6966     char_u	*t;
6967     char_u	*pf;
6968     int		i, j, z;
6969     int		reslen;
6970     int		n, k = 0;
6971     int		z0;
6972     int		k0;
6973     int		n0;
6974     int		c;
6975     int		pri;
6976     int		p0 = -333;
6977     int		c0;
6978 
6979     /* Remove accents, if wanted.  We actually remove all non-word characters.
6980      * But keep white space.  We need a copy, the word may be changed here. */
6981     if (slang->sl_rem_accents)
6982     {
6983 	t = word;
6984 	while (*s != NUL)
6985 	{
6986 	    if (VIM_ISWHITE(*s))
6987 	    {
6988 		*t++ = ' ';
6989 		s = skipwhite(s);
6990 	    }
6991 	    else
6992 	    {
6993 		if (spell_iswordp_nmw(s, curwin))
6994 		    *t++ = *s;
6995 		++s;
6996 	    }
6997 	}
6998 	*t = NUL;
6999     }
7000     else
7001 	vim_strncpy(word, s, MAXWLEN - 1);
7002 
7003     smp = (salitem_T *)slang->sl_sal.ga_data;
7004 
7005     /*
7006      * This comes from Aspell phonet.cpp.  Converted from C++ to C.
7007      * Changed to keep spaces.
7008      */
7009     i = reslen = z = 0;
7010     while ((c = word[i]) != NUL)
7011     {
7012 	/* Start with the first rule that has the character in the word. */
7013 	n = slang->sl_sal_first[c];
7014 	z0 = 0;
7015 
7016 	if (n >= 0)
7017 	{
7018 	    /* check all rules for the same letter */
7019 	    for (; (s = smp[n].sm_lead)[0] == c; ++n)
7020 	    {
7021 		/* Quickly skip entries that don't match the word.  Most
7022 		 * entries are less then three chars, optimize for that. */
7023 		k = smp[n].sm_leadlen;
7024 		if (k > 1)
7025 		{
7026 		    if (word[i + 1] != s[1])
7027 			continue;
7028 		    if (k > 2)
7029 		    {
7030 			for (j = 2; j < k; ++j)
7031 			    if (word[i + j] != s[j])
7032 				break;
7033 			if (j < k)
7034 			    continue;
7035 		    }
7036 		}
7037 
7038 		if ((pf = smp[n].sm_oneof) != NULL)
7039 		{
7040 		    /* Check for match with one of the chars in "sm_oneof". */
7041 		    while (*pf != NUL && *pf != word[i + k])
7042 			++pf;
7043 		    if (*pf == NUL)
7044 			continue;
7045 		    ++k;
7046 		}
7047 		s = smp[n].sm_rules;
7048 		pri = 5;    /* default priority */
7049 
7050 		p0 = *s;
7051 		k0 = k;
7052 		while (*s == '-' && k > 1)
7053 		{
7054 		    k--;
7055 		    s++;
7056 		}
7057 		if (*s == '<')
7058 		    s++;
7059 		if (VIM_ISDIGIT(*s))
7060 		{
7061 		    /* determine priority */
7062 		    pri = *s - '0';
7063 		    s++;
7064 		}
7065 		if (*s == '^' && *(s + 1) == '^')
7066 		    s++;
7067 
7068 		if (*s == NUL
7069 			|| (*s == '^'
7070 			    && (i == 0 || !(word[i - 1] == ' '
7071 				      || spell_iswordp(word + i - 1, curwin)))
7072 			    && (*(s + 1) != '$'
7073 				|| (!spell_iswordp(word + i + k0, curwin))))
7074 			|| (*s == '$' && i > 0
7075 			    && spell_iswordp(word + i - 1, curwin)
7076 			    && (!spell_iswordp(word + i + k0, curwin))))
7077 		{
7078 		    /* search for followup rules, if:    */
7079 		    /* followup and k > 1  and  NO '-' in searchstring */
7080 		    c0 = word[i + k - 1];
7081 		    n0 = slang->sl_sal_first[c0];
7082 
7083 		    if (slang->sl_followup && k > 1 && n0 >= 0
7084 					   && p0 != '-' && word[i + k] != NUL)
7085 		    {
7086 			/* test follow-up rule for "word[i + k]" */
7087 			for ( ; (s = smp[n0].sm_lead)[0] == c0; ++n0)
7088 			{
7089 			    /* Quickly skip entries that don't match the word.
7090 			     * */
7091 			    k0 = smp[n0].sm_leadlen;
7092 			    if (k0 > 1)
7093 			    {
7094 				if (word[i + k] != s[1])
7095 				    continue;
7096 				if (k0 > 2)
7097 				{
7098 				    pf = word + i + k + 1;
7099 				    for (j = 2; j < k0; ++j)
7100 					if (*pf++ != s[j])
7101 					    break;
7102 				    if (j < k0)
7103 					continue;
7104 				}
7105 			    }
7106 			    k0 += k - 1;
7107 
7108 			    if ((pf = smp[n0].sm_oneof) != NULL)
7109 			    {
7110 				/* Check for match with one of the chars in
7111 				 * "sm_oneof". */
7112 				while (*pf != NUL && *pf != word[i + k0])
7113 				    ++pf;
7114 				if (*pf == NUL)
7115 				    continue;
7116 				++k0;
7117 			    }
7118 
7119 			    p0 = 5;
7120 			    s = smp[n0].sm_rules;
7121 			    while (*s == '-')
7122 			    {
7123 				/* "k0" gets NOT reduced because
7124 				 * "if (k0 == k)" */
7125 				s++;
7126 			    }
7127 			    if (*s == '<')
7128 				s++;
7129 			    if (VIM_ISDIGIT(*s))
7130 			    {
7131 				p0 = *s - '0';
7132 				s++;
7133 			    }
7134 
7135 			    if (*s == NUL
7136 				    /* *s == '^' cuts */
7137 				    || (*s == '$'
7138 					    && !spell_iswordp(word + i + k0,
7139 								     curwin)))
7140 			    {
7141 				if (k0 == k)
7142 				    /* this is just a piece of the string */
7143 				    continue;
7144 
7145 				if (p0 < pri)
7146 				    /* priority too low */
7147 				    continue;
7148 				/* rule fits; stop search */
7149 				break;
7150 			    }
7151 			}
7152 
7153 			if (p0 >= pri && smp[n0].sm_lead[0] == c0)
7154 			    continue;
7155 		    }
7156 
7157 		    /* replace string */
7158 		    s = smp[n].sm_to;
7159 		    if (s == NULL)
7160 			s = (char_u *)"";
7161 		    pf = smp[n].sm_rules;
7162 		    p0 = (vim_strchr(pf, '<') != NULL) ? 1 : 0;
7163 		    if (p0 == 1 && z == 0)
7164 		    {
7165 			/* rule with '<' is used */
7166 			if (reslen > 0 && *s != NUL && (res[reslen - 1] == c
7167 						    || res[reslen - 1] == *s))
7168 			    reslen--;
7169 			z0 = 1;
7170 			z = 1;
7171 			k0 = 0;
7172 			while (*s != NUL && word[i + k0] != NUL)
7173 			{
7174 			    word[i + k0] = *s;
7175 			    k0++;
7176 			    s++;
7177 			}
7178 			if (k > k0)
7179 			    STRMOVE(word + i + k0, word + i + k);
7180 
7181 			/* new "actual letter" */
7182 			c = word[i];
7183 		    }
7184 		    else
7185 		    {
7186 			/* no '<' rule used */
7187 			i += k - 1;
7188 			z = 0;
7189 			while (*s != NUL && s[1] != NUL && reslen < MAXWLEN)
7190 			{
7191 			    if (reslen == 0 || res[reslen - 1] != *s)
7192 				res[reslen++] = *s;
7193 			    s++;
7194 			}
7195 			/* new "actual letter" */
7196 			c = *s;
7197 			if (strstr((char *)pf, "^^") != NULL)
7198 			{
7199 			    if (c != NUL)
7200 				res[reslen++] = c;
7201 			    STRMOVE(word, word + i + 1);
7202 			    i = 0;
7203 			    z0 = 1;
7204 			}
7205 		    }
7206 		    break;
7207 		}
7208 	    }
7209 	}
7210 	else if (VIM_ISWHITE(c))
7211 	{
7212 	    c = ' ';
7213 	    k = 1;
7214 	}
7215 
7216 	if (z0 == 0)
7217 	{
7218 	    if (k && !p0 && reslen < MAXWLEN && c != NUL
7219 		    && (!slang->sl_collapse || reslen == 0
7220 						     || res[reslen - 1] != c))
7221 		/* condense only double letters */
7222 		res[reslen++] = c;
7223 
7224 	    i++;
7225 	    z = 0;
7226 	    k = 0;
7227 	}
7228     }
7229 
7230     res[reslen] = NUL;
7231 }
7232 
7233 /*
7234  * Turn "inword" into its sound-a-like equivalent in "res[MAXWLEN]".
7235  * Multi-byte version of spell_soundfold().
7236  */
7237     static void
7238 spell_soundfold_wsal(slang_T *slang, char_u *inword, char_u *res)
7239 {
7240     salitem_T	*smp = (salitem_T *)slang->sl_sal.ga_data;
7241     int		word[MAXWLEN];
7242     int		wres[MAXWLEN];
7243     int		l;
7244     char_u	*s;
7245     int		*ws;
7246     char_u	*t;
7247     int		*pf;
7248     int		i, j, z;
7249     int		reslen;
7250     int		n, k = 0;
7251     int		z0;
7252     int		k0;
7253     int		n0;
7254     int		c;
7255     int		pri;
7256     int		p0 = -333;
7257     int		c0;
7258     int		did_white = FALSE;
7259     int		wordlen;
7260 
7261 
7262     /*
7263      * Convert the multi-byte string to a wide-character string.
7264      * Remove accents, if wanted.  We actually remove all non-word characters.
7265      * But keep white space.
7266      */
7267     wordlen = 0;
7268     for (s = inword; *s != NUL; )
7269     {
7270 	t = s;
7271 	c = mb_cptr2char_adv(&s);
7272 	if (slang->sl_rem_accents)
7273 	{
7274 	    if (enc_utf8 ? utf_class(c) == 0 : VIM_ISWHITE(c))
7275 	    {
7276 		if (did_white)
7277 		    continue;
7278 		c = ' ';
7279 		did_white = TRUE;
7280 	    }
7281 	    else
7282 	    {
7283 		did_white = FALSE;
7284 		if (!spell_iswordp_nmw(t, curwin))
7285 		    continue;
7286 	    }
7287 	}
7288 	word[wordlen++] = c;
7289     }
7290     word[wordlen] = NUL;
7291 
7292     /*
7293      * This algorithm comes from Aspell phonet.cpp.
7294      * Converted from C++ to C.  Added support for multi-byte chars.
7295      * Changed to keep spaces.
7296      */
7297     i = reslen = z = 0;
7298     while ((c = word[i]) != NUL)
7299     {
7300 	/* Start with the first rule that has the character in the word. */
7301 	n = slang->sl_sal_first[c & 0xff];
7302 	z0 = 0;
7303 
7304 	if (n >= 0)
7305 	{
7306 	    /* Check all rules for the same index byte.
7307 	     * If c is 0x300 need extra check for the end of the array, as
7308 	     * (c & 0xff) is NUL. */
7309 	    for (; ((ws = smp[n].sm_lead_w)[0] & 0xff) == (c & 0xff)
7310 							 && ws[0] != NUL; ++n)
7311 	    {
7312 		/* Quickly skip entries that don't match the word.  Most
7313 		 * entries are less then three chars, optimize for that. */
7314 		if (c != ws[0])
7315 		    continue;
7316 		k = smp[n].sm_leadlen;
7317 		if (k > 1)
7318 		{
7319 		    if (word[i + 1] != ws[1])
7320 			continue;
7321 		    if (k > 2)
7322 		    {
7323 			for (j = 2; j < k; ++j)
7324 			    if (word[i + j] != ws[j])
7325 				break;
7326 			if (j < k)
7327 			    continue;
7328 		    }
7329 		}
7330 
7331 		if ((pf = smp[n].sm_oneof_w) != NULL)
7332 		{
7333 		    /* Check for match with one of the chars in "sm_oneof". */
7334 		    while (*pf != NUL && *pf != word[i + k])
7335 			++pf;
7336 		    if (*pf == NUL)
7337 			continue;
7338 		    ++k;
7339 		}
7340 		s = smp[n].sm_rules;
7341 		pri = 5;    /* default priority */
7342 
7343 		p0 = *s;
7344 		k0 = k;
7345 		while (*s == '-' && k > 1)
7346 		{
7347 		    k--;
7348 		    s++;
7349 		}
7350 		if (*s == '<')
7351 		    s++;
7352 		if (VIM_ISDIGIT(*s))
7353 		{
7354 		    /* determine priority */
7355 		    pri = *s - '0';
7356 		    s++;
7357 		}
7358 		if (*s == '^' && *(s + 1) == '^')
7359 		    s++;
7360 
7361 		if (*s == NUL
7362 			|| (*s == '^'
7363 			    && (i == 0 || !(word[i - 1] == ' '
7364 				    || spell_iswordp_w(word + i - 1, curwin)))
7365 			    && (*(s + 1) != '$'
7366 				|| (!spell_iswordp_w(word + i + k0, curwin))))
7367 			|| (*s == '$' && i > 0
7368 			    && spell_iswordp_w(word + i - 1, curwin)
7369 			    && (!spell_iswordp_w(word + i + k0, curwin))))
7370 		{
7371 		    /* search for followup rules, if:    */
7372 		    /* followup and k > 1  and  NO '-' in searchstring */
7373 		    c0 = word[i + k - 1];
7374 		    n0 = slang->sl_sal_first[c0 & 0xff];
7375 
7376 		    if (slang->sl_followup && k > 1 && n0 >= 0
7377 					   && p0 != '-' && word[i + k] != NUL)
7378 		    {
7379 			/* Test follow-up rule for "word[i + k]"; loop over
7380 			 * all entries with the same index byte. */
7381 			for ( ; ((ws = smp[n0].sm_lead_w)[0] & 0xff)
7382 							 == (c0 & 0xff); ++n0)
7383 			{
7384 			    /* Quickly skip entries that don't match the word.
7385 			     */
7386 			    if (c0 != ws[0])
7387 				continue;
7388 			    k0 = smp[n0].sm_leadlen;
7389 			    if (k0 > 1)
7390 			    {
7391 				if (word[i + k] != ws[1])
7392 				    continue;
7393 				if (k0 > 2)
7394 				{
7395 				    pf = word + i + k + 1;
7396 				    for (j = 2; j < k0; ++j)
7397 					if (*pf++ != ws[j])
7398 					    break;
7399 				    if (j < k0)
7400 					continue;
7401 				}
7402 			    }
7403 			    k0 += k - 1;
7404 
7405 			    if ((pf = smp[n0].sm_oneof_w) != NULL)
7406 			    {
7407 				/* Check for match with one of the chars in
7408 				 * "sm_oneof". */
7409 				while (*pf != NUL && *pf != word[i + k0])
7410 				    ++pf;
7411 				if (*pf == NUL)
7412 				    continue;
7413 				++k0;
7414 			    }
7415 
7416 			    p0 = 5;
7417 			    s = smp[n0].sm_rules;
7418 			    while (*s == '-')
7419 			    {
7420 				/* "k0" gets NOT reduced because
7421 				 * "if (k0 == k)" */
7422 				s++;
7423 			    }
7424 			    if (*s == '<')
7425 				s++;
7426 			    if (VIM_ISDIGIT(*s))
7427 			    {
7428 				p0 = *s - '0';
7429 				s++;
7430 			    }
7431 
7432 			    if (*s == NUL
7433 				    /* *s == '^' cuts */
7434 				    || (*s == '$'
7435 					 && !spell_iswordp_w(word + i + k0,
7436 								     curwin)))
7437 			    {
7438 				if (k0 == k)
7439 				    /* this is just a piece of the string */
7440 				    continue;
7441 
7442 				if (p0 < pri)
7443 				    /* priority too low */
7444 				    continue;
7445 				/* rule fits; stop search */
7446 				break;
7447 			    }
7448 			}
7449 
7450 			if (p0 >= pri && (smp[n0].sm_lead_w[0] & 0xff)
7451 							       == (c0 & 0xff))
7452 			    continue;
7453 		    }
7454 
7455 		    /* replace string */
7456 		    ws = smp[n].sm_to_w;
7457 		    s = smp[n].sm_rules;
7458 		    p0 = (vim_strchr(s, '<') != NULL) ? 1 : 0;
7459 		    if (p0 == 1 && z == 0)
7460 		    {
7461 			/* rule with '<' is used */
7462 			if (reslen > 0 && ws != NULL && *ws != NUL
7463 				&& (wres[reslen - 1] == c
7464 						    || wres[reslen - 1] == *ws))
7465 			    reslen--;
7466 			z0 = 1;
7467 			z = 1;
7468 			k0 = 0;
7469 			if (ws != NULL)
7470 			    while (*ws != NUL && word[i + k0] != NUL)
7471 			    {
7472 				word[i + k0] = *ws;
7473 				k0++;
7474 				ws++;
7475 			    }
7476 			if (k > k0)
7477 			    mch_memmove(word + i + k0, word + i + k,
7478 				    sizeof(int) * (wordlen - (i + k) + 1));
7479 
7480 			/* new "actual letter" */
7481 			c = word[i];
7482 		    }
7483 		    else
7484 		    {
7485 			/* no '<' rule used */
7486 			i += k - 1;
7487 			z = 0;
7488 			if (ws != NULL)
7489 			    while (*ws != NUL && ws[1] != NUL
7490 							  && reslen < MAXWLEN)
7491 			    {
7492 				if (reslen == 0 || wres[reslen - 1] != *ws)
7493 				    wres[reslen++] = *ws;
7494 				ws++;
7495 			    }
7496 			/* new "actual letter" */
7497 			if (ws == NULL)
7498 			    c = NUL;
7499 			else
7500 			    c = *ws;
7501 			if (strstr((char *)s, "^^") != NULL)
7502 			{
7503 			    if (c != NUL)
7504 				wres[reslen++] = c;
7505 			    mch_memmove(word, word + i + 1,
7506 				       sizeof(int) * (wordlen - (i + 1) + 1));
7507 			    i = 0;
7508 			    z0 = 1;
7509 			}
7510 		    }
7511 		    break;
7512 		}
7513 	    }
7514 	}
7515 	else if (VIM_ISWHITE(c))
7516 	{
7517 	    c = ' ';
7518 	    k = 1;
7519 	}
7520 
7521 	if (z0 == 0)
7522 	{
7523 	    if (k && !p0 && reslen < MAXWLEN && c != NUL
7524 		    && (!slang->sl_collapse || reslen == 0
7525 						     || wres[reslen - 1] != c))
7526 		/* condense only double letters */
7527 		wres[reslen++] = c;
7528 
7529 	    i++;
7530 	    z = 0;
7531 	    k = 0;
7532 	}
7533     }
7534 
7535     /* Convert wide characters in "wres" to a multi-byte string in "res". */
7536     l = 0;
7537     for (n = 0; n < reslen; ++n)
7538     {
7539 	l += mb_char2bytes(wres[n], res + l);
7540 	if (l + MB_MAXBYTES > MAXWLEN)
7541 	    break;
7542     }
7543     res[l] = NUL;
7544 }
7545 
7546 /*
7547  * Compute a score for two sound-a-like words.
7548  * This permits up to two inserts/deletes/swaps/etc. to keep things fast.
7549  * Instead of a generic loop we write out the code.  That keeps it fast by
7550  * avoiding checks that will not be possible.
7551  */
7552     static int
7553 soundalike_score(
7554     char_u	*goodstart,	/* sound-folded good word */
7555     char_u	*badstart)	/* sound-folded bad word */
7556 {
7557     char_u	*goodsound = goodstart;
7558     char_u	*badsound = badstart;
7559     int		goodlen;
7560     int		badlen;
7561     int		n;
7562     char_u	*pl, *ps;
7563     char_u	*pl2, *ps2;
7564     int		score = 0;
7565 
7566     /* Adding/inserting "*" at the start (word starts with vowel) shouldn't be
7567      * counted so much, vowels halfway the word aren't counted at all. */
7568     if ((*badsound == '*' || *goodsound == '*') && *badsound != *goodsound)
7569     {
7570 	if ((badsound[0] == NUL && goodsound[1] == NUL)
7571 	    || (goodsound[0] == NUL && badsound[1] == NUL))
7572 	    /* changing word with vowel to word without a sound */
7573 	    return SCORE_DEL;
7574 	if (badsound[0] == NUL || goodsound[0] == NUL)
7575 	    /* more than two changes */
7576 	    return SCORE_MAXMAX;
7577 
7578 	if (badsound[1] == goodsound[1]
7579 		|| (badsound[1] != NUL
7580 		    && goodsound[1] != NUL
7581 		    && badsound[2] == goodsound[2]))
7582 	{
7583 	    /* handle like a substitute */
7584 	}
7585 	else
7586 	{
7587 	    score = 2 * SCORE_DEL / 3;
7588 	    if (*badsound == '*')
7589 		++badsound;
7590 	    else
7591 		++goodsound;
7592 	}
7593     }
7594 
7595     goodlen = (int)STRLEN(goodsound);
7596     badlen = (int)STRLEN(badsound);
7597 
7598     /* Return quickly if the lengths are too different to be fixed by two
7599      * changes. */
7600     n = goodlen - badlen;
7601     if (n < -2 || n > 2)
7602 	return SCORE_MAXMAX;
7603 
7604     if (n > 0)
7605     {
7606 	pl = goodsound;	    /* goodsound is longest */
7607 	ps = badsound;
7608     }
7609     else
7610     {
7611 	pl = badsound;	    /* badsound is longest */
7612 	ps = goodsound;
7613     }
7614 
7615     /* Skip over the identical part. */
7616     while (*pl == *ps && *pl != NUL)
7617     {
7618 	++pl;
7619 	++ps;
7620     }
7621 
7622     switch (n)
7623     {
7624 	case -2:
7625 	case 2:
7626 	    /*
7627 	     * Must delete two characters from "pl".
7628 	     */
7629 	    ++pl;	/* first delete */
7630 	    while (*pl == *ps)
7631 	    {
7632 		++pl;
7633 		++ps;
7634 	    }
7635 	    /* strings must be equal after second delete */
7636 	    if (STRCMP(pl + 1, ps) == 0)
7637 		return score + SCORE_DEL * 2;
7638 
7639 	    /* Failed to compare. */
7640 	    break;
7641 
7642 	case -1:
7643 	case 1:
7644 	    /*
7645 	     * Minimal one delete from "pl" required.
7646 	     */
7647 
7648 	    /* 1: delete */
7649 	    pl2 = pl + 1;
7650 	    ps2 = ps;
7651 	    while (*pl2 == *ps2)
7652 	    {
7653 		if (*pl2 == NUL)	/* reached the end */
7654 		    return score + SCORE_DEL;
7655 		++pl2;
7656 		++ps2;
7657 	    }
7658 
7659 	    /* 2: delete then swap, then rest must be equal */
7660 	    if (pl2[0] == ps2[1] && pl2[1] == ps2[0]
7661 					     && STRCMP(pl2 + 2, ps2 + 2) == 0)
7662 		return score + SCORE_DEL + SCORE_SWAP;
7663 
7664 	    /* 3: delete then substitute, then the rest must be equal */
7665 	    if (STRCMP(pl2 + 1, ps2 + 1) == 0)
7666 		return score + SCORE_DEL + SCORE_SUBST;
7667 
7668 	    /* 4: first swap then delete */
7669 	    if (pl[0] == ps[1] && pl[1] == ps[0])
7670 	    {
7671 		pl2 = pl + 2;	    /* swap, skip two chars */
7672 		ps2 = ps + 2;
7673 		while (*pl2 == *ps2)
7674 		{
7675 		    ++pl2;
7676 		    ++ps2;
7677 		}
7678 		/* delete a char and then strings must be equal */
7679 		if (STRCMP(pl2 + 1, ps2) == 0)
7680 		    return score + SCORE_SWAP + SCORE_DEL;
7681 	    }
7682 
7683 	    /* 5: first substitute then delete */
7684 	    pl2 = pl + 1;	    /* substitute, skip one char */
7685 	    ps2 = ps + 1;
7686 	    while (*pl2 == *ps2)
7687 	    {
7688 		++pl2;
7689 		++ps2;
7690 	    }
7691 	    /* delete a char and then strings must be equal */
7692 	    if (STRCMP(pl2 + 1, ps2) == 0)
7693 		return score + SCORE_SUBST + SCORE_DEL;
7694 
7695 	    /* Failed to compare. */
7696 	    break;
7697 
7698 	case 0:
7699 	    /*
7700 	     * Lengths are equal, thus changes must result in same length: An
7701 	     * insert is only possible in combination with a delete.
7702 	     * 1: check if for identical strings
7703 	     */
7704 	    if (*pl == NUL)
7705 		return score;
7706 
7707 	    /* 2: swap */
7708 	    if (pl[0] == ps[1] && pl[1] == ps[0])
7709 	    {
7710 		pl2 = pl + 2;	    /* swap, skip two chars */
7711 		ps2 = ps + 2;
7712 		while (*pl2 == *ps2)
7713 		{
7714 		    if (*pl2 == NUL)	/* reached the end */
7715 			return score + SCORE_SWAP;
7716 		    ++pl2;
7717 		    ++ps2;
7718 		}
7719 		/* 3: swap and swap again */
7720 		if (pl2[0] == ps2[1] && pl2[1] == ps2[0]
7721 					     && STRCMP(pl2 + 2, ps2 + 2) == 0)
7722 		    return score + SCORE_SWAP + SCORE_SWAP;
7723 
7724 		/* 4: swap and substitute */
7725 		if (STRCMP(pl2 + 1, ps2 + 1) == 0)
7726 		    return score + SCORE_SWAP + SCORE_SUBST;
7727 	    }
7728 
7729 	    /* 5: substitute */
7730 	    pl2 = pl + 1;
7731 	    ps2 = ps + 1;
7732 	    while (*pl2 == *ps2)
7733 	    {
7734 		if (*pl2 == NUL)	/* reached the end */
7735 		    return score + SCORE_SUBST;
7736 		++pl2;
7737 		++ps2;
7738 	    }
7739 
7740 	    /* 6: substitute and swap */
7741 	    if (pl2[0] == ps2[1] && pl2[1] == ps2[0]
7742 					     && STRCMP(pl2 + 2, ps2 + 2) == 0)
7743 		return score + SCORE_SUBST + SCORE_SWAP;
7744 
7745 	    /* 7: substitute and substitute */
7746 	    if (STRCMP(pl2 + 1, ps2 + 1) == 0)
7747 		return score + SCORE_SUBST + SCORE_SUBST;
7748 
7749 	    /* 8: insert then delete */
7750 	    pl2 = pl;
7751 	    ps2 = ps + 1;
7752 	    while (*pl2 == *ps2)
7753 	    {
7754 		++pl2;
7755 		++ps2;
7756 	    }
7757 	    if (STRCMP(pl2 + 1, ps2) == 0)
7758 		return score + SCORE_INS + SCORE_DEL;
7759 
7760 	    /* 9: delete then insert */
7761 	    pl2 = pl + 1;
7762 	    ps2 = ps;
7763 	    while (*pl2 == *ps2)
7764 	    {
7765 		++pl2;
7766 		++ps2;
7767 	    }
7768 	    if (STRCMP(pl2, ps2 + 1) == 0)
7769 		return score + SCORE_INS + SCORE_DEL;
7770 
7771 	    /* Failed to compare. */
7772 	    break;
7773     }
7774 
7775     return SCORE_MAXMAX;
7776 }
7777 
7778 /*
7779  * Compute the "edit distance" to turn "badword" into "goodword".  The less
7780  * deletes/inserts/substitutes/swaps are required the lower the score.
7781  *
7782  * The algorithm is described by Du and Chang, 1992.
7783  * The implementation of the algorithm comes from Aspell editdist.cpp,
7784  * edit_distance().  It has been converted from C++ to C and modified to
7785  * support multi-byte characters.
7786  */
7787     static int
7788 spell_edit_score(
7789     slang_T	*slang,
7790     char_u	*badword,
7791     char_u	*goodword)
7792 {
7793     int		*cnt;
7794     int		badlen, goodlen;	/* lengths including NUL */
7795     int		j, i;
7796     int		t;
7797     int		bc, gc;
7798     int		pbc, pgc;
7799     char_u	*p;
7800     int		wbadword[MAXWLEN];
7801     int		wgoodword[MAXWLEN];
7802 
7803     if (has_mbyte)
7804     {
7805 	/* Get the characters from the multi-byte strings and put them in an
7806 	 * int array for easy access. */
7807 	for (p = badword, badlen = 0; *p != NUL; )
7808 	    wbadword[badlen++] = mb_cptr2char_adv(&p);
7809 	wbadword[badlen++] = 0;
7810 	for (p = goodword, goodlen = 0; *p != NUL; )
7811 	    wgoodword[goodlen++] = mb_cptr2char_adv(&p);
7812 	wgoodword[goodlen++] = 0;
7813     }
7814     else
7815     {
7816 	badlen = (int)STRLEN(badword) + 1;
7817 	goodlen = (int)STRLEN(goodword) + 1;
7818     }
7819 
7820     /* We use "cnt" as an array: CNT(badword_idx, goodword_idx). */
7821 #define CNT(a, b)   cnt[(a) + (b) * (badlen + 1)]
7822     cnt = (int *)lalloc((long_u)(sizeof(int) * (badlen + 1) * (goodlen + 1)),
7823 									TRUE);
7824     if (cnt == NULL)
7825 	return 0;	/* out of memory */
7826 
7827     CNT(0, 0) = 0;
7828     for (j = 1; j <= goodlen; ++j)
7829 	CNT(0, j) = CNT(0, j - 1) + SCORE_INS;
7830 
7831     for (i = 1; i <= badlen; ++i)
7832     {
7833 	CNT(i, 0) = CNT(i - 1, 0) + SCORE_DEL;
7834 	for (j = 1; j <= goodlen; ++j)
7835 	{
7836 	    if (has_mbyte)
7837 	    {
7838 		bc = wbadword[i - 1];
7839 		gc = wgoodword[j - 1];
7840 	    }
7841 	    else
7842 	    {
7843 		bc = badword[i - 1];
7844 		gc = goodword[j - 1];
7845 	    }
7846 	    if (bc == gc)
7847 		CNT(i, j) = CNT(i - 1, j - 1);
7848 	    else
7849 	    {
7850 		/* Use a better score when there is only a case difference. */
7851 		if (SPELL_TOFOLD(bc) == SPELL_TOFOLD(gc))
7852 		    CNT(i, j) = SCORE_ICASE + CNT(i - 1, j - 1);
7853 		else
7854 		{
7855 		    /* For a similar character use SCORE_SIMILAR. */
7856 		    if (slang != NULL
7857 			    && slang->sl_has_map
7858 			    && similar_chars(slang, gc, bc))
7859 			CNT(i, j) = SCORE_SIMILAR + CNT(i - 1, j - 1);
7860 		    else
7861 			CNT(i, j) = SCORE_SUBST + CNT(i - 1, j - 1);
7862 		}
7863 
7864 		if (i > 1 && j > 1)
7865 		{
7866 		    if (has_mbyte)
7867 		    {
7868 			pbc = wbadword[i - 2];
7869 			pgc = wgoodword[j - 2];
7870 		    }
7871 		    else
7872 		    {
7873 			pbc = badword[i - 2];
7874 			pgc = goodword[j - 2];
7875 		    }
7876 		    if (bc == pgc && pbc == gc)
7877 		    {
7878 			t = SCORE_SWAP + CNT(i - 2, j - 2);
7879 			if (t < CNT(i, j))
7880 			    CNT(i, j) = t;
7881 		    }
7882 		}
7883 		t = SCORE_DEL + CNT(i - 1, j);
7884 		if (t < CNT(i, j))
7885 		    CNT(i, j) = t;
7886 		t = SCORE_INS + CNT(i, j - 1);
7887 		if (t < CNT(i, j))
7888 		    CNT(i, j) = t;
7889 	    }
7890 	}
7891     }
7892 
7893     i = CNT(badlen - 1, goodlen - 1);
7894     vim_free(cnt);
7895     return i;
7896 }
7897 
/*
 * Entry of the stack used in spell_edit_score_limit() and
 * spell_edit_score_limit_w(): an alternative that is still to be tried.
 */
typedef struct
{
    int		badi;	    /* index in the bad word where to continue */
    int		goodi;	    /* index in the good word where to continue */
    int		score;	    /* score for the edits done so far */
} limitscore_T;
7904 
7905 /*
7906  * Like spell_edit_score(), but with a limit on the score to make it faster.
7907  * May return SCORE_MAXMAX when the score is higher than "limit".
7908  *
7909  * This uses a stack for the edits still to be tried.
7910  * The idea comes from Aspell leditdist.cpp.  Rewritten in C and added support
7911  * for multi-byte characters.
7912  */
7913     static int
7914 spell_edit_score_limit(
7915     slang_T	*slang,
7916     char_u	*badword,
7917     char_u	*goodword,
7918     int		limit)
7919 {
7920     limitscore_T    stack[10];		/* allow for over 3 * 2 edits */
7921     int		    stackidx;
7922     int		    bi, gi;
7923     int		    bi2, gi2;
7924     int		    bc, gc;
7925     int		    score;
7926     int		    score_off;
7927     int		    minscore;
7928     int		    round;
7929 
7930     /* Multi-byte characters require a bit more work, use a different function
7931      * to avoid testing "has_mbyte" quite often. */
7932     if (has_mbyte)
7933 	return spell_edit_score_limit_w(slang, badword, goodword, limit);
7934 
7935     /*
7936      * The idea is to go from start to end over the words.  So long as
7937      * characters are equal just continue, this always gives the lowest score.
7938      * When there is a difference try several alternatives.  Each alternative
7939      * increases "score" for the edit distance.  Some of the alternatives are
7940      * pushed unto a stack and tried later, some are tried right away.  At the
7941      * end of the word the score for one alternative is known.  The lowest
7942      * possible score is stored in "minscore".
7943      */
7944     stackidx = 0;
7945     bi = 0;
7946     gi = 0;
7947     score = 0;
7948     minscore = limit + 1;
7949 
7950     for (;;)
7951     {
7952 	/* Skip over an equal part, score remains the same. */
7953 	for (;;)
7954 	{
7955 	    bc = badword[bi];
7956 	    gc = goodword[gi];
7957 	    if (bc != gc)	/* stop at a char that's different */
7958 		break;
7959 	    if (bc == NUL)	/* both words end */
7960 	    {
7961 		if (score < minscore)
7962 		    minscore = score;
7963 		goto pop;	/* do next alternative */
7964 	    }
7965 	    ++bi;
7966 	    ++gi;
7967 	}
7968 
7969 	if (gc == NUL)    /* goodword ends, delete badword chars */
7970 	{
7971 	    do
7972 	    {
7973 		if ((score += SCORE_DEL) >= minscore)
7974 		    goto pop;	    /* do next alternative */
7975 	    } while (badword[++bi] != NUL);
7976 	    minscore = score;
7977 	}
7978 	else if (bc == NUL) /* badword ends, insert badword chars */
7979 	{
7980 	    do
7981 	    {
7982 		if ((score += SCORE_INS) >= minscore)
7983 		    goto pop;	    /* do next alternative */
7984 	    } while (goodword[++gi] != NUL);
7985 	    minscore = score;
7986 	}
7987 	else			/* both words continue */
7988 	{
7989 	    /* If not close to the limit, perform a change.  Only try changes
7990 	     * that may lead to a lower score than "minscore".
7991 	     * round 0: try deleting a char from badword
7992 	     * round 1: try inserting a char in badword */
7993 	    for (round = 0; round <= 1; ++round)
7994 	    {
7995 		score_off = score + (round == 0 ? SCORE_DEL : SCORE_INS);
7996 		if (score_off < minscore)
7997 		{
7998 		    if (score_off + SCORE_EDIT_MIN >= minscore)
7999 		    {
8000 			/* Near the limit, rest of the words must match.  We
8001 			 * can check that right now, no need to push an item
8002 			 * onto the stack. */
8003 			bi2 = bi + 1 - round;
8004 			gi2 = gi + round;
8005 			while (goodword[gi2] == badword[bi2])
8006 			{
8007 			    if (goodword[gi2] == NUL)
8008 			    {
8009 				minscore = score_off;
8010 				break;
8011 			    }
8012 			    ++bi2;
8013 			    ++gi2;
8014 			}
8015 		    }
8016 		    else
8017 		    {
8018 			/* try deleting/inserting a character later */
8019 			stack[stackidx].badi = bi + 1 - round;
8020 			stack[stackidx].goodi = gi + round;
8021 			stack[stackidx].score = score_off;
8022 			++stackidx;
8023 		    }
8024 		}
8025 	    }
8026 
8027 	    if (score + SCORE_SWAP < minscore)
8028 	    {
8029 		/* If swapping two characters makes a match then the
8030 		 * substitution is more expensive, thus there is no need to
8031 		 * try both. */
8032 		if (gc == badword[bi + 1] && bc == goodword[gi + 1])
8033 		{
8034 		    /* Swap two characters, that is: skip them. */
8035 		    gi += 2;
8036 		    bi += 2;
8037 		    score += SCORE_SWAP;
8038 		    continue;
8039 		}
8040 	    }
8041 
8042 	    /* Substitute one character for another which is the same
8043 	     * thing as deleting a character from both goodword and badword.
8044 	     * Use a better score when there is only a case difference. */
8045 	    if (SPELL_TOFOLD(bc) == SPELL_TOFOLD(gc))
8046 		score += SCORE_ICASE;
8047 	    else
8048 	    {
8049 		/* For a similar character use SCORE_SIMILAR. */
8050 		if (slang != NULL
8051 			&& slang->sl_has_map
8052 			&& similar_chars(slang, gc, bc))
8053 		    score += SCORE_SIMILAR;
8054 		else
8055 		    score += SCORE_SUBST;
8056 	    }
8057 
8058 	    if (score < minscore)
8059 	    {
8060 		/* Do the substitution. */
8061 		++gi;
8062 		++bi;
8063 		continue;
8064 	    }
8065 	}
8066 pop:
8067 	/*
8068 	 * Get here to try the next alternative, pop it from the stack.
8069 	 */
8070 	if (stackidx == 0)		/* stack is empty, finished */
8071 	    break;
8072 
8073 	/* pop an item from the stack */
8074 	--stackidx;
8075 	gi = stack[stackidx].goodi;
8076 	bi = stack[stackidx].badi;
8077 	score = stack[stackidx].score;
8078     }
8079 
8080     /* When the score goes over "limit" it may actually be much higher.
8081      * Return a very large number to avoid going below the limit when giving a
8082      * bonus. */
8083     if (minscore > limit)
8084 	return SCORE_MAXMAX;
8085     return minscore;
8086 }
8087 
8088 /*
8089  * Multi-byte version of spell_edit_score_limit().
8090  * Keep it in sync with the above!
8091  */
8092     static int
8093 spell_edit_score_limit_w(
8094     slang_T	*slang,
8095     char_u	*badword,
8096     char_u	*goodword,
8097     int		limit)
8098 {
8099     limitscore_T    stack[10];		/* allow for over 3 * 2 edits */
8100     int		    stackidx;
8101     int		    bi, gi;
8102     int		    bi2, gi2;
8103     int		    bc, gc;
8104     int		    score;
8105     int		    score_off;
8106     int		    minscore;
8107     int		    round;
8108     char_u	    *p;
8109     int		    wbadword[MAXWLEN];
8110     int		    wgoodword[MAXWLEN];
8111 
8112     /* Get the characters from the multi-byte strings and put them in an
8113      * int array for easy access. */
8114     bi = 0;
8115     for (p = badword; *p != NUL; )
8116 	wbadword[bi++] = mb_cptr2char_adv(&p);
8117     wbadword[bi++] = 0;
8118     gi = 0;
8119     for (p = goodword; *p != NUL; )
8120 	wgoodword[gi++] = mb_cptr2char_adv(&p);
8121     wgoodword[gi++] = 0;
8122 
8123     /*
8124      * The idea is to go from start to end over the words.  So long as
8125      * characters are equal just continue, this always gives the lowest score.
8126      * When there is a difference try several alternatives.  Each alternative
8127      * increases "score" for the edit distance.  Some of the alternatives are
8128      * pushed unto a stack and tried later, some are tried right away.  At the
8129      * end of the word the score for one alternative is known.  The lowest
8130      * possible score is stored in "minscore".
8131      */
8132     stackidx = 0;
8133     bi = 0;
8134     gi = 0;
8135     score = 0;
8136     minscore = limit + 1;
8137 
8138     for (;;)
8139     {
8140 	/* Skip over an equal part, score remains the same. */
8141 	for (;;)
8142 	{
8143 	    bc = wbadword[bi];
8144 	    gc = wgoodword[gi];
8145 
8146 	    if (bc != gc)	/* stop at a char that's different */
8147 		break;
8148 	    if (bc == NUL)	/* both words end */
8149 	    {
8150 		if (score < minscore)
8151 		    minscore = score;
8152 		goto pop;	/* do next alternative */
8153 	    }
8154 	    ++bi;
8155 	    ++gi;
8156 	}
8157 
8158 	if (gc == NUL)    /* goodword ends, delete badword chars */
8159 	{
8160 	    do
8161 	    {
8162 		if ((score += SCORE_DEL) >= minscore)
8163 		    goto pop;	    /* do next alternative */
8164 	    } while (wbadword[++bi] != NUL);
8165 	    minscore = score;
8166 	}
8167 	else if (bc == NUL) /* badword ends, insert badword chars */
8168 	{
8169 	    do
8170 	    {
8171 		if ((score += SCORE_INS) >= minscore)
8172 		    goto pop;	    /* do next alternative */
8173 	    } while (wgoodword[++gi] != NUL);
8174 	    minscore = score;
8175 	}
8176 	else			/* both words continue */
8177 	{
8178 	    /* If not close to the limit, perform a change.  Only try changes
8179 	     * that may lead to a lower score than "minscore".
8180 	     * round 0: try deleting a char from badword
8181 	     * round 1: try inserting a char in badword */
8182 	    for (round = 0; round <= 1; ++round)
8183 	    {
8184 		score_off = score + (round == 0 ? SCORE_DEL : SCORE_INS);
8185 		if (score_off < minscore)
8186 		{
8187 		    if (score_off + SCORE_EDIT_MIN >= minscore)
8188 		    {
8189 			/* Near the limit, rest of the words must match.  We
8190 			 * can check that right now, no need to push an item
8191 			 * onto the stack. */
8192 			bi2 = bi + 1 - round;
8193 			gi2 = gi + round;
8194 			while (wgoodword[gi2] == wbadword[bi2])
8195 			{
8196 			    if (wgoodword[gi2] == NUL)
8197 			    {
8198 				minscore = score_off;
8199 				break;
8200 			    }
8201 			    ++bi2;
8202 			    ++gi2;
8203 			}
8204 		    }
8205 		    else
8206 		    {
8207 			/* try deleting a character from badword later */
8208 			stack[stackidx].badi = bi + 1 - round;
8209 			stack[stackidx].goodi = gi + round;
8210 			stack[stackidx].score = score_off;
8211 			++stackidx;
8212 		    }
8213 		}
8214 	    }
8215 
8216 	    if (score + SCORE_SWAP < minscore)
8217 	    {
8218 		/* If swapping two characters makes a match then the
8219 		 * substitution is more expensive, thus there is no need to
8220 		 * try both. */
8221 		if (gc == wbadword[bi + 1] && bc == wgoodword[gi + 1])
8222 		{
8223 		    /* Swap two characters, that is: skip them. */
8224 		    gi += 2;
8225 		    bi += 2;
8226 		    score += SCORE_SWAP;
8227 		    continue;
8228 		}
8229 	    }
8230 
8231 	    /* Substitute one character for another which is the same
8232 	     * thing as deleting a character from both goodword and badword.
8233 	     * Use a better score when there is only a case difference. */
8234 	    if (SPELL_TOFOLD(bc) == SPELL_TOFOLD(gc))
8235 		score += SCORE_ICASE;
8236 	    else
8237 	    {
8238 		/* For a similar character use SCORE_SIMILAR. */
8239 		if (slang != NULL
8240 			&& slang->sl_has_map
8241 			&& similar_chars(slang, gc, bc))
8242 		    score += SCORE_SIMILAR;
8243 		else
8244 		    score += SCORE_SUBST;
8245 	    }
8246 
8247 	    if (score < minscore)
8248 	    {
8249 		/* Do the substitution. */
8250 		++gi;
8251 		++bi;
8252 		continue;
8253 	    }
8254 	}
8255 pop:
8256 	/*
8257 	 * Get here to try the next alternative, pop it from the stack.
8258 	 */
8259 	if (stackidx == 0)		/* stack is empty, finished */
8260 	    break;
8261 
8262 	/* pop an item from the stack */
8263 	--stackidx;
8264 	gi = stack[stackidx].goodi;
8265 	bi = stack[stackidx].badi;
8266 	score = stack[stackidx].score;
8267     }
8268 
8269     /* When the score goes over "limit" it may actually be much higher.
8270      * Return a very large number to avoid going below the limit when giving a
8271      * bonus. */
8272     if (minscore > limit)
8273 	return SCORE_MAXMAX;
8274     return minscore;
8275 }
8276 
8277 /*
8278  * ":spellinfo"
8279  */
8280     void
8281 ex_spellinfo(exarg_T *eap UNUSED)
8282 {
8283     int		lpi;
8284     langp_T	*lp;
8285     char_u	*p;
8286 
8287     if (no_spell_checking(curwin))
8288 	return;
8289 
8290     msg_start();
8291     for (lpi = 0; lpi < curwin->w_s->b_langp.ga_len && !got_int; ++lpi)
8292     {
8293 	lp = LANGP_ENTRY(curwin->w_s->b_langp, lpi);
8294 	msg_puts("file: ");
8295 	msg_puts((char *)lp->lp_slang->sl_fname);
8296 	msg_putchar('\n');
8297 	p = lp->lp_slang->sl_info;
8298 	if (p != NULL)
8299 	{
8300 	    msg_puts((char *)p);
8301 	    msg_putchar('\n');
8302 	}
8303     }
8304     msg_end();
8305 }
8306 
/* Bit flags for the "dumpflags" argument of the word dumping functions. */
#define DUMPFLAG_KEEPCASE   0x01    /* round 2: keep-case tree */
#define DUMPFLAG_COUNT	    0x02    /* include word count */
#define DUMPFLAG_ICASE	    0x04    /* ignore case when finding matches */
#define DUMPFLAG_ONECAP	    0x08    /* pattern starts with capital */
#define DUMPFLAG_ALLCAP	    0x10    /* pattern is all capitals */
8312 
8313 /*
8314  * ":spelldump"
8315  */
8316     void
8317 ex_spelldump(exarg_T *eap)
8318 {
8319     char_u  *spl;
8320     long    dummy;
8321 
8322     if (no_spell_checking(curwin))
8323 	return;
8324     get_option_value((char_u*)"spl", &dummy, &spl, OPT_LOCAL);
8325 
8326     /* Create a new empty buffer in a new window. */
8327     do_cmdline_cmd((char_u *)"new");
8328 
8329     /* enable spelling locally in the new window */
8330     set_option_value((char_u*)"spell", TRUE, (char_u*)"", OPT_LOCAL);
8331     set_option_value((char_u*)"spl",  dummy, spl, OPT_LOCAL);
8332     vim_free(spl);
8333 
8334     if (!BUFEMPTY())
8335 	return;
8336 
8337     spell_dump_compl(NULL, 0, NULL, eap->forceit ? DUMPFLAG_COUNT : 0);
8338 
8339     /* Delete the empty line that we started with. */
8340     if (curbuf->b_ml.ml_line_count > 1)
8341 	ml_delete(curbuf->b_ml.ml_line_count, FALSE);
8342 
8343     redraw_later(NOT_VALID);
8344 }
8345 
8346 /*
8347  * Go through all possible words and:
8348  * 1. When "pat" is NULL: dump a list of all words in the current buffer.
8349  *	"ic" and "dir" are not used.
8350  * 2. When "pat" is not NULL: add matching words to insert mode completion.
8351  */
8352     void
8353 spell_dump_compl(
8354     char_u	*pat,	    /* leading part of the word */
8355     int		ic,	    /* ignore case */
8356     int		*dir,	    /* direction for adding matches */
8357     int		dumpflags_arg)	/* DUMPFLAG_* */
8358 {
8359     langp_T	*lp;
8360     slang_T	*slang;
8361     idx_T	arridx[MAXWLEN];
8362     int		curi[MAXWLEN];
8363     char_u	word[MAXWLEN];
8364     int		c;
8365     char_u	*byts;
8366     idx_T	*idxs;
8367     linenr_T	lnum = 0;
8368     int		round;
8369     int		depth;
8370     int		n;
8371     int		flags;
8372     char_u	*region_names = NULL;	    /* region names being used */
8373     int		do_region = TRUE;	    /* dump region names and numbers */
8374     char_u	*p;
8375     int		lpi;
8376     int		dumpflags = dumpflags_arg;
8377     int		patlen;
8378 
8379     /* When ignoring case or when the pattern starts with capital pass this on
8380      * to dump_word(). */
8381     if (pat != NULL)
8382     {
8383 	if (ic)
8384 	    dumpflags |= DUMPFLAG_ICASE;
8385 	else
8386 	{
8387 	    n = captype(pat, NULL);
8388 	    if (n == WF_ONECAP)
8389 		dumpflags |= DUMPFLAG_ONECAP;
8390 	    else if (n == WF_ALLCAP && (int)STRLEN(pat) > mb_ptr2len(pat))
8391 		dumpflags |= DUMPFLAG_ALLCAP;
8392 	}
8393     }
8394 
8395     /* Find out if we can support regions: All languages must support the same
8396      * regions or none at all. */
8397     for (lpi = 0; lpi < curwin->w_s->b_langp.ga_len; ++lpi)
8398     {
8399 	lp = LANGP_ENTRY(curwin->w_s->b_langp, lpi);
8400 	p = lp->lp_slang->sl_regions;
8401 	if (p[0] != 0)
8402 	{
8403 	    if (region_names == NULL)	    /* first language with regions */
8404 		region_names = p;
8405 	    else if (STRCMP(region_names, p) != 0)
8406 	    {
8407 		do_region = FALSE;	    /* region names are different */
8408 		break;
8409 	    }
8410 	}
8411     }
8412 
8413     if (do_region && region_names != NULL)
8414     {
8415 	if (pat == NULL)
8416 	{
8417 	    vim_snprintf((char *)IObuff, IOSIZE, "/regions=%s", region_names);
8418 	    ml_append(lnum++, IObuff, (colnr_T)0, FALSE);
8419 	}
8420     }
8421     else
8422 	do_region = FALSE;
8423 
8424     /*
8425      * Loop over all files loaded for the entries in 'spelllang'.
8426      */
8427     for (lpi = 0; lpi < curwin->w_s->b_langp.ga_len; ++lpi)
8428     {
8429 	lp = LANGP_ENTRY(curwin->w_s->b_langp, lpi);
8430 	slang = lp->lp_slang;
8431 	if (slang->sl_fbyts == NULL)	    /* reloading failed */
8432 	    continue;
8433 
8434 	if (pat == NULL)
8435 	{
8436 	    vim_snprintf((char *)IObuff, IOSIZE, "# file: %s", slang->sl_fname);
8437 	    ml_append(lnum++, IObuff, (colnr_T)0, FALSE);
8438 	}
8439 
8440 	/* When matching with a pattern and there are no prefixes only use
8441 	 * parts of the tree that match "pat". */
8442 	if (pat != NULL && slang->sl_pbyts == NULL)
8443 	    patlen = (int)STRLEN(pat);
8444 	else
8445 	    patlen = -1;
8446 
8447 	/* round 1: case-folded tree
8448 	 * round 2: keep-case tree */
8449 	for (round = 1; round <= 2; ++round)
8450 	{
8451 	    if (round == 1)
8452 	    {
8453 		dumpflags &= ~DUMPFLAG_KEEPCASE;
8454 		byts = slang->sl_fbyts;
8455 		idxs = slang->sl_fidxs;
8456 	    }
8457 	    else
8458 	    {
8459 		dumpflags |= DUMPFLAG_KEEPCASE;
8460 		byts = slang->sl_kbyts;
8461 		idxs = slang->sl_kidxs;
8462 	    }
8463 	    if (byts == NULL)
8464 		continue;		/* array is empty */
8465 
8466 	    depth = 0;
8467 	    arridx[0] = 0;
8468 	    curi[0] = 1;
8469 	    while (depth >= 0 && !got_int
8470 				  && (pat == NULL || !ins_compl_interrupted()))
8471 	    {
8472 		if (curi[depth] > byts[arridx[depth]])
8473 		{
8474 		    /* Done all bytes at this node, go up one level. */
8475 		    --depth;
8476 		    line_breakcheck();
8477 		    ins_compl_check_keys(50, FALSE);
8478 		}
8479 		else
8480 		{
8481 		    /* Do one more byte at this node. */
8482 		    n = arridx[depth] + curi[depth];
8483 		    ++curi[depth];
8484 		    c = byts[n];
8485 		    if (c == 0)
8486 		    {
8487 			/* End of word, deal with the word.
8488 			 * Don't use keep-case words in the fold-case tree,
8489 			 * they will appear in the keep-case tree.
8490 			 * Only use the word when the region matches. */
8491 			flags = (int)idxs[n];
8492 			if ((round == 2 || (flags & WF_KEEPCAP) == 0)
8493 				&& (flags & WF_NEEDCOMP) == 0
8494 				&& (do_region
8495 				    || (flags & WF_REGION) == 0
8496 				    || (((unsigned)flags >> 16)
8497 						       & lp->lp_region) != 0))
8498 			{
8499 			    word[depth] = NUL;
8500 			    if (!do_region)
8501 				flags &= ~WF_REGION;
8502 
8503 			    /* Dump the basic word if there is no prefix or
8504 			     * when it's the first one. */
8505 			    c = (unsigned)flags >> 24;
8506 			    if (c == 0 || curi[depth] == 2)
8507 			    {
8508 				dump_word(slang, word, pat, dir,
8509 						      dumpflags, flags, lnum);
8510 				if (pat == NULL)
8511 				    ++lnum;
8512 			    }
8513 
8514 			    /* Apply the prefix, if there is one. */
8515 			    if (c != 0)
8516 				lnum = dump_prefixes(slang, word, pat, dir,
8517 						      dumpflags, flags, lnum);
8518 			}
8519 		    }
8520 		    else
8521 		    {
8522 			/* Normal char, go one level deeper. */
8523 			word[depth++] = c;
8524 			arridx[depth] = idxs[n];
8525 			curi[depth] = 1;
8526 
8527 			/* Check if this characters matches with the pattern.
8528 			 * If not skip the whole tree below it.
8529 			 * Always ignore case here, dump_word() will check
8530 			 * proper case later.  This isn't exactly right when
8531 			 * length changes for multi-byte characters with
8532 			 * ignore case... */
8533 			if (depth <= patlen
8534 					&& MB_STRNICMP(word, pat, depth) != 0)
8535 			    --depth;
8536 		    }
8537 		}
8538 	    }
8539 	}
8540     }
8541 }
8542 
8543 /*
8544  * Dump one word: apply case modifications and append a line to the buffer.
8545  * When "lnum" is zero add insert mode completion.
8546  */
8547     static void
8548 dump_word(
8549     slang_T	*slang,
8550     char_u	*word,
8551     char_u	*pat,
8552     int		*dir,
8553     int		dumpflags,
8554     int		wordflags,
8555     linenr_T	lnum)
8556 {
8557     int		keepcap = FALSE;
8558     char_u	*p;
8559     char_u	*tw;
8560     char_u	cword[MAXWLEN];
8561     char_u	badword[MAXWLEN + 10];
8562     int		i;
8563     int		flags = wordflags;
8564 
8565     if (dumpflags & DUMPFLAG_ONECAP)
8566 	flags |= WF_ONECAP;
8567     if (dumpflags & DUMPFLAG_ALLCAP)
8568 	flags |= WF_ALLCAP;
8569 
8570     if ((dumpflags & DUMPFLAG_KEEPCASE) == 0 && (flags & WF_CAPMASK) != 0)
8571     {
8572 	/* Need to fix case according to "flags". */
8573 	make_case_word(word, cword, flags);
8574 	p = cword;
8575     }
8576     else
8577     {
8578 	p = word;
8579 	if ((dumpflags & DUMPFLAG_KEEPCASE)
8580 		&& ((captype(word, NULL) & WF_KEEPCAP) == 0
8581 						 || (flags & WF_FIXCAP) != 0))
8582 	    keepcap = TRUE;
8583     }
8584     tw = p;
8585 
8586     if (pat == NULL)
8587     {
8588 	/* Add flags and regions after a slash. */
8589 	if ((flags & (WF_BANNED | WF_RARE | WF_REGION)) || keepcap)
8590 	{
8591 	    STRCPY(badword, p);
8592 	    STRCAT(badword, "/");
8593 	    if (keepcap)
8594 		STRCAT(badword, "=");
8595 	    if (flags & WF_BANNED)
8596 		STRCAT(badword, "!");
8597 	    else if (flags & WF_RARE)
8598 		STRCAT(badword, "?");
8599 	    if (flags & WF_REGION)
8600 		for (i = 0; i < 7; ++i)
8601 		    if (flags & (0x10000 << i))
8602 			sprintf((char *)badword + STRLEN(badword), "%d", i + 1);
8603 	    p = badword;
8604 	}
8605 
8606 	if (dumpflags & DUMPFLAG_COUNT)
8607 	{
8608 	    hashitem_T  *hi;
8609 
8610 	    /* Include the word count for ":spelldump!". */
8611 	    hi = hash_find(&slang->sl_wordcount, tw);
8612 	    if (!HASHITEM_EMPTY(hi))
8613 	    {
8614 		vim_snprintf((char *)IObuff, IOSIZE, "%s\t%d",
8615 						     tw, HI2WC(hi)->wc_count);
8616 		p = IObuff;
8617 	    }
8618 	}
8619 
8620 	ml_append(lnum, p, (colnr_T)0, FALSE);
8621     }
8622     else if (((dumpflags & DUMPFLAG_ICASE)
8623 		    ? MB_STRNICMP(p, pat, STRLEN(pat)) == 0
8624 		    : STRNCMP(p, pat, STRLEN(pat)) == 0)
8625 		&& ins_compl_add_infercase(p, (int)STRLEN(p),
8626 					  p_ic, NULL, *dir, FALSE) == OK)
8627 	/* if dir was BACKWARD then honor it just once */
8628 	*dir = FORWARD;
8629 }
8630 
8631 /*
8632  * For ":spelldump": Find matching prefixes for "word".  Prepend each to
8633  * "word" and append a line to the buffer.
8634  * When "lnum" is zero add insert mode completion.
8635  * Return the updated line number.
8636  */
8637     static linenr_T
8638 dump_prefixes(
8639     slang_T	*slang,
8640     char_u	*word,	    /* case-folded word */
8641     char_u	*pat,
8642     int		*dir,
8643     int		dumpflags,
8644     int		flags,	    /* flags with prefix ID */
8645     linenr_T	startlnum)
8646 {
8647     idx_T	arridx[MAXWLEN];
8648     int		curi[MAXWLEN];
8649     char_u	prefix[MAXWLEN];
8650     char_u	word_up[MAXWLEN];
8651     int		has_word_up = FALSE;
8652     int		c;
8653     char_u	*byts;
8654     idx_T	*idxs;
8655     linenr_T	lnum = startlnum;
8656     int		depth;
8657     int		n;
8658     int		len;
8659     int		i;
8660 
8661     /* If the word starts with a lower-case letter make the word with an
8662      * upper-case letter in word_up[]. */
8663     c = PTR2CHAR(word);
8664     if (SPELL_TOUPPER(c) != c)
8665     {
8666 	onecap_copy(word, word_up, TRUE);
8667 	has_word_up = TRUE;
8668     }
8669 
8670     byts = slang->sl_pbyts;
8671     idxs = slang->sl_pidxs;
8672     if (byts != NULL)		/* array not is empty */
8673     {
8674 	/*
8675 	 * Loop over all prefixes, building them byte-by-byte in prefix[].
8676 	 * When at the end of a prefix check that it supports "flags".
8677 	 */
8678 	depth = 0;
8679 	arridx[0] = 0;
8680 	curi[0] = 1;
8681 	while (depth >= 0 && !got_int)
8682 	{
8683 	    n = arridx[depth];
8684 	    len = byts[n];
8685 	    if (curi[depth] > len)
8686 	    {
8687 		/* Done all bytes at this node, go up one level. */
8688 		--depth;
8689 		line_breakcheck();
8690 	    }
8691 	    else
8692 	    {
8693 		/* Do one more byte at this node. */
8694 		n += curi[depth];
8695 		++curi[depth];
8696 		c = byts[n];
8697 		if (c == 0)
8698 		{
8699 		    /* End of prefix, find out how many IDs there are. */
8700 		    for (i = 1; i < len; ++i)
8701 			if (byts[n + i] != 0)
8702 			    break;
8703 		    curi[depth] += i - 1;
8704 
8705 		    c = valid_word_prefix(i, n, flags, word, slang, FALSE);
8706 		    if (c != 0)
8707 		    {
8708 			vim_strncpy(prefix + depth, word, MAXWLEN - depth - 1);
8709 			dump_word(slang, prefix, pat, dir, dumpflags,
8710 				(c & WF_RAREPFX) ? (flags | WF_RARE)
8711 							       : flags, lnum);
8712 			if (lnum != 0)
8713 			    ++lnum;
8714 		    }
8715 
8716 		    /* Check for prefix that matches the word when the
8717 		     * first letter is upper-case, but only if the prefix has
8718 		     * a condition. */
8719 		    if (has_word_up)
8720 		    {
8721 			c = valid_word_prefix(i, n, flags, word_up, slang,
8722 									TRUE);
8723 			if (c != 0)
8724 			{
8725 			    vim_strncpy(prefix + depth, word_up,
8726 							 MAXWLEN - depth - 1);
8727 			    dump_word(slang, prefix, pat, dir, dumpflags,
8728 				    (c & WF_RAREPFX) ? (flags | WF_RARE)
8729 							       : flags, lnum);
8730 			    if (lnum != 0)
8731 				++lnum;
8732 			}
8733 		    }
8734 		}
8735 		else
8736 		{
8737 		    /* Normal char, go one level deeper. */
8738 		    prefix[depth++] = c;
8739 		    arridx[depth] = idxs[n];
8740 		    curi[depth] = 1;
8741 		}
8742 	    }
8743 	}
8744     }
8745 
8746     return lnum;
8747 }
8748 
8749 /*
8750  * Move "p" to the end of word "start".
8751  * Uses the spell-checking word characters.
8752  */
8753     char_u *
8754 spell_to_word_end(char_u *start, win_T *win)
8755 {
8756     char_u  *p = start;
8757 
8758     while (*p != NUL && spell_iswordp(p, win))
8759 	MB_PTR_ADV(p);
8760     return p;
8761 }
8762 
8763 #if defined(FEAT_INS_EXPAND) || defined(PROTO)
8764 /*
8765  * For Insert mode completion CTRL-X s:
8766  * Find start of the word in front of column "startcol".
8767  * We don't check if it is badly spelled, with completion we can only change
8768  * the word in front of the cursor.
8769  * Returns the column number of the word.
8770  */
8771     int
8772 spell_word_start(int startcol)
8773 {
8774     char_u	*line;
8775     char_u	*p;
8776     int		col = 0;
8777 
8778     if (no_spell_checking(curwin))
8779 	return startcol;
8780 
8781     /* Find a word character before "startcol". */
8782     line = ml_get_curline();
8783     for (p = line + startcol; p > line; )
8784     {
8785 	MB_PTR_BACK(line, p);
8786 	if (spell_iswordp_nmw(p, curwin))
8787 	    break;
8788     }
8789 
8790     /* Go back to start of the word. */
8791     while (p > line)
8792     {
8793 	col = (int)(p - line);
8794 	MB_PTR_BACK(line, p);
8795 	if (!spell_iswordp(p, curwin))
8796 	    break;
8797 	col = 0;
8798     }
8799 
8800     return col;
8801 }
8802 
8803 /*
8804  * Need to check for 'spellcapcheck' now, the word is removed before
8805  * expand_spelling() is called.  Therefore the ugly global variable.
8806  */
8807 static int spell_expand_need_cap;
8808 
8809     void
8810 spell_expand_check_cap(colnr_T col)
8811 {
8812     spell_expand_need_cap = check_need_cap(curwin->w_cursor.lnum, col);
8813 }
8814 
8815 /*
8816  * Get list of spelling suggestions.
8817  * Used for Insert mode completion CTRL-X ?.
8818  * Returns the number of matches.  The matches are in "matchp[]", array of
8819  * allocated strings.
8820  */
8821     int
8822 expand_spelling(
8823     linenr_T	lnum UNUSED,
8824     char_u	*pat,
8825     char_u	***matchp)
8826 {
8827     garray_T	ga;
8828 
8829     spell_suggest_list(&ga, pat, 100, spell_expand_need_cap, TRUE);
8830     *matchp = ga.ga_data;
8831     return ga.ga_len;
8832 }
8833 #endif
8834 
8835 #endif  /* FEAT_SPELL */
8836