xref: /vim-8.2.3635/src/spell.c (revision fcfe1a9b)
1 /* vi:set ts=8 sts=4 sw=4 noet:
2  *
3  * VIM - Vi IMproved	by Bram Moolenaar
4  *
5  * Do ":help uganda"  in Vim to read copying and usage conditions.
6  * Do ":help credits" in Vim to see a list of people who contributed.
7  * See README.txt for an overview of the Vim source code.
8  */
9 
10 /*
11  * spell.c: code for spell checking
12  *
13  * See spellfile.c for the Vim spell file format.
14  *
15  * The spell checking mechanism uses a tree (aka trie).  Each node in the tree
16  * has a list of bytes that can appear (siblings).  For each byte there is a
17  * pointer to the node with the byte that follows in the word (child).
18  *
19  * A NUL byte is used where the word may end.  The bytes are sorted, so that
20  * binary searching can be used and the NUL bytes are at the start.  The
21  * number of possible bytes is stored before the list of bytes.
22  *
23  * The tree uses two arrays: "byts" stores the characters, "idxs" stores
24  * either the next index or flags.  The tree starts at index 0.  For example,
25  * to lookup "vi" this sequence is followed:
26  *	i = 0
27  *	len = byts[i]
28  *	n = where "v" appears in byts[i + 1] to byts[i + len]
29  *	i = idxs[n]
30  *	len = byts[i]
31  *	n = where "i" appears in byts[i + 1] to byts[i + len]
32  *	i = idxs[n]
33  *	len = byts[i]
34  *	find that byts[i + 1] is 0, idxs[i + 1] has flags for "vi".
35  *
36  * There are two word trees: one with case-folded words and one with words in
37  * original case.  The second one is only used for keep-case words and is
38  * usually small.
39  *
40  * There is one additional tree for when not all prefixes are applied when
41  * generating the .spl file.  This tree stores all the possible prefixes, as
42  * if they were words.  At each word (prefix) end the prefix nr is stored, the
43  * following word must support this prefix nr.  And the condition nr is
44  * stored, used to lookup the condition that the word must match with.
45  *
46  * Thanks to Olaf Seibert for providing an example implementation of this tree
47  * and the compression mechanism.
48  * LZ trie ideas:
49  *	http://www.irb.hr/hr/home/ristov/papers/RistovLZtrieRevision1.pdf
50  * More papers: http://www-igm.univ-mlv.fr/~laporte/publi_en.html
51  *
52  * Matching involves checking the caps type: Onecap ALLCAP KeepCap.
53  *
54  * Why doesn't Vim use aspell/ispell/myspell/etc.?
55  * See ":help develop-spell".
56  */
57 
/*
 * Use this to adjust the score after finding suggestions, based on the
 * suggested word sounding like the bad word.  This is much faster than doing
 * it for every possible suggestion.
 * Disadvantage: When "the" is typed as "hte" it sounds quite different ("@"
 * vs "ht") and goes down in the list.
 * Used when 'spellsuggest' is set to "best".
 *
 * The result is a weighted average using integer division: the word score
 * counts three times, the sound score once.  Both arguments are
 * parenthesized so that expressions can be passed safely.
 */
#define RESCORE(word_score, sound_score) \
    ((3 * (word_score) + (sound_score)) / 4)
67 
/*
 * Do the opposite: based on a maximum end score and a known sound score,
 * compute the maximum word score that can be used.
 *
 * Note that the first argument, although named "word_score", is the maximum
 * end score.  Integer division is used.  Both arguments are parenthesized so
 * that expressions can be passed safely.
 */
#define MAXSCORE(word_score, sound_score) \
    ((4 * (word_score) - (sound_score)) / 3)
73 
74 #define IN_SPELL_C
75 #include "vim.h"
76 
77 #if defined(FEAT_SPELL) || defined(PROTO)
78 
79 #ifndef UNIX		/* it's in os_unix.h for Unix */
80 # include <time.h>	/* for time_t */
81 #endif
82 
/* only used for su_badflags */
#define WF_MIXCAP   0x20	/* mix of upper and lower case: macaRONI */

/* All the caps-type word flags or'ed together. */
#define WF_CAPMASK (WF_ONECAP | WF_ALLCAP | WF_KEEPCAP | WF_FIXCAP)

#define REGION_ALL 0xff		/* word valid in all regions */

#define VIMSUGMAGIC "VIMsug"	/* string at start of Vim .sug file */
#define VIMSUGMAGICL 6		/* length of VIMSUGMAGIC, without the NUL */
#define VIMSUGVERSION 1

/* Result values.  Lower number is accepted over higher one. */
#define SP_BANNED	(-1)
#define SP_OK		0
#define SP_RARE		1
#define SP_LOCAL	2
#define SP_BAD		3
100 
101 typedef struct wordcount_S
102 {
103     short_u	wc_count;	    /* nr of times word was seen */
104     char_u	wc_word[1];	    /* word, actually longer */
105 } wordcount_T;
106 
107 #define WC_KEY_OFF  offsetof(wordcount_T, wc_word)
108 #define HI2WC(hi)     ((wordcount_T *)((hi)->hi_key - WC_KEY_OFF))
109 #define MAXWORDCOUNT 0xffff
110 
111 /*
112  * Information used when looking for suggestions.
113  */
114 typedef struct suginfo_S
115 {
116     garray_T	su_ga;		    /* suggestions, contains "suggest_T" */
117     int		su_maxcount;	    /* max. number of suggestions displayed */
118     int		su_maxscore;	    /* maximum score for adding to su_ga */
119     int		su_sfmaxscore;	    /* idem, for when doing soundfold words */
120     garray_T	su_sga;		    /* like su_ga, sound-folded scoring */
121     char_u	*su_badptr;	    /* start of bad word in line */
122     int		su_badlen;	    /* length of detected bad word in line */
123     int		su_badflags;	    /* caps flags for bad word */
124     char_u	su_badword[MAXWLEN]; /* bad word truncated at su_badlen */
125     char_u	su_fbadword[MAXWLEN]; /* su_badword case-folded */
126     char_u	su_sal_badword[MAXWLEN]; /* su_badword soundfolded */
127     hashtab_T	su_banned;	    /* table with banned words */
128     slang_T	*su_sallang;	    /* default language for sound folding */
129 } suginfo_T;
130 
131 /* One word suggestion.  Used in "si_ga". */
132 typedef struct suggest_S
133 {
134     char_u	*st_word;	/* suggested word, allocated string */
135     int		st_wordlen;	/* STRLEN(st_word) */
136     int		st_orglen;	/* length of replaced text */
137     int		st_score;	/* lower is better */
138     int		st_altscore;	/* used when st_score compares equal */
139     int		st_salscore;	/* st_score is for soundalike */
140     int		st_had_bonus;	/* bonus already included in score */
141     slang_T	*st_slang;	/* language used for sound folding */
142 } suggest_T;
143 
144 #define SUG(ga, i) (((suggest_T *)(ga).ga_data)[i])
145 
/* TRUE if a word appears in the list of banned words.  "su" is parenthesized
 * so that any pointer expression can be passed. */
#define WAS_BANNED(su, word) \
    (!HASHITEM_EMPTY(hash_find(&(su)->su_banned, word)))

/* Number of suggestions kept when cleaning up.  We need to keep more than
 * what is displayed, because when rescore_suggestions() is called the score
 * may change and wrong suggestions may be removed later. */
#define SUG_CLEAN_COUNT(su)    ((su)->su_maxcount < 130 ? 150 : (su)->su_maxcount + 20)

/* Threshold for sorting and cleaning up suggestions.  Don't want to keep lots
 * of suggestions that are not going to be displayed. */
#define SUG_MAX_COUNT(su)	(SUG_CLEAN_COUNT(su) + 50)
157 
/* score for various changes */
#define SCORE_SPLIT	149	/* split bad word */
#define SCORE_SPLIT_NO	249	/* split bad word with NOSPLITSUGS */
#define SCORE_ICASE	52	/* slightly different case */
#define SCORE_REGION	200	/* word is for different region */
#define SCORE_RARE	180	/* rare word */
#define SCORE_SWAP	75	/* swap two characters */
#define SCORE_SWAP3	110	/* swap two characters in three */
#define SCORE_REP	65	/* REP replacement */
#define SCORE_SUBST	93	/* substitute a character */
#define SCORE_SIMILAR	33	/* substitute a similar character */
#define SCORE_SUBCOMP	33	/* substitute a composing character */
#define SCORE_DEL	94	/* delete a character */
#define SCORE_DELDUP	66	/* delete a duplicated character */
#define SCORE_DELCOMP	28	/* delete a composing character */
#define SCORE_INS	96	/* insert a character */
#define SCORE_INSDUP	67	/* insert a duplicate character */
#define SCORE_INSCOMP	30	/* insert a composing character */
#define SCORE_NONWORD	103	/* change non-word to word char */

#define SCORE_FILE	30	/* suggestion from a file */
#define SCORE_MAXINIT	350	/* Initial maximum score: higher == slower.
				 * 350 allows for about three changes. */

#define SCORE_COMMON1	30	/* subtracted for words seen before */
#define SCORE_COMMON2	40	/* subtracted for words often seen */
#define SCORE_COMMON3	50	/* subtracted for words very often seen */
#define SCORE_THRES2	10	/* word count threshold for COMMON2 */
#define SCORE_THRES3	100	/* word count threshold for COMMON3 */

/* When trying changed soundfold words it becomes slow when trying more than
 * two changes.  With fewer than two changes it's slightly faster but we miss
 * a few good suggestions.  In rare cases we need to try three or four
 * changes.
 */
#define SCORE_SFMAX1	200	/* maximum score for first try */
#define SCORE_SFMAX2	300	/* maximum score for second try */
#define SCORE_SFMAX3	400	/* maximum score for third try */

/* Parenthesized so that it can be used inside any expression. */
#define SCORE_BIG	(SCORE_INS * 3)	/* big difference */
#define SCORE_MAXMAX	999999		/* accept any score */
#define SCORE_LIMITMAX	350		/* for spell_edit_score_limit() */

/* for spell_edit_score_limit() we need to know the minimum value of
 * SCORE_ICASE, SCORE_SWAP, SCORE_DEL, SCORE_SIMILAR and SCORE_INS */
#define SCORE_EDIT_MIN	SCORE_SIMILAR
203 
204 /*
205  * Structure to store info for word matching.
206  */
207 typedef struct matchinf_S
208 {
209     langp_T	*mi_lp;			/* info for language and region */
210 
211     /* pointers to original text to be checked */
212     char_u	*mi_word;		/* start of word being checked */
213     char_u	*mi_end;		/* end of matching word so far */
214     char_u	*mi_fend;		/* next char to be added to mi_fword */
215     char_u	*mi_cend;		/* char after what was used for
216 					   mi_capflags */
217 
218     /* case-folded text */
219     char_u	mi_fword[MAXWLEN + 1];	/* mi_word case-folded */
220     int		mi_fwordlen;		/* nr of valid bytes in mi_fword */
221 
222     /* for when checking word after a prefix */
223     int		mi_prefarridx;		/* index in sl_pidxs with list of
224 					   affixID/condition */
225     int		mi_prefcnt;		/* number of entries at mi_prefarridx */
226     int		mi_prefixlen;		/* byte length of prefix */
227     int		mi_cprefixlen;		/* byte length of prefix in original
228 					   case */
229 
230     /* for when checking a compound word */
231     int		mi_compoff;		/* start of following word offset */
232     char_u	mi_compflags[MAXWLEN];	/* flags for compound words used */
233     int		mi_complen;		/* nr of compound words used */
234     int		mi_compextra;		/* nr of COMPOUNDROOT words */
235 
236     /* others */
237     int		mi_result;		/* result so far: SP_BAD, SP_OK, etc. */
238     int		mi_capflags;		/* WF_ONECAP WF_ALLCAP WF_KEEPCAP */
239     win_T	*mi_win;		/* buffer being checked */
240 
241     /* for NOBREAK */
242     int		mi_result2;		/* "mi_resul" without following word */
243     char_u	*mi_end2;		/* "mi_end" without following word */
244 } matchinf_T;
245 
246 
247 static int spell_iswordp(char_u *p, win_T *wp);
248 static int spell_mb_isword_class(int cl, win_T *wp);
249 
/*
 * For finding suggestions: At each node in the tree these states are tried:
 */
typedef enum
{
    STATE_START = 0,	/* At start of node check for NUL bytes (goodword
			 * ends); if badword ends there is a match, otherwise
			 * try splitting word. */
    STATE_NOPREFIX,	/* try without prefix */
    STATE_SPLITUNDO,	/* Undo splitting. */
    STATE_ENDNUL,	/* Past NUL bytes at start of the node. */
    STATE_PLAIN,	/* Use each byte of the node. */
    STATE_DEL,		/* Delete a byte from the bad word. */
    STATE_INS_PREP,	/* Prepare for inserting bytes. */
    STATE_INS,		/* Insert a byte in the bad word. */
    STATE_SWAP,		/* Swap two bytes. */
    STATE_UNSWAP,	/* Undo swap two characters. */
    STATE_SWAP3,	/* Swap two characters over three. */
    STATE_UNSWAP3,	/* Undo Swap two characters over three. */
    STATE_UNROT3L,	/* Undo rotate three characters left */
    STATE_UNROT3R,	/* Undo rotate three characters right */
    STATE_REP_INI,	/* Prepare for using REP items. */
    STATE_REP,		/* Use matching REP items from the .aff file. */
    STATE_REP_UNDO,	/* Undo a REP item replacement. */
    STATE_FINAL		/* End of this node. */
} state_T;
276 
277 /*
278  * Struct to keep the state at each level in suggest_try_change().
279  */
280 typedef struct trystate_S
281 {
282     state_T	ts_state;	/* state at this level, STATE_ */
283     int		ts_score;	/* score */
284     idx_T	ts_arridx;	/* index in tree array, start of node */
285     short	ts_curi;	/* index in list of child nodes */
286     char_u	ts_fidx;	/* index in fword[], case-folded bad word */
287     char_u	ts_fidxtry;	/* ts_fidx at which bytes may be changed */
288     char_u	ts_twordlen;	/* valid length of tword[] */
289     char_u	ts_prefixdepth;	/* stack depth for end of prefix or
290 				 * PFD_PREFIXTREE or PFD_NOPREFIX */
291     char_u	ts_flags;	/* TSF_ flags */
292     char_u	ts_tcharlen;	/* number of bytes in tword character */
293     char_u	ts_tcharidx;	/* current byte index in tword character */
294     char_u	ts_isdiff;	/* DIFF_ values */
295     char_u	ts_fcharstart;	/* index in fword where badword char started */
296     char_u	ts_prewordlen;	/* length of word in "preword[]" */
297     char_u	ts_splitoff;	/* index in "tword" after last split */
298     char_u	ts_splitfidx;	/* "ts_fidx" at word split */
299     char_u	ts_complen;	/* nr of compound words used */
300     char_u	ts_compsplit;	/* index for "compflags" where word was spit */
301     char_u	ts_save_badflags;   /* su_badflags saved here */
302     char_u	ts_delidx;	/* index in fword for char that was deleted,
303 				   valid when "ts_flags" has TSF_DIDDEL */
304 } trystate_T;
305 
/* values for ts_isdiff */
#define DIFF_NONE	0	/* no different byte (yet) */
#define DIFF_YES	1	/* different byte found */
#define DIFF_INSERT	2	/* inserting character */

/* values for ts_flags; these are bit flags that can be combined */
#define TSF_PREFIXOK	1	/* already checked that prefix is OK */
#define TSF_DIDSPLIT	2	/* tried split at this point */
#define TSF_DIDDEL	4	/* did a delete, "ts_delidx" has index */

/* special values ts_prefixdepth */
#define PFD_NOPREFIX	0xff	/* not using prefixes */
#define PFD_PREFIXTREE	0xfe	/* walking through the prefix tree */
#define PFD_NOTSPECIAL	0xfd	/* highest value that's not special */

/* mode values for find_word */
#define FIND_FOLDWORD	    0	/* find word case-folded */
#define FIND_KEEPWORD	    1	/* find keep-case word */
#define FIND_PREFIX	    2	/* find word after prefix */
#define FIND_COMPOUND	    3	/* find case-folded compound word */
#define FIND_KEEPCOMPOUND   4	/* find keep-case compound word */
327 
328 static void find_word(matchinf_T *mip, int mode);
329 static int match_checkcompoundpattern(char_u *ptr, int wlen, garray_T *gap);
330 static int can_compound(slang_T *slang, char_u *word, char_u *flags);
331 static int match_compoundrule(slang_T *slang, char_u *compflags);
332 static int valid_word_prefix(int totprefcnt, int arridx, int flags, char_u *word, slang_T *slang, int cond_req);
333 static void find_prefix(matchinf_T *mip, int mode);
334 static int fold_more(matchinf_T *mip);
335 static int spell_valid_case(int wordflags, int treeflags);
336 static void spell_load_cb(char_u *fname, void *cookie);
337 static int count_syllables(slang_T *slang, char_u *word);
338 static void clear_midword(win_T *buf);
339 static void use_midword(slang_T *lp, win_T *buf);
340 static int find_region(char_u *rp, char_u *region);
341 static int check_need_cap(linenr_T lnum, colnr_T col);
342 static void spell_find_suggest(char_u *badptr, int badlen, suginfo_T *su, int maxcount, int banbadword, int need_cap, int interactive);
343 #ifdef FEAT_EVAL
344 static void spell_suggest_expr(suginfo_T *su, char_u *expr);
345 #endif
346 static void spell_suggest_file(suginfo_T *su, char_u *fname);
347 static void spell_suggest_intern(suginfo_T *su, int interactive);
348 static void spell_find_cleanup(suginfo_T *su);
349 static void suggest_try_special(suginfo_T *su);
350 static void suggest_try_change(suginfo_T *su);
351 static void suggest_trie_walk(suginfo_T *su, langp_T *lp, char_u *fword, int soundfold);
352 static void go_deeper(trystate_T *stack, int depth, int score_add);
353 static int nofold_len(char_u *fword, int flen, char_u *word);
354 static void find_keepcap_word(slang_T *slang, char_u *fword, char_u *kword);
355 static void score_comp_sal(suginfo_T *su);
356 static void score_combine(suginfo_T *su);
357 static int stp_sal_score(suggest_T *stp, suginfo_T *su, slang_T *slang, char_u *badsound);
358 static void suggest_try_soundalike_prep(void);
359 static void suggest_try_soundalike(suginfo_T *su);
360 static void suggest_try_soundalike_finish(void);
361 static void add_sound_suggest(suginfo_T *su, char_u *goodword, int score, langp_T *lp);
362 static int soundfold_find(slang_T *slang, char_u *word);
363 static void make_case_word(char_u *fword, char_u *cword, int flags);
364 static int similar_chars(slang_T *slang, int c1, int c2);
365 static void add_suggestion(suginfo_T *su, garray_T *gap, char_u *goodword, int badlen, int score, int altscore, int had_bonus, slang_T *slang, int maxsf);
366 static void check_suggestions(suginfo_T *su, garray_T *gap);
367 static void add_banned(suginfo_T *su, char_u *word);
368 static void rescore_suggestions(suginfo_T *su);
369 static void rescore_one(suginfo_T *su, suggest_T *stp);
370 static int cleanup_suggestions(garray_T *gap, int maxscore, int keep);
371 static void spell_soundfold_sofo(slang_T *slang, char_u *inword, char_u *res);
372 static void spell_soundfold_sal(slang_T *slang, char_u *inword, char_u *res);
373 static void spell_soundfold_wsal(slang_T *slang, char_u *inword, char_u *res);
374 static int soundalike_score(char_u *goodsound, char_u *badsound);
375 static int spell_edit_score(slang_T *slang, char_u *badword, char_u *goodword);
376 static int spell_edit_score_limit(slang_T *slang, char_u *badword, char_u *goodword, int limit);
377 static int spell_edit_score_limit_w(slang_T *slang, char_u *badword, char_u *goodword, int limit);
378 static void dump_word(slang_T *slang, char_u *word, char_u *pat, int *dir, int round, int flags, linenr_T lnum);
379 static linenr_T dump_prefixes(slang_T *slang, char_u *word, char_u *pat, int *dir, int round, int flags, linenr_T startlnum);
380 
381 
382 /* Remember what "z?" replaced. */
383 static char_u	*repl_from = NULL;
384 static char_u	*repl_to = NULL;
385 
386 /*
387  * Main spell-checking function.
388  * "ptr" points to a character that could be the start of a word.
389  * "*attrp" is set to the highlight index for a badly spelled word.  For a
390  * non-word or when it's OK it remains unchanged.
391  * This must only be called when 'spelllang' is not empty.
392  *
393  * "capcol" is used to check for a Capitalised word after the end of a
394  * sentence.  If it's zero then perform the check.  Return the column where to
395  * check next, or -1 when no sentence end was found.  If it's NULL then don't
396  * worry.
397  *
398  * Returns the length of the word in bytes, also when it's OK, so that the
399  * caller can skip over the word.
400  */
401     int
402 spell_check(
403     win_T	*wp,		/* current window */
404     char_u	*ptr,
405     hlf_T	*attrp,
406     int		*capcol,	/* column to check for Capital */
407     int		docount)	/* count good words */
408 {
409     matchinf_T	mi;		/* Most things are put in "mi" so that it can
410 				   be passed to functions quickly. */
411     int		nrlen = 0;	/* found a number first */
412     int		c;
413     int		wrongcaplen = 0;
414     int		lpi;
415     int		count_word = docount;
416 
417     /* A word never starts at a space or a control character.  Return quickly
418      * then, skipping over the character. */
419     if (*ptr <= ' ')
420 	return 1;
421 
422     /* Return here when loading language files failed. */
423     if (wp->w_s->b_langp.ga_len == 0)
424 	return 1;
425 
426     vim_memset(&mi, 0, sizeof(matchinf_T));
427 
428     /* A number is always OK.  Also skip hexadecimal numbers 0xFF99 and
429      * 0X99FF.  But always do check spelling to find "3GPP" and "11
430      * julifeest". */
431     if (*ptr >= '0' && *ptr <= '9')
432     {
433 	if (*ptr == '0' && (ptr[1] == 'b' || ptr[1] == 'B'))
434 	    mi.mi_end = skipbin(ptr + 2);
435 	else if (*ptr == '0' && (ptr[1] == 'x' || ptr[1] == 'X'))
436 	    mi.mi_end = skiphex(ptr + 2);
437 	else
438 	    mi.mi_end = skipdigits(ptr);
439 	nrlen = (int)(mi.mi_end - ptr);
440     }
441 
442     /* Find the normal end of the word (until the next non-word character). */
443     mi.mi_word = ptr;
444     mi.mi_fend = ptr;
445     if (spell_iswordp(mi.mi_fend, wp))
446     {
447 	do
448 	    MB_PTR_ADV(mi.mi_fend);
449 	while (*mi.mi_fend != NUL && spell_iswordp(mi.mi_fend, wp));
450 
451 	if (capcol != NULL && *capcol == 0 && wp->w_s->b_cap_prog != NULL)
452 	{
453 	    /* Check word starting with capital letter. */
454 	    c = PTR2CHAR(ptr);
455 	    if (!SPELL_ISUPPER(c))
456 		wrongcaplen = (int)(mi.mi_fend - ptr);
457 	}
458     }
459     if (capcol != NULL)
460 	*capcol = -1;
461 
462     /* We always use the characters up to the next non-word character,
463      * also for bad words. */
464     mi.mi_end = mi.mi_fend;
465 
466     /* Check caps type later. */
467     mi.mi_capflags = 0;
468     mi.mi_cend = NULL;
469     mi.mi_win = wp;
470 
471     /* case-fold the word with one non-word character, so that we can check
472      * for the word end. */
473     if (*mi.mi_fend != NUL)
474 	MB_PTR_ADV(mi.mi_fend);
475 
476     (void)spell_casefold(ptr, (int)(mi.mi_fend - ptr), mi.mi_fword,
477 							     MAXWLEN + 1);
478     mi.mi_fwordlen = (int)STRLEN(mi.mi_fword);
479 
480     /* The word is bad unless we recognize it. */
481     mi.mi_result = SP_BAD;
482     mi.mi_result2 = SP_BAD;
483 
484     /*
485      * Loop over the languages specified in 'spelllang'.
486      * We check them all, because a word may be matched longer in another
487      * language.
488      */
489     for (lpi = 0; lpi < wp->w_s->b_langp.ga_len; ++lpi)
490     {
491 	mi.mi_lp = LANGP_ENTRY(wp->w_s->b_langp, lpi);
492 
493 	/* If reloading fails the language is still in the list but everything
494 	 * has been cleared. */
495 	if (mi.mi_lp->lp_slang->sl_fidxs == NULL)
496 	    continue;
497 
498 	/* Check for a matching word in case-folded words. */
499 	find_word(&mi, FIND_FOLDWORD);
500 
501 	/* Check for a matching word in keep-case words. */
502 	find_word(&mi, FIND_KEEPWORD);
503 
504 	/* Check for matching prefixes. */
505 	find_prefix(&mi, FIND_FOLDWORD);
506 
507 	/* For a NOBREAK language, may want to use a word without a following
508 	 * word as a backup. */
509 	if (mi.mi_lp->lp_slang->sl_nobreak && mi.mi_result == SP_BAD
510 						   && mi.mi_result2 != SP_BAD)
511 	{
512 	    mi.mi_result = mi.mi_result2;
513 	    mi.mi_end = mi.mi_end2;
514 	}
515 
516 	/* Count the word in the first language where it's found to be OK. */
517 	if (count_word && mi.mi_result == SP_OK)
518 	{
519 	    count_common_word(mi.mi_lp->lp_slang, ptr,
520 						   (int)(mi.mi_end - ptr), 1);
521 	    count_word = FALSE;
522 	}
523     }
524 
525     if (mi.mi_result != SP_OK)
526     {
527 	/* If we found a number skip over it.  Allows for "42nd".  Do flag
528 	 * rare and local words, e.g., "3GPP". */
529 	if (nrlen > 0)
530 	{
531 	    if (mi.mi_result == SP_BAD || mi.mi_result == SP_BANNED)
532 		return nrlen;
533 	}
534 
535 	/* When we are at a non-word character there is no error, just
536 	 * skip over the character (try looking for a word after it). */
537 	else if (!spell_iswordp_nmw(ptr, wp))
538 	{
539 	    if (capcol != NULL && wp->w_s->b_cap_prog != NULL)
540 	    {
541 		regmatch_T	regmatch;
542 		int		r;
543 
544 		/* Check for end of sentence. */
545 		regmatch.regprog = wp->w_s->b_cap_prog;
546 		regmatch.rm_ic = FALSE;
547 		r = vim_regexec(&regmatch, ptr, 0);
548 		wp->w_s->b_cap_prog = regmatch.regprog;
549 		if (r)
550 		    *capcol = (int)(regmatch.endp[0] - ptr);
551 	    }
552 
553 	    if (has_mbyte)
554 		return (*mb_ptr2len)(ptr);
555 	    return 1;
556 	}
557 	else if (mi.mi_end == ptr)
558 	    /* Always include at least one character.  Required for when there
559 	     * is a mixup in "midword". */
560 	    MB_PTR_ADV(mi.mi_end);
561 	else if (mi.mi_result == SP_BAD
562 		&& LANGP_ENTRY(wp->w_s->b_langp, 0)->lp_slang->sl_nobreak)
563 	{
564 	    char_u	*p, *fp;
565 	    int		save_result = mi.mi_result;
566 
567 	    /* First language in 'spelllang' is NOBREAK.  Find first position
568 	     * at which any word would be valid. */
569 	    mi.mi_lp = LANGP_ENTRY(wp->w_s->b_langp, 0);
570 	    if (mi.mi_lp->lp_slang->sl_fidxs != NULL)
571 	    {
572 		p = mi.mi_word;
573 		fp = mi.mi_fword;
574 		for (;;)
575 		{
576 		    MB_PTR_ADV(p);
577 		    MB_PTR_ADV(fp);
578 		    if (p >= mi.mi_end)
579 			break;
580 		    mi.mi_compoff = (int)(fp - mi.mi_fword);
581 		    find_word(&mi, FIND_COMPOUND);
582 		    if (mi.mi_result != SP_BAD)
583 		    {
584 			mi.mi_end = p;
585 			break;
586 		    }
587 		}
588 		mi.mi_result = save_result;
589 	    }
590 	}
591 
592 	if (mi.mi_result == SP_BAD || mi.mi_result == SP_BANNED)
593 	    *attrp = HLF_SPB;
594 	else if (mi.mi_result == SP_RARE)
595 	    *attrp = HLF_SPR;
596 	else
597 	    *attrp = HLF_SPL;
598     }
599 
600     if (wrongcaplen > 0 && (mi.mi_result == SP_OK || mi.mi_result == SP_RARE))
601     {
602 	/* Report SpellCap only when the word isn't badly spelled. */
603 	*attrp = HLF_SPC;
604 	return wrongcaplen;
605     }
606 
607     return (int)(mi.mi_end - ptr);
608 }
609 
610 /*
611  * Check if the word at "mip->mi_word" is in the tree.
612  * When "mode" is FIND_FOLDWORD check in fold-case word tree.
613  * When "mode" is FIND_KEEPWORD check in keep-case word tree.
614  * When "mode" is FIND_PREFIX check for word after prefix in fold-case word
615  * tree.
616  *
617  * For a match mip->mi_result is updated.
618  */
619     static void
620 find_word(matchinf_T *mip, int mode)
621 {
622     idx_T	arridx = 0;
623     int		endlen[MAXWLEN];    /* length at possible word endings */
624     idx_T	endidx[MAXWLEN];    /* possible word endings */
625     int		endidxcnt = 0;
626     int		len;
627     int		wlen = 0;
628     int		flen;
629     int		c;
630     char_u	*ptr;
631     idx_T	lo, hi, m;
632     char_u	*s;
633     char_u	*p;
634     int		res = SP_BAD;
635     slang_T	*slang = mip->mi_lp->lp_slang;
636     unsigned	flags;
637     char_u	*byts;
638     idx_T	*idxs;
639     int		word_ends;
640     int		prefix_found;
641     int		nobreak_result;
642 
643     if (mode == FIND_KEEPWORD || mode == FIND_KEEPCOMPOUND)
644     {
645 	/* Check for word with matching case in keep-case tree. */
646 	ptr = mip->mi_word;
647 	flen = 9999;		    /* no case folding, always enough bytes */
648 	byts = slang->sl_kbyts;
649 	idxs = slang->sl_kidxs;
650 
651 	if (mode == FIND_KEEPCOMPOUND)
652 	    /* Skip over the previously found word(s). */
653 	    wlen += mip->mi_compoff;
654     }
655     else
656     {
657 	/* Check for case-folded in case-folded tree. */
658 	ptr = mip->mi_fword;
659 	flen = mip->mi_fwordlen;    /* available case-folded bytes */
660 	byts = slang->sl_fbyts;
661 	idxs = slang->sl_fidxs;
662 
663 	if (mode == FIND_PREFIX)
664 	{
665 	    /* Skip over the prefix. */
666 	    wlen = mip->mi_prefixlen;
667 	    flen -= mip->mi_prefixlen;
668 	}
669 	else if (mode == FIND_COMPOUND)
670 	{
671 	    /* Skip over the previously found word(s). */
672 	    wlen = mip->mi_compoff;
673 	    flen -= mip->mi_compoff;
674 	}
675 
676     }
677 
678     if (byts == NULL)
679 	return;			/* array is empty */
680 
681     /*
682      * Repeat advancing in the tree until:
683      * - there is a byte that doesn't match,
684      * - we reach the end of the tree,
685      * - or we reach the end of the line.
686      */
687     for (;;)
688     {
689 	if (flen <= 0 && *mip->mi_fend != NUL)
690 	    flen = fold_more(mip);
691 
692 	len = byts[arridx++];
693 
694 	/* If the first possible byte is a zero the word could end here.
695 	 * Remember this index, we first check for the longest word. */
696 	if (byts[arridx] == 0)
697 	{
698 	    if (endidxcnt == MAXWLEN)
699 	    {
700 		/* Must be a corrupted spell file. */
701 		emsg(_(e_format));
702 		return;
703 	    }
704 	    endlen[endidxcnt] = wlen;
705 	    endidx[endidxcnt++] = arridx++;
706 	    --len;
707 
708 	    /* Skip over the zeros, there can be several flag/region
709 	     * combinations. */
710 	    while (len > 0 && byts[arridx] == 0)
711 	    {
712 		++arridx;
713 		--len;
714 	    }
715 	    if (len == 0)
716 		break;	    /* no children, word must end here */
717 	}
718 
719 	/* Stop looking at end of the line. */
720 	if (ptr[wlen] == NUL)
721 	    break;
722 
723 	/* Perform a binary search in the list of accepted bytes. */
724 	c = ptr[wlen];
725 	if (c == TAB)	    /* <Tab> is handled like <Space> */
726 	    c = ' ';
727 	lo = arridx;
728 	hi = arridx + len - 1;
729 	while (lo < hi)
730 	{
731 	    m = (lo + hi) / 2;
732 	    if (byts[m] > c)
733 		hi = m - 1;
734 	    else if (byts[m] < c)
735 		lo = m + 1;
736 	    else
737 	    {
738 		lo = hi = m;
739 		break;
740 	    }
741 	}
742 
743 	/* Stop if there is no matching byte. */
744 	if (hi < lo || byts[lo] != c)
745 	    break;
746 
747 	/* Continue at the child (if there is one). */
748 	arridx = idxs[lo];
749 	++wlen;
750 	--flen;
751 
752 	/* One space in the good word may stand for several spaces in the
753 	 * checked word. */
754 	if (c == ' ')
755 	{
756 	    for (;;)
757 	    {
758 		if (flen <= 0 && *mip->mi_fend != NUL)
759 		    flen = fold_more(mip);
760 		if (ptr[wlen] != ' ' && ptr[wlen] != TAB)
761 		    break;
762 		++wlen;
763 		--flen;
764 	    }
765 	}
766     }
767 
768     /*
769      * Verify that one of the possible endings is valid.  Try the longest
770      * first.
771      */
772     while (endidxcnt > 0)
773     {
774 	--endidxcnt;
775 	arridx = endidx[endidxcnt];
776 	wlen = endlen[endidxcnt];
777 
778 	if ((*mb_head_off)(ptr, ptr + wlen) > 0)
779 	    continue;	    /* not at first byte of character */
780 	if (spell_iswordp(ptr + wlen, mip->mi_win))
781 	{
782 	    if (slang->sl_compprog == NULL && !slang->sl_nobreak)
783 		continue;	    /* next char is a word character */
784 	    word_ends = FALSE;
785 	}
786 	else
787 	    word_ends = TRUE;
788 	/* The prefix flag is before compound flags.  Once a valid prefix flag
789 	 * has been found we try compound flags. */
790 	prefix_found = FALSE;
791 
792 	if (mode != FIND_KEEPWORD && has_mbyte)
793 	{
794 	    /* Compute byte length in original word, length may change
795 	     * when folding case.  This can be slow, take a shortcut when the
796 	     * case-folded word is equal to the keep-case word. */
797 	    p = mip->mi_word;
798 	    if (STRNCMP(ptr, p, wlen) != 0)
799 	    {
800 		for (s = ptr; s < ptr + wlen; MB_PTR_ADV(s))
801 		    MB_PTR_ADV(p);
802 		wlen = (int)(p - mip->mi_word);
803 	    }
804 	}
805 
806 	/* Check flags and region.  For FIND_PREFIX check the condition and
807 	 * prefix ID.
808 	 * Repeat this if there are more flags/region alternatives until there
809 	 * is a match. */
810 	res = SP_BAD;
811 	for (len = byts[arridx - 1]; len > 0 && byts[arridx] == 0;
812 							      --len, ++arridx)
813 	{
814 	    flags = idxs[arridx];
815 
816 	    /* For the fold-case tree check that the case of the checked word
817 	     * matches with what the word in the tree requires.
818 	     * For keep-case tree the case is always right.  For prefixes we
819 	     * don't bother to check. */
820 	    if (mode == FIND_FOLDWORD)
821 	    {
822 		if (mip->mi_cend != mip->mi_word + wlen)
823 		{
824 		    /* mi_capflags was set for a different word length, need
825 		     * to do it again. */
826 		    mip->mi_cend = mip->mi_word + wlen;
827 		    mip->mi_capflags = captype(mip->mi_word, mip->mi_cend);
828 		}
829 
830 		if (mip->mi_capflags == WF_KEEPCAP
831 				|| !spell_valid_case(mip->mi_capflags, flags))
832 		    continue;
833 	    }
834 
835 	    /* When mode is FIND_PREFIX the word must support the prefix:
836 	     * check the prefix ID and the condition.  Do that for the list at
837 	     * mip->mi_prefarridx that find_prefix() filled. */
838 	    else if (mode == FIND_PREFIX && !prefix_found)
839 	    {
840 		c = valid_word_prefix(mip->mi_prefcnt, mip->mi_prefarridx,
841 				    flags,
842 				    mip->mi_word + mip->mi_cprefixlen, slang,
843 				    FALSE);
844 		if (c == 0)
845 		    continue;
846 
847 		/* Use the WF_RARE flag for a rare prefix. */
848 		if (c & WF_RAREPFX)
849 		    flags |= WF_RARE;
850 		prefix_found = TRUE;
851 	    }
852 
853 	    if (slang->sl_nobreak)
854 	    {
855 		if ((mode == FIND_COMPOUND || mode == FIND_KEEPCOMPOUND)
856 			&& (flags & WF_BANNED) == 0)
857 		{
858 		    /* NOBREAK: found a valid following word.  That's all we
859 		     * need to know, so return. */
860 		    mip->mi_result = SP_OK;
861 		    break;
862 		}
863 	    }
864 
865 	    else if ((mode == FIND_COMPOUND || mode == FIND_KEEPCOMPOUND
866 								|| !word_ends))
867 	    {
868 		/* If there is no compound flag or the word is shorter than
869 		 * COMPOUNDMIN reject it quickly.
870 		 * Makes you wonder why someone puts a compound flag on a word
871 		 * that's too short...  Myspell compatibility requires this
872 		 * anyway. */
873 		if (((unsigned)flags >> 24) == 0
874 			     || wlen - mip->mi_compoff < slang->sl_compminlen)
875 		    continue;
876 		/* For multi-byte chars check character length against
877 		 * COMPOUNDMIN. */
878 		if (has_mbyte
879 			&& slang->sl_compminlen > 0
880 			&& mb_charlen_len(mip->mi_word + mip->mi_compoff,
881 				wlen - mip->mi_compoff) < slang->sl_compminlen)
882 			continue;
883 
884 		/* Limit the number of compound words to COMPOUNDWORDMAX if no
885 		 * maximum for syllables is specified. */
886 		if (!word_ends && mip->mi_complen + mip->mi_compextra + 2
887 							   > slang->sl_compmax
888 					   && slang->sl_compsylmax == MAXWLEN)
889 		    continue;
890 
891 		/* Don't allow compounding on a side where an affix was added,
892 		 * unless COMPOUNDPERMITFLAG was used. */
893 		if (mip->mi_complen > 0 && (flags & WF_NOCOMPBEF))
894 		    continue;
895 		if (!word_ends && (flags & WF_NOCOMPAFT))
896 		    continue;
897 
898 		/* Quickly check if compounding is possible with this flag. */
899 		if (!byte_in_str(mip->mi_complen == 0
900 					? slang->sl_compstartflags
901 					: slang->sl_compallflags,
902 					    ((unsigned)flags >> 24)))
903 		    continue;
904 
905 		/* If there is a match with a CHECKCOMPOUNDPATTERN rule
906 		 * discard the compound word. */
907 		if (match_checkcompoundpattern(ptr, wlen, &slang->sl_comppat))
908 		    continue;
909 
910 		if (mode == FIND_COMPOUND)
911 		{
912 		    int	    capflags;
913 
914 		    /* Need to check the caps type of the appended compound
915 		     * word. */
916 		    if (has_mbyte && STRNCMP(ptr, mip->mi_word,
917 							mip->mi_compoff) != 0)
918 		    {
919 			/* case folding may have changed the length */
920 			p = mip->mi_word;
921 			for (s = ptr; s < ptr + mip->mi_compoff; MB_PTR_ADV(s))
922 			    MB_PTR_ADV(p);
923 		    }
924 		    else
925 			p = mip->mi_word + mip->mi_compoff;
926 		    capflags = captype(p, mip->mi_word + wlen);
927 		    if (capflags == WF_KEEPCAP || (capflags == WF_ALLCAP
928 						 && (flags & WF_FIXCAP) != 0))
929 			continue;
930 
931 		    if (capflags != WF_ALLCAP)
932 		    {
933 			/* When the character before the word is a word
934 			 * character we do not accept a Onecap word.  We do
935 			 * accept a no-caps word, even when the dictionary
936 			 * word specifies ONECAP. */
937 			MB_PTR_BACK(mip->mi_word, p);
938 			if (spell_iswordp_nmw(p, mip->mi_win)
939 				? capflags == WF_ONECAP
940 				: (flags & WF_ONECAP) != 0
941 						     && capflags != WF_ONECAP)
942 			    continue;
943 		    }
944 		}
945 
946 		/* If the word ends the sequence of compound flags of the
947 		 * words must match with one of the COMPOUNDRULE items and
948 		 * the number of syllables must not be too large. */
949 		mip->mi_compflags[mip->mi_complen] = ((unsigned)flags >> 24);
950 		mip->mi_compflags[mip->mi_complen + 1] = NUL;
951 		if (word_ends)
952 		{
953 		    char_u	fword[MAXWLEN];
954 
955 		    if (slang->sl_compsylmax < MAXWLEN)
956 		    {
957 			/* "fword" is only needed for checking syllables. */
958 			if (ptr == mip->mi_word)
959 			    (void)spell_casefold(ptr, wlen, fword, MAXWLEN);
960 			else
961 			    vim_strncpy(fword, ptr, endlen[endidxcnt]);
962 		    }
963 		    if (!can_compound(slang, fword, mip->mi_compflags))
964 			continue;
965 		}
966 		else if (slang->sl_comprules != NULL
967 			     && !match_compoundrule(slang, mip->mi_compflags))
968 		    /* The compound flags collected so far do not match any
969 		     * COMPOUNDRULE, discard the compounded word. */
970 		    continue;
971 	    }
972 
973 	    /* Check NEEDCOMPOUND: can't use word without compounding. */
974 	    else if (flags & WF_NEEDCOMP)
975 		continue;
976 
977 	    nobreak_result = SP_OK;
978 
979 	    if (!word_ends)
980 	    {
981 		int	save_result = mip->mi_result;
982 		char_u	*save_end = mip->mi_end;
983 		langp_T	*save_lp = mip->mi_lp;
984 		int	lpi;
985 
986 		/* Check that a valid word follows.  If there is one and we
987 		 * are compounding, it will set "mi_result", thus we are
988 		 * always finished here.  For NOBREAK we only check that a
989 		 * valid word follows.
990 		 * Recursive! */
991 		if (slang->sl_nobreak)
992 		    mip->mi_result = SP_BAD;
993 
994 		/* Find following word in case-folded tree. */
995 		mip->mi_compoff = endlen[endidxcnt];
996 		if (has_mbyte && mode == FIND_KEEPWORD)
997 		{
998 		    /* Compute byte length in case-folded word from "wlen":
999 		     * byte length in keep-case word.  Length may change when
1000 		     * folding case.  This can be slow, take a shortcut when
1001 		     * the case-folded word is equal to the keep-case word. */
1002 		    p = mip->mi_fword;
1003 		    if (STRNCMP(ptr, p, wlen) != 0)
1004 		    {
1005 			for (s = ptr; s < ptr + wlen; MB_PTR_ADV(s))
1006 			    MB_PTR_ADV(p);
1007 			mip->mi_compoff = (int)(p - mip->mi_fword);
1008 		    }
1009 		}
1010 #if 0 /* Disabled, see below */
1011 		c = mip->mi_compoff;
1012 #endif
1013 		++mip->mi_complen;
1014 		if (flags & WF_COMPROOT)
1015 		    ++mip->mi_compextra;
1016 
1017 		/* For NOBREAK we need to try all NOBREAK languages, at least
1018 		 * to find the ".add" file(s). */
1019 		for (lpi = 0; lpi < mip->mi_win->w_s->b_langp.ga_len; ++lpi)
1020 		{
1021 		    if (slang->sl_nobreak)
1022 		    {
1023 			mip->mi_lp = LANGP_ENTRY(mip->mi_win->w_s->b_langp, lpi);
1024 			if (mip->mi_lp->lp_slang->sl_fidxs == NULL
1025 					 || !mip->mi_lp->lp_slang->sl_nobreak)
1026 			    continue;
1027 		    }
1028 
1029 		    find_word(mip, FIND_COMPOUND);
1030 
1031 		    /* When NOBREAK any word that matches is OK.  Otherwise we
1032 		     * need to find the longest match, thus try with keep-case
1033 		     * and prefix too. */
1034 		    if (!slang->sl_nobreak || mip->mi_result == SP_BAD)
1035 		    {
1036 			/* Find following word in keep-case tree. */
1037 			mip->mi_compoff = wlen;
1038 			find_word(mip, FIND_KEEPCOMPOUND);
1039 
1040 #if 0	    /* Disabled, a prefix must not appear halfway a compound word,
1041 	       unless the COMPOUNDPERMITFLAG is used and then it can't be a
1042 	       postponed prefix. */
1043 			if (!slang->sl_nobreak || mip->mi_result == SP_BAD)
1044 			{
1045 			    /* Check for following word with prefix. */
1046 			    mip->mi_compoff = c;
1047 			    find_prefix(mip, FIND_COMPOUND);
1048 			}
1049 #endif
1050 		    }
1051 
1052 		    if (!slang->sl_nobreak)
1053 			break;
1054 		}
1055 		--mip->mi_complen;
1056 		if (flags & WF_COMPROOT)
1057 		    --mip->mi_compextra;
1058 		mip->mi_lp = save_lp;
1059 
1060 		if (slang->sl_nobreak)
1061 		{
1062 		    nobreak_result = mip->mi_result;
1063 		    mip->mi_result = save_result;
1064 		    mip->mi_end = save_end;
1065 		}
1066 		else
1067 		{
1068 		    if (mip->mi_result == SP_OK)
1069 			break;
1070 		    continue;
1071 		}
1072 	    }
1073 
1074 	    if (flags & WF_BANNED)
1075 		res = SP_BANNED;
1076 	    else if (flags & WF_REGION)
1077 	    {
1078 		/* Check region. */
1079 		if ((mip->mi_lp->lp_region & (flags >> 16)) != 0)
1080 		    res = SP_OK;
1081 		else
1082 		    res = SP_LOCAL;
1083 	    }
1084 	    else if (flags & WF_RARE)
1085 		res = SP_RARE;
1086 	    else
1087 		res = SP_OK;
1088 
1089 	    /* Always use the longest match and the best result.  For NOBREAK
1090 	     * we separately keep the longest match without a following good
1091 	     * word as a fall-back. */
1092 	    if (nobreak_result == SP_BAD)
1093 	    {
1094 		if (mip->mi_result2 > res)
1095 		{
1096 		    mip->mi_result2 = res;
1097 		    mip->mi_end2 = mip->mi_word + wlen;
1098 		}
1099 		else if (mip->mi_result2 == res
1100 					&& mip->mi_end2 < mip->mi_word + wlen)
1101 		    mip->mi_end2 = mip->mi_word + wlen;
1102 	    }
1103 	    else if (mip->mi_result > res)
1104 	    {
1105 		mip->mi_result = res;
1106 		mip->mi_end = mip->mi_word + wlen;
1107 	    }
1108 	    else if (mip->mi_result == res && mip->mi_end < mip->mi_word + wlen)
1109 		mip->mi_end = mip->mi_word + wlen;
1110 
1111 	    if (mip->mi_result == SP_OK)
1112 		break;
1113 	}
1114 
1115 	if (mip->mi_result == SP_OK)
1116 	    break;
1117     }
1118 }
1119 
1120 /*
1121  * Return TRUE if there is a match between the word ptr[wlen] and
1122  * CHECKCOMPOUNDPATTERN rules, assuming that we will concatenate with another
1123  * word.
1124  * A match means that the first part of CHECKCOMPOUNDPATTERN matches at the
1125  * end of ptr[wlen] and the second part matches after it.
1126  */
1127     static int
1128 match_checkcompoundpattern(
1129     char_u	*ptr,
1130     int		wlen,
1131     garray_T	*gap)  /* &sl_comppat */
1132 {
1133     int		i;
1134     char_u	*p;
1135     int		len;
1136 
1137     for (i = 0; i + 1 < gap->ga_len; i += 2)
1138     {
1139 	p = ((char_u **)gap->ga_data)[i + 1];
1140 	if (STRNCMP(ptr + wlen, p, STRLEN(p)) == 0)
1141 	{
1142 	    /* Second part matches at start of following compound word, now
1143 	     * check if first part matches at end of previous word. */
1144 	    p = ((char_u **)gap->ga_data)[i];
1145 	    len = (int)STRLEN(p);
1146 	    if (len <= wlen && STRNCMP(ptr + wlen - len, p, len) == 0)
1147 		return TRUE;
1148 	}
1149     }
1150     return FALSE;
1151 }
1152 
1153 /*
1154  * Return TRUE if "flags" is a valid sequence of compound flags and "word"
1155  * does not have too many syllables.
1156  */
1157     static int
1158 can_compound(slang_T *slang, char_u *word, char_u *flags)
1159 {
1160     char_u	uflags[MAXWLEN * 2];
1161     int		i;
1162     char_u	*p;
1163 
1164     if (slang->sl_compprog == NULL)
1165 	return FALSE;
1166     if (enc_utf8)
1167     {
1168 	/* Need to convert the single byte flags to utf8 characters. */
1169 	p = uflags;
1170 	for (i = 0; flags[i] != NUL; ++i)
1171 	    p += utf_char2bytes(flags[i], p);
1172 	*p = NUL;
1173 	p = uflags;
1174     }
1175     else
1176 	p = flags;
1177     if (!vim_regexec_prog(&slang->sl_compprog, FALSE, p, 0))
1178 	return FALSE;
1179 
1180     /* Count the number of syllables.  This may be slow, do it last.  If there
1181      * are too many syllables AND the number of compound words is above
1182      * COMPOUNDWORDMAX then compounding is not allowed. */
1183     if (slang->sl_compsylmax < MAXWLEN
1184 		       && count_syllables(slang, word) > slang->sl_compsylmax)
1185 	return (int)STRLEN(flags) < slang->sl_compmax;
1186     return TRUE;
1187 }
1188 
1189 /*
1190  * Return TRUE when the sequence of flags in "compflags" plus "flag" can
1191  * possibly form a valid compounded word.  This also checks the COMPOUNDRULE
1192  * lines if they don't contain wildcards.
1193  */
1194     static int
1195 can_be_compound(
1196     trystate_T	*sp,
1197     slang_T	*slang,
1198     char_u	*compflags,
1199     int		flag)
1200 {
1201     /* If the flag doesn't appear in sl_compstartflags or sl_compallflags
1202      * then it can't possibly compound. */
1203     if (!byte_in_str(sp->ts_complen == sp->ts_compsplit
1204 		? slang->sl_compstartflags : slang->sl_compallflags, flag))
1205 	return FALSE;
1206 
1207     /* If there are no wildcards, we can check if the flags collected so far
1208      * possibly can form a match with COMPOUNDRULE patterns.  This only
1209      * makes sense when we have two or more words. */
1210     if (slang->sl_comprules != NULL && sp->ts_complen > sp->ts_compsplit)
1211     {
1212 	int v;
1213 
1214 	compflags[sp->ts_complen] = flag;
1215 	compflags[sp->ts_complen + 1] = NUL;
1216 	v = match_compoundrule(slang, compflags + sp->ts_compsplit);
1217 	compflags[sp->ts_complen] = NUL;
1218 	return v;
1219     }
1220 
1221     return TRUE;
1222 }
1223 
1224 
1225 /*
1226  * Return TRUE if the compound flags in compflags[] match the start of any
1227  * compound rule.  This is used to stop trying a compound if the flags
1228  * collected so far can't possibly match any compound rule.
1229  * Caller must check that slang->sl_comprules is not NULL.
1230  */
1231     static int
1232 match_compoundrule(slang_T *slang, char_u *compflags)
1233 {
1234     char_u	*p;
1235     int		i;
1236     int		c;
1237 
1238     /* loop over all the COMPOUNDRULE entries */
1239     for (p = slang->sl_comprules; *p != NUL; ++p)
1240     {
1241 	/* loop over the flags in the compound word we have made, match
1242 	 * them against the current rule entry */
1243 	for (i = 0; ; ++i)
1244 	{
1245 	    c = compflags[i];
1246 	    if (c == NUL)
1247 		/* found a rule that matches for the flags we have so far */
1248 		return TRUE;
1249 	    if (*p == '/' || *p == NUL)
1250 		break;  /* end of rule, it's too short */
1251 	    if (*p == '[')
1252 	    {
1253 		int match = FALSE;
1254 
1255 		/* compare against all the flags in [] */
1256 		++p;
1257 		while (*p != ']' && *p != NUL)
1258 		    if (*p++ == c)
1259 			match = TRUE;
1260 		if (!match)
1261 		    break;  /* none matches */
1262 	    }
1263 	    else if (*p != c)
1264 		break;  /* flag of word doesn't match flag in pattern */
1265 	    ++p;
1266 	}
1267 
1268 	/* Skip to the next "/", where the next pattern starts. */
1269 	p = vim_strchr(p, '/');
1270 	if (p == NULL)
1271 	    break;
1272     }
1273 
1274     /* Checked all the rules and none of them match the flags, so there
1275      * can't possibly be a compound starting with these flags. */
1276     return FALSE;
1277 }
1278 
1279 /*
1280  * Return non-zero if the prefix indicated by "arridx" matches with the prefix
1281  * ID in "flags" for the word "word".
1282  * The WF_RAREPFX flag is included in the return value for a rare prefix.
1283  */
1284     static int
1285 valid_word_prefix(
1286     int		totprefcnt,	/* nr of prefix IDs */
1287     int		arridx,		/* idx in sl_pidxs[] */
1288     int		flags,
1289     char_u	*word,
1290     slang_T	*slang,
1291     int		cond_req)	/* only use prefixes with a condition */
1292 {
1293     int		prefcnt;
1294     int		pidx;
1295     regprog_T	**rp;
1296     int		prefid;
1297 
1298     prefid = (unsigned)flags >> 24;
1299     for (prefcnt = totprefcnt - 1; prefcnt >= 0; --prefcnt)
1300     {
1301 	pidx = slang->sl_pidxs[arridx + prefcnt];
1302 
1303 	/* Check the prefix ID. */
1304 	if (prefid != (pidx & 0xff))
1305 	    continue;
1306 
1307 	/* Check if the prefix doesn't combine and the word already has a
1308 	 * suffix. */
1309 	if ((flags & WF_HAS_AFF) && (pidx & WF_PFX_NC))
1310 	    continue;
1311 
1312 	/* Check the condition, if there is one.  The condition index is
1313 	 * stored in the two bytes above the prefix ID byte.  */
1314 	rp = &slang->sl_prefprog[((unsigned)pidx >> 8) & 0xffff];
1315 	if (*rp != NULL)
1316 	{
1317 	    if (!vim_regexec_prog(rp, FALSE, word, 0))
1318 		continue;
1319 	}
1320 	else if (cond_req)
1321 	    continue;
1322 
1323 	/* It's a match!  Return the WF_ flags. */
1324 	return pidx;
1325     }
1326     return 0;
1327 }
1328 
1329 /*
1330  * Check if the word at "mip->mi_word" has a matching prefix.
1331  * If it does, then check the following word.
1332  *
1333  * If "mode" is "FIND_COMPOUND" then do the same after another word, find a
1334  * prefix in a compound word.
1335  *
1336  * For a match mip->mi_result is updated.
1337  */
1338     static void
1339 find_prefix(matchinf_T *mip, int mode)
1340 {
1341     idx_T	arridx = 0;
1342     int		len;
1343     int		wlen = 0;
1344     int		flen;
1345     int		c;
1346     char_u	*ptr;
1347     idx_T	lo, hi, m;
1348     slang_T	*slang = mip->mi_lp->lp_slang;
1349     char_u	*byts;
1350     idx_T	*idxs;
1351 
1352     byts = slang->sl_pbyts;
1353     if (byts == NULL)
1354 	return;			/* array is empty */
1355 
1356     /* We use the case-folded word here, since prefixes are always
1357      * case-folded. */
1358     ptr = mip->mi_fword;
1359     flen = mip->mi_fwordlen;    /* available case-folded bytes */
1360     if (mode == FIND_COMPOUND)
1361     {
1362 	/* Skip over the previously found word(s). */
1363 	ptr += mip->mi_compoff;
1364 	flen -= mip->mi_compoff;
1365     }
1366     idxs = slang->sl_pidxs;
1367 
1368     /*
1369      * Repeat advancing in the tree until:
1370      * - there is a byte that doesn't match,
1371      * - we reach the end of the tree,
1372      * - or we reach the end of the line.
1373      */
1374     for (;;)
1375     {
1376 	if (flen == 0 && *mip->mi_fend != NUL)
1377 	    flen = fold_more(mip);
1378 
1379 	len = byts[arridx++];
1380 
1381 	/* If the first possible byte is a zero the prefix could end here.
1382 	 * Check if the following word matches and supports the prefix. */
1383 	if (byts[arridx] == 0)
1384 	{
1385 	    /* There can be several prefixes with different conditions.  We
1386 	     * try them all, since we don't know which one will give the
1387 	     * longest match.  The word is the same each time, pass the list
1388 	     * of possible prefixes to find_word(). */
1389 	    mip->mi_prefarridx = arridx;
1390 	    mip->mi_prefcnt = len;
1391 	    while (len > 0 && byts[arridx] == 0)
1392 	    {
1393 		++arridx;
1394 		--len;
1395 	    }
1396 	    mip->mi_prefcnt -= len;
1397 
1398 	    /* Find the word that comes after the prefix. */
1399 	    mip->mi_prefixlen = wlen;
1400 	    if (mode == FIND_COMPOUND)
1401 		/* Skip over the previously found word(s). */
1402 		mip->mi_prefixlen += mip->mi_compoff;
1403 
1404 	    if (has_mbyte)
1405 	    {
1406 		/* Case-folded length may differ from original length. */
1407 		mip->mi_cprefixlen = nofold_len(mip->mi_fword,
1408 					     mip->mi_prefixlen, mip->mi_word);
1409 	    }
1410 	    else
1411 		mip->mi_cprefixlen = mip->mi_prefixlen;
1412 	    find_word(mip, FIND_PREFIX);
1413 
1414 
1415 	    if (len == 0)
1416 		break;	    /* no children, word must end here */
1417 	}
1418 
1419 	/* Stop looking at end of the line. */
1420 	if (ptr[wlen] == NUL)
1421 	    break;
1422 
1423 	/* Perform a binary search in the list of accepted bytes. */
1424 	c = ptr[wlen];
1425 	lo = arridx;
1426 	hi = arridx + len - 1;
1427 	while (lo < hi)
1428 	{
1429 	    m = (lo + hi) / 2;
1430 	    if (byts[m] > c)
1431 		hi = m - 1;
1432 	    else if (byts[m] < c)
1433 		lo = m + 1;
1434 	    else
1435 	    {
1436 		lo = hi = m;
1437 		break;
1438 	    }
1439 	}
1440 
1441 	/* Stop if there is no matching byte. */
1442 	if (hi < lo || byts[lo] != c)
1443 	    break;
1444 
1445 	/* Continue at the child (if there is one). */
1446 	arridx = idxs[lo];
1447 	++wlen;
1448 	--flen;
1449     }
1450 }
1451 
1452 /*
1453  * Need to fold at least one more character.  Do until next non-word character
1454  * for efficiency.  Include the non-word character too.
1455  * Return the length of the folded chars in bytes.
1456  */
1457     static int
1458 fold_more(matchinf_T *mip)
1459 {
1460     int		flen;
1461     char_u	*p;
1462 
1463     p = mip->mi_fend;
1464     do
1465 	MB_PTR_ADV(mip->mi_fend);
1466     while (*mip->mi_fend != NUL && spell_iswordp(mip->mi_fend, mip->mi_win));
1467 
1468     /* Include the non-word character so that we can check for the word end. */
1469     if (*mip->mi_fend != NUL)
1470 	MB_PTR_ADV(mip->mi_fend);
1471 
1472     (void)spell_casefold(p, (int)(mip->mi_fend - p),
1473 			     mip->mi_fword + mip->mi_fwordlen,
1474 			     MAXWLEN - mip->mi_fwordlen);
1475     flen = (int)STRLEN(mip->mi_fword + mip->mi_fwordlen);
1476     mip->mi_fwordlen += flen;
1477     return flen;
1478 }
1479 
1480 /*
1481  * Check case flags for a word.  Return TRUE if the word has the requested
1482  * case.
1483  */
1484     static int
1485 spell_valid_case(
1486     int	    wordflags,	    /* flags for the checked word. */
1487     int	    treeflags)	    /* flags for the word in the spell tree */
1488 {
1489     return ((wordflags == WF_ALLCAP && (treeflags & WF_FIXCAP) == 0)
1490 	    || ((treeflags & (WF_ALLCAP | WF_KEEPCAP)) == 0
1491 		&& ((treeflags & WF_ONECAP) == 0
1492 					   || (wordflags & WF_ONECAP) != 0)));
1493 }
1494 
1495 /*
1496  * Return TRUE if spell checking is not enabled.
1497  */
1498     static int
1499 no_spell_checking(win_T *wp)
1500 {
1501     if (!wp->w_p_spell || *wp->w_s->b_p_spl == NUL
1502 					 || wp->w_s->b_langp.ga_len == 0)
1503     {
1504 	emsg(_("E756: Spell checking is not enabled"));
1505 	return TRUE;
1506     }
1507     return FALSE;
1508 }
1509 
1510 /*
1511  * Move to next spell error.
1512  * "curline" is FALSE for "[s", "]s", "[S" and "]S".
1513  * "curline" is TRUE to find word under/after cursor in the same line.
1514  * For Insert mode completion "dir" is BACKWARD and "curline" is TRUE: move
1515  * to after badly spelled word before the cursor.
1516  * Return 0 if not found, length of the badly spelled word otherwise.
1517  */
1518     int
1519 spell_move_to(
1520     win_T	*wp,
1521     int		dir,		/* FORWARD or BACKWARD */
1522     int		allwords,	/* TRUE for "[s"/"]s", FALSE for "[S"/"]S" */
1523     int		curline,
1524     hlf_T	*attrp)		/* return: attributes of bad word or NULL
1525 				   (only when "dir" is FORWARD) */
1526 {
1527     linenr_T	lnum;
1528     pos_T	found_pos;
1529     int		found_len = 0;
1530     char_u	*line;
1531     char_u	*p;
1532     char_u	*endp;
1533     hlf_T	attr;
1534     int		len;
1535 #ifdef FEAT_SYN_HL
1536     int		has_syntax = syntax_present(wp);
1537 #endif
1538     int		col;
1539     int		can_spell;
1540     char_u	*buf = NULL;
1541     int		buflen = 0;
1542     int		skip = 0;
1543     int		capcol = -1;
1544     int		found_one = FALSE;
1545     int		wrapped = FALSE;
1546 
1547     if (no_spell_checking(wp))
1548 	return 0;
1549 
1550     /*
1551      * Start looking for bad word at the start of the line, because we can't
1552      * start halfway a word, we don't know where it starts or ends.
1553      *
1554      * When searching backwards, we continue in the line to find the last
1555      * bad word (in the cursor line: before the cursor).
1556      *
1557      * We concatenate the start of the next line, so that wrapped words work
1558      * (e.g. "et<line-break>cetera").  Doesn't work when searching backwards
1559      * though...
1560      */
1561     lnum = wp->w_cursor.lnum;
1562     CLEAR_POS(&found_pos);
1563 
1564     while (!got_int)
1565     {
1566 	line = ml_get_buf(wp->w_buffer, lnum, FALSE);
1567 
1568 	len = (int)STRLEN(line);
1569 	if (buflen < len + MAXWLEN + 2)
1570 	{
1571 	    vim_free(buf);
1572 	    buflen = len + MAXWLEN + 2;
1573 	    buf = alloc(buflen);
1574 	    if (buf == NULL)
1575 		break;
1576 	}
1577 
1578 	/* In first line check first word for Capital. */
1579 	if (lnum == 1)
1580 	    capcol = 0;
1581 
1582 	/* For checking first word with a capital skip white space. */
1583 	if (capcol == 0)
1584 	    capcol = getwhitecols(line);
1585 	else if (curline && wp == curwin)
1586 	{
1587 	    /* For spellbadword(): check if first word needs a capital. */
1588 	    col = getwhitecols(line);
1589 	    if (check_need_cap(lnum, col))
1590 		capcol = col;
1591 
1592 	    /* Need to get the line again, may have looked at the previous
1593 	     * one. */
1594 	    line = ml_get_buf(wp->w_buffer, lnum, FALSE);
1595 	}
1596 
1597 	/* Copy the line into "buf" and append the start of the next line if
1598 	 * possible. */
1599 	STRCPY(buf, line);
1600 	if (lnum < wp->w_buffer->b_ml.ml_line_count)
1601 	    spell_cat_line(buf + STRLEN(buf),
1602 			  ml_get_buf(wp->w_buffer, lnum + 1, FALSE), MAXWLEN);
1603 
1604 	p = buf + skip;
1605 	endp = buf + len;
1606 	while (p < endp)
1607 	{
1608 	    /* When searching backward don't search after the cursor.  Unless
1609 	     * we wrapped around the end of the buffer. */
1610 	    if (dir == BACKWARD
1611 		    && lnum == wp->w_cursor.lnum
1612 		    && !wrapped
1613 		    && (colnr_T)(p - buf) >= wp->w_cursor.col)
1614 		break;
1615 
1616 	    /* start of word */
1617 	    attr = HLF_COUNT;
1618 	    len = spell_check(wp, p, &attr, &capcol, FALSE);
1619 
1620 	    if (attr != HLF_COUNT)
1621 	    {
1622 		/* We found a bad word.  Check the attribute. */
1623 		if (allwords || attr == HLF_SPB)
1624 		{
1625 		    /* When searching forward only accept a bad word after
1626 		     * the cursor. */
1627 		    if (dir == BACKWARD
1628 			    || lnum != wp->w_cursor.lnum
1629 			    || (lnum == wp->w_cursor.lnum
1630 				&& (wrapped
1631 				    || (colnr_T)(curline ? p - buf + len
1632 						     : p - buf)
1633 						  > wp->w_cursor.col)))
1634 		    {
1635 #ifdef FEAT_SYN_HL
1636 			if (has_syntax)
1637 			{
1638 			    col = (int)(p - buf);
1639 			    (void)syn_get_id(wp, lnum, (colnr_T)col,
1640 						    FALSE, &can_spell, FALSE);
1641 			    if (!can_spell)
1642 				attr = HLF_COUNT;
1643 			}
1644 			else
1645 #endif
1646 			    can_spell = TRUE;
1647 
1648 			if (can_spell)
1649 			{
1650 			    found_one = TRUE;
1651 			    found_pos.lnum = lnum;
1652 			    found_pos.col = (int)(p - buf);
1653 			    found_pos.coladd = 0;
1654 			    if (dir == FORWARD)
1655 			    {
1656 				/* No need to search further. */
1657 				wp->w_cursor = found_pos;
1658 				vim_free(buf);
1659 				if (attrp != NULL)
1660 				    *attrp = attr;
1661 				return len;
1662 			    }
1663 			    else if (curline)
1664 				/* Insert mode completion: put cursor after
1665 				 * the bad word. */
1666 				found_pos.col += len;
1667 			    found_len = len;
1668 			}
1669 		    }
1670 		    else
1671 			found_one = TRUE;
1672 		}
1673 	    }
1674 
1675 	    /* advance to character after the word */
1676 	    p += len;
1677 	    capcol -= len;
1678 	}
1679 
1680 	if (dir == BACKWARD && found_pos.lnum != 0)
1681 	{
1682 	    /* Use the last match in the line (before the cursor). */
1683 	    wp->w_cursor = found_pos;
1684 	    vim_free(buf);
1685 	    return found_len;
1686 	}
1687 
1688 	if (curline)
1689 	    break;	/* only check cursor line */
1690 
1691 	/* If we are back at the starting line and searched it again there
1692 	 * is no match, give up. */
1693 	if (lnum == wp->w_cursor.lnum && wrapped)
1694 	    break;
1695 
1696 	/* Advance to next line. */
1697 	if (dir == BACKWARD)
1698 	{
1699 	    if (lnum > 1)
1700 		--lnum;
1701 	    else if (!p_ws)
1702 		break;	    /* at first line and 'nowrapscan' */
1703 	    else
1704 	    {
1705 		/* Wrap around to the end of the buffer.  May search the
1706 		 * starting line again and accept the last match. */
1707 		lnum = wp->w_buffer->b_ml.ml_line_count;
1708 		wrapped = TRUE;
1709 		if (!shortmess(SHM_SEARCH))
1710 		    give_warning((char_u *)_(top_bot_msg), TRUE);
1711 	    }
1712 	    capcol = -1;
1713 	}
1714 	else
1715 	{
1716 	    if (lnum < wp->w_buffer->b_ml.ml_line_count)
1717 		++lnum;
1718 	    else if (!p_ws)
1719 		break;	    /* at first line and 'nowrapscan' */
1720 	    else
1721 	    {
1722 		/* Wrap around to the start of the buffer.  May search the
1723 		 * starting line again and accept the first match. */
1724 		lnum = 1;
1725 		wrapped = TRUE;
1726 		if (!shortmess(SHM_SEARCH))
1727 		    give_warning((char_u *)_(bot_top_msg), TRUE);
1728 	    }
1729 
1730 	    /* If we are back at the starting line and there is no match then
1731 	     * give up. */
1732 	    if (lnum == wp->w_cursor.lnum && !found_one)
1733 		break;
1734 
1735 	    /* Skip the characters at the start of the next line that were
1736 	     * included in a match crossing line boundaries. */
1737 	    if (attr == HLF_COUNT)
1738 		skip = (int)(p - endp);
1739 	    else
1740 		skip = 0;
1741 
1742 	    /* Capcol skips over the inserted space. */
1743 	    --capcol;
1744 
1745 	    /* But after empty line check first word in next line */
1746 	    if (*skipwhite(line) == NUL)
1747 		capcol = 0;
1748 	}
1749 
1750 	line_breakcheck();
1751     }
1752 
1753     vim_free(buf);
1754     return 0;
1755 }
1756 
1757 /*
1758  * For spell checking: concatenate the start of the following line "line" into
1759  * "buf", blanking-out special characters.  Copy less then "maxlen" bytes.
1760  * Keep the blanks at the start of the next line, this is used in win_line()
1761  * to skip those bytes if the word was OK.
1762  */
1763     void
1764 spell_cat_line(char_u *buf, char_u *line, int maxlen)
1765 {
1766     char_u	*p;
1767     int		n;
1768 
1769     p = skipwhite(line);
1770     while (vim_strchr((char_u *)"*#/\"\t", *p) != NULL)
1771 	p = skipwhite(p + 1);
1772 
1773     if (*p != NUL)
1774     {
1775 	/* Only worth concatenating if there is something else than spaces to
1776 	 * concatenate. */
1777 	n = (int)(p - line) + 1;
1778 	if (n < maxlen - 1)
1779 	{
1780 	    vim_memset(buf, ' ', n);
1781 	    vim_strncpy(buf +  n, p, maxlen - 1 - n);
1782 	}
1783     }
1784 }
1785 
1786 /*
1787  * Structure used for the cookie argument of do_in_runtimepath().
1788  */
1789 typedef struct spelload_S
1790 {
1791     char_u  sl_lang[MAXWLEN + 1];	/* language name */
1792     slang_T *sl_slang;			/* resulting slang_T struct */
1793     int	    sl_nobreak;			/* NOBREAK language found */
1794 } spelload_T;
1795 
1796 /*
1797  * Load word list(s) for "lang" from Vim spell file(s).
1798  * "lang" must be the language without the region: e.g., "en".
1799  */
1800     static void
1801 spell_load_lang(char_u *lang)
1802 {
1803     char_u	fname_enc[85];
1804     int		r;
1805     spelload_T	sl;
1806     int		round;
1807 
1808     /* Copy the language name to pass it to spell_load_cb() as a cookie.
1809      * It's truncated when an error is detected. */
1810     STRCPY(sl.sl_lang, lang);
1811     sl.sl_slang = NULL;
1812     sl.sl_nobreak = FALSE;
1813 
1814     /* We may retry when no spell file is found for the language, an
1815      * autocommand may load it then. */
1816     for (round = 1; round <= 2; ++round)
1817     {
1818 	/*
1819 	 * Find the first spell file for "lang" in 'runtimepath' and load it.
1820 	 */
1821 	vim_snprintf((char *)fname_enc, sizeof(fname_enc) - 5,
1822 #ifdef VMS
1823 					"spell/%s_%s.spl",
1824 #else
1825 					"spell/%s.%s.spl",
1826 #endif
1827 							   lang, spell_enc());
1828 	r = do_in_runtimepath(fname_enc, 0, spell_load_cb, &sl);
1829 
1830 	if (r == FAIL && *sl.sl_lang != NUL)
1831 	{
1832 	    /* Try loading the ASCII version. */
1833 	    vim_snprintf((char *)fname_enc, sizeof(fname_enc) - 5,
1834 #ifdef VMS
1835 						  "spell/%s_ascii.spl",
1836 #else
1837 						  "spell/%s.ascii.spl",
1838 #endif
1839 									lang);
1840 	    r = do_in_runtimepath(fname_enc, 0, spell_load_cb, &sl);
1841 
1842 	    if (r == FAIL && *sl.sl_lang != NUL && round == 1
1843 		    && apply_autocmds(EVENT_SPELLFILEMISSING, lang,
1844 					      curbuf->b_fname, FALSE, curbuf))
1845 		continue;
1846 	    break;
1847 	}
1848 	break;
1849     }
1850 
1851     if (r == FAIL)
1852     {
1853 	smsg(
1854 #ifdef VMS
1855 	_("Warning: Cannot find word list \"%s_%s.spl\" or \"%s_ascii.spl\""),
1856 #else
1857 	_("Warning: Cannot find word list \"%s.%s.spl\" or \"%s.ascii.spl\""),
1858 #endif
1859 						     lang, spell_enc(), lang);
1860     }
1861     else if (sl.sl_slang != NULL)
1862     {
1863 	/* At least one file was loaded, now load ALL the additions. */
1864 	STRCPY(fname_enc + STRLEN(fname_enc) - 3, "add.spl");
1865 	do_in_runtimepath(fname_enc, DIP_ALL, spell_load_cb, &sl);
1866     }
1867 }
1868 
1869 /*
1870  * Return the encoding used for spell checking: Use 'encoding', except that we
1871  * use "latin1" for "latin9".  And limit to 60 characters (just in case).
1872  */
1873     char_u *
1874 spell_enc(void)
1875 {
1876 
1877     if (STRLEN(p_enc) < 60 && STRCMP(p_enc, "iso-8859-15") != 0)
1878 	return p_enc;
1879     return (char_u *)"latin1";
1880 }
1881 
1882 /*
1883  * Get the name of the .spl file for the internal wordlist into
1884  * "fname[MAXPATHL]".
1885  */
1886     static void
1887 int_wordlist_spl(char_u *fname)
1888 {
1889     vim_snprintf((char *)fname, MAXPATHL, SPL_FNAME_TMPL,
1890 						  int_wordlist, spell_enc());
1891 }
1892 
1893 /*
1894  * Allocate a new slang_T for language "lang".  "lang" can be NULL.
1895  * Caller must fill "sl_next".
1896  */
1897     slang_T *
1898 slang_alloc(char_u *lang)
1899 {
1900     slang_T *lp;
1901 
1902     lp = ALLOC_CLEAR_ONE(slang_T);
1903     if (lp != NULL)
1904     {
1905 	if (lang != NULL)
1906 	    lp->sl_name = vim_strsave(lang);
1907 	ga_init2(&lp->sl_rep, sizeof(fromto_T), 10);
1908 	ga_init2(&lp->sl_repsal, sizeof(fromto_T), 10);
1909 	lp->sl_compmax = MAXWLEN;
1910 	lp->sl_compsylmax = MAXWLEN;
1911 	hash_init(&lp->sl_wordcount);
1912     }
1913 
1914     return lp;
1915 }
1916 
1917 /*
1918  * Free the contents of an slang_T and the structure itself.
1919  */
1920     void
1921 slang_free(slang_T *lp)
1922 {
1923     vim_free(lp->sl_name);
1924     vim_free(lp->sl_fname);
1925     slang_clear(lp);
1926     vim_free(lp);
1927 }
1928 
1929 /*
1930  * Clear an slang_T so that the file can be reloaded.
1931  */
1932     void
1933 slang_clear(slang_T *lp)
1934 {
1935     garray_T	*gap;
1936     fromto_T	*ftp;
1937     salitem_T	*smp;
1938     int		i;
1939     int		round;
1940 
1941     VIM_CLEAR(lp->sl_fbyts);
1942     VIM_CLEAR(lp->sl_kbyts);
1943     VIM_CLEAR(lp->sl_pbyts);
1944 
1945     VIM_CLEAR(lp->sl_fidxs);
1946     VIM_CLEAR(lp->sl_kidxs);
1947     VIM_CLEAR(lp->sl_pidxs);
1948 
1949     for (round = 1; round <= 2; ++round)
1950     {
1951 	gap = round == 1 ? &lp->sl_rep : &lp->sl_repsal;
1952 	while (gap->ga_len > 0)
1953 	{
1954 	    ftp = &((fromto_T *)gap->ga_data)[--gap->ga_len];
1955 	    vim_free(ftp->ft_from);
1956 	    vim_free(ftp->ft_to);
1957 	}
1958 	ga_clear(gap);
1959     }
1960 
1961     gap = &lp->sl_sal;
1962     if (lp->sl_sofo)
1963     {
1964 	/* "ga_len" is set to 1 without adding an item for latin1 */
1965 	if (gap->ga_data != NULL)
1966 	    /* SOFOFROM and SOFOTO items: free lists of wide characters. */
1967 	    for (i = 0; i < gap->ga_len; ++i)
1968 		vim_free(((int **)gap->ga_data)[i]);
1969     }
1970     else
1971 	/* SAL items: free salitem_T items */
1972 	while (gap->ga_len > 0)
1973 	{
1974 	    smp = &((salitem_T *)gap->ga_data)[--gap->ga_len];
1975 	    vim_free(smp->sm_lead);
1976 	    /* Don't free sm_oneof and sm_rules, they point into sm_lead. */
1977 	    vim_free(smp->sm_to);
1978 	    vim_free(smp->sm_lead_w);
1979 	    vim_free(smp->sm_oneof_w);
1980 	    vim_free(smp->sm_to_w);
1981 	}
1982     ga_clear(gap);
1983 
1984     for (i = 0; i < lp->sl_prefixcnt; ++i)
1985 	vim_regfree(lp->sl_prefprog[i]);
1986     lp->sl_prefixcnt = 0;
1987     VIM_CLEAR(lp->sl_prefprog);
1988 
1989     VIM_CLEAR(lp->sl_info);
1990 
1991     VIM_CLEAR(lp->sl_midword);
1992 
1993     vim_regfree(lp->sl_compprog);
1994     lp->sl_compprog = NULL;
1995     VIM_CLEAR(lp->sl_comprules);
1996     VIM_CLEAR(lp->sl_compstartflags);
1997     VIM_CLEAR(lp->sl_compallflags);
1998 
1999     VIM_CLEAR(lp->sl_syllable);
2000     ga_clear(&lp->sl_syl_items);
2001 
2002     ga_clear_strings(&lp->sl_comppat);
2003 
2004     hash_clear_all(&lp->sl_wordcount, WC_KEY_OFF);
2005     hash_init(&lp->sl_wordcount);
2006 
2007     hash_clear_all(&lp->sl_map_hash, 0);
2008 
2009     /* Clear info from .sug file. */
2010     slang_clear_sug(lp);
2011 
2012     lp->sl_compmax = MAXWLEN;
2013     lp->sl_compminlen = 0;
2014     lp->sl_compsylmax = MAXWLEN;
2015     lp->sl_regions[0] = NUL;
2016 }
2017 
2018 /*
2019  * Clear the info from the .sug file in "lp".
2020  */
2021     void
2022 slang_clear_sug(slang_T *lp)
2023 {
2024     VIM_CLEAR(lp->sl_sbyts);
2025     VIM_CLEAR(lp->sl_sidxs);
2026     close_spellbuf(lp->sl_sugbuf);
2027     lp->sl_sugbuf = NULL;
2028     lp->sl_sugloaded = FALSE;
2029     lp->sl_sugtime = 0;
2030 }
2031 
2032 /*
2033  * Load one spell file and store the info into a slang_T.
2034  * Invoked through do_in_runtimepath().
2035  */
2036     static void
2037 spell_load_cb(char_u *fname, void *cookie)
2038 {
2039     spelload_T	*slp = (spelload_T *)cookie;
2040     slang_T	*slang;
2041 
2042     slang = spell_load_file(fname, slp->sl_lang, NULL, FALSE);
2043     if (slang != NULL)
2044     {
2045 	/* When a previously loaded file has NOBREAK also use it for the
2046 	 * ".add" files. */
2047 	if (slp->sl_nobreak && slang->sl_add)
2048 	    slang->sl_nobreak = TRUE;
2049 	else if (slang->sl_nobreak)
2050 	    slp->sl_nobreak = TRUE;
2051 
2052 	slp->sl_slang = slang;
2053     }
2054 }
2055 
2056 
2057 /*
2058  * Add a word to the hashtable of common words.
2059  * If it's already there then the counter is increased.
2060  */
2061     void
2062 count_common_word(
2063     slang_T	*lp,
2064     char_u	*word,
2065     int		len,	    /* word length, -1 for upto NUL */
2066     int		count)	    /* 1 to count once, 10 to init */
2067 {
2068     hash_T	hash;
2069     hashitem_T	*hi;
2070     wordcount_T	*wc;
2071     char_u	buf[MAXWLEN];
2072     char_u	*p;
2073 
2074     if (len == -1)
2075 	p = word;
2076     else
2077     {
2078 	vim_strncpy(buf, word, len);
2079 	p = buf;
2080     }
2081 
2082     hash = hash_hash(p);
2083     hi = hash_lookup(&lp->sl_wordcount, p, hash);
2084     if (HASHITEM_EMPTY(hi))
2085     {
2086 	wc = alloc(sizeof(wordcount_T) + STRLEN(p));
2087 	if (wc == NULL)
2088 	    return;
2089 	STRCPY(wc->wc_word, p);
2090 	wc->wc_count = count;
2091 	hash_add_item(&lp->sl_wordcount, hi, wc->wc_word, hash);
2092     }
2093     else
2094     {
2095 	wc = HI2WC(hi);
2096 	if ((wc->wc_count += count) < (unsigned)count)	/* check for overflow */
2097 	    wc->wc_count = MAXWORDCOUNT;
2098     }
2099 }
2100 
2101 /*
2102  * Adjust the score of common words.
2103  */
2104     static int
2105 score_wordcount_adj(
2106     slang_T	*slang,
2107     int		score,
2108     char_u	*word,
2109     int		split)	    /* word was split, less bonus */
2110 {
2111     hashitem_T	*hi;
2112     wordcount_T	*wc;
2113     int		bonus;
2114     int		newscore;
2115 
2116     hi = hash_find(&slang->sl_wordcount, word);
2117     if (!HASHITEM_EMPTY(hi))
2118     {
2119 	wc = HI2WC(hi);
2120 	if (wc->wc_count < SCORE_THRES2)
2121 	    bonus = SCORE_COMMON1;
2122 	else if (wc->wc_count < SCORE_THRES3)
2123 	    bonus = SCORE_COMMON2;
2124 	else
2125 	    bonus = SCORE_COMMON3;
2126 	if (split)
2127 	    newscore = score - bonus / 2;
2128 	else
2129 	    newscore = score - bonus;
2130 	if (newscore < 0)
2131 	    return 0;
2132 	return newscore;
2133     }
2134     return score;
2135 }
2136 
2137 
2138 /*
2139  * Return TRUE if byte "n" appears in "str".
2140  * Like strchr() but independent of locale.
2141  */
2142     int
2143 byte_in_str(char_u *str, int n)
2144 {
2145     char_u	*p;
2146 
2147     for (p = str; *p != NUL; ++p)
2148 	if (*p == n)
2149 	    return TRUE;
2150     return FALSE;
2151 }
2152 
2153 #define SY_MAXLEN   30
2154 typedef struct syl_item_S
2155 {
2156     char_u	sy_chars[SY_MAXLEN];	    /* the sequence of chars */
2157     int		sy_len;
2158 } syl_item_T;
2159 
2160 /*
2161  * Truncate "slang->sl_syllable" at the first slash and put the following items
2162  * in "slang->sl_syl_items".
2163  */
2164     int
2165 init_syl_tab(slang_T *slang)
2166 {
2167     char_u	*p;
2168     char_u	*s;
2169     int		l;
2170     syl_item_T	*syl;
2171 
2172     ga_init2(&slang->sl_syl_items, sizeof(syl_item_T), 4);
2173     p = vim_strchr(slang->sl_syllable, '/');
2174     while (p != NULL)
2175     {
2176 	*p++ = NUL;
2177 	if (*p == NUL)	    /* trailing slash */
2178 	    break;
2179 	s = p;
2180 	p = vim_strchr(p, '/');
2181 	if (p == NULL)
2182 	    l = (int)STRLEN(s);
2183 	else
2184 	    l = (int)(p - s);
2185 	if (l >= SY_MAXLEN)
2186 	    return SP_FORMERROR;
2187 	if (ga_grow(&slang->sl_syl_items, 1) == FAIL)
2188 	    return SP_OTHERERROR;
2189 	syl = ((syl_item_T *)slang->sl_syl_items.ga_data)
2190 					       + slang->sl_syl_items.ga_len++;
2191 	vim_strncpy(syl->sy_chars, s, l);
2192 	syl->sy_len = l;
2193     }
2194     return OK;
2195 }
2196 
2197 /*
2198  * Count the number of syllables in "word".
2199  * When "word" contains spaces the syllables after the last space are counted.
2200  * Returns zero if syllables are not defines.
2201  */
2202     static int
2203 count_syllables(slang_T *slang, char_u *word)
2204 {
2205     int		cnt = 0;
2206     int		skip = FALSE;
2207     char_u	*p;
2208     int		len;
2209     int		i;
2210     syl_item_T	*syl;
2211     int		c;
2212 
2213     if (slang->sl_syllable == NULL)
2214 	return 0;
2215 
2216     for (p = word; *p != NUL; p += len)
2217     {
2218 	/* When running into a space reset counter. */
2219 	if (*p == ' ')
2220 	{
2221 	    len = 1;
2222 	    cnt = 0;
2223 	    continue;
2224 	}
2225 
2226 	/* Find longest match of syllable items. */
2227 	len = 0;
2228 	for (i = 0; i < slang->sl_syl_items.ga_len; ++i)
2229 	{
2230 	    syl = ((syl_item_T *)slang->sl_syl_items.ga_data) + i;
2231 	    if (syl->sy_len > len
2232 			       && STRNCMP(p, syl->sy_chars, syl->sy_len) == 0)
2233 		len = syl->sy_len;
2234 	}
2235 	if (len != 0)	/* found a match, count syllable  */
2236 	{
2237 	    ++cnt;
2238 	    skip = FALSE;
2239 	}
2240 	else
2241 	{
2242 	    /* No recognized syllable item, at least a syllable char then? */
2243 	    c = mb_ptr2char(p);
2244 	    len = (*mb_ptr2len)(p);
2245 	    if (vim_strchr(slang->sl_syllable, c) == NULL)
2246 		skip = FALSE;	    /* No, search for next syllable */
2247 	    else if (!skip)
2248 	    {
2249 		++cnt;		    /* Yes, count it */
2250 		skip = TRUE;	    /* don't count following syllable chars */
2251 	    }
2252 	}
2253     }
2254     return cnt;
2255 }
2256 
2257 /*
2258  * Parse 'spelllang' and set w_s->b_langp accordingly.
2259  * Returns NULL if it's OK, an error message otherwise.
2260  */
2261     char *
2262 did_set_spelllang(win_T *wp)
2263 {
2264     garray_T	ga;
2265     char_u	*splp;
2266     char_u	*region;
2267     char_u	region_cp[3];
2268     int		filename;
2269     int		region_mask;
2270     slang_T	*slang;
2271     int		c;
2272     char_u	lang[MAXWLEN + 1];
2273     char_u	spf_name[MAXPATHL];
2274     int		len;
2275     char_u	*p;
2276     int		round;
2277     char_u	*spf;
2278     char_u	*use_region = NULL;
2279     int		dont_use_region = FALSE;
2280     int		nobreak = FALSE;
2281     int		i, j;
2282     langp_T	*lp, *lp2;
2283     static int	recursive = FALSE;
2284     char	*ret_msg = NULL;
2285     char_u	*spl_copy;
2286     bufref_T	bufref;
2287 
2288     set_bufref(&bufref, wp->w_buffer);
2289 
2290     /* We don't want to do this recursively.  May happen when a language is
2291      * not available and the SpellFileMissing autocommand opens a new buffer
2292      * in which 'spell' is set. */
2293     if (recursive)
2294 	return NULL;
2295     recursive = TRUE;
2296 
2297     ga_init2(&ga, sizeof(langp_T), 2);
2298     clear_midword(wp);
2299 
2300     /* Make a copy of 'spelllang', the SpellFileMissing autocommands may change
2301      * it under our fingers. */
2302     spl_copy = vim_strsave(wp->w_s->b_p_spl);
2303     if (spl_copy == NULL)
2304 	goto theend;
2305 
2306     wp->w_s->b_cjk = 0;
2307 
2308     /* Loop over comma separated language names. */
2309     for (splp = spl_copy; *splp != NUL; )
2310     {
2311 	// Get one language name.
2312 	copy_option_part(&splp, lang, MAXWLEN, ",");
2313 	region = NULL;
2314 	len = (int)STRLEN(lang);
2315 
2316 	if (!valid_spellang(lang))
2317 	    continue;
2318 
2319 	if (STRCMP(lang, "cjk") == 0)
2320 	{
2321 	    wp->w_s->b_cjk = 1;
2322 	    continue;
2323 	}
2324 
2325 	/* If the name ends in ".spl" use it as the name of the spell file.
2326 	 * If there is a region name let "region" point to it and remove it
2327 	 * from the name. */
2328 	if (len > 4 && fnamecmp(lang + len - 4, ".spl") == 0)
2329 	{
2330 	    filename = TRUE;
2331 
2332 	    /* Locate a region and remove it from the file name. */
2333 	    p = vim_strchr(gettail(lang), '_');
2334 	    if (p != NULL && ASCII_ISALPHA(p[1]) && ASCII_ISALPHA(p[2])
2335 						      && !ASCII_ISALPHA(p[3]))
2336 	    {
2337 		vim_strncpy(region_cp, p + 1, 2);
2338 		mch_memmove(p, p + 3, len - (p - lang) - 2);
2339 		region = region_cp;
2340 	    }
2341 	    else
2342 		dont_use_region = TRUE;
2343 
2344 	    /* Check if we loaded this language before. */
2345 	    for (slang = first_lang; slang != NULL; slang = slang->sl_next)
2346 		if (fullpathcmp(lang, slang->sl_fname, FALSE, TRUE) == FPC_SAME)
2347 		    break;
2348 	}
2349 	else
2350 	{
2351 	    filename = FALSE;
2352 	    if (len > 3 && lang[len - 3] == '_')
2353 	    {
2354 		region = lang + len - 2;
2355 		len -= 3;
2356 		lang[len] = NUL;
2357 	    }
2358 	    else
2359 		dont_use_region = TRUE;
2360 
2361 	    /* Check if we loaded this language before. */
2362 	    for (slang = first_lang; slang != NULL; slang = slang->sl_next)
2363 		if (STRICMP(lang, slang->sl_name) == 0)
2364 		    break;
2365 	}
2366 
2367 	if (region != NULL)
2368 	{
2369 	    /* If the region differs from what was used before then don't
2370 	     * use it for 'spellfile'. */
2371 	    if (use_region != NULL && STRCMP(region, use_region) != 0)
2372 		dont_use_region = TRUE;
2373 	    use_region = region;
2374 	}
2375 
2376 	/* If not found try loading the language now. */
2377 	if (slang == NULL)
2378 	{
2379 	    if (filename)
2380 		(void)spell_load_file(lang, lang, NULL, FALSE);
2381 	    else
2382 	    {
2383 		spell_load_lang(lang);
2384 		/* SpellFileMissing autocommands may do anything, including
2385 		 * destroying the buffer we are using... */
2386 		if (!bufref_valid(&bufref))
2387 		{
2388 		    ret_msg = N_("E797: SpellFileMissing autocommand deleted buffer");
2389 		    goto theend;
2390 		}
2391 	    }
2392 	}
2393 
2394 	/*
2395 	 * Loop over the languages, there can be several files for "lang".
2396 	 */
2397 	for (slang = first_lang; slang != NULL; slang = slang->sl_next)
2398 	    if (filename ? fullpathcmp(lang, slang->sl_fname, FALSE, TRUE)
2399 								    == FPC_SAME
2400 			 : STRICMP(lang, slang->sl_name) == 0)
2401 	    {
2402 		region_mask = REGION_ALL;
2403 		if (!filename && region != NULL)
2404 		{
2405 		    /* find region in sl_regions */
2406 		    c = find_region(slang->sl_regions, region);
2407 		    if (c == REGION_ALL)
2408 		    {
2409 			if (slang->sl_add)
2410 			{
2411 			    if (*slang->sl_regions != NUL)
2412 				/* This addition file is for other regions. */
2413 				region_mask = 0;
2414 			}
2415 			else
2416 			    /* This is probably an error.  Give a warning and
2417 			     * accept the words anyway. */
2418 			    smsg(_("Warning: region %s not supported"),
2419 								      region);
2420 		    }
2421 		    else
2422 			region_mask = 1 << c;
2423 		}
2424 
2425 		if (region_mask != 0)
2426 		{
2427 		    if (ga_grow(&ga, 1) == FAIL)
2428 		    {
2429 			ga_clear(&ga);
2430 			ret_msg = e_outofmem;
2431 			goto theend;
2432 		    }
2433 		    LANGP_ENTRY(ga, ga.ga_len)->lp_slang = slang;
2434 		    LANGP_ENTRY(ga, ga.ga_len)->lp_region = region_mask;
2435 		    ++ga.ga_len;
2436 		    use_midword(slang, wp);
2437 		    if (slang->sl_nobreak)
2438 			nobreak = TRUE;
2439 		}
2440 	    }
2441     }
2442 
2443     /* round 0: load int_wordlist, if possible.
2444      * round 1: load first name in 'spellfile'.
2445      * round 2: load second name in 'spellfile.
2446      * etc. */
2447     spf = curwin->w_s->b_p_spf;
2448     for (round = 0; round == 0 || *spf != NUL; ++round)
2449     {
2450 	if (round == 0)
2451 	{
2452 	    /* Internal wordlist, if there is one. */
2453 	    if (int_wordlist == NULL)
2454 		continue;
2455 	    int_wordlist_spl(spf_name);
2456 	}
2457 	else
2458 	{
2459 	    /* One entry in 'spellfile'. */
2460 	    copy_option_part(&spf, spf_name, MAXPATHL - 5, ",");
2461 	    STRCAT(spf_name, ".spl");
2462 
2463 	    /* If it was already found above then skip it. */
2464 	    for (c = 0; c < ga.ga_len; ++c)
2465 	    {
2466 		p = LANGP_ENTRY(ga, c)->lp_slang->sl_fname;
2467 		if (p != NULL && fullpathcmp(spf_name, p, FALSE, TRUE)
2468 								== FPC_SAME)
2469 		    break;
2470 	    }
2471 	    if (c < ga.ga_len)
2472 		continue;
2473 	}
2474 
2475 	/* Check if it was loaded already. */
2476 	for (slang = first_lang; slang != NULL; slang = slang->sl_next)
2477 	    if (fullpathcmp(spf_name, slang->sl_fname, FALSE, TRUE)
2478 								== FPC_SAME)
2479 		break;
2480 	if (slang == NULL)
2481 	{
2482 	    /* Not loaded, try loading it now.  The language name includes the
2483 	     * region name, the region is ignored otherwise.  for int_wordlist
2484 	     * use an arbitrary name. */
2485 	    if (round == 0)
2486 		STRCPY(lang, "internal wordlist");
2487 	    else
2488 	    {
2489 		vim_strncpy(lang, gettail(spf_name), MAXWLEN);
2490 		p = vim_strchr(lang, '.');
2491 		if (p != NULL)
2492 		    *p = NUL;	/* truncate at ".encoding.add" */
2493 	    }
2494 	    slang = spell_load_file(spf_name, lang, NULL, TRUE);
2495 
2496 	    /* If one of the languages has NOBREAK we assume the addition
2497 	     * files also have this. */
2498 	    if (slang != NULL && nobreak)
2499 		slang->sl_nobreak = TRUE;
2500 	}
2501 	if (slang != NULL && ga_grow(&ga, 1) == OK)
2502 	{
2503 	    region_mask = REGION_ALL;
2504 	    if (use_region != NULL && !dont_use_region)
2505 	    {
2506 		/* find region in sl_regions */
2507 		c = find_region(slang->sl_regions, use_region);
2508 		if (c != REGION_ALL)
2509 		    region_mask = 1 << c;
2510 		else if (*slang->sl_regions != NUL)
2511 		    /* This spell file is for other regions. */
2512 		    region_mask = 0;
2513 	    }
2514 
2515 	    if (region_mask != 0)
2516 	    {
2517 		LANGP_ENTRY(ga, ga.ga_len)->lp_slang = slang;
2518 		LANGP_ENTRY(ga, ga.ga_len)->lp_sallang = NULL;
2519 		LANGP_ENTRY(ga, ga.ga_len)->lp_replang = NULL;
2520 		LANGP_ENTRY(ga, ga.ga_len)->lp_region = region_mask;
2521 		++ga.ga_len;
2522 		use_midword(slang, wp);
2523 	    }
2524 	}
2525     }
2526 
2527     /* Everything is fine, store the new b_langp value. */
2528     ga_clear(&wp->w_s->b_langp);
2529     wp->w_s->b_langp = ga;
2530 
2531     /* For each language figure out what language to use for sound folding and
2532      * REP items.  If the language doesn't support it itself use another one
2533      * with the same name.  E.g. for "en-math" use "en". */
2534     for (i = 0; i < ga.ga_len; ++i)
2535     {
2536 	lp = LANGP_ENTRY(ga, i);
2537 
2538 	/* sound folding */
2539 	if (lp->lp_slang->sl_sal.ga_len > 0)
2540 	    /* language does sound folding itself */
2541 	    lp->lp_sallang = lp->lp_slang;
2542 	else
2543 	    /* find first similar language that does sound folding */
2544 	    for (j = 0; j < ga.ga_len; ++j)
2545 	    {
2546 		lp2 = LANGP_ENTRY(ga, j);
2547 		if (lp2->lp_slang->sl_sal.ga_len > 0
2548 			&& STRNCMP(lp->lp_slang->sl_name,
2549 					      lp2->lp_slang->sl_name, 2) == 0)
2550 		{
2551 		    lp->lp_sallang = lp2->lp_slang;
2552 		    break;
2553 		}
2554 	    }
2555 
2556 	/* REP items */
2557 	if (lp->lp_slang->sl_rep.ga_len > 0)
2558 	    /* language has REP items itself */
2559 	    lp->lp_replang = lp->lp_slang;
2560 	else
2561 	    /* find first similar language that has REP items */
2562 	    for (j = 0; j < ga.ga_len; ++j)
2563 	    {
2564 		lp2 = LANGP_ENTRY(ga, j);
2565 		if (lp2->lp_slang->sl_rep.ga_len > 0
2566 			&& STRNCMP(lp->lp_slang->sl_name,
2567 					      lp2->lp_slang->sl_name, 2) == 0)
2568 		{
2569 		    lp->lp_replang = lp2->lp_slang;
2570 		    break;
2571 		}
2572 	    }
2573     }
2574 
2575 theend:
2576     vim_free(spl_copy);
2577     recursive = FALSE;
2578     redraw_win_later(wp, NOT_VALID);
2579     return ret_msg;
2580 }
2581 
2582 /*
2583  * Clear the midword characters for buffer "buf".
2584  */
2585     static void
2586 clear_midword(win_T *wp)
2587 {
2588     vim_memset(wp->w_s->b_spell_ismw, 0, 256);
2589     VIM_CLEAR(wp->w_s->b_spell_ismw_mb);
2590 }
2591 
2592 /*
2593  * Use the "sl_midword" field of language "lp" for buffer "buf".
2594  * They add up to any currently used midword characters.
2595  */
2596     static void
2597 use_midword(slang_T *lp, win_T *wp)
2598 {
2599     char_u	*p;
2600 
2601     if (lp->sl_midword == NULL)	    /* there aren't any */
2602 	return;
2603 
2604     for (p = lp->sl_midword; *p != NUL; )
2605 	if (has_mbyte)
2606 	{
2607 	    int	    c, l, n;
2608 	    char_u  *bp;
2609 
2610 	    c = mb_ptr2char(p);
2611 	    l = (*mb_ptr2len)(p);
2612 	    if (c < 256 && l <= 2)
2613 		wp->w_s->b_spell_ismw[c] = TRUE;
2614 	    else if (wp->w_s->b_spell_ismw_mb == NULL)
2615 		/* First multi-byte char in "b_spell_ismw_mb". */
2616 		wp->w_s->b_spell_ismw_mb = vim_strnsave(p, l);
2617 	    else
2618 	    {
2619 		/* Append multi-byte chars to "b_spell_ismw_mb". */
2620 		n = (int)STRLEN(wp->w_s->b_spell_ismw_mb);
2621 		bp = vim_strnsave(wp->w_s->b_spell_ismw_mb, n + l);
2622 		if (bp != NULL)
2623 		{
2624 		    vim_free(wp->w_s->b_spell_ismw_mb);
2625 		    wp->w_s->b_spell_ismw_mb = bp;
2626 		    vim_strncpy(bp + n, p, l);
2627 		}
2628 	    }
2629 	    p += l;
2630 	}
2631 	else
2632 	    wp->w_s->b_spell_ismw[*p++] = TRUE;
2633 }
2634 
2635 /*
2636  * Find the region "region[2]" in "rp" (points to "sl_regions").
2637  * Each region is simply stored as the two characters of its name.
2638  * Returns the index if found (first is 0), REGION_ALL if not found.
2639  */
2640     static int
2641 find_region(char_u *rp, char_u *region)
2642 {
2643     int		i;
2644 
2645     for (i = 0; ; i += 2)
2646     {
2647 	if (rp[i] == NUL)
2648 	    return REGION_ALL;
2649 	if (rp[i] == region[0] && rp[i + 1] == region[1])
2650 	    break;
2651     }
2652     return i / 2;
2653 }
2654 
2655 /*
2656  * Return case type of word:
2657  * w word	0
2658  * Word		WF_ONECAP
2659  * W WORD	WF_ALLCAP
2660  * WoRd	wOrd	WF_KEEPCAP
2661  */
2662     int
2663 captype(
2664     char_u	*word,
2665     char_u	*end)	    /* When NULL use up to NUL byte. */
2666 {
2667     char_u	*p;
2668     int		c;
2669     int		firstcap;
2670     int		allcap;
2671     int		past_second = FALSE;	/* past second word char */
2672 
2673     /* find first letter */
2674     for (p = word; !spell_iswordp_nmw(p, curwin); MB_PTR_ADV(p))
2675 	if (end == NULL ? *p == NUL : p >= end)
2676 	    return 0;	    /* only non-word characters, illegal word */
2677     if (has_mbyte)
2678 	c = mb_ptr2char_adv(&p);
2679     else
2680 	c = *p++;
2681     firstcap = allcap = SPELL_ISUPPER(c);
2682 
2683     /*
2684      * Need to check all letters to find a word with mixed upper/lower.
2685      * But a word with an upper char only at start is a ONECAP.
2686      */
2687     for ( ; end == NULL ? *p != NUL : p < end; MB_PTR_ADV(p))
2688 	if (spell_iswordp_nmw(p, curwin))
2689 	{
2690 	    c = PTR2CHAR(p);
2691 	    if (!SPELL_ISUPPER(c))
2692 	    {
2693 		/* UUl -> KEEPCAP */
2694 		if (past_second && allcap)
2695 		    return WF_KEEPCAP;
2696 		allcap = FALSE;
2697 	    }
2698 	    else if (!allcap)
2699 		/* UlU -> KEEPCAP */
2700 		return WF_KEEPCAP;
2701 	    past_second = TRUE;
2702 	}
2703 
2704     if (allcap)
2705 	return WF_ALLCAP;
2706     if (firstcap)
2707 	return WF_ONECAP;
2708     return 0;
2709 }
2710 
2711 /*
2712  * Like captype() but for a KEEPCAP word add ONECAP if the word starts with a
2713  * capital.  So that make_case_word() can turn WOrd into Word.
2714  * Add ALLCAP for "WOrD".
2715  */
2716     static int
2717 badword_captype(char_u *word, char_u *end)
2718 {
2719     int		flags = captype(word, end);
2720     int		c;
2721     int		l, u;
2722     int		first;
2723     char_u	*p;
2724 
2725     if (flags & WF_KEEPCAP)
2726     {
2727 	/* Count the number of UPPER and lower case letters. */
2728 	l = u = 0;
2729 	first = FALSE;
2730 	for (p = word; p < end; MB_PTR_ADV(p))
2731 	{
2732 	    c = PTR2CHAR(p);
2733 	    if (SPELL_ISUPPER(c))
2734 	    {
2735 		++u;
2736 		if (p == word)
2737 		    first = TRUE;
2738 	    }
2739 	    else
2740 		++l;
2741 	}
2742 
2743 	/* If there are more UPPER than lower case letters suggest an
2744 	 * ALLCAP word.  Otherwise, if the first letter is UPPER then
2745 	 * suggest ONECAP.  Exception: "ALl" most likely should be "All",
2746 	 * require three upper case letters. */
2747 	if (u > l && u > 2)
2748 	    flags |= WF_ALLCAP;
2749 	else if (first)
2750 	    flags |= WF_ONECAP;
2751 
2752 	if (u >= 2 && l >= 2)	/* maCARONI maCAroni */
2753 	    flags |= WF_MIXCAP;
2754     }
2755     return flags;
2756 }
2757 
2758 /*
2759  * Delete the internal wordlist and its .spl file.
2760  */
2761     void
2762 spell_delete_wordlist(void)
2763 {
2764     char_u	fname[MAXPATHL];
2765 
2766     if (int_wordlist != NULL)
2767     {
2768 	mch_remove(int_wordlist);
2769 	int_wordlist_spl(fname);
2770 	mch_remove(fname);
2771 	VIM_CLEAR(int_wordlist);
2772     }
2773 }
2774 
2775 /*
2776  * Free all languages.
2777  */
2778     void
2779 spell_free_all(void)
2780 {
2781     slang_T	*slang;
2782     buf_T	*buf;
2783 
2784     /* Go through all buffers and handle 'spelllang'. <VN> */
2785     FOR_ALL_BUFFERS(buf)
2786 	ga_clear(&buf->b_s.b_langp);
2787 
2788     while (first_lang != NULL)
2789     {
2790 	slang = first_lang;
2791 	first_lang = slang->sl_next;
2792 	slang_free(slang);
2793     }
2794 
2795     spell_delete_wordlist();
2796 
2797     VIM_CLEAR(repl_to);
2798     VIM_CLEAR(repl_from);
2799 }
2800 
2801 /*
2802  * Clear all spelling tables and reload them.
2803  * Used after 'encoding' is set and when ":mkspell" was used.
2804  */
2805     void
2806 spell_reload(void)
2807 {
2808     win_T	*wp;
2809 
2810     /* Initialize the table for spell_iswordp(). */
2811     init_spell_chartab();
2812 
2813     /* Unload all allocated memory. */
2814     spell_free_all();
2815 
2816     /* Go through all buffers and handle 'spelllang'. */
2817     FOR_ALL_WINDOWS(wp)
2818     {
2819 	/* Only load the wordlists when 'spelllang' is set and there is a
2820 	 * window for this buffer in which 'spell' is set. */
2821 	if (*wp->w_s->b_p_spl != NUL)
2822 	{
2823 		if (wp->w_p_spell)
2824 		{
2825 		    (void)did_set_spelllang(wp);
2826 		    break;
2827 		}
2828 	}
2829     }
2830 }
2831 
2832 /*
2833  * Opposite of offset2bytes().
2834  * "pp" points to the bytes and is advanced over it.
2835  * Returns the offset.
2836  */
2837     static int
2838 bytes2offset(char_u **pp)
2839 {
2840     char_u	*p = *pp;
2841     int		nr;
2842     int		c;
2843 
2844     c = *p++;
2845     if ((c & 0x80) == 0x00)		/* 1 byte */
2846     {
2847 	nr = c - 1;
2848     }
2849     else if ((c & 0xc0) == 0x80)	/* 2 bytes */
2850     {
2851 	nr = (c & 0x3f) - 1;
2852 	nr = nr * 255 + (*p++ - 1);
2853     }
2854     else if ((c & 0xe0) == 0xc0)	/* 3 bytes */
2855     {
2856 	nr = (c & 0x1f) - 1;
2857 	nr = nr * 255 + (*p++ - 1);
2858 	nr = nr * 255 + (*p++ - 1);
2859     }
2860     else				/* 4 bytes */
2861     {
2862 	nr = (c & 0x0f) - 1;
2863 	nr = nr * 255 + (*p++ - 1);
2864 	nr = nr * 255 + (*p++ - 1);
2865 	nr = nr * 255 + (*p++ - 1);
2866     }
2867 
2868     *pp = p;
2869     return nr;
2870 }
2871 
2872 
2873 /*
2874  * Open a spell buffer.  This is a nameless buffer that is not in the buffer
2875  * list and only contains text lines.  Can use a swapfile to reduce memory
2876  * use.
2877  * Most other fields are invalid!  Esp. watch out for string options being
2878  * NULL and there is no undo info.
2879  * Returns NULL when out of memory.
2880  */
2881     buf_T *
2882 open_spellbuf(void)
2883 {
2884     buf_T	*buf;
2885 
2886     buf = ALLOC_CLEAR_ONE(buf_T);
2887     if (buf != NULL)
2888     {
2889 	buf->b_spell = TRUE;
2890 	buf->b_p_swf = TRUE;	/* may create a swap file */
2891 #ifdef FEAT_CRYPT
2892 	buf->b_p_key = empty_option;
2893 #endif
2894 	ml_open(buf);
2895 	ml_open_file(buf);	/* create swap file now */
2896     }
2897     return buf;
2898 }
2899 
2900 /*
2901  * Close the buffer used for spell info.
2902  */
2903     void
2904 close_spellbuf(buf_T *buf)
2905 {
2906     if (buf != NULL)
2907     {
2908 	ml_close(buf, TRUE);
2909 	vim_free(buf);
2910     }
2911 }
2912 
2913 /*
2914  * Init the chartab used for spelling for ASCII.
2915  * EBCDIC is not supported!
2916  */
2917     void
2918 clear_spell_chartab(spelltab_T *sp)
2919 {
2920     int		i;
2921 
2922     /* Init everything to FALSE. */
2923     vim_memset(sp->st_isw, FALSE, sizeof(sp->st_isw));
2924     vim_memset(sp->st_isu, FALSE, sizeof(sp->st_isu));
2925     for (i = 0; i < 256; ++i)
2926     {
2927 	sp->st_fold[i] = i;
2928 	sp->st_upper[i] = i;
2929     }
2930 
2931     /* We include digits.  A word shouldn't start with a digit, but handling
2932      * that is done separately. */
2933     for (i = '0'; i <= '9'; ++i)
2934 	sp->st_isw[i] = TRUE;
2935     for (i = 'A'; i <= 'Z'; ++i)
2936     {
2937 	sp->st_isw[i] = TRUE;
2938 	sp->st_isu[i] = TRUE;
2939 	sp->st_fold[i] = i + 0x20;
2940     }
2941     for (i = 'a'; i <= 'z'; ++i)
2942     {
2943 	sp->st_isw[i] = TRUE;
2944 	sp->st_upper[i] = i - 0x20;
2945     }
2946 }
2947 
2948 /*
2949  * Init the chartab used for spelling.  Only depends on 'encoding'.
2950  * Called once while starting up and when 'encoding' changes.
2951  * The default is to use isalpha(), but the spell file should define the word
2952  * characters to make it possible that 'encoding' differs from the current
2953  * locale.  For utf-8 we don't use isalpha() but our own functions.
2954  */
2955     void
2956 init_spell_chartab(void)
2957 {
2958     int	    i;
2959 
2960     did_set_spelltab = FALSE;
2961     clear_spell_chartab(&spelltab);
2962     if (enc_dbcs)
2963     {
2964 	/* DBCS: assume double-wide characters are word characters. */
2965 	for (i = 128; i <= 255; ++i)
2966 	    if (MB_BYTE2LEN(i) == 2)
2967 		spelltab.st_isw[i] = TRUE;
2968     }
2969     else if (enc_utf8)
2970     {
2971 	for (i = 128; i < 256; ++i)
2972 	{
2973 	    int f = utf_fold(i);
2974 	    int u = utf_toupper(i);
2975 
2976 	    spelltab.st_isu[i] = utf_isupper(i);
2977 	    spelltab.st_isw[i] = spelltab.st_isu[i] || utf_islower(i);
2978 	    /* The folded/upper-cased value is different between latin1 and
2979 	     * utf8 for 0xb5, causing E763 for no good reason.  Use the latin1
2980 	     * value for utf-8 to avoid this. */
2981 	    spelltab.st_fold[i] = (f < 256) ? f : i;
2982 	    spelltab.st_upper[i] = (u < 256) ? u : i;
2983 	}
2984     }
2985     else
2986     {
2987 	/* Rough guess: use locale-dependent library functions. */
2988 	for (i = 128; i < 256; ++i)
2989 	{
2990 	    if (MB_ISUPPER(i))
2991 	    {
2992 		spelltab.st_isw[i] = TRUE;
2993 		spelltab.st_isu[i] = TRUE;
2994 		spelltab.st_fold[i] = MB_TOLOWER(i);
2995 	    }
2996 	    else if (MB_ISLOWER(i))
2997 	    {
2998 		spelltab.st_isw[i] = TRUE;
2999 		spelltab.st_upper[i] = MB_TOUPPER(i);
3000 	    }
3001 	}
3002     }
3003 }
3004 
3005 
3006 /*
3007  * Return TRUE if "p" points to a word character.
3008  * As a special case we see "midword" characters as word character when it is
3009  * followed by a word character.  This finds they'there but not 'they there'.
3010  * Thus this only works properly when past the first character of the word.
3011  */
3012     static int
3013 spell_iswordp(
3014     char_u	*p,
3015     win_T	*wp)	    /* buffer used */
3016 {
3017     char_u	*s;
3018     int		l;
3019     int		c;
3020 
3021     if (has_mbyte)
3022     {
3023 	l = MB_PTR2LEN(p);
3024 	s = p;
3025 	if (l == 1)
3026 	{
3027 	    /* be quick for ASCII */
3028 	    if (wp->w_s->b_spell_ismw[*p])
3029 		s = p + 1;		/* skip a mid-word character */
3030 	}
3031 	else
3032 	{
3033 	    c = mb_ptr2char(p);
3034 	    if (c < 256 ? wp->w_s->b_spell_ismw[c]
3035 		    : (wp->w_s->b_spell_ismw_mb != NULL
3036 			   && vim_strchr(wp->w_s->b_spell_ismw_mb, c) != NULL))
3037 		s = p + l;
3038 	}
3039 
3040 	c = mb_ptr2char(s);
3041 	if (c > 255)
3042 	    return spell_mb_isword_class(mb_get_class(s), wp);
3043 	return spelltab.st_isw[c];
3044     }
3045 
3046     return spelltab.st_isw[wp->w_s->b_spell_ismw[*p] ? p[1] : p[0]];
3047 }
3048 
3049 /*
3050  * Return TRUE if "p" points to a word character.
3051  * Unlike spell_iswordp() this doesn't check for "midword" characters.
3052  */
3053     int
3054 spell_iswordp_nmw(char_u *p, win_T *wp)
3055 {
3056     int		c;
3057 
3058     if (has_mbyte)
3059     {
3060 	c = mb_ptr2char(p);
3061 	if (c > 255)
3062 	    return spell_mb_isword_class(mb_get_class(p), wp);
3063 	return spelltab.st_isw[c];
3064     }
3065     return spelltab.st_isw[*p];
3066 }
3067 
3068 /*
3069  * Return TRUE if word class indicates a word character.
3070  * Only for characters above 255.
3071  * Unicode subscript and superscript are not considered word characters.
3072  * See also dbcs_class() and utf_class() in mbyte.c.
3073  */
3074     static int
3075 spell_mb_isword_class(int cl, win_T *wp)
3076 {
3077     if (wp->w_s->b_cjk)
3078 	/* East Asian characters are not considered word characters. */
3079 	return cl == 2 || cl == 0x2800;
3080     return cl >= 2 && cl != 0x2070 && cl != 0x2080 && cl != 3;
3081 }
3082 
3083 /*
3084  * Return TRUE if "p" points to a word character.
3085  * Wide version of spell_iswordp().
3086  */
3087     static int
3088 spell_iswordp_w(int *p, win_T *wp)
3089 {
3090     int		*s;
3091 
3092     if (*p < 256 ? wp->w_s->b_spell_ismw[*p]
3093 		 : (wp->w_s->b_spell_ismw_mb != NULL
3094 			     && vim_strchr(wp->w_s->b_spell_ismw_mb, *p) != NULL))
3095 	s = p + 1;
3096     else
3097 	s = p;
3098 
3099     if (*s > 255)
3100     {
3101 	if (enc_utf8)
3102 	    return spell_mb_isword_class(utf_class(*s), wp);
3103 	if (enc_dbcs)
3104 	    return spell_mb_isword_class(
3105 				dbcs_class((unsigned)*s >> 8, *s & 0xff), wp);
3106 	return 0;
3107     }
3108     return spelltab.st_isw[*s];
3109 }
3110 
3111 /*
3112  * Case-fold "str[len]" into "buf[buflen]".  The result is NUL terminated.
3113  * Uses the character definitions from the .spl file.
3114  * When using a multi-byte 'encoding' the length may change!
3115  * Returns FAIL when something wrong.
3116  */
3117     int
3118 spell_casefold(
3119     char_u	*str,
3120     int		len,
3121     char_u	*buf,
3122     int		buflen)
3123 {
3124     int		i;
3125 
3126     if (len >= buflen)
3127     {
3128 	buf[0] = NUL;
3129 	return FAIL;		/* result will not fit */
3130     }
3131 
3132     if (has_mbyte)
3133     {
3134 	int	outi = 0;
3135 	char_u	*p;
3136 	int	c;
3137 
3138 	/* Fold one character at a time. */
3139 	for (p = str; p < str + len; )
3140 	{
3141 	    if (outi + MB_MAXBYTES > buflen)
3142 	    {
3143 		buf[outi] = NUL;
3144 		return FAIL;
3145 	    }
3146 	    c = mb_cptr2char_adv(&p);
3147 	    outi += mb_char2bytes(SPELL_TOFOLD(c), buf + outi);
3148 	}
3149 	buf[outi] = NUL;
3150     }
3151     else
3152     {
3153 	/* Be quick for non-multibyte encodings. */
3154 	for (i = 0; i < len; ++i)
3155 	    buf[i] = spelltab.st_fold[str[i]];
3156 	buf[i] = NUL;
3157     }
3158 
3159     return OK;
3160 }
3161 
/*
 * Values for "sps_flags": the suggestion method selected with
 * 'spellsuggest'.  spell_check_sps() stores exactly one of them.
 */
#define SPS_BEST    1
#define SPS_FAST    2
#define SPS_DOUBLE  4

/* Method flag from 'spellsuggest', one of the SPS_ values above. */
static int sps_flags = SPS_BEST;
/* Maximum number of suggestions given, from 'spellsuggest'. */
static int sps_limit = 9999;
3169 
3170 /*
3171  * Check the 'spellsuggest' option.  Return FAIL if it's wrong.
3172  * Sets "sps_flags" and "sps_limit".
3173  */
3174     int
3175 spell_check_sps(void)
3176 {
3177     char_u	*p;
3178     char_u	*s;
3179     char_u	buf[MAXPATHL];
3180     int		f;
3181 
3182     sps_flags = 0;
3183     sps_limit = 9999;
3184 
3185     for (p = p_sps; *p != NUL; )
3186     {
3187 	copy_option_part(&p, buf, MAXPATHL, ",");
3188 
3189 	f = 0;
3190 	if (VIM_ISDIGIT(*buf))
3191 	{
3192 	    s = buf;
3193 	    sps_limit = getdigits(&s);
3194 	    if (*s != NUL && !VIM_ISDIGIT(*s))
3195 		f = -1;
3196 	}
3197 	else if (STRCMP(buf, "best") == 0)
3198 	    f = SPS_BEST;
3199 	else if (STRCMP(buf, "fast") == 0)
3200 	    f = SPS_FAST;
3201 	else if (STRCMP(buf, "double") == 0)
3202 	    f = SPS_DOUBLE;
3203 	else if (STRNCMP(buf, "expr:", 5) != 0
3204 		&& STRNCMP(buf, "file:", 5) != 0)
3205 	    f = -1;
3206 
3207 	if (f == -1 || (sps_flags != 0 && f != 0))
3208 	{
3209 	    sps_flags = SPS_BEST;
3210 	    sps_limit = 9999;
3211 	    return FAIL;
3212 	}
3213 	if (f != 0)
3214 	    sps_flags = f;
3215     }
3216 
3217     if (sps_flags == 0)
3218 	sps_flags = SPS_BEST;
3219 
3220     return OK;
3221 }
3222 
3223 /*
3224  * "z=": Find badly spelled word under or after the cursor.
3225  * Give suggestions for the properly spelled word.
3226  * In Visual mode use the highlighted word as the bad word.
3227  * When "count" is non-zero use that suggestion.
3228  */
3229     void
3230 spell_suggest(int count)
3231 {
3232     char_u	*line;
3233     pos_T	prev_cursor = curwin->w_cursor;
3234     char_u	wcopy[MAXWLEN + 2];
3235     char_u	*p;
3236     int		i;
3237     int		c;
3238     suginfo_T	sug;
3239     suggest_T	*stp;
3240     int		mouse_used;
3241     int		need_cap;
3242     int		limit;
3243     int		selected = count;
3244     int		badlen = 0;
3245     int		msg_scroll_save = msg_scroll;
3246 
3247     if (no_spell_checking(curwin))
3248 	return;
3249 
3250     if (VIsual_active)
3251     {
3252 	/* Use the Visually selected text as the bad word.  But reject
3253 	 * a multi-line selection. */
3254 	if (curwin->w_cursor.lnum != VIsual.lnum)
3255 	{
3256 	    vim_beep(BO_SPELL);
3257 	    return;
3258 	}
3259 	badlen = (int)curwin->w_cursor.col - (int)VIsual.col;
3260 	if (badlen < 0)
3261 	    badlen = -badlen;
3262 	else
3263 	    curwin->w_cursor.col = VIsual.col;
3264 	++badlen;
3265 	end_visual_mode();
3266     }
3267     /* Find the start of the badly spelled word. */
3268     else if (spell_move_to(curwin, FORWARD, TRUE, TRUE, NULL) == 0
3269 	    || curwin->w_cursor.col > prev_cursor.col)
3270     {
3271 	/* No bad word or it starts after the cursor: use the word under the
3272 	 * cursor. */
3273 	curwin->w_cursor = prev_cursor;
3274 	line = ml_get_curline();
3275 	p = line + curwin->w_cursor.col;
3276 	/* Backup to before start of word. */
3277 	while (p > line && spell_iswordp_nmw(p, curwin))
3278 	    MB_PTR_BACK(line, p);
3279 	/* Forward to start of word. */
3280 	while (*p != NUL && !spell_iswordp_nmw(p, curwin))
3281 	    MB_PTR_ADV(p);
3282 
3283 	if (!spell_iswordp_nmw(p, curwin))		/* No word found. */
3284 	{
3285 	    beep_flush();
3286 	    return;
3287 	}
3288 	curwin->w_cursor.col = (colnr_T)(p - line);
3289     }
3290 
3291     /* Get the word and its length. */
3292 
3293     /* Figure out if the word should be capitalised. */
3294     need_cap = check_need_cap(curwin->w_cursor.lnum, curwin->w_cursor.col);
3295 
3296     /* Make a copy of current line since autocommands may free the line. */
3297     line = vim_strsave(ml_get_curline());
3298     if (line == NULL)
3299 	goto skip;
3300 
3301     /* Get the list of suggestions.  Limit to 'lines' - 2 or the number in
3302      * 'spellsuggest', whatever is smaller. */
3303     if (sps_limit > (int)Rows - 2)
3304 	limit = (int)Rows - 2;
3305     else
3306 	limit = sps_limit;
3307     spell_find_suggest(line + curwin->w_cursor.col, badlen, &sug, limit,
3308 							TRUE, need_cap, TRUE);
3309 
3310     if (sug.su_ga.ga_len == 0)
3311 	msg(_("Sorry, no suggestions"));
3312     else if (count > 0)
3313     {
3314 	if (count > sug.su_ga.ga_len)
3315 	    smsg(_("Sorry, only %ld suggestions"),
3316 						      (long)sug.su_ga.ga_len);
3317     }
3318     else
3319     {
3320 	VIM_CLEAR(repl_from);
3321 	VIM_CLEAR(repl_to);
3322 
3323 #ifdef FEAT_RIGHTLEFT
3324 	/* When 'rightleft' is set the list is drawn right-left. */
3325 	cmdmsg_rl = curwin->w_p_rl;
3326 	if (cmdmsg_rl)
3327 	    msg_col = Columns - 1;
3328 #endif
3329 
3330 	/* List the suggestions. */
3331 	msg_start();
3332 	msg_row = Rows - 1;	/* for when 'cmdheight' > 1 */
3333 	lines_left = Rows;	/* avoid more prompt */
3334 	vim_snprintf((char *)IObuff, IOSIZE, _("Change \"%.*s\" to:"),
3335 						sug.su_badlen, sug.su_badptr);
3336 #ifdef FEAT_RIGHTLEFT
3337 	if (cmdmsg_rl && STRNCMP(IObuff, "Change", 6) == 0)
3338 	{
3339 	    /* And now the rabbit from the high hat: Avoid showing the
3340 	     * untranslated message rightleft. */
3341 	    vim_snprintf((char *)IObuff, IOSIZE, ":ot \"%.*s\" egnahC",
3342 						sug.su_badlen, sug.su_badptr);
3343 	}
3344 #endif
3345 	msg_puts((char *)IObuff);
3346 	msg_clr_eos();
3347 	msg_putchar('\n');
3348 
3349 	msg_scroll = TRUE;
3350 	for (i = 0; i < sug.su_ga.ga_len; ++i)
3351 	{
3352 	    stp = &SUG(sug.su_ga, i);
3353 
3354 	    /* The suggested word may replace only part of the bad word, add
3355 	     * the not replaced part. */
3356 	    vim_strncpy(wcopy, stp->st_word, MAXWLEN);
3357 	    if (sug.su_badlen > stp->st_orglen)
3358 		vim_strncpy(wcopy + stp->st_wordlen,
3359 					       sug.su_badptr + stp->st_orglen,
3360 					      sug.su_badlen - stp->st_orglen);
3361 	    vim_snprintf((char *)IObuff, IOSIZE, "%2d", i + 1);
3362 #ifdef FEAT_RIGHTLEFT
3363 	    if (cmdmsg_rl)
3364 		rl_mirror(IObuff);
3365 #endif
3366 	    msg_puts((char *)IObuff);
3367 
3368 	    vim_snprintf((char *)IObuff, IOSIZE, " \"%s\"", wcopy);
3369 	    msg_puts((char *)IObuff);
3370 
3371 	    /* The word may replace more than "su_badlen". */
3372 	    if (sug.su_badlen < stp->st_orglen)
3373 	    {
3374 		vim_snprintf((char *)IObuff, IOSIZE, _(" < \"%.*s\""),
3375 					       stp->st_orglen, sug.su_badptr);
3376 		msg_puts((char *)IObuff);
3377 	    }
3378 
3379 	    if (p_verbose > 0)
3380 	    {
3381 		/* Add the score. */
3382 		if (sps_flags & (SPS_DOUBLE | SPS_BEST))
3383 		    vim_snprintf((char *)IObuff, IOSIZE, " (%s%d - %d)",
3384 			stp->st_salscore ? "s " : "",
3385 			stp->st_score, stp->st_altscore);
3386 		else
3387 		    vim_snprintf((char *)IObuff, IOSIZE, " (%d)",
3388 			    stp->st_score);
3389 #ifdef FEAT_RIGHTLEFT
3390 		if (cmdmsg_rl)
3391 		    /* Mirror the numbers, but keep the leading space. */
3392 		    rl_mirror(IObuff + 1);
3393 #endif
3394 		msg_advance(30);
3395 		msg_puts((char *)IObuff);
3396 	    }
3397 	    msg_putchar('\n');
3398 	}
3399 
3400 #ifdef FEAT_RIGHTLEFT
3401 	cmdmsg_rl = FALSE;
3402 	msg_col = 0;
3403 #endif
3404 	/* Ask for choice. */
3405 	selected = prompt_for_number(&mouse_used);
3406 	if (mouse_used)
3407 	    selected -= lines_left;
3408 	lines_left = Rows;		/* avoid more prompt */
3409 	/* don't delay for 'smd' in normal_cmd() */
3410 	msg_scroll = msg_scroll_save;
3411     }
3412 
3413     if (selected > 0 && selected <= sug.su_ga.ga_len && u_save_cursor() == OK)
3414     {
3415 	/* Save the from and to text for :spellrepall. */
3416 	stp = &SUG(sug.su_ga, selected - 1);
3417 	if (sug.su_badlen > stp->st_orglen)
3418 	{
3419 	    /* Replacing less than "su_badlen", append the remainder to
3420 	     * repl_to. */
3421 	    repl_from = vim_strnsave(sug.su_badptr, sug.su_badlen);
3422 	    vim_snprintf((char *)IObuff, IOSIZE, "%s%.*s", stp->st_word,
3423 		    sug.su_badlen - stp->st_orglen,
3424 					      sug.su_badptr + stp->st_orglen);
3425 	    repl_to = vim_strsave(IObuff);
3426 	}
3427 	else
3428 	{
3429 	    /* Replacing su_badlen or more, use the whole word. */
3430 	    repl_from = vim_strnsave(sug.su_badptr, stp->st_orglen);
3431 	    repl_to = vim_strsave(stp->st_word);
3432 	}
3433 
3434 	/* Replace the word. */
3435 	p = alloc(STRLEN(line) - stp->st_orglen + stp->st_wordlen + 1);
3436 	if (p != NULL)
3437 	{
3438 	    c = (int)(sug.su_badptr - line);
3439 	    mch_memmove(p, line, c);
3440 	    STRCPY(p + c, stp->st_word);
3441 	    STRCAT(p, sug.su_badptr + stp->st_orglen);
3442 	    ml_replace(curwin->w_cursor.lnum, p, FALSE);
3443 	    curwin->w_cursor.col = c;
3444 
3445 	    /* For redo we use a change-word command. */
3446 	    ResetRedobuff();
3447 	    AppendToRedobuff((char_u *)"ciw");
3448 	    AppendToRedobuffLit(p + c,
3449 			    stp->st_wordlen + sug.su_badlen - stp->st_orglen);
3450 	    AppendCharToRedobuff(ESC);
3451 
3452 	    /* After this "p" may be invalid. */
3453 	    changed_bytes(curwin->w_cursor.lnum, c);
3454 	}
3455     }
3456     else
3457 	curwin->w_cursor = prev_cursor;
3458 
3459     spell_find_cleanup(&sug);
3460 skip:
3461     vim_free(line);
3462 }
3463 
3464 /*
3465  * Check if the word at line "lnum" column "col" is required to start with a
3466  * capital.  This uses 'spellcapcheck' of the current buffer.
3467  */
3468     static int
3469 check_need_cap(linenr_T lnum, colnr_T col)
3470 {
3471     int		need_cap = FALSE;
3472     char_u	*line;
3473     char_u	*line_copy = NULL;
3474     char_u	*p;
3475     colnr_T	endcol;
3476     regmatch_T	regmatch;
3477 
3478     if (curwin->w_s->b_cap_prog == NULL)
3479 	return FALSE;
3480 
3481     line = ml_get_curline();
3482     endcol = 0;
3483     if (getwhitecols(line) >= (int)col)
3484     {
3485 	/* At start of line, check if previous line is empty or sentence
3486 	 * ends there. */
3487 	if (lnum == 1)
3488 	    need_cap = TRUE;
3489 	else
3490 	{
3491 	    line = ml_get(lnum - 1);
3492 	    if (*skipwhite(line) == NUL)
3493 		need_cap = TRUE;
3494 	    else
3495 	    {
3496 		/* Append a space in place of the line break. */
3497 		line_copy = concat_str(line, (char_u *)" ");
3498 		line = line_copy;
3499 		endcol = (colnr_T)STRLEN(line);
3500 	    }
3501 	}
3502     }
3503     else
3504 	endcol = col;
3505 
3506     if (endcol > 0)
3507     {
3508 	/* Check if sentence ends before the bad word. */
3509 	regmatch.regprog = curwin->w_s->b_cap_prog;
3510 	regmatch.rm_ic = FALSE;
3511 	p = line + endcol;
3512 	for (;;)
3513 	{
3514 	    MB_PTR_BACK(line, p);
3515 	    if (p == line || spell_iswordp_nmw(p, curwin))
3516 		break;
3517 	    if (vim_regexec(&regmatch, p, 0)
3518 					 && regmatch.endp[0] == line + endcol)
3519 	    {
3520 		need_cap = TRUE;
3521 		break;
3522 	    }
3523 	}
3524 	curwin->w_s->b_cap_prog = regmatch.regprog;
3525     }
3526 
3527     vim_free(line_copy);
3528 
3529     return need_cap;
3530 }
3531 
3532 
3533 /*
3534  * ":spellrepall"
3535  */
3536     void
3537 ex_spellrepall(exarg_T *eap UNUSED)
3538 {
3539     pos_T	pos = curwin->w_cursor;
3540     char_u	*frompat;
3541     int		addlen;
3542     char_u	*line;
3543     char_u	*p;
3544     int		save_ws = p_ws;
3545     linenr_T	prev_lnum = 0;
3546 
3547     if (repl_from == NULL || repl_to == NULL)
3548     {
3549 	emsg(_("E752: No previous spell replacement"));
3550 	return;
3551     }
3552     addlen = (int)(STRLEN(repl_to) - STRLEN(repl_from));
3553 
3554     frompat = alloc(STRLEN(repl_from) + 7);
3555     if (frompat == NULL)
3556 	return;
3557     sprintf((char *)frompat, "\\V\\<%s\\>", repl_from);
3558     p_ws = FALSE;
3559 
3560     sub_nsubs = 0;
3561     sub_nlines = 0;
3562     curwin->w_cursor.lnum = 0;
3563     while (!got_int)
3564     {
3565 	if (do_search(NULL, '/', frompat, 1L, SEARCH_KEEP, NULL, NULL) == 0
3566 						   || u_save_cursor() == FAIL)
3567 	    break;
3568 
3569 	/* Only replace when the right word isn't there yet.  This happens
3570 	 * when changing "etc" to "etc.". */
3571 	line = ml_get_curline();
3572 	if (addlen <= 0 || STRNCMP(line + curwin->w_cursor.col,
3573 					       repl_to, STRLEN(repl_to)) != 0)
3574 	{
3575 	    p = alloc(STRLEN(line) + addlen + 1);
3576 	    if (p == NULL)
3577 		break;
3578 	    mch_memmove(p, line, curwin->w_cursor.col);
3579 	    STRCPY(p + curwin->w_cursor.col, repl_to);
3580 	    STRCAT(p, line + curwin->w_cursor.col + STRLEN(repl_from));
3581 	    ml_replace(curwin->w_cursor.lnum, p, FALSE);
3582 	    changed_bytes(curwin->w_cursor.lnum, curwin->w_cursor.col);
3583 
3584 	    if (curwin->w_cursor.lnum != prev_lnum)
3585 	    {
3586 		++sub_nlines;
3587 		prev_lnum = curwin->w_cursor.lnum;
3588 	    }
3589 	    ++sub_nsubs;
3590 	}
3591 	curwin->w_cursor.col += (colnr_T)STRLEN(repl_to);
3592     }
3593 
3594     p_ws = save_ws;
3595     curwin->w_cursor = pos;
3596     vim_free(frompat);
3597 
3598     if (sub_nsubs == 0)
3599 	semsg(_("E753: Not found: %s"), repl_from);
3600     else
3601 	do_sub_msg(FALSE);
3602 }
3603 
3604 /*
3605  * Find spell suggestions for "word".  Return them in the growarray "*gap" as
3606  * a list of allocated strings.
3607  */
3608     void
3609 spell_suggest_list(
3610     garray_T	*gap,
3611     char_u	*word,
3612     int		maxcount,	/* maximum nr of suggestions */
3613     int		need_cap,	/* 'spellcapcheck' matched */
3614     int		interactive)
3615 {
3616     suginfo_T	sug;
3617     int		i;
3618     suggest_T	*stp;
3619     char_u	*wcopy;
3620 
3621     spell_find_suggest(word, 0, &sug, maxcount, FALSE, need_cap, interactive);
3622 
3623     /* Make room in "gap". */
3624     ga_init2(gap, sizeof(char_u *), sug.su_ga.ga_len + 1);
3625     if (ga_grow(gap, sug.su_ga.ga_len) == OK)
3626     {
3627 	for (i = 0; i < sug.su_ga.ga_len; ++i)
3628 	{
3629 	    stp = &SUG(sug.su_ga, i);
3630 
3631 	    /* The suggested word may replace only part of "word", add the not
3632 	     * replaced part. */
3633 	    wcopy = alloc(stp->st_wordlen
3634 		      + (unsigned)STRLEN(sug.su_badptr + stp->st_orglen) + 1);
3635 	    if (wcopy == NULL)
3636 		break;
3637 	    STRCPY(wcopy, stp->st_word);
3638 	    STRCPY(wcopy + stp->st_wordlen, sug.su_badptr + stp->st_orglen);
3639 	    ((char_u **)gap->ga_data)[gap->ga_len++] = wcopy;
3640 	}
3641     }
3642 
3643     spell_find_cleanup(&sug);
3644 }
3645 
3646 /*
3647  * Find spell suggestions for the word at the start of "badptr".
3648  * Return the suggestions in "su->su_ga".
3649  * The maximum number of suggestions is "maxcount".
3650  * Note: does use info for the current window.
3651  * This is based on the mechanisms of Aspell, but completely reimplemented.
3652  */
3653     static void
3654 spell_find_suggest(
3655     char_u	*badptr,
3656     int		badlen,		/* length of bad word or 0 if unknown */
3657     suginfo_T	*su,
3658     int		maxcount,
3659     int		banbadword,	/* don't include badword in suggestions */
3660     int		need_cap,	/* word should start with capital */
3661     int		interactive)
3662 {
3663     hlf_T	attr = HLF_COUNT;
3664     char_u	buf[MAXPATHL];
3665     char_u	*p;
3666     int		do_combine = FALSE;
3667     char_u	*sps_copy;
3668 #ifdef FEAT_EVAL
3669     static int	expr_busy = FALSE;
3670 #endif
3671     int		c;
3672     int		i;
3673     langp_T	*lp;
3674 
3675     /*
3676      * Set the info in "*su".
3677      */
3678     vim_memset(su, 0, sizeof(suginfo_T));
3679     ga_init2(&su->su_ga, (int)sizeof(suggest_T), 10);
3680     ga_init2(&su->su_sga, (int)sizeof(suggest_T), 10);
3681     if (*badptr == NUL)
3682 	return;
3683     hash_init(&su->su_banned);
3684 
3685     su->su_badptr = badptr;
3686     if (badlen != 0)
3687 	su->su_badlen = badlen;
3688     else
3689 	su->su_badlen = spell_check(curwin, su->su_badptr, &attr, NULL, FALSE);
3690     su->su_maxcount = maxcount;
3691     su->su_maxscore = SCORE_MAXINIT;
3692 
3693     if (su->su_badlen >= MAXWLEN)
3694 	su->su_badlen = MAXWLEN - 1;	/* just in case */
3695     vim_strncpy(su->su_badword, su->su_badptr, su->su_badlen);
3696     (void)spell_casefold(su->su_badptr, su->su_badlen,
3697 						    su->su_fbadword, MAXWLEN);
3698     /* TODO: make this work if the case-folded text is longer than the original
3699      * text. Currently an illegal byte causes wrong pointer computations. */
3700     su->su_fbadword[su->su_badlen] = NUL;
3701 
3702     /* get caps flags for bad word */
3703     su->su_badflags = badword_captype(su->su_badptr,
3704 					       su->su_badptr + su->su_badlen);
3705     if (need_cap)
3706 	su->su_badflags |= WF_ONECAP;
3707 
3708     /* Find the default language for sound folding.  We simply use the first
3709      * one in 'spelllang' that supports sound folding.  That's good for when
3710      * using multiple files for one language, it's not that bad when mixing
3711      * languages (e.g., "pl,en"). */
3712     for (i = 0; i < curbuf->b_s.b_langp.ga_len; ++i)
3713     {
3714 	lp = LANGP_ENTRY(curbuf->b_s.b_langp, i);
3715 	if (lp->lp_sallang != NULL)
3716 	{
3717 	    su->su_sallang = lp->lp_sallang;
3718 	    break;
3719 	}
3720     }
3721 
3722     /* Soundfold the bad word with the default sound folding, so that we don't
3723      * have to do this many times. */
3724     if (su->su_sallang != NULL)
3725 	spell_soundfold(su->su_sallang, su->su_fbadword, TRUE,
3726 							  su->su_sal_badword);
3727 
3728     /* If the word is not capitalised and spell_check() doesn't consider the
3729      * word to be bad then it might need to be capitalised.  Add a suggestion
3730      * for that. */
3731     c = PTR2CHAR(su->su_badptr);
3732     if (!SPELL_ISUPPER(c) && attr == HLF_COUNT)
3733     {
3734 	make_case_word(su->su_badword, buf, WF_ONECAP);
3735 	add_suggestion(su, &su->su_ga, buf, su->su_badlen, SCORE_ICASE,
3736 					      0, TRUE, su->su_sallang, FALSE);
3737     }
3738 
3739     /* Ban the bad word itself.  It may appear in another region. */
3740     if (banbadword)
3741 	add_banned(su, su->su_badword);
3742 
3743     /* Make a copy of 'spellsuggest', because the expression may change it. */
3744     sps_copy = vim_strsave(p_sps);
3745     if (sps_copy == NULL)
3746 	return;
3747 
3748     /* Loop over the items in 'spellsuggest'. */
3749     for (p = sps_copy; *p != NUL; )
3750     {
3751 	copy_option_part(&p, buf, MAXPATHL, ",");
3752 
3753 	if (STRNCMP(buf, "expr:", 5) == 0)
3754 	{
3755 #ifdef FEAT_EVAL
3756 	    /* Evaluate an expression.  Skip this when called recursively,
3757 	     * when using spellsuggest() in the expression. */
3758 	    if (!expr_busy)
3759 	    {
3760 		expr_busy = TRUE;
3761 		spell_suggest_expr(su, buf + 5);
3762 		expr_busy = FALSE;
3763 	    }
3764 #endif
3765 	}
3766 	else if (STRNCMP(buf, "file:", 5) == 0)
3767 	    /* Use list of suggestions in a file. */
3768 	    spell_suggest_file(su, buf + 5);
3769 	else
3770 	{
3771 	    /* Use internal method. */
3772 	    spell_suggest_intern(su, interactive);
3773 	    if (sps_flags & SPS_DOUBLE)
3774 		do_combine = TRUE;
3775 	}
3776     }
3777 
3778     vim_free(sps_copy);
3779 
3780     if (do_combine)
3781 	/* Combine the two list of suggestions.  This must be done last,
3782 	 * because sorting changes the order again. */
3783 	score_combine(su);
3784 }
3785 
#ifdef FEAT_EVAL
/*
 * Find suggestions by evaluating expression "expr".  Used for "expr:" in
 * 'spellsuggest'.
 * The expression is expected to result in a list of lists, each holding a
 * word and a score; items that are not a list are ignored.
 */
    static void
spell_suggest_expr(suginfo_T *su, char_u *expr)
{
    list_T	*result;
    listitem_T	*item;
    char_u	*word;
    int		score;

    /* The work is split up in a few parts to avoid having to export
     * suginfo_T.
     * First evaluate the expression and get the resulting list. */
    result = eval_spell_expr(su->su_badword, expr);
    if (result != NULL)
    {
	for (item = result->lv_first; item != NULL; item = item->li_next)
	{
	    if (item->li_tv.v_type != VAR_LIST)
		continue;

	    /* Get the word and the score from the item.  Only use it when
	     * the score is valid and not above the maximum. */
	    score = get_spellword(item->li_tv.vval.v_list, &word);
	    if (score < 0 || score > su->su_maxscore)
		continue;
	    add_suggestion(su, &su->su_ga, word, su->su_badlen,
				       score, 0, TRUE, su->su_sallang, FALSE);
	}
	list_unref(result);
    }

    /* Remove bogus suggestions, sort and truncate at "maxcount". */
    check_suggestions(su, &su->su_ga);
    (void)cleanup_suggestions(&su->su_ga, su->su_maxscore, su->su_maxcount);
}
#endif
3822 
3823 /*
3824  * Find suggestions in file "fname".  Used for "file:" in 'spellsuggest'.
3825  */
3826     static void
3827 spell_suggest_file(suginfo_T *su, char_u *fname)
3828 {
3829     FILE	*fd;
3830     char_u	line[MAXWLEN * 2];
3831     char_u	*p;
3832     int		len;
3833     char_u	cword[MAXWLEN];
3834 
3835     /* Open the file. */
3836     fd = mch_fopen((char *)fname, "r");
3837     if (fd == NULL)
3838     {
3839 	semsg(_(e_notopen), fname);
3840 	return;
3841     }
3842 
3843     /* Read it line by line. */
3844     while (!vim_fgets(line, MAXWLEN * 2, fd) && !got_int)
3845     {
3846 	line_breakcheck();
3847 
3848 	p = vim_strchr(line, '/');
3849 	if (p == NULL)
3850 	    continue;	    /* No Tab found, just skip the line. */
3851 	*p++ = NUL;
3852 	if (STRICMP(su->su_badword, line) == 0)
3853 	{
3854 	    /* Match!  Isolate the good word, until CR or NL. */
3855 	    for (len = 0; p[len] >= ' '; ++len)
3856 		;
3857 	    p[len] = NUL;
3858 
3859 	    /* If the suggestion doesn't have specific case duplicate the case
3860 	     * of the bad word. */
3861 	    if (captype(p, NULL) == 0)
3862 	    {
3863 		make_case_word(p, cword, su->su_badflags);
3864 		p = cword;
3865 	    }
3866 
3867 	    add_suggestion(su, &su->su_ga, p, su->su_badlen,
3868 				  SCORE_FILE, 0, TRUE, su->su_sallang, FALSE);
3869 	}
3870     }
3871 
3872     fclose(fd);
3873 
3874     /* Remove bogus suggestions, sort and truncate at "maxcount". */
3875     check_suggestions(su, &su->su_ga);
3876     (void)cleanup_suggestions(&su->su_ga, su->su_maxscore, su->su_maxcount);
3877 }
3878 
3879 /*
3880  * Find suggestions for the internal method indicated by "sps_flags".
3881  */
3882     static void
3883 spell_suggest_intern(suginfo_T *su, int interactive)
3884 {
3885     /*
3886      * Load the .sug file(s) that are available and not done yet.
3887      */
3888     suggest_load_files();
3889 
3890     /*
3891      * 1. Try special cases, such as repeating a word: "the the" -> "the".
3892      *
3893      * Set a maximum score to limit the combination of operations that is
3894      * tried.
3895      */
3896     suggest_try_special(su);
3897 
3898     /*
3899      * 2. Try inserting/deleting/swapping/changing a letter, use REP entries
3900      *    from the .aff file and inserting a space (split the word).
3901      */
3902     suggest_try_change(su);
3903 
3904     /* For the resulting top-scorers compute the sound-a-like score. */
3905     if (sps_flags & SPS_DOUBLE)
3906 	score_comp_sal(su);
3907 
3908     /*
3909      * 3. Try finding sound-a-like words.
3910      */
3911     if ((sps_flags & SPS_FAST) == 0)
3912     {
3913 	if (sps_flags & SPS_BEST)
3914 	    /* Adjust the word score for the suggestions found so far for how
3915 	     * they sounds like. */
3916 	    rescore_suggestions(su);
3917 
3918 	/*
3919 	 * While going through the soundfold tree "su_maxscore" is the score
3920 	 * for the soundfold word, limits the changes that are being tried,
3921 	 * and "su_sfmaxscore" the rescored score, which is set by
3922 	 * cleanup_suggestions().
3923 	 * First find words with a small edit distance, because this is much
3924 	 * faster and often already finds the top-N suggestions.  If we didn't
3925 	 * find many suggestions try again with a higher edit distance.
3926 	 * "sl_sounddone" is used to avoid doing the same word twice.
3927 	 */
3928 	suggest_try_soundalike_prep();
3929 	su->su_maxscore = SCORE_SFMAX1;
3930 	su->su_sfmaxscore = SCORE_MAXINIT * 3;
3931 	suggest_try_soundalike(su);
3932 	if (su->su_ga.ga_len < SUG_CLEAN_COUNT(su))
3933 	{
3934 	    /* We didn't find enough matches, try again, allowing more
3935 	     * changes to the soundfold word. */
3936 	    su->su_maxscore = SCORE_SFMAX2;
3937 	    suggest_try_soundalike(su);
3938 	    if (su->su_ga.ga_len < SUG_CLEAN_COUNT(su))
3939 	    {
3940 		/* Still didn't find enough matches, try again, allowing even
3941 		 * more changes to the soundfold word. */
3942 		su->su_maxscore = SCORE_SFMAX3;
3943 		suggest_try_soundalike(su);
3944 	    }
3945 	}
3946 	su->su_maxscore = su->su_sfmaxscore;
3947 	suggest_try_soundalike_finish();
3948     }
3949 
3950     /* When CTRL-C was hit while searching do show the results.  Only clear
3951      * got_int when using a command, not for spellsuggest(). */
3952     ui_breakcheck();
3953     if (interactive && got_int)
3954     {
3955 	(void)vgetc();
3956 	got_int = FALSE;
3957     }
3958 
3959     if ((sps_flags & SPS_DOUBLE) == 0 && su->su_ga.ga_len != 0)
3960     {
3961 	if (sps_flags & SPS_BEST)
3962 	    /* Adjust the word score for how it sounds like. */
3963 	    rescore_suggestions(su);
3964 
3965 	/* Remove bogus suggestions, sort and truncate at "maxcount". */
3966 	check_suggestions(su, &su->su_ga);
3967 	(void)cleanup_suggestions(&su->su_ga, su->su_maxscore, su->su_maxcount);
3968     }
3969 }
3970 
3971 /*
3972  * Free the info put in "*su" by spell_find_suggest().
3973  */
3974     static void
3975 spell_find_cleanup(suginfo_T *su)
3976 {
3977     int		i;
3978 
3979     /* Free the suggestions. */
3980     for (i = 0; i < su->su_ga.ga_len; ++i)
3981 	vim_free(SUG(su->su_ga, i).st_word);
3982     ga_clear(&su->su_ga);
3983     for (i = 0; i < su->su_sga.ga_len; ++i)
3984 	vim_free(SUG(su->su_sga, i).st_word);
3985     ga_clear(&su->su_sga);
3986 
3987     /* Free the banned words. */
3988     hash_clear_all(&su->su_banned, 0);
3989 }
3990 
3991 /*
3992  * Make a copy of "word", with the first letter upper or lower cased, to
3993  * "wcopy[MAXWLEN]".  "word" must not be empty.
3994  * The result is NUL terminated.
3995  */
3996     void
3997 onecap_copy(
3998     char_u	*word,
3999     char_u	*wcopy,
4000     int		upper)	    /* TRUE: first letter made upper case */
4001 {
4002     char_u	*p;
4003     int		c;
4004     int		l;
4005 
4006     p = word;
4007     if (has_mbyte)
4008 	c = mb_cptr2char_adv(&p);
4009     else
4010 	c = *p++;
4011     if (upper)
4012 	c = SPELL_TOUPPER(c);
4013     else
4014 	c = SPELL_TOFOLD(c);
4015     if (has_mbyte)
4016 	l = mb_char2bytes(c, wcopy);
4017     else
4018     {
4019 	l = 1;
4020 	wcopy[0] = c;
4021     }
4022     vim_strncpy(wcopy + l, p, MAXWLEN - l - 1);
4023 }
4024 
4025 /*
4026  * Make a copy of "word" with all the letters upper cased into
4027  * "wcopy[MAXWLEN]".  The result is NUL terminated.
4028  */
4029     static void
4030 allcap_copy(char_u *word, char_u *wcopy)
4031 {
4032     char_u	*s;
4033     char_u	*d;
4034     int		c;
4035 
4036     d = wcopy;
4037     for (s = word; *s != NUL; )
4038     {
4039 	if (has_mbyte)
4040 	    c = mb_cptr2char_adv(&s);
4041 	else
4042 	    c = *s++;
4043 
4044 	/* We only change 0xdf to SS when we are certain latin1 is used.  It
4045 	 * would cause weird errors in other 8-bit encodings. */
4046 	if (enc_latin1like && c == 0xdf)
4047 	{
4048 	    c = 'S';
4049 	    if (d - wcopy >= MAXWLEN - 1)
4050 		break;
4051 	    *d++ = c;
4052 	}
4053 	else
4054 	    c = SPELL_TOUPPER(c);
4055 
4056 	if (has_mbyte)
4057 	{
4058 	    if (d - wcopy >= MAXWLEN - MB_MAXBYTES)
4059 		break;
4060 	    d += mb_char2bytes(c, d);
4061 	}
4062 	else
4063 	{
4064 	    if (d - wcopy >= MAXWLEN - 1)
4065 		break;
4066 	    *d++ = c;
4067 	}
4068     }
4069     *d = NUL;
4070 }
4071 
4072 /*
4073  * Try finding suggestions by recognizing specific situations.
4074  */
4075     static void
4076 suggest_try_special(suginfo_T *su)
4077 {
4078     char_u	*p;
4079     size_t	len;
4080     int		c;
4081     char_u	word[MAXWLEN];
4082 
4083     /*
4084      * Recognize a word that is repeated: "the the".
4085      */
4086     p = skiptowhite(su->su_fbadword);
4087     len = p - su->su_fbadword;
4088     p = skipwhite(p);
4089     if (STRLEN(p) == len && STRNCMP(su->su_fbadword, p, len) == 0)
4090     {
4091 	/* Include badflags: if the badword is onecap or allcap
4092 	 * use that for the goodword too: "The the" -> "The". */
4093 	c = su->su_fbadword[len];
4094 	su->su_fbadword[len] = NUL;
4095 	make_case_word(su->su_fbadword, word, su->su_badflags);
4096 	su->su_fbadword[len] = c;
4097 
4098 	/* Give a soundalike score of 0, compute the score as if deleting one
4099 	 * character. */
4100 	add_suggestion(su, &su->su_ga, word, su->su_badlen,
4101 		       RESCORE(SCORE_REP, 0), 0, TRUE, su->su_sallang, FALSE);
4102     }
4103 }
4104 
/*
 * Change the 0 to 1 to measure how much time is spent in each state.
 * Output is appended to the file "suggestprof".
 * When disabled PROF_STORE() expands to nothing.
 */
#if 0
# define SUGGEST_PROFILE
proftime_T current;			/* start time of the current state */
proftime_T total;			/* start time of the whole walk */
proftime_T times[STATE_FINAL + 1];	/* time accumulated for each state */
long counts[STATE_FINAL + 1];		/* number of times each state was left */

/*
 * Reset all the counters and start the timers.
 */
    static void
prof_init(void)
{
    for (int i = 0; i <= STATE_FINAL; ++i)
    {
	profile_zero(&times[i]);
	counts[i] = 0;
    }
    profile_start(&current);
    profile_start(&total);
}

/* call before changing state */
    static void
prof_store(state_T state)
{
    profile_end(&current);
    profile_add(&times[state], &current);
    ++counts[state];
    profile_start(&current);
}
# define PROF_STORE(state) prof_store(state);

/*
 * Append the collected times to "suggestprof", using "name" as the title.
 * Nothing is written when the file cannot be opened.
 */
    static void
prof_report(char *name)
{
    FILE *fd = fopen("suggestprof", "a");

    profile_end(&total);

    /* fopen() returns NULL on failure, must not pass that to fprintf(). */
    if (fd == NULL)
	return;

    fprintf(fd, "-----------------------\n");
    fprintf(fd, "%s: %s\n", name, profile_msg(&total));
    for (int i = 0; i <= STATE_FINAL; ++i)
	fprintf(fd, "%d: %s (%ld)\n", i, profile_msg(&times[i]), counts[i]);
    fclose(fd);
}
#else
# define PROF_STORE(state)
#endif
4154 
4155 /*
4156  * Try finding suggestions by adding/removing/swapping letters.
4157  */
4158     static void
4159 suggest_try_change(suginfo_T *su)
4160 {
4161     char_u	fword[MAXWLEN];	    /* copy of the bad word, case-folded */
4162     int		n;
4163     char_u	*p;
4164     int		lpi;
4165     langp_T	*lp;
4166 
4167     /* We make a copy of the case-folded bad word, so that we can modify it
4168      * to find matches (esp. REP items).  Append some more text, changing
4169      * chars after the bad word may help. */
4170     STRCPY(fword, su->su_fbadword);
4171     n = (int)STRLEN(fword);
4172     p = su->su_badptr + su->su_badlen;
4173     (void)spell_casefold(p, (int)STRLEN(p), fword + n, MAXWLEN - n);
4174 
4175     for (lpi = 0; lpi < curwin->w_s->b_langp.ga_len; ++lpi)
4176     {
4177 	lp = LANGP_ENTRY(curwin->w_s->b_langp, lpi);
4178 
4179 	/* If reloading a spell file fails it's still in the list but
4180 	 * everything has been cleared. */
4181 	if (lp->lp_slang->sl_fbyts == NULL)
4182 	    continue;
4183 
4184 	/* Try it for this language.  Will add possible suggestions. */
4185 #ifdef SUGGEST_PROFILE
4186 	prof_init();
4187 #endif
4188 	suggest_trie_walk(su, lp, fword, FALSE);
4189 #ifdef SUGGEST_PROFILE
4190 	prof_report("try_change");
4191 #endif
4192     }
4193 }
4194 
/* Check the maximum score, if we go over it we won't try this change.
 * Evaluates to non-zero when the score at "stack[depth]" plus "add" is still
 * below "su->su_maxscore".  The arguments are in parentheses, so that
 * expressions such as "&info" or "stack + 1" can be passed. */
#define TRY_DEEPER(su, stack, depth, add) \
		((stack)[(depth)].ts_score + (add) < (su)->su_maxscore)
4198 
4199 /*
4200  * Try finding suggestions by adding/removing/swapping letters.
4201  *
4202  * This uses a state machine.  At each node in the tree we try various
4203  * operations.  When trying if an operation works "depth" is increased and the
4204  * stack[] is used to store info.  This allows combinations, thus insert one
4205  * character, replace one and delete another.  The number of changes is
4206  * limited by su->su_maxscore.
4207  *
4208  * After implementing this I noticed an article by Kemal Oflazer that
4209  * describes something similar: "Error-tolerant Finite State Recognition with
4210  * Applications to Morphological Analysis and Spelling Correction" (1996).
4211  * The implementation in the article is simplified and requires a stack of
4212  * unknown depth.  The implementation here only needs a stack depth equal to
4213  * the length of the word.
4214  *
4215  * This is also used for the sound-folded word, "soundfold" is TRUE then.
4216  * The mechanism is the same, but we find a match with a sound-folded word
4217  * that comes from one or more original words.  Each of these words may be
4218  * added, this is done by add_sound_suggest().
4219  * Don't use:
4220  *	the prefix tree or the keep-case tree
4221  *	"su->su_badlen"
4222  *	anything to do with upper and lower case
4223  *	anything to do with word or non-word characters ("spell_iswordp()")
4224  *	banned words
4225  *	word flags (rare, region, compounding)
4226  *	word splitting for now
4227  *	"similar_chars()"
4228  *	use "slang->sl_repsal" instead of "lp->lp_replang->sl_rep"
4229  */
4230     static void
4231 suggest_trie_walk(
4232     suginfo_T	*su,
4233     langp_T	*lp,
4234     char_u	*fword,
4235     int		soundfold)
4236 {
4237     char_u	tword[MAXWLEN];	    /* good word collected so far */
4238     trystate_T	stack[MAXWLEN];
4239     char_u	preword[MAXWLEN * 3]; /* word found with proper case;
4240 				       * concatenation of prefix compound
4241 				       * words and split word.  NUL terminated
4242 				       * when going deeper but not when coming
4243 				       * back. */
4244     char_u	compflags[MAXWLEN];	/* compound flags, one for each word */
4245     trystate_T	*sp;
4246     int		newscore;
4247     int		score;
4248     char_u	*byts, *fbyts, *pbyts;
4249     idx_T	*idxs, *fidxs, *pidxs;
4250     int		depth;
4251     int		c, c2, c3;
4252     int		n = 0;
4253     int		flags;
4254     garray_T	*gap;
4255     idx_T	arridx;
4256     int		len;
4257     char_u	*p;
4258     fromto_T	*ftp;
4259     int		fl = 0, tl;
4260     int		repextra = 0;	    /* extra bytes in fword[] from REP item */
4261     slang_T	*slang = lp->lp_slang;
4262     int		fword_ends;
4263     int		goodword_ends;
4264 #ifdef DEBUG_TRIEWALK
4265     /* Stores the name of the change made at each level. */
4266     char_u	changename[MAXWLEN][80];
4267 #endif
4268     int		breakcheckcount = 1000;
4269     int		compound_ok;
4270 
4271     /*
4272      * Go through the whole case-fold tree, try changes at each node.
4273      * "tword[]" contains the word collected from nodes in the tree.
4274      * "fword[]" the word we are trying to match with (initially the bad
4275      * word).
4276      */
4277     depth = 0;
4278     sp = &stack[0];
4279     vim_memset(sp, 0, sizeof(trystate_T));
4280     sp->ts_curi = 1;
4281 
4282     if (soundfold)
4283     {
4284 	/* Going through the soundfold tree. */
4285 	byts = fbyts = slang->sl_sbyts;
4286 	idxs = fidxs = slang->sl_sidxs;
4287 	pbyts = NULL;
4288 	pidxs = NULL;
4289 	sp->ts_prefixdepth = PFD_NOPREFIX;
4290 	sp->ts_state = STATE_START;
4291     }
4292     else
4293     {
4294 	/*
4295 	 * When there are postponed prefixes we need to use these first.  At
4296 	 * the end of the prefix we continue in the case-fold tree.
4297 	 */
4298 	fbyts = slang->sl_fbyts;
4299 	fidxs = slang->sl_fidxs;
4300 	pbyts = slang->sl_pbyts;
4301 	pidxs = slang->sl_pidxs;
4302 	if (pbyts != NULL)
4303 	{
4304 	    byts = pbyts;
4305 	    idxs = pidxs;
4306 	    sp->ts_prefixdepth = PFD_PREFIXTREE;
4307 	    sp->ts_state = STATE_NOPREFIX;	/* try without prefix first */
4308 	}
4309 	else
4310 	{
4311 	    byts = fbyts;
4312 	    idxs = fidxs;
4313 	    sp->ts_prefixdepth = PFD_NOPREFIX;
4314 	    sp->ts_state = STATE_START;
4315 	}
4316     }
4317 
4318     /*
4319      * Loop to find all suggestions.  At each round we either:
4320      * - For the current state try one operation, advance "ts_curi",
4321      *   increase "depth".
4322      * - When a state is done go to the next, set "ts_state".
4323      * - When all states are tried decrease "depth".
4324      */
4325     while (depth >= 0 && !got_int)
4326     {
4327 	sp = &stack[depth];
4328 	switch (sp->ts_state)
4329 	{
4330 	case STATE_START:
4331 	case STATE_NOPREFIX:
4332 	    /*
4333 	     * Start of node: Deal with NUL bytes, which means
4334 	     * tword[] may end here.
4335 	     */
4336 	    arridx = sp->ts_arridx;	    /* current node in the tree */
4337 	    len = byts[arridx];		    /* bytes in this node */
4338 	    arridx += sp->ts_curi;	    /* index of current byte */
4339 
4340 	    if (sp->ts_prefixdepth == PFD_PREFIXTREE)
4341 	    {
4342 		/* Skip over the NUL bytes, we use them later. */
4343 		for (n = 0; n < len && byts[arridx + n] == 0; ++n)
4344 		    ;
4345 		sp->ts_curi += n;
4346 
4347 		/* Always past NUL bytes now. */
4348 		n = (int)sp->ts_state;
4349 		PROF_STORE(sp->ts_state)
4350 		sp->ts_state = STATE_ENDNUL;
4351 		sp->ts_save_badflags = su->su_badflags;
4352 
4353 		/* At end of a prefix or at start of prefixtree: check for
4354 		 * following word. */
4355 		if (byts[arridx] == 0 || n == (int)STATE_NOPREFIX)
4356 		{
4357 		    /* Set su->su_badflags to the caps type at this position.
4358 		     * Use the caps type until here for the prefix itself. */
4359 		    if (has_mbyte)
4360 			n = nofold_len(fword, sp->ts_fidx, su->su_badptr);
4361 		    else
4362 			n = sp->ts_fidx;
4363 		    flags = badword_captype(su->su_badptr, su->su_badptr + n);
4364 		    su->su_badflags = badword_captype(su->su_badptr + n,
4365 					       su->su_badptr + su->su_badlen);
4366 #ifdef DEBUG_TRIEWALK
4367 		    sprintf(changename[depth], "prefix");
4368 #endif
4369 		    go_deeper(stack, depth, 0);
4370 		    ++depth;
4371 		    sp = &stack[depth];
4372 		    sp->ts_prefixdepth = depth - 1;
4373 		    byts = fbyts;
4374 		    idxs = fidxs;
4375 		    sp->ts_arridx = 0;
4376 
4377 		    /* Move the prefix to preword[] with the right case
4378 		     * and make find_keepcap_word() works. */
4379 		    tword[sp->ts_twordlen] = NUL;
4380 		    make_case_word(tword + sp->ts_splitoff,
4381 					  preword + sp->ts_prewordlen, flags);
4382 		    sp->ts_prewordlen = (char_u)STRLEN(preword);
4383 		    sp->ts_splitoff = sp->ts_twordlen;
4384 		}
4385 		break;
4386 	    }
4387 
4388 	    if (sp->ts_curi > len || byts[arridx] != 0)
4389 	    {
4390 		/* Past bytes in node and/or past NUL bytes. */
4391 		PROF_STORE(sp->ts_state)
4392 		sp->ts_state = STATE_ENDNUL;
4393 		sp->ts_save_badflags = su->su_badflags;
4394 		break;
4395 	    }
4396 
4397 	    /*
4398 	     * End of word in tree.
4399 	     */
4400 	    ++sp->ts_curi;		/* eat one NUL byte */
4401 
4402 	    flags = (int)idxs[arridx];
4403 
4404 	    /* Skip words with the NOSUGGEST flag. */
4405 	    if (flags & WF_NOSUGGEST)
4406 		break;
4407 
4408 	    fword_ends = (fword[sp->ts_fidx] == NUL
4409 			   || (soundfold
4410 			       ? VIM_ISWHITE(fword[sp->ts_fidx])
4411 			       : !spell_iswordp(fword + sp->ts_fidx, curwin)));
4412 	    tword[sp->ts_twordlen] = NUL;
4413 
4414 	    if (sp->ts_prefixdepth <= PFD_NOTSPECIAL
4415 					&& (sp->ts_flags & TSF_PREFIXOK) == 0)
4416 	    {
4417 		/* There was a prefix before the word.  Check that the prefix
4418 		 * can be used with this word. */
4419 		/* Count the length of the NULs in the prefix.  If there are
4420 		 * none this must be the first try without a prefix.  */
4421 		n = stack[sp->ts_prefixdepth].ts_arridx;
4422 		len = pbyts[n++];
4423 		for (c = 0; c < len && pbyts[n + c] == 0; ++c)
4424 		    ;
4425 		if (c > 0)
4426 		{
4427 		    c = valid_word_prefix(c, n, flags,
4428 				       tword + sp->ts_splitoff, slang, FALSE);
4429 		    if (c == 0)
4430 			break;
4431 
4432 		    /* Use the WF_RARE flag for a rare prefix. */
4433 		    if (c & WF_RAREPFX)
4434 			flags |= WF_RARE;
4435 
4436 		    /* Tricky: when checking for both prefix and compounding
4437 		     * we run into the prefix flag first.
4438 		     * Remember that it's OK, so that we accept the prefix
4439 		     * when arriving at a compound flag. */
4440 		    sp->ts_flags |= TSF_PREFIXOK;
4441 		}
4442 	    }
4443 
4444 	    /* Check NEEDCOMPOUND: can't use word without compounding.  Do try
4445 	     * appending another compound word below. */
4446 	    if (sp->ts_complen == sp->ts_compsplit && fword_ends
4447 						     && (flags & WF_NEEDCOMP))
4448 		goodword_ends = FALSE;
4449 	    else
4450 		goodword_ends = TRUE;
4451 
4452 	    p = NULL;
4453 	    compound_ok = TRUE;
4454 	    if (sp->ts_complen > sp->ts_compsplit)
4455 	    {
4456 		if (slang->sl_nobreak)
4457 		{
4458 		    /* There was a word before this word.  When there was no
4459 		     * change in this word (it was correct) add the first word
4460 		     * as a suggestion.  If this word was corrected too, we
4461 		     * need to check if a correct word follows. */
4462 		    if (sp->ts_fidx - sp->ts_splitfidx
4463 					  == sp->ts_twordlen - sp->ts_splitoff
4464 			    && STRNCMP(fword + sp->ts_splitfidx,
4465 					tword + sp->ts_splitoff,
4466 					 sp->ts_fidx - sp->ts_splitfidx) == 0)
4467 		    {
4468 			preword[sp->ts_prewordlen] = NUL;
4469 			newscore = score_wordcount_adj(slang, sp->ts_score,
4470 						 preword + sp->ts_prewordlen,
4471 						 sp->ts_prewordlen > 0);
4472 			/* Add the suggestion if the score isn't too bad. */
4473 			if (newscore <= su->su_maxscore)
4474 			    add_suggestion(su, &su->su_ga, preword,
4475 				    sp->ts_splitfidx - repextra,
4476 				    newscore, 0, FALSE,
4477 				    lp->lp_sallang, FALSE);
4478 			break;
4479 		    }
4480 		}
4481 		else
4482 		{
4483 		    /* There was a compound word before this word.  If this
4484 		     * word does not support compounding then give up
4485 		     * (splitting is tried for the word without compound
4486 		     * flag). */
4487 		    if (((unsigned)flags >> 24) == 0
4488 			    || sp->ts_twordlen - sp->ts_splitoff
4489 						       < slang->sl_compminlen)
4490 			break;
4491 		    /* For multi-byte chars check character length against
4492 		     * COMPOUNDMIN. */
4493 		    if (has_mbyte
4494 			    && slang->sl_compminlen > 0
4495 			    && mb_charlen(tword + sp->ts_splitoff)
4496 						       < slang->sl_compminlen)
4497 			break;
4498 
4499 		    compflags[sp->ts_complen] = ((unsigned)flags >> 24);
4500 		    compflags[sp->ts_complen + 1] = NUL;
4501 		    vim_strncpy(preword + sp->ts_prewordlen,
4502 			    tword + sp->ts_splitoff,
4503 			    sp->ts_twordlen - sp->ts_splitoff);
4504 
4505 		    /* Verify CHECKCOMPOUNDPATTERN  rules. */
4506 		    if (match_checkcompoundpattern(preword,  sp->ts_prewordlen,
4507 							  &slang->sl_comppat))
4508 			compound_ok = FALSE;
4509 
4510 		    if (compound_ok)
4511 		    {
4512 			p = preword;
4513 			while (*skiptowhite(p) != NUL)
4514 			    p = skipwhite(skiptowhite(p));
4515 			if (fword_ends && !can_compound(slang, p,
4516 						compflags + sp->ts_compsplit))
4517 			    /* Compound is not allowed.  But it may still be
4518 			     * possible if we add another (short) word. */
4519 			    compound_ok = FALSE;
4520 		    }
4521 
4522 		    /* Get pointer to last char of previous word. */
4523 		    p = preword + sp->ts_prewordlen;
4524 		    MB_PTR_BACK(preword, p);
4525 		}
4526 	    }
4527 
4528 	    /*
4529 	     * Form the word with proper case in preword.
4530 	     * If there is a word from a previous split, append.
4531 	     * For the soundfold tree don't change the case, simply append.
4532 	     */
4533 	    if (soundfold)
4534 		STRCPY(preword + sp->ts_prewordlen, tword + sp->ts_splitoff);
4535 	    else if (flags & WF_KEEPCAP)
4536 		/* Must find the word in the keep-case tree. */
4537 		find_keepcap_word(slang, tword + sp->ts_splitoff,
4538 						 preword + sp->ts_prewordlen);
4539 	    else
4540 	    {
4541 		/* Include badflags: If the badword is onecap or allcap
4542 		 * use that for the goodword too.  But if the badword is
4543 		 * allcap and it's only one char long use onecap. */
4544 		c = su->su_badflags;
4545 		if ((c & WF_ALLCAP)
4546 			&& su->su_badlen == (*mb_ptr2len)(su->su_badptr))
4547 		    c = WF_ONECAP;
4548 		c |= flags;
4549 
4550 		/* When appending a compound word after a word character don't
4551 		 * use Onecap. */
4552 		if (p != NULL && spell_iswordp_nmw(p, curwin))
4553 		    c &= ~WF_ONECAP;
4554 		make_case_word(tword + sp->ts_splitoff,
4555 					      preword + sp->ts_prewordlen, c);
4556 	    }
4557 
4558 	    if (!soundfold)
4559 	    {
4560 		/* Don't use a banned word.  It may appear again as a good
4561 		 * word, thus remember it. */
4562 		if (flags & WF_BANNED)
4563 		{
4564 		    add_banned(su, preword + sp->ts_prewordlen);
4565 		    break;
4566 		}
4567 		if ((sp->ts_complen == sp->ts_compsplit
4568 			    && WAS_BANNED(su, preword + sp->ts_prewordlen))
4569 						   || WAS_BANNED(su, preword))
4570 		{
4571 		    if (slang->sl_compprog == NULL)
4572 			break;
4573 		    /* the word so far was banned but we may try compounding */
4574 		    goodword_ends = FALSE;
4575 		}
4576 	    }
4577 
4578 	    newscore = 0;
4579 	    if (!soundfold)	/* soundfold words don't have flags */
4580 	    {
4581 		if ((flags & WF_REGION)
4582 			    && (((unsigned)flags >> 16) & lp->lp_region) == 0)
4583 		    newscore += SCORE_REGION;
4584 		if (flags & WF_RARE)
4585 		    newscore += SCORE_RARE;
4586 
4587 		if (!spell_valid_case(su->su_badflags,
4588 				  captype(preword + sp->ts_prewordlen, NULL)))
4589 		    newscore += SCORE_ICASE;
4590 	    }
4591 
4592 	    /* TODO: how about splitting in the soundfold tree? */
4593 	    if (fword_ends
4594 		    && goodword_ends
4595 		    && sp->ts_fidx >= sp->ts_fidxtry
4596 		    && compound_ok)
4597 	    {
4598 		/* The badword also ends: add suggestions. */
4599 #ifdef DEBUG_TRIEWALK
4600 		if (soundfold && STRCMP(preword, "smwrd") == 0)
4601 		{
4602 		    int	    j;
4603 
4604 		    /* print the stack of changes that brought us here */
4605 		    smsg("------ %s -------", fword);
4606 		    for (j = 0; j < depth; ++j)
4607 			smsg("%s", changename[j]);
4608 		}
4609 #endif
4610 		if (soundfold)
4611 		{
4612 		    /* For soundfolded words we need to find the original
4613 		     * words, the edit distance and then add them. */
4614 		    add_sound_suggest(su, preword, sp->ts_score, lp);
4615 		}
4616 		else if (sp->ts_fidx > 0)
4617 		{
4618 		    /* Give a penalty when changing non-word char to word
4619 		     * char, e.g., "thes," -> "these". */
4620 		    p = fword + sp->ts_fidx;
4621 		    MB_PTR_BACK(fword, p);
4622 		    if (!spell_iswordp(p, curwin))
4623 		    {
4624 			p = preword + STRLEN(preword);
4625 			MB_PTR_BACK(preword, p);
4626 			if (spell_iswordp(p, curwin))
4627 			    newscore += SCORE_NONWORD;
4628 		    }
4629 
4630 		    /* Give a bonus to words seen before. */
4631 		    score = score_wordcount_adj(slang,
4632 						sp->ts_score + newscore,
4633 						preword + sp->ts_prewordlen,
4634 						sp->ts_prewordlen > 0);
4635 
4636 		    /* Add the suggestion if the score isn't too bad. */
4637 		    if (score <= su->su_maxscore)
4638 		    {
4639 			add_suggestion(su, &su->su_ga, preword,
4640 				    sp->ts_fidx - repextra,
4641 				    score, 0, FALSE, lp->lp_sallang, FALSE);
4642 
4643 			if (su->su_badflags & WF_MIXCAP)
4644 			{
4645 			    /* We really don't know if the word should be
4646 			     * upper or lower case, add both. */
4647 			    c = captype(preword, NULL);
4648 			    if (c == 0 || c == WF_ALLCAP)
4649 			    {
4650 				make_case_word(tword + sp->ts_splitoff,
4651 					      preword + sp->ts_prewordlen,
4652 						      c == 0 ? WF_ALLCAP : 0);
4653 
4654 				add_suggestion(su, &su->su_ga, preword,
4655 					sp->ts_fidx - repextra,
4656 					score + SCORE_ICASE, 0, FALSE,
4657 					lp->lp_sallang, FALSE);
4658 			    }
4659 			}
4660 		    }
4661 		}
4662 	    }
4663 
4664 	    /*
4665 	     * Try word split and/or compounding.
4666 	     */
4667 	    if ((sp->ts_fidx >= sp->ts_fidxtry || fword_ends)
4668 		    /* Don't split halfway a character. */
4669 		    && (!has_mbyte || sp->ts_tcharlen == 0))
4670 	    {
4671 		int	try_compound;
4672 		int	try_split;
4673 
4674 		/* If past the end of the bad word don't try a split.
4675 		 * Otherwise try changing the next word.  E.g., find
4676 		 * suggestions for "the the" where the second "the" is
4677 		 * different.  It's done like a split.
4678 		 * TODO: word split for soundfold words */
4679 		try_split = (sp->ts_fidx - repextra < su->su_badlen)
4680 								&& !soundfold;
4681 
4682 		/* Get here in several situations:
4683 		 * 1. The word in the tree ends:
4684 		 *    If the word allows compounding try that.  Otherwise try
4685 		 *    a split by inserting a space.  For both check that a
4686 		 *    valid words starts at fword[sp->ts_fidx].
4687 		 *    For NOBREAK do like compounding to be able to check if
4688 		 *    the next word is valid.
4689 		 * 2. The badword does end, but it was due to a change (e.g.,
4690 		 *    a swap).  No need to split, but do check that the
4691 		 *    following word is valid.
4692 		 * 3. The badword and the word in the tree end.  It may still
4693 		 *    be possible to compound another (short) word.
4694 		 */
4695 		try_compound = FALSE;
4696 		if (!soundfold
4697 			&& !slang->sl_nocompoundsugs
4698 			&& slang->sl_compprog != NULL
4699 			&& ((unsigned)flags >> 24) != 0
4700 			&& sp->ts_twordlen - sp->ts_splitoff
4701 						       >= slang->sl_compminlen
4702 			&& (!has_mbyte
4703 			    || slang->sl_compminlen == 0
4704 			    || mb_charlen(tword + sp->ts_splitoff)
4705 						      >= slang->sl_compminlen)
4706 			&& (slang->sl_compsylmax < MAXWLEN
4707 			    || sp->ts_complen + 1 - sp->ts_compsplit
4708 							  < slang->sl_compmax)
4709 			&& (can_be_compound(sp, slang,
4710 					 compflags, ((unsigned)flags >> 24))))
4711 
4712 		{
4713 		    try_compound = TRUE;
4714 		    compflags[sp->ts_complen] = ((unsigned)flags >> 24);
4715 		    compflags[sp->ts_complen + 1] = NUL;
4716 		}
4717 
4718 		/* For NOBREAK we never try splitting, it won't make any word
4719 		 * valid. */
4720 		if (slang->sl_nobreak && !slang->sl_nocompoundsugs)
4721 		    try_compound = TRUE;
4722 
4723 		/* If we could add a compound word, and it's also possible to
4724 		 * split at this point, do the split first and set
4725 		 * TSF_DIDSPLIT to avoid doing it again. */
4726 		else if (!fword_ends
4727 			&& try_compound
4728 			&& (sp->ts_flags & TSF_DIDSPLIT) == 0)
4729 		{
4730 		    try_compound = FALSE;
4731 		    sp->ts_flags |= TSF_DIDSPLIT;
4732 		    --sp->ts_curi;	    /* do the same NUL again */
4733 		    compflags[sp->ts_complen] = NUL;
4734 		}
4735 		else
4736 		    sp->ts_flags &= ~TSF_DIDSPLIT;
4737 
4738 		if (try_split || try_compound)
4739 		{
4740 		    if (!try_compound && (!fword_ends || !goodword_ends))
4741 		    {
4742 			/* If we're going to split need to check that the
4743 			 * words so far are valid for compounding.  If there
4744 			 * is only one word it must not have the NEEDCOMPOUND
4745 			 * flag. */
4746 			if (sp->ts_complen == sp->ts_compsplit
4747 						     && (flags & WF_NEEDCOMP))
4748 			    break;
4749 			p = preword;
4750 			while (*skiptowhite(p) != NUL)
4751 			    p = skipwhite(skiptowhite(p));
4752 			if (sp->ts_complen > sp->ts_compsplit
4753 				&& !can_compound(slang, p,
4754 						compflags + sp->ts_compsplit))
4755 			    break;
4756 
4757 			if (slang->sl_nosplitsugs)
4758 			    newscore += SCORE_SPLIT_NO;
4759 			else
4760 			    newscore += SCORE_SPLIT;
4761 
4762 			/* Give a bonus to words seen before. */
4763 			newscore = score_wordcount_adj(slang, newscore,
4764 					   preword + sp->ts_prewordlen, TRUE);
4765 		    }
4766 
4767 		    if (TRY_DEEPER(su, stack, depth, newscore))
4768 		    {
4769 			go_deeper(stack, depth, newscore);
4770 #ifdef DEBUG_TRIEWALK
4771 			if (!try_compound && !fword_ends)
4772 			    sprintf(changename[depth], "%.*s-%s: split",
4773 				 sp->ts_twordlen, tword, fword + sp->ts_fidx);
4774 			else
4775 			    sprintf(changename[depth], "%.*s-%s: compound",
4776 				 sp->ts_twordlen, tword, fword + sp->ts_fidx);
4777 #endif
4778 			/* Save things to be restored at STATE_SPLITUNDO. */
4779 			sp->ts_save_badflags = su->su_badflags;
4780 			PROF_STORE(sp->ts_state)
4781 			sp->ts_state = STATE_SPLITUNDO;
4782 
4783 			++depth;
4784 			sp = &stack[depth];
4785 
4786 			/* Append a space to preword when splitting. */
4787 			if (!try_compound && !fword_ends)
4788 			    STRCAT(preword, " ");
4789 			sp->ts_prewordlen = (char_u)STRLEN(preword);
4790 			sp->ts_splitoff = sp->ts_twordlen;
4791 			sp->ts_splitfidx = sp->ts_fidx;
4792 
4793 			/* If the badword has a non-word character at this
4794 			 * position skip it.  That means replacing the
4795 			 * non-word character with a space.  Always skip a
4796 			 * character when the word ends.  But only when the
4797 			 * good word can end. */
4798 			if (((!try_compound && !spell_iswordp_nmw(fword
4799 							       + sp->ts_fidx,
4800 							       curwin))
4801 				    || fword_ends)
4802 				&& fword[sp->ts_fidx] != NUL
4803 				&& goodword_ends)
4804 			{
4805 			    int	    l;
4806 
4807 			    l = MB_PTR2LEN(fword + sp->ts_fidx);
4808 			    if (fword_ends)
4809 			    {
4810 				/* Copy the skipped character to preword. */
4811 				mch_memmove(preword + sp->ts_prewordlen,
4812 						      fword + sp->ts_fidx, l);
4813 				sp->ts_prewordlen += l;
4814 				preword[sp->ts_prewordlen] = NUL;
4815 			    }
4816 			    else
4817 				sp->ts_score -= SCORE_SPLIT - SCORE_SUBST;
4818 			    sp->ts_fidx += l;
4819 			}
4820 
4821 			/* When compounding include compound flag in
4822 			 * compflags[] (already set above).  When splitting we
4823 			 * may start compounding over again.  */
4824 			if (try_compound)
4825 			    ++sp->ts_complen;
4826 			else
4827 			    sp->ts_compsplit = sp->ts_complen;
4828 			sp->ts_prefixdepth = PFD_NOPREFIX;
4829 
4830 			/* set su->su_badflags to the caps type at this
4831 			 * position */
4832 			if (has_mbyte)
4833 			    n = nofold_len(fword, sp->ts_fidx, su->su_badptr);
4834 			else
4835 			    n = sp->ts_fidx;
4836 			su->su_badflags = badword_captype(su->su_badptr + n,
4837 					       su->su_badptr + su->su_badlen);
4838 
4839 			/* Restart at top of the tree. */
4840 			sp->ts_arridx = 0;
4841 
4842 			/* If there are postponed prefixes, try these too. */
4843 			if (pbyts != NULL)
4844 			{
4845 			    byts = pbyts;
4846 			    idxs = pidxs;
4847 			    sp->ts_prefixdepth = PFD_PREFIXTREE;
4848 			    PROF_STORE(sp->ts_state)
4849 			    sp->ts_state = STATE_NOPREFIX;
4850 			}
4851 		    }
4852 		}
4853 	    }
4854 	    break;
4855 
4856 	case STATE_SPLITUNDO:
4857 	    /* Undo the changes done for word split or compound word. */
4858 	    su->su_badflags = sp->ts_save_badflags;
4859 
4860 	    /* Continue looking for NUL bytes. */
4861 	    PROF_STORE(sp->ts_state)
4862 	    sp->ts_state = STATE_START;
4863 
4864 	    /* In case we went into the prefix tree. */
4865 	    byts = fbyts;
4866 	    idxs = fidxs;
4867 	    break;
4868 
4869 	case STATE_ENDNUL:
4870 	    /* Past the NUL bytes in the node. */
4871 	    su->su_badflags = sp->ts_save_badflags;
4872 	    if (fword[sp->ts_fidx] == NUL && sp->ts_tcharlen == 0)
4873 	    {
4874 		/* The badword ends, can't use STATE_PLAIN. */
4875 		PROF_STORE(sp->ts_state)
4876 		sp->ts_state = STATE_DEL;
4877 		break;
4878 	    }
4879 	    PROF_STORE(sp->ts_state)
4880 	    sp->ts_state = STATE_PLAIN;
4881 	    /* FALLTHROUGH */
4882 
4883 	case STATE_PLAIN:
4884 	    /*
4885 	     * Go over all possible bytes at this node, add each to tword[]
4886 	     * and use child node.  "ts_curi" is the index.
4887 	     */
4888 	    arridx = sp->ts_arridx;
4889 	    if (sp->ts_curi > byts[arridx])
4890 	    {
4891 		/* Done all bytes at this node, do next state.  When still at
4892 		 * already changed bytes skip the other tricks. */
4893 		PROF_STORE(sp->ts_state)
4894 		if (sp->ts_fidx >= sp->ts_fidxtry)
4895 		    sp->ts_state = STATE_DEL;
4896 		else
4897 		    sp->ts_state = STATE_FINAL;
4898 	    }
4899 	    else
4900 	    {
4901 		arridx += sp->ts_curi++;
4902 		c = byts[arridx];
4903 
4904 		/* Normal byte, go one level deeper.  If it's not equal to the
4905 		 * byte in the bad word adjust the score.  But don't even try
4906 		 * when the byte was already changed.  And don't try when we
4907 		 * just deleted this byte, accepting it is always cheaper than
4908 		 * delete + substitute. */
4909 		if (c == fword[sp->ts_fidx]
4910 			|| (sp->ts_tcharlen > 0 && sp->ts_isdiff != DIFF_NONE))
4911 		    newscore = 0;
4912 		else
4913 		    newscore = SCORE_SUBST;
4914 		if ((newscore == 0
4915 			    || (sp->ts_fidx >= sp->ts_fidxtry
4916 				&& ((sp->ts_flags & TSF_DIDDEL) == 0
4917 				    || c != fword[sp->ts_delidx])))
4918 			&& TRY_DEEPER(su, stack, depth, newscore))
4919 		{
4920 		    go_deeper(stack, depth, newscore);
4921 #ifdef DEBUG_TRIEWALK
4922 		    if (newscore > 0)
4923 			sprintf(changename[depth], "%.*s-%s: subst %c to %c",
4924 				sp->ts_twordlen, tword, fword + sp->ts_fidx,
4925 				fword[sp->ts_fidx], c);
4926 		    else
4927 			sprintf(changename[depth], "%.*s-%s: accept %c",
4928 				sp->ts_twordlen, tword, fword + sp->ts_fidx,
4929 				fword[sp->ts_fidx]);
4930 #endif
4931 		    ++depth;
4932 		    sp = &stack[depth];
4933 		    ++sp->ts_fidx;
4934 		    tword[sp->ts_twordlen++] = c;
4935 		    sp->ts_arridx = idxs[arridx];
4936 		    if (newscore == SCORE_SUBST)
4937 			sp->ts_isdiff = DIFF_YES;
4938 		    if (has_mbyte)
4939 		    {
4940 			/* Multi-byte characters are a bit complicated to
4941 			 * handle: They differ when any of the bytes differ
4942 			 * and then their length may also differ. */
4943 			if (sp->ts_tcharlen == 0)
4944 			{
4945 			    /* First byte. */
4946 			    sp->ts_tcharidx = 0;
4947 			    sp->ts_tcharlen = MB_BYTE2LEN(c);
4948 			    sp->ts_fcharstart = sp->ts_fidx - 1;
4949 			    sp->ts_isdiff = (newscore != 0)
4950 						       ? DIFF_YES : DIFF_NONE;
4951 			}
4952 			else if (sp->ts_isdiff == DIFF_INSERT)
4953 			    /* When inserting trail bytes don't advance in the
4954 			     * bad word. */
4955 			    --sp->ts_fidx;
4956 			if (++sp->ts_tcharidx == sp->ts_tcharlen)
4957 			{
4958 			    /* Last byte of character. */
4959 			    if (sp->ts_isdiff == DIFF_YES)
4960 			    {
4961 				/* Correct ts_fidx for the byte length of the
4962 				 * character (we didn't check that before). */
4963 				sp->ts_fidx = sp->ts_fcharstart
4964 					    + MB_PTR2LEN(
4965 						    fword + sp->ts_fcharstart);
4966 				/* For changing a composing character adjust
4967 				 * the score from SCORE_SUBST to
4968 				 * SCORE_SUBCOMP. */
4969 				if (enc_utf8
4970 					&& utf_iscomposing(
4971 					    utf_ptr2char(tword
4972 						+ sp->ts_twordlen
4973 							   - sp->ts_tcharlen))
4974 					&& utf_iscomposing(
4975 					    utf_ptr2char(fword
4976 							+ sp->ts_fcharstart)))
4977 				    sp->ts_score -=
4978 						  SCORE_SUBST - SCORE_SUBCOMP;
4979 
4980 				/* For a similar character adjust score from
4981 				 * SCORE_SUBST to SCORE_SIMILAR. */
4982 				else if (!soundfold
4983 					&& slang->sl_has_map
4984 					&& similar_chars(slang,
4985 					    mb_ptr2char(tword
4986 						+ sp->ts_twordlen
4987 							   - sp->ts_tcharlen),
4988 					    mb_ptr2char(fword
4989 							+ sp->ts_fcharstart)))
4990 				    sp->ts_score -=
4991 						  SCORE_SUBST - SCORE_SIMILAR;
4992 			    }
4993 			    else if (sp->ts_isdiff == DIFF_INSERT
4994 					 && sp->ts_twordlen > sp->ts_tcharlen)
4995 			    {
4996 				p = tword + sp->ts_twordlen - sp->ts_tcharlen;
4997 				c = mb_ptr2char(p);
4998 				if (enc_utf8 && utf_iscomposing(c))
4999 				{
5000 				    /* Inserting a composing char doesn't
5001 				     * count that much. */
5002 				    sp->ts_score -= SCORE_INS - SCORE_INSCOMP;
5003 				}
5004 				else
5005 				{
5006 				    /* If the previous character was the same,
5007 				     * thus doubling a character, give a bonus
5008 				     * to the score.  Also for the soundfold
5009 				     * tree (might seem illogical but does
5010 				     * give better scores). */
5011 				    MB_PTR_BACK(tword, p);
5012 				    if (c == mb_ptr2char(p))
5013 					sp->ts_score -= SCORE_INS
5014 							       - SCORE_INSDUP;
5015 				}
5016 			    }
5017 
5018 			    /* Starting a new char, reset the length. */
5019 			    sp->ts_tcharlen = 0;
5020 			}
5021 		    }
5022 		    else
5023 		    {
5024 			/* If we found a similar char adjust the score.
5025 			 * We do this after calling go_deeper() because
5026 			 * it's slow. */
5027 			if (newscore != 0
5028 				&& !soundfold
5029 				&& slang->sl_has_map
5030 				&& similar_chars(slang,
5031 						   c, fword[sp->ts_fidx - 1]))
5032 			    sp->ts_score -= SCORE_SUBST - SCORE_SIMILAR;
5033 		    }
5034 		}
5035 	    }
5036 	    break;
5037 
5038 	case STATE_DEL:
5039 	    /* When past the first byte of a multi-byte char don't try
5040 	     * delete/insert/swap a character. */
5041 	    if (has_mbyte && sp->ts_tcharlen > 0)
5042 	    {
5043 		PROF_STORE(sp->ts_state)
5044 		sp->ts_state = STATE_FINAL;
5045 		break;
5046 	    }
5047 	    /*
5048 	     * Try skipping one character in the bad word (delete it).
5049 	     */
5050 	    PROF_STORE(sp->ts_state)
5051 	    sp->ts_state = STATE_INS_PREP;
5052 	    sp->ts_curi = 1;
5053 	    if (soundfold && sp->ts_fidx == 0 && fword[sp->ts_fidx] == '*')
5054 		/* Deleting a vowel at the start of a word counts less, see
5055 		 * soundalike_score(). */
5056 		newscore = 2 * SCORE_DEL / 3;
5057 	    else
5058 		newscore = SCORE_DEL;
5059 	    if (fword[sp->ts_fidx] != NUL
5060 				    && TRY_DEEPER(su, stack, depth, newscore))
5061 	    {
5062 		go_deeper(stack, depth, newscore);
5063 #ifdef DEBUG_TRIEWALK
5064 		sprintf(changename[depth], "%.*s-%s: delete %c",
5065 			sp->ts_twordlen, tword, fword + sp->ts_fidx,
5066 			fword[sp->ts_fidx]);
5067 #endif
5068 		++depth;
5069 
5070 		/* Remember what character we deleted, so that we can avoid
5071 		 * inserting it again. */
5072 		stack[depth].ts_flags |= TSF_DIDDEL;
5073 		stack[depth].ts_delidx = sp->ts_fidx;
5074 
5075 		/* Advance over the character in fword[].  Give a bonus to the
5076 		 * score if the same character is following "nn" -> "n".  It's
5077 		 * a bit illogical for soundfold tree but it does give better
5078 		 * results. */
5079 		if (has_mbyte)
5080 		{
5081 		    c = mb_ptr2char(fword + sp->ts_fidx);
5082 		    stack[depth].ts_fidx += MB_PTR2LEN(fword + sp->ts_fidx);
5083 		    if (enc_utf8 && utf_iscomposing(c))
5084 			stack[depth].ts_score -= SCORE_DEL - SCORE_DELCOMP;
5085 		    else if (c == mb_ptr2char(fword + stack[depth].ts_fidx))
5086 			stack[depth].ts_score -= SCORE_DEL - SCORE_DELDUP;
5087 		}
5088 		else
5089 		{
5090 		    ++stack[depth].ts_fidx;
5091 		    if (fword[sp->ts_fidx] == fword[sp->ts_fidx + 1])
5092 			stack[depth].ts_score -= SCORE_DEL - SCORE_DELDUP;
5093 		}
5094 		break;
5095 	    }
5096 	    /* FALLTHROUGH */
5097 
5098 	case STATE_INS_PREP:
5099 	    if (sp->ts_flags & TSF_DIDDEL)
5100 	    {
5101 		/* If we just deleted a byte then inserting won't make sense,
5102 		 * a substitute is always cheaper. */
5103 		PROF_STORE(sp->ts_state)
5104 		sp->ts_state = STATE_SWAP;
5105 		break;
5106 	    }
5107 
5108 	    /* skip over NUL bytes */
5109 	    n = sp->ts_arridx;
5110 	    for (;;)
5111 	    {
5112 		if (sp->ts_curi > byts[n])
5113 		{
5114 		    /* Only NUL bytes at this node, go to next state. */
5115 		    PROF_STORE(sp->ts_state)
5116 		    sp->ts_state = STATE_SWAP;
5117 		    break;
5118 		}
5119 		if (byts[n + sp->ts_curi] != NUL)
5120 		{
5121 		    /* Found a byte to insert. */
5122 		    PROF_STORE(sp->ts_state)
5123 		    sp->ts_state = STATE_INS;
5124 		    break;
5125 		}
5126 		++sp->ts_curi;
5127 	    }
5128 	    break;
5129 
5130 	    /* FALLTHROUGH */
5131 
5132 	case STATE_INS:
5133 	    /* Insert one byte.  Repeat this for each possible byte at this
5134 	     * node. */
5135 	    n = sp->ts_arridx;
5136 	    if (sp->ts_curi > byts[n])
5137 	    {
5138 		/* Done all bytes at this node, go to next state. */
5139 		PROF_STORE(sp->ts_state)
5140 		sp->ts_state = STATE_SWAP;
5141 		break;
5142 	    }
5143 
5144 	    /* Do one more byte at this node, but:
5145 	     * - Skip NUL bytes.
5146 	     * - Skip the byte if it's equal to the byte in the word,
5147 	     *   accepting that byte is always better.
5148 	     */
5149 	    n += sp->ts_curi++;
5150 	    c = byts[n];
5151 	    if (soundfold && sp->ts_twordlen == 0 && c == '*')
5152 		/* Inserting a vowel at the start of a word counts less,
5153 		 * see soundalike_score(). */
5154 		newscore = 2 * SCORE_INS / 3;
5155 	    else
5156 		newscore = SCORE_INS;
5157 	    if (c != fword[sp->ts_fidx]
5158 				    && TRY_DEEPER(su, stack, depth, newscore))
5159 	    {
5160 		go_deeper(stack, depth, newscore);
5161 #ifdef DEBUG_TRIEWALK
5162 		sprintf(changename[depth], "%.*s-%s: insert %c",
5163 			sp->ts_twordlen, tword, fword + sp->ts_fidx,
5164 			c);
5165 #endif
5166 		++depth;
5167 		sp = &stack[depth];
5168 		tword[sp->ts_twordlen++] = c;
5169 		sp->ts_arridx = idxs[n];
5170 		if (has_mbyte)
5171 		{
5172 		    fl = MB_BYTE2LEN(c);
5173 		    if (fl > 1)
5174 		    {
5175 			/* There are following bytes for the same character.
5176 			 * We must find all bytes before trying
5177 			 * delete/insert/swap/etc. */
5178 			sp->ts_tcharlen = fl;
5179 			sp->ts_tcharidx = 1;
5180 			sp->ts_isdiff = DIFF_INSERT;
5181 		    }
5182 		}
5183 		else
5184 		    fl = 1;
5185 		if (fl == 1)
5186 		{
5187 		    /* If the previous character was the same, thus doubling a
5188 		     * character, give a bonus to the score.  Also for
5189 		     * soundfold words (illogical but does give a better
5190 		     * score). */
5191 		    if (sp->ts_twordlen >= 2
5192 					   && tword[sp->ts_twordlen - 2] == c)
5193 			sp->ts_score -= SCORE_INS - SCORE_INSDUP;
5194 		}
5195 	    }
5196 	    break;
5197 
5198 	case STATE_SWAP:
5199 	    /*
5200 	     * Swap two bytes in the bad word: "12" -> "21".
5201 	     * We change "fword" here, it's changed back afterwards at
5202 	     * STATE_UNSWAP.
5203 	     */
5204 	    p = fword + sp->ts_fidx;
5205 	    c = *p;
5206 	    if (c == NUL)
5207 	    {
5208 		/* End of word, can't swap or replace. */
5209 		PROF_STORE(sp->ts_state)
5210 		sp->ts_state = STATE_FINAL;
5211 		break;
5212 	    }
5213 
5214 	    /* Don't swap if the first character is not a word character.
5215 	     * SWAP3 etc. also don't make sense then. */
5216 	    if (!soundfold && !spell_iswordp(p, curwin))
5217 	    {
5218 		PROF_STORE(sp->ts_state)
5219 		sp->ts_state = STATE_REP_INI;
5220 		break;
5221 	    }
5222 
5223 	    if (has_mbyte)
5224 	    {
5225 		n = MB_CPTR2LEN(p);
5226 		c = mb_ptr2char(p);
5227 		if (p[n] == NUL)
5228 		    c2 = NUL;
5229 		else if (!soundfold && !spell_iswordp(p + n, curwin))
5230 		    c2 = c; /* don't swap non-word char */
5231 		else
5232 		    c2 = mb_ptr2char(p + n);
5233 	    }
5234 	    else
5235 	    {
5236 		if (p[1] == NUL)
5237 		    c2 = NUL;
5238 		else if (!soundfold && !spell_iswordp(p + 1, curwin))
5239 		    c2 = c; /* don't swap non-word char */
5240 		else
5241 		    c2 = p[1];
5242 	    }
5243 
5244 	    /* When the second character is NUL we can't swap. */
5245 	    if (c2 == NUL)
5246 	    {
5247 		PROF_STORE(sp->ts_state)
5248 		sp->ts_state = STATE_REP_INI;
5249 		break;
5250 	    }
5251 
5252 	    /* When characters are identical, swap won't do anything.
5253 	     * Also get here if the second char is not a word character. */
5254 	    if (c == c2)
5255 	    {
5256 		PROF_STORE(sp->ts_state)
5257 		sp->ts_state = STATE_SWAP3;
5258 		break;
5259 	    }
5260 	    if (c2 != NUL && TRY_DEEPER(su, stack, depth, SCORE_SWAP))
5261 	    {
5262 		go_deeper(stack, depth, SCORE_SWAP);
5263 #ifdef DEBUG_TRIEWALK
5264 		sprintf(changename[depth], "%.*s-%s: swap %c and %c",
5265 			sp->ts_twordlen, tword, fword + sp->ts_fidx,
5266 			c, c2);
5267 #endif
5268 		PROF_STORE(sp->ts_state)
5269 		sp->ts_state = STATE_UNSWAP;
5270 		++depth;
5271 		if (has_mbyte)
5272 		{
5273 		    fl = mb_char2len(c2);
5274 		    mch_memmove(p, p + n, fl);
5275 		    mb_char2bytes(c, p + fl);
5276 		    stack[depth].ts_fidxtry = sp->ts_fidx + n + fl;
5277 		}
5278 		else
5279 		{
5280 		    p[0] = c2;
5281 		    p[1] = c;
5282 		    stack[depth].ts_fidxtry = sp->ts_fidx + 2;
5283 		}
5284 	    }
5285 	    else
5286 	    {
5287 		/* If this swap doesn't work then SWAP3 won't either. */
5288 		PROF_STORE(sp->ts_state)
5289 		sp->ts_state = STATE_REP_INI;
5290 	    }
5291 	    break;
5292 
5293 	case STATE_UNSWAP:
5294 	    /* Undo the STATE_SWAP swap: "21" -> "12". */
5295 	    p = fword + sp->ts_fidx;
5296 	    if (has_mbyte)
5297 	    {
5298 		n = MB_PTR2LEN(p);
5299 		c = mb_ptr2char(p + n);
5300 		mch_memmove(p + MB_PTR2LEN(p + n), p, n);
5301 		mb_char2bytes(c, p);
5302 	    }
5303 	    else
5304 	    {
5305 		c = *p;
5306 		*p = p[1];
5307 		p[1] = c;
5308 	    }
5309 	    /* FALLTHROUGH */
5310 
5311 	case STATE_SWAP3:
5312 	    /* Swap two bytes, skipping one: "123" -> "321".  We change
5313 	     * "fword" here, it's changed back afterwards at STATE_UNSWAP3. */
5314 	    p = fword + sp->ts_fidx;
5315 	    if (has_mbyte)
5316 	    {
5317 		n = MB_CPTR2LEN(p);
5318 		c = mb_ptr2char(p);
5319 		fl = MB_CPTR2LEN(p + n);
5320 		c2 = mb_ptr2char(p + n);
5321 		if (!soundfold && !spell_iswordp(p + n + fl, curwin))
5322 		    c3 = c;	/* don't swap non-word char */
5323 		else
5324 		    c3 = mb_ptr2char(p + n + fl);
5325 	    }
5326 	    else
5327 	    {
5328 		c = *p;
5329 		c2 = p[1];
5330 		if (!soundfold && !spell_iswordp(p + 2, curwin))
5331 		    c3 = c;	/* don't swap non-word char */
5332 		else
5333 		    c3 = p[2];
5334 	    }
5335 
5336 	    /* When characters are identical: "121" then SWAP3 result is
5337 	     * identical, ROT3L result is same as SWAP: "211", ROT3L result is
5338 	     * same as SWAP on next char: "112".  Thus skip all swapping.
5339 	     * Also skip when c3 is NUL.
5340 	     * Also get here when the third character is not a word character.
5341 	     * Second character may any char: "a.b" -> "b.a" */
5342 	    if (c == c3 || c3 == NUL)
5343 	    {
5344 		PROF_STORE(sp->ts_state)
5345 		sp->ts_state = STATE_REP_INI;
5346 		break;
5347 	    }
5348 	    if (TRY_DEEPER(su, stack, depth, SCORE_SWAP3))
5349 	    {
5350 		go_deeper(stack, depth, SCORE_SWAP3);
5351 #ifdef DEBUG_TRIEWALK
5352 		sprintf(changename[depth], "%.*s-%s: swap3 %c and %c",
5353 			sp->ts_twordlen, tword, fword + sp->ts_fidx,
5354 			c, c3);
5355 #endif
5356 		PROF_STORE(sp->ts_state)
5357 		sp->ts_state = STATE_UNSWAP3;
5358 		++depth;
5359 		if (has_mbyte)
5360 		{
5361 		    tl = mb_char2len(c3);
5362 		    mch_memmove(p, p + n + fl, tl);
5363 		    mb_char2bytes(c2, p + tl);
5364 		    mb_char2bytes(c, p + fl + tl);
5365 		    stack[depth].ts_fidxtry = sp->ts_fidx + n + fl + tl;
5366 		}
5367 		else
5368 		{
5369 		    p[0] = p[2];
5370 		    p[2] = c;
5371 		    stack[depth].ts_fidxtry = sp->ts_fidx + 3;
5372 		}
5373 	    }
5374 	    else
5375 	    {
5376 		PROF_STORE(sp->ts_state)
5377 		sp->ts_state = STATE_REP_INI;
5378 	    }
5379 	    break;
5380 
5381 	case STATE_UNSWAP3:
5382 	    /* Undo STATE_SWAP3: "321" -> "123" */
5383 	    p = fword + sp->ts_fidx;
5384 	    if (has_mbyte)
5385 	    {
5386 		n = MB_PTR2LEN(p);
5387 		c2 = mb_ptr2char(p + n);
5388 		fl = MB_PTR2LEN(p + n);
5389 		c = mb_ptr2char(p + n + fl);
5390 		tl = MB_PTR2LEN(p + n + fl);
5391 		mch_memmove(p + fl + tl, p, n);
5392 		mb_char2bytes(c, p);
5393 		mb_char2bytes(c2, p + tl);
5394 		p = p + tl;
5395 	    }
5396 	    else
5397 	    {
5398 		c = *p;
5399 		*p = p[2];
5400 		p[2] = c;
5401 		++p;
5402 	    }
5403 
5404 	    if (!soundfold && !spell_iswordp(p, curwin))
5405 	    {
5406 		/* Middle char is not a word char, skip the rotate.  First and
5407 		 * third char were already checked at swap and swap3. */
5408 		PROF_STORE(sp->ts_state)
5409 		sp->ts_state = STATE_REP_INI;
5410 		break;
5411 	    }
5412 
5413 	    /* Rotate three characters left: "123" -> "231".  We change
5414 	     * "fword" here, it's changed back afterwards at STATE_UNROT3L. */
5415 	    if (TRY_DEEPER(su, stack, depth, SCORE_SWAP3))
5416 	    {
5417 		go_deeper(stack, depth, SCORE_SWAP3);
5418 #ifdef DEBUG_TRIEWALK
5419 		p = fword + sp->ts_fidx;
5420 		sprintf(changename[depth], "%.*s-%s: rotate left %c%c%c",
5421 			sp->ts_twordlen, tword, fword + sp->ts_fidx,
5422 			p[0], p[1], p[2]);
5423 #endif
5424 		PROF_STORE(sp->ts_state)
5425 		sp->ts_state = STATE_UNROT3L;
5426 		++depth;
5427 		p = fword + sp->ts_fidx;
5428 		if (has_mbyte)
5429 		{
5430 		    n = MB_CPTR2LEN(p);
5431 		    c = mb_ptr2char(p);
5432 		    fl = MB_CPTR2LEN(p + n);
5433 		    fl += MB_CPTR2LEN(p + n + fl);
5434 		    mch_memmove(p, p + n, fl);
5435 		    mb_char2bytes(c, p + fl);
5436 		    stack[depth].ts_fidxtry = sp->ts_fidx + n + fl;
5437 		}
5438 		else
5439 		{
5440 		    c = *p;
5441 		    *p = p[1];
5442 		    p[1] = p[2];
5443 		    p[2] = c;
5444 		    stack[depth].ts_fidxtry = sp->ts_fidx + 3;
5445 		}
5446 	    }
5447 	    else
5448 	    {
5449 		PROF_STORE(sp->ts_state)
5450 		sp->ts_state = STATE_REP_INI;
5451 	    }
5452 	    break;
5453 
5454 	case STATE_UNROT3L:
5455 	    /* Undo ROT3L: "231" -> "123" */
5456 	    p = fword + sp->ts_fidx;
5457 	    if (has_mbyte)
5458 	    {
5459 		n = MB_PTR2LEN(p);
5460 		n += MB_PTR2LEN(p + n);
5461 		c = mb_ptr2char(p + n);
5462 		tl = MB_PTR2LEN(p + n);
5463 		mch_memmove(p + tl, p, n);
5464 		mb_char2bytes(c, p);
5465 	    }
5466 	    else
5467 	    {
5468 		c = p[2];
5469 		p[2] = p[1];
5470 		p[1] = *p;
5471 		*p = c;
5472 	    }
5473 
5474 	    /* Rotate three bytes right: "123" -> "312".  We change "fword"
5475 	     * here, it's changed back afterwards at STATE_UNROT3R. */
5476 	    if (TRY_DEEPER(su, stack, depth, SCORE_SWAP3))
5477 	    {
5478 		go_deeper(stack, depth, SCORE_SWAP3);
5479 #ifdef DEBUG_TRIEWALK
5480 		p = fword + sp->ts_fidx;
5481 		sprintf(changename[depth], "%.*s-%s: rotate right %c%c%c",
5482 			sp->ts_twordlen, tword, fword + sp->ts_fidx,
5483 			p[0], p[1], p[2]);
5484 #endif
5485 		PROF_STORE(sp->ts_state)
5486 		sp->ts_state = STATE_UNROT3R;
5487 		++depth;
5488 		p = fword + sp->ts_fidx;
5489 		if (has_mbyte)
5490 		{
5491 		    n = MB_CPTR2LEN(p);
5492 		    n += MB_CPTR2LEN(p + n);
5493 		    c = mb_ptr2char(p + n);
5494 		    tl = MB_CPTR2LEN(p + n);
5495 		    mch_memmove(p + tl, p, n);
5496 		    mb_char2bytes(c, p);
5497 		    stack[depth].ts_fidxtry = sp->ts_fidx + n + tl;
5498 		}
5499 		else
5500 		{
5501 		    c = p[2];
5502 		    p[2] = p[1];
5503 		    p[1] = *p;
5504 		    *p = c;
5505 		    stack[depth].ts_fidxtry = sp->ts_fidx + 3;
5506 		}
5507 	    }
5508 	    else
5509 	    {
5510 		PROF_STORE(sp->ts_state)
5511 		sp->ts_state = STATE_REP_INI;
5512 	    }
5513 	    break;
5514 
5515 	case STATE_UNROT3R:
5516 	    /* Undo ROT3R: "312" -> "123" */
5517 	    p = fword + sp->ts_fidx;
5518 	    if (has_mbyte)
5519 	    {
5520 		c = mb_ptr2char(p);
5521 		tl = MB_PTR2LEN(p);
5522 		n = MB_PTR2LEN(p + tl);
5523 		n += MB_PTR2LEN(p + tl + n);
5524 		mch_memmove(p, p + tl, n);
5525 		mb_char2bytes(c, p + n);
5526 	    }
5527 	    else
5528 	    {
5529 		c = *p;
5530 		*p = p[1];
5531 		p[1] = p[2];
5532 		p[2] = c;
5533 	    }
5534 	    /* FALLTHROUGH */
5535 
5536 	case STATE_REP_INI:
5537 	    /* Check if matching with REP items from the .aff file would work.
5538 	     * Quickly skip if:
5539 	     * - there are no REP items and we are not in the soundfold trie
5540 	     * - the score is going to be too high anyway
5541 	     * - already applied a REP item or swapped here  */
5542 	    if ((lp->lp_replang == NULL && !soundfold)
5543 		    || sp->ts_score + SCORE_REP >= su->su_maxscore
5544 		    || sp->ts_fidx < sp->ts_fidxtry)
5545 	    {
5546 		PROF_STORE(sp->ts_state)
5547 		sp->ts_state = STATE_FINAL;
5548 		break;
5549 	    }
5550 
5551 	    /* Use the first byte to quickly find the first entry that may
5552 	     * match.  If the index is -1 there is none. */
5553 	    if (soundfold)
5554 		sp->ts_curi = slang->sl_repsal_first[fword[sp->ts_fidx]];
5555 	    else
5556 		sp->ts_curi = lp->lp_replang->sl_rep_first[fword[sp->ts_fidx]];
5557 
5558 	    if (sp->ts_curi < 0)
5559 	    {
5560 		PROF_STORE(sp->ts_state)
5561 		sp->ts_state = STATE_FINAL;
5562 		break;
5563 	    }
5564 
5565 	    PROF_STORE(sp->ts_state)
5566 	    sp->ts_state = STATE_REP;
5567 	    /* FALLTHROUGH */
5568 
5569 	case STATE_REP:
5570 	    /* Try matching with REP items from the .aff file.  For each match
5571 	     * replace the characters and check if the resulting word is
5572 	     * valid. */
5573 	    p = fword + sp->ts_fidx;
5574 
5575 	    if (soundfold)
5576 		gap = &slang->sl_repsal;
5577 	    else
5578 		gap = &lp->lp_replang->sl_rep;
5579 	    while (sp->ts_curi < gap->ga_len)
5580 	    {
5581 		ftp = (fromto_T *)gap->ga_data + sp->ts_curi++;
5582 		if (*ftp->ft_from != *p)
5583 		{
5584 		    /* past possible matching entries */
5585 		    sp->ts_curi = gap->ga_len;
5586 		    break;
5587 		}
5588 		if (STRNCMP(ftp->ft_from, p, STRLEN(ftp->ft_from)) == 0
5589 			&& TRY_DEEPER(su, stack, depth, SCORE_REP))
5590 		{
5591 		    go_deeper(stack, depth, SCORE_REP);
5592 #ifdef DEBUG_TRIEWALK
5593 		    sprintf(changename[depth], "%.*s-%s: replace %s with %s",
5594 			    sp->ts_twordlen, tword, fword + sp->ts_fidx,
5595 			    ftp->ft_from, ftp->ft_to);
5596 #endif
5597 		    /* Need to undo this afterwards. */
5598 		    PROF_STORE(sp->ts_state)
5599 		    sp->ts_state = STATE_REP_UNDO;
5600 
5601 		    /* Change the "from" to the "to" string. */
5602 		    ++depth;
5603 		    fl = (int)STRLEN(ftp->ft_from);
5604 		    tl = (int)STRLEN(ftp->ft_to);
5605 		    if (fl != tl)
5606 		    {
5607 			STRMOVE(p + tl, p + fl);
5608 			repextra += tl - fl;
5609 		    }
5610 		    mch_memmove(p, ftp->ft_to, tl);
5611 		    stack[depth].ts_fidxtry = sp->ts_fidx + tl;
5612 		    stack[depth].ts_tcharlen = 0;
5613 		    break;
5614 		}
5615 	    }
5616 
5617 	    if (sp->ts_curi >= gap->ga_len && sp->ts_state == STATE_REP)
5618 	    {
5619 		/* No (more) matches. */
5620 		PROF_STORE(sp->ts_state)
5621 		sp->ts_state = STATE_FINAL;
5622 	    }
5623 
5624 	    break;
5625 
5626 	case STATE_REP_UNDO:
5627 	    /* Undo a REP replacement and continue with the next one. */
5628 	    if (soundfold)
5629 		gap = &slang->sl_repsal;
5630 	    else
5631 		gap = &lp->lp_replang->sl_rep;
5632 	    ftp = (fromto_T *)gap->ga_data + sp->ts_curi - 1;
5633 	    fl = (int)STRLEN(ftp->ft_from);
5634 	    tl = (int)STRLEN(ftp->ft_to);
5635 	    p = fword + sp->ts_fidx;
5636 	    if (fl != tl)
5637 	    {
5638 		STRMOVE(p + fl, p + tl);
5639 		repextra -= tl - fl;
5640 	    }
5641 	    mch_memmove(p, ftp->ft_from, fl);
5642 	    PROF_STORE(sp->ts_state)
5643 	    sp->ts_state = STATE_REP;
5644 	    break;
5645 
5646 	default:
5647 	    /* Did all possible states at this level, go up one level. */
5648 	    --depth;
5649 
5650 	    if (depth >= 0 && stack[depth].ts_prefixdepth == PFD_PREFIXTREE)
5651 	    {
5652 		/* Continue in or go back to the prefix tree. */
5653 		byts = pbyts;
5654 		idxs = pidxs;
5655 	    }
5656 
5657 	    /* Don't check for CTRL-C too often, it takes time. */
5658 	    if (--breakcheckcount == 0)
5659 	    {
5660 		ui_breakcheck();
5661 		breakcheckcount = 1000;
5662 	    }
5663 	}
5664     }
5665 }
5666 
5667 
5668 /*
5669  * Go one level deeper in the tree.
5670  */
5671     static void
5672 go_deeper(trystate_T *stack, int depth, int score_add)
5673 {
5674     stack[depth + 1] = stack[depth];
5675     stack[depth + 1].ts_state = STATE_START;
5676     stack[depth + 1].ts_score = stack[depth].ts_score + score_add;
5677     stack[depth + 1].ts_curi = 1;	/* start just after length byte */
5678     stack[depth + 1].ts_flags = 0;
5679 }
5680 
5681 /*
5682  * Case-folding may change the number of bytes: Count nr of chars in
5683  * fword[flen] and return the byte length of that many chars in "word".
5684  */
5685     static int
5686 nofold_len(char_u *fword, int flen, char_u *word)
5687 {
5688     char_u	*p;
5689     int		i = 0;
5690 
5691     for (p = fword; p < fword + flen; MB_PTR_ADV(p))
5692 	++i;
5693     for (p = word; i > 0; MB_PTR_ADV(p))
5694 	--i;
5695     return (int)(p - word);
5696 }
5697 
5698 /*
5699  * "fword" is a good word with case folded.  Find the matching keep-case
5700  * words and put it in "kword".
5701  * Theoretically there could be several keep-case words that result in the
5702  * same case-folded word, but we only find one...
5703  */
5704     static void
5705 find_keepcap_word(slang_T *slang, char_u *fword, char_u *kword)
5706 {
5707     char_u	uword[MAXWLEN];		/* "fword" in upper-case */
5708     int		depth;
5709     idx_T	tryidx;
5710 
5711     /* The following arrays are used at each depth in the tree. */
5712     idx_T	arridx[MAXWLEN];
5713     int		round[MAXWLEN];
5714     int		fwordidx[MAXWLEN];
5715     int		uwordidx[MAXWLEN];
5716     int		kwordlen[MAXWLEN];
5717 
5718     int		flen, ulen;
5719     int		l;
5720     int		len;
5721     int		c;
5722     idx_T	lo, hi, m;
5723     char_u	*p;
5724     char_u	*byts = slang->sl_kbyts;    /* array with bytes of the words */
5725     idx_T	*idxs = slang->sl_kidxs;    /* array with indexes */
5726 
5727     if (byts == NULL)
5728     {
5729 	/* array is empty: "cannot happen" */
5730 	*kword = NUL;
5731 	return;
5732     }
5733 
5734     /* Make an all-cap version of "fword". */
5735     allcap_copy(fword, uword);
5736 
5737     /*
5738      * Each character needs to be tried both case-folded and upper-case.
5739      * All this gets very complicated if we keep in mind that changing case
5740      * may change the byte length of a multi-byte character...
5741      */
5742     depth = 0;
5743     arridx[0] = 0;
5744     round[0] = 0;
5745     fwordidx[0] = 0;
5746     uwordidx[0] = 0;
5747     kwordlen[0] = 0;
5748     while (depth >= 0)
5749     {
5750 	if (fword[fwordidx[depth]] == NUL)
5751 	{
5752 	    /* We are at the end of "fword".  If the tree allows a word to end
5753 	     * here we have found a match. */
5754 	    if (byts[arridx[depth] + 1] == 0)
5755 	    {
5756 		kword[kwordlen[depth]] = NUL;
5757 		return;
5758 	    }
5759 
5760 	    /* kword is getting too long, continue one level up */
5761 	    --depth;
5762 	}
5763 	else if (++round[depth] > 2)
5764 	{
5765 	    /* tried both fold-case and upper-case character, continue one
5766 	     * level up */
5767 	    --depth;
5768 	}
5769 	else
5770 	{
5771 	    /*
5772 	     * round[depth] == 1: Try using the folded-case character.
5773 	     * round[depth] == 2: Try using the upper-case character.
5774 	     */
5775 	    if (has_mbyte)
5776 	    {
5777 		flen = MB_CPTR2LEN(fword + fwordidx[depth]);
5778 		ulen = MB_CPTR2LEN(uword + uwordidx[depth]);
5779 	    }
5780 	    else
5781 		ulen = flen = 1;
5782 	    if (round[depth] == 1)
5783 	    {
5784 		p = fword + fwordidx[depth];
5785 		l = flen;
5786 	    }
5787 	    else
5788 	    {
5789 		p = uword + uwordidx[depth];
5790 		l = ulen;
5791 	    }
5792 
5793 	    for (tryidx = arridx[depth]; l > 0; --l)
5794 	    {
5795 		/* Perform a binary search in the list of accepted bytes. */
5796 		len = byts[tryidx++];
5797 		c = *p++;
5798 		lo = tryidx;
5799 		hi = tryidx + len - 1;
5800 		while (lo < hi)
5801 		{
5802 		    m = (lo + hi) / 2;
5803 		    if (byts[m] > c)
5804 			hi = m - 1;
5805 		    else if (byts[m] < c)
5806 			lo = m + 1;
5807 		    else
5808 		    {
5809 			lo = hi = m;
5810 			break;
5811 		    }
5812 		}
5813 
5814 		/* Stop if there is no matching byte. */
5815 		if (hi < lo || byts[lo] != c)
5816 		    break;
5817 
5818 		/* Continue at the child (if there is one). */
5819 		tryidx = idxs[lo];
5820 	    }
5821 
5822 	    if (l == 0)
5823 	    {
5824 		/*
5825 		 * Found the matching char.  Copy it to "kword" and go a
5826 		 * level deeper.
5827 		 */
5828 		if (round[depth] == 1)
5829 		{
5830 		    STRNCPY(kword + kwordlen[depth], fword + fwordidx[depth],
5831 									flen);
5832 		    kwordlen[depth + 1] = kwordlen[depth] + flen;
5833 		}
5834 		else
5835 		{
5836 		    STRNCPY(kword + kwordlen[depth], uword + uwordidx[depth],
5837 									ulen);
5838 		    kwordlen[depth + 1] = kwordlen[depth] + ulen;
5839 		}
5840 		fwordidx[depth + 1] = fwordidx[depth] + flen;
5841 		uwordidx[depth + 1] = uwordidx[depth] + ulen;
5842 
5843 		++depth;
5844 		arridx[depth] = tryidx;
5845 		round[depth] = 0;
5846 	    }
5847 	}
5848     }
5849 
5850     /* Didn't find it: "cannot happen". */
5851     *kword = NUL;
5852 }
5853 
5854 /*
5855  * Compute the sound-a-like score for suggestions in su->su_ga and add them to
5856  * su->su_sga.
5857  */
5858     static void
5859 score_comp_sal(suginfo_T *su)
5860 {
5861     langp_T	*lp;
5862     char_u	badsound[MAXWLEN];
5863     int		i;
5864     suggest_T   *stp;
5865     suggest_T   *sstp;
5866     int		score;
5867     int		lpi;
5868 
5869     if (ga_grow(&su->su_sga, su->su_ga.ga_len) == FAIL)
5870 	return;
5871 
5872     /*	Use the sound-folding of the first language that supports it. */
5873     for (lpi = 0; lpi < curwin->w_s->b_langp.ga_len; ++lpi)
5874     {
5875 	lp = LANGP_ENTRY(curwin->w_s->b_langp, lpi);
5876 	if (lp->lp_slang->sl_sal.ga_len > 0)
5877 	{
5878 	    /* soundfold the bad word */
5879 	    spell_soundfold(lp->lp_slang, su->su_fbadword, TRUE, badsound);
5880 
5881 	    for (i = 0; i < su->su_ga.ga_len; ++i)
5882 	    {
5883 		stp = &SUG(su->su_ga, i);
5884 
5885 		/* Case-fold the suggested word, sound-fold it and compute the
5886 		 * sound-a-like score. */
5887 		score = stp_sal_score(stp, su, lp->lp_slang, badsound);
5888 		if (score < SCORE_MAXMAX)
5889 		{
5890 		    /* Add the suggestion. */
5891 		    sstp = &SUG(su->su_sga, su->su_sga.ga_len);
5892 		    sstp->st_word = vim_strsave(stp->st_word);
5893 		    if (sstp->st_word != NULL)
5894 		    {
5895 			sstp->st_wordlen = stp->st_wordlen;
5896 			sstp->st_score = score;
5897 			sstp->st_altscore = 0;
5898 			sstp->st_orglen = stp->st_orglen;
5899 			++su->su_sga.ga_len;
5900 		    }
5901 		}
5902 	    }
5903 	    break;
5904 	}
5905     }
5906 }
5907 
5908 /*
5909  * Combine the list of suggestions in su->su_ga and su->su_sga.
5910  * They are entwined.
5911  */
5912     static void
5913 score_combine(suginfo_T *su)
5914 {
5915     int		i;
5916     int		j;
5917     garray_T	ga;
5918     garray_T	*gap;
5919     langp_T	*lp;
5920     suggest_T	*stp;
5921     char_u	*p;
5922     char_u	badsound[MAXWLEN];
5923     int		round;
5924     int		lpi;
5925     slang_T	*slang = NULL;
5926 
5927     /* Add the alternate score to su_ga. */
5928     for (lpi = 0; lpi < curwin->w_s->b_langp.ga_len; ++lpi)
5929     {
5930 	lp = LANGP_ENTRY(curwin->w_s->b_langp, lpi);
5931 	if (lp->lp_slang->sl_sal.ga_len > 0)
5932 	{
5933 	    /* soundfold the bad word */
5934 	    slang = lp->lp_slang;
5935 	    spell_soundfold(slang, su->su_fbadword, TRUE, badsound);
5936 
5937 	    for (i = 0; i < su->su_ga.ga_len; ++i)
5938 	    {
5939 		stp = &SUG(su->su_ga, i);
5940 		stp->st_altscore = stp_sal_score(stp, su, slang, badsound);
5941 		if (stp->st_altscore == SCORE_MAXMAX)
5942 		    stp->st_score = (stp->st_score * 3 + SCORE_BIG) / 4;
5943 		else
5944 		    stp->st_score = (stp->st_score * 3
5945 						  + stp->st_altscore) / 4;
5946 		stp->st_salscore = FALSE;
5947 	    }
5948 	    break;
5949 	}
5950     }
5951 
5952     if (slang == NULL)	/* Using "double" without sound folding. */
5953     {
5954 	(void)cleanup_suggestions(&su->su_ga, su->su_maxscore,
5955 							     su->su_maxcount);
5956 	return;
5957     }
5958 
5959     /* Add the alternate score to su_sga. */
5960     for (i = 0; i < su->su_sga.ga_len; ++i)
5961     {
5962 	stp = &SUG(su->su_sga, i);
5963 	stp->st_altscore = spell_edit_score(slang,
5964 						su->su_badword, stp->st_word);
5965 	if (stp->st_score == SCORE_MAXMAX)
5966 	    stp->st_score = (SCORE_BIG * 7 + stp->st_altscore) / 8;
5967 	else
5968 	    stp->st_score = (stp->st_score * 7 + stp->st_altscore) / 8;
5969 	stp->st_salscore = TRUE;
5970     }
5971 
5972     /* Remove bad suggestions, sort the suggestions and truncate at "maxcount"
5973      * for both lists. */
5974     check_suggestions(su, &su->su_ga);
5975     (void)cleanup_suggestions(&su->su_ga, su->su_maxscore, su->su_maxcount);
5976     check_suggestions(su, &su->su_sga);
5977     (void)cleanup_suggestions(&su->su_sga, su->su_maxscore, su->su_maxcount);
5978 
5979     ga_init2(&ga, (int)sizeof(suginfo_T), 1);
5980     if (ga_grow(&ga, su->su_ga.ga_len + su->su_sga.ga_len) == FAIL)
5981 	return;
5982 
5983     stp = &SUG(ga, 0);
5984     for (i = 0; i < su->su_ga.ga_len || i < su->su_sga.ga_len; ++i)
5985     {
5986 	/* round 1: get a suggestion from su_ga
5987 	 * round 2: get a suggestion from su_sga */
5988 	for (round = 1; round <= 2; ++round)
5989 	{
5990 	    gap = round == 1 ? &su->su_ga : &su->su_sga;
5991 	    if (i < gap->ga_len)
5992 	    {
5993 		/* Don't add a word if it's already there. */
5994 		p = SUG(*gap, i).st_word;
5995 		for (j = 0; j < ga.ga_len; ++j)
5996 		    if (STRCMP(stp[j].st_word, p) == 0)
5997 			break;
5998 		if (j == ga.ga_len)
5999 		    stp[ga.ga_len++] = SUG(*gap, i);
6000 		else
6001 		    vim_free(p);
6002 	    }
6003 	}
6004     }
6005 
6006     ga_clear(&su->su_ga);
6007     ga_clear(&su->su_sga);
6008 
6009     /* Truncate the list to the number of suggestions that will be displayed. */
6010     if (ga.ga_len > su->su_maxcount)
6011     {
6012 	for (i = su->su_maxcount; i < ga.ga_len; ++i)
6013 	    vim_free(stp[i].st_word);
6014 	ga.ga_len = su->su_maxcount;
6015     }
6016 
6017     su->su_ga = ga;
6018 }
6019 
6020 /*
6021  * For the goodword in "stp" compute the soundalike score compared to the
6022  * badword.
6023  */
6024     static int
6025 stp_sal_score(
6026     suggest_T	*stp,
6027     suginfo_T	*su,
6028     slang_T	*slang,
6029     char_u	*badsound)	/* sound-folded badword */
6030 {
6031     char_u	*p;
6032     char_u	*pbad;
6033     char_u	*pgood;
6034     char_u	badsound2[MAXWLEN];
6035     char_u	fword[MAXWLEN];
6036     char_u	goodsound[MAXWLEN];
6037     char_u	goodword[MAXWLEN];
6038     int		lendiff;
6039 
6040     lendiff = (int)(su->su_badlen - stp->st_orglen);
6041     if (lendiff >= 0)
6042 	pbad = badsound;
6043     else
6044     {
6045 	/* soundfold the bad word with more characters following */
6046 	(void)spell_casefold(su->su_badptr, stp->st_orglen, fword, MAXWLEN);
6047 
6048 	/* When joining two words the sound often changes a lot.  E.g., "t he"
6049 	 * sounds like "t h" while "the" sounds like "@".  Avoid that by
6050 	 * removing the space.  Don't do it when the good word also contains a
6051 	 * space. */
6052 	if (VIM_ISWHITE(su->su_badptr[su->su_badlen])
6053 					 && *skiptowhite(stp->st_word) == NUL)
6054 	    for (p = fword; *(p = skiptowhite(p)) != NUL; )
6055 		STRMOVE(p, p + 1);
6056 
6057 	spell_soundfold(slang, fword, TRUE, badsound2);
6058 	pbad = badsound2;
6059     }
6060 
6061     if (lendiff > 0 && stp->st_wordlen + lendiff < MAXWLEN)
6062     {
6063 	/* Add part of the bad word to the good word, so that we soundfold
6064 	 * what replaces the bad word. */
6065 	STRCPY(goodword, stp->st_word);
6066 	vim_strncpy(goodword + stp->st_wordlen,
6067 			    su->su_badptr + su->su_badlen - lendiff, lendiff);
6068 	pgood = goodword;
6069     }
6070     else
6071 	pgood = stp->st_word;
6072 
6073     /* Sound-fold the word and compute the score for the difference. */
6074     spell_soundfold(slang, pgood, FALSE, goodsound);
6075 
6076     return soundalike_score(goodsound, pbad);
6077 }
6078 
6079 /* structure used to store soundfolded words that add_sound_suggest() has
6080  * handled already. */
6081 typedef struct
6082 {
6083     short	sft_score;	/* lowest score used */
6084     char_u	sft_word[1];    /* soundfolded word, actually longer */
6085 } sftword_T;
6086 
6087 static sftword_T dumsft;
6088 #define HIKEY2SFT(p)  ((sftword_T *)(p - (dumsft.sft_word - (char_u *)&dumsft)))
6089 #define HI2SFT(hi)     HIKEY2SFT((hi)->hi_key)
6090 
6091 /*
6092  * Prepare for calling suggest_try_soundalike().
6093  */
6094     static void
6095 suggest_try_soundalike_prep(void)
6096 {
6097     langp_T	*lp;
6098     int		lpi;
6099     slang_T	*slang;
6100 
6101     /* Do this for all languages that support sound folding and for which a
6102      * .sug file has been loaded. */
6103     for (lpi = 0; lpi < curwin->w_s->b_langp.ga_len; ++lpi)
6104     {
6105 	lp = LANGP_ENTRY(curwin->w_s->b_langp, lpi);
6106 	slang = lp->lp_slang;
6107 	if (slang->sl_sal.ga_len > 0 && slang->sl_sbyts != NULL)
6108 	    /* prepare the hashtable used by add_sound_suggest() */
6109 	    hash_init(&slang->sl_sounddone);
6110     }
6111 }
6112 
6113 /*
6114  * Find suggestions by comparing the word in a sound-a-like form.
6115  * Note: This doesn't support postponed prefixes.
6116  */
6117     static void
6118 suggest_try_soundalike(suginfo_T *su)
6119 {
6120     char_u	salword[MAXWLEN];
6121     langp_T	*lp;
6122     int		lpi;
6123     slang_T	*slang;
6124 
6125     /* Do this for all languages that support sound folding and for which a
6126      * .sug file has been loaded. */
6127     for (lpi = 0; lpi < curwin->w_s->b_langp.ga_len; ++lpi)
6128     {
6129 	lp = LANGP_ENTRY(curwin->w_s->b_langp, lpi);
6130 	slang = lp->lp_slang;
6131 	if (slang->sl_sal.ga_len > 0 && slang->sl_sbyts != NULL)
6132 	{
6133 	    /* soundfold the bad word */
6134 	    spell_soundfold(slang, su->su_fbadword, TRUE, salword);
6135 
6136 	    /* try all kinds of inserts/deletes/swaps/etc. */
6137 	    /* TODO: also soundfold the next words, so that we can try joining
6138 	     * and splitting */
6139 #ifdef SUGGEST_PROFILE
6140 	prof_init();
6141 #endif
6142 	    suggest_trie_walk(su, lp, salword, TRUE);
6143 #ifdef SUGGEST_PROFILE
6144 	prof_report("soundalike");
6145 #endif
6146 	}
6147     }
6148 }
6149 
6150 /*
6151  * Finish up after calling suggest_try_soundalike().
6152  */
6153     static void
6154 suggest_try_soundalike_finish(void)
6155 {
6156     langp_T	*lp;
6157     int		lpi;
6158     slang_T	*slang;
6159     int		todo;
6160     hashitem_T	*hi;
6161 
6162     /* Do this for all languages that support sound folding and for which a
6163      * .sug file has been loaded. */
6164     for (lpi = 0; lpi < curwin->w_s->b_langp.ga_len; ++lpi)
6165     {
6166 	lp = LANGP_ENTRY(curwin->w_s->b_langp, lpi);
6167 	slang = lp->lp_slang;
6168 	if (slang->sl_sal.ga_len > 0 && slang->sl_sbyts != NULL)
6169 	{
6170 	    /* Free the info about handled words. */
6171 	    todo = (int)slang->sl_sounddone.ht_used;
6172 	    for (hi = slang->sl_sounddone.ht_array; todo > 0; ++hi)
6173 		if (!HASHITEM_EMPTY(hi))
6174 		{
6175 		    vim_free(HI2SFT(hi));
6176 		    --todo;
6177 		}
6178 
6179 	    /* Clear the hashtable, it may also be used by another region. */
6180 	    hash_clear(&slang->sl_sounddone);
6181 	    hash_init(&slang->sl_sounddone);
6182 	}
6183     }
6184 }
6185 
/*
 * A match with a soundfolded word is found.  Add the good word(s) that
 * produce this soundfolded word.
 *
 * "goodword" is the soundfolded word; it is looked up with soundfold_find()
 * and used as the key in the "sl_sounddone" hashtable of the language.
 * With SPS_DOUBLE in "sps_flags" the words are added to "su->su_sga" with
 * "score", otherwise an edit score is computed and they are added to
 * "su->su_ga".
 */
    static void
add_sound_suggest(
    suginfo_T	*su,
    char_u	*goodword,
    int		score,		/* soundfold score  */
    langp_T	*lp)
{
    slang_T	*slang = lp->lp_slang;	/* language for sound folding */
    int		sfwordnr;
    char_u	*nrline;
    int		orgnr;
    char_u	theword[MAXWLEN];
    int		i;
    int		wlen;
    char_u	*byts;
    idx_T	*idxs;
    int		n;
    int		wordcount;
    int		wc;
    int		goodscore;
    hash_T	hash;
    hashitem_T  *hi;
    sftword_T	*sft;
    int		bc, gc;
    int		limit;

    /*
     * It's very well possible that the same soundfold word is found several
     * times with different scores.  Since the following is quite slow only do
     * the words that have a better score than before.  Use a hashtable to
     * remember the words that have been done.
     */
    hash = hash_hash(goodword);
    hi = hash_lookup(&slang->sl_sounddone, goodword, hash);
    if (HASHITEM_EMPTY(hi))
    {
	/* First time this word is seen: remember it with its score.  When
	 * allocation fails the word is still handled below, it is just not
	 * remembered. */
	sft = alloc(sizeof(sftword_T) + STRLEN(goodword));
	if (sft != NULL)
	{
	    sft->sft_score = score;
	    STRCPY(sft->sft_word, goodword);
	    hash_add_item(&slang->sl_sounddone, hi, sft->sft_word, hash);
	}
    }
    else
    {
	/* Seen before: only continue when the score improved. */
	sft = HI2SFT(hi);
	if (score >= sft->sft_score)
	    return;
	sft->sft_score = score;
    }

    /*
     * Find the word nr in the soundfold tree.
     */
    sfwordnr = soundfold_find(slang, goodword);
    if (sfwordnr < 0)
    {
	internal_error("add_sound_suggest()");
	return;
    }

    /*
     * go over the list of good words that produce this soundfold word
     */
    nrline = ml_get_buf(slang->sl_sugbuf, (linenr_T)(sfwordnr + 1), FALSE);
    orgnr = 0;
    while (*nrline != NUL)
    {
	/* The wordnr was stored in a minimal nr of bytes as an offset to the
	 * previous wordnr. */
	orgnr += bytes2offset(&nrline);

	byts = slang->sl_fbyts;
	idxs = slang->sl_fidxs;

	/* Look up word number "orgnr" in the trie given by "sl_fbyts" and
	 * "sl_fidxs", collecting its bytes in "theword". */
	n = 0;
	wordcount = 0;
	for (wlen = 0; wlen < MAXWLEN - 3; ++wlen)
	{
	    i = 1;
	    if (wordcount == orgnr && byts[n + 1] == NUL)
		break;	/* found end of word */

	    /* A word ends at this node but it is not the one looked for. */
	    if (byts[n + 1] == NUL)
		++wordcount;

	    /* skip over the NUL bytes */
	    for ( ; byts[n + i] == NUL; ++i)
		if (i > byts[n])	/* safety check */
		{
		    /* The loop above stops at MAXWLEN - 3, thus there is
		     * room for these three bytes. */
		    STRCPY(theword + wlen, "BAD");
		    wlen += 3;
		    goto badword;
		}

	    /* One of the siblings must have the word. */
	    for ( ; i < byts[n]; ++i)
	    {
		wc = idxs[idxs[n + i]];	/* nr of words under this byte */
		if (wordcount + wc > orgnr)
		    break;
		wordcount += wc;
	    }

	    theword[wlen] = byts[n + i];
	    n = idxs[n + i];
	}
badword:
	theword[wlen] = NUL;

	/* Go over the possible flags and regions. */
	for (; i <= byts[n] && byts[n + i] == NUL; ++i)
	{
	    char_u	cword[MAXWLEN];
	    char_u	*p;
	    int		flags = (int)idxs[n + i];

	    /* Skip words with the NOSUGGEST flag */
	    if (flags & WF_NOSUGGEST)
		continue;

	    if (flags & WF_KEEPCAP)
	    {
		/* Must find the word in the keep-case tree. */
		find_keepcap_word(slang, theword, cword);
		p = cword;
	    }
	    else
	    {
		flags |= su->su_badflags;
		if ((flags & WF_CAPMASK) != 0)
		{
		    /* Need to fix case according to "flags". */
		    make_case_word(theword, cword, flags);
		    p = cword;
		}
		else
		    p = theword;
	    }

	    /* Add the suggestion. */
	    if (sps_flags & SPS_DOUBLE)
	    {
		/* Add the suggestion if the score isn't too bad. */
		if (score <= su->su_maxscore)
		    add_suggestion(su, &su->su_sga, p, su->su_badlen,
					       score, 0, FALSE, slang, FALSE);
	    }
	    else
	    {
		/* Add a penalty for words in another region. */
		if ((flags & WF_REGION)
			    && (((unsigned)flags >> 16) & lp->lp_region) == 0)
		    goodscore = SCORE_REGION;
		else
		    goodscore = 0;

		/* Add a small penalty for changing the first letter from
		 * lower to upper case.  Helps for "tath" -> "Kath", which is
		 * less common than "tath" -> "path".  Don't do it when the
		 * letter is the same, that has already been counted. */
		gc = PTR2CHAR(p);
		if (SPELL_ISUPPER(gc))
		{
		    bc = PTR2CHAR(su->su_badword);
		    if (!SPELL_ISUPPER(bc)
				      && SPELL_TOFOLD(bc) != SPELL_TOFOLD(gc))
			goodscore += SCORE_ICASE / 2;
		}

		/* Compute the score for the good word.  This only does letter
		 * insert/delete/swap/replace.  REP items are not considered,
		 * which may make the score a bit higher.
		 * Use a limit for the score to make it work faster.  Use
		 * MAXSCORE(), because RESCORE() will change the score.
		 * If the limit is very high then the iterative method is
		 * inefficient, using an array is quicker. */
		limit = MAXSCORE(su->su_sfmaxscore - goodscore, score);
		if (limit > SCORE_LIMITMAX)
		    goodscore += spell_edit_score(slang, su->su_badword, p);
		else
		    goodscore += spell_edit_score_limit(slang, su->su_badword,
								    p, limit);

		/* When going over the limit don't bother to do the rest. */
		if (goodscore < SCORE_MAXMAX)
		{
		    /* Give a bonus to words seen before. */
		    goodscore = score_wordcount_adj(slang, goodscore, p, FALSE);

		    /* Add the suggestion if the score isn't too bad. */
		    goodscore = RESCORE(goodscore, score);
		    if (goodscore <= su->su_sfmaxscore)
			add_suggestion(su, &su->su_ga, p, su->su_badlen,
					 goodscore, score, TRUE, slang, TRUE);
		}
	    }
	}
	/* smsg("word %s (%d): %s (%d)", sftword, sftnr, theword, orgnr); */
    }
}
6393 
6394 /*
6395  * Find word "word" in fold-case tree for "slang" and return the word number.
6396  */
6397     static int
6398 soundfold_find(slang_T *slang, char_u *word)
6399 {
6400     idx_T	arridx = 0;
6401     int		len;
6402     int		wlen = 0;
6403     int		c;
6404     char_u	*ptr = word;
6405     char_u	*byts;
6406     idx_T	*idxs;
6407     int		wordnr = 0;
6408 
6409     byts = slang->sl_sbyts;
6410     idxs = slang->sl_sidxs;
6411 
6412     for (;;)
6413     {
6414 	/* First byte is the number of possible bytes. */
6415 	len = byts[arridx++];
6416 
6417 	/* If the first possible byte is a zero the word could end here.
6418 	 * If the word ends we found the word.  If not skip the NUL bytes. */
6419 	c = ptr[wlen];
6420 	if (byts[arridx] == NUL)
6421 	{
6422 	    if (c == NUL)
6423 		break;
6424 
6425 	    /* Skip over the zeros, there can be several. */
6426 	    while (len > 0 && byts[arridx] == NUL)
6427 	    {
6428 		++arridx;
6429 		--len;
6430 	    }
6431 	    if (len == 0)
6432 		return -1;    /* no children, word should have ended here */
6433 	    ++wordnr;
6434 	}
6435 
6436 	/* If the word ends we didn't find it. */
6437 	if (c == NUL)
6438 	    return -1;
6439 
6440 	/* Perform a binary search in the list of accepted bytes. */
6441 	if (c == TAB)	    /* <Tab> is handled like <Space> */
6442 	    c = ' ';
6443 	while (byts[arridx] < c)
6444 	{
6445 	    /* The word count is in the first idxs[] entry of the child. */
6446 	    wordnr += idxs[idxs[arridx]];
6447 	    ++arridx;
6448 	    if (--len == 0)	/* end of the bytes, didn't find it */
6449 		return -1;
6450 	}
6451 	if (byts[arridx] != c)	/* didn't find the byte */
6452 	    return -1;
6453 
6454 	/* Continue at the child (if there is one). */
6455 	arridx = idxs[arridx];
6456 	++wlen;
6457 
6458 	/* One space in the good word may stand for several spaces in the
6459 	 * checked word. */
6460 	if (c == ' ')
6461 	    while (ptr[wlen] == ' ' || ptr[wlen] == TAB)
6462 		++wlen;
6463     }
6464 
6465     return wordnr;
6466 }
6467 
6468 /*
6469  * Copy "fword" to "cword", fixing case according to "flags".
6470  */
6471     static void
6472 make_case_word(char_u *fword, char_u *cword, int flags)
6473 {
6474     if (flags & WF_ALLCAP)
6475 	/* Make it all upper-case */
6476 	allcap_copy(fword, cword);
6477     else if (flags & WF_ONECAP)
6478 	/* Make the first letter upper-case */
6479 	onecap_copy(fword, cword, TRUE);
6480     else
6481 	/* Use goodword as-is. */
6482 	STRCPY(cword, fword);
6483 }
6484 
6485 
6486 /*
6487  * Return TRUE if "c1" and "c2" are similar characters according to the MAP
6488  * lines in the .aff file.
6489  */
6490     static int
6491 similar_chars(slang_T *slang, int c1, int c2)
6492 {
6493     int		m1, m2;
6494     char_u	buf[MB_MAXBYTES + 1];
6495     hashitem_T  *hi;
6496 
6497     if (c1 >= 256)
6498     {
6499 	buf[mb_char2bytes(c1, buf)] = 0;
6500 	hi = hash_find(&slang->sl_map_hash, buf);
6501 	if (HASHITEM_EMPTY(hi))
6502 	    m1 = 0;
6503 	else
6504 	    m1 = mb_ptr2char(hi->hi_key + STRLEN(hi->hi_key) + 1);
6505     }
6506     else
6507 	m1 = slang->sl_map_array[c1];
6508     if (m1 == 0)
6509 	return FALSE;
6510 
6511 
6512     if (c2 >= 256)
6513     {
6514 	buf[mb_char2bytes(c2, buf)] = 0;
6515 	hi = hash_find(&slang->sl_map_hash, buf);
6516 	if (HASHITEM_EMPTY(hi))
6517 	    m2 = 0;
6518 	else
6519 	    m2 = mb_ptr2char(hi->hi_key + STRLEN(hi->hi_key) + 1);
6520     }
6521     else
6522 	m2 = slang->sl_map_array[c2];
6523 
6524     return m1 == m2;
6525 }
6526 
6527 /*
6528  * Add a suggestion to the list of suggestions.
6529  * For a suggestion that is already in the list the lowest score is remembered.
6530  */
6531     static void
6532 add_suggestion(
6533     suginfo_T	*su,
6534     garray_T	*gap,		/* either su_ga or su_sga */
6535     char_u	*goodword,
6536     int		badlenarg,	/* len of bad word replaced with "goodword" */
6537     int		score,
6538     int		altscore,
6539     int		had_bonus,	/* value for st_had_bonus */
6540     slang_T	*slang,		/* language for sound folding */
6541     int		maxsf)		/* su_maxscore applies to soundfold score,
6542 				   su_sfmaxscore to the total score. */
6543 {
6544     int		goodlen;	/* len of goodword changed */
6545     int		badlen;		/* len of bad word changed */
6546     suggest_T   *stp;
6547     suggest_T   new_sug;
6548     int		i;
6549     char_u	*pgood, *pbad;
6550 
6551     /* Minimize "badlen" for consistency.  Avoids that changing "the the" to
6552      * "thee the" is added next to changing the first "the" the "thee".  */
6553     pgood = goodword + STRLEN(goodword);
6554     pbad = su->su_badptr + badlenarg;
6555     for (;;)
6556     {
6557 	goodlen = (int)(pgood - goodword);
6558 	badlen = (int)(pbad - su->su_badptr);
6559 	if (goodlen <= 0 || badlen <= 0)
6560 	    break;
6561 	MB_PTR_BACK(goodword, pgood);
6562 	MB_PTR_BACK(su->su_badptr, pbad);
6563 	if (has_mbyte)
6564 	{
6565 	    if (mb_ptr2char(pgood) != mb_ptr2char(pbad))
6566 		break;
6567 	}
6568 	else if (*pgood != *pbad)
6569 		break;
6570     }
6571 
6572     if (badlen == 0 && goodlen == 0)
6573 	/* goodword doesn't change anything; may happen for "the the" changing
6574 	 * the first "the" to itself. */
6575 	return;
6576 
6577     if (gap->ga_len == 0)
6578 	i = -1;
6579     else
6580     {
6581 	/* Check if the word is already there.  Also check the length that is
6582 	 * being replaced "thes," -> "these" is a different suggestion from
6583 	 * "thes" -> "these". */
6584 	stp = &SUG(*gap, 0);
6585 	for (i = gap->ga_len; --i >= 0; ++stp)
6586 	    if (stp->st_wordlen == goodlen
6587 		    && stp->st_orglen == badlen
6588 		    && STRNCMP(stp->st_word, goodword, goodlen) == 0)
6589 	    {
6590 		/*
6591 		 * Found it.  Remember the word with the lowest score.
6592 		 */
6593 		if (stp->st_slang == NULL)
6594 		    stp->st_slang = slang;
6595 
6596 		new_sug.st_score = score;
6597 		new_sug.st_altscore = altscore;
6598 		new_sug.st_had_bonus = had_bonus;
6599 
6600 		if (stp->st_had_bonus != had_bonus)
6601 		{
6602 		    /* Only one of the two had the soundalike score computed.
6603 		     * Need to do that for the other one now, otherwise the
6604 		     * scores can't be compared.  This happens because
6605 		     * suggest_try_change() doesn't compute the soundalike
6606 		     * word to keep it fast, while some special methods set
6607 		     * the soundalike score to zero. */
6608 		    if (had_bonus)
6609 			rescore_one(su, stp);
6610 		    else
6611 		    {
6612 			new_sug.st_word = stp->st_word;
6613 			new_sug.st_wordlen = stp->st_wordlen;
6614 			new_sug.st_slang = stp->st_slang;
6615 			new_sug.st_orglen = badlen;
6616 			rescore_one(su, &new_sug);
6617 		    }
6618 		}
6619 
6620 		if (stp->st_score > new_sug.st_score)
6621 		{
6622 		    stp->st_score = new_sug.st_score;
6623 		    stp->st_altscore = new_sug.st_altscore;
6624 		    stp->st_had_bonus = new_sug.st_had_bonus;
6625 		}
6626 		break;
6627 	    }
6628     }
6629 
6630     if (i < 0 && ga_grow(gap, 1) == OK)
6631     {
6632 	/* Add a suggestion. */
6633 	stp = &SUG(*gap, gap->ga_len);
6634 	stp->st_word = vim_strnsave(goodword, goodlen);
6635 	if (stp->st_word != NULL)
6636 	{
6637 	    stp->st_wordlen = goodlen;
6638 	    stp->st_score = score;
6639 	    stp->st_altscore = altscore;
6640 	    stp->st_had_bonus = had_bonus;
6641 	    stp->st_orglen = badlen;
6642 	    stp->st_slang = slang;
6643 	    ++gap->ga_len;
6644 
6645 	    /* If we have too many suggestions now, sort the list and keep
6646 	     * the best suggestions. */
6647 	    if (gap->ga_len > SUG_MAX_COUNT(su))
6648 	    {
6649 		if (maxsf)
6650 		    su->su_sfmaxscore = cleanup_suggestions(gap,
6651 				      su->su_sfmaxscore, SUG_CLEAN_COUNT(su));
6652 		else
6653 		    su->su_maxscore = cleanup_suggestions(gap,
6654 					su->su_maxscore, SUG_CLEAN_COUNT(su));
6655 	    }
6656 	}
6657     }
6658 }
6659 
6660 /*
6661  * Suggestions may in fact be flagged as errors.  Esp. for banned words and
6662  * for split words, such as "the the".  Remove these from the list here.
6663  */
6664     static void
6665 check_suggestions(
6666     suginfo_T	*su,
6667     garray_T	*gap)		    /* either su_ga or su_sga */
6668 {
6669     suggest_T   *stp;
6670     int		i;
6671     char_u	longword[MAXWLEN + 1];
6672     int		len;
6673     hlf_T	attr;
6674 
6675     stp = &SUG(*gap, 0);
6676     for (i = gap->ga_len - 1; i >= 0; --i)
6677     {
6678 	/* Need to append what follows to check for "the the". */
6679 	vim_strncpy(longword, stp[i].st_word, MAXWLEN);
6680 	len = stp[i].st_wordlen;
6681 	vim_strncpy(longword + len, su->su_badptr + stp[i].st_orglen,
6682 							       MAXWLEN - len);
6683 	attr = HLF_COUNT;
6684 	(void)spell_check(curwin, longword, &attr, NULL, FALSE);
6685 	if (attr != HLF_COUNT)
6686 	{
6687 	    /* Remove this entry. */
6688 	    vim_free(stp[i].st_word);
6689 	    --gap->ga_len;
6690 	    if (i < gap->ga_len)
6691 		mch_memmove(stp + i, stp + i + 1,
6692 				       sizeof(suggest_T) * (gap->ga_len - i));
6693 	}
6694     }
6695 }
6696 
6697 
6698 /*
6699  * Add a word to be banned.
6700  */
6701     static void
6702 add_banned(
6703     suginfo_T	*su,
6704     char_u	*word)
6705 {
6706     char_u	*s;
6707     hash_T	hash;
6708     hashitem_T	*hi;
6709 
6710     hash = hash_hash(word);
6711     hi = hash_lookup(&su->su_banned, word, hash);
6712     if (HASHITEM_EMPTY(hi))
6713     {
6714 	s = vim_strsave(word);
6715 	if (s != NULL)
6716 	    hash_add_item(&su->su_banned, hi, s, hash);
6717     }
6718 }
6719 
6720 /*
6721  * Recompute the score for all suggestions if sound-folding is possible.  This
6722  * is slow, thus only done for the final results.
6723  */
6724     static void
6725 rescore_suggestions(suginfo_T *su)
6726 {
6727     int		i;
6728 
6729     if (su->su_sallang != NULL)
6730 	for (i = 0; i < su->su_ga.ga_len; ++i)
6731 	    rescore_one(su, &SUG(su->su_ga, i));
6732 }
6733 
6734 /*
6735  * Recompute the score for one suggestion if sound-folding is possible.
6736  */
6737     static void
6738 rescore_one(suginfo_T *su, suggest_T *stp)
6739 {
6740     slang_T	*slang = stp->st_slang;
6741     char_u	sal_badword[MAXWLEN];
6742     char_u	*p;
6743 
6744     /* Only rescore suggestions that have no sal score yet and do have a
6745      * language. */
6746     if (slang != NULL && slang->sl_sal.ga_len > 0 && !stp->st_had_bonus)
6747     {
6748 	if (slang == su->su_sallang)
6749 	    p = su->su_sal_badword;
6750 	else
6751 	{
6752 	    spell_soundfold(slang, su->su_fbadword, TRUE, sal_badword);
6753 	    p = sal_badword;
6754 	}
6755 
6756 	stp->st_altscore = stp_sal_score(stp, su, slang, p);
6757 	if (stp->st_altscore == SCORE_MAXMAX)
6758 	    stp->st_altscore = SCORE_BIG;
6759 	stp->st_score = RESCORE(stp->st_score, stp->st_altscore);
6760 	stp->st_had_bonus = TRUE;
6761     }
6762 }
6763 
6764 static int sug_compare(const void *s1, const void *s2);
6765 
6766 /*
6767  * Function given to qsort() to sort the suggestions on st_score.
6768  * First on "st_score", then "st_altscore" then alphabetically.
6769  */
6770     static int
6771 sug_compare(const void *s1, const void *s2)
6772 {
6773     suggest_T	*p1 = (suggest_T *)s1;
6774     suggest_T	*p2 = (suggest_T *)s2;
6775     int		n = p1->st_score - p2->st_score;
6776 
6777     if (n == 0)
6778     {
6779 	n = p1->st_altscore - p2->st_altscore;
6780 	if (n == 0)
6781 	    n = STRICMP(p1->st_word, p2->st_word);
6782     }
6783     return n;
6784 }
6785 
6786 /*
6787  * Cleanup the suggestions:
6788  * - Sort on score.
6789  * - Remove words that won't be displayed.
6790  * Returns the maximum score in the list or "maxscore" unmodified.
6791  */
6792     static int
6793 cleanup_suggestions(
6794     garray_T	*gap,
6795     int		maxscore,
6796     int		keep)		/* nr of suggestions to keep */
6797 {
6798     suggest_T   *stp = &SUG(*gap, 0);
6799     int		i;
6800 
6801     /* Sort the list. */
6802     qsort(gap->ga_data, (size_t)gap->ga_len, sizeof(suggest_T), sug_compare);
6803 
6804     /* Truncate the list to the number of suggestions that will be displayed. */
6805     if (gap->ga_len > keep)
6806     {
6807 	for (i = keep; i < gap->ga_len; ++i)
6808 	    vim_free(stp[i].st_word);
6809 	gap->ga_len = keep;
6810 	return stp[keep - 1].st_score;
6811     }
6812     return maxscore;
6813 }
6814 
#if defined(FEAT_EVAL) || defined(PROTO)
/*
 * Soundfold a string, for soundfold().
 * Uses the first language of the current window that has SAL items.  When
 * spell checking is not active or no language supports sound folding a copy
 * of "word" is returned.
 * Result is in allocated memory, NULL for an error.
 */
    char_u *
eval_soundfold(char_u *word)
{
    langp_T	*lang;
    char_u	folded[MAXWLEN];
    int		n;

    if (!curwin->w_p_spell || *curwin->w_s->b_p_spl == NUL)
	return vim_strsave(word);

    /* Use the sound-folding of the first language that supports it. */
    for (n = 0; n < curwin->w_s->b_langp.ga_len; ++n)
    {
	lang = LANGP_ENTRY(curwin->w_s->b_langp, n);
	if (lang->lp_slang->sl_sal.ga_len <= 0)
	    continue;

	/* soundfold the word */
	spell_soundfold(lang->lp_slang, word, FALSE, folded);
	return vim_strsave(folded);
    }

    /* No language with sound folding, return word as-is. */
    return vim_strsave(word);
}
#endif
6844 
6845 /*
6846  * Turn "inword" into its sound-a-like equivalent in "res[MAXWLEN]".
6847  *
6848  * There are many ways to turn a word into a sound-a-like representation.  The
6849  * oldest is Soundex (1918!).   A nice overview can be found in "Approximate
6850  * swedish name matching - survey and test of different algorithms" by Klas
6851  * Erikson.
6852  *
6853  * We support two methods:
6854  * 1. SOFOFROM/SOFOTO do a simple character mapping.
6855  * 2. SAL items define a more advanced sound-folding (and much slower).
6856  */
6857     void
6858 spell_soundfold(
6859     slang_T	*slang,
6860     char_u	*inword,
6861     int		folded,	    /* "inword" is already case-folded */
6862     char_u	*res)
6863 {
6864     char_u	fword[MAXWLEN];
6865     char_u	*word;
6866 
6867     if (slang->sl_sofo)
6868 	/* SOFOFROM and SOFOTO used */
6869 	spell_soundfold_sofo(slang, inword, res);
6870     else
6871     {
6872 	/* SAL items used.  Requires the word to be case-folded. */
6873 	if (folded)
6874 	    word = inword;
6875 	else
6876 	{
6877 	    (void)spell_casefold(inword, (int)STRLEN(inword), fword, MAXWLEN);
6878 	    word = fword;
6879 	}
6880 
6881 	if (has_mbyte)
6882 	    spell_soundfold_wsal(slang, word, res);
6883 	else
6884 	    spell_soundfold_sal(slang, word, res);
6885     }
6886 }
6887 
6888 /*
6889  * Perform sound folding of "inword" into "res" according to SOFOFROM and
6890  * SOFOTO lines.
6891  */
6892     static void
6893 spell_soundfold_sofo(slang_T *slang, char_u *inword, char_u *res)
6894 {
6895     char_u	*s;
6896     int		ri = 0;
6897     int		c;
6898 
6899     if (has_mbyte)
6900     {
6901 	int	prevc = 0;
6902 	int	*ip;
6903 
6904 	/* The sl_sal_first[] table contains the translation for chars up to
6905 	 * 255, sl_sal the rest. */
6906 	for (s = inword; *s != NUL; )
6907 	{
6908 	    c = mb_cptr2char_adv(&s);
6909 	    if (enc_utf8 ? utf_class(c) == 0 : VIM_ISWHITE(c))
6910 		c = ' ';
6911 	    else if (c < 256)
6912 		c = slang->sl_sal_first[c];
6913 	    else
6914 	    {
6915 		ip = ((int **)slang->sl_sal.ga_data)[c & 0xff];
6916 		if (ip == NULL)		/* empty list, can't match */
6917 		    c = NUL;
6918 		else
6919 		    for (;;)		/* find "c" in the list */
6920 		    {
6921 			if (*ip == 0)	/* not found */
6922 			{
6923 			    c = NUL;
6924 			    break;
6925 			}
6926 			if (*ip == c)	/* match! */
6927 			{
6928 			    c = ip[1];
6929 			    break;
6930 			}
6931 			ip += 2;
6932 		    }
6933 	    }
6934 
6935 	    if (c != NUL && c != prevc)
6936 	    {
6937 		ri += mb_char2bytes(c, res + ri);
6938 		if (ri + MB_MAXBYTES > MAXWLEN)
6939 		    break;
6940 		prevc = c;
6941 	    }
6942 	}
6943     }
6944     else
6945     {
6946 	/* The sl_sal_first[] table contains the translation. */
6947 	for (s = inword; (c = *s) != NUL; ++s)
6948 	{
6949 	    if (VIM_ISWHITE(c))
6950 		c = ' ';
6951 	    else
6952 		c = slang->sl_sal_first[c];
6953 	    if (c != NUL && (ri == 0 || res[ri - 1] != c))
6954 		res[ri++] = c;
6955 	}
6956     }
6957 
6958     res[ri] = NUL;
6959 }
6960 
/*
 * Perform sound folding of "inword" into "res" according to the SAL items
 * of "slang".  Single-byte version: spell_soundfold() calls this when
 * "has_mbyte" is not set and passes a case-folded word.
 * "inword" is not changed, the rules are applied to a local copy.
 */
    static void
spell_soundfold_sal(slang_T *slang, char_u *inword, char_u *res)
{
    salitem_T	*smp;
    char_u	word[MAXWLEN];
    char_u	*s = inword;
    char_u	*t;
    char_u	*pf;
    int		i, j, z;
    int		reslen;
    int		n, k = 0;
    int		z0;
    int		k0;
    int		n0;
    int		c;
    int		pri;
    int		p0 = -333;
    int		c0;

    /* Remove accents, if wanted.  We actually remove all non-word characters.
     * But keep white space.  We need a copy, the word may be changed here. */
    if (slang->sl_rem_accents)
    {
	/* NOTE(review): nothing limits "t" to the size of "word" here, this
	 * presumably relies on callers passing at most MAXWLEN - 1 bytes. */
	t = word;
	while (*s != NUL)
	{
	    if (VIM_ISWHITE(*s))
	    {
		/* A sequence of white space becomes one space. */
		*t++ = ' ';
		s = skipwhite(s);
	    }
	    else
	    {
		if (spell_iswordp_nmw(s, curwin))
		    *t++ = *s;
		++s;
	    }
	}
	*t = NUL;
    }
    else
	vim_strncpy(word, s, MAXWLEN - 1);

    smp = (salitem_T *)slang->sl_sal.ga_data;

    /*
     * This comes from Aspell phonet.cpp.  Converted from C++ to C.
     * Changed to keep spaces.
     */
    /* "i" is the position in "word", "reslen" the number of bytes stored in
     * "res".  "z" is set when the previous rule used was a '<' rule. */
    i = reslen = z = 0;
    while ((c = word[i]) != NUL)
    {
	/* Start with the first rule that has the character in the word. */
	n = slang->sl_sal_first[c];
	z0 = 0;

	if (n >= 0)
	{
	    /* check all rules for the same letter */
	    for (; (s = smp[n].sm_lead)[0] == c; ++n)
	    {
		/* Quickly skip entries that don't match the word.  Most
		 * entries are less then three chars, optimize for that. */
		k = smp[n].sm_leadlen;
		if (k > 1)
		{
		    if (word[i + 1] != s[1])
			continue;
		    if (k > 2)
		    {
			for (j = 2; j < k; ++j)
			    if (word[i + j] != s[j])
				break;
			if (j < k)
			    continue;
		    }
		}

		if ((pf = smp[n].sm_oneof) != NULL)
		{
		    /* Check for match with one of the chars in "sm_oneof". */
		    while (*pf != NUL && *pf != word[i + k])
			++pf;
		    if (*pf == NUL)
			continue;
		    ++k;
		}
		s = smp[n].sm_rules;
		pri = 5;    /* default priority */

		/* Each leading '-' in the rules makes the match one
		 * character shorter; "k0" keeps the full length. */
		p0 = *s;
		k0 = k;
		while (*s == '-' && k > 1)
		{
		    k--;
		    s++;
		}
		if (*s == '<')
		    s++;
		if (VIM_ISDIGIT(*s))
		{
		    /* determine priority */
		    pri = *s - '0';
		    s++;
		}
		if (*s == '^' && *(s + 1) == '^')
		    s++;

		/* The rule applies when no condition is left, or '^' is
		 * given and the match is at the start of a word, or '$' is
		 * given and the match is at the end of a word. */
		if (*s == NUL
			|| (*s == '^'
			    && (i == 0 || !(word[i - 1] == ' '
				      || spell_iswordp(word + i - 1, curwin)))
			    && (*(s + 1) != '$'
				|| (!spell_iswordp(word + i + k0, curwin))))
			|| (*s == '$' && i > 0
			    && spell_iswordp(word + i - 1, curwin)
			    && (!spell_iswordp(word + i + k0, curwin))))
		{
		    /* search for followup rules, if:    */
		    /* followup and k > 1  and  NO '-' in searchstring */
		    c0 = word[i + k - 1];
		    n0 = slang->sl_sal_first[c0];

		    if (slang->sl_followup && k > 1 && n0 >= 0
					   && p0 != '-' && word[i + k] != NUL)
		    {
			/* test follow-up rule for "word[i + k]" */
			for ( ; (s = smp[n0].sm_lead)[0] == c0; ++n0)
			{
			    /* Quickly skip entries that don't match the word.
			     * */
			    k0 = smp[n0].sm_leadlen;
			    if (k0 > 1)
			    {
				if (word[i + k] != s[1])
				    continue;
				if (k0 > 2)
				{
				    pf = word + i + k + 1;
				    for (j = 2; j < k0; ++j)
					if (*pf++ != s[j])
					    break;
				    if (j < k0)
					continue;
				}
			    }
			    k0 += k - 1;

			    if ((pf = smp[n0].sm_oneof) != NULL)
			    {
				/* Check for match with one of the chars in
				 * "sm_oneof". */
				while (*pf != NUL && *pf != word[i + k0])
				    ++pf;
				if (*pf == NUL)
				    continue;
				++k0;
			    }

			    /* "p0" now holds the priority of the follow-up
			     * rule, 5 is the default. */
			    p0 = 5;
			    s = smp[n0].sm_rules;
			    while (*s == '-')
			    {
				/* "k0" gets NOT reduced because
				 * "if (k0 == k)" */
				s++;
			    }
			    if (*s == '<')
				s++;
			    if (VIM_ISDIGIT(*s))
			    {
				p0 = *s - '0';
				s++;
			    }

			    if (*s == NUL
				    /* *s == '^' cuts */
				    || (*s == '$'
					    && !spell_iswordp(word + i + k0,
								     curwin)))
			    {
				if (k0 == k)
				    /* this is just a piece of the string */
				    continue;

				if (p0 < pri)
				    /* priority too low */
				    continue;
				/* rule fits; stop search */
				break;
			    }
			}

			/* A follow-up rule with sufficient priority was
			 * found: skip the current rule. */
			if (p0 >= pri && smp[n0].sm_lead[0] == c0)
			    continue;
		    }

		    /* replace string */
		    s = smp[n].sm_to;
		    if (s == NULL)
			s = (char_u *)"";
		    pf = smp[n].sm_rules;
		    p0 = (vim_strchr(pf, '<') != NULL) ? 1 : 0;
		    if (p0 == 1 && z == 0)
		    {
			/* rule with '<' is used */
			/* The replacement is put back in "word" and handled
			 * again, "i" is not advanced. */
			if (reslen > 0 && *s != NUL && (res[reslen - 1] == c
						    || res[reslen - 1] == *s))
			    reslen--;
			z0 = 1;
			z = 1;
			k0 = 0;
			while (*s != NUL && word[i + k0] != NUL)
			{
			    word[i + k0] = *s;
			    k0++;
			    s++;
			}
			if (k > k0)
			    STRMOVE(word + i + k0, word + i + k);

			/* new "actual letter" */
			c = word[i];
		    }
		    else
		    {
			/* no '<' rule used */
			/* Store the replacement in "res", except its last
			 * character, which becomes the new "c". */
			i += k - 1;
			z = 0;
			while (*s != NUL && s[1] != NUL && reslen < MAXWLEN)
			{
			    if (reslen == 0 || res[reslen - 1] != *s)
				res[reslen++] = *s;
			    s++;
			}
			/* new "actual letter" */
			c = *s;
			if (strstr((char *)pf, "^^") != NULL)
			{
			    /* NOTE(review): unlike the other stores this one
			     * does not check "reslen" against MAXWLEN. */
			    if (c != NUL)
				res[reslen++] = c;
			    STRMOVE(word, word + i + 1);
			    i = 0;
			    z0 = 1;
			}
		    }
		    break;
		}
	    }
	}
	else if (VIM_ISWHITE(c))
	{
	    c = ' ';
	    k = 1;
	}

	/* "z0" is zero when the position in "word" was not reset above:
	 * store the current character, if needed, and go to the next one. */
	if (z0 == 0)
	{
	    if (k && !p0 && reslen < MAXWLEN && c != NUL
		    && (!slang->sl_collapse || reslen == 0
						     || res[reslen - 1] != c))
		/* condense only double letters */
		res[reslen++] = c;

	    i++;
	    z = 0;
	    k = 0;
	}
    }

    /* NOTE(review): the checks above allow "reslen" to become MAXWLEN, in
     * which case this writes res[MAXWLEN] - confirm that cannot happen or
     * that "res" has room for it. */
    res[reslen] = NUL;
}
7233 
7234 /*
7235  * Turn "inword" into its sound-a-like equivalent in "res[MAXWLEN]".
7236  * Multi-byte version of spell_soundfold().
7237  */
7238     static void
7239 spell_soundfold_wsal(slang_T *slang, char_u *inword, char_u *res)
7240 {
7241     salitem_T	*smp = (salitem_T *)slang->sl_sal.ga_data;
7242     int		word[MAXWLEN];
7243     int		wres[MAXWLEN];
7244     int		l;
7245     char_u	*s;
7246     int		*ws;
7247     char_u	*t;
7248     int		*pf;
7249     int		i, j, z;
7250     int		reslen;
7251     int		n, k = 0;
7252     int		z0;
7253     int		k0;
7254     int		n0;
7255     int		c;
7256     int		pri;
7257     int		p0 = -333;
7258     int		c0;
7259     int		did_white = FALSE;
7260     int		wordlen;
7261 
7262 
7263     /*
7264      * Convert the multi-byte string to a wide-character string.
7265      * Remove accents, if wanted.  We actually remove all non-word characters.
7266      * But keep white space.
7267      */
7268     wordlen = 0;
7269     for (s = inword; *s != NUL; )
7270     {
7271 	t = s;
7272 	c = mb_cptr2char_adv(&s);
7273 	if (slang->sl_rem_accents)
7274 	{
7275 	    if (enc_utf8 ? utf_class(c) == 0 : VIM_ISWHITE(c))
7276 	    {
7277 		if (did_white)
7278 		    continue;
7279 		c = ' ';
7280 		did_white = TRUE;
7281 	    }
7282 	    else
7283 	    {
7284 		did_white = FALSE;
7285 		if (!spell_iswordp_nmw(t, curwin))
7286 		    continue;
7287 	    }
7288 	}
7289 	word[wordlen++] = c;
7290     }
7291     word[wordlen] = NUL;
7292 
7293     /*
7294      * This algorithm comes from Aspell phonet.cpp.
7295      * Converted from C++ to C.  Added support for multi-byte chars.
7296      * Changed to keep spaces.
7297      */
7298     i = reslen = z = 0;
7299     while ((c = word[i]) != NUL)
7300     {
7301 	/* Start with the first rule that has the character in the word. */
7302 	n = slang->sl_sal_first[c & 0xff];
7303 	z0 = 0;
7304 
7305 	if (n >= 0)
7306 	{
7307 	    /* Check all rules for the same index byte.
7308 	     * If c is 0x300 need extra check for the end of the array, as
7309 	     * (c & 0xff) is NUL. */
7310 	    for (; ((ws = smp[n].sm_lead_w)[0] & 0xff) == (c & 0xff)
7311 							 && ws[0] != NUL; ++n)
7312 	    {
7313 		/* Quickly skip entries that don't match the word.  Most
7314 		 * entries are less then three chars, optimize for that. */
7315 		if (c != ws[0])
7316 		    continue;
7317 		k = smp[n].sm_leadlen;
7318 		if (k > 1)
7319 		{
7320 		    if (word[i + 1] != ws[1])
7321 			continue;
7322 		    if (k > 2)
7323 		    {
7324 			for (j = 2; j < k; ++j)
7325 			    if (word[i + j] != ws[j])
7326 				break;
7327 			if (j < k)
7328 			    continue;
7329 		    }
7330 		}
7331 
7332 		if ((pf = smp[n].sm_oneof_w) != NULL)
7333 		{
7334 		    /* Check for match with one of the chars in "sm_oneof". */
7335 		    while (*pf != NUL && *pf != word[i + k])
7336 			++pf;
7337 		    if (*pf == NUL)
7338 			continue;
7339 		    ++k;
7340 		}
7341 		s = smp[n].sm_rules;
7342 		pri = 5;    /* default priority */
7343 
7344 		p0 = *s;
7345 		k0 = k;
7346 		while (*s == '-' && k > 1)
7347 		{
7348 		    k--;
7349 		    s++;
7350 		}
7351 		if (*s == '<')
7352 		    s++;
7353 		if (VIM_ISDIGIT(*s))
7354 		{
7355 		    /* determine priority */
7356 		    pri = *s - '0';
7357 		    s++;
7358 		}
7359 		if (*s == '^' && *(s + 1) == '^')
7360 		    s++;
7361 
7362 		if (*s == NUL
7363 			|| (*s == '^'
7364 			    && (i == 0 || !(word[i - 1] == ' '
7365 				    || spell_iswordp_w(word + i - 1, curwin)))
7366 			    && (*(s + 1) != '$'
7367 				|| (!spell_iswordp_w(word + i + k0, curwin))))
7368 			|| (*s == '$' && i > 0
7369 			    && spell_iswordp_w(word + i - 1, curwin)
7370 			    && (!spell_iswordp_w(word + i + k0, curwin))))
7371 		{
7372 		    /* search for followup rules, if:    */
7373 		    /* followup and k > 1  and  NO '-' in searchstring */
7374 		    c0 = word[i + k - 1];
7375 		    n0 = slang->sl_sal_first[c0 & 0xff];
7376 
7377 		    if (slang->sl_followup && k > 1 && n0 >= 0
7378 					   && p0 != '-' && word[i + k] != NUL)
7379 		    {
7380 			/* Test follow-up rule for "word[i + k]"; loop over
7381 			 * all entries with the same index byte. */
7382 			for ( ; ((ws = smp[n0].sm_lead_w)[0] & 0xff)
7383 							 == (c0 & 0xff); ++n0)
7384 			{
7385 			    /* Quickly skip entries that don't match the word.
7386 			     */
7387 			    if (c0 != ws[0])
7388 				continue;
7389 			    k0 = smp[n0].sm_leadlen;
7390 			    if (k0 > 1)
7391 			    {
7392 				if (word[i + k] != ws[1])
7393 				    continue;
7394 				if (k0 > 2)
7395 				{
7396 				    pf = word + i + k + 1;
7397 				    for (j = 2; j < k0; ++j)
7398 					if (*pf++ != ws[j])
7399 					    break;
7400 				    if (j < k0)
7401 					continue;
7402 				}
7403 			    }
7404 			    k0 += k - 1;
7405 
7406 			    if ((pf = smp[n0].sm_oneof_w) != NULL)
7407 			    {
7408 				/* Check for match with one of the chars in
7409 				 * "sm_oneof". */
7410 				while (*pf != NUL && *pf != word[i + k0])
7411 				    ++pf;
7412 				if (*pf == NUL)
7413 				    continue;
7414 				++k0;
7415 			    }
7416 
7417 			    p0 = 5;
7418 			    s = smp[n0].sm_rules;
7419 			    while (*s == '-')
7420 			    {
7421 				/* "k0" gets NOT reduced because
7422 				 * "if (k0 == k)" */
7423 				s++;
7424 			    }
7425 			    if (*s == '<')
7426 				s++;
7427 			    if (VIM_ISDIGIT(*s))
7428 			    {
7429 				p0 = *s - '0';
7430 				s++;
7431 			    }
7432 
7433 			    if (*s == NUL
7434 				    /* *s == '^' cuts */
7435 				    || (*s == '$'
7436 					 && !spell_iswordp_w(word + i + k0,
7437 								     curwin)))
7438 			    {
7439 				if (k0 == k)
7440 				    /* this is just a piece of the string */
7441 				    continue;
7442 
7443 				if (p0 < pri)
7444 				    /* priority too low */
7445 				    continue;
7446 				/* rule fits; stop search */
7447 				break;
7448 			    }
7449 			}
7450 
7451 			if (p0 >= pri && (smp[n0].sm_lead_w[0] & 0xff)
7452 							       == (c0 & 0xff))
7453 			    continue;
7454 		    }
7455 
7456 		    /* replace string */
7457 		    ws = smp[n].sm_to_w;
7458 		    s = smp[n].sm_rules;
7459 		    p0 = (vim_strchr(s, '<') != NULL) ? 1 : 0;
7460 		    if (p0 == 1 && z == 0)
7461 		    {
7462 			/* rule with '<' is used */
7463 			if (reslen > 0 && ws != NULL && *ws != NUL
7464 				&& (wres[reslen - 1] == c
7465 						    || wres[reslen - 1] == *ws))
7466 			    reslen--;
7467 			z0 = 1;
7468 			z = 1;
7469 			k0 = 0;
7470 			if (ws != NULL)
7471 			    while (*ws != NUL && word[i + k0] != NUL)
7472 			    {
7473 				word[i + k0] = *ws;
7474 				k0++;
7475 				ws++;
7476 			    }
7477 			if (k > k0)
7478 			    mch_memmove(word + i + k0, word + i + k,
7479 				    sizeof(int) * (wordlen - (i + k) + 1));
7480 
7481 			/* new "actual letter" */
7482 			c = word[i];
7483 		    }
7484 		    else
7485 		    {
7486 			/* no '<' rule used */
7487 			i += k - 1;
7488 			z = 0;
7489 			if (ws != NULL)
7490 			    while (*ws != NUL && ws[1] != NUL
7491 							  && reslen < MAXWLEN)
7492 			    {
7493 				if (reslen == 0 || wres[reslen - 1] != *ws)
7494 				    wres[reslen++] = *ws;
7495 				ws++;
7496 			    }
7497 			/* new "actual letter" */
7498 			if (ws == NULL)
7499 			    c = NUL;
7500 			else
7501 			    c = *ws;
7502 			if (strstr((char *)s, "^^") != NULL)
7503 			{
7504 			    if (c != NUL)
7505 				wres[reslen++] = c;
7506 			    mch_memmove(word, word + i + 1,
7507 				       sizeof(int) * (wordlen - (i + 1) + 1));
7508 			    i = 0;
7509 			    z0 = 1;
7510 			}
7511 		    }
7512 		    break;
7513 		}
7514 	    }
7515 	}
7516 	else if (VIM_ISWHITE(c))
7517 	{
7518 	    c = ' ';
7519 	    k = 1;
7520 	}
7521 
7522 	if (z0 == 0)
7523 	{
7524 	    if (k && !p0 && reslen < MAXWLEN && c != NUL
7525 		    && (!slang->sl_collapse || reslen == 0
7526 						     || wres[reslen - 1] != c))
7527 		/* condense only double letters */
7528 		wres[reslen++] = c;
7529 
7530 	    i++;
7531 	    z = 0;
7532 	    k = 0;
7533 	}
7534     }
7535 
7536     /* Convert wide characters in "wres" to a multi-byte string in "res". */
7537     l = 0;
7538     for (n = 0; n < reslen; ++n)
7539     {
7540 	l += mb_char2bytes(wres[n], res + l);
7541 	if (l + MB_MAXBYTES > MAXWLEN)
7542 	    break;
7543     }
7544     res[l] = NUL;
7545 }
7546 
7547 /*
7548  * Compute a score for two sound-a-like words.
7549  * This permits up to two inserts/deletes/swaps/etc. to keep things fast.
7550  * Instead of a generic loop we write out the code.  That keeps it fast by
7551  * avoiding checks that will not be possible.
 *
 * Returns the sum of the SCORE_ values of the changes that were found, plus
 * "2 * SCORE_DEL / 3" when a leading "*" was skipped in one of the words.
 * Returns SCORE_MAXMAX when the lengths differ by more than two or when none
 * of the tried combinations of changes makes the words equal.
7552  */
7553     static int
7554 soundalike_score(
7555     char_u	*goodstart,	/* sound-folded good word */
7556     char_u	*badstart)	/* sound-folded bad word */
7557 {
7558     char_u	*goodsound = goodstart;
7559     char_u	*badsound = badstart;
7560     int		goodlen;
7561     int		badlen;
7562     int		n;
7563     char_u	*pl, *ps;
7564     char_u	*pl2, *ps2;
7565     int		score = 0;
7566 
7567     /* Adding/inserting "*" at the start (word starts with vowel) shouldn't be
7568      * counted so much, vowels halfway the word aren't counted at all. */
7569     if ((*badsound == '*' || *goodsound == '*') && *badsound != *goodsound)
7570     {
7571 	if ((badsound[0] == NUL && goodsound[1] == NUL)
7572 	    || (goodsound[0] == NUL && badsound[1] == NUL))
7573 	    /* changing word with vowel to word without a sound */
7574 	    return SCORE_DEL;
7575 	if (badsound[0] == NUL || goodsound[0] == NUL)
7576 	    /* more than two changes */
7577 	    return SCORE_MAXMAX;
7578 
7579 	if (badsound[1] == goodsound[1]
7580 		|| (badsound[1] != NUL
7581 		    && goodsound[1] != NUL
7582 		    && badsound[2] == goodsound[2]))
7583 	{
7584 	    /* handle like a substitute */
7585 	}
7586 	else
7587 	{
	    /* Skip the "*" in the word that has it and count that as two
	     * thirds of a delete. */
7588 	    score = 2 * SCORE_DEL / 3;
7589 	    if (*badsound == '*')
7590 		++badsound;
7591 	    else
7592 		++goodsound;
7593 	}
7594     }
7595 
7596     goodlen = (int)STRLEN(goodsound);
7597     badlen = (int)STRLEN(badsound);
7598 
7599     /* Return quickly if the lengths are too different to be fixed by two
7600      * changes. */
7601     n = goodlen - badlen;
7602     if (n < -2 || n > 2)
7603 	return SCORE_MAXMAX;
7604 
    /* "pl" is the longer word and "ps" the shorter one.  When the lengths
     * are equal "pl" is the bad word. */
7605     if (n > 0)
7606     {
7607 	pl = goodsound;	    /* goodsound is longest */
7608 	ps = badsound;
7609     }
7610     else
7611     {
7612 	pl = badsound;	    /* badsound is longest */
7613 	ps = goodsound;
7614     }
7615 
7616     /* Skip over the identical part. */
7617     while (*pl == *ps && *pl != NUL)
7618     {
7619 	++pl;
7620 	++ps;
7621     }
7622 
    /* "n" is the length difference, it decides what changes are possible. */
7623     switch (n)
7624     {
7625 	case -2:
7626 	case 2:
7627 	    /*
7628 	     * Must delete two characters from "pl".
7629 	     */
7630 	    ++pl;	/* first delete */
7631 	    while (*pl == *ps)
7632 	    {
7633 		++pl;
7634 		++ps;
7635 	    }
7636 	    /* strings must be equal after second delete */
7637 	    if (STRCMP(pl + 1, ps) == 0)
7638 		return score + SCORE_DEL * 2;
7639 
7640 	    /* Failed to compare. */
7641 	    break;
7642 
7643 	case -1:
7644 	case 1:
7645 	    /*
7646 	     * Minimal one delete from "pl" required.
7647 	     */
7648 
7649 	    /* 1: delete */
7650 	    pl2 = pl + 1;
7651 	    ps2 = ps;
7652 	    while (*pl2 == *ps2)
7653 	    {
7654 		if (*pl2 == NUL)	/* reached the end */
7655 		    return score + SCORE_DEL;
7656 		++pl2;
7657 		++ps2;
7658 	    }
7659 
7660 	    /* 2: delete then swap, then rest must be equal */
7661 	    if (pl2[0] == ps2[1] && pl2[1] == ps2[0]
7662 					     && STRCMP(pl2 + 2, ps2 + 2) == 0)
7663 		return score + SCORE_DEL + SCORE_SWAP;
7664 
7665 	    /* 3: delete then substitute, then the rest must be equal */
7666 	    if (STRCMP(pl2 + 1, ps2 + 1) == 0)
7667 		return score + SCORE_DEL + SCORE_SUBST;
7668 
7669 	    /* 4: first swap then delete */
7670 	    if (pl[0] == ps[1] && pl[1] == ps[0])
7671 	    {
7672 		pl2 = pl + 2;	    /* swap, skip two chars */
7673 		ps2 = ps + 2;
7674 		while (*pl2 == *ps2)
7675 		{
7676 		    ++pl2;
7677 		    ++ps2;
7678 		}
7679 		/* delete a char and then strings must be equal */
7680 		if (STRCMP(pl2 + 1, ps2) == 0)
7681 		    return score + SCORE_SWAP + SCORE_DEL;
7682 	    }
7683 
7684 	    /* 5: first substitute then delete */
7685 	    pl2 = pl + 1;	    /* substitute, skip one char */
7686 	    ps2 = ps + 1;
7687 	    while (*pl2 == *ps2)
7688 	    {
7689 		++pl2;
7690 		++ps2;
7691 	    }
7692 	    /* delete a char and then strings must be equal */
7693 	    if (STRCMP(pl2 + 1, ps2) == 0)
7694 		return score + SCORE_SUBST + SCORE_DEL;
7695 
7696 	    /* Failed to compare. */
7697 	    break;
7698 
7699 	case 0:
7700 	    /*
7701 	     * Lengths are equal, thus changes must result in same length: An
7702 	     * insert is only possible in combination with a delete.
7703 	     * 1: check if for identical strings
7704 	     */
7705 	    if (*pl == NUL)
7706 		return score;
7707 
7708 	    /* 2: swap */
7709 	    if (pl[0] == ps[1] && pl[1] == ps[0])
7710 	    {
7711 		pl2 = pl + 2;	    /* swap, skip two chars */
7712 		ps2 = ps + 2;
7713 		while (*pl2 == *ps2)
7714 		{
7715 		    if (*pl2 == NUL)	/* reached the end */
7716 			return score + SCORE_SWAP;
7717 		    ++pl2;
7718 		    ++ps2;
7719 		}
7720 		/* 3: swap and swap again */
7721 		if (pl2[0] == ps2[1] && pl2[1] == ps2[0]
7722 					     && STRCMP(pl2 + 2, ps2 + 2) == 0)
7723 		    return score + SCORE_SWAP + SCORE_SWAP;
7724 
7725 		/* 4: swap and substitute */
7726 		if (STRCMP(pl2 + 1, ps2 + 1) == 0)
7727 		    return score + SCORE_SWAP + SCORE_SUBST;
7728 	    }
7729 
7730 	    /* 5: substitute */
7731 	    pl2 = pl + 1;
7732 	    ps2 = ps + 1;
7733 	    while (*pl2 == *ps2)
7734 	    {
7735 		if (*pl2 == NUL)	/* reached the end */
7736 		    return score + SCORE_SUBST;
7737 		++pl2;
7738 		++ps2;
7739 	    }
7740 
7741 	    /* 6: substitute and swap */
7742 	    if (pl2[0] == ps2[1] && pl2[1] == ps2[0]
7743 					     && STRCMP(pl2 + 2, ps2 + 2) == 0)
7744 		return score + SCORE_SUBST + SCORE_SWAP;
7745 
7746 	    /* 7: substitute and substitute */
7747 	    if (STRCMP(pl2 + 1, ps2 + 1) == 0)
7748 		return score + SCORE_SUBST + SCORE_SUBST;
7749 
7750 	    /* 8: insert then delete */
7751 	    pl2 = pl;
7752 	    ps2 = ps + 1;
7753 	    while (*pl2 == *ps2)
7754 	    {
7755 		++pl2;
7756 		++ps2;
7757 	    }
7758 	    if (STRCMP(pl2 + 1, ps2) == 0)
7759 		return score + SCORE_INS + SCORE_DEL;
7760 
7761 	    /* 9: delete then insert */
7762 	    pl2 = pl + 1;
7763 	    ps2 = ps;
7764 	    while (*pl2 == *ps2)
7765 	    {
7766 		++pl2;
7767 		++ps2;
7768 	    }
7769 	    if (STRCMP(pl2, ps2 + 1) == 0)
7770 		return score + SCORE_INS + SCORE_DEL;
7771 
7772 	    /* Failed to compare. */
7773 	    break;
7774     }
7775 
    /* None of the tried changes made the words equal. */
7776     return SCORE_MAXMAX;
7777 }
7778 
7779 /*
7780  * Compute the "edit distance" to turn "badword" into "goodword".  The less
7781  * deletes/inserts/substitutes/swaps are required the lower the score.
7782  *
7783  * The algorithm is described by Du and Chang, 1992.
7784  * The implementation of the algorithm comes from Aspell editdist.cpp,
7785  * edit_distance().  It has been converted from C++ to C and modified to
7786  * support multi-byte characters.
7787  */
7788     static int
7789 spell_edit_score(
7790     slang_T	*slang,
7791     char_u	*badword,
7792     char_u	*goodword)
7793 {
7794     int		*cnt;
7795     int		badlen, goodlen;	/* lengths including NUL */
7796     int		j, i;
7797     int		t;
7798     int		bc, gc;
7799     int		pbc, pgc;
7800     char_u	*p;
7801     int		wbadword[MAXWLEN];
7802     int		wgoodword[MAXWLEN];
7803 
7804     if (has_mbyte)
7805     {
7806 	/* Get the characters from the multi-byte strings and put them in an
7807 	 * int array for easy access. */
7808 	for (p = badword, badlen = 0; *p != NUL; )
7809 	    wbadword[badlen++] = mb_cptr2char_adv(&p);
7810 	wbadword[badlen++] = 0;
7811 	for (p = goodword, goodlen = 0; *p != NUL; )
7812 	    wgoodword[goodlen++] = mb_cptr2char_adv(&p);
7813 	wgoodword[goodlen++] = 0;
7814     }
7815     else
7816     {
7817 	badlen = (int)STRLEN(badword) + 1;
7818 	goodlen = (int)STRLEN(goodword) + 1;
7819     }
7820 
7821     /* We use "cnt" as an array: CNT(badword_idx, goodword_idx). */
7822 #define CNT(a, b)   cnt[(a) + (b) * (badlen + 1)]
7823     cnt = ALLOC_MULT(int, (badlen + 1) * (goodlen + 1));
7824     if (cnt == NULL)
7825 	return 0;	/* out of memory */
7826 
7827     CNT(0, 0) = 0;
7828     for (j = 1; j <= goodlen; ++j)
7829 	CNT(0, j) = CNT(0, j - 1) + SCORE_INS;
7830 
7831     for (i = 1; i <= badlen; ++i)
7832     {
7833 	CNT(i, 0) = CNT(i - 1, 0) + SCORE_DEL;
7834 	for (j = 1; j <= goodlen; ++j)
7835 	{
7836 	    if (has_mbyte)
7837 	    {
7838 		bc = wbadword[i - 1];
7839 		gc = wgoodword[j - 1];
7840 	    }
7841 	    else
7842 	    {
7843 		bc = badword[i - 1];
7844 		gc = goodword[j - 1];
7845 	    }
7846 	    if (bc == gc)
7847 		CNT(i, j) = CNT(i - 1, j - 1);
7848 	    else
7849 	    {
7850 		/* Use a better score when there is only a case difference. */
7851 		if (SPELL_TOFOLD(bc) == SPELL_TOFOLD(gc))
7852 		    CNT(i, j) = SCORE_ICASE + CNT(i - 1, j - 1);
7853 		else
7854 		{
7855 		    /* For a similar character use SCORE_SIMILAR. */
7856 		    if (slang != NULL
7857 			    && slang->sl_has_map
7858 			    && similar_chars(slang, gc, bc))
7859 			CNT(i, j) = SCORE_SIMILAR + CNT(i - 1, j - 1);
7860 		    else
7861 			CNT(i, j) = SCORE_SUBST + CNT(i - 1, j - 1);
7862 		}
7863 
7864 		if (i > 1 && j > 1)
7865 		{
7866 		    if (has_mbyte)
7867 		    {
7868 			pbc = wbadword[i - 2];
7869 			pgc = wgoodword[j - 2];
7870 		    }
7871 		    else
7872 		    {
7873 			pbc = badword[i - 2];
7874 			pgc = goodword[j - 2];
7875 		    }
7876 		    if (bc == pgc && pbc == gc)
7877 		    {
7878 			t = SCORE_SWAP + CNT(i - 2, j - 2);
7879 			if (t < CNT(i, j))
7880 			    CNT(i, j) = t;
7881 		    }
7882 		}
7883 		t = SCORE_DEL + CNT(i - 1, j);
7884 		if (t < CNT(i, j))
7885 		    CNT(i, j) = t;
7886 		t = SCORE_INS + CNT(i, j - 1);
7887 		if (t < CNT(i, j))
7888 		    CNT(i, j) = t;
7889 	    }
7890 	}
7891     }
7892 
7893     i = CNT(badlen - 1, goodlen - 1);
7894     vim_free(cnt);
7895     return i;
7896 }
7897 
/*
 * Item on the stack used by spell_edit_score_limit() and
 * spell_edit_score_limit_w(): an alternative that is still to be tried.
 */
7898 typedef struct
7899 {
7900     int		badi;	    /* index in the bad word to continue at */
7901     int		goodi;	    /* index in the good word to continue at */
7902     int		score;	    /* score up to that position */
7903 } limitscore_T;
7904 
7905 /*
7906  * Like spell_edit_score(), but with a limit on the score to make it faster.
7907  * May return SCORE_MAXMAX when the score is higher than "limit".
7908  *
7909  * This uses a stack for the edits still to be tried.
7910  * The idea comes from Aspell leditdist.cpp.  Rewritten in C and added support
7911  * for multi-byte characters.
7912  */
7913     static int
7914 spell_edit_score_limit(
7915     slang_T	*slang,
7916     char_u	*badword,
7917     char_u	*goodword,
7918     int		limit)
7919 {
7920     limitscore_T    stack[10];		/* allow for over 3 * 2 edits */
7921     int		    stackidx;
7922     int		    bi, gi;
7923     int		    bi2, gi2;
7924     int		    bc, gc;
7925     int		    score;
7926     int		    score_off;
7927     int		    minscore;
7928     int		    round;
7929 
7930     /* Multi-byte characters require a bit more work, use a different function
7931      * to avoid testing "has_mbyte" quite often. */
7932     if (has_mbyte)
7933 	return spell_edit_score_limit_w(slang, badword, goodword, limit);
7934 
7935     /*
7936      * The idea is to go from start to end over the words.  So long as
7937      * characters are equal just continue, this always gives the lowest score.
7938      * When there is a difference try several alternatives.  Each alternative
7939      * increases "score" for the edit distance.  Some of the alternatives are
7940      * pushed unto a stack and tried later, some are tried right away.  At the
7941      * end of the word the score for one alternative is known.  The lowest
7942      * possible score is stored in "minscore".
7943      */
7944     stackidx = 0;
7945     bi = 0;
7946     gi = 0;
7947     score = 0;
7948     minscore = limit + 1;
7949 
7950     for (;;)
7951     {
7952 	/* Skip over an equal part, score remains the same. */
7953 	for (;;)
7954 	{
7955 	    bc = badword[bi];
7956 	    gc = goodword[gi];
7957 	    if (bc != gc)	/* stop at a char that's different */
7958 		break;
7959 	    if (bc == NUL)	/* both words end */
7960 	    {
7961 		if (score < minscore)
7962 		    minscore = score;
7963 		goto pop;	/* do next alternative */
7964 	    }
7965 	    ++bi;
7966 	    ++gi;
7967 	}
7968 
7969 	if (gc == NUL)    /* goodword ends, delete badword chars */
7970 	{
7971 	    do
7972 	    {
7973 		if ((score += SCORE_DEL) >= minscore)
7974 		    goto pop;	    /* do next alternative */
7975 	    } while (badword[++bi] != NUL);
7976 	    minscore = score;
7977 	}
7978 	else if (bc == NUL) /* badword ends, insert badword chars */
7979 	{
7980 	    do
7981 	    {
7982 		if ((score += SCORE_INS) >= minscore)
7983 		    goto pop;	    /* do next alternative */
7984 	    } while (goodword[++gi] != NUL);
7985 	    minscore = score;
7986 	}
7987 	else			/* both words continue */
7988 	{
7989 	    /* If not close to the limit, perform a change.  Only try changes
7990 	     * that may lead to a lower score than "minscore".
7991 	     * round 0: try deleting a char from badword
7992 	     * round 1: try inserting a char in badword */
7993 	    for (round = 0; round <= 1; ++round)
7994 	    {
7995 		score_off = score + (round == 0 ? SCORE_DEL : SCORE_INS);
7996 		if (score_off < minscore)
7997 		{
7998 		    if (score_off + SCORE_EDIT_MIN >= minscore)
7999 		    {
8000 			/* Near the limit, rest of the words must match.  We
8001 			 * can check that right now, no need to push an item
8002 			 * onto the stack. */
8003 			bi2 = bi + 1 - round;
8004 			gi2 = gi + round;
8005 			while (goodword[gi2] == badword[bi2])
8006 			{
8007 			    if (goodword[gi2] == NUL)
8008 			    {
8009 				minscore = score_off;
8010 				break;
8011 			    }
8012 			    ++bi2;
8013 			    ++gi2;
8014 			}
8015 		    }
8016 		    else
8017 		    {
8018 			/* try deleting/inserting a character later */
8019 			stack[stackidx].badi = bi + 1 - round;
8020 			stack[stackidx].goodi = gi + round;
8021 			stack[stackidx].score = score_off;
8022 			++stackidx;
8023 		    }
8024 		}
8025 	    }
8026 
8027 	    if (score + SCORE_SWAP < minscore)
8028 	    {
8029 		/* If swapping two characters makes a match then the
8030 		 * substitution is more expensive, thus there is no need to
8031 		 * try both. */
8032 		if (gc == badword[bi + 1] && bc == goodword[gi + 1])
8033 		{
8034 		    /* Swap two characters, that is: skip them. */
8035 		    gi += 2;
8036 		    bi += 2;
8037 		    score += SCORE_SWAP;
8038 		    continue;
8039 		}
8040 	    }
8041 
8042 	    /* Substitute one character for another which is the same
8043 	     * thing as deleting a character from both goodword and badword.
8044 	     * Use a better score when there is only a case difference. */
8045 	    if (SPELL_TOFOLD(bc) == SPELL_TOFOLD(gc))
8046 		score += SCORE_ICASE;
8047 	    else
8048 	    {
8049 		/* For a similar character use SCORE_SIMILAR. */
8050 		if (slang != NULL
8051 			&& slang->sl_has_map
8052 			&& similar_chars(slang, gc, bc))
8053 		    score += SCORE_SIMILAR;
8054 		else
8055 		    score += SCORE_SUBST;
8056 	    }
8057 
8058 	    if (score < minscore)
8059 	    {
8060 		/* Do the substitution. */
8061 		++gi;
8062 		++bi;
8063 		continue;
8064 	    }
8065 	}
8066 pop:
8067 	/*
8068 	 * Get here to try the next alternative, pop it from the stack.
8069 	 */
8070 	if (stackidx == 0)		/* stack is empty, finished */
8071 	    break;
8072 
8073 	/* pop an item from the stack */
8074 	--stackidx;
8075 	gi = stack[stackidx].goodi;
8076 	bi = stack[stackidx].badi;
8077 	score = stack[stackidx].score;
8078     }
8079 
8080     /* When the score goes over "limit" it may actually be much higher.
8081      * Return a very large number to avoid going below the limit when giving a
8082      * bonus. */
8083     if (minscore > limit)
8084 	return SCORE_MAXMAX;
8085     return minscore;
8086 }
8087 
8088 /*
8089  * Multi-byte version of spell_edit_score_limit().
8090  * Keep it in sync with the above!
8091  */
8092     static int
8093 spell_edit_score_limit_w(
8094     slang_T	*slang,
8095     char_u	*badword,
8096     char_u	*goodword,
8097     int		limit)
8098 {
8099     limitscore_T    stack[10];		/* allow for over 3 * 2 edits */
8100     int		    stackidx;
8101     int		    bi, gi;
8102     int		    bi2, gi2;
8103     int		    bc, gc;
8104     int		    score;
8105     int		    score_off;
8106     int		    minscore;
8107     int		    round;
8108     char_u	    *p;
8109     int		    wbadword[MAXWLEN];
8110     int		    wgoodword[MAXWLEN];
8111 
8112     /* Get the characters from the multi-byte strings and put them in an
8113      * int array for easy access. */
8114     bi = 0;
8115     for (p = badword; *p != NUL; )
8116 	wbadword[bi++] = mb_cptr2char_adv(&p);
8117     wbadword[bi++] = 0;
8118     gi = 0;
8119     for (p = goodword; *p != NUL; )
8120 	wgoodword[gi++] = mb_cptr2char_adv(&p);
8121     wgoodword[gi++] = 0;
8122 
8123     /*
8124      * The idea is to go from start to end over the words.  So long as
8125      * characters are equal just continue, this always gives the lowest score.
8126      * When there is a difference try several alternatives.  Each alternative
8127      * increases "score" for the edit distance.  Some of the alternatives are
8128      * pushed unto a stack and tried later, some are tried right away.  At the
8129      * end of the word the score for one alternative is known.  The lowest
8130      * possible score is stored in "minscore".
8131      */
8132     stackidx = 0;
8133     bi = 0;
8134     gi = 0;
8135     score = 0;
8136     minscore = limit + 1;
8137 
8138     for (;;)
8139     {
8140 	/* Skip over an equal part, score remains the same. */
8141 	for (;;)
8142 	{
8143 	    bc = wbadword[bi];
8144 	    gc = wgoodword[gi];
8145 
8146 	    if (bc != gc)	/* stop at a char that's different */
8147 		break;
8148 	    if (bc == NUL)	/* both words end */
8149 	    {
8150 		if (score < minscore)
8151 		    minscore = score;
8152 		goto pop;	/* do next alternative */
8153 	    }
8154 	    ++bi;
8155 	    ++gi;
8156 	}
8157 
8158 	if (gc == NUL)    /* goodword ends, delete badword chars */
8159 	{
8160 	    do
8161 	    {
8162 		if ((score += SCORE_DEL) >= minscore)
8163 		    goto pop;	    /* do next alternative */
8164 	    } while (wbadword[++bi] != NUL);
8165 	    minscore = score;
8166 	}
8167 	else if (bc == NUL) /* badword ends, insert badword chars */
8168 	{
8169 	    do
8170 	    {
8171 		if ((score += SCORE_INS) >= minscore)
8172 		    goto pop;	    /* do next alternative */
8173 	    } while (wgoodword[++gi] != NUL);
8174 	    minscore = score;
8175 	}
8176 	else			/* both words continue */
8177 	{
8178 	    /* If not close to the limit, perform a change.  Only try changes
8179 	     * that may lead to a lower score than "minscore".
8180 	     * round 0: try deleting a char from badword
8181 	     * round 1: try inserting a char in badword */
8182 	    for (round = 0; round <= 1; ++round)
8183 	    {
8184 		score_off = score + (round == 0 ? SCORE_DEL : SCORE_INS);
8185 		if (score_off < minscore)
8186 		{
8187 		    if (score_off + SCORE_EDIT_MIN >= minscore)
8188 		    {
8189 			/* Near the limit, rest of the words must match.  We
8190 			 * can check that right now, no need to push an item
8191 			 * onto the stack. */
8192 			bi2 = bi + 1 - round;
8193 			gi2 = gi + round;
8194 			while (wgoodword[gi2] == wbadword[bi2])
8195 			{
8196 			    if (wgoodword[gi2] == NUL)
8197 			    {
8198 				minscore = score_off;
8199 				break;
8200 			    }
8201 			    ++bi2;
8202 			    ++gi2;
8203 			}
8204 		    }
8205 		    else
8206 		    {
8207 			/* try deleting a character from badword later */
8208 			stack[stackidx].badi = bi + 1 - round;
8209 			stack[stackidx].goodi = gi + round;
8210 			stack[stackidx].score = score_off;
8211 			++stackidx;
8212 		    }
8213 		}
8214 	    }
8215 
8216 	    if (score + SCORE_SWAP < minscore)
8217 	    {
8218 		/* If swapping two characters makes a match then the
8219 		 * substitution is more expensive, thus there is no need to
8220 		 * try both. */
8221 		if (gc == wbadword[bi + 1] && bc == wgoodword[gi + 1])
8222 		{
8223 		    /* Swap two characters, that is: skip them. */
8224 		    gi += 2;
8225 		    bi += 2;
8226 		    score += SCORE_SWAP;
8227 		    continue;
8228 		}
8229 	    }
8230 
8231 	    /* Substitute one character for another which is the same
8232 	     * thing as deleting a character from both goodword and badword.
8233 	     * Use a better score when there is only a case difference. */
8234 	    if (SPELL_TOFOLD(bc) == SPELL_TOFOLD(gc))
8235 		score += SCORE_ICASE;
8236 	    else
8237 	    {
8238 		/* For a similar character use SCORE_SIMILAR. */
8239 		if (slang != NULL
8240 			&& slang->sl_has_map
8241 			&& similar_chars(slang, gc, bc))
8242 		    score += SCORE_SIMILAR;
8243 		else
8244 		    score += SCORE_SUBST;
8245 	    }
8246 
8247 	    if (score < minscore)
8248 	    {
8249 		/* Do the substitution. */
8250 		++gi;
8251 		++bi;
8252 		continue;
8253 	    }
8254 	}
8255 pop:
8256 	/*
8257 	 * Get here to try the next alternative, pop it from the stack.
8258 	 */
8259 	if (stackidx == 0)		/* stack is empty, finished */
8260 	    break;
8261 
8262 	/* pop an item from the stack */
8263 	--stackidx;
8264 	gi = stack[stackidx].goodi;
8265 	bi = stack[stackidx].badi;
8266 	score = stack[stackidx].score;
8267     }
8268 
8269     /* When the score goes over "limit" it may actually be much higher.
8270      * Return a very large number to avoid going below the limit when giving a
8271      * bonus. */
8272     if (minscore > limit)
8273 	return SCORE_MAXMAX;
8274     return minscore;
8275 }
8276 
8277 /*
8278  * ":spellinfo"
8279  */
8280     void
8281 ex_spellinfo(exarg_T *eap UNUSED)
8282 {
8283     int		lpi;
8284     langp_T	*lp;
8285     char_u	*p;
8286 
8287     if (no_spell_checking(curwin))
8288 	return;
8289 
8290     msg_start();
8291     for (lpi = 0; lpi < curwin->w_s->b_langp.ga_len && !got_int; ++lpi)
8292     {
8293 	lp = LANGP_ENTRY(curwin->w_s->b_langp, lpi);
8294 	msg_puts("file: ");
8295 	msg_puts((char *)lp->lp_slang->sl_fname);
8296 	msg_putchar('\n');
8297 	p = lp->lp_slang->sl_info;
8298 	if (p != NULL)
8299 	{
8300 	    msg_puts((char *)p);
8301 	    msg_putchar('\n');
8302 	}
8303     }
8304     msg_end();
8305 }
8306 
/* Flags for spell_dump_compl() and dump_word(); each one is a separate bit so
 * that they can be combined with "|". */
#define DUMPFLAG_KEEPCASE   0x01    /* round 2: keep-case tree */
#define DUMPFLAG_COUNT	    0x02    /* include word count */
#define DUMPFLAG_ICASE	    0x04    /* ignore case when finding matches */
#define DUMPFLAG_ONECAP	    0x08    /* pattern starts with capital */
#define DUMPFLAG_ALLCAP	    0x10    /* pattern is all capitals */
8312 
8313 /*
8314  * ":spelldump"
8315  */
8316     void
8317 ex_spelldump(exarg_T *eap)
8318 {
8319     char_u  *spl;
8320     long    dummy;
8321 
8322     if (no_spell_checking(curwin))
8323 	return;
8324     get_option_value((char_u*)"spl", &dummy, &spl, OPT_LOCAL);
8325 
8326     /* Create a new empty buffer in a new window. */
8327     do_cmdline_cmd((char_u *)"new");
8328 
8329     /* enable spelling locally in the new window */
8330     set_option_value((char_u*)"spell", TRUE, (char_u*)"", OPT_LOCAL);
8331     set_option_value((char_u*)"spl",  dummy, spl, OPT_LOCAL);
8332     vim_free(spl);
8333 
8334     if (!BUFEMPTY())
8335 	return;
8336 
8337     spell_dump_compl(NULL, 0, NULL, eap->forceit ? DUMPFLAG_COUNT : 0);
8338 
8339     /* Delete the empty line that we started with. */
8340     if (curbuf->b_ml.ml_line_count > 1)
8341 	ml_delete(curbuf->b_ml.ml_line_count, FALSE);
8342 
8343     redraw_later(NOT_VALID);
8344 }
8345 
8346 /*
8347  * Go through all possible words and:
8348  * 1. When "pat" is NULL: dump a list of all words in the current buffer.
8349  *	"ic" and "dir" are not used.
8350  * 2. When "pat" is not NULL: add matching words to insert mode completion.
8351  */
8352     void
8353 spell_dump_compl(
8354     char_u	*pat,	    /* leading part of the word */
8355     int		ic,	    /* ignore case */
8356     int		*dir,	    /* direction for adding matches */
8357     int		dumpflags_arg)	/* DUMPFLAG_* */
8358 {
8359     langp_T	*lp;
8360     slang_T	*slang;
8361     idx_T	arridx[MAXWLEN];
8362     int		curi[MAXWLEN];
8363     char_u	word[MAXWLEN];
8364     int		c;
8365     char_u	*byts;
8366     idx_T	*idxs;
8367     linenr_T	lnum = 0;
8368     int		round;
8369     int		depth;
8370     int		n;
8371     int		flags;
8372     char_u	*region_names = NULL;	    /* region names being used */
8373     int		do_region = TRUE;	    /* dump region names and numbers */
8374     char_u	*p;
8375     int		lpi;
8376     int		dumpflags = dumpflags_arg;
8377     int		patlen;
8378 
8379     /* When ignoring case or when the pattern starts with capital pass this on
8380      * to dump_word(). */
8381     if (pat != NULL)
8382     {
8383 	if (ic)
8384 	    dumpflags |= DUMPFLAG_ICASE;
8385 	else
8386 	{
8387 	    n = captype(pat, NULL);
8388 	    if (n == WF_ONECAP)
8389 		dumpflags |= DUMPFLAG_ONECAP;
8390 	    else if (n == WF_ALLCAP && (int)STRLEN(pat) > mb_ptr2len(pat))
8391 		dumpflags |= DUMPFLAG_ALLCAP;
8392 	}
8393     }
8394 
8395     /* Find out if we can support regions: All languages must support the same
8396      * regions or none at all. */
8397     for (lpi = 0; lpi < curwin->w_s->b_langp.ga_len; ++lpi)
8398     {
8399 	lp = LANGP_ENTRY(curwin->w_s->b_langp, lpi);
8400 	p = lp->lp_slang->sl_regions;
8401 	if (p[0] != 0)
8402 	{
8403 	    if (region_names == NULL)	    /* first language with regions */
8404 		region_names = p;
8405 	    else if (STRCMP(region_names, p) != 0)
8406 	    {
8407 		do_region = FALSE;	    /* region names are different */
8408 		break;
8409 	    }
8410 	}
8411     }
8412 
8413     if (do_region && region_names != NULL)
8414     {
8415 	if (pat == NULL)
8416 	{
8417 	    vim_snprintf((char *)IObuff, IOSIZE, "/regions=%s", region_names);
8418 	    ml_append(lnum++, IObuff, (colnr_T)0, FALSE);
8419 	}
8420     }
8421     else
8422 	do_region = FALSE;
8423 
8424     /*
8425      * Loop over all files loaded for the entries in 'spelllang'.
8426      */
8427     for (lpi = 0; lpi < curwin->w_s->b_langp.ga_len; ++lpi)
8428     {
8429 	lp = LANGP_ENTRY(curwin->w_s->b_langp, lpi);
8430 	slang = lp->lp_slang;
8431 	if (slang->sl_fbyts == NULL)	    /* reloading failed */
8432 	    continue;
8433 
8434 	if (pat == NULL)
8435 	{
8436 	    vim_snprintf((char *)IObuff, IOSIZE, "# file: %s", slang->sl_fname);
8437 	    ml_append(lnum++, IObuff, (colnr_T)0, FALSE);
8438 	}
8439 
8440 	/* When matching with a pattern and there are no prefixes only use
8441 	 * parts of the tree that match "pat". */
8442 	if (pat != NULL && slang->sl_pbyts == NULL)
8443 	    patlen = (int)STRLEN(pat);
8444 	else
8445 	    patlen = -1;
8446 
8447 	/* round 1: case-folded tree
8448 	 * round 2: keep-case tree */
8449 	for (round = 1; round <= 2; ++round)
8450 	{
8451 	    if (round == 1)
8452 	    {
8453 		dumpflags &= ~DUMPFLAG_KEEPCASE;
8454 		byts = slang->sl_fbyts;
8455 		idxs = slang->sl_fidxs;
8456 	    }
8457 	    else
8458 	    {
8459 		dumpflags |= DUMPFLAG_KEEPCASE;
8460 		byts = slang->sl_kbyts;
8461 		idxs = slang->sl_kidxs;
8462 	    }
8463 	    if (byts == NULL)
8464 		continue;		/* array is empty */
8465 
8466 	    depth = 0;
8467 	    arridx[0] = 0;
8468 	    curi[0] = 1;
8469 	    while (depth >= 0 && !got_int
8470 				  && (pat == NULL || !ins_compl_interrupted()))
8471 	    {
8472 		if (curi[depth] > byts[arridx[depth]])
8473 		{
8474 		    /* Done all bytes at this node, go up one level. */
8475 		    --depth;
8476 		    line_breakcheck();
8477 		    ins_compl_check_keys(50, FALSE);
8478 		}
8479 		else
8480 		{
8481 		    /* Do one more byte at this node. */
8482 		    n = arridx[depth] + curi[depth];
8483 		    ++curi[depth];
8484 		    c = byts[n];
8485 		    if (c == 0)
8486 		    {
8487 			/* End of word, deal with the word.
8488 			 * Don't use keep-case words in the fold-case tree,
8489 			 * they will appear in the keep-case tree.
8490 			 * Only use the word when the region matches. */
8491 			flags = (int)idxs[n];
8492 			if ((round == 2 || (flags & WF_KEEPCAP) == 0)
8493 				&& (flags & WF_NEEDCOMP) == 0
8494 				&& (do_region
8495 				    || (flags & WF_REGION) == 0
8496 				    || (((unsigned)flags >> 16)
8497 						       & lp->lp_region) != 0))
8498 			{
8499 			    word[depth] = NUL;
8500 			    if (!do_region)
8501 				flags &= ~WF_REGION;
8502 
8503 			    /* Dump the basic word if there is no prefix or
8504 			     * when it's the first one. */
8505 			    c = (unsigned)flags >> 24;
8506 			    if (c == 0 || curi[depth] == 2)
8507 			    {
8508 				dump_word(slang, word, pat, dir,
8509 						      dumpflags, flags, lnum);
8510 				if (pat == NULL)
8511 				    ++lnum;
8512 			    }
8513 
8514 			    /* Apply the prefix, if there is one. */
8515 			    if (c != 0)
8516 				lnum = dump_prefixes(slang, word, pat, dir,
8517 						      dumpflags, flags, lnum);
8518 			}
8519 		    }
8520 		    else
8521 		    {
8522 			/* Normal char, go one level deeper. */
8523 			word[depth++] = c;
8524 			arridx[depth] = idxs[n];
8525 			curi[depth] = 1;
8526 
8527 			/* Check if this characters matches with the pattern.
8528 			 * If not skip the whole tree below it.
8529 			 * Always ignore case here, dump_word() will check
8530 			 * proper case later.  This isn't exactly right when
8531 			 * length changes for multi-byte characters with
8532 			 * ignore case... */
8533 			if (depth <= patlen
8534 					&& MB_STRNICMP(word, pat, depth) != 0)
8535 			    --depth;
8536 		    }
8537 		}
8538 	    }
8539 	}
8540     }
8541 }
8542 
8543 /*
8544  * Dump one word: apply case modifications and append a line to the buffer.
8545  * When "lnum" is zero add insert mode completion.
8546  */
8547     static void
8548 dump_word(
8549     slang_T	*slang,
8550     char_u	*word,
8551     char_u	*pat,
8552     int		*dir,
8553     int		dumpflags,
8554     int		wordflags,
8555     linenr_T	lnum)
8556 {
8557     int		keepcap = FALSE;
8558     char_u	*p;
8559     char_u	*tw;
8560     char_u	cword[MAXWLEN];
8561     char_u	badword[MAXWLEN + 10];
8562     int		i;
8563     int		flags = wordflags;
8564 
8565     if (dumpflags & DUMPFLAG_ONECAP)
8566 	flags |= WF_ONECAP;
8567     if (dumpflags & DUMPFLAG_ALLCAP)
8568 	flags |= WF_ALLCAP;
8569 
8570     if ((dumpflags & DUMPFLAG_KEEPCASE) == 0 && (flags & WF_CAPMASK) != 0)
8571     {
8572 	/* Need to fix case according to "flags". */
8573 	make_case_word(word, cword, flags);
8574 	p = cword;
8575     }
8576     else
8577     {
8578 	p = word;
8579 	if ((dumpflags & DUMPFLAG_KEEPCASE)
8580 		&& ((captype(word, NULL) & WF_KEEPCAP) == 0
8581 						 || (flags & WF_FIXCAP) != 0))
8582 	    keepcap = TRUE;
8583     }
8584     tw = p;
8585 
8586     if (pat == NULL)
8587     {
8588 	/* Add flags and regions after a slash. */
8589 	if ((flags & (WF_BANNED | WF_RARE | WF_REGION)) || keepcap)
8590 	{
8591 	    STRCPY(badword, p);
8592 	    STRCAT(badword, "/");
8593 	    if (keepcap)
8594 		STRCAT(badword, "=");
8595 	    if (flags & WF_BANNED)
8596 		STRCAT(badword, "!");
8597 	    else if (flags & WF_RARE)
8598 		STRCAT(badword, "?");
8599 	    if (flags & WF_REGION)
8600 		for (i = 0; i < 7; ++i)
8601 		    if (flags & (0x10000 << i))
8602 			sprintf((char *)badword + STRLEN(badword), "%d", i + 1);
8603 	    p = badword;
8604 	}
8605 
8606 	if (dumpflags & DUMPFLAG_COUNT)
8607 	{
8608 	    hashitem_T  *hi;
8609 
8610 	    /* Include the word count for ":spelldump!". */
8611 	    hi = hash_find(&slang->sl_wordcount, tw);
8612 	    if (!HASHITEM_EMPTY(hi))
8613 	    {
8614 		vim_snprintf((char *)IObuff, IOSIZE, "%s\t%d",
8615 						     tw, HI2WC(hi)->wc_count);
8616 		p = IObuff;
8617 	    }
8618 	}
8619 
8620 	ml_append(lnum, p, (colnr_T)0, FALSE);
8621     }
8622     else if (((dumpflags & DUMPFLAG_ICASE)
8623 		    ? MB_STRNICMP(p, pat, STRLEN(pat)) == 0
8624 		    : STRNCMP(p, pat, STRLEN(pat)) == 0)
8625 		&& ins_compl_add_infercase(p, (int)STRLEN(p),
8626 					  p_ic, NULL, *dir, FALSE) == OK)
8627 	/* if dir was BACKWARD then honor it just once */
8628 	*dir = FORWARD;
8629 }
8630 
8631 /*
8632  * For ":spelldump": Find matching prefixes for "word".  Prepend each to
8633  * "word" and append a line to the buffer.
8634  * When "lnum" is zero add insert mode completion.
8635  * Return the updated line number.
8636  */
8637     static linenr_T
8638 dump_prefixes(
8639     slang_T	*slang,
8640     char_u	*word,	    /* case-folded word */
8641     char_u	*pat,
8642     int		*dir,
8643     int		dumpflags,
8644     int		flags,	    /* flags with prefix ID */
8645     linenr_T	startlnum)
8646 {
8647     idx_T	arridx[MAXWLEN];
8648     int		curi[MAXWLEN];
8649     char_u	prefix[MAXWLEN];
8650     char_u	word_up[MAXWLEN];
8651     int		has_word_up = FALSE;
8652     int		c;
8653     char_u	*byts;
8654     idx_T	*idxs;
8655     linenr_T	lnum = startlnum;
8656     int		depth;
8657     int		n;
8658     int		len;
8659     int		i;
8660 
8661     /* If the word starts with a lower-case letter make the word with an
8662      * upper-case letter in word_up[]. */
8663     c = PTR2CHAR(word);
8664     if (SPELL_TOUPPER(c) != c)
8665     {
8666 	onecap_copy(word, word_up, TRUE);
8667 	has_word_up = TRUE;
8668     }
8669 
8670     byts = slang->sl_pbyts;
8671     idxs = slang->sl_pidxs;
8672     if (byts != NULL)		/* array not is empty */
8673     {
8674 	/*
8675 	 * Loop over all prefixes, building them byte-by-byte in prefix[].
8676 	 * When at the end of a prefix check that it supports "flags".
8677 	 */
8678 	depth = 0;
8679 	arridx[0] = 0;
8680 	curi[0] = 1;
8681 	while (depth >= 0 && !got_int)
8682 	{
8683 	    n = arridx[depth];
8684 	    len = byts[n];
8685 	    if (curi[depth] > len)
8686 	    {
8687 		/* Done all bytes at this node, go up one level. */
8688 		--depth;
8689 		line_breakcheck();
8690 	    }
8691 	    else
8692 	    {
8693 		/* Do one more byte at this node. */
8694 		n += curi[depth];
8695 		++curi[depth];
8696 		c = byts[n];
8697 		if (c == 0)
8698 		{
8699 		    /* End of prefix, find out how many IDs there are. */
8700 		    for (i = 1; i < len; ++i)
8701 			if (byts[n + i] != 0)
8702 			    break;
8703 		    curi[depth] += i - 1;
8704 
8705 		    c = valid_word_prefix(i, n, flags, word, slang, FALSE);
8706 		    if (c != 0)
8707 		    {
8708 			vim_strncpy(prefix + depth, word, MAXWLEN - depth - 1);
8709 			dump_word(slang, prefix, pat, dir, dumpflags,
8710 				(c & WF_RAREPFX) ? (flags | WF_RARE)
8711 							       : flags, lnum);
8712 			if (lnum != 0)
8713 			    ++lnum;
8714 		    }
8715 
8716 		    /* Check for prefix that matches the word when the
8717 		     * first letter is upper-case, but only if the prefix has
8718 		     * a condition. */
8719 		    if (has_word_up)
8720 		    {
8721 			c = valid_word_prefix(i, n, flags, word_up, slang,
8722 									TRUE);
8723 			if (c != 0)
8724 			{
8725 			    vim_strncpy(prefix + depth, word_up,
8726 							 MAXWLEN - depth - 1);
8727 			    dump_word(slang, prefix, pat, dir, dumpflags,
8728 				    (c & WF_RAREPFX) ? (flags | WF_RARE)
8729 							       : flags, lnum);
8730 			    if (lnum != 0)
8731 				++lnum;
8732 			}
8733 		    }
8734 		}
8735 		else
8736 		{
8737 		    /* Normal char, go one level deeper. */
8738 		    prefix[depth++] = c;
8739 		    arridx[depth] = idxs[n];
8740 		    curi[depth] = 1;
8741 		}
8742 	    }
8743 	}
8744     }
8745 
8746     return lnum;
8747 }
8748 
8749 /*
8750  * Move "p" to the end of word "start".
8751  * Uses the spell-checking word characters.
8752  */
8753     char_u *
8754 spell_to_word_end(char_u *start, win_T *win)
8755 {
8756     char_u  *p = start;
8757 
8758     while (*p != NUL && spell_iswordp(p, win))
8759 	MB_PTR_ADV(p);
8760     return p;
8761 }
8762 
8763 #if defined(FEAT_INS_EXPAND) || defined(PROTO)
8764 /*
8765  * For Insert mode completion CTRL-X s:
8766  * Find start of the word in front of column "startcol".
8767  * We don't check if it is badly spelled, with completion we can only change
8768  * the word in front of the cursor.
8769  * Returns the column number of the word.
8770  */
8771     int
8772 spell_word_start(int startcol)
8773 {
8774     char_u	*line;
8775     char_u	*p;
8776     int		col = 0;
8777 
8778     if (no_spell_checking(curwin))
8779 	return startcol;
8780 
8781     /* Find a word character before "startcol". */
8782     line = ml_get_curline();
8783     for (p = line + startcol; p > line; )
8784     {
8785 	MB_PTR_BACK(line, p);
8786 	if (spell_iswordp_nmw(p, curwin))
8787 	    break;
8788     }
8789 
8790     /* Go back to start of the word. */
8791     while (p > line)
8792     {
8793 	col = (int)(p - line);
8794 	MB_PTR_BACK(line, p);
8795 	if (!spell_iswordp(p, curwin))
8796 	    break;
8797 	col = 0;
8798     }
8799 
8800     return col;
8801 }
8802 
8803 /*
8804  * Need to check for 'spellcapcheck' now, the word is removed before
8805  * expand_spelling() is called.  Therefore the ugly global variable.
8806  */
8807 static int spell_expand_need_cap;
8808 
8809     void
8810 spell_expand_check_cap(colnr_T col)
8811 {
8812     spell_expand_need_cap = check_need_cap(curwin->w_cursor.lnum, col);
8813 }
8814 
8815 /*
8816  * Get list of spelling suggestions.
8817  * Used for Insert mode completion CTRL-X ?.
8818  * Returns the number of matches.  The matches are in "matchp[]", array of
8819  * allocated strings.
8820  */
8821     int
8822 expand_spelling(
8823     linenr_T	lnum UNUSED,
8824     char_u	*pat,
8825     char_u	***matchp)
8826 {
8827     garray_T	ga;
8828 
8829     spell_suggest_list(&ga, pat, 100, spell_expand_need_cap, TRUE);
8830     *matchp = ga.ga_data;
8831     return ga.ga_len;
8832 }
8833 #endif
8834 
8835 #endif  /* FEAT_SPELL */
8836