xref: /vim-8.2.3635/src/spellsuggest.c (revision 15d9890e)
1 /* vi:set ts=8 sts=4 sw=4 noet:
2  *
3  * VIM - Vi IMproved	by Bram Moolenaar
4  *
5  * Do ":help uganda"  in Vim to read copying and usage conditions.
6  * Do ":help credits" in Vim to see a list of people who contributed.
7  * See README.txt for an overview of the Vim source code.
8  */
9 
10 /*
11  * spellsuggest.c: functions for spelling suggestions
12  */
13 
14 #include "vim.h"
15 
16 #if defined(FEAT_SPELL) || defined(PROTO)
17 
18 /*
19  * Use this to adjust the score after finding suggestions, based on the
20  * suggested word sounding like the bad word.  This is much faster than doing
21  * it for every possible suggestion.
22  * Disadvantage: When "the" is typed as "hte" it sounds quite different ("@"
23  * vs "ht") and goes down in the list.
24  * Used when 'spellsuggest' is set to "best".
25  */
26 #define RESCORE(word_score, sound_score) ((3 * word_score + sound_score) / 4)
27 
28 /*
29  * Do the opposite: based on a maximum end score and a known sound score,
30  * compute the maximum word score that can be used.
31  */
32 #define MAXSCORE(word_score, sound_score) ((4 * word_score - sound_score) / 3)
33 
34 // only used for su_badflags
35 #define WF_MIXCAP   0x20	// mix of upper and lower case: macaRONI
36 
37 /*
38  * Information used when looking for suggestions.
39  */
40 typedef struct suginfo_S
41 {
42     garray_T	su_ga;		    // suggestions, contains "suggest_T"
43     int		su_maxcount;	    // max. number of suggestions displayed
44     int		su_maxscore;	    // maximum score for adding to su_ga
45     int		su_sfmaxscore;	    // idem, for when doing soundfold words
46     garray_T	su_sga;		    // like su_ga, sound-folded scoring
47     char_u	*su_badptr;	    // start of bad word in line
48     int		su_badlen;	    // length of detected bad word in line
49     int		su_badflags;	    // caps flags for bad word
50     char_u	su_badword[MAXWLEN]; // bad word truncated at su_badlen
51     char_u	su_fbadword[MAXWLEN]; // su_badword case-folded
52     char_u	su_sal_badword[MAXWLEN]; // su_badword soundfolded
53     hashtab_T	su_banned;	    // table with banned words
54     slang_T	*su_sallang;	    // default language for sound folding
55 } suginfo_T;
56 
57 // One word suggestion.  Used in "si_ga".
58 typedef struct suggest_S
59 {
60     char_u	*st_word;	// suggested word, allocated string
61     int		st_wordlen;	// STRLEN(st_word)
62     int		st_orglen;	// length of replaced text
63     int		st_score;	// lower is better
64     int		st_altscore;	// used when st_score compares equal
65     int		st_salscore;	// st_score is for soundalike
66     int		st_had_bonus;	// bonus already included in score
67     slang_T	*st_slang;	// language used for sound folding
68 } suggest_T;
69 
70 #define SUG(ga, i) (((suggest_T *)(ga).ga_data)[i])
71 
72 // TRUE if a word appears in the list of banned words.
73 #define WAS_BANNED(su, word) (!HASHITEM_EMPTY(hash_find(&su->su_banned, word)))
74 
75 // Number of suggestions kept when cleaning up.  We need to keep more than
76 // what is displayed, because when rescore_suggestions() is called the score
77 // may change and wrong suggestions may be removed later.
78 #define SUG_CLEAN_COUNT(su)    ((su)->su_maxcount < 130 ? 150 : (su)->su_maxcount + 20)
79 
80 // Threshold for sorting and cleaning up suggestions.  Don't want to keep lots
81 // of suggestions that are not going to be displayed.
82 #define SUG_MAX_COUNT(su)	(SUG_CLEAN_COUNT(su) + 50)
83 
84 // score for various changes
85 #define SCORE_SPLIT	149	// split bad word
86 #define SCORE_SPLIT_NO	249	// split bad word with NOSPLITSUGS
87 #define SCORE_ICASE	52	// slightly different case
88 #define SCORE_REGION	200	// word is for different region
89 #define SCORE_RARE	180	// rare word
90 #define SCORE_SWAP	75	// swap two characters
91 #define SCORE_SWAP3	110	// swap two characters in three
92 #define SCORE_REP	65	// REP replacement
93 #define SCORE_SUBST	93	// substitute a character
94 #define SCORE_SIMILAR	33	// substitute a similar character
95 #define SCORE_SUBCOMP	33	// substitute a composing character
96 #define SCORE_DEL	94	// delete a character
97 #define SCORE_DELDUP	66	// delete a duplicated character
98 #define SCORE_DELCOMP	28	// delete a composing character
99 #define SCORE_INS	96	// insert a character
100 #define SCORE_INSDUP	67	// insert a duplicate character
101 #define SCORE_INSCOMP	30	// insert a composing character
102 #define SCORE_NONWORD	103	// change non-word to word char
103 
104 #define SCORE_FILE	30	// suggestion from a file
105 #define SCORE_MAXINIT	350	// Initial maximum score: higher == slower.
106 				// 350 allows for about three changes.
107 
108 #define SCORE_COMMON1	30	// subtracted for words seen before
109 #define SCORE_COMMON2	40	// subtracted for words often seen
110 #define SCORE_COMMON3	50	// subtracted for words very often seen
111 #define SCORE_THRES2	10	// word count threshold for COMMON2
112 #define SCORE_THRES3	100	// word count threshold for COMMON3
113 
114 // When trying changed soundfold words it becomes slow when trying more than
115 // two changes.  With less then two changes it's slightly faster but we miss a
116 // few good suggestions.  In rare cases we need to try three of four changes.
117 #define SCORE_SFMAX1	200	// maximum score for first try
118 #define SCORE_SFMAX2	300	// maximum score for second try
119 #define SCORE_SFMAX3	400	// maximum score for third try
120 
121 #define SCORE_BIG	SCORE_INS * 3	// big difference
122 #define SCORE_MAXMAX	999999		// accept any score
123 #define SCORE_LIMITMAX	350		// for spell_edit_score_limit()
124 
125 // for spell_edit_score_limit() we need to know the minimum value of
126 // SCORE_ICASE, SCORE_SWAP, SCORE_DEL, SCORE_SIMILAR and SCORE_INS
127 #define SCORE_EDIT_MIN	SCORE_SIMILAR
128 
129 /*
130  * For finding suggestions: At each node in the tree these states are tried:
131  */
132 typedef enum
133 {
134     STATE_START = 0,	// At start of node check for NUL bytes (goodword
135 			// ends); if badword ends there is a match, otherwise
136 			// try splitting word.
137     STATE_NOPREFIX,	// try without prefix
138     STATE_SPLITUNDO,	// Undo splitting.
139     STATE_ENDNUL,	// Past NUL bytes at start of the node.
140     STATE_PLAIN,	// Use each byte of the node.
141     STATE_DEL,		// Delete a byte from the bad word.
142     STATE_INS_PREP,	// Prepare for inserting bytes.
143     STATE_INS,		// Insert a byte in the bad word.
144     STATE_SWAP,		// Swap two bytes.
145     STATE_UNSWAP,	// Undo swap two characters.
146     STATE_SWAP3,	// Swap two characters over three.
147     STATE_UNSWAP3,	// Undo Swap two characters over three.
148     STATE_UNROT3L,	// Undo rotate three characters left
149     STATE_UNROT3R,	// Undo rotate three characters right
150     STATE_REP_INI,	// Prepare for using REP items.
151     STATE_REP,		// Use matching REP items from the .aff file.
152     STATE_REP_UNDO,	// Undo a REP item replacement.
153     STATE_FINAL		// End of this node.
154 } state_T;
155 
156 /*
157  * Struct to keep the state at each level in suggest_try_change().
158  */
159 typedef struct trystate_S
160 {
161     state_T	ts_state;	// state at this level, STATE_
162     int		ts_score;	// score
163     idx_T	ts_arridx;	// index in tree array, start of node
164     short	ts_curi;	// index in list of child nodes
165     char_u	ts_fidx;	// index in fword[], case-folded bad word
166     char_u	ts_fidxtry;	// ts_fidx at which bytes may be changed
167     char_u	ts_twordlen;	// valid length of tword[]
168     char_u	ts_prefixdepth;	// stack depth for end of prefix or
169 				// PFD_PREFIXTREE or PFD_NOPREFIX
170     char_u	ts_flags;	// TSF_ flags
171     char_u	ts_tcharlen;	// number of bytes in tword character
172     char_u	ts_tcharidx;	// current byte index in tword character
173     char_u	ts_isdiff;	// DIFF_ values
174     char_u	ts_fcharstart;	// index in fword where badword char started
175     char_u	ts_prewordlen;	// length of word in "preword[]"
176     char_u	ts_splitoff;	// index in "tword" after last split
177     char_u	ts_splitfidx;	// "ts_fidx" at word split
178     char_u	ts_complen;	// nr of compound words used
179     char_u	ts_compsplit;	// index for "compflags" where word was spit
180     char_u	ts_save_badflags;   // su_badflags saved here
181     char_u	ts_delidx;	// index in fword for char that was deleted,
182 				// valid when "ts_flags" has TSF_DIDDEL
183 } trystate_T;
184 
185 // values for ts_isdiff
186 #define DIFF_NONE	0	// no different byte (yet)
187 #define DIFF_YES	1	// different byte found
188 #define DIFF_INSERT	2	// inserting character
189 
190 // values for ts_flags
191 #define TSF_PREFIXOK	1	// already checked that prefix is OK
192 #define TSF_DIDSPLIT	2	// tried split at this point
193 #define TSF_DIDDEL	4	// did a delete, "ts_delidx" has index
194 
195 // special values ts_prefixdepth
196 #define PFD_NOPREFIX	0xff	// not using prefixes
197 #define PFD_PREFIXTREE	0xfe	// walking through the prefix tree
198 #define PFD_NOTSPECIAL	0xfd	// highest value that's not special
199 
200 static void spell_find_suggest(char_u *badptr, int badlen, suginfo_T *su, int maxcount, int banbadword, int need_cap, int interactive);
201 #ifdef FEAT_EVAL
202 static void spell_suggest_expr(suginfo_T *su, char_u *expr);
203 #endif
204 static void spell_suggest_file(suginfo_T *su, char_u *fname);
205 static void spell_suggest_intern(suginfo_T *su, int interactive);
206 static void spell_find_cleanup(suginfo_T *su);
207 static void suggest_try_special(suginfo_T *su);
208 static void suggest_try_change(suginfo_T *su);
209 static void suggest_trie_walk(suginfo_T *su, langp_T *lp, char_u *fword, int soundfold);
210 static void go_deeper(trystate_T *stack, int depth, int score_add);
211 static void find_keepcap_word(slang_T *slang, char_u *fword, char_u *kword);
212 static void score_comp_sal(suginfo_T *su);
213 static void score_combine(suginfo_T *su);
214 static int stp_sal_score(suggest_T *stp, suginfo_T *su, slang_T *slang, char_u *badsound);
215 static void suggest_try_soundalike_prep(void);
216 static void suggest_try_soundalike(suginfo_T *su);
217 static void suggest_try_soundalike_finish(void);
218 static void add_sound_suggest(suginfo_T *su, char_u *goodword, int score, langp_T *lp);
219 static int soundfold_find(slang_T *slang, char_u *word);
220 static int similar_chars(slang_T *slang, int c1, int c2);
221 static void add_suggestion(suginfo_T *su, garray_T *gap, char_u *goodword, int badlen, int score, int altscore, int had_bonus, slang_T *slang, int maxsf);
222 static void check_suggestions(suginfo_T *su, garray_T *gap);
223 static void add_banned(suginfo_T *su, char_u *word);
224 static void rescore_suggestions(suginfo_T *su);
225 static void rescore_one(suginfo_T *su, suggest_T *stp);
226 static int cleanup_suggestions(garray_T *gap, int maxscore, int keep);
227 static int soundalike_score(char_u *goodsound, char_u *badsound);
228 static int spell_edit_score(slang_T *slang, char_u *badword, char_u *goodword);
229 static int spell_edit_score_limit(slang_T *slang, char_u *badword, char_u *goodword, int limit);
230 static int spell_edit_score_limit_w(slang_T *slang, char_u *badword, char_u *goodword, int limit);
231 
232 /*
233  * Return TRUE when the sequence of flags in "compflags" plus "flag" can
234  * possibly form a valid compounded word.  This also checks the COMPOUNDRULE
235  * lines if they don't contain wildcards.
236  */
237     static int
can_be_compound(trystate_T * sp,slang_T * slang,char_u * compflags,int flag)238 can_be_compound(
239     trystate_T	*sp,
240     slang_T	*slang,
241     char_u	*compflags,
242     int		flag)
243 {
244     // If the flag doesn't appear in sl_compstartflags or sl_compallflags
245     // then it can't possibly compound.
246     if (!byte_in_str(sp->ts_complen == sp->ts_compsplit
247 		? slang->sl_compstartflags : slang->sl_compallflags, flag))
248 	return FALSE;
249 
250     // If there are no wildcards, we can check if the flags collected so far
251     // possibly can form a match with COMPOUNDRULE patterns.  This only
252     // makes sense when we have two or more words.
253     if (slang->sl_comprules != NULL && sp->ts_complen > sp->ts_compsplit)
254     {
255 	int v;
256 
257 	compflags[sp->ts_complen] = flag;
258 	compflags[sp->ts_complen + 1] = NUL;
259 	v = match_compoundrule(slang, compflags + sp->ts_compsplit);
260 	compflags[sp->ts_complen] = NUL;
261 	return v;
262     }
263 
264     return TRUE;
265 }
266 
267 /*
268  * Adjust the score of common words.
269  */
270     static int
score_wordcount_adj(slang_T * slang,int score,char_u * word,int split)271 score_wordcount_adj(
272     slang_T	*slang,
273     int		score,
274     char_u	*word,
275     int		split)	    // word was split, less bonus
276 {
277     hashitem_T	*hi;
278     wordcount_T	*wc;
279     int		bonus;
280     int		newscore;
281 
282     hi = hash_find(&slang->sl_wordcount, word);
283     if (!HASHITEM_EMPTY(hi))
284     {
285 	wc = HI2WC(hi);
286 	if (wc->wc_count < SCORE_THRES2)
287 	    bonus = SCORE_COMMON1;
288 	else if (wc->wc_count < SCORE_THRES3)
289 	    bonus = SCORE_COMMON2;
290 	else
291 	    bonus = SCORE_COMMON3;
292 	if (split)
293 	    newscore = score - bonus / 2;
294 	else
295 	    newscore = score - bonus;
296 	if (newscore < 0)
297 	    return 0;
298 	return newscore;
299     }
300     return score;
301 }
302 
303 /*
304  * Like captype() but for a KEEPCAP word add ONECAP if the word starts with a
305  * capital.  So that make_case_word() can turn WOrd into Word.
306  * Add ALLCAP for "WOrD".
307  */
308     static int
badword_captype(char_u * word,char_u * end)309 badword_captype(char_u *word, char_u *end)
310 {
311     int		flags = captype(word, end);
312     int		c;
313     int		l, u;
314     int		first;
315     char_u	*p;
316 
317     if (flags & WF_KEEPCAP)
318     {
319 	// Count the number of UPPER and lower case letters.
320 	l = u = 0;
321 	first = FALSE;
322 	for (p = word; p < end; MB_PTR_ADV(p))
323 	{
324 	    c = PTR2CHAR(p);
325 	    if (SPELL_ISUPPER(c))
326 	    {
327 		++u;
328 		if (p == word)
329 		    first = TRUE;
330 	    }
331 	    else
332 		++l;
333 	}
334 
335 	// If there are more UPPER than lower case letters suggest an
336 	// ALLCAP word.  Otherwise, if the first letter is UPPER then
337 	// suggest ONECAP.  Exception: "ALl" most likely should be "All",
338 	// require three upper case letters.
339 	if (u > l && u > 2)
340 	    flags |= WF_ALLCAP;
341 	else if (first)
342 	    flags |= WF_ONECAP;
343 
344 	if (u >= 2 && l >= 2)	// maCARONI maCAroni
345 	    flags |= WF_MIXCAP;
346     }
347     return flags;
348 }
349 
350 /*
351  * Opposite of offset2bytes().
352  * "pp" points to the bytes and is advanced over it.
353  * Returns the offset.
354  */
355     static int
bytes2offset(char_u ** pp)356 bytes2offset(char_u **pp)
357 {
358     char_u	*p = *pp;
359     int		nr;
360     int		c;
361 
362     c = *p++;
363     if ((c & 0x80) == 0x00)		// 1 byte
364     {
365 	nr = c - 1;
366     }
367     else if ((c & 0xc0) == 0x80)	// 2 bytes
368     {
369 	nr = (c & 0x3f) - 1;
370 	nr = nr * 255 + (*p++ - 1);
371     }
372     else if ((c & 0xe0) == 0xc0)	// 3 bytes
373     {
374 	nr = (c & 0x1f) - 1;
375 	nr = nr * 255 + (*p++ - 1);
376 	nr = nr * 255 + (*p++ - 1);
377     }
378     else				// 4 bytes
379     {
380 	nr = (c & 0x0f) - 1;
381 	nr = nr * 255 + (*p++ - 1);
382 	nr = nr * 255 + (*p++ - 1);
383 	nr = nr * 255 + (*p++ - 1);
384     }
385 
386     *pp = p;
387     return nr;
388 }
389 
390 // values for sps_flags
391 #define SPS_BEST    1
392 #define SPS_FAST    2
393 #define SPS_DOUBLE  4
394 
395 static int sps_flags = SPS_BEST;	// flags from 'spellsuggest'
396 static int sps_limit = 9999;		// max nr of suggestions given
397 
398 /*
399  * Check the 'spellsuggest' option.  Return FAIL if it's wrong.
400  * Sets "sps_flags" and "sps_limit".
401  */
402     int
spell_check_sps(void)403 spell_check_sps(void)
404 {
405     char_u	*p;
406     char_u	*s;
407     char_u	buf[MAXPATHL];
408     int		f;
409 
410     sps_flags = 0;
411     sps_limit = 9999;
412 
413     for (p = p_sps; *p != NUL; )
414     {
415 	copy_option_part(&p, buf, MAXPATHL, ",");
416 
417 	f = 0;
418 	if (VIM_ISDIGIT(*buf))
419 	{
420 	    s = buf;
421 	    sps_limit = getdigits(&s);
422 	    if (*s != NUL && !VIM_ISDIGIT(*s))
423 		f = -1;
424 	}
425 	else if (STRCMP(buf, "best") == 0)
426 	    f = SPS_BEST;
427 	else if (STRCMP(buf, "fast") == 0)
428 	    f = SPS_FAST;
429 	else if (STRCMP(buf, "double") == 0)
430 	    f = SPS_DOUBLE;
431 	else if (STRNCMP(buf, "expr:", 5) != 0
432 		&& STRNCMP(buf, "file:", 5) != 0)
433 	    f = -1;
434 
435 	if (f == -1 || (sps_flags != 0 && f != 0))
436 	{
437 	    sps_flags = SPS_BEST;
438 	    sps_limit = 9999;
439 	    return FAIL;
440 	}
441 	if (f != 0)
442 	    sps_flags = f;
443     }
444 
445     if (sps_flags == 0)
446 	sps_flags = SPS_BEST;
447 
448     return OK;
449 }
450 
451 /*
452  * "z=": Find badly spelled word under or after the cursor.
453  * Give suggestions for the properly spelled word.
454  * In Visual mode use the highlighted word as the bad word.
455  * When "count" is non-zero use that suggestion.
456  */
457     void
spell_suggest(int count)458 spell_suggest(int count)
459 {
460     char_u	*line;
461     pos_T	prev_cursor = curwin->w_cursor;
462     char_u	wcopy[MAXWLEN + 2];
463     char_u	*p;
464     int		i;
465     int		c;
466     suginfo_T	sug;
467     suggest_T	*stp;
468     int		mouse_used;
469     int		need_cap;
470     int		limit;
471     int		selected = count;
472     int		badlen = 0;
473     int		msg_scroll_save = msg_scroll;
474     int		wo_spell_save = curwin->w_p_spell;
475 
476     if (!curwin->w_p_spell)
477     {
478 	did_set_spelllang(curwin);
479 	curwin->w_p_spell = TRUE;
480     }
481 
482     if (*curwin->w_s->b_p_spl == NUL)
483     {
484 	emsg(_(e_no_spell));
485 	return;
486     }
487 
488     if (VIsual_active)
489     {
490 	// Use the Visually selected text as the bad word.  But reject
491 	// a multi-line selection.
492 	if (curwin->w_cursor.lnum != VIsual.lnum)
493 	{
494 	    vim_beep(BO_SPELL);
495 	    return;
496 	}
497 	badlen = (int)curwin->w_cursor.col - (int)VIsual.col;
498 	if (badlen < 0)
499 	    badlen = -badlen;
500 	else
501 	    curwin->w_cursor.col = VIsual.col;
502 	++badlen;
503 	end_visual_mode();
504     }
505     // Find the start of the badly spelled word.
506     else if (spell_move_to(curwin, FORWARD, TRUE, TRUE, NULL) == 0
507 	    || curwin->w_cursor.col > prev_cursor.col)
508     {
509 	// No bad word or it starts after the cursor: use the word under the
510 	// cursor.
511 	curwin->w_cursor = prev_cursor;
512 	line = ml_get_curline();
513 	p = line + curwin->w_cursor.col;
514 	// Backup to before start of word.
515 	while (p > line && spell_iswordp_nmw(p, curwin))
516 	    MB_PTR_BACK(line, p);
517 	// Forward to start of word.
518 	while (*p != NUL && !spell_iswordp_nmw(p, curwin))
519 	    MB_PTR_ADV(p);
520 
521 	if (!spell_iswordp_nmw(p, curwin))		// No word found.
522 	{
523 	    beep_flush();
524 	    return;
525 	}
526 	curwin->w_cursor.col = (colnr_T)(p - line);
527     }
528 
529     // Get the word and its length.
530 
531     // Figure out if the word should be capitalised.
532     need_cap = check_need_cap(curwin->w_cursor.lnum, curwin->w_cursor.col);
533 
534     // Make a copy of current line since autocommands may free the line.
535     line = vim_strsave(ml_get_curline());
536     if (line == NULL)
537 	goto skip;
538 
539     // Get the list of suggestions.  Limit to 'lines' - 2 or the number in
540     // 'spellsuggest', whatever is smaller.
541     if (sps_limit > (int)Rows - 2)
542 	limit = (int)Rows - 2;
543     else
544 	limit = sps_limit;
545     spell_find_suggest(line + curwin->w_cursor.col, badlen, &sug, limit,
546 							TRUE, need_cap, TRUE);
547 
548     if (sug.su_ga.ga_len == 0)
549 	msg(_("Sorry, no suggestions"));
550     else if (count > 0)
551     {
552 	if (count > sug.su_ga.ga_len)
553 	    smsg(_("Sorry, only %ld suggestions"), (long)sug.su_ga.ga_len);
554     }
555     else
556     {
557 #ifdef FEAT_RIGHTLEFT
558 	// When 'rightleft' is set the list is drawn right-left.
559 	cmdmsg_rl = curwin->w_p_rl;
560 	if (cmdmsg_rl)
561 	    msg_col = Columns - 1;
562 #endif
563 
564 	// List the suggestions.
565 	msg_start();
566 	msg_row = Rows - 1;	// for when 'cmdheight' > 1
567 	lines_left = Rows;	// avoid more prompt
568 	vim_snprintf((char *)IObuff, IOSIZE, _("Change \"%.*s\" to:"),
569 						sug.su_badlen, sug.su_badptr);
570 #ifdef FEAT_RIGHTLEFT
571 	if (cmdmsg_rl && STRNCMP(IObuff, "Change", 6) == 0)
572 	{
573 	    // And now the rabbit from the high hat: Avoid showing the
574 	    // untranslated message rightleft.
575 	    vim_snprintf((char *)IObuff, IOSIZE, ":ot \"%.*s\" egnahC",
576 						sug.su_badlen, sug.su_badptr);
577 	}
578 #endif
579 	msg_puts((char *)IObuff);
580 	msg_clr_eos();
581 	msg_putchar('\n');
582 
583 	msg_scroll = TRUE;
584 	for (i = 0; i < sug.su_ga.ga_len; ++i)
585 	{
586 	    stp = &SUG(sug.su_ga, i);
587 
588 	    // The suggested word may replace only part of the bad word, add
589 	    // the not replaced part.
590 	    vim_strncpy(wcopy, stp->st_word, MAXWLEN);
591 	    if (sug.su_badlen > stp->st_orglen)
592 		vim_strncpy(wcopy + stp->st_wordlen,
593 					       sug.su_badptr + stp->st_orglen,
594 					      sug.su_badlen - stp->st_orglen);
595 	    vim_snprintf((char *)IObuff, IOSIZE, "%2d", i + 1);
596 #ifdef FEAT_RIGHTLEFT
597 	    if (cmdmsg_rl)
598 		rl_mirror(IObuff);
599 #endif
600 	    msg_puts((char *)IObuff);
601 
602 	    vim_snprintf((char *)IObuff, IOSIZE, " \"%s\"", wcopy);
603 	    msg_puts((char *)IObuff);
604 
605 	    // The word may replace more than "su_badlen".
606 	    if (sug.su_badlen < stp->st_orglen)
607 	    {
608 		vim_snprintf((char *)IObuff, IOSIZE, _(" < \"%.*s\""),
609 					       stp->st_orglen, sug.su_badptr);
610 		msg_puts((char *)IObuff);
611 	    }
612 
613 	    if (p_verbose > 0)
614 	    {
615 		// Add the score.
616 		if (sps_flags & (SPS_DOUBLE | SPS_BEST))
617 		    vim_snprintf((char *)IObuff, IOSIZE, " (%s%d - %d)",
618 			stp->st_salscore ? "s " : "",
619 			stp->st_score, stp->st_altscore);
620 		else
621 		    vim_snprintf((char *)IObuff, IOSIZE, " (%d)",
622 			    stp->st_score);
623 #ifdef FEAT_RIGHTLEFT
624 		if (cmdmsg_rl)
625 		    // Mirror the numbers, but keep the leading space.
626 		    rl_mirror(IObuff + 1);
627 #endif
628 		msg_advance(30);
629 		msg_puts((char *)IObuff);
630 	    }
631 	    msg_putchar('\n');
632 	}
633 
634 #ifdef FEAT_RIGHTLEFT
635 	cmdmsg_rl = FALSE;
636 	msg_col = 0;
637 #endif
638 	// Ask for choice.
639 	selected = prompt_for_number(&mouse_used);
640 	if (mouse_used)
641 	    selected -= lines_left;
642 	lines_left = Rows;		// avoid more prompt
643 	// don't delay for 'smd' in normal_cmd()
644 	msg_scroll = msg_scroll_save;
645     }
646 
647     if (selected > 0 && selected <= sug.su_ga.ga_len && u_save_cursor() == OK)
648     {
649 	// Save the from and to text for :spellrepall.
650 	VIM_CLEAR(repl_from);
651 	VIM_CLEAR(repl_to);
652 
653 	stp = &SUG(sug.su_ga, selected - 1);
654 	if (sug.su_badlen > stp->st_orglen)
655 	{
656 	    // Replacing less than "su_badlen", append the remainder to
657 	    // repl_to.
658 	    repl_from = vim_strnsave(sug.su_badptr, sug.su_badlen);
659 	    vim_snprintf((char *)IObuff, IOSIZE, "%s%.*s", stp->st_word,
660 		    sug.su_badlen - stp->st_orglen,
661 					      sug.su_badptr + stp->st_orglen);
662 	    repl_to = vim_strsave(IObuff);
663 	}
664 	else
665 	{
666 	    // Replacing su_badlen or more, use the whole word.
667 	    repl_from = vim_strnsave(sug.su_badptr, stp->st_orglen);
668 	    repl_to = vim_strsave(stp->st_word);
669 	}
670 
671 	// Replace the word.
672 	p = alloc(STRLEN(line) - stp->st_orglen + stp->st_wordlen + 1);
673 	if (p != NULL)
674 	{
675 	    c = (int)(sug.su_badptr - line);
676 	    mch_memmove(p, line, c);
677 	    STRCPY(p + c, stp->st_word);
678 	    STRCAT(p, sug.su_badptr + stp->st_orglen);
679 
680 	    // For redo we use a change-word command.
681 	    ResetRedobuff();
682 	    AppendToRedobuff((char_u *)"ciw");
683 	    AppendToRedobuffLit(p + c,
684 			    stp->st_wordlen + sug.su_badlen - stp->st_orglen);
685 	    AppendCharToRedobuff(ESC);
686 
687 	    // "p" may be freed here
688 	    ml_replace(curwin->w_cursor.lnum, p, FALSE);
689 	    curwin->w_cursor.col = c;
690 
691 	    changed_bytes(curwin->w_cursor.lnum, c);
692 	}
693     }
694     else
695 	curwin->w_cursor = prev_cursor;
696 
697     spell_find_cleanup(&sug);
698 skip:
699     vim_free(line);
700     curwin->w_p_spell = wo_spell_save;
701 }
702 
703 /*
704  * Find spell suggestions for "word".  Return them in the growarray "*gap" as
705  * a list of allocated strings.
706  */
707     void
spell_suggest_list(garray_T * gap,char_u * word,int maxcount,int need_cap,int interactive)708 spell_suggest_list(
709     garray_T	*gap,
710     char_u	*word,
711     int		maxcount,	// maximum nr of suggestions
712     int		need_cap,	// 'spellcapcheck' matched
713     int		interactive)
714 {
715     suginfo_T	sug;
716     int		i;
717     suggest_T	*stp;
718     char_u	*wcopy;
719 
720     spell_find_suggest(word, 0, &sug, maxcount, FALSE, need_cap, interactive);
721 
722     // Make room in "gap".
723     ga_init2(gap, sizeof(char_u *), sug.su_ga.ga_len + 1);
724     if (ga_grow(gap, sug.su_ga.ga_len) == OK)
725     {
726 	for (i = 0; i < sug.su_ga.ga_len; ++i)
727 	{
728 	    stp = &SUG(sug.su_ga, i);
729 
730 	    // The suggested word may replace only part of "word", add the not
731 	    // replaced part.
732 	    wcopy = alloc(stp->st_wordlen
733 		      + (unsigned)STRLEN(sug.su_badptr + stp->st_orglen) + 1);
734 	    if (wcopy == NULL)
735 		break;
736 	    STRCPY(wcopy, stp->st_word);
737 	    STRCPY(wcopy + stp->st_wordlen, sug.su_badptr + stp->st_orglen);
738 	    ((char_u **)gap->ga_data)[gap->ga_len++] = wcopy;
739 	}
740     }
741 
742     spell_find_cleanup(&sug);
743 }
744 
745 /*
746  * Find spell suggestions for the word at the start of "badptr".
747  * Return the suggestions in "su->su_ga".
748  * The maximum number of suggestions is "maxcount".
749  * Note: does use info for the current window.
750  * This is based on the mechanisms of Aspell, but completely reimplemented.
751  */
752     static void
spell_find_suggest(char_u * badptr,int badlen,suginfo_T * su,int maxcount,int banbadword,int need_cap,int interactive)753 spell_find_suggest(
754     char_u	*badptr,
755     int		badlen,		// length of bad word or 0 if unknown
756     suginfo_T	*su,
757     int		maxcount,
758     int		banbadword,	// don't include badword in suggestions
759     int		need_cap,	// word should start with capital
760     int		interactive)
761 {
762     hlf_T	attr = HLF_COUNT;
763     char_u	buf[MAXPATHL];
764     char_u	*p;
765     int		do_combine = FALSE;
766     char_u	*sps_copy;
767 #ifdef FEAT_EVAL
768     static int	expr_busy = FALSE;
769 #endif
770     int		c;
771     int		i;
772     langp_T	*lp;
773     int		did_intern = FALSE;
774 
775     // Set the info in "*su".
776     CLEAR_POINTER(su);
777     ga_init2(&su->su_ga, (int)sizeof(suggest_T), 10);
778     ga_init2(&su->su_sga, (int)sizeof(suggest_T), 10);
779     if (*badptr == NUL)
780 	return;
781     hash_init(&su->su_banned);
782 
783     su->su_badptr = badptr;
784     if (badlen != 0)
785 	su->su_badlen = badlen;
786     else
787 	su->su_badlen = spell_check(curwin, su->su_badptr, &attr, NULL, FALSE);
788     su->su_maxcount = maxcount;
789     su->su_maxscore = SCORE_MAXINIT;
790 
791     if (su->su_badlen >= MAXWLEN)
792 	su->su_badlen = MAXWLEN - 1;	// just in case
793     vim_strncpy(su->su_badword, su->su_badptr, su->su_badlen);
794     (void)spell_casefold(curwin, su->su_badptr, su->su_badlen,
795 						    su->su_fbadword, MAXWLEN);
796     // TODO: make this work if the case-folded text is longer than the original
797     // text. Currently an illegal byte causes wrong pointer computations.
798     su->su_fbadword[su->su_badlen] = NUL;
799 
800     // get caps flags for bad word
801     su->su_badflags = badword_captype(su->su_badptr,
802 					       su->su_badptr + su->su_badlen);
803     if (need_cap)
804 	su->su_badflags |= WF_ONECAP;
805 
806     // Find the default language for sound folding.  We simply use the first
807     // one in 'spelllang' that supports sound folding.  That's good for when
808     // using multiple files for one language, it's not that bad when mixing
809     // languages (e.g., "pl,en").
810     for (i = 0; i < curbuf->b_s.b_langp.ga_len; ++i)
811     {
812 	lp = LANGP_ENTRY(curbuf->b_s.b_langp, i);
813 	if (lp->lp_sallang != NULL)
814 	{
815 	    su->su_sallang = lp->lp_sallang;
816 	    break;
817 	}
818     }
819 
820     // Soundfold the bad word with the default sound folding, so that we don't
821     // have to do this many times.
822     if (su->su_sallang != NULL)
823 	spell_soundfold(su->su_sallang, su->su_fbadword, TRUE,
824 							  su->su_sal_badword);
825 
826     // If the word is not capitalised and spell_check() doesn't consider the
827     // word to be bad then it might need to be capitalised.  Add a suggestion
828     // for that.
829     c = PTR2CHAR(su->su_badptr);
830     if (!SPELL_ISUPPER(c) && attr == HLF_COUNT)
831     {
832 	make_case_word(su->su_badword, buf, WF_ONECAP);
833 	add_suggestion(su, &su->su_ga, buf, su->su_badlen, SCORE_ICASE,
834 					      0, TRUE, su->su_sallang, FALSE);
835     }
836 
837     // Ban the bad word itself.  It may appear in another region.
838     if (banbadword)
839 	add_banned(su, su->su_badword);
840 
841     // Make a copy of 'spellsuggest', because the expression may change it.
842     sps_copy = vim_strsave(p_sps);
843     if (sps_copy == NULL)
844 	return;
845 
846     // Loop over the items in 'spellsuggest'.
847     for (p = sps_copy; *p != NUL; )
848     {
849 	copy_option_part(&p, buf, MAXPATHL, ",");
850 
851 	if (STRNCMP(buf, "expr:", 5) == 0)
852 	{
853 #ifdef FEAT_EVAL
854 	    // Evaluate an expression.  Skip this when called recursively,
855 	    // when using spellsuggest() in the expression.
856 	    if (!expr_busy)
857 	    {
858 		expr_busy = TRUE;
859 		spell_suggest_expr(su, buf + 5);
860 		expr_busy = FALSE;
861 	    }
862 #endif
863 	}
864 	else if (STRNCMP(buf, "file:", 5) == 0)
865 	    // Use list of suggestions in a file.
866 	    spell_suggest_file(su, buf + 5);
867 	else if (!did_intern)
868 	{
869 	    // Use internal method once.
870 	    spell_suggest_intern(su, interactive);
871 	    if (sps_flags & SPS_DOUBLE)
872 		do_combine = TRUE;
873 	    did_intern = TRUE;
874 	}
875     }
876 
877     vim_free(sps_copy);
878 
879     if (do_combine)
880 	// Combine the two list of suggestions.  This must be done last,
881 	// because sorting changes the order again.
882 	score_combine(su);
883 }
884 
885 #ifdef FEAT_EVAL
886 /*
887  * Find suggestions by evaluating expression "expr".
888  */
889     static void
spell_suggest_expr(suginfo_T * su,char_u * expr)890 spell_suggest_expr(suginfo_T *su, char_u *expr)
891 {
892     list_T	*list;
893     listitem_T	*li;
894     int		score;
895     char_u	*p;
896 
897     // The work is split up in a few parts to avoid having to export
898     // suginfo_T.
899     // First evaluate the expression and get the resulting list.
900     list = eval_spell_expr(su->su_badword, expr);
901     if (list != NULL)
902     {
903 	// Loop over the items in the list.
904 	FOR_ALL_LIST_ITEMS(list, li)
905 	    if (li->li_tv.v_type == VAR_LIST)
906 	    {
907 		// Get the word and the score from the items.
908 		score = get_spellword(li->li_tv.vval.v_list, &p);
909 		if (score >= 0 && score <= su->su_maxscore)
910 		    add_suggestion(su, &su->su_ga, p, su->su_badlen,
911 				       score, 0, TRUE, su->su_sallang, FALSE);
912 	    }
913 	list_unref(list);
914     }
915 
916     // Remove bogus suggestions, sort and truncate at "maxcount".
917     check_suggestions(su, &su->su_ga);
918     (void)cleanup_suggestions(&su->su_ga, su->su_maxscore, su->su_maxcount);
919 }
920 #endif
921 
922 /*
923  * Find suggestions in file "fname".  Used for "file:" in 'spellsuggest'.
924  */
925     static void
spell_suggest_file(suginfo_T * su,char_u * fname)926 spell_suggest_file(suginfo_T *su, char_u *fname)
927 {
928     FILE	*fd;
929     char_u	line[MAXWLEN * 2];
930     char_u	*p;
931     int		len;
932     char_u	cword[MAXWLEN];
933 
934     // Open the file.
935     fd = mch_fopen((char *)fname, "r");
936     if (fd == NULL)
937     {
938 	semsg(_(e_notopen), fname);
939 	return;
940     }
941 
942     // Read it line by line.
943     while (!vim_fgets(line, MAXWLEN * 2, fd) && !got_int)
944     {
945 	line_breakcheck();
946 
947 	p = vim_strchr(line, '/');
948 	if (p == NULL)
949 	    continue;	    // No Tab found, just skip the line.
950 	*p++ = NUL;
951 	if (STRICMP(su->su_badword, line) == 0)
952 	{
953 	    // Match!  Isolate the good word, until CR or NL.
954 	    for (len = 0; p[len] >= ' '; ++len)
955 		;
956 	    p[len] = NUL;
957 
958 	    // If the suggestion doesn't have specific case duplicate the case
959 	    // of the bad word.
960 	    if (captype(p, NULL) == 0)
961 	    {
962 		make_case_word(p, cword, su->su_badflags);
963 		p = cword;
964 	    }
965 
966 	    add_suggestion(su, &su->su_ga, p, su->su_badlen,
967 				  SCORE_FILE, 0, TRUE, su->su_sallang, FALSE);
968 	}
969     }
970 
971     fclose(fd);
972 
973     // Remove bogus suggestions, sort and truncate at "maxcount".
974     check_suggestions(su, &su->su_ga);
975     (void)cleanup_suggestions(&su->su_ga, su->su_maxscore, su->su_maxcount);
976 }
977 
978 /*
979  * Find suggestions for the internal method indicated by "sps_flags".
980  */
981     static void
spell_suggest_intern(suginfo_T * su,int interactive)982 spell_suggest_intern(suginfo_T *su, int interactive)
983 {
984     // Load the .sug file(s) that are available and not done yet.
985     suggest_load_files();
986 
987     // 1. Try special cases, such as repeating a word: "the the" -> "the".
988     //
989     // Set a maximum score to limit the combination of operations that is
990     // tried.
991     suggest_try_special(su);
992 
993     // 2. Try inserting/deleting/swapping/changing a letter, use REP entries
994     //    from the .aff file and inserting a space (split the word).
995     suggest_try_change(su);
996 
997     // For the resulting top-scorers compute the sound-a-like score.
998     if (sps_flags & SPS_DOUBLE)
999 	score_comp_sal(su);
1000 
1001     // 3. Try finding sound-a-like words.
1002     if ((sps_flags & SPS_FAST) == 0)
1003     {
1004 	if (sps_flags & SPS_BEST)
1005 	    // Adjust the word score for the suggestions found so far for how
1006 	    // they sound like.
1007 	    rescore_suggestions(su);
1008 
1009 	// While going through the soundfold tree "su_maxscore" is the score
1010 	// for the soundfold word, limits the changes that are being tried,
1011 	// and "su_sfmaxscore" the rescored score, which is set by
1012 	// cleanup_suggestions().
1013 	// First find words with a small edit distance, because this is much
1014 	// faster and often already finds the top-N suggestions.  If we didn't
1015 	// find many suggestions try again with a higher edit distance.
1016 	// "sl_sounddone" is used to avoid doing the same word twice.
1017 	suggest_try_soundalike_prep();
1018 	su->su_maxscore = SCORE_SFMAX1;
1019 	su->su_sfmaxscore = SCORE_MAXINIT * 3;
1020 	suggest_try_soundalike(su);
1021 	if (su->su_ga.ga_len < SUG_CLEAN_COUNT(su))
1022 	{
1023 	    // We didn't find enough matches, try again, allowing more
1024 	    // changes to the soundfold word.
1025 	    su->su_maxscore = SCORE_SFMAX2;
1026 	    suggest_try_soundalike(su);
1027 	    if (su->su_ga.ga_len < SUG_CLEAN_COUNT(su))
1028 	    {
1029 		// Still didn't find enough matches, try again, allowing even
1030 		// more changes to the soundfold word.
1031 		su->su_maxscore = SCORE_SFMAX3;
1032 		suggest_try_soundalike(su);
1033 	    }
1034 	}
1035 	su->su_maxscore = su->su_sfmaxscore;
1036 	suggest_try_soundalike_finish();
1037     }
1038 
1039     // When CTRL-C was hit while searching do show the results.  Only clear
1040     // got_int when using a command, not for spellsuggest().
1041     ui_breakcheck();
1042     if (interactive && got_int)
1043     {
1044 	(void)vgetc();
1045 	got_int = FALSE;
1046     }
1047 
1048     if ((sps_flags & SPS_DOUBLE) == 0 && su->su_ga.ga_len != 0)
1049     {
1050 	if (sps_flags & SPS_BEST)
1051 	    // Adjust the word score for how it sounds like.
1052 	    rescore_suggestions(su);
1053 
1054 	// Remove bogus suggestions, sort and truncate at "maxcount".
1055 	check_suggestions(su, &su->su_ga);
1056 	(void)cleanup_suggestions(&su->su_ga, su->su_maxscore, su->su_maxcount);
1057     }
1058 }
1059 
1060 /*
1061  * Free the info put in "*su" by spell_find_suggest().
1062  */
1063     static void
spell_find_cleanup(suginfo_T * su)1064 spell_find_cleanup(suginfo_T *su)
1065 {
1066     int		i;
1067 
1068     // Free the suggestions.
1069     for (i = 0; i < su->su_ga.ga_len; ++i)
1070 	vim_free(SUG(su->su_ga, i).st_word);
1071     ga_clear(&su->su_ga);
1072     for (i = 0; i < su->su_sga.ga_len; ++i)
1073 	vim_free(SUG(su->su_sga, i).st_word);
1074     ga_clear(&su->su_sga);
1075 
1076     // Free the banned words.
1077     hash_clear_all(&su->su_banned, 0);
1078 }
1079 
1080 /*
1081  * Try finding suggestions by recognizing specific situations.
1082  */
1083     static void
suggest_try_special(suginfo_T * su)1084 suggest_try_special(suginfo_T *su)
1085 {
1086     char_u	*p;
1087     size_t	len;
1088     int		c;
1089     char_u	word[MAXWLEN];
1090 
1091     // Recognize a word that is repeated: "the the".
1092     p = skiptowhite(su->su_fbadword);
1093     len = p - su->su_fbadword;
1094     p = skipwhite(p);
1095     if (STRLEN(p) == len && STRNCMP(su->su_fbadword, p, len) == 0)
1096     {
1097 	// Include badflags: if the badword is onecap or allcap
1098 	// use that for the goodword too: "The the" -> "The".
1099 	c = su->su_fbadword[len];
1100 	su->su_fbadword[len] = NUL;
1101 	make_case_word(su->su_fbadword, word, su->su_badflags);
1102 	su->su_fbadword[len] = c;
1103 
1104 	// Give a soundalike score of 0, compute the score as if deleting one
1105 	// character.
1106 	add_suggestion(su, &su->su_ga, word, su->su_badlen,
1107 		       RESCORE(SCORE_REP, 0), 0, TRUE, su->su_sallang, FALSE);
1108     }
1109 }
1110 
1111 /*
1112  * Change the 0 to 1 to measure how much time is spent in each state.
1113  * Output is dumped in "suggestprof".
1114  */
1115 #if 0
1116 # define SUGGEST_PROFILE
1117 proftime_T current;
1118 proftime_T total;
1119 proftime_T times[STATE_FINAL + 1];
1120 long counts[STATE_FINAL + 1];
1121 
1122     static void
1123 prof_init(void)
1124 {
1125     for (int i = 0; i <= STATE_FINAL; ++i)
1126     {
1127 	profile_zero(&times[i]);
1128 	counts[i] = 0;
1129     }
1130     profile_start(&current);
1131     profile_start(&total);
1132 }
1133 
1134 // call before changing state
1135     static void
1136 prof_store(state_T state)
1137 {
1138     profile_end(&current);
1139     profile_add(&times[state], &current);
1140     ++counts[state];
1141     profile_start(&current);
1142 }
1143 # define PROF_STORE(state) prof_store(state);
1144 
1145     static void
1146 prof_report(char *name)
1147 {
1148     FILE *fd = fopen("suggestprof", "a");
1149 
1150     profile_end(&total);
1151     fprintf(fd, "-----------------------\n");
1152     fprintf(fd, "%s: %s\n", name, profile_msg(&total));
1153     for (int i = 0; i <= STATE_FINAL; ++i)
1154 	fprintf(fd, "%d: %s (%ld)\n", i, profile_msg(&times[i]), counts[i]);
1155     fclose(fd);
1156 }
1157 #else
1158 # define PROF_STORE(state)
1159 #endif
1160 
1161 /*
1162  * Try finding suggestions by adding/removing/swapping letters.
1163  */
1164     static void
suggest_try_change(suginfo_T * su)1165 suggest_try_change(suginfo_T *su)
1166 {
1167     char_u	fword[MAXWLEN];	    // copy of the bad word, case-folded
1168     int		n;
1169     char_u	*p;
1170     int		lpi;
1171     langp_T	*lp;
1172 
1173     // We make a copy of the case-folded bad word, so that we can modify it
1174     // to find matches (esp. REP items).  Append some more text, changing
1175     // chars after the bad word may help.
1176     STRCPY(fword, su->su_fbadword);
1177     n = (int)STRLEN(fword);
1178     p = su->su_badptr + su->su_badlen;
1179     (void)spell_casefold(curwin, p, (int)STRLEN(p), fword + n, MAXWLEN - n);
1180 
1181     // Make sure the resulting text is not longer than the original text.
1182     n = (int)STRLEN(su->su_badptr);
1183     if (n < MAXWLEN)
1184 	fword[n] = NUL;
1185 
1186     for (lpi = 0; lpi < curwin->w_s->b_langp.ga_len; ++lpi)
1187     {
1188 	lp = LANGP_ENTRY(curwin->w_s->b_langp, lpi);
1189 
1190 	// If reloading a spell file fails it's still in the list but
1191 	// everything has been cleared.
1192 	if (lp->lp_slang->sl_fbyts == NULL)
1193 	    continue;
1194 
1195 	// Try it for this language.  Will add possible suggestions.
1196 #ifdef SUGGEST_PROFILE
1197 	prof_init();
1198 #endif
1199 	suggest_trie_walk(su, lp, fword, FALSE);
1200 #ifdef SUGGEST_PROFILE
1201 	prof_report("try_change");
1202 #endif
1203     }
1204 }
1205 
1206 // Check the maximum score, if we go over it we won't try this change.
1207 #define TRY_DEEPER(su, stack, depth, add) \
1208 		(stack[depth].ts_score + (add) < su->su_maxscore)
1209 
1210 /*
1211  * Try finding suggestions by adding/removing/swapping letters.
1212  *
1213  * This uses a state machine.  At each node in the tree we try various
1214  * operations.  When trying if an operation works "depth" is increased and the
1215  * stack[] is used to store info.  This allows combinations, thus insert one
1216  * character, replace one and delete another.  The number of changes is
1217  * limited by su->su_maxscore.
1218  *
1219  * After implementing this I noticed an article by Kemal Oflazer that
1220  * describes something similar: "Error-tolerant Finite State Recognition with
1221  * Applications to Morphological Analysis and Spelling Correction" (1996).
1222  * The implementation in the article is simplified and requires a stack of
1223  * unknown depth.  The implementation here only needs a stack depth equal to
1224  * the length of the word.
1225  *
1226  * This is also used for the sound-folded word, "soundfold" is TRUE then.
1227  * The mechanism is the same, but we find a match with a sound-folded word
1228  * that comes from one or more original words.  Each of these words may be
1229  * added, this is done by add_sound_suggest().
1230  * Don't use:
1231  *	the prefix tree or the keep-case tree
1232  *	"su->su_badlen"
1233  *	anything to do with upper and lower case
1234  *	anything to do with word or non-word characters ("spell_iswordp()")
1235  *	banned words
1236  *	word flags (rare, region, compounding)
1237  *	word splitting for now
1238  *	"similar_chars()"
1239  *	use "slang->sl_repsal" instead of "lp->lp_replang->sl_rep"
1240  */
1241     static void
suggest_trie_walk(suginfo_T * su,langp_T * lp,char_u * fword,int soundfold)1242 suggest_trie_walk(
1243     suginfo_T	*su,
1244     langp_T	*lp,
1245     char_u	*fword,
1246     int		soundfold)
1247 {
1248     char_u	tword[MAXWLEN];	    // good word collected so far
1249     trystate_T	stack[MAXWLEN];
1250     char_u	preword[MAXWLEN * 3]; // word found with proper case;
1251 				      // concatenation of prefix compound
1252 				      // words and split word.  NUL terminated
1253 				      // when going deeper but not when coming
1254 				      // back.
1255     char_u	compflags[MAXWLEN];	// compound flags, one for each word
1256     trystate_T	*sp;
1257     int		newscore;
1258     int		score;
1259     char_u	*byts, *fbyts, *pbyts;
1260     idx_T	*idxs, *fidxs, *pidxs;
1261     int		depth;
1262     int		c, c2, c3;
1263     int		n = 0;
1264     int		flags;
1265     garray_T	*gap;
1266     idx_T	arridx;
1267     int		len;
1268     char_u	*p;
1269     fromto_T	*ftp;
1270     int		fl = 0, tl;
1271     int		repextra = 0;	    // extra bytes in fword[] from REP item
1272     slang_T	*slang = lp->lp_slang;
1273     int		fword_ends;
1274     int		goodword_ends;
1275 #ifdef DEBUG_TRIEWALK
1276     // Stores the name of the change made at each level.
1277     char_u	changename[MAXWLEN][80];
1278 #endif
1279     int		breakcheckcount = 1000;
1280     int		compound_ok;
1281 
1282     // Go through the whole case-fold tree, try changes at each node.
1283     // "tword[]" contains the word collected from nodes in the tree.
1284     // "fword[]" the word we are trying to match with (initially the bad
1285     // word).
1286     depth = 0;
1287     sp = &stack[0];
1288     CLEAR_POINTER(sp);
1289     sp->ts_curi = 1;
1290 
1291     if (soundfold)
1292     {
1293 	// Going through the soundfold tree.
1294 	byts = fbyts = slang->sl_sbyts;
1295 	idxs = fidxs = slang->sl_sidxs;
1296 	pbyts = NULL;
1297 	pidxs = NULL;
1298 	sp->ts_prefixdepth = PFD_NOPREFIX;
1299 	sp->ts_state = STATE_START;
1300     }
1301     else
1302     {
1303 	// When there are postponed prefixes we need to use these first.  At
1304 	// the end of the prefix we continue in the case-fold tree.
1305 	fbyts = slang->sl_fbyts;
1306 	fidxs = slang->sl_fidxs;
1307 	pbyts = slang->sl_pbyts;
1308 	pidxs = slang->sl_pidxs;
1309 	if (pbyts != NULL)
1310 	{
1311 	    byts = pbyts;
1312 	    idxs = pidxs;
1313 	    sp->ts_prefixdepth = PFD_PREFIXTREE;
1314 	    sp->ts_state = STATE_NOPREFIX;	// try without prefix first
1315 	}
1316 	else
1317 	{
1318 	    byts = fbyts;
1319 	    idxs = fidxs;
1320 	    sp->ts_prefixdepth = PFD_NOPREFIX;
1321 	    sp->ts_state = STATE_START;
1322 	}
1323     }
1324 
1325     // Loop to find all suggestions.  At each round we either:
1326     // - For the current state try one operation, advance "ts_curi",
1327     //   increase "depth".
1328     // - When a state is done go to the next, set "ts_state".
1329     // - When all states are tried decrease "depth".
1330     while (depth >= 0 && !got_int)
1331     {
1332 	sp = &stack[depth];
1333 	switch (sp->ts_state)
1334 	{
1335 	case STATE_START:
1336 	case STATE_NOPREFIX:
1337 	    // Start of node: Deal with NUL bytes, which means
1338 	    // tword[] may end here.
1339 	    arridx = sp->ts_arridx;	    // current node in the tree
1340 	    len = byts[arridx];		    // bytes in this node
1341 	    arridx += sp->ts_curi;	    // index of current byte
1342 
1343 	    if (sp->ts_prefixdepth == PFD_PREFIXTREE)
1344 	    {
1345 		// Skip over the NUL bytes, we use them later.
1346 		for (n = 0; n < len && byts[arridx + n] == 0; ++n)
1347 		    ;
1348 		sp->ts_curi += n;
1349 
1350 		// Always past NUL bytes now.
1351 		n = (int)sp->ts_state;
1352 		PROF_STORE(sp->ts_state)
1353 		sp->ts_state = STATE_ENDNUL;
1354 		sp->ts_save_badflags = su->su_badflags;
1355 
1356 		// At end of a prefix or at start of prefixtree: check for
1357 		// following word.
1358 		if (byts[arridx] == 0 || n == (int)STATE_NOPREFIX)
1359 		{
1360 		    // Set su->su_badflags to the caps type at this position.
1361 		    // Use the caps type until here for the prefix itself.
1362 		    if (has_mbyte)
1363 			n = nofold_len(fword, sp->ts_fidx, su->su_badptr);
1364 		    else
1365 			n = sp->ts_fidx;
1366 		    flags = badword_captype(su->su_badptr, su->su_badptr + n);
1367 		    su->su_badflags = badword_captype(su->su_badptr + n,
1368 					       su->su_badptr + su->su_badlen);
1369 #ifdef DEBUG_TRIEWALK
1370 		    sprintf(changename[depth], "prefix");
1371 #endif
1372 		    go_deeper(stack, depth, 0);
1373 		    ++depth;
1374 		    sp = &stack[depth];
1375 		    sp->ts_prefixdepth = depth - 1;
1376 		    byts = fbyts;
1377 		    idxs = fidxs;
1378 		    sp->ts_arridx = 0;
1379 
1380 		    // Move the prefix to preword[] with the right case
1381 		    // and make find_keepcap_word() works.
1382 		    tword[sp->ts_twordlen] = NUL;
1383 		    make_case_word(tword + sp->ts_splitoff,
1384 					  preword + sp->ts_prewordlen, flags);
1385 		    sp->ts_prewordlen = (char_u)STRLEN(preword);
1386 		    sp->ts_splitoff = sp->ts_twordlen;
1387 		}
1388 		break;
1389 	    }
1390 
1391 	    if (sp->ts_curi > len || byts[arridx] != 0)
1392 	    {
1393 		// Past bytes in node and/or past NUL bytes.
1394 		PROF_STORE(sp->ts_state)
1395 		sp->ts_state = STATE_ENDNUL;
1396 		sp->ts_save_badflags = su->su_badflags;
1397 		break;
1398 	    }
1399 
1400 	    // End of word in tree.
1401 	    ++sp->ts_curi;		// eat one NUL byte
1402 
1403 	    flags = (int)idxs[arridx];
1404 
1405 	    // Skip words with the NOSUGGEST flag.
1406 	    if (flags & WF_NOSUGGEST)
1407 		break;
1408 
1409 	    fword_ends = (fword[sp->ts_fidx] == NUL
1410 			   || (soundfold
1411 			       ? VIM_ISWHITE(fword[sp->ts_fidx])
1412 			       : !spell_iswordp(fword + sp->ts_fidx, curwin)));
1413 	    tword[sp->ts_twordlen] = NUL;
1414 
1415 	    if (sp->ts_prefixdepth <= PFD_NOTSPECIAL
1416 					&& (sp->ts_flags & TSF_PREFIXOK) == 0
1417 					&& pbyts != NULL)
1418 	    {
1419 		// There was a prefix before the word.  Check that the prefix
1420 		// can be used with this word.
1421 		// Count the length of the NULs in the prefix.  If there are
1422 		// none this must be the first try without a prefix.
1423 		n = stack[sp->ts_prefixdepth].ts_arridx;
1424 		len = pbyts[n++];
1425 		for (c = 0; c < len && pbyts[n + c] == 0; ++c)
1426 		    ;
1427 		if (c > 0)
1428 		{
1429 		    c = valid_word_prefix(c, n, flags,
1430 				       tword + sp->ts_splitoff, slang, FALSE);
1431 		    if (c == 0)
1432 			break;
1433 
1434 		    // Use the WF_RARE flag for a rare prefix.
1435 		    if (c & WF_RAREPFX)
1436 			flags |= WF_RARE;
1437 
1438 		    // Tricky: when checking for both prefix and compounding
1439 		    // we run into the prefix flag first.
1440 		    // Remember that it's OK, so that we accept the prefix
1441 		    // when arriving at a compound flag.
1442 		    sp->ts_flags |= TSF_PREFIXOK;
1443 		}
1444 	    }
1445 
1446 	    // Check NEEDCOMPOUND: can't use word without compounding.  Do try
1447 	    // appending another compound word below.
1448 	    if (sp->ts_complen == sp->ts_compsplit && fword_ends
1449 						     && (flags & WF_NEEDCOMP))
1450 		goodword_ends = FALSE;
1451 	    else
1452 		goodword_ends = TRUE;
1453 
1454 	    p = NULL;
1455 	    compound_ok = TRUE;
1456 	    if (sp->ts_complen > sp->ts_compsplit)
1457 	    {
1458 		if (slang->sl_nobreak)
1459 		{
1460 		    // There was a word before this word.  When there was no
1461 		    // change in this word (it was correct) add the first word
1462 		    // as a suggestion.  If this word was corrected too, we
1463 		    // need to check if a correct word follows.
1464 		    if (sp->ts_fidx - sp->ts_splitfidx
1465 					  == sp->ts_twordlen - sp->ts_splitoff
1466 			    && STRNCMP(fword + sp->ts_splitfidx,
1467 					tword + sp->ts_splitoff,
1468 					 sp->ts_fidx - sp->ts_splitfidx) == 0)
1469 		    {
1470 			preword[sp->ts_prewordlen] = NUL;
1471 			newscore = score_wordcount_adj(slang, sp->ts_score,
1472 						 preword + sp->ts_prewordlen,
1473 						 sp->ts_prewordlen > 0);
1474 			// Add the suggestion if the score isn't too bad.
1475 			if (newscore <= su->su_maxscore)
1476 			    add_suggestion(su, &su->su_ga, preword,
1477 				    sp->ts_splitfidx - repextra,
1478 				    newscore, 0, FALSE,
1479 				    lp->lp_sallang, FALSE);
1480 			break;
1481 		    }
1482 		}
1483 		else
1484 		{
1485 		    // There was a compound word before this word.  If this
1486 		    // word does not support compounding then give up
1487 		    // (splitting is tried for the word without compound
1488 		    // flag).
1489 		    if (((unsigned)flags >> 24) == 0
1490 			    || sp->ts_twordlen - sp->ts_splitoff
1491 						       < slang->sl_compminlen)
1492 			break;
1493 		    // For multi-byte chars check character length against
1494 		    // COMPOUNDMIN.
1495 		    if (has_mbyte
1496 			    && slang->sl_compminlen > 0
1497 			    && mb_charlen(tword + sp->ts_splitoff)
1498 						       < slang->sl_compminlen)
1499 			break;
1500 
1501 		    compflags[sp->ts_complen] = ((unsigned)flags >> 24);
1502 		    compflags[sp->ts_complen + 1] = NUL;
1503 		    vim_strncpy(preword + sp->ts_prewordlen,
1504 			    tword + sp->ts_splitoff,
1505 			    sp->ts_twordlen - sp->ts_splitoff);
1506 
1507 		    // Verify CHECKCOMPOUNDPATTERN  rules.
1508 		    if (match_checkcompoundpattern(preword,  sp->ts_prewordlen,
1509 							  &slang->sl_comppat))
1510 			compound_ok = FALSE;
1511 
1512 		    if (compound_ok)
1513 		    {
1514 			p = preword;
1515 			while (*skiptowhite(p) != NUL)
1516 			    p = skipwhite(skiptowhite(p));
1517 			if (fword_ends && !can_compound(slang, p,
1518 						compflags + sp->ts_compsplit))
1519 			    // Compound is not allowed.  But it may still be
1520 			    // possible if we add another (short) word.
1521 			    compound_ok = FALSE;
1522 		    }
1523 
1524 		    // Get pointer to last char of previous word.
1525 		    p = preword + sp->ts_prewordlen;
1526 		    MB_PTR_BACK(preword, p);
1527 		}
1528 	    }
1529 
1530 	    // Form the word with proper case in preword.
1531 	    // If there is a word from a previous split, append.
1532 	    // For the soundfold tree don't change the case, simply append.
1533 	    if (soundfold)
1534 		STRCPY(preword + sp->ts_prewordlen, tword + sp->ts_splitoff);
1535 	    else if (flags & WF_KEEPCAP)
1536 		// Must find the word in the keep-case tree.
1537 		find_keepcap_word(slang, tword + sp->ts_splitoff,
1538 						 preword + sp->ts_prewordlen);
1539 	    else
1540 	    {
1541 		// Include badflags: If the badword is onecap or allcap
1542 		// use that for the goodword too.  But if the badword is
1543 		// allcap and it's only one char long use onecap.
1544 		c = su->su_badflags;
1545 		if ((c & WF_ALLCAP)
1546 			&& su->su_badlen == (*mb_ptr2len)(su->su_badptr))
1547 		    c = WF_ONECAP;
1548 		c |= flags;
1549 
1550 		// When appending a compound word after a word character don't
1551 		// use Onecap.
1552 		if (p != NULL && spell_iswordp_nmw(p, curwin))
1553 		    c &= ~WF_ONECAP;
1554 		make_case_word(tword + sp->ts_splitoff,
1555 					      preword + sp->ts_prewordlen, c);
1556 	    }
1557 
1558 	    if (!soundfold)
1559 	    {
1560 		// Don't use a banned word.  It may appear again as a good
1561 		// word, thus remember it.
1562 		if (flags & WF_BANNED)
1563 		{
1564 		    add_banned(su, preword + sp->ts_prewordlen);
1565 		    break;
1566 		}
1567 		if ((sp->ts_complen == sp->ts_compsplit
1568 			    && WAS_BANNED(su, preword + sp->ts_prewordlen))
1569 						   || WAS_BANNED(su, preword))
1570 		{
1571 		    if (slang->sl_compprog == NULL)
1572 			break;
1573 		    // the word so far was banned but we may try compounding
1574 		    goodword_ends = FALSE;
1575 		}
1576 	    }
1577 
1578 	    newscore = 0;
1579 	    if (!soundfold)	// soundfold words don't have flags
1580 	    {
1581 		if ((flags & WF_REGION)
1582 			    && (((unsigned)flags >> 16) & lp->lp_region) == 0)
1583 		    newscore += SCORE_REGION;
1584 		if (flags & WF_RARE)
1585 		    newscore += SCORE_RARE;
1586 
1587 		if (!spell_valid_case(su->su_badflags,
1588 				  captype(preword + sp->ts_prewordlen, NULL)))
1589 		    newscore += SCORE_ICASE;
1590 	    }
1591 
1592 	    // TODO: how about splitting in the soundfold tree?
1593 	    if (fword_ends
1594 		    && goodword_ends
1595 		    && sp->ts_fidx >= sp->ts_fidxtry
1596 		    && compound_ok)
1597 	    {
1598 		// The badword also ends: add suggestions.
1599 #ifdef DEBUG_TRIEWALK
1600 		if (soundfold && STRCMP(preword, "smwrd") == 0)
1601 		{
1602 		    int	    j;
1603 
1604 		    // print the stack of changes that brought us here
1605 		    smsg("------ %s -------", fword);
1606 		    for (j = 0; j < depth; ++j)
1607 			smsg("%s", changename[j]);
1608 		}
1609 #endif
1610 		if (soundfold)
1611 		{
1612 		    // For soundfolded words we need to find the original
1613 		    // words, the edit distance and then add them.
1614 		    add_sound_suggest(su, preword, sp->ts_score, lp);
1615 		}
1616 		else if (sp->ts_fidx > 0)
1617 		{
1618 		    // Give a penalty when changing non-word char to word
1619 		    // char, e.g., "thes," -> "these".
1620 		    p = fword + sp->ts_fidx;
1621 		    MB_PTR_BACK(fword, p);
1622 		    if (!spell_iswordp(p, curwin) && *preword != NUL)
1623 		    {
1624 			p = preword + STRLEN(preword);
1625 			MB_PTR_BACK(preword, p);
1626 			if (spell_iswordp(p, curwin))
1627 			    newscore += SCORE_NONWORD;
1628 		    }
1629 
1630 		    // Give a bonus to words seen before.
1631 		    score = score_wordcount_adj(slang,
1632 						sp->ts_score + newscore,
1633 						preword + sp->ts_prewordlen,
1634 						sp->ts_prewordlen > 0);
1635 
1636 		    // Add the suggestion if the score isn't too bad.
1637 		    if (score <= su->su_maxscore)
1638 		    {
1639 			add_suggestion(su, &su->su_ga, preword,
1640 				    sp->ts_fidx - repextra,
1641 				    score, 0, FALSE, lp->lp_sallang, FALSE);
1642 
1643 			if (su->su_badflags & WF_MIXCAP)
1644 			{
1645 			    // We really don't know if the word should be
1646 			    // upper or lower case, add both.
1647 			    c = captype(preword, NULL);
1648 			    if (c == 0 || c == WF_ALLCAP)
1649 			    {
1650 				make_case_word(tword + sp->ts_splitoff,
1651 					      preword + sp->ts_prewordlen,
1652 						      c == 0 ? WF_ALLCAP : 0);
1653 
1654 				add_suggestion(su, &su->su_ga, preword,
1655 					sp->ts_fidx - repextra,
1656 					score + SCORE_ICASE, 0, FALSE,
1657 					lp->lp_sallang, FALSE);
1658 			    }
1659 			}
1660 		    }
1661 		}
1662 	    }
1663 
1664 	    // Try word split and/or compounding.
1665 	    if ((sp->ts_fidx >= sp->ts_fidxtry || fword_ends)
1666 		    // Don't split halfway a character.
1667 		    && (!has_mbyte || sp->ts_tcharlen == 0))
1668 	    {
1669 		int	try_compound;
1670 		int	try_split;
1671 
1672 		// If past the end of the bad word don't try a split.
1673 		// Otherwise try changing the next word.  E.g., find
1674 		// suggestions for "the the" where the second "the" is
1675 		// different.  It's done like a split.
1676 		// TODO: word split for soundfold words
1677 		try_split = (sp->ts_fidx - repextra < su->su_badlen)
1678 								&& !soundfold;
1679 
1680 		// Get here in several situations:
1681 		// 1. The word in the tree ends:
1682 		//    If the word allows compounding try that.  Otherwise try
1683 		//    a split by inserting a space.  For both check that a
1684 		//    valid words starts at fword[sp->ts_fidx].
1685 		//    For NOBREAK do like compounding to be able to check if
1686 		//    the next word is valid.
1687 		// 2. The badword does end, but it was due to a change (e.g.,
1688 		//    a swap).  No need to split, but do check that the
1689 		//    following word is valid.
1690 		// 3. The badword and the word in the tree end.  It may still
1691 		//    be possible to compound another (short) word.
1692 		try_compound = FALSE;
1693 		if (!soundfold
1694 			&& !slang->sl_nocompoundsugs
1695 			&& slang->sl_compprog != NULL
1696 			&& ((unsigned)flags >> 24) != 0
1697 			&& sp->ts_twordlen - sp->ts_splitoff
1698 						       >= slang->sl_compminlen
1699 			&& (!has_mbyte
1700 			    || slang->sl_compminlen == 0
1701 			    || mb_charlen(tword + sp->ts_splitoff)
1702 						      >= slang->sl_compminlen)
1703 			&& (slang->sl_compsylmax < MAXWLEN
1704 			    || sp->ts_complen + 1 - sp->ts_compsplit
1705 							  < slang->sl_compmax)
1706 			&& (can_be_compound(sp, slang,
1707 					 compflags, ((unsigned)flags >> 24))))
1708 
1709 		{
1710 		    try_compound = TRUE;
1711 		    compflags[sp->ts_complen] = ((unsigned)flags >> 24);
1712 		    compflags[sp->ts_complen + 1] = NUL;
1713 		}
1714 
1715 		// For NOBREAK we never try splitting, it won't make any word
1716 		// valid.
1717 		if (slang->sl_nobreak && !slang->sl_nocompoundsugs)
1718 		    try_compound = TRUE;
1719 
1720 		// If we could add a compound word, and it's also possible to
1721 		// split at this point, do the split first and set
1722 		// TSF_DIDSPLIT to avoid doing it again.
1723 		else if (!fword_ends
1724 			&& try_compound
1725 			&& (sp->ts_flags & TSF_DIDSPLIT) == 0)
1726 		{
1727 		    try_compound = FALSE;
1728 		    sp->ts_flags |= TSF_DIDSPLIT;
1729 		    --sp->ts_curi;	    // do the same NUL again
1730 		    compflags[sp->ts_complen] = NUL;
1731 		}
1732 		else
1733 		    sp->ts_flags &= ~TSF_DIDSPLIT;
1734 
1735 		if (try_split || try_compound)
1736 		{
1737 		    if (!try_compound && (!fword_ends || !goodword_ends))
1738 		    {
1739 			// If we're going to split need to check that the
1740 			// words so far are valid for compounding.  If there
1741 			// is only one word it must not have the NEEDCOMPOUND
1742 			// flag.
1743 			if (sp->ts_complen == sp->ts_compsplit
1744 						     && (flags & WF_NEEDCOMP))
1745 			    break;
1746 			p = preword;
1747 			while (*skiptowhite(p) != NUL)
1748 			    p = skipwhite(skiptowhite(p));
1749 			if (sp->ts_complen > sp->ts_compsplit
1750 				&& !can_compound(slang, p,
1751 						compflags + sp->ts_compsplit))
1752 			    break;
1753 
1754 			if (slang->sl_nosplitsugs)
1755 			    newscore += SCORE_SPLIT_NO;
1756 			else
1757 			    newscore += SCORE_SPLIT;
1758 
1759 			// Give a bonus to words seen before.
1760 			newscore = score_wordcount_adj(slang, newscore,
1761 					   preword + sp->ts_prewordlen, TRUE);
1762 		    }
1763 
1764 		    if (TRY_DEEPER(su, stack, depth, newscore))
1765 		    {
1766 			go_deeper(stack, depth, newscore);
1767 #ifdef DEBUG_TRIEWALK
1768 			if (!try_compound && !fword_ends)
1769 			    sprintf(changename[depth], "%.*s-%s: split",
1770 				 sp->ts_twordlen, tword, fword + sp->ts_fidx);
1771 			else
1772 			    sprintf(changename[depth], "%.*s-%s: compound",
1773 				 sp->ts_twordlen, tword, fword + sp->ts_fidx);
1774 #endif
1775 			// Save things to be restored at STATE_SPLITUNDO.
1776 			sp->ts_save_badflags = su->su_badflags;
1777 			PROF_STORE(sp->ts_state)
1778 			sp->ts_state = STATE_SPLITUNDO;
1779 
1780 			++depth;
1781 			sp = &stack[depth];
1782 
1783 			// Append a space to preword when splitting.
1784 			if (!try_compound && !fword_ends)
1785 			    STRCAT(preword, " ");
1786 			sp->ts_prewordlen = (char_u)STRLEN(preword);
1787 			sp->ts_splitoff = sp->ts_twordlen;
1788 			sp->ts_splitfidx = sp->ts_fidx;
1789 
1790 			// If the badword has a non-word character at this
1791 			// position skip it.  That means replacing the
1792 			// non-word character with a space.  Always skip a
1793 			// character when the word ends.  But only when the
1794 			// good word can end.
1795 			if (((!try_compound && !spell_iswordp_nmw(fword
1796 							       + sp->ts_fidx,
1797 							       curwin))
1798 				    || fword_ends)
1799 				&& fword[sp->ts_fidx] != NUL
1800 				&& goodword_ends)
1801 			{
1802 			    int	    l;
1803 
1804 			    l = mb_ptr2len(fword + sp->ts_fidx);
1805 			    if (fword_ends)
1806 			    {
1807 				// Copy the skipped character to preword.
1808 				mch_memmove(preword + sp->ts_prewordlen,
1809 						      fword + sp->ts_fidx, l);
1810 				sp->ts_prewordlen += l;
1811 				preword[sp->ts_prewordlen] = NUL;
1812 			    }
1813 			    else
1814 				sp->ts_score -= SCORE_SPLIT - SCORE_SUBST;
1815 			    sp->ts_fidx += l;
1816 			}
1817 
1818 			// When compounding include compound flag in
1819 			// compflags[] (already set above).  When splitting we
1820 			// may start compounding over again.
1821 			if (try_compound)
1822 			    ++sp->ts_complen;
1823 			else
1824 			    sp->ts_compsplit = sp->ts_complen;
1825 			sp->ts_prefixdepth = PFD_NOPREFIX;
1826 
1827 			// set su->su_badflags to the caps type at this
1828 			// position
1829 			if (has_mbyte)
1830 			    n = nofold_len(fword, sp->ts_fidx, su->su_badptr);
1831 			else
1832 			    n = sp->ts_fidx;
1833 			su->su_badflags = badword_captype(su->su_badptr + n,
1834 					       su->su_badptr + su->su_badlen);
1835 
1836 			// Restart at top of the tree.
1837 			sp->ts_arridx = 0;
1838 
1839 			// If there are postponed prefixes, try these too.
1840 			if (pbyts != NULL)
1841 			{
1842 			    byts = pbyts;
1843 			    idxs = pidxs;
1844 			    sp->ts_prefixdepth = PFD_PREFIXTREE;
1845 			    PROF_STORE(sp->ts_state)
1846 			    sp->ts_state = STATE_NOPREFIX;
1847 			}
1848 		    }
1849 		}
1850 	    }
1851 	    break;
1852 
1853 	case STATE_SPLITUNDO:
1854 	    // Undo the changes done for word split or compound word.
1855 	    su->su_badflags = sp->ts_save_badflags;
1856 
1857 	    // Continue looking for NUL bytes.
1858 	    PROF_STORE(sp->ts_state)
1859 	    sp->ts_state = STATE_START;
1860 
1861 	    // In case we went into the prefix tree.
1862 	    byts = fbyts;
1863 	    idxs = fidxs;
1864 	    break;
1865 
1866 	case STATE_ENDNUL:
1867 	    // Past the NUL bytes in the node.
1868 	    su->su_badflags = sp->ts_save_badflags;
1869 	    if (fword[sp->ts_fidx] == NUL && sp->ts_tcharlen == 0)
1870 	    {
1871 		// The badword ends, can't use STATE_PLAIN.
1872 		PROF_STORE(sp->ts_state)
1873 		sp->ts_state = STATE_DEL;
1874 		break;
1875 	    }
1876 	    PROF_STORE(sp->ts_state)
1877 	    sp->ts_state = STATE_PLAIN;
1878 	    // FALLTHROUGH
1879 
1880 	case STATE_PLAIN:
1881 	    // Go over all possible bytes at this node, add each to tword[]
1882 	    // and use child node.  "ts_curi" is the index.
1883 	    arridx = sp->ts_arridx;
1884 	    if (sp->ts_curi > byts[arridx])
1885 	    {
1886 		// Done all bytes at this node, do next state.  When still at
1887 		// already changed bytes skip the other tricks.
1888 		PROF_STORE(sp->ts_state)
1889 		if (sp->ts_fidx >= sp->ts_fidxtry)
1890 		    sp->ts_state = STATE_DEL;
1891 		else
1892 		    sp->ts_state = STATE_FINAL;
1893 	    }
1894 	    else
1895 	    {
1896 		arridx += sp->ts_curi++;
1897 		c = byts[arridx];
1898 
1899 		// Normal byte, go one level deeper.  If it's not equal to the
1900 		// byte in the bad word adjust the score.  But don't even try
1901 		// when the byte was already changed.  And don't try when we
1902 		// just deleted this byte, accepting it is always cheaper than
1903 		// delete + substitute.
1904 		if (c == fword[sp->ts_fidx]
1905 			|| (sp->ts_tcharlen > 0 && sp->ts_isdiff != DIFF_NONE))
1906 		    newscore = 0;
1907 		else
1908 		    newscore = SCORE_SUBST;
1909 		if ((newscore == 0
1910 			    || (sp->ts_fidx >= sp->ts_fidxtry
1911 				&& ((sp->ts_flags & TSF_DIDDEL) == 0
1912 				    || c != fword[sp->ts_delidx])))
1913 			&& TRY_DEEPER(su, stack, depth, newscore))
1914 		{
1915 		    go_deeper(stack, depth, newscore);
1916 #ifdef DEBUG_TRIEWALK
1917 		    if (newscore > 0)
1918 			sprintf(changename[depth], "%.*s-%s: subst %c to %c",
1919 				sp->ts_twordlen, tword, fword + sp->ts_fidx,
1920 				fword[sp->ts_fidx], c);
1921 		    else
1922 			sprintf(changename[depth], "%.*s-%s: accept %c",
1923 				sp->ts_twordlen, tword, fword + sp->ts_fidx,
1924 				fword[sp->ts_fidx]);
1925 #endif
1926 		    ++depth;
1927 		    sp = &stack[depth];
1928 		    ++sp->ts_fidx;
1929 		    tword[sp->ts_twordlen++] = c;
1930 		    sp->ts_arridx = idxs[arridx];
1931 		    if (newscore == SCORE_SUBST)
1932 			sp->ts_isdiff = DIFF_YES;
1933 		    if (has_mbyte)
1934 		    {
1935 			// Multi-byte characters are a bit complicated to
1936 			// handle: They differ when any of the bytes differ
1937 			// and then their length may also differ.
1938 			if (sp->ts_tcharlen == 0)
1939 			{
1940 			    // First byte.
1941 			    sp->ts_tcharidx = 0;
1942 			    sp->ts_tcharlen = MB_BYTE2LEN(c);
1943 			    sp->ts_fcharstart = sp->ts_fidx - 1;
1944 			    sp->ts_isdiff = (newscore != 0)
1945 						       ? DIFF_YES : DIFF_NONE;
1946 			}
1947 			else if (sp->ts_isdiff == DIFF_INSERT)
1948 			    // When inserting trail bytes don't advance in the
1949 			    // bad word.
1950 			    --sp->ts_fidx;
1951 			if (++sp->ts_tcharidx == sp->ts_tcharlen)
1952 			{
1953 			    // Last byte of character.
1954 			    if (sp->ts_isdiff == DIFF_YES)
1955 			    {
1956 				// Correct ts_fidx for the byte length of the
1957 				// character (we didn't check that before).
1958 				sp->ts_fidx = sp->ts_fcharstart
1959 					    + mb_ptr2len(
1960 						    fword + sp->ts_fcharstart);
1961 				// For changing a composing character adjust
1962 				// the score from SCORE_SUBST to
1963 				// SCORE_SUBCOMP.
1964 				if (enc_utf8
1965 					&& utf_iscomposing(
1966 					    utf_ptr2char(tword
1967 						+ sp->ts_twordlen
1968 							   - sp->ts_tcharlen))
1969 					&& utf_iscomposing(
1970 					    utf_ptr2char(fword
1971 							+ sp->ts_fcharstart)))
1972 				    sp->ts_score -=
1973 						  SCORE_SUBST - SCORE_SUBCOMP;
1974 
1975 				// For a similar character adjust score from
1976 				// SCORE_SUBST to SCORE_SIMILAR.
1977 				else if (!soundfold
1978 					&& slang->sl_has_map
1979 					&& similar_chars(slang,
1980 					    mb_ptr2char(tword
1981 						+ sp->ts_twordlen
1982 							   - sp->ts_tcharlen),
1983 					    mb_ptr2char(fword
1984 							+ sp->ts_fcharstart)))
1985 				    sp->ts_score -=
1986 						  SCORE_SUBST - SCORE_SIMILAR;
1987 			    }
1988 			    else if (sp->ts_isdiff == DIFF_INSERT
1989 					 && sp->ts_twordlen > sp->ts_tcharlen)
1990 			    {
1991 				p = tword + sp->ts_twordlen - sp->ts_tcharlen;
1992 				c = mb_ptr2char(p);
1993 				if (enc_utf8 && utf_iscomposing(c))
1994 				{
1995 				    // Inserting a composing char doesn't
1996 				    // count that much.
1997 				    sp->ts_score -= SCORE_INS - SCORE_INSCOMP;
1998 				}
1999 				else
2000 				{
2001 				    // If the previous character was the same,
2002 				    // thus doubling a character, give a bonus
2003 				    // to the score.  Also for the soundfold
2004 				    // tree (might seem illogical but does
2005 				    // give better scores).
2006 				    MB_PTR_BACK(tword, p);
2007 				    if (c == mb_ptr2char(p))
2008 					sp->ts_score -= SCORE_INS
2009 							       - SCORE_INSDUP;
2010 				}
2011 			    }
2012 
2013 			    // Starting a new char, reset the length.
2014 			    sp->ts_tcharlen = 0;
2015 			}
2016 		    }
2017 		    else
2018 		    {
2019 			// If we found a similar char adjust the score.
2020 			// We do this after calling go_deeper() because
2021 			// it's slow.
2022 			if (newscore != 0
2023 				&& !soundfold
2024 				&& slang->sl_has_map
2025 				&& similar_chars(slang,
2026 						   c, fword[sp->ts_fidx - 1]))
2027 			    sp->ts_score -= SCORE_SUBST - SCORE_SIMILAR;
2028 		    }
2029 		}
2030 	    }
2031 	    break;
2032 
2033 	case STATE_DEL:
2034 	    // When past the first byte of a multi-byte char don't try
2035 	    // delete/insert/swap a character.
2036 	    if (has_mbyte && sp->ts_tcharlen > 0)
2037 	    {
2038 		PROF_STORE(sp->ts_state)
2039 		sp->ts_state = STATE_FINAL;
2040 		break;
2041 	    }
2042 	    // Try skipping one character in the bad word (delete it).
2043 	    PROF_STORE(sp->ts_state)
2044 	    sp->ts_state = STATE_INS_PREP;
2045 	    sp->ts_curi = 1;
2046 	    if (soundfold && sp->ts_fidx == 0 && fword[sp->ts_fidx] == '*')
2047 		// Deleting a vowel at the start of a word counts less, see
2048 		// soundalike_score().
2049 		newscore = 2 * SCORE_DEL / 3;
2050 	    else
2051 		newscore = SCORE_DEL;
2052 	    if (fword[sp->ts_fidx] != NUL
2053 				    && TRY_DEEPER(su, stack, depth, newscore))
2054 	    {
2055 		go_deeper(stack, depth, newscore);
2056 #ifdef DEBUG_TRIEWALK
2057 		sprintf(changename[depth], "%.*s-%s: delete %c",
2058 			sp->ts_twordlen, tword, fword + sp->ts_fidx,
2059 			fword[sp->ts_fidx]);
2060 #endif
2061 		++depth;
2062 
2063 		// Remember what character we deleted, so that we can avoid
2064 		// inserting it again.
2065 		stack[depth].ts_flags |= TSF_DIDDEL;
2066 		stack[depth].ts_delidx = sp->ts_fidx;
2067 
2068 		// Advance over the character in fword[].  Give a bonus to the
2069 		// score if the same character is following "nn" -> "n".  It's
2070 		// a bit illogical for soundfold tree but it does give better
2071 		// results.
2072 		if (has_mbyte)
2073 		{
2074 		    c = mb_ptr2char(fword + sp->ts_fidx);
2075 		    stack[depth].ts_fidx += mb_ptr2len(fword + sp->ts_fidx);
2076 		    if (enc_utf8 && utf_iscomposing(c))
2077 			stack[depth].ts_score -= SCORE_DEL - SCORE_DELCOMP;
2078 		    else if (c == mb_ptr2char(fword + stack[depth].ts_fidx))
2079 			stack[depth].ts_score -= SCORE_DEL - SCORE_DELDUP;
2080 		}
2081 		else
2082 		{
2083 		    ++stack[depth].ts_fidx;
2084 		    if (fword[sp->ts_fidx] == fword[sp->ts_fidx + 1])
2085 			stack[depth].ts_score -= SCORE_DEL - SCORE_DELDUP;
2086 		}
2087 		break;
2088 	    }
2089 	    // FALLTHROUGH
2090 
2091 	case STATE_INS_PREP:
2092 	    if (sp->ts_flags & TSF_DIDDEL)
2093 	    {
2094 		// If we just deleted a byte then inserting won't make sense,
2095 		// a substitute is always cheaper.
2096 		PROF_STORE(sp->ts_state)
2097 		sp->ts_state = STATE_SWAP;
2098 		break;
2099 	    }
2100 
2101 	    // skip over NUL bytes
2102 	    n = sp->ts_arridx;
2103 	    for (;;)
2104 	    {
2105 		if (sp->ts_curi > byts[n])
2106 		{
2107 		    // Only NUL bytes at this node, go to next state.
2108 		    PROF_STORE(sp->ts_state)
2109 		    sp->ts_state = STATE_SWAP;
2110 		    break;
2111 		}
2112 		if (byts[n + sp->ts_curi] != NUL)
2113 		{
2114 		    // Found a byte to insert.
2115 		    PROF_STORE(sp->ts_state)
2116 		    sp->ts_state = STATE_INS;
2117 		    break;
2118 		}
2119 		++sp->ts_curi;
2120 	    }
2121 	    break;
2122 
2123 	    // FALLTHROUGH
2124 
2125 	case STATE_INS:
2126 	    // Insert one byte.  Repeat this for each possible byte at this
2127 	    // node.
2128 	    n = sp->ts_arridx;
2129 	    if (sp->ts_curi > byts[n])
2130 	    {
2131 		// Done all bytes at this node, go to next state.
2132 		PROF_STORE(sp->ts_state)
2133 		sp->ts_state = STATE_SWAP;
2134 		break;
2135 	    }
2136 
2137 	    // Do one more byte at this node, but:
2138 	    // - Skip NUL bytes.
2139 	    // - Skip the byte if it's equal to the byte in the word,
2140 	    //   accepting that byte is always better.
2141 	    n += sp->ts_curi++;
2142 	    c = byts[n];
2143 	    if (soundfold && sp->ts_twordlen == 0 && c == '*')
2144 		// Inserting a vowel at the start of a word counts less,
2145 		// see soundalike_score().
2146 		newscore = 2 * SCORE_INS / 3;
2147 	    else
2148 		newscore = SCORE_INS;
2149 	    if (c != fword[sp->ts_fidx]
2150 				    && TRY_DEEPER(su, stack, depth, newscore))
2151 	    {
2152 		go_deeper(stack, depth, newscore);
2153 #ifdef DEBUG_TRIEWALK
2154 		sprintf(changename[depth], "%.*s-%s: insert %c",
2155 			sp->ts_twordlen, tword, fword + sp->ts_fidx,
2156 			c);
2157 #endif
2158 		++depth;
2159 		sp = &stack[depth];
2160 		tword[sp->ts_twordlen++] = c;
2161 		sp->ts_arridx = idxs[n];
2162 		if (has_mbyte)
2163 		{
2164 		    fl = MB_BYTE2LEN(c);
2165 		    if (fl > 1)
2166 		    {
2167 			// There are following bytes for the same character.
2168 			// We must find all bytes before trying
2169 			// delete/insert/swap/etc.
2170 			sp->ts_tcharlen = fl;
2171 			sp->ts_tcharidx = 1;
2172 			sp->ts_isdiff = DIFF_INSERT;
2173 		    }
2174 		}
2175 		else
2176 		    fl = 1;
2177 		if (fl == 1)
2178 		{
2179 		    // If the previous character was the same, thus doubling a
2180 		    // character, give a bonus to the score.  Also for
2181 		    // soundfold words (illogical but does give a better
2182 		    // score).
2183 		    if (sp->ts_twordlen >= 2
2184 					   && tword[sp->ts_twordlen - 2] == c)
2185 			sp->ts_score -= SCORE_INS - SCORE_INSDUP;
2186 		}
2187 	    }
2188 	    break;
2189 
2190 	case STATE_SWAP:
2191 	    // Swap two bytes in the bad word: "12" -> "21".
2192 	    // We change "fword" here, it's changed back afterwards at
2193 	    // STATE_UNSWAP.
2194 	    p = fword + sp->ts_fidx;
2195 	    c = *p;
2196 	    if (c == NUL)
2197 	    {
2198 		// End of word, can't swap or replace.
2199 		PROF_STORE(sp->ts_state)
2200 		sp->ts_state = STATE_FINAL;
2201 		break;
2202 	    }
2203 
2204 	    // Don't swap if the first character is not a word character.
2205 	    // SWAP3 etc. also don't make sense then.
2206 	    if (!soundfold && !spell_iswordp(p, curwin))
2207 	    {
2208 		PROF_STORE(sp->ts_state)
2209 		sp->ts_state = STATE_REP_INI;
2210 		break;
2211 	    }
2212 
2213 	    if (has_mbyte)
2214 	    {
2215 		n = MB_CPTR2LEN(p);
2216 		c = mb_ptr2char(p);
2217 		if (p[n] == NUL)
2218 		    c2 = NUL;
2219 		else if (!soundfold && !spell_iswordp(p + n, curwin))
2220 		    c2 = c; // don't swap non-word char
2221 		else
2222 		    c2 = mb_ptr2char(p + n);
2223 	    }
2224 	    else
2225 	    {
2226 		if (p[1] == NUL)
2227 		    c2 = NUL;
2228 		else if (!soundfold && !spell_iswordp(p + 1, curwin))
2229 		    c2 = c; // don't swap non-word char
2230 		else
2231 		    c2 = p[1];
2232 	    }
2233 
2234 	    // When the second character is NUL we can't swap.
2235 	    if (c2 == NUL)
2236 	    {
2237 		PROF_STORE(sp->ts_state)
2238 		sp->ts_state = STATE_REP_INI;
2239 		break;
2240 	    }
2241 
2242 	    // When characters are identical, swap won't do anything.
2243 	    // Also get here if the second char is not a word character.
2244 	    if (c == c2)
2245 	    {
2246 		PROF_STORE(sp->ts_state)
2247 		sp->ts_state = STATE_SWAP3;
2248 		break;
2249 	    }
2250 	    if (c2 != NUL && TRY_DEEPER(su, stack, depth, SCORE_SWAP))
2251 	    {
2252 		go_deeper(stack, depth, SCORE_SWAP);
2253 #ifdef DEBUG_TRIEWALK
2254 		sprintf(changename[depth], "%.*s-%s: swap %c and %c",
2255 			sp->ts_twordlen, tword, fword + sp->ts_fidx,
2256 			c, c2);
2257 #endif
2258 		PROF_STORE(sp->ts_state)
2259 		sp->ts_state = STATE_UNSWAP;
2260 		++depth;
2261 		if (has_mbyte)
2262 		{
2263 		    fl = mb_char2len(c2);
2264 		    mch_memmove(p, p + n, fl);
2265 		    mb_char2bytes(c, p + fl);
2266 		    stack[depth].ts_fidxtry = sp->ts_fidx + n + fl;
2267 		}
2268 		else
2269 		{
2270 		    p[0] = c2;
2271 		    p[1] = c;
2272 		    stack[depth].ts_fidxtry = sp->ts_fidx + 2;
2273 		}
2274 	    }
2275 	    else
2276 	    {
2277 		// If this swap doesn't work then SWAP3 won't either.
2278 		PROF_STORE(sp->ts_state)
2279 		sp->ts_state = STATE_REP_INI;
2280 	    }
2281 	    break;
2282 
2283 	case STATE_UNSWAP:
2284 	    // Undo the STATE_SWAP swap: "21" -> "12".
2285 	    p = fword + sp->ts_fidx;
2286 	    if (has_mbyte)
2287 	    {
2288 		n = mb_ptr2len(p);
2289 		c = mb_ptr2char(p + n);
2290 		mch_memmove(p + mb_ptr2len(p + n), p, n);
2291 		mb_char2bytes(c, p);
2292 	    }
2293 	    else
2294 	    {
2295 		c = *p;
2296 		*p = p[1];
2297 		p[1] = c;
2298 	    }
2299 	    // FALLTHROUGH
2300 
2301 	case STATE_SWAP3:
2302 	    // Swap two bytes, skipping one: "123" -> "321".  We change
2303 	    // "fword" here, it's changed back afterwards at STATE_UNSWAP3.
2304 	    p = fword + sp->ts_fidx;
2305 	    if (has_mbyte)
2306 	    {
2307 		n = MB_CPTR2LEN(p);
2308 		c = mb_ptr2char(p);
2309 		fl = MB_CPTR2LEN(p + n);
2310 		c2 = mb_ptr2char(p + n);
2311 		if (!soundfold && !spell_iswordp(p + n + fl, curwin))
2312 		    c3 = c;	// don't swap non-word char
2313 		else
2314 		    c3 = mb_ptr2char(p + n + fl);
2315 	    }
2316 	    else
2317 	    {
2318 		c = *p;
2319 		c2 = p[1];
2320 		if (!soundfold && !spell_iswordp(p + 2, curwin))
2321 		    c3 = c;	// don't swap non-word char
2322 		else
2323 		    c3 = p[2];
2324 	    }
2325 
2326 	    // When characters are identical: "121" then SWAP3 result is
2327 	    // identical, ROT3L result is same as SWAP: "211", ROT3L result is
2328 	    // same as SWAP on next char: "112".  Thus skip all swapping.
2329 	    // Also skip when c3 is NUL.
2330 	    // Also get here when the third character is not a word character.
2331 	    // Second character may any char: "a.b" -> "b.a"
2332 	    if (c == c3 || c3 == NUL)
2333 	    {
2334 		PROF_STORE(sp->ts_state)
2335 		sp->ts_state = STATE_REP_INI;
2336 		break;
2337 	    }
2338 	    if (TRY_DEEPER(su, stack, depth, SCORE_SWAP3))
2339 	    {
2340 		go_deeper(stack, depth, SCORE_SWAP3);
2341 #ifdef DEBUG_TRIEWALK
2342 		sprintf(changename[depth], "%.*s-%s: swap3 %c and %c",
2343 			sp->ts_twordlen, tword, fword + sp->ts_fidx,
2344 			c, c3);
2345 #endif
2346 		PROF_STORE(sp->ts_state)
2347 		sp->ts_state = STATE_UNSWAP3;
2348 		++depth;
2349 		if (has_mbyte)
2350 		{
2351 		    tl = mb_char2len(c3);
2352 		    mch_memmove(p, p + n + fl, tl);
2353 		    mb_char2bytes(c2, p + tl);
2354 		    mb_char2bytes(c, p + fl + tl);
2355 		    stack[depth].ts_fidxtry = sp->ts_fidx + n + fl + tl;
2356 		}
2357 		else
2358 		{
2359 		    p[0] = p[2];
2360 		    p[2] = c;
2361 		    stack[depth].ts_fidxtry = sp->ts_fidx + 3;
2362 		}
2363 	    }
2364 	    else
2365 	    {
2366 		PROF_STORE(sp->ts_state)
2367 		sp->ts_state = STATE_REP_INI;
2368 	    }
2369 	    break;
2370 
2371 	case STATE_UNSWAP3:
2372 	    // Undo STATE_SWAP3: "321" -> "123"
2373 	    p = fword + sp->ts_fidx;
2374 	    if (has_mbyte)
2375 	    {
2376 		n = mb_ptr2len(p);
2377 		c2 = mb_ptr2char(p + n);
2378 		fl = mb_ptr2len(p + n);
2379 		c = mb_ptr2char(p + n + fl);
2380 		tl = mb_ptr2len(p + n + fl);
2381 		mch_memmove(p + fl + tl, p, n);
2382 		mb_char2bytes(c, p);
2383 		mb_char2bytes(c2, p + tl);
2384 		p = p + tl;
2385 	    }
2386 	    else
2387 	    {
2388 		c = *p;
2389 		*p = p[2];
2390 		p[2] = c;
2391 		++p;
2392 	    }
2393 
2394 	    if (!soundfold && !spell_iswordp(p, curwin))
2395 	    {
2396 		// Middle char is not a word char, skip the rotate.  First and
2397 		// third char were already checked at swap and swap3.
2398 		PROF_STORE(sp->ts_state)
2399 		sp->ts_state = STATE_REP_INI;
2400 		break;
2401 	    }
2402 
2403 	    // Rotate three characters left: "123" -> "231".  We change
2404 	    // "fword" here, it's changed back afterwards at STATE_UNROT3L.
2405 	    if (TRY_DEEPER(su, stack, depth, SCORE_SWAP3))
2406 	    {
2407 		go_deeper(stack, depth, SCORE_SWAP3);
2408 #ifdef DEBUG_TRIEWALK
2409 		p = fword + sp->ts_fidx;
2410 		sprintf(changename[depth], "%.*s-%s: rotate left %c%c%c",
2411 			sp->ts_twordlen, tword, fword + sp->ts_fidx,
2412 			p[0], p[1], p[2]);
2413 #endif
2414 		PROF_STORE(sp->ts_state)
2415 		sp->ts_state = STATE_UNROT3L;
2416 		++depth;
2417 		p = fword + sp->ts_fidx;
2418 		if (has_mbyte)
2419 		{
2420 		    n = MB_CPTR2LEN(p);
2421 		    c = mb_ptr2char(p);
2422 		    fl = MB_CPTR2LEN(p + n);
2423 		    fl += MB_CPTR2LEN(p + n + fl);
2424 		    mch_memmove(p, p + n, fl);
2425 		    mb_char2bytes(c, p + fl);
2426 		    stack[depth].ts_fidxtry = sp->ts_fidx + n + fl;
2427 		}
2428 		else
2429 		{
2430 		    c = *p;
2431 		    *p = p[1];
2432 		    p[1] = p[2];
2433 		    p[2] = c;
2434 		    stack[depth].ts_fidxtry = sp->ts_fidx + 3;
2435 		}
2436 	    }
2437 	    else
2438 	    {
2439 		PROF_STORE(sp->ts_state)
2440 		sp->ts_state = STATE_REP_INI;
2441 	    }
2442 	    break;
2443 
2444 	case STATE_UNROT3L:
2445 	    // Undo ROT3L: "231" -> "123"
2446 	    p = fword + sp->ts_fidx;
2447 	    if (has_mbyte)
2448 	    {
2449 		n = mb_ptr2len(p);
2450 		n += mb_ptr2len(p + n);
2451 		c = mb_ptr2char(p + n);
2452 		tl = mb_ptr2len(p + n);
2453 		mch_memmove(p + tl, p, n);
2454 		mb_char2bytes(c, p);
2455 	    }
2456 	    else
2457 	    {
2458 		c = p[2];
2459 		p[2] = p[1];
2460 		p[1] = *p;
2461 		*p = c;
2462 	    }
2463 
2464 	    // Rotate three bytes right: "123" -> "312".  We change "fword"
2465 	    // here, it's changed back afterwards at STATE_UNROT3R.
2466 	    if (TRY_DEEPER(su, stack, depth, SCORE_SWAP3))
2467 	    {
2468 		go_deeper(stack, depth, SCORE_SWAP3);
2469 #ifdef DEBUG_TRIEWALK
2470 		p = fword + sp->ts_fidx;
2471 		sprintf(changename[depth], "%.*s-%s: rotate right %c%c%c",
2472 			sp->ts_twordlen, tword, fword + sp->ts_fidx,
2473 			p[0], p[1], p[2]);
2474 #endif
2475 		PROF_STORE(sp->ts_state)
2476 		sp->ts_state = STATE_UNROT3R;
2477 		++depth;
2478 		p = fword + sp->ts_fidx;
2479 		if (has_mbyte)
2480 		{
2481 		    n = MB_CPTR2LEN(p);
2482 		    n += MB_CPTR2LEN(p + n);
2483 		    c = mb_ptr2char(p + n);
2484 		    tl = MB_CPTR2LEN(p + n);
2485 		    mch_memmove(p + tl, p, n);
2486 		    mb_char2bytes(c, p);
2487 		    stack[depth].ts_fidxtry = sp->ts_fidx + n + tl;
2488 		}
2489 		else
2490 		{
2491 		    c = p[2];
2492 		    p[2] = p[1];
2493 		    p[1] = *p;
2494 		    *p = c;
2495 		    stack[depth].ts_fidxtry = sp->ts_fidx + 3;
2496 		}
2497 	    }
2498 	    else
2499 	    {
2500 		PROF_STORE(sp->ts_state)
2501 		sp->ts_state = STATE_REP_INI;
2502 	    }
2503 	    break;
2504 
2505 	case STATE_UNROT3R:
2506 	    // Undo ROT3R: "312" -> "123"
2507 	    p = fword + sp->ts_fidx;
2508 	    if (has_mbyte)
2509 	    {
2510 		c = mb_ptr2char(p);
2511 		tl = mb_ptr2len(p);
2512 		n = mb_ptr2len(p + tl);
2513 		n += mb_ptr2len(p + tl + n);
2514 		mch_memmove(p, p + tl, n);
2515 		mb_char2bytes(c, p + n);
2516 	    }
2517 	    else
2518 	    {
2519 		c = *p;
2520 		*p = p[1];
2521 		p[1] = p[2];
2522 		p[2] = c;
2523 	    }
2524 	    // FALLTHROUGH
2525 
2526 	case STATE_REP_INI:
2527 	    // Check if matching with REP items from the .aff file would work.
2528 	    // Quickly skip if:
2529 	    // - there are no REP items and we are not in the soundfold trie
2530 	    // - the score is going to be too high anyway
2531 	    // - already applied a REP item or swapped here
2532 	    if ((lp->lp_replang == NULL && !soundfold)
2533 		    || sp->ts_score + SCORE_REP >= su->su_maxscore
2534 		    || sp->ts_fidx < sp->ts_fidxtry)
2535 	    {
2536 		PROF_STORE(sp->ts_state)
2537 		sp->ts_state = STATE_FINAL;
2538 		break;
2539 	    }
2540 
2541 	    // Use the first byte to quickly find the first entry that may
2542 	    // match.  If the index is -1 there is none.
2543 	    if (soundfold)
2544 		sp->ts_curi = slang->sl_repsal_first[fword[sp->ts_fidx]];
2545 	    else
2546 		sp->ts_curi = lp->lp_replang->sl_rep_first[fword[sp->ts_fidx]];
2547 
2548 	    if (sp->ts_curi < 0)
2549 	    {
2550 		PROF_STORE(sp->ts_state)
2551 		sp->ts_state = STATE_FINAL;
2552 		break;
2553 	    }
2554 
2555 	    PROF_STORE(sp->ts_state)
2556 	    sp->ts_state = STATE_REP;
2557 	    // FALLTHROUGH
2558 
2559 	case STATE_REP:
2560 	    // Try matching with REP items from the .aff file.  For each match
2561 	    // replace the characters and check if the resulting word is
2562 	    // valid.
2563 	    p = fword + sp->ts_fidx;
2564 
2565 	    if (soundfold)
2566 		gap = &slang->sl_repsal;
2567 	    else
2568 		gap = &lp->lp_replang->sl_rep;
2569 	    while (sp->ts_curi < gap->ga_len)
2570 	    {
2571 		ftp = (fromto_T *)gap->ga_data + sp->ts_curi++;
2572 		if (*ftp->ft_from != *p)
2573 		{
2574 		    // past possible matching entries
2575 		    sp->ts_curi = gap->ga_len;
2576 		    break;
2577 		}
2578 		if (STRNCMP(ftp->ft_from, p, STRLEN(ftp->ft_from)) == 0
2579 			&& TRY_DEEPER(su, stack, depth, SCORE_REP))
2580 		{
2581 		    go_deeper(stack, depth, SCORE_REP);
2582 #ifdef DEBUG_TRIEWALK
2583 		    sprintf(changename[depth], "%.*s-%s: replace %s with %s",
2584 			    sp->ts_twordlen, tword, fword + sp->ts_fidx,
2585 			    ftp->ft_from, ftp->ft_to);
2586 #endif
2587 		    // Need to undo this afterwards.
2588 		    PROF_STORE(sp->ts_state)
2589 		    sp->ts_state = STATE_REP_UNDO;
2590 
2591 		    // Change the "from" to the "to" string.
2592 		    ++depth;
2593 		    fl = (int)STRLEN(ftp->ft_from);
2594 		    tl = (int)STRLEN(ftp->ft_to);
2595 		    if (fl != tl)
2596 		    {
2597 			STRMOVE(p + tl, p + fl);
2598 			repextra += tl - fl;
2599 		    }
2600 		    mch_memmove(p, ftp->ft_to, tl);
2601 		    stack[depth].ts_fidxtry = sp->ts_fidx + tl;
2602 		    stack[depth].ts_tcharlen = 0;
2603 		    break;
2604 		}
2605 	    }
2606 
2607 	    if (sp->ts_curi >= gap->ga_len && sp->ts_state == STATE_REP)
2608 	    {
2609 		// No (more) matches.
2610 		PROF_STORE(sp->ts_state)
2611 		sp->ts_state = STATE_FINAL;
2612 	    }
2613 
2614 	    break;
2615 
2616 	case STATE_REP_UNDO:
2617 	    // Undo a REP replacement and continue with the next one.
2618 	    if (soundfold)
2619 		gap = &slang->sl_repsal;
2620 	    else
2621 		gap = &lp->lp_replang->sl_rep;
2622 	    ftp = (fromto_T *)gap->ga_data + sp->ts_curi - 1;
2623 	    fl = (int)STRLEN(ftp->ft_from);
2624 	    tl = (int)STRLEN(ftp->ft_to);
2625 	    p = fword + sp->ts_fidx;
2626 	    if (fl != tl)
2627 	    {
2628 		STRMOVE(p + fl, p + tl);
2629 		repextra -= tl - fl;
2630 	    }
2631 	    mch_memmove(p, ftp->ft_from, fl);
2632 	    PROF_STORE(sp->ts_state)
2633 	    sp->ts_state = STATE_REP;
2634 	    break;
2635 
2636 	default:
2637 	    // Did all possible states at this level, go up one level.
2638 	    --depth;
2639 
2640 	    if (depth >= 0 && stack[depth].ts_prefixdepth == PFD_PREFIXTREE)
2641 	    {
2642 		// Continue in or go back to the prefix tree.
2643 		byts = pbyts;
2644 		idxs = pidxs;
2645 	    }
2646 
2647 	    // Don't check for CTRL-C too often, it takes time.
2648 	    if (--breakcheckcount == 0)
2649 	    {
2650 		ui_breakcheck();
2651 		breakcheckcount = 1000;
2652 	    }
2653 	}
2654     }
2655 }
2656 
2657 
2658 /*
2659  * Go one level deeper in the tree.
2660  */
2661     static void
go_deeper(trystate_T * stack,int depth,int score_add)2662 go_deeper(trystate_T *stack, int depth, int score_add)
2663 {
2664     stack[depth + 1] = stack[depth];
2665     stack[depth + 1].ts_state = STATE_START;
2666     stack[depth + 1].ts_score = stack[depth].ts_score + score_add;
2667     stack[depth + 1].ts_curi = 1;	// start just after length byte
2668     stack[depth + 1].ts_flags = 0;
2669 }
2670 
2671 /*
2672  * "fword" is a good word with case folded.  Find the matching keep-case
2673  * words and put it in "kword".
2674  * Theoretically there could be several keep-case words that result in the
2675  * same case-folded word, but we only find one...
2676  */
2677     static void
find_keepcap_word(slang_T * slang,char_u * fword,char_u * kword)2678 find_keepcap_word(slang_T *slang, char_u *fword, char_u *kword)
2679 {
2680     char_u	uword[MAXWLEN];		// "fword" in upper-case
2681     int		depth;
2682     idx_T	tryidx;
2683 
2684     // The following arrays are used at each depth in the tree.
2685     idx_T	arridx[MAXWLEN];
2686     int		round[MAXWLEN];
2687     int		fwordidx[MAXWLEN];
2688     int		uwordidx[MAXWLEN];
2689     int		kwordlen[MAXWLEN];
2690 
2691     int		flen, ulen;
2692     int		l;
2693     int		len;
2694     int		c;
2695     idx_T	lo, hi, m;
2696     char_u	*p;
2697     char_u	*byts = slang->sl_kbyts;    // array with bytes of the words
2698     idx_T	*idxs = slang->sl_kidxs;    // array with indexes
2699 
2700     if (byts == NULL)
2701     {
2702 	// array is empty: "cannot happen"
2703 	*kword = NUL;
2704 	return;
2705     }
2706 
2707     // Make an all-cap version of "fword".
2708     allcap_copy(fword, uword);
2709 
2710     // Each character needs to be tried both case-folded and upper-case.
2711     // All this gets very complicated if we keep in mind that changing case
2712     // may change the byte length of a multi-byte character...
2713     depth = 0;
2714     arridx[0] = 0;
2715     round[0] = 0;
2716     fwordidx[0] = 0;
2717     uwordidx[0] = 0;
2718     kwordlen[0] = 0;
2719     while (depth >= 0)
2720     {
2721 	if (fword[fwordidx[depth]] == NUL)
2722 	{
2723 	    // We are at the end of "fword".  If the tree allows a word to end
2724 	    // here we have found a match.
2725 	    if (byts[arridx[depth] + 1] == 0)
2726 	    {
2727 		kword[kwordlen[depth]] = NUL;
2728 		return;
2729 	    }
2730 
2731 	    // kword is getting too long, continue one level up
2732 	    --depth;
2733 	}
2734 	else if (++round[depth] > 2)
2735 	{
2736 	    // tried both fold-case and upper-case character, continue one
2737 	    // level up
2738 	    --depth;
2739 	}
2740 	else
2741 	{
2742 	    // round[depth] == 1: Try using the folded-case character.
2743 	    // round[depth] == 2: Try using the upper-case character.
2744 	    if (has_mbyte)
2745 	    {
2746 		flen = MB_CPTR2LEN(fword + fwordidx[depth]);
2747 		ulen = MB_CPTR2LEN(uword + uwordidx[depth]);
2748 	    }
2749 	    else
2750 		ulen = flen = 1;
2751 	    if (round[depth] == 1)
2752 	    {
2753 		p = fword + fwordidx[depth];
2754 		l = flen;
2755 	    }
2756 	    else
2757 	    {
2758 		p = uword + uwordidx[depth];
2759 		l = ulen;
2760 	    }
2761 
2762 	    for (tryidx = arridx[depth]; l > 0; --l)
2763 	    {
2764 		// Perform a binary search in the list of accepted bytes.
2765 		len = byts[tryidx++];
2766 		c = *p++;
2767 		lo = tryidx;
2768 		hi = tryidx + len - 1;
2769 		while (lo < hi)
2770 		{
2771 		    m = (lo + hi) / 2;
2772 		    if (byts[m] > c)
2773 			hi = m - 1;
2774 		    else if (byts[m] < c)
2775 			lo = m + 1;
2776 		    else
2777 		    {
2778 			lo = hi = m;
2779 			break;
2780 		    }
2781 		}
2782 
2783 		// Stop if there is no matching byte.
2784 		if (hi < lo || byts[lo] != c)
2785 		    break;
2786 
2787 		// Continue at the child (if there is one).
2788 		tryidx = idxs[lo];
2789 	    }
2790 
2791 	    if (l == 0)
2792 	    {
2793 		// Found the matching char.  Copy it to "kword" and go a
2794 		// level deeper.
2795 		if (round[depth] == 1)
2796 		{
2797 		    STRNCPY(kword + kwordlen[depth], fword + fwordidx[depth],
2798 									flen);
2799 		    kwordlen[depth + 1] = kwordlen[depth] + flen;
2800 		}
2801 		else
2802 		{
2803 		    STRNCPY(kword + kwordlen[depth], uword + uwordidx[depth],
2804 									ulen);
2805 		    kwordlen[depth + 1] = kwordlen[depth] + ulen;
2806 		}
2807 		fwordidx[depth + 1] = fwordidx[depth] + flen;
2808 		uwordidx[depth + 1] = uwordidx[depth] + ulen;
2809 
2810 		++depth;
2811 		arridx[depth] = tryidx;
2812 		round[depth] = 0;
2813 	    }
2814 	}
2815     }
2816 
2817     // Didn't find it: "cannot happen".
2818     *kword = NUL;
2819 }
2820 
2821 /*
2822  * Compute the sound-a-like score for suggestions in su->su_ga and add them to
2823  * su->su_sga.
2824  */
2825     static void
score_comp_sal(suginfo_T * su)2826 score_comp_sal(suginfo_T *su)
2827 {
2828     langp_T	*lp;
2829     char_u	badsound[MAXWLEN];
2830     int		i;
2831     suggest_T   *stp;
2832     suggest_T   *sstp;
2833     int		score;
2834     int		lpi;
2835 
2836     if (ga_grow(&su->su_sga, su->su_ga.ga_len) == FAIL)
2837 	return;
2838 
2839     // Use the sound-folding of the first language that supports it.
2840     for (lpi = 0; lpi < curwin->w_s->b_langp.ga_len; ++lpi)
2841     {
2842 	lp = LANGP_ENTRY(curwin->w_s->b_langp, lpi);
2843 	if (lp->lp_slang->sl_sal.ga_len > 0)
2844 	{
2845 	    // soundfold the bad word
2846 	    spell_soundfold(lp->lp_slang, su->su_fbadword, TRUE, badsound);
2847 
2848 	    for (i = 0; i < su->su_ga.ga_len; ++i)
2849 	    {
2850 		stp = &SUG(su->su_ga, i);
2851 
2852 		// Case-fold the suggested word, sound-fold it and compute the
2853 		// sound-a-like score.
2854 		score = stp_sal_score(stp, su, lp->lp_slang, badsound);
2855 		if (score < SCORE_MAXMAX)
2856 		{
2857 		    // Add the suggestion.
2858 		    sstp = &SUG(su->su_sga, su->su_sga.ga_len);
2859 		    sstp->st_word = vim_strsave(stp->st_word);
2860 		    if (sstp->st_word != NULL)
2861 		    {
2862 			sstp->st_wordlen = stp->st_wordlen;
2863 			sstp->st_score = score;
2864 			sstp->st_altscore = 0;
2865 			sstp->st_orglen = stp->st_orglen;
2866 			++su->su_sga.ga_len;
2867 		    }
2868 		}
2869 	    }
2870 	    break;
2871 	}
2872     }
2873 }
2874 
2875 /*
2876  * Combine the list of suggestions in su->su_ga and su->su_sga.
2877  * They are entwined.
2878  */
2879     static void
score_combine(suginfo_T * su)2880 score_combine(suginfo_T *su)
2881 {
2882     int		i;
2883     int		j;
2884     garray_T	ga;
2885     garray_T	*gap;
2886     langp_T	*lp;
2887     suggest_T	*stp;
2888     char_u	*p;
2889     char_u	badsound[MAXWLEN];
2890     int		round;
2891     int		lpi;
2892     slang_T	*slang = NULL;
2893 
2894     // Add the alternate score to su_ga.
2895     for (lpi = 0; lpi < curwin->w_s->b_langp.ga_len; ++lpi)
2896     {
2897 	lp = LANGP_ENTRY(curwin->w_s->b_langp, lpi);
2898 	if (lp->lp_slang->sl_sal.ga_len > 0)
2899 	{
2900 	    // soundfold the bad word
2901 	    slang = lp->lp_slang;
2902 	    spell_soundfold(slang, su->su_fbadword, TRUE, badsound);
2903 
2904 	    for (i = 0; i < su->su_ga.ga_len; ++i)
2905 	    {
2906 		stp = &SUG(su->su_ga, i);
2907 		stp->st_altscore = stp_sal_score(stp, su, slang, badsound);
2908 		if (stp->st_altscore == SCORE_MAXMAX)
2909 		    stp->st_score = (stp->st_score * 3 + SCORE_BIG) / 4;
2910 		else
2911 		    stp->st_score = (stp->st_score * 3
2912 						  + stp->st_altscore) / 4;
2913 		stp->st_salscore = FALSE;
2914 	    }
2915 	    break;
2916 	}
2917     }
2918 
2919     if (slang == NULL)	// Using "double" without sound folding.
2920     {
2921 	(void)cleanup_suggestions(&su->su_ga, su->su_maxscore,
2922 							     su->su_maxcount);
2923 	return;
2924     }
2925 
2926     // Add the alternate score to su_sga.
2927     for (i = 0; i < su->su_sga.ga_len; ++i)
2928     {
2929 	stp = &SUG(su->su_sga, i);
2930 	stp->st_altscore = spell_edit_score(slang,
2931 						su->su_badword, stp->st_word);
2932 	if (stp->st_score == SCORE_MAXMAX)
2933 	    stp->st_score = (SCORE_BIG * 7 + stp->st_altscore) / 8;
2934 	else
2935 	    stp->st_score = (stp->st_score * 7 + stp->st_altscore) / 8;
2936 	stp->st_salscore = TRUE;
2937     }
2938 
2939     // Remove bad suggestions, sort the suggestions and truncate at "maxcount"
2940     // for both lists.
2941     check_suggestions(su, &su->su_ga);
2942     (void)cleanup_suggestions(&su->su_ga, su->su_maxscore, su->su_maxcount);
2943     check_suggestions(su, &su->su_sga);
2944     (void)cleanup_suggestions(&su->su_sga, su->su_maxscore, su->su_maxcount);
2945 
2946     ga_init2(&ga, (int)sizeof(suginfo_T), 1);
2947     if (ga_grow(&ga, su->su_ga.ga_len + su->su_sga.ga_len) == FAIL)
2948 	return;
2949 
2950     stp = &SUG(ga, 0);
2951     for (i = 0; i < su->su_ga.ga_len || i < su->su_sga.ga_len; ++i)
2952     {
2953 	// round 1: get a suggestion from su_ga
2954 	// round 2: get a suggestion from su_sga
2955 	for (round = 1; round <= 2; ++round)
2956 	{
2957 	    gap = round == 1 ? &su->su_ga : &su->su_sga;
2958 	    if (i < gap->ga_len)
2959 	    {
2960 		// Don't add a word if it's already there.
2961 		p = SUG(*gap, i).st_word;
2962 		for (j = 0; j < ga.ga_len; ++j)
2963 		    if (STRCMP(stp[j].st_word, p) == 0)
2964 			break;
2965 		if (j == ga.ga_len)
2966 		    stp[ga.ga_len++] = SUG(*gap, i);
2967 		else
2968 		    vim_free(p);
2969 	    }
2970 	}
2971     }
2972 
2973     ga_clear(&su->su_ga);
2974     ga_clear(&su->su_sga);
2975 
2976     // Truncate the list to the number of suggestions that will be displayed.
2977     if (ga.ga_len > su->su_maxcount)
2978     {
2979 	for (i = su->su_maxcount; i < ga.ga_len; ++i)
2980 	    vim_free(stp[i].st_word);
2981 	ga.ga_len = su->su_maxcount;
2982     }
2983 
2984     su->su_ga = ga;
2985 }
2986 
2987 /*
2988  * For the goodword in "stp" compute the soundalike score compared to the
2989  * badword.
2990  */
2991     static int
stp_sal_score(suggest_T * stp,suginfo_T * su,slang_T * slang,char_u * badsound)2992 stp_sal_score(
2993     suggest_T	*stp,
2994     suginfo_T	*su,
2995     slang_T	*slang,
2996     char_u	*badsound)	// sound-folded badword
2997 {
2998     char_u	*p;
2999     char_u	*pbad;
3000     char_u	*pgood;
3001     char_u	badsound2[MAXWLEN];
3002     char_u	fword[MAXWLEN];
3003     char_u	goodsound[MAXWLEN];
3004     char_u	goodword[MAXWLEN];
3005     int		lendiff;
3006 
3007     lendiff = (int)(su->su_badlen - stp->st_orglen);
3008     if (lendiff >= 0)
3009 	pbad = badsound;
3010     else
3011     {
3012 	// soundfold the bad word with more characters following
3013 	(void)spell_casefold(curwin,
3014 				su->su_badptr, stp->st_orglen, fword, MAXWLEN);
3015 
3016 	// When joining two words the sound often changes a lot.  E.g., "t he"
3017 	// sounds like "t h" while "the" sounds like "@".  Avoid that by
3018 	// removing the space.  Don't do it when the good word also contains a
3019 	// space.
3020 	if (VIM_ISWHITE(su->su_badptr[su->su_badlen])
3021 					 && *skiptowhite(stp->st_word) == NUL)
3022 	    for (p = fword; *(p = skiptowhite(p)) != NUL; )
3023 		STRMOVE(p, p + 1);
3024 
3025 	spell_soundfold(slang, fword, TRUE, badsound2);
3026 	pbad = badsound2;
3027     }
3028 
3029     if (lendiff > 0 && stp->st_wordlen + lendiff < MAXWLEN)
3030     {
3031 	// Add part of the bad word to the good word, so that we soundfold
3032 	// what replaces the bad word.
3033 	STRCPY(goodword, stp->st_word);
3034 	vim_strncpy(goodword + stp->st_wordlen,
3035 			    su->su_badptr + su->su_badlen - lendiff, lendiff);
3036 	pgood = goodword;
3037     }
3038     else
3039 	pgood = stp->st_word;
3040 
3041     // Sound-fold the word and compute the score for the difference.
3042     spell_soundfold(slang, pgood, FALSE, goodsound);
3043 
3044     return soundalike_score(goodsound, pbad);
3045 }
3046 
3047 // structure used to store soundfolded words that add_sound_suggest() has
3048 // handled already.
3049 typedef struct
3050 {
3051     short	sft_score;	// lowest score used
3052     char_u	sft_word[1];    // soundfolded word, actually longer
3053 } sftword_T;
3054 
3055 static sftword_T dumsft;
3056 #define HIKEY2SFT(p)  ((sftword_T *)(p - (dumsft.sft_word - (char_u *)&dumsft)))
3057 #define HI2SFT(hi)     HIKEY2SFT((hi)->hi_key)
3058 
3059 /*
3060  * Prepare for calling suggest_try_soundalike().
3061  */
3062     static void
suggest_try_soundalike_prep(void)3063 suggest_try_soundalike_prep(void)
3064 {
3065     langp_T	*lp;
3066     int		lpi;
3067     slang_T	*slang;
3068 
3069     // Do this for all languages that support sound folding and for which a
3070     // .sug file has been loaded.
3071     for (lpi = 0; lpi < curwin->w_s->b_langp.ga_len; ++lpi)
3072     {
3073 	lp = LANGP_ENTRY(curwin->w_s->b_langp, lpi);
3074 	slang = lp->lp_slang;
3075 	if (slang->sl_sal.ga_len > 0 && slang->sl_sbyts != NULL)
3076 	    // prepare the hashtable used by add_sound_suggest()
3077 	    hash_init(&slang->sl_sounddone);
3078     }
3079 }
3080 
3081 /*
3082  * Find suggestions by comparing the word in a sound-a-like form.
3083  * Note: This doesn't support postponed prefixes.
3084  */
3085     static void
suggest_try_soundalike(suginfo_T * su)3086 suggest_try_soundalike(suginfo_T *su)
3087 {
3088     char_u	salword[MAXWLEN];
3089     langp_T	*lp;
3090     int		lpi;
3091     slang_T	*slang;
3092 
3093     // Do this for all languages that support sound folding and for which a
3094     // .sug file has been loaded.
3095     for (lpi = 0; lpi < curwin->w_s->b_langp.ga_len; ++lpi)
3096     {
3097 	lp = LANGP_ENTRY(curwin->w_s->b_langp, lpi);
3098 	slang = lp->lp_slang;
3099 	if (slang->sl_sal.ga_len > 0 && slang->sl_sbyts != NULL)
3100 	{
3101 	    // soundfold the bad word
3102 	    spell_soundfold(slang, su->su_fbadword, TRUE, salword);
3103 
3104 	    // try all kinds of inserts/deletes/swaps/etc.
3105 	    // TODO: also soundfold the next words, so that we can try joining
3106 	    // and splitting
3107 #ifdef SUGGEST_PROFILE
3108 	prof_init();
3109 #endif
3110 	    suggest_trie_walk(su, lp, salword, TRUE);
3111 #ifdef SUGGEST_PROFILE
3112 	prof_report("soundalike");
3113 #endif
3114 	}
3115     }
3116 }
3117 
3118 /*
3119  * Finish up after calling suggest_try_soundalike().
3120  */
3121     static void
suggest_try_soundalike_finish(void)3122 suggest_try_soundalike_finish(void)
3123 {
3124     langp_T	*lp;
3125     int		lpi;
3126     slang_T	*slang;
3127     int		todo;
3128     hashitem_T	*hi;
3129 
3130     // Do this for all languages that support sound folding and for which a
3131     // .sug file has been loaded.
3132     for (lpi = 0; lpi < curwin->w_s->b_langp.ga_len; ++lpi)
3133     {
3134 	lp = LANGP_ENTRY(curwin->w_s->b_langp, lpi);
3135 	slang = lp->lp_slang;
3136 	if (slang->sl_sal.ga_len > 0 && slang->sl_sbyts != NULL)
3137 	{
3138 	    // Free the info about handled words.
3139 	    todo = (int)slang->sl_sounddone.ht_used;
3140 	    for (hi = slang->sl_sounddone.ht_array; todo > 0; ++hi)
3141 		if (!HASHITEM_EMPTY(hi))
3142 		{
3143 		    vim_free(HI2SFT(hi));
3144 		    --todo;
3145 		}
3146 
3147 	    // Clear the hashtable, it may also be used by another region.
3148 	    hash_clear(&slang->sl_sounddone);
3149 	    hash_init(&slang->sl_sounddone);
3150 	}
3151     }
3152 }
3153 
3154 /*
3155  * A match with a soundfolded word is found.  Add the good word(s) that
3156  * produce this soundfolded word.
3157  */
3158     static void
add_sound_suggest(suginfo_T * su,char_u * goodword,int score,langp_T * lp)3159 add_sound_suggest(
3160     suginfo_T	*su,
3161     char_u	*goodword,
3162     int		score,		// soundfold score
3163     langp_T	*lp)
3164 {
3165     slang_T	*slang = lp->lp_slang;	// language for sound folding
3166     int		sfwordnr;
3167     char_u	*nrline;
3168     int		orgnr;
3169     char_u	theword[MAXWLEN];
3170     int		i;
3171     int		wlen;
3172     char_u	*byts;
3173     idx_T	*idxs;
3174     int		n;
3175     int		wordcount;
3176     int		wc;
3177     int		goodscore;
3178     hash_T	hash;
3179     hashitem_T  *hi;
3180     sftword_T	*sft;
3181     int		bc, gc;
3182     int		limit;
3183 
3184     // It's very well possible that the same soundfold word is found several
3185     // times with different scores.  Since the following is quite slow only do
3186     // the words that have a better score than before.  Use a hashtable to
3187     // remember the words that have been done.
3188     hash = hash_hash(goodword);
3189     hi = hash_lookup(&slang->sl_sounddone, goodword, hash);
3190     if (HASHITEM_EMPTY(hi))
3191     {
3192 	sft = alloc(sizeof(sftword_T) + STRLEN(goodword));
3193 	if (sft != NULL)
3194 	{
3195 	    sft->sft_score = score;
3196 	    STRCPY(sft->sft_word, goodword);
3197 	    hash_add_item(&slang->sl_sounddone, hi, sft->sft_word, hash);
3198 	}
3199     }
3200     else
3201     {
3202 	sft = HI2SFT(hi);
3203 	if (score >= sft->sft_score)
3204 	    return;
3205 	sft->sft_score = score;
3206     }
3207 
3208     // Find the word nr in the soundfold tree.
3209     sfwordnr = soundfold_find(slang, goodword);
3210     if (sfwordnr < 0)
3211     {
3212 	internal_error("add_sound_suggest()");
3213 	return;
3214     }
3215 
3216     // go over the list of good words that produce this soundfold word
3217     nrline = ml_get_buf(slang->sl_sugbuf, (linenr_T)(sfwordnr + 1), FALSE);
3218     orgnr = 0;
3219     while (*nrline != NUL)
3220     {
3221 	// The wordnr was stored in a minimal nr of bytes as an offset to the
3222 	// previous wordnr.
3223 	orgnr += bytes2offset(&nrline);
3224 
3225 	byts = slang->sl_fbyts;
3226 	idxs = slang->sl_fidxs;
3227 
3228 	// Lookup the word "orgnr" one of the two tries.
3229 	n = 0;
3230 	wordcount = 0;
3231 	for (wlen = 0; wlen < MAXWLEN - 3; ++wlen)
3232 	{
3233 	    i = 1;
3234 	    if (wordcount == orgnr && byts[n + 1] == NUL)
3235 		break;	// found end of word
3236 
3237 	    if (byts[n + 1] == NUL)
3238 		++wordcount;
3239 
3240 	    // skip over the NUL bytes
3241 	    for ( ; byts[n + i] == NUL; ++i)
3242 		if (i > byts[n])	// safety check
3243 		{
3244 		    STRCPY(theword + wlen, "BAD");
3245 		    wlen += 3;
3246 		    goto badword;
3247 		}
3248 
3249 	    // One of the siblings must have the word.
3250 	    for ( ; i < byts[n]; ++i)
3251 	    {
3252 		wc = idxs[idxs[n + i]];	// nr of words under this byte
3253 		if (wordcount + wc > orgnr)
3254 		    break;
3255 		wordcount += wc;
3256 	    }
3257 
3258 	    theword[wlen] = byts[n + i];
3259 	    n = idxs[n + i];
3260 	}
3261 badword:
3262 	theword[wlen] = NUL;
3263 
3264 	// Go over the possible flags and regions.
3265 	for (; i <= byts[n] && byts[n + i] == NUL; ++i)
3266 	{
3267 	    char_u	cword[MAXWLEN];
3268 	    char_u	*p;
3269 	    int		flags = (int)idxs[n + i];
3270 
3271 	    // Skip words with the NOSUGGEST flag
3272 	    if (flags & WF_NOSUGGEST)
3273 		continue;
3274 
3275 	    if (flags & WF_KEEPCAP)
3276 	    {
3277 		// Must find the word in the keep-case tree.
3278 		find_keepcap_word(slang, theword, cword);
3279 		p = cword;
3280 	    }
3281 	    else
3282 	    {
3283 		flags |= su->su_badflags;
3284 		if ((flags & WF_CAPMASK) != 0)
3285 		{
3286 		    // Need to fix case according to "flags".
3287 		    make_case_word(theword, cword, flags);
3288 		    p = cword;
3289 		}
3290 		else
3291 		    p = theword;
3292 	    }
3293 
3294 	    // Add the suggestion.
3295 	    if (sps_flags & SPS_DOUBLE)
3296 	    {
3297 		// Add the suggestion if the score isn't too bad.
3298 		if (score <= su->su_maxscore)
3299 		    add_suggestion(su, &su->su_sga, p, su->su_badlen,
3300 					       score, 0, FALSE, slang, FALSE);
3301 	    }
3302 	    else
3303 	    {
3304 		// Add a penalty for words in another region.
3305 		if ((flags & WF_REGION)
3306 			    && (((unsigned)flags >> 16) & lp->lp_region) == 0)
3307 		    goodscore = SCORE_REGION;
3308 		else
3309 		    goodscore = 0;
3310 
3311 		// Add a small penalty for changing the first letter from
3312 		// lower to upper case.  Helps for "tath" -> "Kath", which is
3313 		// less common than "tath" -> "path".  Don't do it when the
3314 		// letter is the same, that has already been counted.
3315 		gc = PTR2CHAR(p);
3316 		if (SPELL_ISUPPER(gc))
3317 		{
3318 		    bc = PTR2CHAR(su->su_badword);
3319 		    if (!SPELL_ISUPPER(bc)
3320 				      && SPELL_TOFOLD(bc) != SPELL_TOFOLD(gc))
3321 			goodscore += SCORE_ICASE / 2;
3322 		}
3323 
3324 		// Compute the score for the good word.  This only does letter
3325 		// insert/delete/swap/replace.  REP items are not considered,
3326 		// which may make the score a bit higher.
3327 		// Use a limit for the score to make it work faster.  Use
3328 		// MAXSCORE(), because RESCORE() will change the score.
3329 		// If the limit is very high then the iterative method is
3330 		// inefficient, using an array is quicker.
3331 		limit = MAXSCORE(su->su_sfmaxscore - goodscore, score);
3332 		if (limit > SCORE_LIMITMAX)
3333 		    goodscore += spell_edit_score(slang, su->su_badword, p);
3334 		else
3335 		    goodscore += spell_edit_score_limit(slang, su->su_badword,
3336 								    p, limit);
3337 
3338 		// When going over the limit don't bother to do the rest.
3339 		if (goodscore < SCORE_MAXMAX)
3340 		{
3341 		    // Give a bonus to words seen before.
3342 		    goodscore = score_wordcount_adj(slang, goodscore, p, FALSE);
3343 
3344 		    // Add the suggestion if the score isn't too bad.
3345 		    goodscore = RESCORE(goodscore, score);
3346 		    if (goodscore <= su->su_sfmaxscore)
3347 			add_suggestion(su, &su->su_ga, p, su->su_badlen,
3348 					 goodscore, score, TRUE, slang, TRUE);
3349 		}
3350 	    }
3351 	}
3352 	// smsg("word %s (%d): %s (%d)", sftword, sftnr, theword, orgnr);
3353     }
3354 }
3355 
3356 /*
3357  * Find word "word" in fold-case tree for "slang" and return the word number.
3358  */
3359     static int
soundfold_find(slang_T * slang,char_u * word)3360 soundfold_find(slang_T *slang, char_u *word)
3361 {
3362     idx_T	arridx = 0;
3363     int		len;
3364     int		wlen = 0;
3365     int		c;
3366     char_u	*ptr = word;
3367     char_u	*byts;
3368     idx_T	*idxs;
3369     int		wordnr = 0;
3370 
3371     byts = slang->sl_sbyts;
3372     idxs = slang->sl_sidxs;
3373 
3374     for (;;)
3375     {
3376 	// First byte is the number of possible bytes.
3377 	len = byts[arridx++];
3378 
3379 	// If the first possible byte is a zero the word could end here.
3380 	// If the word ends we found the word.  If not skip the NUL bytes.
3381 	c = ptr[wlen];
3382 	if (byts[arridx] == NUL)
3383 	{
3384 	    if (c == NUL)
3385 		break;
3386 
3387 	    // Skip over the zeros, there can be several.
3388 	    while (len > 0 && byts[arridx] == NUL)
3389 	    {
3390 		++arridx;
3391 		--len;
3392 	    }
3393 	    if (len == 0)
3394 		return -1;    // no children, word should have ended here
3395 	    ++wordnr;
3396 	}
3397 
3398 	// If the word ends we didn't find it.
3399 	if (c == NUL)
3400 	    return -1;
3401 
3402 	// Perform a binary search in the list of accepted bytes.
3403 	if (c == TAB)	    // <Tab> is handled like <Space>
3404 	    c = ' ';
3405 	while (byts[arridx] < c)
3406 	{
3407 	    // The word count is in the first idxs[] entry of the child.
3408 	    wordnr += idxs[idxs[arridx]];
3409 	    ++arridx;
3410 	    if (--len == 0)	// end of the bytes, didn't find it
3411 		return -1;
3412 	}
3413 	if (byts[arridx] != c)	// didn't find the byte
3414 	    return -1;
3415 
3416 	// Continue at the child (if there is one).
3417 	arridx = idxs[arridx];
3418 	++wlen;
3419 
3420 	// One space in the good word may stand for several spaces in the
3421 	// checked word.
3422 	if (c == ' ')
3423 	    while (ptr[wlen] == ' ' || ptr[wlen] == TAB)
3424 		++wlen;
3425     }
3426 
3427     return wordnr;
3428 }
3429 
3430 /*
3431  * Return TRUE if "c1" and "c2" are similar characters according to the MAP
3432  * lines in the .aff file.
3433  */
3434     static int
similar_chars(slang_T * slang,int c1,int c2)3435 similar_chars(slang_T *slang, int c1, int c2)
3436 {
3437     int		m1, m2;
3438     char_u	buf[MB_MAXBYTES + 1];
3439     hashitem_T  *hi;
3440 
3441     if (c1 >= 256)
3442     {
3443 	buf[mb_char2bytes(c1, buf)] = 0;
3444 	hi = hash_find(&slang->sl_map_hash, buf);
3445 	if (HASHITEM_EMPTY(hi))
3446 	    m1 = 0;
3447 	else
3448 	    m1 = mb_ptr2char(hi->hi_key + STRLEN(hi->hi_key) + 1);
3449     }
3450     else
3451 	m1 = slang->sl_map_array[c1];
3452     if (m1 == 0)
3453 	return FALSE;
3454 
3455 
3456     if (c2 >= 256)
3457     {
3458 	buf[mb_char2bytes(c2, buf)] = 0;
3459 	hi = hash_find(&slang->sl_map_hash, buf);
3460 	if (HASHITEM_EMPTY(hi))
3461 	    m2 = 0;
3462 	else
3463 	    m2 = mb_ptr2char(hi->hi_key + STRLEN(hi->hi_key) + 1);
3464     }
3465     else
3466 	m2 = slang->sl_map_array[c2];
3467 
3468     return m1 == m2;
3469 }
3470 
3471 /*
3472  * Add a suggestion to the list of suggestions.
3473  * For a suggestion that is already in the list the lowest score is remembered.
3474  */
3475     static void
add_suggestion(suginfo_T * su,garray_T * gap,char_u * goodword,int badlenarg,int score,int altscore,int had_bonus,slang_T * slang,int maxsf)3476 add_suggestion(
3477     suginfo_T	*su,
3478     garray_T	*gap,		// either su_ga or su_sga
3479     char_u	*goodword,
3480     int		badlenarg,	// len of bad word replaced with "goodword"
3481     int		score,
3482     int		altscore,
3483     int		had_bonus,	// value for st_had_bonus
3484     slang_T	*slang,		// language for sound folding
3485     int		maxsf)		// su_maxscore applies to soundfold score,
3486 				// su_sfmaxscore to the total score.
3487 {
3488     int		goodlen;	// len of goodword changed
3489     int		badlen;		// len of bad word changed
3490     suggest_T   *stp;
3491     suggest_T   new_sug;
3492     int		i;
3493     char_u	*pgood, *pbad;
3494 
3495     // Minimize "badlen" for consistency.  Avoids that changing "the the" to
3496     // "thee the" is added next to changing the first "the" the "thee".
3497     pgood = goodword + STRLEN(goodword);
3498     pbad = su->su_badptr + badlenarg;
3499     for (;;)
3500     {
3501 	goodlen = (int)(pgood - goodword);
3502 	badlen = (int)(pbad - su->su_badptr);
3503 	if (goodlen <= 0 || badlen <= 0)
3504 	    break;
3505 	MB_PTR_BACK(goodword, pgood);
3506 	MB_PTR_BACK(su->su_badptr, pbad);
3507 	if (has_mbyte)
3508 	{
3509 	    if (mb_ptr2char(pgood) != mb_ptr2char(pbad))
3510 		break;
3511 	}
3512 	else if (*pgood != *pbad)
3513 		break;
3514     }
3515 
3516     if (badlen == 0 && goodlen == 0)
3517 	// goodword doesn't change anything; may happen for "the the" changing
3518 	// the first "the" to itself.
3519 	return;
3520 
3521     if (gap->ga_len == 0)
3522 	i = -1;
3523     else
3524     {
3525 	// Check if the word is already there.  Also check the length that is
3526 	// being replaced "thes," -> "these" is a different suggestion from
3527 	// "thes" -> "these".
3528 	stp = &SUG(*gap, 0);
3529 	for (i = gap->ga_len; --i >= 0; ++stp)
3530 	    if (stp->st_wordlen == goodlen
3531 		    && stp->st_orglen == badlen
3532 		    && STRNCMP(stp->st_word, goodword, goodlen) == 0)
3533 	    {
3534 		// Found it.  Remember the word with the lowest score.
3535 		if (stp->st_slang == NULL)
3536 		    stp->st_slang = slang;
3537 
3538 		new_sug.st_score = score;
3539 		new_sug.st_altscore = altscore;
3540 		new_sug.st_had_bonus = had_bonus;
3541 
3542 		if (stp->st_had_bonus != had_bonus)
3543 		{
3544 		    // Only one of the two had the soundalike score computed.
3545 		    // Need to do that for the other one now, otherwise the
3546 		    // scores can't be compared.  This happens because
3547 		    // suggest_try_change() doesn't compute the soundalike
3548 		    // word to keep it fast, while some special methods set
3549 		    // the soundalike score to zero.
3550 		    if (had_bonus)
3551 			rescore_one(su, stp);
3552 		    else
3553 		    {
3554 			new_sug.st_word = stp->st_word;
3555 			new_sug.st_wordlen = stp->st_wordlen;
3556 			new_sug.st_slang = stp->st_slang;
3557 			new_sug.st_orglen = badlen;
3558 			rescore_one(su, &new_sug);
3559 		    }
3560 		}
3561 
3562 		if (stp->st_score > new_sug.st_score)
3563 		{
3564 		    stp->st_score = new_sug.st_score;
3565 		    stp->st_altscore = new_sug.st_altscore;
3566 		    stp->st_had_bonus = new_sug.st_had_bonus;
3567 		}
3568 		break;
3569 	    }
3570     }
3571 
3572     if (i < 0 && ga_grow(gap, 1) == OK)
3573     {
3574 	// Add a suggestion.
3575 	stp = &SUG(*gap, gap->ga_len);
3576 	stp->st_word = vim_strnsave(goodword, goodlen);
3577 	if (stp->st_word != NULL)
3578 	{
3579 	    stp->st_wordlen = goodlen;
3580 	    stp->st_score = score;
3581 	    stp->st_altscore = altscore;
3582 	    stp->st_had_bonus = had_bonus;
3583 	    stp->st_orglen = badlen;
3584 	    stp->st_slang = slang;
3585 	    ++gap->ga_len;
3586 
3587 	    // If we have too many suggestions now, sort the list and keep
3588 	    // the best suggestions.
3589 	    if (gap->ga_len > SUG_MAX_COUNT(su))
3590 	    {
3591 		if (maxsf)
3592 		    su->su_sfmaxscore = cleanup_suggestions(gap,
3593 				      su->su_sfmaxscore, SUG_CLEAN_COUNT(su));
3594 		else
3595 		    su->su_maxscore = cleanup_suggestions(gap,
3596 					su->su_maxscore, SUG_CLEAN_COUNT(su));
3597 	    }
3598 	}
3599     }
3600 }
3601 
3602 /*
3603  * Suggestions may in fact be flagged as errors.  Esp. for banned words and
3604  * for split words, such as "the the".  Remove these from the list here.
3605  */
3606     static void
check_suggestions(suginfo_T * su,garray_T * gap)3607 check_suggestions(
3608     suginfo_T	*su,
3609     garray_T	*gap)		    // either su_ga or su_sga
3610 {
3611     suggest_T   *stp;
3612     int		i;
3613     char_u	longword[MAXWLEN + 1];
3614     int		len;
3615     hlf_T	attr;
3616 
3617     if (gap->ga_len == 0)
3618 	return;
3619     stp = &SUG(*gap, 0);
3620     for (i = gap->ga_len - 1; i >= 0; --i)
3621     {
3622 	// Need to append what follows to check for "the the".
3623 	vim_strncpy(longword, stp[i].st_word, MAXWLEN);
3624 	len = stp[i].st_wordlen;
3625 	vim_strncpy(longword + len, su->su_badptr + stp[i].st_orglen,
3626 							       MAXWLEN - len);
3627 	attr = HLF_COUNT;
3628 	(void)spell_check(curwin, longword, &attr, NULL, FALSE);
3629 	if (attr != HLF_COUNT)
3630 	{
3631 	    // Remove this entry.
3632 	    vim_free(stp[i].st_word);
3633 	    --gap->ga_len;
3634 	    if (i < gap->ga_len)
3635 		mch_memmove(stp + i, stp + i + 1,
3636 				       sizeof(suggest_T) * (gap->ga_len - i));
3637 	}
3638     }
3639 }
3640 
3641 
3642 /*
3643  * Add a word to be banned.
3644  */
3645     static void
add_banned(suginfo_T * su,char_u * word)3646 add_banned(
3647     suginfo_T	*su,
3648     char_u	*word)
3649 {
3650     char_u	*s;
3651     hash_T	hash;
3652     hashitem_T	*hi;
3653 
3654     hash = hash_hash(word);
3655     hi = hash_lookup(&su->su_banned, word, hash);
3656     if (HASHITEM_EMPTY(hi))
3657     {
3658 	s = vim_strsave(word);
3659 	if (s != NULL)
3660 	    hash_add_item(&su->su_banned, hi, s, hash);
3661     }
3662 }
3663 
3664 /*
3665  * Recompute the score for all suggestions if sound-folding is possible.  This
3666  * is slow, thus only done for the final results.
3667  */
3668     static void
rescore_suggestions(suginfo_T * su)3669 rescore_suggestions(suginfo_T *su)
3670 {
3671     int		i;
3672 
3673     if (su->su_sallang != NULL)
3674 	for (i = 0; i < su->su_ga.ga_len; ++i)
3675 	    rescore_one(su, &SUG(su->su_ga, i));
3676 }
3677 
3678 /*
3679  * Recompute the score for one suggestion if sound-folding is possible.
3680  */
3681     static void
rescore_one(suginfo_T * su,suggest_T * stp)3682 rescore_one(suginfo_T *su, suggest_T *stp)
3683 {
3684     slang_T	*slang = stp->st_slang;
3685     char_u	sal_badword[MAXWLEN];
3686     char_u	*p;
3687 
3688     // Only rescore suggestions that have no sal score yet and do have a
3689     // language.
3690     if (slang != NULL && slang->sl_sal.ga_len > 0 && !stp->st_had_bonus)
3691     {
3692 	if (slang == su->su_sallang)
3693 	    p = su->su_sal_badword;
3694 	else
3695 	{
3696 	    spell_soundfold(slang, su->su_fbadword, TRUE, sal_badword);
3697 	    p = sal_badword;
3698 	}
3699 
3700 	stp->st_altscore = stp_sal_score(stp, su, slang, p);
3701 	if (stp->st_altscore == SCORE_MAXMAX)
3702 	    stp->st_altscore = SCORE_BIG;
3703 	stp->st_score = RESCORE(stp->st_score, stp->st_altscore);
3704 	stp->st_had_bonus = TRUE;
3705     }
3706 }
3707 
3708 static int sug_compare(const void *s1, const void *s2);
3709 
3710 /*
3711  * Function given to qsort() to sort the suggestions on st_score.
3712  * First on "st_score", then "st_altscore" then alphabetically.
3713  */
3714     static int
sug_compare(const void * s1,const void * s2)3715 sug_compare(const void *s1, const void *s2)
3716 {
3717     suggest_T	*p1 = (suggest_T *)s1;
3718     suggest_T	*p2 = (suggest_T *)s2;
3719     int		n = p1->st_score - p2->st_score;
3720 
3721     if (n == 0)
3722     {
3723 	n = p1->st_altscore - p2->st_altscore;
3724 	if (n == 0)
3725 	    n = STRICMP(p1->st_word, p2->st_word);
3726     }
3727     return n;
3728 }
3729 
3730 /*
3731  * Cleanup the suggestions:
3732  * - Sort on score.
3733  * - Remove words that won't be displayed.
3734  * Returns the maximum score in the list or "maxscore" unmodified.
3735  */
3736     static int
cleanup_suggestions(garray_T * gap,int maxscore,int keep)3737 cleanup_suggestions(
3738     garray_T	*gap,
3739     int		maxscore,
3740     int		keep)		// nr of suggestions to keep
3741 {
3742     if (gap->ga_len > 0)
3743     {
3744 	// Sort the list.
3745 	qsort(gap->ga_data, (size_t)gap->ga_len, sizeof(suggest_T),
3746 								  sug_compare);
3747 
3748 	// Truncate the list to the number of suggestions that will be
3749 	// displayed.
3750 	if (gap->ga_len > keep)
3751 	{
3752 	    int		i;
3753 	    suggest_T   *stp = &SUG(*gap, 0);
3754 
3755 	    for (i = keep; i < gap->ga_len; ++i)
3756 		vim_free(stp[i].st_word);
3757 	    gap->ga_len = keep;
3758 	    if (keep >= 1)
3759 		return stp[keep - 1].st_score;
3760 	}
3761     }
3762     return maxscore;
3763 }
3764 
3765 /*
3766  * Compute a score for two sound-a-like words.
3767  * This permits up to two inserts/deletes/swaps/etc. to keep things fast.
3768  * Instead of a generic loop we write out the code.  That keeps it fast by
3769  * avoiding checks that will not be possible.
3770  */
3771     static int
soundalike_score(char_u * goodstart,char_u * badstart)3772 soundalike_score(
3773     char_u	*goodstart,	// sound-folded good word
3774     char_u	*badstart)	// sound-folded bad word
3775 {
3776     char_u	*goodsound = goodstart;
3777     char_u	*badsound = badstart;
3778     int		goodlen;
3779     int		badlen;
3780     int		n;
3781     char_u	*pl, *ps;
3782     char_u	*pl2, *ps2;
3783     int		score = 0;
3784 
3785     // Adding/inserting "*" at the start (word starts with vowel) shouldn't be
3786     // counted so much, vowels halfway the word aren't counted at all.
3787     if ((*badsound == '*' || *goodsound == '*') && *badsound != *goodsound)
3788     {
3789 	if ((badsound[0] == NUL && goodsound[1] == NUL)
3790 	    || (goodsound[0] == NUL && badsound[1] == NUL))
3791 	    // changing word with vowel to word without a sound
3792 	    return SCORE_DEL;
3793 	if (badsound[0] == NUL || goodsound[0] == NUL)
3794 	    // more than two changes
3795 	    return SCORE_MAXMAX;
3796 
3797 	if (badsound[1] == goodsound[1]
3798 		|| (badsound[1] != NUL
3799 		    && goodsound[1] != NUL
3800 		    && badsound[2] == goodsound[2]))
3801 	{
3802 	    // handle like a substitute
3803 	}
3804 	else
3805 	{
3806 	    score = 2 * SCORE_DEL / 3;
3807 	    if (*badsound == '*')
3808 		++badsound;
3809 	    else
3810 		++goodsound;
3811 	}
3812     }
3813 
3814     goodlen = (int)STRLEN(goodsound);
3815     badlen = (int)STRLEN(badsound);
3816 
3817     // Return quickly if the lengths are too different to be fixed by two
3818     // changes.
3819     n = goodlen - badlen;
3820     if (n < -2 || n > 2)
3821 	return SCORE_MAXMAX;
3822 
3823     if (n > 0)
3824     {
3825 	pl = goodsound;	    // goodsound is longest
3826 	ps = badsound;
3827     }
3828     else
3829     {
3830 	pl = badsound;	    // badsound is longest
3831 	ps = goodsound;
3832     }
3833 
3834     // Skip over the identical part.
3835     while (*pl == *ps && *pl != NUL)
3836     {
3837 	++pl;
3838 	++ps;
3839     }
3840 
3841     switch (n)
3842     {
3843 	case -2:
3844 	case 2:
3845 	    // Must delete two characters from "pl".
3846 	    ++pl;	// first delete
3847 	    while (*pl == *ps)
3848 	    {
3849 		++pl;
3850 		++ps;
3851 	    }
3852 	    // strings must be equal after second delete
3853 	    if (STRCMP(pl + 1, ps) == 0)
3854 		return score + SCORE_DEL * 2;
3855 
3856 	    // Failed to compare.
3857 	    break;
3858 
3859 	case -1:
3860 	case 1:
3861 	    // Minimal one delete from "pl" required.
3862 
3863 	    // 1: delete
3864 	    pl2 = pl + 1;
3865 	    ps2 = ps;
3866 	    while (*pl2 == *ps2)
3867 	    {
3868 		if (*pl2 == NUL)	// reached the end
3869 		    return score + SCORE_DEL;
3870 		++pl2;
3871 		++ps2;
3872 	    }
3873 
3874 	    // 2: delete then swap, then rest must be equal
3875 	    if (pl2[0] == ps2[1] && pl2[1] == ps2[0]
3876 					     && STRCMP(pl2 + 2, ps2 + 2) == 0)
3877 		return score + SCORE_DEL + SCORE_SWAP;
3878 
3879 	    // 3: delete then substitute, then the rest must be equal
3880 	    if (STRCMP(pl2 + 1, ps2 + 1) == 0)
3881 		return score + SCORE_DEL + SCORE_SUBST;
3882 
3883 	    // 4: first swap then delete
3884 	    if (pl[0] == ps[1] && pl[1] == ps[0])
3885 	    {
3886 		pl2 = pl + 2;	    // swap, skip two chars
3887 		ps2 = ps + 2;
3888 		while (*pl2 == *ps2)
3889 		{
3890 		    ++pl2;
3891 		    ++ps2;
3892 		}
3893 		// delete a char and then strings must be equal
3894 		if (STRCMP(pl2 + 1, ps2) == 0)
3895 		    return score + SCORE_SWAP + SCORE_DEL;
3896 	    }
3897 
3898 	    // 5: first substitute then delete
3899 	    pl2 = pl + 1;	    // substitute, skip one char
3900 	    ps2 = ps + 1;
3901 	    while (*pl2 == *ps2)
3902 	    {
3903 		++pl2;
3904 		++ps2;
3905 	    }
3906 	    // delete a char and then strings must be equal
3907 	    if (STRCMP(pl2 + 1, ps2) == 0)
3908 		return score + SCORE_SUBST + SCORE_DEL;
3909 
3910 	    // Failed to compare.
3911 	    break;
3912 
3913 	case 0:
3914 	    // Lengths are equal, thus changes must result in same length: An
3915 	    // insert is only possible in combination with a delete.
3916 	    // 1: check if for identical strings
3917 	    if (*pl == NUL)
3918 		return score;
3919 
3920 	    // 2: swap
3921 	    if (pl[0] == ps[1] && pl[1] == ps[0])
3922 	    {
3923 		pl2 = pl + 2;	    // swap, skip two chars
3924 		ps2 = ps + 2;
3925 		while (*pl2 == *ps2)
3926 		{
3927 		    if (*pl2 == NUL)	// reached the end
3928 			return score + SCORE_SWAP;
3929 		    ++pl2;
3930 		    ++ps2;
3931 		}
3932 		// 3: swap and swap again
3933 		if (pl2[0] == ps2[1] && pl2[1] == ps2[0]
3934 					     && STRCMP(pl2 + 2, ps2 + 2) == 0)
3935 		    return score + SCORE_SWAP + SCORE_SWAP;
3936 
3937 		// 4: swap and substitute
3938 		if (STRCMP(pl2 + 1, ps2 + 1) == 0)
3939 		    return score + SCORE_SWAP + SCORE_SUBST;
3940 	    }
3941 
3942 	    // 5: substitute
3943 	    pl2 = pl + 1;
3944 	    ps2 = ps + 1;
3945 	    while (*pl2 == *ps2)
3946 	    {
3947 		if (*pl2 == NUL)	// reached the end
3948 		    return score + SCORE_SUBST;
3949 		++pl2;
3950 		++ps2;
3951 	    }
3952 
3953 	    // 6: substitute and swap
3954 	    if (pl2[0] == ps2[1] && pl2[1] == ps2[0]
3955 					     && STRCMP(pl2 + 2, ps2 + 2) == 0)
3956 		return score + SCORE_SUBST + SCORE_SWAP;
3957 
3958 	    // 7: substitute and substitute
3959 	    if (STRCMP(pl2 + 1, ps2 + 1) == 0)
3960 		return score + SCORE_SUBST + SCORE_SUBST;
3961 
3962 	    // 8: insert then delete
3963 	    pl2 = pl;
3964 	    ps2 = ps + 1;
3965 	    while (*pl2 == *ps2)
3966 	    {
3967 		++pl2;
3968 		++ps2;
3969 	    }
3970 	    if (STRCMP(pl2 + 1, ps2) == 0)
3971 		return score + SCORE_INS + SCORE_DEL;
3972 
3973 	    // 9: delete then insert
3974 	    pl2 = pl + 1;
3975 	    ps2 = ps;
3976 	    while (*pl2 == *ps2)
3977 	    {
3978 		++pl2;
3979 		++ps2;
3980 	    }
3981 	    if (STRCMP(pl2, ps2 + 1) == 0)
3982 		return score + SCORE_INS + SCORE_DEL;
3983 
3984 	    // Failed to compare.
3985 	    break;
3986     }
3987 
3988     return SCORE_MAXMAX;
3989 }
3990 
3991 /*
3992  * Compute the "edit distance" to turn "badword" into "goodword".  The less
3993  * deletes/inserts/substitutes/swaps are required the lower the score.
3994  *
3995  * The algorithm is described by Du and Chang, 1992.
3996  * The implementation of the algorithm comes from Aspell editdist.cpp,
3997  * edit_distance().  It has been converted from C++ to C and modified to
3998  * support multi-byte characters.
3999  */
4000     static int
spell_edit_score(slang_T * slang,char_u * badword,char_u * goodword)4001 spell_edit_score(
4002     slang_T	*slang,
4003     char_u	*badword,
4004     char_u	*goodword)
4005 {
4006     int		*cnt;
4007     int		badlen, goodlen;	// lengths including NUL
4008     int		j, i;
4009     int		t;
4010     int		bc, gc;
4011     int		pbc, pgc;
4012     char_u	*p;
4013     int		wbadword[MAXWLEN];
4014     int		wgoodword[MAXWLEN];
4015 
4016     if (has_mbyte)
4017     {
4018 	// Get the characters from the multi-byte strings and put them in an
4019 	// int array for easy access.
4020 	for (p = badword, badlen = 0; *p != NUL; )
4021 	    wbadword[badlen++] = mb_cptr2char_adv(&p);
4022 	wbadword[badlen++] = 0;
4023 	for (p = goodword, goodlen = 0; *p != NUL; )
4024 	    wgoodword[goodlen++] = mb_cptr2char_adv(&p);
4025 	wgoodword[goodlen++] = 0;
4026     }
4027     else
4028     {
4029 	badlen = (int)STRLEN(badword) + 1;
4030 	goodlen = (int)STRLEN(goodword) + 1;
4031     }
4032 
4033     // We use "cnt" as an array: CNT(badword_idx, goodword_idx).
4034 #define CNT(a, b)   cnt[(a) + (b) * (badlen + 1)]
4035     cnt = ALLOC_MULT(int, (badlen + 1) * (goodlen + 1));
4036     if (cnt == NULL)
4037 	return 0;	// out of memory
4038 
4039     CNT(0, 0) = 0;
4040     for (j = 1; j <= goodlen; ++j)
4041 	CNT(0, j) = CNT(0, j - 1) + SCORE_INS;
4042 
4043     for (i = 1; i <= badlen; ++i)
4044     {
4045 	CNT(i, 0) = CNT(i - 1, 0) + SCORE_DEL;
4046 	for (j = 1; j <= goodlen; ++j)
4047 	{
4048 	    if (has_mbyte)
4049 	    {
4050 		bc = wbadword[i - 1];
4051 		gc = wgoodword[j - 1];
4052 	    }
4053 	    else
4054 	    {
4055 		bc = badword[i - 1];
4056 		gc = goodword[j - 1];
4057 	    }
4058 	    if (bc == gc)
4059 		CNT(i, j) = CNT(i - 1, j - 1);
4060 	    else
4061 	    {
4062 		// Use a better score when there is only a case difference.
4063 		if (SPELL_TOFOLD(bc) == SPELL_TOFOLD(gc))
4064 		    CNT(i, j) = SCORE_ICASE + CNT(i - 1, j - 1);
4065 		else
4066 		{
4067 		    // For a similar character use SCORE_SIMILAR.
4068 		    if (slang != NULL
4069 			    && slang->sl_has_map
4070 			    && similar_chars(slang, gc, bc))
4071 			CNT(i, j) = SCORE_SIMILAR + CNT(i - 1, j - 1);
4072 		    else
4073 			CNT(i, j) = SCORE_SUBST + CNT(i - 1, j - 1);
4074 		}
4075 
4076 		if (i > 1 && j > 1)
4077 		{
4078 		    if (has_mbyte)
4079 		    {
4080 			pbc = wbadword[i - 2];
4081 			pgc = wgoodword[j - 2];
4082 		    }
4083 		    else
4084 		    {
4085 			pbc = badword[i - 2];
4086 			pgc = goodword[j - 2];
4087 		    }
4088 		    if (bc == pgc && pbc == gc)
4089 		    {
4090 			t = SCORE_SWAP + CNT(i - 2, j - 2);
4091 			if (t < CNT(i, j))
4092 			    CNT(i, j) = t;
4093 		    }
4094 		}
4095 		t = SCORE_DEL + CNT(i - 1, j);
4096 		if (t < CNT(i, j))
4097 		    CNT(i, j) = t;
4098 		t = SCORE_INS + CNT(i, j - 1);
4099 		if (t < CNT(i, j))
4100 		    CNT(i, j) = t;
4101 	    }
4102 	}
4103     }
4104 
4105     i = CNT(badlen - 1, goodlen - 1);
4106     vim_free(cnt);
4107     return i;
4108 }
4109 
4110 typedef struct
4111 {
4112     int		badi;
4113     int		goodi;
4114     int		score;
4115 } limitscore_T;
4116 
4117 /*
4118  * Like spell_edit_score(), but with a limit on the score to make it faster.
4119  * May return SCORE_MAXMAX when the score is higher than "limit".
4120  *
4121  * This uses a stack for the edits still to be tried.
4122  * The idea comes from Aspell leditdist.cpp.  Rewritten in C and added support
4123  * for multi-byte characters.
4124  */
4125     static int
spell_edit_score_limit(slang_T * slang,char_u * badword,char_u * goodword,int limit)4126 spell_edit_score_limit(
4127     slang_T	*slang,
4128     char_u	*badword,
4129     char_u	*goodword,
4130     int		limit)
4131 {
4132     limitscore_T    stack[10];		// allow for over 3 * 2 edits
4133     int		    stackidx;
4134     int		    bi, gi;
4135     int		    bi2, gi2;
4136     int		    bc, gc;
4137     int		    score;
4138     int		    score_off;
4139     int		    minscore;
4140     int		    round;
4141 
4142     // Multi-byte characters require a bit more work, use a different function
4143     // to avoid testing "has_mbyte" quite often.
4144     if (has_mbyte)
4145 	return spell_edit_score_limit_w(slang, badword, goodword, limit);
4146 
4147     // The idea is to go from start to end over the words.  So long as
4148     // characters are equal just continue, this always gives the lowest score.
4149     // When there is a difference try several alternatives.  Each alternative
4150     // increases "score" for the edit distance.  Some of the alternatives are
4151     // pushed unto a stack and tried later, some are tried right away.  At the
4152     // end of the word the score for one alternative is known.  The lowest
4153     // possible score is stored in "minscore".
4154     stackidx = 0;
4155     bi = 0;
4156     gi = 0;
4157     score = 0;
4158     minscore = limit + 1;
4159 
4160     for (;;)
4161     {
4162 	// Skip over an equal part, score remains the same.
4163 	for (;;)
4164 	{
4165 	    bc = badword[bi];
4166 	    gc = goodword[gi];
4167 	    if (bc != gc)	// stop at a char that's different
4168 		break;
4169 	    if (bc == NUL)	// both words end
4170 	    {
4171 		if (score < minscore)
4172 		    minscore = score;
4173 		goto pop;	// do next alternative
4174 	    }
4175 	    ++bi;
4176 	    ++gi;
4177 	}
4178 
4179 	if (gc == NUL)    // goodword ends, delete badword chars
4180 	{
4181 	    do
4182 	    {
4183 		if ((score += SCORE_DEL) >= minscore)
4184 		    goto pop;	    // do next alternative
4185 	    } while (badword[++bi] != NUL);
4186 	    minscore = score;
4187 	}
4188 	else if (bc == NUL) // badword ends, insert badword chars
4189 	{
4190 	    do
4191 	    {
4192 		if ((score += SCORE_INS) >= minscore)
4193 		    goto pop;	    // do next alternative
4194 	    } while (goodword[++gi] != NUL);
4195 	    minscore = score;
4196 	}
4197 	else			// both words continue
4198 	{
4199 	    // If not close to the limit, perform a change.  Only try changes
4200 	    // that may lead to a lower score than "minscore".
4201 	    // round 0: try deleting a char from badword
4202 	    // round 1: try inserting a char in badword
4203 	    for (round = 0; round <= 1; ++round)
4204 	    {
4205 		score_off = score + (round == 0 ? SCORE_DEL : SCORE_INS);
4206 		if (score_off < minscore)
4207 		{
4208 		    if (score_off + SCORE_EDIT_MIN >= minscore)
4209 		    {
4210 			// Near the limit, rest of the words must match.  We
4211 			// can check that right now, no need to push an item
4212 			// onto the stack.
4213 			bi2 = bi + 1 - round;
4214 			gi2 = gi + round;
4215 			while (goodword[gi2] == badword[bi2])
4216 			{
4217 			    if (goodword[gi2] == NUL)
4218 			    {
4219 				minscore = score_off;
4220 				break;
4221 			    }
4222 			    ++bi2;
4223 			    ++gi2;
4224 			}
4225 		    }
4226 		    else
4227 		    {
4228 			// try deleting/inserting a character later
4229 			stack[stackidx].badi = bi + 1 - round;
4230 			stack[stackidx].goodi = gi + round;
4231 			stack[stackidx].score = score_off;
4232 			++stackidx;
4233 		    }
4234 		}
4235 	    }
4236 
4237 	    if (score + SCORE_SWAP < minscore)
4238 	    {
4239 		// If swapping two characters makes a match then the
4240 		// substitution is more expensive, thus there is no need to
4241 		// try both.
4242 		if (gc == badword[bi + 1] && bc == goodword[gi + 1])
4243 		{
4244 		    // Swap two characters, that is: skip them.
4245 		    gi += 2;
4246 		    bi += 2;
4247 		    score += SCORE_SWAP;
4248 		    continue;
4249 		}
4250 	    }
4251 
4252 	    // Substitute one character for another which is the same
4253 	    // thing as deleting a character from both goodword and badword.
4254 	    // Use a better score when there is only a case difference.
4255 	    if (SPELL_TOFOLD(bc) == SPELL_TOFOLD(gc))
4256 		score += SCORE_ICASE;
4257 	    else
4258 	    {
4259 		// For a similar character use SCORE_SIMILAR.
4260 		if (slang != NULL
4261 			&& slang->sl_has_map
4262 			&& similar_chars(slang, gc, bc))
4263 		    score += SCORE_SIMILAR;
4264 		else
4265 		    score += SCORE_SUBST;
4266 	    }
4267 
4268 	    if (score < minscore)
4269 	    {
4270 		// Do the substitution.
4271 		++gi;
4272 		++bi;
4273 		continue;
4274 	    }
4275 	}
4276 pop:
4277 	// Get here to try the next alternative, pop it from the stack.
4278 	if (stackidx == 0)		// stack is empty, finished
4279 	    break;
4280 
4281 	// pop an item from the stack
4282 	--stackidx;
4283 	gi = stack[stackidx].goodi;
4284 	bi = stack[stackidx].badi;
4285 	score = stack[stackidx].score;
4286     }
4287 
4288     // When the score goes over "limit" it may actually be much higher.
4289     // Return a very large number to avoid going below the limit when giving a
4290     // bonus.
4291     if (minscore > limit)
4292 	return SCORE_MAXMAX;
4293     return minscore;
4294 }
4295 
4296 /*
4297  * Multi-byte version of spell_edit_score_limit().
4298  * Keep it in sync with the above!
4299  */
4300     static int
spell_edit_score_limit_w(slang_T * slang,char_u * badword,char_u * goodword,int limit)4301 spell_edit_score_limit_w(
4302     slang_T	*slang,
4303     char_u	*badword,
4304     char_u	*goodword,
4305     int		limit)
4306 {
4307     limitscore_T    stack[10];		// allow for over 3 * 2 edits
4308     int		    stackidx;
4309     int		    bi, gi;
4310     int		    bi2, gi2;
4311     int		    bc, gc;
4312     int		    score;
4313     int		    score_off;
4314     int		    minscore;
4315     int		    round;
4316     char_u	    *p;
4317     int		    wbadword[MAXWLEN];
4318     int		    wgoodword[MAXWLEN];
4319 
4320     // Get the characters from the multi-byte strings and put them in an
4321     // int array for easy access.
4322     bi = 0;
4323     for (p = badword; *p != NUL; )
4324 	wbadword[bi++] = mb_cptr2char_adv(&p);
4325     wbadword[bi++] = 0;
4326     gi = 0;
4327     for (p = goodword; *p != NUL; )
4328 	wgoodword[gi++] = mb_cptr2char_adv(&p);
4329     wgoodword[gi++] = 0;
4330 
4331     // The idea is to go from start to end over the words.  So long as
4332     // characters are equal just continue, this always gives the lowest score.
4333     // When there is a difference try several alternatives.  Each alternative
4334     // increases "score" for the edit distance.  Some of the alternatives are
4335     // pushed unto a stack and tried later, some are tried right away.  At the
4336     // end of the word the score for one alternative is known.  The lowest
4337     // possible score is stored in "minscore".
4338     stackidx = 0;
4339     bi = 0;
4340     gi = 0;
4341     score = 0;
4342     minscore = limit + 1;
4343 
4344     for (;;)
4345     {
4346 	// Skip over an equal part, score remains the same.
4347 	for (;;)
4348 	{
4349 	    bc = wbadword[bi];
4350 	    gc = wgoodword[gi];
4351 
4352 	    if (bc != gc)	// stop at a char that's different
4353 		break;
4354 	    if (bc == NUL)	// both words end
4355 	    {
4356 		if (score < minscore)
4357 		    minscore = score;
4358 		goto pop;	// do next alternative
4359 	    }
4360 	    ++bi;
4361 	    ++gi;
4362 	}
4363 
4364 	if (gc == NUL)    // goodword ends, delete badword chars
4365 	{
4366 	    do
4367 	    {
4368 		if ((score += SCORE_DEL) >= minscore)
4369 		    goto pop;	    // do next alternative
4370 	    } while (wbadword[++bi] != NUL);
4371 	    minscore = score;
4372 	}
4373 	else if (bc == NUL) // badword ends, insert badword chars
4374 	{
4375 	    do
4376 	    {
4377 		if ((score += SCORE_INS) >= minscore)
4378 		    goto pop;	    // do next alternative
4379 	    } while (wgoodword[++gi] != NUL);
4380 	    minscore = score;
4381 	}
4382 	else			// both words continue
4383 	{
4384 	    // If not close to the limit, perform a change.  Only try changes
4385 	    // that may lead to a lower score than "minscore".
4386 	    // round 0: try deleting a char from badword
4387 	    // round 1: try inserting a char in badword
4388 	    for (round = 0; round <= 1; ++round)
4389 	    {
4390 		score_off = score + (round == 0 ? SCORE_DEL : SCORE_INS);
4391 		if (score_off < minscore)
4392 		{
4393 		    if (score_off + SCORE_EDIT_MIN >= minscore)
4394 		    {
4395 			// Near the limit, rest of the words must match.  We
4396 			// can check that right now, no need to push an item
4397 			// onto the stack.
4398 			bi2 = bi + 1 - round;
4399 			gi2 = gi + round;
4400 			while (wgoodword[gi2] == wbadword[bi2])
4401 			{
4402 			    if (wgoodword[gi2] == NUL)
4403 			    {
4404 				minscore = score_off;
4405 				break;
4406 			    }
4407 			    ++bi2;
4408 			    ++gi2;
4409 			}
4410 		    }
4411 		    else
4412 		    {
4413 			// try deleting a character from badword later
4414 			stack[stackidx].badi = bi + 1 - round;
4415 			stack[stackidx].goodi = gi + round;
4416 			stack[stackidx].score = score_off;
4417 			++stackidx;
4418 		    }
4419 		}
4420 	    }
4421 
4422 	    if (score + SCORE_SWAP < minscore)
4423 	    {
4424 		// If swapping two characters makes a match then the
4425 		// substitution is more expensive, thus there is no need to
4426 		// try both.
4427 		if (gc == wbadword[bi + 1] && bc == wgoodword[gi + 1])
4428 		{
4429 		    // Swap two characters, that is: skip them.
4430 		    gi += 2;
4431 		    bi += 2;
4432 		    score += SCORE_SWAP;
4433 		    continue;
4434 		}
4435 	    }
4436 
4437 	    // Substitute one character for another which is the same
4438 	    // thing as deleting a character from both goodword and badword.
4439 	    // Use a better score when there is only a case difference.
4440 	    if (SPELL_TOFOLD(bc) == SPELL_TOFOLD(gc))
4441 		score += SCORE_ICASE;
4442 	    else
4443 	    {
4444 		// For a similar character use SCORE_SIMILAR.
4445 		if (slang != NULL
4446 			&& slang->sl_has_map
4447 			&& similar_chars(slang, gc, bc))
4448 		    score += SCORE_SIMILAR;
4449 		else
4450 		    score += SCORE_SUBST;
4451 	    }
4452 
4453 	    if (score < minscore)
4454 	    {
4455 		// Do the substitution.
4456 		++gi;
4457 		++bi;
4458 		continue;
4459 	    }
4460 	}
4461 pop:
4462 	// Get here to try the next alternative, pop it from the stack.
4463 	if (stackidx == 0)		// stack is empty, finished
4464 	    break;
4465 
4466 	// pop an item from the stack
4467 	--stackidx;
4468 	gi = stack[stackidx].goodi;
4469 	bi = stack[stackidx].badi;
4470 	score = stack[stackidx].score;
4471     }
4472 
4473     // When the score goes over "limit" it may actually be much higher.
4474     // Return a very large number to avoid going below the limit when giving a
4475     // bonus.
4476     if (minscore > limit)
4477 	return SCORE_MAXMAX;
4478     return minscore;
4479 }
4480 #endif  // FEAT_SPELL
4481