xref: /vim-8.2.3635/src/spell.c (revision 899dddf8)
1 /* vi:set ts=8 sts=4 sw=4:
2  *
3  * VIM - Vi IMproved	by Bram Moolenaar
4  *
5  * Do ":help uganda"  in Vim to read copying and usage conditions.
6  * Do ":help credits" in Vim to see a list of people who contributed.
7  * See README.txt for an overview of the Vim source code.
8  */
9 
10 /*
11  * spell.c: code for spell checking
12  *
13  * The spell checking mechanism uses a tree (aka trie).  Each node in the tree
14  * has a list of bytes that can appear (siblings).  For each byte there is a
15  * pointer to the node with the byte that follows in the word (child).
16  *
17  * A NUL byte is used where the word may end.  The bytes are sorted, so that
18  * binary searching can be used and the NUL bytes are at the start.  The
19  * number of possible bytes is stored before the list of bytes.
20  *
21  * The tree uses two arrays: "byts" stores the characters, "idxs" stores
22  * either the next index or flags.  The tree starts at index 0.  For example,
23  * to lookup "vi" this sequence is followed:
24  *	i = 0
25  *	len = byts[i]
26  *	n = where "v" appears in byts[i + 1] to byts[i + len]
27  *	i = idxs[n]
28  *	len = byts[i]
29  *	n = where "i" appears in byts[i + 1] to byts[i + len]
30  *	i = idxs[n]
31  *	len = byts[i]
32  *	find that byts[i + 1] is 0, idxs[i + 1] has flags for "vi".
33  *
34  * There are two word trees: one with case-folded words and one with words in
35  * original case.  The second one is only used for keep-case words and is
36  * usually small.
37  *
38  * There is one additional tree for when not all prefixes are applied when
39  * generating the .spl file.  This tree stores all the possible prefixes, as
40  * if they were words.  At each word (prefix) end the prefix nr is stored, the
41  * following word must support this prefix nr.  And the condition nr is
42  * stored, used to lookup the condition that the word must match with.
43  *
44  * Thanks to Olaf Seibert for providing an example implementation of this tree
45  * and the compression mechanism.
46  * LZ trie ideas:
47  *	http://www.irb.hr/hr/home/ristov/papers/RistovLZtrieRevision1.pdf
48  * More papers: http://www-igm.univ-mlv.fr/~laporte/publi_en.html
49  *
50  * Matching involves checking the caps type: Onecap ALLCAP KeepCap.
51  *
52  * Why doesn't Vim use aspell/ispell/myspell/etc.?
53  * See ":help develop-spell".
54  */
55 
/*
 * Debugging switch: define SPELL_PRINTTREE to dump the word tree each time a
 * word has been added.  Only practical for small word lists!
 */
#if 0
# define SPELL_PRINTTREE
#endif

/*
 * Debugging switch: define DEBUG_TRIEWALK to print the changes that
 * suggest_trie_walk() makes for a specific word.
 */
#if 0
# define DEBUG_TRIEWALK
#endif
67 
/*
 * Adjust the score after finding suggestions, based on how much the
 * suggested word sounds like the bad word.  This is much faster than doing
 * it for every possible suggestion.
 * Disadvantage: When "the" is typed as "hte" it sounds quite different ("@"
 * vs "ht") and goes down in the list.
 * Used when 'spellsuggest' is set to "best".
 *
 * The result is the weighted average (3 * word_score + sound_score) / 4,
 * using integer division.  Both arguments are parenthesized so that
 * expressions such as "a + b" can be passed safely; each argument is
 * evaluated exactly once.
 */
#define RESCORE(word_score, sound_score) \
	((3 * (word_score) + (sound_score)) / 4)
77 
/*
 * Do the opposite of RESCORE(): based on a maximum end score (passed as
 * "word_score") and a known sound score, compute the maximum word score that
 * can be used: (4 * word_score - sound_score) / 3, using integer division.
 * Both arguments are parenthesized so that expressions can be passed safely.
 */
#define MAXSCORE(word_score, sound_score) \
	((4 * (word_score) - (sound_score)) / 3)
83 
84 /*
85  * Vim spell file format: <HEADER>
86  *			  <SECTIONS>
87  *			  <LWORDTREE>
88  *			  <KWORDTREE>
89  *			  <PREFIXTREE>
90  *
91  * <HEADER>: <fileID> <versionnr>
92  *
93  * <fileID>     8 bytes    "VIMspell"
94  * <versionnr>  1 byte	    VIMSPELLVERSION
95  *
96  *
97  * Sections make it possible to add information to the .spl file without
98  * making it incompatible with previous versions.  There are two kinds of
99  * sections:
100  * 1. Not essential for correct spell checking.  E.g. for making suggestions.
101  *    These are skipped when not supported.
102  * 2. Optional information, but essential for spell checking when present.
103  *    E.g. conditions for affixes.  When this section is present but not
104  *    supported an error message is given.
105  *
106  * <SECTIONS>: <section> ... <sectionend>
107  *
108  * <section>: <sectionID> <sectionflags> <sectionlen> (section contents)
109  *
110  * <sectionID>	  1 byte    number from 0 to 254 identifying the section
111  *
112  * <sectionflags> 1 byte    SNF_REQUIRED: this section is required for correct
113  *					    spell checking
114  *
115  * <sectionlen>   4 bytes   length of section contents, MSB first
116  *
117  * <sectionend>	  1 byte    SN_END
118  *
119  *
120  * sectionID == SN_INFO: <infotext>
121  * <infotext>	 N bytes    free format text with spell file info (version,
122  *			    website, etc)
123  *
124  * sectionID == SN_REGION: <regionname> ...
125  * <regionname>	 2 bytes    Up to 8 region names: ca, au, etc.  Lower case.
126  *			    First <regionname> is region 1.
127  *
128  * sectionID == SN_CHARFLAGS: <charflagslen> <charflags>
129  *				<folcharslen> <folchars>
130  * <charflagslen> 1 byte    Number of bytes in <charflags> (should be 128).
131  * <charflags>  N bytes     List of flags (first one is for character 128):
132  *			    0x01  word character	CF_WORD
133  *			    0x02  upper-case character	CF_UPPER
134  * <folcharslen>  2 bytes   Number of bytes in <folchars>.
135  * <folchars>     N bytes   Folded characters, first one is for character 128.
136  *
137  * sectionID == SN_MIDWORD: <midword>
138  * <midword>     N bytes    Characters that are word characters only when used
139  *			    in the middle of a word.
140  *
141  * sectionID == SN_PREFCOND: <prefcondcnt> <prefcond> ...
142  * <prefcondcnt> 2 bytes    Number of <prefcond> items following.
143  * <prefcond> : <condlen> <condstr>
144  * <condlen>	1 byte	    Length of <condstr>.
145  * <condstr>	N bytes	    Condition for the prefix.
146  *
147  * sectionID == SN_REP: <repcount> <rep> ...
148  * <repcount>	 2 bytes    number of <rep> items, MSB first.
149  * <rep> : <repfromlen> <repfrom> <reptolen> <repto>
150  * <repfromlen>	 1 byte	    length of <repfrom>
151  * <repfrom>	 N bytes    "from" part of replacement
152  * <reptolen>	 1 byte	    length of <repto>
153  * <repto>	 N bytes    "to" part of replacement
154  *
155  * sectionID == SN_REPSAL: <repcount> <rep> ...
156  *   just like SN_REP but for soundfolded words
157  *
158  * sectionID == SN_SAL: <salflags> <salcount> <sal> ...
159  * <salflags>	 1 byte	    flags for soundsalike conversion:
160  *			    SAL_F0LLOWUP
161  *			    SAL_COLLAPSE
162  *			    SAL_REM_ACCENTS
163  * <salcount>    2 bytes    number of <sal> items following
164  * <sal> : <salfromlen> <salfrom> <saltolen> <salto>
165  * <salfromlen>	 1 byte	    length of <salfrom>
166  * <salfrom>	 N bytes    "from" part of soundsalike
167  * <saltolen>	 1 byte	    length of <salto>
168  * <salto>	 N bytes    "to" part of soundsalike
169  *
170  * sectionID == SN_SOFO: <sofofromlen> <sofofrom> <sofotolen> <sofoto>
171  * <sofofromlen> 2 bytes    length of <sofofrom>
172  * <sofofrom>	 N bytes    "from" part of soundfold
173  * <sofotolen>	 2 bytes    length of <sofoto>
174  * <sofoto>	 N bytes    "to" part of soundfold
175  *
176  * sectionID == SN_SUGFILE: <timestamp>
177  * <timestamp>   8 bytes    time in seconds that must match with .sug file
178  *
179  * sectionID == SN_NOSPLITSUGS: nothing
180  *
181  * sectionID == SN_WORDS: <word> ...
182  * <word>	 N bytes    NUL terminated common word
183  *
184  * sectionID == SN_MAP: <mapstr>
185  * <mapstr>	 N bytes    String with sequences of similar characters,
186  *			    separated by slashes.
187  *
188  * sectionID == SN_COMPOUND: <compmax> <compminlen> <compsylmax> <compoptions>
189  *				<comppatcount> <comppattern> ... <compflags>
190  * <compmax>     1 byte	    Maximum nr of words in compound word.
191  * <compminlen>  1 byte	    Minimal word length for compounding.
192  * <compsylmax>  1 byte	    Maximum nr of syllables in compound word.
193  * <compoptions> 2 bytes    COMP_ flags.
194  * <comppatcount> 2 bytes   number of <comppattern> following
195  * <compflags>   N bytes    Flags from COMPOUNDRULE items, separated by
196  *			    slashes.
197  *
198  * <comppattern>: <comppatlen> <comppattext>
199  * <comppatlen>	 1 byte	    length of <comppattext>
200  * <comppattext> N bytes    end or begin chars from CHECKCOMPOUNDPATTERN
201  *
202  * sectionID == SN_NOBREAK: (empty, its presence is what matters)
203  *
204  * sectionID == SN_SYLLABLE: <syllable>
205  * <syllable>    N bytes    String from SYLLABLE item.
206  *
207  * <LWORDTREE>: <wordtree>
208  *
209  * <KWORDTREE>: <wordtree>
210  *
211  * <PREFIXTREE>: <wordtree>
212  *
213  *
214  * <wordtree>: <nodecount> <nodedata> ...
215  *
216  * <nodecount>	4 bytes	    Number of nodes following.  MSB first.
217  *
218  * <nodedata>: <siblingcount> <sibling> ...
219  *
220  * <siblingcount> 1 byte    Number of siblings in this node.  The siblings
221  *			    follow in sorted order.
222  *
223  * <sibling>: <byte> [ <nodeidx> <xbyte>
224  *		      | <flags> [<flags2>] [<region>] [<affixID>]
225  *		      | [<pflags>] <affixID> <prefcondnr> ]
226  *
227  * <byte>	1 byte	    Byte value of the sibling.  Special cases:
228  *			    BY_NOFLAGS: End of word without flags and for all
229  *					regions.
230  *					For PREFIXTREE <affixID> and
231  *					<prefcondnr> follow.
232  *			    BY_FLAGS:   End of word, <flags> follow.
233  *					For PREFIXTREE <pflags>, <affixID>
234  *					and <prefcondnr> follow.
235  *			    BY_FLAGS2:  End of word, <flags> and <flags2>
236  *					follow.  Not used in PREFIXTREE.
237  *			    BY_INDEX:   Child of sibling is shared, <nodeidx>
238  *					and <xbyte> follow.
239  *
240  * <nodeidx>	3 bytes	    Index of child for this sibling, MSB first.
241  *
242  * <xbyte>	1 byte	    byte value of the sibling.
243  *
244  * <flags>	1 byte	    bitmask of:
245  *			    WF_ALLCAP	word must have only capitals
246  *			    WF_ONECAP   first char of word must be capital
247  *			    WF_KEEPCAP	keep-case word
248  *			    WF_FIXCAP   keep-case word, all caps not allowed
249  *			    WF_RARE	rare word
250  *			    WF_BANNED	bad word
251  *			    WF_REGION	<region> follows
252  *			    WF_AFX	<affixID> follows
253  *
254  * <flags2>	1 byte	    Bitmask of:
255  *			    WF_HAS_AFF >> 8   word includes affix
256  *			    WF_NEEDCOMP >> 8  word only valid in compound
257  *			    WF_NOSUGGEST >> 8  word not used for suggestions
258  *			    WF_COMPROOT >> 8  word already a compound
259  *
260  * <pflags>	1 byte	    bitmask of:
261  *			    WFP_RARE	rare prefix
262  *			    WFP_NC	non-combining prefix
263  *			    WFP_UP	letter after prefix made upper case
264  *
265  * <region>	1 byte	    Bitmask for regions in which word is valid.  When
266  *			    omitted it's valid in all regions.
267  *			    Lowest bit is for region 1.
268  *
269  * <affixID>	1 byte	    ID of affix that can be used with this word.  In
270  *			    PREFIXTREE used for the required prefix ID.
271  *
 * <prefcondnr>	2 bytes	    Prefix condition number, index in the <prefcond>
 *			    list from the SN_PREFCOND section.
274  *
275  * All text characters are in 'encoding', but stored as single bytes.
276  */
277 
278 /*
279  * Vim .sug file format:  <SUGHEADER>
280  *			  <SUGWORDTREE>
281  *			  <SUGTABLE>
282  *
283  * <SUGHEADER>: <fileID> <versionnr> <timestamp>
284  *
285  * <fileID>     6 bytes     "VIMsug"
286  * <versionnr>  1 byte      VIMSUGVERSION
287  * <timestamp>  8 bytes     timestamp that must match with .spl file
288  *
289  *
290  * <SUGWORDTREE>: <wordtree>  (see above, no flags or region used)
291  *
292  *
293  * <SUGTABLE>: <sugwcount> <sugline> ...
294  *
295  * <sugwcount>	4 bytes	    number of <sugline> following
296  *
297  * <sugline>: <sugnr> ... NUL
298  *
299  * <sugnr>:     X bytes     word number that results in this soundfolded word,
300  *			    stored as an offset to the previous number in as
301  *			    few bytes as possible, see offset2bytes())
302  */
303 
304 #if defined(MSDOS) || defined(WIN16) || defined(WIN32) || defined(_WIN64)
305 # include "vimio.h"	/* for lseek(), must be before vim.h */
306 #endif
307 
308 #include "vim.h"
309 
310 #if defined(FEAT_SPELL) || defined(PROTO)
311 
312 #ifdef HAVE_FCNTL_H
313 # include <fcntl.h>
314 #endif
315 
316 #ifndef UNIX		/* it's in os_unix.h for Unix */
317 # include <time.h>	/* for time_t */
318 #endif
319 
/*
 * Assumed maximum length of a word, in bytes.  Some code stores a word
 * length in a single byte, therefore this must never exceed 255.
 */
#define MAXWLEN 250
323 
/*
 * The type used for indexes in the word tree needs to be at least 4 bytes.
 * "int" is used when it is wide enough, otherwise "long".  If int is 8 bytes
 * something smaller could be used, but what?
 */
#if SIZEOF_INT <= 3
typedef long idx_T;
#else
typedef int idx_T;
#endif
331 
/*
 * Word flags, stored in <flags>.  Only the lowest byte is available for
 * these, because the region byte is placed above it.
 */
#define WF_REGION   (1 << 0)	/* a region byte follows */
#define WF_ONECAP   (1 << 1)	/* word with one capital (or all capitals) */
#define WF_ALLCAP   (1 << 2)	/* word must be all capitals */
#define WF_RARE	    (1 << 3)	/* rare word */
#define WF_BANNED   (1 << 4)	/* bad word */
#define WF_AFX	    (1 << 5)	/* an affix ID follows */
#define WF_FIXCAP   (1 << 6)	/* keep-case word, allcap not allowed */
#define WF_KEEPCAP  (1 << 7)	/* keep-case word */

/*
 * Flags from <flags2>.  They are shifted up by one byte so that they can be
 * combined with the flags above in wn_flags.
 */
#define WF_HAS_AFF   (1 << 8)	/* word includes affix */
#define WF_NEEDCOMP  (1 << 9)	/* word only valid in compound */
#define WF_NOSUGGEST (1 << 10)	/* word not to be suggested */
#define WF_COMPROOT  (1 << 11)	/* already compounded word, COMPOUNDROOT */

/* Only used for su_badflags. */
#define WF_MIXCAP   (1 << 5)	/* mix of upper and lower case: macaRONI */

/* All the flags that say something about the case of a word. */
#define WF_CAPMASK (WF_ONECAP | WF_ALLCAP | WF_KEEPCAP | WF_FIXCAP)

/* Prefix flags, stored in <pflags>. */
#define WFP_RARE    (1 << 0)	/* rare prefix */
#define WFP_NC	    (1 << 1)	/* prefix is not combining */
#define WFP_UP	    (1 << 2)	/* to-upper prefix */

/*
 * Flags for postponed prefixes as kept in sl_pidxs.  They are positioned
 * above the affixID (one byte) and the prefcondnr (two bytes).
 */
#define WF_RAREPFX  (WFP_RARE << 24)	/* rare postponed prefix */
#define WF_PFX_NC   (WFP_NC << 24)	/* non-combining postponed prefix */
#define WF_PFX_UP   (WFP_UP << 24)	/* to-upper postponed prefix */

/* Flags stored in <compoptions>. */
#define COMP_CHECKDUP		(1 << 0)	/* CHECKCOMPOUNDDUP */
#define COMP_CHECKREP		(1 << 1)	/* CHECKCOMPOUNDREP */
#define COMP_CHECKCASE		(1 << 2)	/* CHECKCOMPOUNDCASE */
#define COMP_CHECKTRIPLE	(1 << 3)	/* CHECKCOMPOUNDTRIPLE */

/*
 * Special values for <byte>.  Some of them only occur in the tree for
 * postponed prefixes, others only in the other trees.  A bit messy...
 */
#define BY_NOFLAGS	0	/* end of word without flags or region; for
				 * postponed prefix: no <pflags> */
#define BY_INDEX	1	/* child is shared, index follows */
#define BY_FLAGS	2	/* end of word, <flags> byte follows; for
				 * postponed prefix: <pflags> follows */
#define BY_FLAGS2	3	/* end of word, <flags> and <flags2> bytes
				 * follow; never used in prefix tree */
#define BY_SPECIAL  BY_FLAGS2	/* highest special byte value */
384 
385 /* Info from "REP", "REPSAL" and "SAL" entries in ".aff" file used in si_rep,
386  * si_repsal, sl_rep, and si_sal.  Not for sl_sal!
387  * One replacement: from "ft_from" to "ft_to". */
388 typedef struct fromto_S
389 {
390     char_u	*ft_from;
391     char_u	*ft_to;
392 } fromto_T;
393 
394 /* Info from "SAL" entries in ".aff" file used in sl_sal.
395  * The info is split for quick processing by spell_soundfold().
396  * Note that "sm_oneof" and "sm_rules" point into sm_lead. */
397 typedef struct salitem_S
398 {
399     char_u	*sm_lead;	/* leading letters */
400     int		sm_leadlen;	/* length of "sm_lead" */
401     char_u	*sm_oneof;	/* letters from () or NULL */
402     char_u	*sm_rules;	/* rules like ^, $, priority */
403     char_u	*sm_to;		/* replacement. */
404 #ifdef FEAT_MBYTE
405     int		*sm_lead_w;	/* wide character copy of "sm_lead" */
406     int		*sm_oneof_w;	/* wide character copy of "sm_oneof" */
407     int		*sm_to_w;	/* wide character copy of "sm_to" */
408 #endif
409 } salitem_T;
410 
/* Element type of "sl_sal_first": "int" with FEAT_MBYTE, otherwise "short". */
#ifndef FEAT_MBYTE
typedef short salfirst_T;
#else
typedef int salfirst_T;
#endif
416 
/*
 * Error codes for reading a spell file.  All SP_*ERROR values are negative,
 * since read_cnt_string() uses the positive values.
 */
#define SP_TRUNCERROR	-1	/* the spell file is truncated */
#define SP_FORMERROR	-2	/* the spell file has a format error */
#define SP_OTHERERROR	-3	/* another error while reading the spell file */
422 
423 /*
424  * Structure used to store words and other info for one language, loaded from
425  * a .spl file.
426  * The main access is through the tree in "sl_fbyts/sl_fidxs", storing the
427  * case-folded words.  "sl_kbyts/sl_kidxs" is for keep-case words.
428  *
429  * The "byts" array stores the possible bytes in each tree node, preceded by
430  * the number of possible bytes, sorted on byte value:
431  *	<len> <byte1> <byte2> ...
432  * The "idxs" array stores the index of the child node corresponding to the
433  * byte in "byts".
434  * Exception: when the byte is zero, the word may end here and "idxs" holds
435  * the flags, region mask and affixID for the word.  There may be several
436  * zeros in sequence for alternative flag/region/affixID combinations.
437  */
438 typedef struct slang_S slang_T;
439 struct slang_S
440 {
441     slang_T	*sl_next;	/* next language */
442     char_u	*sl_name;	/* language name "en", "en.rare", "nl", etc. */
443     char_u	*sl_fname;	/* name of .spl file */
444     int		sl_add;		/* TRUE if it's a .add file. */
445 
446     char_u	*sl_fbyts;	/* case-folded word bytes */
447     idx_T	*sl_fidxs;	/* case-folded word indexes */
448     char_u	*sl_kbyts;	/* keep-case word bytes */
449     idx_T	*sl_kidxs;	/* keep-case word indexes */
450     char_u	*sl_pbyts;	/* prefix tree word bytes */
451     idx_T	*sl_pidxs;	/* prefix tree word indexes */
452 
453     char_u	*sl_info;	/* infotext string or NULL */
454 
455     char_u	sl_regions[17];	/* table with up to 8 region names plus NUL */
456 
457     char_u	*sl_midword;	/* MIDWORD string or NULL */
458 
459     hashtab_T	sl_wordcount;	/* hashtable with word count, wordcount_T */
460 
461     int		sl_compmax;	/* COMPOUNDWORDMAX (default: MAXWLEN) */
462     int		sl_compminlen;	/* COMPOUNDMIN (default: 0) */
463     int		sl_compsylmax;	/* COMPOUNDSYLMAX (default: MAXWLEN) */
464     int		sl_compoptions;	/* COMP_* flags */
465     garray_T	sl_comppat;	/* CHECKCOMPOUNDPATTERN items */
466     regprog_T	*sl_compprog;	/* COMPOUNDRULE turned into a regexp progrm
467 				 * (NULL when no compounding) */
468     char_u	*sl_compstartflags; /* flags for first compound word */
469     char_u	*sl_compallflags; /* all flags for compound words */
470     char_u	sl_nobreak;	/* When TRUE: no spaces between words */
471     char_u	*sl_syllable;	/* SYLLABLE repeatable chars or NULL */
472     garray_T	sl_syl_items;	/* syllable items */
473 
474     int		sl_prefixcnt;	/* number of items in "sl_prefprog" */
475     regprog_T	**sl_prefprog;	/* table with regprogs for prefixes */
476 
477     garray_T	sl_rep;		/* list of fromto_T entries from REP lines */
478     short	sl_rep_first[256];  /* indexes where byte first appears, -1 if
479 				       there is none */
480     garray_T	sl_sal;		/* list of salitem_T entries from SAL lines */
481     salfirst_T	sl_sal_first[256];  /* indexes where byte first appears, -1 if
482 				       there is none */
483     int		sl_followup;	/* SAL followup */
484     int		sl_collapse;	/* SAL collapse_result */
485     int		sl_rem_accents;	/* SAL remove_accents */
486     int		sl_sofo;	/* SOFOFROM and SOFOTO instead of SAL items:
487 				 * "sl_sal_first" maps chars, when has_mbyte
488 				 * "sl_sal" is a list of wide char lists. */
489     garray_T	sl_repsal;	/* list of fromto_T entries from REPSAL lines */
490     short	sl_repsal_first[256];  /* sl_rep_first for REPSAL lines */
491     int		sl_nosplitsugs;	/* don't suggest splitting a word */
492 
493     /* Info from the .sug file.  Loaded on demand. */
494     time_t	sl_sugtime;	/* timestamp for .sug file */
495     char_u	*sl_sbyts;	/* soundfolded word bytes */
496     idx_T	*sl_sidxs;	/* soundfolded word indexes */
497     buf_T	*sl_sugbuf;	/* buffer with word number table */
498     int		sl_sugloaded;	/* TRUE when .sug file was loaded or failed to
499 				   load */
500 
501     int		sl_has_map;	/* TRUE if there is a MAP line */
502 #ifdef FEAT_MBYTE
503     hashtab_T	sl_map_hash;	/* MAP for multi-byte chars */
504     int		sl_map_array[256]; /* MAP for first 256 chars */
505 #else
506     char_u	sl_map_array[256]; /* MAP for first 256 chars */
507 #endif
508     hashtab_T	sl_sounddone;	/* table with soundfolded words that have
509 				   handled, see add_sound_suggest() */
510 };
511 
512 /* First language that is loaded, start of the linked list of loaded
513  * languages. */
514 static slang_T *first_lang = NULL;
515 
/* The soundsalike flags as they are stored in the .spl file. */
#define SAL_F0LLOWUP		(1 << 0)
#define SAL_COLLAPSE		(1 << 1)
#define SAL_REM_ACCENTS		(1 << 2)
520 
521 /*
522  * Structure used in "b_langp", filled from 'spelllang'.
523  */
524 typedef struct langp_S
525 {
526     slang_T	*lp_slang;	/* info for this language */
527     slang_T	*lp_sallang;	/* language used for sound folding or NULL */
528     slang_T	*lp_replang;	/* language used for REP items or NULL */
529     int		lp_region;	/* bitmask for region or REGION_ALL */
530 } langp_T;
531 
532 #define LANGP_ENTRY(ga, i)	(((langp_T *)(ga).ga_data) + (i))
533 
/* Region bitmask for a word that is valid in all regions. */
#define REGION_ALL 0xff

/* A Vim spell file starts with this string, followed by the version. */
#define VIMSPELLMAGIC "VIMspell"
#define VIMSPELLMAGICL 8
#define VIMSPELLVERSION 50

/* A Vim .sug file starts with this string, followed by the version. */
#define VIMSUGMAGIC "VIMsug"
#define VIMSUGMAGICL 6
#define VIMSUGVERSION 1

/*
 * The section IDs.  These may only be renumbered when VIMSPELLVERSION
 * changes!
 */
#define SN_REGION	0	/* section with <regionname> */
#define SN_CHARFLAGS	1	/* section with charflags */
#define SN_MIDWORD	2	/* section with <midword> */
#define SN_PREFCOND	3	/* section with <prefcond> */
#define SN_REP		4	/* section with REP items */
#define SN_SAL		5	/* section with SAL items */
#define SN_SOFO		6	/* section for soundfolding */
#define SN_MAP		7	/* section with MAP items */
#define SN_COMPOUND	8	/* section for compound words */
#define SN_SYLLABLE	9	/* section for syllable */
#define SN_NOBREAK	10	/* section for NOBREAK */
#define SN_SUGFILE	11	/* the timestamp for the .sug file */
#define SN_REPSAL	12	/* section with REPSAL items */
#define SN_WORDS	13	/* the common words */
#define SN_NOSPLITSUGS	14	/* words are not split for suggestions */
#define SN_INFO		15	/* section with info */
#define SN_END		255	/* marks the end of the sections */

/* Value for <sectionflags>: the section is required. */
#define SNF_REQUIRED	1

/* The result values.  A lower number is accepted over a higher one. */
#define SP_BANNED	-1
#define SP_OK		0
#define SP_RARE		1
#define SP_LOCAL	2
#define SP_BAD		3
571 
572 /* file used for "zG" and "zW" */
573 static char_u	*int_wordlist = NULL;
574 
575 typedef struct wordcount_S
576 {
577     short_u	wc_count;	    /* nr of times word was seen */
578     char_u	wc_word[1];	    /* word, actually longer */
579 } wordcount_T;
580 
581 static wordcount_T dumwc;
582 #define WC_KEY_OFF  (dumwc.wc_word - (char_u *)&dumwc)
583 #define HI2WC(hi)     ((wordcount_T *)((hi)->hi_key - WC_KEY_OFF))
584 #define MAXWORDCOUNT 0xffff
585 
586 /*
587  * Information used when looking for suggestions.
588  */
589 typedef struct suginfo_S
590 {
591     garray_T	su_ga;		    /* suggestions, contains "suggest_T" */
592     int		su_maxcount;	    /* max. number of suggestions displayed */
593     int		su_maxscore;	    /* maximum score for adding to su_ga */
594     int		su_sfmaxscore;	    /* idem, for when doing soundfold words */
595     garray_T	su_sga;		    /* like su_ga, sound-folded scoring */
596     char_u	*su_badptr;	    /* start of bad word in line */
597     int		su_badlen;	    /* length of detected bad word in line */
598     int		su_badflags;	    /* caps flags for bad word */
599     char_u	su_badword[MAXWLEN]; /* bad word truncated at su_badlen */
600     char_u	su_fbadword[MAXWLEN]; /* su_badword case-folded */
601     char_u	su_sal_badword[MAXWLEN]; /* su_badword soundfolded */
602     hashtab_T	su_banned;	    /* table with banned words */
603     slang_T	*su_sallang;	    /* default language for sound folding */
604 } suginfo_T;
605 
606 /* One word suggestion.  Used in "si_ga". */
607 typedef struct suggest_S
608 {
609     char_u	*st_word;	/* suggested word, allocated string */
610     int		st_wordlen;	/* STRLEN(st_word) */
611     int		st_orglen;	/* length of replaced text */
612     int		st_score;	/* lower is better */
613     int		st_altscore;	/* used when st_score compares equal */
614     int		st_salscore;	/* st_score is for soundalike */
615     int		st_had_bonus;	/* bonus already included in score */
616     slang_T	*st_slang;	/* language used for sound folding */
617 } suggest_T;
618 
619 #define SUG(ga, i) (((suggest_T *)(ga).ga_data)[i])
620 
/*
 * TRUE if "word" appears in the list of banned words of "su".
 * "su" is a pointer to a struct with an "su_banned" hashtable.  Both
 * arguments are parenthesized so that expressions can be passed safely.
 */
#define WAS_BANNED(su, word) \
	(!HASHITEM_EMPTY(hash_find(&(su)->su_banned, (word))))
623 
/*
 * The number of suggestions to keep when cleaning up.  More must be kept
 * than what is displayed: when rescore_suggestions() is called the score may
 * change and wrong suggestions may be removed later.
 */
#define SUG_CLEAN_COUNT(su) \
	((su)->su_maxcount >= 130 ? (su)->su_maxcount + 20 : 150)

/*
 * The threshold for sorting and cleaning up the suggestions.  Avoids keeping
 * lots of suggestions that are not going to be displayed.
 */
#define SUG_MAX_COUNT(su)	(SUG_CLEAN_COUNT(su) + 50)
632 
/* The score for the various changes; a lower score is a better match. */
#define SCORE_SPLIT	149	/* split bad word */
#define SCORE_SPLIT_NO	249	/* split bad word with NOSPLITSUGS */
#define SCORE_ICASE	52	/* slightly different case */
#define SCORE_REGION	200	/* word is for different region */
#define SCORE_RARE	180	/* rare word */
#define SCORE_SWAP	75	/* swap two characters */
#define SCORE_SWAP3	110	/* swap two characters in three */
#define SCORE_REP	65	/* REP replacement */
#define SCORE_SUBST	93	/* substitute a character */
#define SCORE_SIMILAR	33	/* substitute a similar character */
#define SCORE_SUBCOMP	33	/* substitute a composing character */
#define SCORE_DEL	94	/* delete a character */
#define SCORE_DELDUP	66	/* delete a duplicated character */
#define SCORE_DELCOMP	28	/* delete a composing character */
#define SCORE_INS	96	/* insert a character */
#define SCORE_INSDUP	67	/* insert a duplicate character */
#define SCORE_INSCOMP	30	/* insert a composing character */
#define SCORE_NONWORD	103	/* change non-word to word char */

#define SCORE_FILE	30	/* suggestion from a file */
#define SCORE_MAXINIT	350	/* Initial maximum score: higher == slower.
				 * 350 allows for about three changes. */

#define SCORE_COMMON1	30	/* subtracted for words seen before */
#define SCORE_COMMON2	40	/* subtracted for words often seen */
#define SCORE_COMMON3	50	/* subtracted for words very often seen */
#define SCORE_THRES2	10	/* word count threshold for COMMON2 */
#define SCORE_THRES3	100	/* word count threshold for COMMON3 */

/*
 * When trying changed soundfold words it becomes slow when trying more than
 * two changes.  With fewer than two changes it's slightly faster but we miss
 * a few good suggestions.  In rare cases we need to try three or four
 * changes.
 */
#define SCORE_SFMAX1	200	/* maximum score for first try */
#define SCORE_SFMAX2	300	/* maximum score for second try */
#define SCORE_SFMAX3	400	/* maximum score for third try */

/* The expansion is parenthesized so that it is safe in any expression. */
#define SCORE_BIG	(SCORE_INS * 3)	/* big difference */
#define SCORE_MAXMAX	999999		/* accept any score */
#define SCORE_LIMITMAX	350		/* for spell_edit_score_limit() */

/*
 * For spell_edit_score_limit() we need to know the minimum value of
 * SCORE_ICASE, SCORE_SWAP, SCORE_DEL, SCORE_SIMILAR and SCORE_INS.
 */
#define SCORE_EDIT_MIN	SCORE_SIMILAR
678 
679 /*
680  * Structure to store info for word matching.
681  */
682 typedef struct matchinf_S
683 {
684     langp_T	*mi_lp;			/* info for language and region */
685 
686     /* pointers to original text to be checked */
687     char_u	*mi_word;		/* start of word being checked */
688     char_u	*mi_end;		/* end of matching word so far */
689     char_u	*mi_fend;		/* next char to be added to mi_fword */
690     char_u	*mi_cend;		/* char after what was used for
691 					   mi_capflags */
692 
693     /* case-folded text */
694     char_u	mi_fword[MAXWLEN + 1];	/* mi_word case-folded */
695     int		mi_fwordlen;		/* nr of valid bytes in mi_fword */
696 
697     /* for when checking word after a prefix */
698     int		mi_prefarridx;		/* index in sl_pidxs with list of
699 					   affixID/condition */
700     int		mi_prefcnt;		/* number of entries at mi_prefarridx */
701     int		mi_prefixlen;		/* byte length of prefix */
702 #ifdef FEAT_MBYTE
703     int		mi_cprefixlen;		/* byte length of prefix in original
704 					   case */
705 #else
706 # define mi_cprefixlen mi_prefixlen	/* it's the same value */
707 #endif
708 
709     /* for when checking a compound word */
710     int		mi_compoff;		/* start of following word offset */
711     char_u	mi_compflags[MAXWLEN];	/* flags for compound words used */
712     int		mi_complen;		/* nr of compound words used */
713     int		mi_compextra;		/* nr of COMPOUNDROOT words */
714 
715     /* others */
716     int		mi_result;		/* result so far: SP_BAD, SP_OK, etc. */
717     int		mi_capflags;		/* WF_ONECAP WF_ALLCAP WF_KEEPCAP */
718     buf_T	*mi_buf;		/* buffer being checked */
719 
720     /* for NOBREAK */
721     int		mi_result2;		/* "mi_resul" without following word */
722     char_u	*mi_end2;		/* "mi_end" without following word */
723 } matchinf_T;
724 
725 /*
726  * The tables used for recognizing word characters according to spelling.
727  * These are only used for the first 256 characters of 'encoding'.
728  */
729 typedef struct spelltab_S
730 {
731     char_u  st_isw[256];	/* flags: is word char */
732     char_u  st_isu[256];	/* flags: is uppercase char */
733     char_u  st_fold[256];	/* chars: folded case */
734     char_u  st_upper[256];	/* chars: upper case */
735 } spelltab_T;
736 
737 static spelltab_T   spelltab;
738 static int	    did_set_spelltab;
739 
740 #define CF_WORD		0x01
741 #define CF_UPPER	0x02
742 
743 static void clear_spell_chartab __ARGS((spelltab_T *sp));
744 static int set_spell_finish __ARGS((spelltab_T	*new_st));
745 static int spell_iswordp __ARGS((char_u *p, buf_T *buf));
746 static int spell_iswordp_nmw __ARGS((char_u *p));
747 #ifdef FEAT_MBYTE
748 static int spell_iswordp_w __ARGS((int *p, buf_T *buf));
749 #endif
750 static int write_spell_prefcond __ARGS((FILE *fd, garray_T *gap));
751 
/*
 * The states that are tried at each node in the tree when looking for
 * suggestions.  The order of the values matters: STATE_START is zero and
 * the others count up from there.
 */
typedef enum
{
    /* At the start of a node check for NUL bytes (goodword ends); if the
     * badword ends there is a match, otherwise try splitting the word. */
    STATE_START = 0,
    STATE_NOPREFIX,	/* try it without a prefix */
    STATE_SPLITUNDO,	/* undo the splitting */
    STATE_ENDNUL,	/* past the NUL bytes at the start of the node */
    STATE_PLAIN,	/* use each byte of the node */
    STATE_DEL,		/* delete a byte from the bad word */
    STATE_INS_PREP,	/* prepare for inserting bytes */
    STATE_INS,		/* insert a byte in the bad word */
    STATE_SWAP,		/* swap two bytes */
    STATE_UNSWAP,	/* undo swapping two characters */
    STATE_SWAP3,	/* swap two characters over three */
    STATE_UNSWAP3,	/* undo swapping two characters over three */
    STATE_UNROT3L,	/* undo rotating three characters left */
    STATE_UNROT3R,	/* undo rotating three characters right */
    STATE_REP_INI,	/* prepare for using REP items */
    STATE_REP,		/* use matching REP items from the .aff file */
    STATE_REP_UNDO,	/* undo a REP item replacement */
    STATE_FINAL		/* end of this node */
} state_T;
778 
779 /*
780  * Struct to keep the state at each level in suggest_try_change().
     * The values used for "ts_isdiff", "ts_flags" and the special values of
     * "ts_prefixdepth" are the DIFF_, TSF_ and PFD_ defines that follow this
     * struct.
781  */
782 typedef struct trystate_S
783 {
784     state_T	ts_state;	/* state at this level, STATE_ */
785     int		ts_score;	/* score */
786     idx_T	ts_arridx;	/* index in tree array, start of node */
787     short	ts_curi;	/* index in list of child nodes */
788     char_u	ts_fidx;	/* index in fword[], case-folded bad word */
789     char_u	ts_fidxtry;	/* ts_fidx at which bytes may be changed */
790     char_u	ts_twordlen;	/* valid length of tword[] */
791     char_u	ts_prefixdepth;	/* stack depth for end of prefix or
792 				 * PFD_PREFIXTREE or PFD_NOPREFIX */
793     char_u	ts_flags;	/* TSF_ flags */
794 #ifdef FEAT_MBYTE
795     char_u	ts_tcharlen;	/* number of bytes in tword character */
796     char_u	ts_tcharidx;	/* current byte index in tword character */
797     char_u	ts_isdiff;	/* DIFF_ values */
798     char_u	ts_fcharstart;	/* index in fword where badword char started */
799 #endif
800     char_u	ts_prewordlen;	/* length of word in "preword[]" */
801     char_u	ts_splitoff;	/* index in "tword" after last split */
802     char_u	ts_splitfidx;	/* "ts_fidx" at word split */
803     char_u	ts_complen;	/* nr of compound words used */
804     char_u	ts_compsplit;	/* index for "compflags" where word was split */
805     char_u	ts_save_badflags;   /* su_badflags saved here */
806     char_u	ts_delidx;	/* index in fword for char that was deleted,
807 				   valid when "ts_flags" has TSF_DIDDEL */
808 } trystate_T;
809 
810 /* values for ts_isdiff in trystate_T */
811 #define DIFF_NONE	0	/* no different byte (yet) */
812 #define DIFF_YES	1	/* different byte found */
813 #define DIFF_INSERT	2	/* inserting character */
814 
815 /* bit values for ts_flags in trystate_T */
816 #define TSF_PREFIXOK	1	/* already checked that prefix is OK */
817 #define TSF_DIDSPLIT	2	/* tried split at this point */
818 #define TSF_DIDDEL	4	/* did a delete, "ts_delidx" has index */
819 
820 /* special values for ts_prefixdepth in trystate_T */
821 #define PFD_NOPREFIX	0xff	/* not using prefixes */
822 #define PFD_PREFIXTREE	0xfe	/* walking through the prefix tree */
823 #define PFD_NOTSPECIAL	0xfd	/* highest value that's not special */
824 
825 /* mode values passed as "mode" to find_word() and find_prefix() */
826 #define FIND_FOLDWORD	    0	/* find word case-folded */
827 #define FIND_KEEPWORD	    1	/* find keep-case word */
828 #define FIND_PREFIX	    2	/* find word after prefix */
829 #define FIND_COMPOUND	    3	/* find case-folded compound word */
830 #define FIND_KEEPCOMPOUND   4	/* find keep-case compound word */
831 
/* Prototypes for the file-local (static) functions.
 * NOTE(review): __ARGS() is defined outside this chunk; presumably it hides
 * the parameter lists from compilers without prototype support -- confirm. */
832 static slang_T *slang_alloc __ARGS((char_u *lang));
833 static void slang_free __ARGS((slang_T *lp));
834 static void slang_clear __ARGS((slang_T *lp));
835 static void slang_clear_sug __ARGS((slang_T *lp));
836 static void find_word __ARGS((matchinf_T *mip, int mode));
837 static int can_compound __ARGS((slang_T *slang, char_u *word, char_u *flags));
838 static int valid_word_prefix __ARGS((int totprefcnt, int arridx, int flags, char_u *word, slang_T *slang, int cond_req));
839 static void find_prefix __ARGS((matchinf_T *mip, int mode));
840 static int fold_more __ARGS((matchinf_T *mip));
841 static int spell_valid_case __ARGS((int wordflags, int treeflags));
842 static int no_spell_checking __ARGS((win_T *wp));
843 static void spell_load_lang __ARGS((char_u *lang));
844 static char_u *spell_enc __ARGS((void));
845 static void int_wordlist_spl __ARGS((char_u *fname));
846 static void spell_load_cb __ARGS((char_u *fname, void *cookie));
847 static slang_T *spell_load_file __ARGS((char_u *fname, char_u *lang, slang_T *old_lp, int silent));
848 static int get2c __ARGS((FILE *fd));
849 static int get3c __ARGS((FILE *fd));
850 static int get4c __ARGS((FILE *fd));
851 static time_t get8c __ARGS((FILE *fd));
852 static char_u *read_cnt_string __ARGS((FILE *fd, int cnt_bytes, int *lenp));
853 static char_u *read_string __ARGS((FILE *fd, int cnt));
854 static int read_region_section __ARGS((FILE *fd, slang_T *slang, int len));
855 static int read_charflags_section __ARGS((FILE *fd));
856 static int read_prefcond_section __ARGS((FILE *fd, slang_T *lp));
857 static int read_rep_section __ARGS((FILE *fd, garray_T *gap, short *first));
858 static int read_sal_section __ARGS((FILE *fd, slang_T *slang));
859 static int read_words_section __ARGS((FILE *fd, slang_T *lp, int len));
860 static void count_common_word __ARGS((slang_T *lp, char_u *word, int len, int count));
861 static int score_wordcount_adj __ARGS((slang_T *slang, int score, char_u *word, int split));
862 static int read_sofo_section __ARGS((FILE *fd, slang_T *slang));
863 static int read_compound __ARGS((FILE *fd, slang_T *slang, int len));
864 static int byte_in_str __ARGS((char_u *str, int byte));
865 static int init_syl_tab __ARGS((slang_T *slang));
866 static int count_syllables __ARGS((slang_T *slang, char_u *word));
867 static int set_sofo __ARGS((slang_T *lp, char_u *from, char_u *to));
868 static void set_sal_first __ARGS((slang_T *lp));
869 #ifdef FEAT_MBYTE
870 static int *mb_str2wide __ARGS((char_u *s));
871 #endif
872 static int spell_read_tree __ARGS((FILE *fd, char_u **bytsp, idx_T **idxsp, int prefixtree, int prefixcnt));
873 static idx_T read_tree_node __ARGS((FILE *fd, char_u *byts, idx_T *idxs, int maxidx, int startidx, int prefixtree, int maxprefcondnr));
874 static void clear_midword __ARGS((buf_T *buf));
875 static void use_midword __ARGS((slang_T *lp, buf_T *buf));
876 static int find_region __ARGS((char_u *rp, char_u *region));
877 static int captype __ARGS((char_u *word, char_u *end));
878 static int badword_captype __ARGS((char_u *word, char_u *end));
879 static void spell_reload_one __ARGS((char_u *fname, int added_word));
880 static void set_spell_charflags __ARGS((char_u *flags, int cnt, char_u *upp));
881 static int set_spell_chartab __ARGS((char_u *fol, char_u *low, char_u *upp));
882 static int spell_casefold __ARGS((char_u *p, int len, char_u *buf, int buflen));
883 static int check_need_cap __ARGS((linenr_T lnum, colnr_T col));
884 static void spell_find_suggest __ARGS((char_u *badptr, int badlen, suginfo_T *su, int maxcount, int banbadword, int need_cap, int interactive));
885 #ifdef FEAT_EVAL
886 static void spell_suggest_expr __ARGS((suginfo_T *su, char_u *expr));
887 #endif
888 static void spell_suggest_file __ARGS((suginfo_T *su, char_u *fname));
889 static void spell_suggest_intern __ARGS((suginfo_T *su, int interactive));
890 static void suggest_load_files __ARGS((void));
891 static void tree_count_words __ARGS((char_u *byts, idx_T *idxs));
892 static void spell_find_cleanup __ARGS((suginfo_T *su));
893 static void onecap_copy __ARGS((char_u *word, char_u *wcopy, int upper));
894 static void allcap_copy __ARGS((char_u *word, char_u *wcopy));
895 static void suggest_try_special __ARGS((suginfo_T *su));
896 static void suggest_try_change __ARGS((suginfo_T *su));
897 static void suggest_trie_walk __ARGS((suginfo_T *su, langp_T *lp, char_u *fword, int soundfold));
898 static void go_deeper __ARGS((trystate_T *stack, int depth, int score_add));
899 #ifdef FEAT_MBYTE
900 static int nofold_len __ARGS((char_u *fword, int flen, char_u *word));
901 #endif
902 static void find_keepcap_word __ARGS((slang_T *slang, char_u *fword, char_u *kword));
903 static void score_comp_sal __ARGS((suginfo_T *su));
904 static void score_combine __ARGS((suginfo_T *su));
905 static int stp_sal_score __ARGS((suggest_T *stp, suginfo_T *su, slang_T *slang, char_u *badsound));
906 static void suggest_try_soundalike_prep __ARGS((void));
907 static void suggest_try_soundalike __ARGS((suginfo_T *su));
908 static void suggest_try_soundalike_finish __ARGS((void));
909 static void add_sound_suggest __ARGS((suginfo_T *su, char_u *goodword, int score, langp_T *lp));
910 static int soundfold_find __ARGS((slang_T *slang, char_u *word));
911 static void make_case_word __ARGS((char_u *fword, char_u *cword, int flags));
912 static void set_map_str __ARGS((slang_T *lp, char_u *map));
913 static int similar_chars __ARGS((slang_T *slang, int c1, int c2));
914 static void add_suggestion __ARGS((suginfo_T *su, garray_T *gap, char_u *goodword, int badlen, int score, int altscore, int had_bonus, slang_T *slang, int maxsf));
915 static void check_suggestions __ARGS((suginfo_T *su, garray_T *gap));
916 static void add_banned __ARGS((suginfo_T *su, char_u *word));
917 static void rescore_suggestions __ARGS((suginfo_T *su));
918 static void rescore_one __ARGS((suginfo_T *su, suggest_T *stp));
919 static int cleanup_suggestions __ARGS((garray_T *gap, int maxscore, int keep));
920 static void spell_soundfold __ARGS((slang_T *slang, char_u *inword, int folded, char_u *res));
921 static void spell_soundfold_sofo __ARGS((slang_T *slang, char_u *inword, char_u *res));
922 static void spell_soundfold_sal __ARGS((slang_T *slang, char_u *inword, char_u *res));
923 #ifdef FEAT_MBYTE
924 static void spell_soundfold_wsal __ARGS((slang_T *slang, char_u *inword, char_u *res));
925 #endif
926 static int soundalike_score __ARGS((char_u *goodsound, char_u *badsound));
927 static int spell_edit_score __ARGS((slang_T *slang, char_u *badword, char_u *goodword));
928 static int spell_edit_score_limit __ARGS((slang_T *slang, char_u *badword, char_u *goodword, int limit));
929 #ifdef FEAT_MBYTE
930 static int spell_edit_score_limit_w __ARGS((slang_T *slang, char_u *badword, char_u *goodword, int limit));
931 #endif
932 static void dump_word __ARGS((slang_T *slang, char_u *word, char_u *pat, int *dir, int round, int flags, linenr_T lnum));
933 static linenr_T dump_prefixes __ARGS((slang_T *slang, char_u *word, char_u *pat, int *dir, int round, int flags, linenr_T startlnum));
934 static buf_T *open_spellbuf __ARGS((void));
935 static void close_spellbuf __ARGS((buf_T *buf));
936 
937 /*
938  * Use our own character-case definitions, because the current locale may
939  * differ from what the .spl file uses.
940  * These must not be called with a negative number!
     * Every macro uses its argument more than once, thus the argument must not
     * have side effects.  Characters below 256 are looked up in "spelltab".
941  */
942 #ifndef FEAT_MBYTE
943 /* Non-multi-byte implementation.  A character of 256 or higher is returned
     * unchanged and is never considered uppercase. */
944 # define SPELL_TOFOLD(c) ((c) < 256 ? spelltab.st_fold[c] : (c))
945 # define SPELL_TOUPPER(c) ((c) < 256 ? spelltab.st_upper[c] : (c))
946 # define SPELL_ISUPPER(c) ((c) < 256 ? spelltab.st_isu[c] : FALSE)
947 #else
948 # if defined(HAVE_WCHAR_H)
     /* NOTE(review): ISO C declares towlower(), towupper() and iswupper() in
      * <wctype.h>; confirm that <wchar.h> provides them on all platforms. */
949 #  include <wchar.h>	    /* for towupper() and towlower() */
950 # endif
951 /* Multi-byte implementation.  For Unicode we can call utf_*(), but don't do
952  * that for ASCII, because we don't want to use 'casemap' here.  Otherwise use
953  * the "w" library function for characters above 255 if available. */
     /* The order of the checks is: with enc_utf8 set and a character of 128 or
      * higher use the utf_*() function; else below 256 use "spelltab"; else use
      * the library function, or the fallback when it is not available. */
954 # ifdef HAVE_TOWLOWER
955 #  define SPELL_TOFOLD(c) (enc_utf8 && (c) >= 128 ? utf_fold(c) \
956 	    : (c) < 256 ? spelltab.st_fold[c] : towlower(c))
957 # else
958 #  define SPELL_TOFOLD(c) (enc_utf8 && (c) >= 128 ? utf_fold(c) \
959 	    : (c) < 256 ? spelltab.st_fold[c] : (c))
960 # endif
961 
962 # ifdef HAVE_TOWUPPER
963 #  define SPELL_TOUPPER(c) (enc_utf8 && (c) >= 128 ? utf_toupper(c) \
964 	    : (c) < 256 ? spelltab.st_upper[c] : towupper(c))
965 # else
966 #  define SPELL_TOUPPER(c) (enc_utf8 && (c) >= 128 ? utf_toupper(c) \
967 	    : (c) < 256 ? spelltab.st_upper[c] : (c))
968 # endif
969 
970 # ifdef HAVE_ISWUPPER
971 #  define SPELL_ISUPPER(c) (enc_utf8 && (c) >= 128 ? utf_isupper(c) \
972 	    : (c) < 256 ? spelltab.st_isu[c] : iswupper(c))
973 # else
974 #  define SPELL_ISUPPER(c) (enc_utf8 && (c) >= 128 ? utf_isupper(c) \
975 	    : (c) < 256 ? spelltab.st_isu[c] : (FALSE))
976 # endif
977 #endif
978 
979 
/* Messages used in this file.  They are wrapped in N_() here and passed
 * through _() where they are used, e.g. EMSG(_(e_format)) in find_word(). */
980 static char *e_format = N_("E759: Format error in spell file");
981 static char *e_spell_trunc = N_("E758: Truncated spell file");
982 static char *e_afftrailing = N_("Trailing text in %s line %d: %s");
983 static char *e_affname = N_("Affix name too long in %s line %d: %s");
984 static char *e_affform = N_("E761: Format error in affix file FOL, LOW or UPP");
985 static char *e_affrange = N_("E762: Character in FOL, LOW or UPP is out of range");
986 static char *msg_compressing = N_("Compressing word tree...");
987 
988 /* Remember what "z?" replaced.  Both pointers start out as NULL. */
989 static char_u	*repl_from = NULL;
990 static char_u	*repl_to = NULL;
991 
992 /*
993  * Main spell-checking function.
994  * "ptr" points to a character that could be the start of a word.
995  * "*attrp" is set to the highlight index for a badly spelled word.  For a
996  * non-word or when it's OK it remains unchanged.
      * The values used are HLF_SPB for a bad or banned word, HLF_SPR for a rare
      * word, HLF_SPL for any other result that is not SP_OK, and HLF_SPC for a
      * word that is OK or rare but should have started with a capital.
997  * This must only be called when 'spelllang' is not empty.
998  *
999  * "capcol" is used to check for a Capitalised word after the end of a
1000  * sentence.  If it's zero then perform the check.  Return the column where to
1001  * check next, or -1 when no sentence end was found.  If it's NULL then don't
1002  * worry.
      * The column stored in "*capcol" is an offset relative to "ptr".
      *
      * When "docount" is non-zero the word is passed to count_common_word() for
      * the first language in which it is found to be OK.
1003  *
1004  * Returns the length of the word in bytes, also when it's OK, so that the
1005  * caller can skip over the word.
      * Returns 1 right away when "ptr" is at a space or control character or
      * when no languages were loaded.  For a word that starts with a number and
      * is otherwise bad or banned the length of the number is returned.  For a
      * word that lacks a capital the length up to the next non-word character
      * is returned.
1006  */
1007     int
1008 spell_check(wp, ptr, attrp, capcol, docount)
1009     win_T	*wp;		/* current window; its buffer has the languages */
1010     char_u	*ptr;		/* possible start of a word */
1011     hlf_T	*attrp;		/* set to a HLF_ value when there is a problem */
1012     int		*capcol;	/* column to check for Capital */
1013     int		docount;	/* count good words */
1014 {
1015     matchinf_T	mi;		/* Most things are put in "mi" so that it can
1016 				   be passed to functions quickly. */
1017     int		nrlen = 0;	/* length of a number found first, 0 if none */
1018     int		c;
1019     int		wrongcaplen = 0;    /* length of word that lacks a capital */
1020     int		lpi;		/* index in the list of languages */
1021     int		count_word = docount;	/* reset after counting the word */
1022 
1023     /* A word never starts at a space or a control character.  Return quickly
1024      * then, skipping over the character. */
1025     if (*ptr <= ' ')
1026 	return 1;
1027 
1028     /* Return here when loading language files failed. */
1029     if (wp->w_buffer->b_langp.ga_len == 0)
1030 	return 1;
1031 
      /* Start with all fields of "mi" zero. */
1032     vim_memset(&mi, 0, sizeof(matchinf_T));
1033 
1034     /* A number is always OK.  Also skip hexadecimal numbers 0xFF99 and
1035      * 0X99FF.  But always do check spelling to find "3GPP" and "11
1036      * julifeest". */
1037     if (*ptr >= '0' && *ptr <= '9')
1038     {
1039 	if (*ptr == '0' && (ptr[1] == 'x' || ptr[1] == 'X'))
1040 	    mi.mi_end = skiphex(ptr + 2);
1041 	else
1042 	    mi.mi_end = skipdigits(ptr);
1043 	nrlen = mi.mi_end - ptr;
1044     }
1045 
1046     /* Find the normal end of the word (until the next non-word character). */
1047     mi.mi_word = ptr;
1048     mi.mi_fend = ptr;
1049     if (spell_iswordp(mi.mi_fend, wp->w_buffer))
1050     {
1051 	do
1052 	{
1053 	    mb_ptr_adv(mi.mi_fend);
1054 	} while (*mi.mi_fend != NUL && spell_iswordp(mi.mi_fend, wp->w_buffer));
1055 
1056 	if (capcol != NULL && *capcol == 0 && wp->w_buffer->b_cap_prog != NULL)
1057 	{
1058 	    /* Check word starting with capital letter.  The length is only
      	     * remembered here, it is reported at the end when the word
      	     * turns out to be OK or rare. */
1059 	    c = PTR2CHAR(ptr);
1060 	    if (!SPELL_ISUPPER(c))
1061 		wrongcaplen = (int)(mi.mi_fend - ptr);
1062 	}
1063     }
      /* Default is that no sentence end was found, may be changed below. */
1064     if (capcol != NULL)
1065 	*capcol = -1;
1066 
1067     /* We always use the characters up to the next non-word character,
1068      * also for bad words. */
1069     mi.mi_end = mi.mi_fend;
1070 
1071     /* Check caps type later. */
1072     mi.mi_buf = wp->w_buffer;
1073 
1074     /* case-fold the word with one non-word character, so that we can check
1075      * for the word end. */
1076     if (*mi.mi_fend != NUL)
1077 	mb_ptr_adv(mi.mi_fend);
1078 
1079     (void)spell_casefold(ptr, (int)(mi.mi_fend - ptr), mi.mi_fword,
1080 							     MAXWLEN + 1);
1081     mi.mi_fwordlen = STRLEN(mi.mi_fword);
1082 
1083     /* The word is bad unless we recognize it. */
1084     mi.mi_result = SP_BAD;
1085     mi.mi_result2 = SP_BAD;
1086 
1087     /*
1088      * Loop over the languages specified in 'spelllang'.
1089      * We check them all, because a word may be matched longer in another
1090      * language.
1091      */
1092     for (lpi = 0; lpi < wp->w_buffer->b_langp.ga_len; ++lpi)
1093     {
1094 	mi.mi_lp = LANGP_ENTRY(wp->w_buffer->b_langp, lpi);
1095 
1096 	/* If reloading fails the language is still in the list but everything
1097 	 * has been cleared. */
1098 	if (mi.mi_lp->lp_slang->sl_fidxs == NULL)
1099 	    continue;
1100 
1101 	/* Check for a matching word in case-folded words. */
1102 	find_word(&mi, FIND_FOLDWORD);
1103 
1104 	/* Check for a matching word in keep-case words. */
1105 	find_word(&mi, FIND_KEEPWORD);
1106 
1107 	/* Check for matching prefixes. */
1108 	find_prefix(&mi, FIND_FOLDWORD);
1109 
1110 	/* For a NOBREAK language, may want to use a word without a following
1111 	 * word as a backup. */
1112 	if (mi.mi_lp->lp_slang->sl_nobreak && mi.mi_result == SP_BAD
1113 						   && mi.mi_result2 != SP_BAD)
1114 	{
1115 	    mi.mi_result = mi.mi_result2;
1116 	    mi.mi_end = mi.mi_end2;
1117 	}
1118 
1119 	/* Count the word in the first language where it's found to be OK. */
1120 	if (count_word && mi.mi_result == SP_OK)
1121 	{
1122 	    count_common_word(mi.mi_lp->lp_slang, ptr,
1123 						   (int)(mi.mi_end - ptr), 1);
1124 	    count_word = FALSE;
1125 	}
1126     }
1127 
1128     if (mi.mi_result != SP_OK)
1129     {
1130 	/* If we found a number skip over it.  Allows for "42nd".  Do flag
1131 	 * rare and local words, e.g., "3GPP". */
1132 	if (nrlen > 0)
1133 	{
1134 	    if (mi.mi_result == SP_BAD || mi.mi_result == SP_BANNED)
1135 		return nrlen;
1136 	}
1137 
1138 	/* When we are at a non-word character there is no error, just
1139 	 * skip over the character (try looking for a word after it). */
1140 	else if (!spell_iswordp_nmw(ptr))
1141 	{
1142 	    if (capcol != NULL && wp->w_buffer->b_cap_prog != NULL)
1143 	    {
1144 		regmatch_T	regmatch;
1145 
1146 		/* Check for end of sentence.  When the pattern matches at
      		 * "ptr" the end of the match, relative to "ptr", is stored
      		 * in "*capcol". */
1147 		regmatch.regprog = wp->w_buffer->b_cap_prog;
1148 		regmatch.rm_ic = FALSE;
1149 		if (vim_regexec(&regmatch, ptr, 0))
1150 		    *capcol = (int)(regmatch.endp[0] - ptr);
1151 	    }
1152 
      	    /* Skip over one character, which may be several bytes. */
1153 #ifdef FEAT_MBYTE
1154 	    if (has_mbyte)
1155 		return (*mb_ptr2len)(ptr);
1156 #endif
1157 	    return 1;
1158 	}
1159 	else if (mi.mi_end == ptr)
1160 	    /* Always include at least one character.  Required for when there
1161 	     * is a mixup in "midword". */
1162 	    mb_ptr_adv(mi.mi_end);
1163 	else if (mi.mi_result == SP_BAD
1164 		&& LANGP_ENTRY(wp->w_buffer->b_langp, 0)->lp_slang->sl_nobreak)
1165 	{
1166 	    char_u	*p, *fp;    /* position in mi_word and in mi_fword */
1167 	    int		save_result = mi.mi_result;
1168 
1169 	    /* First language in 'spelllang' is NOBREAK.  Find first position
1170 	     * at which any word would be valid. */
1171 	    mi.mi_lp = LANGP_ENTRY(wp->w_buffer->b_langp, 0);
1172 	    if (mi.mi_lp->lp_slang->sl_fidxs != NULL)
1173 	    {
1174 		p = mi.mi_word;
1175 		fp = mi.mi_fword;
1176 		for (;;)
1177 		{
      		    /* Advance one character in both the original and the
      		     * case-folded word. */
1178 		    mb_ptr_adv(p);
1179 		    mb_ptr_adv(fp);
1180 		    if (p >= mi.mi_end)
1181 			break;
1182 		    mi.mi_compoff = fp - mi.mi_fword;
1183 		    find_word(&mi, FIND_COMPOUND);
1184 		    if (mi.mi_result != SP_BAD)
1185 		    {
      			/* A word can start here, the bad part ends at "p". */
1186 			mi.mi_end = p;
1187 			break;
1188 		    }
1189 		}
      		/* find_word() may have changed the result, only the end
      		 * position is used from this search. */
1190 		mi.mi_result = save_result;
1191 	    }
1192 	}
1193 
1194 	if (mi.mi_result == SP_BAD || mi.mi_result == SP_BANNED)
1195 	    *attrp = HLF_SPB;
1196 	else if (mi.mi_result == SP_RARE)
1197 	    *attrp = HLF_SPR;
1198 	else
1199 	    *attrp = HLF_SPL;
1200     }
1201 
1202     if (wrongcaplen > 0 && (mi.mi_result == SP_OK || mi.mi_result == SP_RARE))
1203     {
1204 	/* Report SpellCap only when the word isn't badly spelled. */
1205 	*attrp = HLF_SPC;
1206 	return wrongcaplen;
1207     }
1208 
1209     return (int)(mi.mi_end - ptr);
1210 }
1211 
1212 /*
1213  * Check if the word at "mip->mi_word" is in the tree.
1214  * When "mode" is FIND_FOLDWORD check in fold-case word tree.
1215  * When "mode" is FIND_KEEPWORD check in keep-case word tree.
1216  * When "mode" is FIND_PREFIX check for word after prefix in fold-case word
1217  * tree.
1218  *
1219  * For a match mip->mi_result is updated.
1220  */
1221     static void
1222 find_word(mip, mode)
1223     matchinf_T	*mip;
1224     int		mode;
1225 {
1226     idx_T	arridx = 0;
1227     int		endlen[MAXWLEN];    /* length at possible word endings */
1228     idx_T	endidx[MAXWLEN];    /* possible word endings */
1229     int		endidxcnt = 0;
1230     int		len;
1231     int		wlen = 0;
1232     int		flen;
1233     int		c;
1234     char_u	*ptr;
1235     idx_T	lo, hi, m;
1236 #ifdef FEAT_MBYTE
1237     char_u	*s;
1238 #endif
1239     char_u	*p;
1240     int		res = SP_BAD;
1241     slang_T	*slang = mip->mi_lp->lp_slang;
1242     unsigned	flags;
1243     char_u	*byts;
1244     idx_T	*idxs;
1245     int		word_ends;
1246     int		prefix_found;
1247     int		nobreak_result;
1248 
1249     if (mode == FIND_KEEPWORD || mode == FIND_KEEPCOMPOUND)
1250     {
1251 	/* Check for word with matching case in keep-case tree. */
1252 	ptr = mip->mi_word;
1253 	flen = 9999;		    /* no case folding, always enough bytes */
1254 	byts = slang->sl_kbyts;
1255 	idxs = slang->sl_kidxs;
1256 
1257 	if (mode == FIND_KEEPCOMPOUND)
1258 	    /* Skip over the previously found word(s). */
1259 	    wlen += mip->mi_compoff;
1260     }
1261     else
1262     {
1263 	/* Check for case-folded in case-folded tree. */
1264 	ptr = mip->mi_fword;
1265 	flen = mip->mi_fwordlen;    /* available case-folded bytes */
1266 	byts = slang->sl_fbyts;
1267 	idxs = slang->sl_fidxs;
1268 
1269 	if (mode == FIND_PREFIX)
1270 	{
1271 	    /* Skip over the prefix. */
1272 	    wlen = mip->mi_prefixlen;
1273 	    flen -= mip->mi_prefixlen;
1274 	}
1275 	else if (mode == FIND_COMPOUND)
1276 	{
1277 	    /* Skip over the previously found word(s). */
1278 	    wlen = mip->mi_compoff;
1279 	    flen -= mip->mi_compoff;
1280 	}
1281 
1282     }
1283 
1284     if (byts == NULL)
1285 	return;			/* array is empty */
1286 
1287     /*
1288      * Repeat advancing in the tree until:
1289      * - there is a byte that doesn't match,
1290      * - we reach the end of the tree,
1291      * - or we reach the end of the line.
1292      */
1293     for (;;)
1294     {
1295 	if (flen <= 0 && *mip->mi_fend != NUL)
1296 	    flen = fold_more(mip);
1297 
1298 	len = byts[arridx++];
1299 
1300 	/* If the first possible byte is a zero the word could end here.
1301 	 * Remember this index, we first check for the longest word. */
1302 	if (byts[arridx] == 0)
1303 	{
1304 	    if (endidxcnt == MAXWLEN)
1305 	    {
1306 		/* Must be a corrupted spell file. */
1307 		EMSG(_(e_format));
1308 		return;
1309 	    }
1310 	    endlen[endidxcnt] = wlen;
1311 	    endidx[endidxcnt++] = arridx++;
1312 	    --len;
1313 
1314 	    /* Skip over the zeros, there can be several flag/region
1315 	     * combinations. */
1316 	    while (len > 0 && byts[arridx] == 0)
1317 	    {
1318 		++arridx;
1319 		--len;
1320 	    }
1321 	    if (len == 0)
1322 		break;	    /* no children, word must end here */
1323 	}
1324 
1325 	/* Stop looking at end of the line. */
1326 	if (ptr[wlen] == NUL)
1327 	    break;
1328 
1329 	/* Perform a binary search in the list of accepted bytes. */
1330 	c = ptr[wlen];
1331 	if (c == TAB)	    /* <Tab> is handled like <Space> */
1332 	    c = ' ';
1333 	lo = arridx;
1334 	hi = arridx + len - 1;
1335 	while (lo < hi)
1336 	{
1337 	    m = (lo + hi) / 2;
1338 	    if (byts[m] > c)
1339 		hi = m - 1;
1340 	    else if (byts[m] < c)
1341 		lo = m + 1;
1342 	    else
1343 	    {
1344 		lo = hi = m;
1345 		break;
1346 	    }
1347 	}
1348 
1349 	/* Stop if there is no matching byte. */
1350 	if (hi < lo || byts[lo] != c)
1351 	    break;
1352 
1353 	/* Continue at the child (if there is one). */
1354 	arridx = idxs[lo];
1355 	++wlen;
1356 	--flen;
1357 
1358 	/* One space in the good word may stand for several spaces in the
1359 	 * checked word. */
1360 	if (c == ' ')
1361 	{
1362 	    for (;;)
1363 	    {
1364 		if (flen <= 0 && *mip->mi_fend != NUL)
1365 		    flen = fold_more(mip);
1366 		if (ptr[wlen] != ' ' && ptr[wlen] != TAB)
1367 		    break;
1368 		++wlen;
1369 		--flen;
1370 	    }
1371 	}
1372     }
1373 
1374     /*
1375      * Verify that one of the possible endings is valid.  Try the longest
1376      * first.
1377      */
1378     while (endidxcnt > 0)
1379     {
1380 	--endidxcnt;
1381 	arridx = endidx[endidxcnt];
1382 	wlen = endlen[endidxcnt];
1383 
1384 #ifdef FEAT_MBYTE
1385 	if ((*mb_head_off)(ptr, ptr + wlen) > 0)
1386 	    continue;	    /* not at first byte of character */
1387 #endif
1388 	if (spell_iswordp(ptr + wlen, mip->mi_buf))
1389 	{
1390 	    if (slang->sl_compprog == NULL && !slang->sl_nobreak)
1391 		continue;	    /* next char is a word character */
1392 	    word_ends = FALSE;
1393 	}
1394 	else
1395 	    word_ends = TRUE;
1396 	/* The prefix flag is before compound flags.  Once a valid prefix flag
1397 	 * has been found we try compound flags. */
1398 	prefix_found = FALSE;
1399 
1400 #ifdef FEAT_MBYTE
1401 	if (mode != FIND_KEEPWORD && has_mbyte)
1402 	{
1403 	    /* Compute byte length in original word, length may change
1404 	     * when folding case.  This can be slow, take a shortcut when the
1405 	     * case-folded word is equal to the keep-case word. */
1406 	    p = mip->mi_word;
1407 	    if (STRNCMP(ptr, p, wlen) != 0)
1408 	    {
1409 		for (s = ptr; s < ptr + wlen; mb_ptr_adv(s))
1410 		    mb_ptr_adv(p);
1411 		wlen = p - mip->mi_word;
1412 	    }
1413 	}
1414 #endif
1415 
1416 	/* Check flags and region.  For FIND_PREFIX check the condition and
1417 	 * prefix ID.
1418 	 * Repeat this if there are more flags/region alternatives until there
1419 	 * is a match. */
1420 	res = SP_BAD;
1421 	for (len = byts[arridx - 1]; len > 0 && byts[arridx] == 0;
1422 							      --len, ++arridx)
1423 	{
1424 	    flags = idxs[arridx];
1425 
1426 	    /* For the fold-case tree check that the case of the checked word
1427 	     * matches with what the word in the tree requires.
1428 	     * For keep-case tree the case is always right.  For prefixes we
1429 	     * don't bother to check. */
1430 	    if (mode == FIND_FOLDWORD)
1431 	    {
1432 		if (mip->mi_cend != mip->mi_word + wlen)
1433 		{
1434 		    /* mi_capflags was set for a different word length, need
1435 		     * to do it again. */
1436 		    mip->mi_cend = mip->mi_word + wlen;
1437 		    mip->mi_capflags = captype(mip->mi_word, mip->mi_cend);
1438 		}
1439 
1440 		if (mip->mi_capflags == WF_KEEPCAP
1441 				|| !spell_valid_case(mip->mi_capflags, flags))
1442 		    continue;
1443 	    }
1444 
1445 	    /* When mode is FIND_PREFIX the word must support the prefix:
1446 	     * check the prefix ID and the condition.  Do that for the list at
1447 	     * mip->mi_prefarridx that find_prefix() filled. */
1448 	    else if (mode == FIND_PREFIX && !prefix_found)
1449 	    {
1450 		c = valid_word_prefix(mip->mi_prefcnt, mip->mi_prefarridx,
1451 				    flags,
1452 				    mip->mi_word + mip->mi_cprefixlen, slang,
1453 				    FALSE);
1454 		if (c == 0)
1455 		    continue;
1456 
1457 		/* Use the WF_RARE flag for a rare prefix. */
1458 		if (c & WF_RAREPFX)
1459 		    flags |= WF_RARE;
1460 		prefix_found = TRUE;
1461 	    }
1462 
1463 	    if (slang->sl_nobreak)
1464 	    {
1465 		if ((mode == FIND_COMPOUND || mode == FIND_KEEPCOMPOUND)
1466 			&& (flags & WF_BANNED) == 0)
1467 		{
1468 		    /* NOBREAK: found a valid following word.  That's all we
1469 		     * need to know, so return. */
1470 		    mip->mi_result = SP_OK;
1471 		    break;
1472 		}
1473 	    }
1474 
1475 	    else if ((mode == FIND_COMPOUND || mode == FIND_KEEPCOMPOUND
1476 								|| !word_ends))
1477 	    {
1478 		/* If there is no  flag or the word is shorter than
1479 		 * COMPOUNDMIN reject it quickly.
1480 		 * Makes you wonder why someone puts a compound flag on a word
1481 		 * that's too short...  Myspell compatibility requires this
1482 		 * anyway. */
1483 		if (((unsigned)flags >> 24) == 0
1484 			     || wlen - mip->mi_compoff < slang->sl_compminlen)
1485 		    continue;
1486 #ifdef FEAT_MBYTE
1487 		/* For multi-byte chars check character length against
1488 		 * COMPOUNDMIN. */
1489 		if (has_mbyte
1490 			&& slang->sl_compminlen > 0
1491 			&& mb_charlen_len(mip->mi_word + mip->mi_compoff,
1492 				wlen - mip->mi_compoff) < slang->sl_compminlen)
1493 			continue;
1494 #endif
1495 
1496 		/* Limit the number of compound words to COMPOUNDWORDMAX if no
1497 		 * maximum for syllables is specified. */
1498 		if (!word_ends && mip->mi_complen + mip->mi_compextra + 2
1499 							   > slang->sl_compmax
1500 					   && slang->sl_compsylmax == MAXWLEN)
1501 		    continue;
1502 
1503 		/* Quickly check if compounding is possible with this flag. */
1504 		if (!byte_in_str(mip->mi_complen == 0
1505 					? slang->sl_compstartflags
1506 					: slang->sl_compallflags,
1507 					    ((unsigned)flags >> 24)))
1508 		    continue;
1509 
1510 		if (mode == FIND_COMPOUND)
1511 		{
1512 		    int	    capflags;
1513 
1514 		    /* Need to check the caps type of the appended compound
1515 		     * word. */
1516 #ifdef FEAT_MBYTE
1517 		    if (has_mbyte && STRNCMP(ptr, mip->mi_word,
1518 							mip->mi_compoff) != 0)
1519 		    {
1520 			/* case folding may have changed the length */
1521 			p = mip->mi_word;
1522 			for (s = ptr; s < ptr + mip->mi_compoff; mb_ptr_adv(s))
1523 			    mb_ptr_adv(p);
1524 		    }
1525 		    else
1526 #endif
1527 			p = mip->mi_word + mip->mi_compoff;
1528 		    capflags = captype(p, mip->mi_word + wlen);
1529 		    if (capflags == WF_KEEPCAP || (capflags == WF_ALLCAP
1530 						 && (flags & WF_FIXCAP) != 0))
1531 			continue;
1532 
1533 		    if (capflags != WF_ALLCAP)
1534 		    {
1535 			/* When the character before the word is a word
1536 			 * character we do not accept a Onecap word.  We do
1537 			 * accept a no-caps word, even when the dictionary
1538 			 * word specifies ONECAP. */
1539 			mb_ptr_back(mip->mi_word, p);
1540 			if (spell_iswordp_nmw(p)
1541 				? capflags == WF_ONECAP
1542 				: (flags & WF_ONECAP) != 0
1543 						     && capflags != WF_ONECAP)
1544 			    continue;
1545 		    }
1546 		}
1547 
1548 		/* If the word ends the sequence of compound flags of the
1549 		 * words must match with one of the COMPOUNDRULE items and
1550 		 * the number of syllables must not be too large. */
1551 		mip->mi_compflags[mip->mi_complen] = ((unsigned)flags >> 24);
1552 		mip->mi_compflags[mip->mi_complen + 1] = NUL;
1553 		if (word_ends)
1554 		{
1555 		    char_u	fword[MAXWLEN];
1556 
1557 		    if (slang->sl_compsylmax < MAXWLEN)
1558 		    {
1559 			/* "fword" is only needed for checking syllables. */
1560 			if (ptr == mip->mi_word)
1561 			    (void)spell_casefold(ptr, wlen, fword, MAXWLEN);
1562 			else
1563 			    vim_strncpy(fword, ptr, endlen[endidxcnt]);
1564 		    }
1565 		    if (!can_compound(slang, fword, mip->mi_compflags))
1566 			continue;
1567 		}
1568 	    }
1569 
1570 	    /* Check NEEDCOMPOUND: can't use word without compounding. */
1571 	    else if (flags & WF_NEEDCOMP)
1572 		continue;
1573 
1574 	    nobreak_result = SP_OK;
1575 
1576 	    if (!word_ends)
1577 	    {
1578 		int	save_result = mip->mi_result;
1579 		char_u	*save_end = mip->mi_end;
1580 		langp_T	*save_lp = mip->mi_lp;
1581 		int	lpi;
1582 
1583 		/* Check that a valid word follows.  If there is one and we
1584 		 * are compounding, it will set "mi_result", thus we are
1585 		 * always finished here.  For NOBREAK we only check that a
1586 		 * valid word follows.
1587 		 * Recursive! */
1588 		if (slang->sl_nobreak)
1589 		    mip->mi_result = SP_BAD;
1590 
1591 		/* Find following word in case-folded tree. */
1592 		mip->mi_compoff = endlen[endidxcnt];
1593 #ifdef FEAT_MBYTE
1594 		if (has_mbyte && mode == FIND_KEEPWORD)
1595 		{
1596 		    /* Compute byte length in case-folded word from "wlen":
1597 		     * byte length in keep-case word.  Length may change when
1598 		     * folding case.  This can be slow, take a shortcut when
1599 		     * the case-folded word is equal to the keep-case word. */
1600 		    p = mip->mi_fword;
1601 		    if (STRNCMP(ptr, p, wlen) != 0)
1602 		    {
1603 			for (s = ptr; s < ptr + wlen; mb_ptr_adv(s))
1604 			    mb_ptr_adv(p);
1605 			mip->mi_compoff = p - mip->mi_fword;
1606 		    }
1607 		}
1608 #endif
1609 		c = mip->mi_compoff;
1610 		++mip->mi_complen;
1611 		if (flags & WF_COMPROOT)
1612 		    ++mip->mi_compextra;
1613 
1614 		/* For NOBREAK we need to try all NOBREAK languages, at least
1615 		 * to find the ".add" file(s). */
1616 		for (lpi = 0; lpi < mip->mi_buf->b_langp.ga_len; ++lpi)
1617 		{
1618 		    if (slang->sl_nobreak)
1619 		    {
1620 			mip->mi_lp = LANGP_ENTRY(mip->mi_buf->b_langp, lpi);
1621 			if (mip->mi_lp->lp_slang->sl_fidxs == NULL
1622 					 || !mip->mi_lp->lp_slang->sl_nobreak)
1623 			    continue;
1624 		    }
1625 
1626 		    find_word(mip, FIND_COMPOUND);
1627 
1628 		    /* When NOBREAK any word that matches is OK.  Otherwise we
1629 		     * need to find the longest match, thus try with keep-case
1630 		     * and prefix too. */
1631 		    if (!slang->sl_nobreak || mip->mi_result == SP_BAD)
1632 		    {
1633 			/* Find following word in keep-case tree. */
1634 			mip->mi_compoff = wlen;
1635 			find_word(mip, FIND_KEEPCOMPOUND);
1636 
1637 			if (!slang->sl_nobreak || mip->mi_result == SP_BAD)
1638 			{
1639 			    /* Check for following word with prefix. */
1640 			    mip->mi_compoff = c;
1641 			    find_prefix(mip, FIND_COMPOUND);
1642 			}
1643 		    }
1644 
1645 		    if (!slang->sl_nobreak)
1646 			break;
1647 		}
1648 		--mip->mi_complen;
1649 		if (flags & WF_COMPROOT)
1650 		    --mip->mi_compextra;
1651 		mip->mi_lp = save_lp;
1652 
1653 		if (slang->sl_nobreak)
1654 		{
1655 		    nobreak_result = mip->mi_result;
1656 		    mip->mi_result = save_result;
1657 		    mip->mi_end = save_end;
1658 		}
1659 		else
1660 		{
1661 		    if (mip->mi_result == SP_OK)
1662 			break;
1663 		    continue;
1664 		}
1665 	    }
1666 
1667 	    if (flags & WF_BANNED)
1668 		res = SP_BANNED;
1669 	    else if (flags & WF_REGION)
1670 	    {
1671 		/* Check region. */
1672 		if ((mip->mi_lp->lp_region & (flags >> 16)) != 0)
1673 		    res = SP_OK;
1674 		else
1675 		    res = SP_LOCAL;
1676 	    }
1677 	    else if (flags & WF_RARE)
1678 		res = SP_RARE;
1679 	    else
1680 		res = SP_OK;
1681 
1682 	    /* Always use the longest match and the best result.  For NOBREAK
1683 	     * we separately keep the longest match without a following good
1684 	     * word as a fall-back. */
1685 	    if (nobreak_result == SP_BAD)
1686 	    {
1687 		if (mip->mi_result2 > res)
1688 		{
1689 		    mip->mi_result2 = res;
1690 		    mip->mi_end2 = mip->mi_word + wlen;
1691 		}
1692 		else if (mip->mi_result2 == res
1693 					&& mip->mi_end2 < mip->mi_word + wlen)
1694 		    mip->mi_end2 = mip->mi_word + wlen;
1695 	    }
1696 	    else if (mip->mi_result > res)
1697 	    {
1698 		mip->mi_result = res;
1699 		mip->mi_end = mip->mi_word + wlen;
1700 	    }
1701 	    else if (mip->mi_result == res && mip->mi_end < mip->mi_word + wlen)
1702 		mip->mi_end = mip->mi_word + wlen;
1703 
1704 	    if (mip->mi_result == SP_OK)
1705 		break;
1706 	}
1707 
1708 	if (mip->mi_result == SP_OK)
1709 	    break;
1710     }
1711 }
1712 
1713 /*
1714  * Return TRUE if "flags" is a valid sequence of compound flags and "word"
1715  * does not have too many syllables.
1716  */
1717     static int
1718 can_compound(slang, word, flags)
1719     slang_T	*slang;
1720     char_u	*word;
1721     char_u	*flags;
1722 {
1723     regmatch_T	regmatch;
1724 #ifdef FEAT_MBYTE
1725     char_u	uflags[MAXWLEN * 2];
1726     int		i;
1727 #endif
1728     char_u	*p;
1729 
1730     if (slang->sl_compprog == NULL)
1731 	return FALSE;
1732 #ifdef FEAT_MBYTE
1733     if (enc_utf8)
1734     {
1735 	/* Need to convert the single byte flags to utf8 characters. */
1736 	p = uflags;
1737 	for (i = 0; flags[i] != NUL; ++i)
1738 	    p += mb_char2bytes(flags[i], p);
1739 	*p = NUL;
1740 	p = uflags;
1741     }
1742     else
1743 #endif
1744 	p = flags;
1745     regmatch.regprog = slang->sl_compprog;
1746     regmatch.rm_ic = FALSE;
1747     if (!vim_regexec(&regmatch, p, 0))
1748 	return FALSE;
1749 
1750     /* Count the number of syllables.  This may be slow, do it last.  If there
1751      * are too many syllables AND the number of compound words is above
1752      * COMPOUNDWORDMAX then compounding is not allowed. */
1753     if (slang->sl_compsylmax < MAXWLEN
1754 		       && count_syllables(slang, word) > slang->sl_compsylmax)
1755 	return (int)STRLEN(flags) < slang->sl_compmax;
1756     return TRUE;
1757 }
1758 
1759 /*
1760  * Return non-zero if the prefix indicated by "arridx" matches with the prefix
1761  * ID in "flags" for the word "word".
1762  * The WF_RAREPFX flag is included in the return value for a rare prefix.
1763  */
1764     static int
1765 valid_word_prefix(totprefcnt, arridx, flags, word, slang, cond_req)
1766     int		totprefcnt;	/* nr of prefix IDs */
1767     int		arridx;		/* idx in sl_pidxs[] */
1768     int		flags;
1769     char_u	*word;
1770     slang_T	*slang;
1771     int		cond_req;	/* only use prefixes with a condition */
1772 {
1773     int		prefcnt;
1774     int		pidx;
1775     regprog_T	*rp;
1776     regmatch_T	regmatch;
1777     int		prefid;
1778 
1779     prefid = (unsigned)flags >> 24;
1780     for (prefcnt = totprefcnt - 1; prefcnt >= 0; --prefcnt)
1781     {
1782 	pidx = slang->sl_pidxs[arridx + prefcnt];
1783 
1784 	/* Check the prefix ID. */
1785 	if (prefid != (pidx & 0xff))
1786 	    continue;
1787 
1788 	/* Check if the prefix doesn't combine and the word already has a
1789 	 * suffix. */
1790 	if ((flags & WF_HAS_AFF) && (pidx & WF_PFX_NC))
1791 	    continue;
1792 
1793 	/* Check the condition, if there is one.  The condition index is
1794 	 * stored in the two bytes above the prefix ID byte.  */
1795 	rp = slang->sl_prefprog[((unsigned)pidx >> 8) & 0xffff];
1796 	if (rp != NULL)
1797 	{
1798 	    regmatch.regprog = rp;
1799 	    regmatch.rm_ic = FALSE;
1800 	    if (!vim_regexec(&regmatch, word, 0))
1801 		continue;
1802 	}
1803 	else if (cond_req)
1804 	    continue;
1805 
1806 	/* It's a match!  Return the WF_ flags. */
1807 	return pidx;
1808     }
1809     return 0;
1810 }
1811 
/*
 * Check if the word at "mip->mi_word" has a matching prefix.
 * If it does, then check the following word.
 *
 * If "mode" is "FIND_COMPOUND" then do the same after another word, find a
 * prefix in a compound word.
 *
 * The prefix tree (sl_pbyts/sl_pidxs) is walked byte by byte along the
 * case-folded text.  Each time a node allows the prefix to end, the list of
 * candidate prefixes is stored in "mip" and find_word() is called with
 * FIND_PREFIX to check the word that follows.
 *
 * For a match mip->mi_result is updated.
 */
    static void
find_prefix(mip, mode)
    matchinf_T	*mip;
    int		mode;
{
    idx_T	arridx = 0;	/* current position in byts[]/idxs[] */
    int		len;		/* nr of sibling bytes in the current node */
    int		wlen = 0;	/* nr of bytes of "ptr" matched so far */
    int		flen;		/* case-folded bytes still available */
    int		c;
    char_u	*ptr;
    idx_T	lo, hi, m;
    slang_T	*slang = mip->mi_lp->lp_slang;
    char_u	*byts;
    idx_T	*idxs;

    byts = slang->sl_pbyts;
    if (byts == NULL)
	return;			/* array is empty */

    /* We use the case-folded word here, since prefixes are always
     * case-folded. */
    ptr = mip->mi_fword;
    flen = mip->mi_fwordlen;    /* available case-folded bytes */
    if (mode == FIND_COMPOUND)
    {
	/* Skip over the previously found word(s). */
	ptr += mip->mi_compoff;
	flen -= mip->mi_compoff;
    }
    idxs = slang->sl_pidxs;

    /*
     * Repeat advancing in the tree until:
     * - there is a byte that doesn't match,
     * - we reach the end of the tree,
     * - or we reach the end of the line.
     */
    for (;;)
    {
	/* Ran out of folded text while the line has more: fold some more. */
	if (flen == 0 && *mip->mi_fend != NUL)
	    flen = fold_more(mip);

	/* The node starts with its sibling count, the bytes follow. */
	len = byts[arridx++];

	/* If the first possible byte is a zero the prefix could end here.
	 * Check if the following word matches and supports the prefix. */
	if (byts[arridx] == 0)
	{
	    /* There can be several prefixes with different conditions.  We
	     * try them all, since we don't know which one will give the
	     * longest match.  The word is the same each time, pass the list
	     * of possible prefixes to find_word(). */
	    mip->mi_prefarridx = arridx;
	    mip->mi_prefcnt = len;
	    while (len > 0 && byts[arridx] == 0)
	    {
		++arridx;
		--len;
	    }
	    /* "len" is now the number of non-NUL siblings; the difference is
	     * the number of NUL entries, one per candidate prefix. */
	    mip->mi_prefcnt -= len;

	    /* Find the word that comes after the prefix. */
	    mip->mi_prefixlen = wlen;
	    if (mode == FIND_COMPOUND)
		/* Skip over the previously found word(s). */
		mip->mi_prefixlen += mip->mi_compoff;

#ifdef FEAT_MBYTE
	    if (has_mbyte)
	    {
		/* Case-folded length may differ from original length. */
		mip->mi_cprefixlen = nofold_len(mip->mi_fword,
					     mip->mi_prefixlen, mip->mi_word);
	    }
	    else
		mip->mi_cprefixlen = mip->mi_prefixlen;
#endif
	    find_word(mip, FIND_PREFIX);


	    if (len == 0)
		break;	    /* no children, word must end here */
	}

	/* Stop looking at end of the line. */
	if (ptr[wlen] == NUL)
	    break;

	/* Perform a binary search in the list of accepted bytes. */
	c = ptr[wlen];
	lo = arridx;
	hi = arridx + len - 1;
	while (lo < hi)
	{
	    m = (lo + hi) / 2;
	    if (byts[m] > c)
		hi = m - 1;
	    else if (byts[m] < c)
		lo = m + 1;
	    else
	    {
		lo = hi = m;
		break;
	    }
	}

	/* Stop if there is no matching byte. */
	if (hi < lo || byts[lo] != c)
	    break;

	/* Continue at the child (if there is one). */
	arridx = idxs[lo];
	++wlen;
	--flen;
    }
}
1938 
1939 /*
1940  * Need to fold at least one more character.  Do until next non-word character
1941  * for efficiency.  Include the non-word character too.
1942  * Return the length of the folded chars in bytes.
1943  */
1944     static int
1945 fold_more(mip)
1946     matchinf_T	*mip;
1947 {
1948     int		flen;
1949     char_u	*p;
1950 
1951     p = mip->mi_fend;
1952     do
1953     {
1954 	mb_ptr_adv(mip->mi_fend);
1955     } while (*mip->mi_fend != NUL && spell_iswordp(mip->mi_fend, mip->mi_buf));
1956 
1957     /* Include the non-word character so that we can check for the word end. */
1958     if (*mip->mi_fend != NUL)
1959 	mb_ptr_adv(mip->mi_fend);
1960 
1961     (void)spell_casefold(p, (int)(mip->mi_fend - p),
1962 			     mip->mi_fword + mip->mi_fwordlen,
1963 			     MAXWLEN - mip->mi_fwordlen);
1964     flen = STRLEN(mip->mi_fword + mip->mi_fwordlen);
1965     mip->mi_fwordlen += flen;
1966     return flen;
1967 }
1968 
1969 /*
1970  * Check case flags for a word.  Return TRUE if the word has the requested
1971  * case.
1972  */
1973     static int
1974 spell_valid_case(wordflags, treeflags)
1975     int	    wordflags;	    /* flags for the checked word. */
1976     int	    treeflags;	    /* flags for the word in the spell tree */
1977 {
1978     return ((wordflags == WF_ALLCAP && (treeflags & WF_FIXCAP) == 0)
1979 	    || ((treeflags & (WF_ALLCAP | WF_KEEPCAP)) == 0
1980 		&& ((treeflags & WF_ONECAP) == 0
1981 					   || (wordflags & WF_ONECAP) != 0)));
1982 }
1983 
1984 /*
1985  * Return TRUE if spell checking is not enabled.
1986  */
1987     static int
1988 no_spell_checking(wp)
1989     win_T	*wp;
1990 {
1991     if (!wp->w_p_spell || *wp->w_buffer->b_p_spl == NUL
1992 					 || wp->w_buffer->b_langp.ga_len == 0)
1993     {
1994 	EMSG(_("E756: Spell checking is not enabled"));
1995 	return TRUE;
1996     }
1997     return FALSE;
1998 }
1999 
2000 /*
2001  * Move to next spell error.
2002  * "curline" is FALSE for "[s", "]s", "[S" and "]S".
2003  * "curline" is TRUE to find word under/after cursor in the same line.
2004  * For Insert mode completion "dir" is BACKWARD and "curline" is TRUE: move
2005  * to after badly spelled word before the cursor.
2006  * Return 0 if not found, length of the badly spelled word otherwise.
2007  */
2008     int
2009 spell_move_to(wp, dir, allwords, curline, attrp)
2010     win_T	*wp;
2011     int		dir;		/* FORWARD or BACKWARD */
2012     int		allwords;	/* TRUE for "[s"/"]s", FALSE for "[S"/"]S" */
2013     int		curline;
2014     hlf_T	*attrp;		/* return: attributes of bad word or NULL
2015 				   (only when "dir" is FORWARD) */
2016 {
2017     linenr_T	lnum;
2018     pos_T	found_pos;
2019     int		found_len = 0;
2020     char_u	*line;
2021     char_u	*p;
2022     char_u	*endp;
2023     hlf_T	attr;
2024     int		len;
2025 # ifdef FEAT_SYN_HL
2026     int		has_syntax = syntax_present(wp->w_buffer);
2027     int		col;
2028 # endif
2029     int		can_spell;
2030     char_u	*buf = NULL;
2031     int		buflen = 0;
2032     int		skip = 0;
2033     int		capcol = -1;
2034     int		found_one = FALSE;
2035     int		wrapped = FALSE;
2036 
2037     if (no_spell_checking(wp))
2038 	return 0;
2039 
2040     /*
2041      * Start looking for bad word at the start of the line, because we can't
2042      * start halfway a word, we don't know where the it starts or ends.
2043      *
2044      * When searching backwards, we continue in the line to find the last
2045      * bad word (in the cursor line: before the cursor).
2046      *
2047      * We concatenate the start of the next line, so that wrapped words work
2048      * (e.g. "et<line-break>cetera").  Doesn't work when searching backwards
2049      * though...
2050      */
2051     lnum = wp->w_cursor.lnum;
2052     clearpos(&found_pos);
2053 
2054     while (!got_int)
2055     {
2056 	line = ml_get_buf(wp->w_buffer, lnum, FALSE);
2057 
2058 	len = STRLEN(line);
2059 	if (buflen < len + MAXWLEN + 2)
2060 	{
2061 	    vim_free(buf);
2062 	    buflen = len + MAXWLEN + 2;
2063 	    buf = alloc(buflen);
2064 	    if (buf == NULL)
2065 		break;
2066 	}
2067 
2068 	/* In first line check first word for Capital. */
2069 	if (lnum == 1)
2070 	    capcol = 0;
2071 
2072 	/* For checking first word with a capital skip white space. */
2073 	if (capcol == 0)
2074 	    capcol = skipwhite(line) - line;
2075 
2076 	/* Copy the line into "buf" and append the start of the next line if
2077 	 * possible. */
2078 	STRCPY(buf, line);
2079 	if (lnum < wp->w_buffer->b_ml.ml_line_count)
2080 	    spell_cat_line(buf + STRLEN(buf), ml_get(lnum + 1), MAXWLEN);
2081 
2082 	p = buf + skip;
2083 	endp = buf + len;
2084 	while (p < endp)
2085 	{
2086 	    /* When searching backward don't search after the cursor.  Unless
2087 	     * we wrapped around the end of the buffer. */
2088 	    if (dir == BACKWARD
2089 		    && lnum == wp->w_cursor.lnum
2090 		    && !wrapped
2091 		    && (colnr_T)(p - buf) >= wp->w_cursor.col)
2092 		break;
2093 
2094 	    /* start of word */
2095 	    attr = HLF_COUNT;
2096 	    len = spell_check(wp, p, &attr, &capcol, FALSE);
2097 
2098 	    if (attr != HLF_COUNT)
2099 	    {
2100 		/* We found a bad word.  Check the attribute. */
2101 		if (allwords || attr == HLF_SPB)
2102 		{
2103 		    found_one = TRUE;
2104 
2105 		    /* When searching forward only accept a bad word after
2106 		     * the cursor. */
2107 		    if (dir == BACKWARD
2108 			    || lnum != wp->w_cursor.lnum
2109 			    || (lnum == wp->w_cursor.lnum
2110 				&& (wrapped
2111 				    || (colnr_T)(curline ? p - buf + len
2112 						     : p - buf)
2113 						  > wp->w_cursor.col)))
2114 		    {
2115 # ifdef FEAT_SYN_HL
2116 			if (has_syntax)
2117 			{
2118 			    col = p - buf;
2119 			    (void)syn_get_id(wp, lnum, (colnr_T)col,
2120 						       FALSE, &can_spell);
2121 			}
2122 			else
2123 #endif
2124 			    can_spell = TRUE;
2125 
2126 			if (can_spell)
2127 			{
2128 			    found_pos.lnum = lnum;
2129 			    found_pos.col = p - buf;
2130 #ifdef FEAT_VIRTUALEDIT
2131 			    found_pos.coladd = 0;
2132 #endif
2133 			    if (dir == FORWARD)
2134 			    {
2135 				/* No need to search further. */
2136 				wp->w_cursor = found_pos;
2137 				vim_free(buf);
2138 				if (attrp != NULL)
2139 				    *attrp = attr;
2140 				return len;
2141 			    }
2142 			    else if (curline)
2143 				/* Insert mode completion: put cursor after
2144 				 * the bad word. */
2145 				found_pos.col += len;
2146 			    found_len = len;
2147 			}
2148 		    }
2149 		}
2150 	    }
2151 
2152 	    /* advance to character after the word */
2153 	    p += len;
2154 	    capcol -= len;
2155 	}
2156 
2157 	if (dir == BACKWARD && found_pos.lnum != 0)
2158 	{
2159 	    /* Use the last match in the line (before the cursor). */
2160 	    wp->w_cursor = found_pos;
2161 	    vim_free(buf);
2162 	    return found_len;
2163 	}
2164 
2165 	if (curline)
2166 	    break;	/* only check cursor line */
2167 
2168 	/* Advance to next line. */
2169 	if (dir == BACKWARD)
2170 	{
2171 	    /* If we are back at the starting line and searched it again there
2172 	     * is no match, give up. */
2173 	    if (lnum == wp->w_cursor.lnum && wrapped)
2174 		break;
2175 
2176 	    if (lnum > 1)
2177 		--lnum;
2178 	    else if (!p_ws)
2179 		break;	    /* at first line and 'nowrapscan' */
2180 	    else
2181 	    {
2182 		/* Wrap around to the end of the buffer.  May search the
2183 		 * starting line again and accept the last match. */
2184 		lnum = wp->w_buffer->b_ml.ml_line_count;
2185 		wrapped = TRUE;
2186 		if (!shortmess(SHM_SEARCH))
2187 		    give_warning((char_u *)_(top_bot_msg), TRUE);
2188 	    }
2189 	    capcol = -1;
2190 	}
2191 	else
2192 	{
2193 	    if (lnum < wp->w_buffer->b_ml.ml_line_count)
2194 		++lnum;
2195 	    else if (!p_ws)
2196 		break;	    /* at first line and 'nowrapscan' */
2197 	    else
2198 	    {
2199 		/* Wrap around to the start of the buffer.  May search the
2200 		 * starting line again and accept the first match. */
2201 		lnum = 1;
2202 		wrapped = TRUE;
2203 		if (!shortmess(SHM_SEARCH))
2204 		    give_warning((char_u *)_(bot_top_msg), TRUE);
2205 	    }
2206 
2207 	    /* If we are back at the starting line and there is no match then
2208 	     * give up. */
2209 	    if (lnum == wp->w_cursor.lnum && !found_one)
2210 		break;
2211 
2212 	    /* Skip the characters at the start of the next line that were
2213 	     * included in a match crossing line boundaries. */
2214 	    if (attr == HLF_COUNT)
2215 		skip = p - endp;
2216 	    else
2217 		skip = 0;
2218 
2219 	    /* Capscol skips over the inserted space. */
2220 	    --capcol;
2221 
2222 	    /* But after empty line check first word in next line */
2223 	    if (*skipwhite(line) == NUL)
2224 		capcol = 0;
2225 	}
2226 
2227 	line_breakcheck();
2228     }
2229 
2230     vim_free(buf);
2231     return 0;
2232 }
2233 
2234 /*
2235  * For spell checking: concatenate the start of the following line "line" into
2236  * "buf", blanking-out special characters.  Copy less then "maxlen" bytes.
2237  */
2238     void
2239 spell_cat_line(buf, line, maxlen)
2240     char_u	*buf;
2241     char_u	*line;
2242     int		maxlen;
2243 {
2244     char_u	*p;
2245     int		n;
2246 
2247     p = skipwhite(line);
2248     while (vim_strchr((char_u *)"*#/\"\t", *p) != NULL)
2249 	p = skipwhite(p + 1);
2250 
2251     if (*p != NUL)
2252     {
2253 	*buf = ' ';
2254 	vim_strncpy(buf + 1, line, maxlen - 2);
2255 	n = p - line;
2256 	if (n >= maxlen)
2257 	    n = maxlen - 1;
2258 	vim_memset(buf + 1, ' ', n);
2259     }
2260 }
2261 
/*
 * Structure used for the cookie argument of do_in_runtimepath().
 * spell_load_lang() fills it in and passes it along; spell_load_cb()
 * receives it for every spell file found and updates it.
 */
typedef struct spelload_S
{
    char_u  sl_lang[MAXWLEN + 1];	/* language name */
    slang_T *sl_slang;			/* resulting slang_T struct; NULL
					   until a file has been loaded */
    int	    sl_nobreak;			/* NOBREAK language found */
} spelload_T;
2271 
/*
 * Load word list(s) for "lang" from Vim spell file(s).
 * "lang" must be the language without the region: e.g., "en".
 *
 * First "spell/{lang}.{encoding}.spl" is searched for in 'runtimepath', then
 * "spell/{lang}.ascii.spl".  When one was loaded all matching ".add.spl"
 * files are loaded as well.  When nothing was found a warning is given.
 */
    static void
spell_load_lang(lang)
    char_u	*lang;
{
    char_u	fname_enc[85];
    int		r;
    spelload_T	sl;
#ifdef FEAT_AUTOCMD
    int		round;
#endif

    /* Copy the language name to pass it to spell_load_cb() as a cookie.
     * It's truncated when an error is detected. */
    STRCPY(sl.sl_lang, lang);
    sl.sl_slang = NULL;
    sl.sl_nobreak = FALSE;

#ifdef FEAT_AUTOCMD
    /* We may retry when no spell file is found for the language, an
     * autocommand may load it then. */
    for (round = 1; round <= 2; ++round)
#endif
    {
	/*
	 * Find the first spell file for "lang" in 'runtimepath' and load it.
	 * The size passed is five bytes less than the buffer, which leaves
	 * room for replacing "spl" with "add.spl" further down.
	 */
	vim_snprintf((char *)fname_enc, sizeof(fname_enc) - 5,
					"spell/%s.%s.spl", lang, spell_enc());
	r = do_in_runtimepath(fname_enc, FALSE, spell_load_cb, &sl);

	if (r == FAIL && *sl.sl_lang != NUL)
	{
	    /* Try loading the ASCII version. */
	    vim_snprintf((char *)fname_enc, sizeof(fname_enc) - 5,
						  "spell/%s.ascii.spl", lang);
	    r = do_in_runtimepath(fname_enc, FALSE, spell_load_cb, &sl);

#ifdef FEAT_AUTOCMD
	    /* Still nothing found: in the first round trigger the
	     * SpellFileMissing autocommands and, when any were executed,
	     * try once more. */
	    if (r == FAIL && *sl.sl_lang != NUL && round == 1
		    && apply_autocmds(EVENT_SPELLFILEMISSING, lang,
					      curbuf->b_fname, FALSE, curbuf))
		continue;
	    break;
#endif
	}
#ifdef FEAT_AUTOCMD
	break;
#endif
    }

    if (r == FAIL)
    {
	smsg((char_u *)_("Warning: Cannot find word list \"%s.%s.spl\" or \"%s.ascii.spl\""),
						     lang, spell_enc(), lang);
    }
    else if (sl.sl_slang != NULL)
    {
	/* At least one file was loaded, now load ALL the additions.  The
	 * trailing "spl" of the name that was used is replaced with
	 * "add.spl". */
	STRCPY(fname_enc + STRLEN(fname_enc) - 3, "add.spl");
	do_in_runtimepath(fname_enc, TRUE, spell_load_cb, &sl);
    }
}
2338 
2339 /*
2340  * Return the encoding used for spell checking: Use 'encoding', except that we
2341  * use "latin1" for "latin9".  And limit to 60 characters (just in case).
2342  */
2343     static char_u *
2344 spell_enc()
2345 {
2346 
2347 #ifdef FEAT_MBYTE
2348     if (STRLEN(p_enc) < 60 && STRCMP(p_enc, "iso-8859-15") != 0)
2349 	return p_enc;
2350 #endif
2351     return (char_u *)"latin1";
2352 }
2353 
2354 /*
2355  * Get the name of the .spl file for the internal wordlist into
2356  * "fname[MAXPATHL]".
2357  */
2358     static void
2359 int_wordlist_spl(fname)
2360     char_u	    *fname;
2361 {
2362     vim_snprintf((char *)fname, MAXPATHL, "%s.%s.spl",
2363 						  int_wordlist, spell_enc());
2364 }
2365 
2366 /*
2367  * Allocate a new slang_T for language "lang".  "lang" can be NULL.
2368  * Caller must fill "sl_next".
2369  */
2370     static slang_T *
2371 slang_alloc(lang)
2372     char_u	*lang;
2373 {
2374     slang_T *lp;
2375 
2376     lp = (slang_T *)alloc_clear(sizeof(slang_T));
2377     if (lp != NULL)
2378     {
2379 	if (lang != NULL)
2380 	    lp->sl_name = vim_strsave(lang);
2381 	ga_init2(&lp->sl_rep, sizeof(fromto_T), 10);
2382 	ga_init2(&lp->sl_repsal, sizeof(fromto_T), 10);
2383 	lp->sl_compmax = MAXWLEN;
2384 	lp->sl_compsylmax = MAXWLEN;
2385 	hash_init(&lp->sl_wordcount);
2386     }
2387 
2388     return lp;
2389 }
2390 
2391 /*
2392  * Free the contents of an slang_T and the structure itself.
2393  */
2394     static void
2395 slang_free(lp)
2396     slang_T	*lp;
2397 {
2398     vim_free(lp->sl_name);
2399     vim_free(lp->sl_fname);
2400     slang_clear(lp);
2401     vim_free(lp);
2402 }
2403 
2404 /*
2405  * Clear an slang_T so that the file can be reloaded.
2406  */
2407     static void
2408 slang_clear(lp)
2409     slang_T	*lp;
2410 {
2411     garray_T	*gap;
2412     fromto_T	*ftp;
2413     salitem_T	*smp;
2414     int		i;
2415     int		round;
2416 
2417     vim_free(lp->sl_fbyts);
2418     lp->sl_fbyts = NULL;
2419     vim_free(lp->sl_kbyts);
2420     lp->sl_kbyts = NULL;
2421     vim_free(lp->sl_pbyts);
2422     lp->sl_pbyts = NULL;
2423 
2424     vim_free(lp->sl_fidxs);
2425     lp->sl_fidxs = NULL;
2426     vim_free(lp->sl_kidxs);
2427     lp->sl_kidxs = NULL;
2428     vim_free(lp->sl_pidxs);
2429     lp->sl_pidxs = NULL;
2430 
2431     for (round = 1; round <= 2; ++round)
2432     {
2433 	gap = round == 1 ? &lp->sl_rep : &lp->sl_repsal;
2434 	while (gap->ga_len > 0)
2435 	{
2436 	    ftp = &((fromto_T *)gap->ga_data)[--gap->ga_len];
2437 	    vim_free(ftp->ft_from);
2438 	    vim_free(ftp->ft_to);
2439 	}
2440 	ga_clear(gap);
2441     }
2442 
2443     gap = &lp->sl_sal;
2444     if (lp->sl_sofo)
2445     {
2446 	/* "ga_len" is set to 1 without adding an item for latin1 */
2447 	if (gap->ga_data != NULL)
2448 	    /* SOFOFROM and SOFOTO items: free lists of wide characters. */
2449 	    for (i = 0; i < gap->ga_len; ++i)
2450 		vim_free(((int **)gap->ga_data)[i]);
2451     }
2452     else
2453 	/* SAL items: free salitem_T items */
2454 	while (gap->ga_len > 0)
2455 	{
2456 	    smp = &((salitem_T *)gap->ga_data)[--gap->ga_len];
2457 	    vim_free(smp->sm_lead);
2458 	    /* Don't free sm_oneof and sm_rules, they point into sm_lead. */
2459 	    vim_free(smp->sm_to);
2460 #ifdef FEAT_MBYTE
2461 	    vim_free(smp->sm_lead_w);
2462 	    vim_free(smp->sm_oneof_w);
2463 	    vim_free(smp->sm_to_w);
2464 #endif
2465 	}
2466     ga_clear(gap);
2467 
2468     for (i = 0; i < lp->sl_prefixcnt; ++i)
2469 	vim_free(lp->sl_prefprog[i]);
2470     lp->sl_prefixcnt = 0;
2471     vim_free(lp->sl_prefprog);
2472     lp->sl_prefprog = NULL;
2473 
2474     vim_free(lp->sl_info);
2475     lp->sl_info = NULL;
2476 
2477     vim_free(lp->sl_midword);
2478     lp->sl_midword = NULL;
2479 
2480     vim_free(lp->sl_compprog);
2481     vim_free(lp->sl_compstartflags);
2482     vim_free(lp->sl_compallflags);
2483     lp->sl_compprog = NULL;
2484     lp->sl_compstartflags = NULL;
2485     lp->sl_compallflags = NULL;
2486 
2487     vim_free(lp->sl_syllable);
2488     lp->sl_syllable = NULL;
2489     ga_clear(&lp->sl_syl_items);
2490 
2491     ga_clear_strings(&lp->sl_comppat);
2492 
2493     hash_clear_all(&lp->sl_wordcount, WC_KEY_OFF);
2494     hash_init(&lp->sl_wordcount);
2495 
2496 #ifdef FEAT_MBYTE
2497     hash_clear_all(&lp->sl_map_hash, 0);
2498 #endif
2499 
2500     /* Clear info from .sug file. */
2501     slang_clear_sug(lp);
2502 
2503     lp->sl_compmax = MAXWLEN;
2504     lp->sl_compminlen = 0;
2505     lp->sl_compsylmax = MAXWLEN;
2506     lp->sl_regions[0] = NUL;
2507 }
2508 
2509 /*
2510  * Clear the info from the .sug file in "lp".
2511  */
2512     static void
2513 slang_clear_sug(lp)
2514     slang_T	*lp;
2515 {
2516     vim_free(lp->sl_sbyts);
2517     lp->sl_sbyts = NULL;
2518     vim_free(lp->sl_sidxs);
2519     lp->sl_sidxs = NULL;
2520     close_spellbuf(lp->sl_sugbuf);
2521     lp->sl_sugbuf = NULL;
2522     lp->sl_sugloaded = FALSE;
2523     lp->sl_sugtime = 0;
2524 }
2525 
2526 /*
2527  * Load one spell file and store the info into a slang_T.
2528  * Invoked through do_in_runtimepath().
2529  */
2530     static void
2531 spell_load_cb(fname, cookie)
2532     char_u	*fname;
2533     void	*cookie;
2534 {
2535     spelload_T	*slp = (spelload_T *)cookie;
2536     slang_T	*slang;
2537 
2538     slang = spell_load_file(fname, slp->sl_lang, NULL, FALSE);
2539     if (slang != NULL)
2540     {
2541 	/* When a previously loaded file has NOBREAK also use it for the
2542 	 * ".add" files. */
2543 	if (slp->sl_nobreak && slang->sl_add)
2544 	    slang->sl_nobreak = TRUE;
2545 	else if (slang->sl_nobreak)
2546 	    slp->sl_nobreak = TRUE;
2547 
2548 	slp->sl_slang = slang;
2549     }
2550 }
2551 
2552 /*
2553  * Load one spell file and store the info into a slang_T.
2554  *
2555  * This is invoked in three ways:
2556  * - From spell_load_cb() to load a spell file for the first time.  "lang" is
2557  *   the language name, "old_lp" is NULL.  Will allocate an slang_T.
2558  * - To reload a spell file that was changed.  "lang" is NULL and "old_lp"
2559  *   points to the existing slang_T.
2560  * - Just after writing a .spl file; it's read back to produce the .sug file.
2561  *   "old_lp" is NULL and "lang" is NULL.  Will allocate an slang_T.
2562  *
2563  * Returns the slang_T the spell file was loaded into.  NULL for error.
2564  */
2565     static slang_T *
2566 spell_load_file(fname, lang, old_lp, silent)
2567     char_u	*fname;
2568     char_u	*lang;
2569     slang_T	*old_lp;
2570     int		silent;		/* no error if file doesn't exist */
2571 {
2572     FILE	*fd;
2573     char_u	buf[VIMSPELLMAGICL];
2574     char_u	*p;
2575     int		i;
2576     int		n;
2577     int		len;
2578     char_u	*save_sourcing_name = sourcing_name;
2579     linenr_T	save_sourcing_lnum = sourcing_lnum;
2580     slang_T	*lp = NULL;
2581     int		c = 0;
2582     int		res;
2583 
2584     fd = mch_fopen((char *)fname, "r");
2585     if (fd == NULL)
2586     {
2587 	if (!silent)
2588 	    EMSG2(_(e_notopen), fname);
2589 	else if (p_verbose > 2)
2590 	{
2591 	    verbose_enter();
2592 	    smsg((char_u *)e_notopen, fname);
2593 	    verbose_leave();
2594 	}
2595 	goto endFAIL;
2596     }
2597     if (p_verbose > 2)
2598     {
2599 	verbose_enter();
2600 	smsg((char_u *)_("Reading spell file \"%s\""), fname);
2601 	verbose_leave();
2602     }
2603 
2604     if (old_lp == NULL)
2605     {
2606 	lp = slang_alloc(lang);
2607 	if (lp == NULL)
2608 	    goto endFAIL;
2609 
2610 	/* Remember the file name, used to reload the file when it's updated. */
2611 	lp->sl_fname = vim_strsave(fname);
2612 	if (lp->sl_fname == NULL)
2613 	    goto endFAIL;
2614 
2615 	/* Check for .add.spl. */
2616 	lp->sl_add = strstr((char *)gettail(fname), ".add.") != NULL;
2617     }
2618     else
2619 	lp = old_lp;
2620 
2621     /* Set sourcing_name, so that error messages mention the file name. */
2622     sourcing_name = fname;
2623     sourcing_lnum = 0;
2624 
2625     /*
2626      * <HEADER>: <fileID>
2627      */
2628     for (i = 0; i < VIMSPELLMAGICL; ++i)
2629 	buf[i] = getc(fd);				/* <fileID> */
2630     if (STRNCMP(buf, VIMSPELLMAGIC, VIMSPELLMAGICL) != 0)
2631     {
2632 	EMSG(_("E757: This does not look like a spell file"));
2633 	goto endFAIL;
2634     }
2635     c = getc(fd);					/* <versionnr> */
2636     if (c < VIMSPELLVERSION)
2637     {
2638 	EMSG(_("E771: Old spell file, needs to be updated"));
2639 	goto endFAIL;
2640     }
2641     else if (c > VIMSPELLVERSION)
2642     {
2643 	EMSG(_("E772: Spell file is for newer version of Vim"));
2644 	goto endFAIL;
2645     }
2646 
2647 
2648     /*
2649      * <SECTIONS>: <section> ... <sectionend>
2650      * <section>: <sectionID> <sectionflags> <sectionlen> (section contents)
2651      */
2652     for (;;)
2653     {
2654 	n = getc(fd);			    /* <sectionID> or <sectionend> */
2655 	if (n == SN_END)
2656 	    break;
2657 	c = getc(fd);					/* <sectionflags> */
2658 	len = get4c(fd);				/* <sectionlen> */
2659 	if (len < 0)
2660 	    goto truncerr;
2661 
2662 	res = 0;
2663 	switch (n)
2664 	{
2665 	    case SN_INFO:
2666 		lp->sl_info = read_string(fd, len);	/* <infotext> */
2667 		if (lp->sl_info == NULL)
2668 		    goto endFAIL;
2669 		break;
2670 
2671 	    case SN_REGION:
2672 		res = read_region_section(fd, lp, len);
2673 		break;
2674 
2675 	    case SN_CHARFLAGS:
2676 		res = read_charflags_section(fd);
2677 		break;
2678 
2679 	    case SN_MIDWORD:
2680 		lp->sl_midword = read_string(fd, len);	/* <midword> */
2681 		if (lp->sl_midword == NULL)
2682 		    goto endFAIL;
2683 		break;
2684 
2685 	    case SN_PREFCOND:
2686 		res = read_prefcond_section(fd, lp);
2687 		break;
2688 
2689 	    case SN_REP:
2690 		res = read_rep_section(fd, &lp->sl_rep, lp->sl_rep_first);
2691 		break;
2692 
2693 	    case SN_REPSAL:
2694 		res = read_rep_section(fd, &lp->sl_repsal, lp->sl_repsal_first);
2695 		break;
2696 
2697 	    case SN_SAL:
2698 		res = read_sal_section(fd, lp);
2699 		break;
2700 
2701 	    case SN_SOFO:
2702 		res = read_sofo_section(fd, lp);
2703 		break;
2704 
2705 	    case SN_MAP:
2706 		p = read_string(fd, len);		/* <mapstr> */
2707 		if (p == NULL)
2708 		    goto endFAIL;
2709 		set_map_str(lp, p);
2710 		vim_free(p);
2711 		break;
2712 
2713 	    case SN_WORDS:
2714 		res = read_words_section(fd, lp, len);
2715 		break;
2716 
2717 	    case SN_SUGFILE:
2718 		lp->sl_sugtime = get8c(fd);		/* <timestamp> */
2719 		break;
2720 
2721 	    case SN_NOSPLITSUGS:
2722 		lp->sl_nosplitsugs = TRUE;		/* <timestamp> */
2723 		break;
2724 
2725 	    case SN_COMPOUND:
2726 		res = read_compound(fd, lp, len);
2727 		break;
2728 
2729 	    case SN_NOBREAK:
2730 		lp->sl_nobreak = TRUE;
2731 		break;
2732 
2733 	    case SN_SYLLABLE:
2734 		lp->sl_syllable = read_string(fd, len);	/* <syllable> */
2735 		if (lp->sl_syllable == NULL)
2736 		    goto endFAIL;
2737 		if (init_syl_tab(lp) == FAIL)
2738 		    goto endFAIL;
2739 		break;
2740 
2741 	    default:
2742 		/* Unsupported section.  When it's required give an error
2743 		 * message.  When it's not required skip the contents. */
2744 		if (c & SNF_REQUIRED)
2745 		{
2746 		    EMSG(_("E770: Unsupported section in spell file"));
2747 		    goto endFAIL;
2748 		}
2749 		while (--len >= 0)
2750 		    if (getc(fd) < 0)
2751 			goto truncerr;
2752 		break;
2753 	}
2754 someerror:
2755 	if (res == SP_FORMERROR)
2756 	{
2757 	    EMSG(_(e_format));
2758 	    goto endFAIL;
2759 	}
2760 	if (res == SP_TRUNCERROR)
2761 	{
2762 truncerr:
2763 	    EMSG(_(e_spell_trunc));
2764 	    goto endFAIL;
2765 	}
2766 	if (res == SP_OTHERERROR)
2767 	    goto endFAIL;
2768     }
2769 
2770     /* <LWORDTREE> */
2771     res = spell_read_tree(fd, &lp->sl_fbyts, &lp->sl_fidxs, FALSE, 0);
2772     if (res != 0)
2773 	goto someerror;
2774 
2775     /* <KWORDTREE> */
2776     res = spell_read_tree(fd, &lp->sl_kbyts, &lp->sl_kidxs, FALSE, 0);
2777     if (res != 0)
2778 	goto someerror;
2779 
2780     /* <PREFIXTREE> */
2781     res = spell_read_tree(fd, &lp->sl_pbyts, &lp->sl_pidxs, TRUE,
2782 							    lp->sl_prefixcnt);
2783     if (res != 0)
2784 	goto someerror;
2785 
2786     /* For a new file link it in the list of spell files. */
2787     if (old_lp == NULL && lang != NULL)
2788     {
2789 	lp->sl_next = first_lang;
2790 	first_lang = lp;
2791     }
2792 
2793     goto endOK;
2794 
2795 endFAIL:
2796     if (lang != NULL)
2797 	/* truncating the name signals the error to spell_load_lang() */
2798 	*lang = NUL;
2799     if (lp != NULL && old_lp == NULL)
2800 	slang_free(lp);
2801     lp = NULL;
2802 
2803 endOK:
2804     if (fd != NULL)
2805 	fclose(fd);
2806     sourcing_name = save_sourcing_name;
2807     sourcing_lnum = save_sourcing_lnum;
2808 
2809     return lp;
2810 }
2811 
/*
 * Read 2 bytes from "fd" and turn them into an int, MSB first.
 * Returns -1 when the end of the file (or a read error) is reached before
 * both bytes were read.
 */
    static int
get2c(fd)
    FILE	*fd;
{
    int		hi;
    int		lo;

    /* Check every byte for EOF: shifting the negative EOF value is undefined
     * and a missing second byte would otherwise go unnoticed. */
    hi = getc(fd);
    if (hi == EOF)
	return -1;
    lo = getc(fd);
    if (lo == EOF)
	return -1;
    return (hi << 8) + lo;
}
2825 
/*
 * Read 3 bytes from "fd" and turn them into an int, MSB first.
 * Returns -1 when the end of the file (or a read error) is reached before
 * all three bytes were read.
 */
    static int
get3c(fd)
    FILE	*fd;
{
    int		n = 0;
    int		c;
    int		i;

    for (i = 0; i < 3; ++i)
    {
	c = getc(fd);
	if (c == EOF)	    /* truncated: don't fold EOF into the value */
	    return -1;
	n = (n << 8) + c;
    }
    return n;
}
2840 
/*
 * Read 4 bytes from "fd" and turn them into an int, MSB first.
 * Returns -1 when the end of the file (or a read error) is reached before
 * all four bytes were read.
 */
    static int
get4c(fd)
    FILE	*fd;
{
    /* Accumulate in an unsigned: left-shifting into the sign bit of a signed
     * value is undefined. */
    unsigned	n = 0;
    int		c;
    int		i;

    for (i = 0; i < 4; ++i)
    {
	c = getc(fd);
	if (c == EOF)	    /* truncated: don't fold EOF into the value */
	    return -1;
	n = (n << 8) + (unsigned)c;
    }
    return (int)n;
}
2856 
/*
 * Read 8 bytes from "fd" and turn them into a time_t, MSB first.
 * Returns (time_t)-1 when the end of the file (or a read error) is reached
 * before all eight bytes were read.
 */
    static time_t
get8c(fd)
    FILE	*fd;
{
    time_t	n = 0;
    int		c;
    int		i;

    for (i = 0; i < 8; ++i)
    {
	c = getc(fd);
	if (c == EOF)	    /* truncated: don't fold EOF into the value */
	    return (time_t)-1;
	n = (n << 8) + c;
    }
    return n;
}
2871 
2872 /*
2873  * Read a length field from "fd" in "cnt_bytes" bytes.
2874  * Allocate memory, read the string into it and add a NUL at the end.
2875  * Returns NULL when the count is zero.
2876  * Sets "*cntp" to SP_*ERROR when there is an error, length of the result
2877  * otherwise.
2878  */
2879     static char_u *
2880 read_cnt_string(fd, cnt_bytes, cntp)
2881     FILE	*fd;
2882     int		cnt_bytes;
2883     int		*cntp;
2884 {
2885     int		cnt = 0;
2886     int		i;
2887     char_u	*str;
2888 
2889     /* read the length bytes, MSB first */
2890     for (i = 0; i < cnt_bytes; ++i)
2891 	cnt = (cnt << 8) + getc(fd);
2892     if (cnt < 0)
2893     {
2894 	*cntp = SP_TRUNCERROR;
2895 	return NULL;
2896     }
2897     *cntp = cnt;
2898     if (cnt == 0)
2899 	return NULL;	    /* nothing to read, return NULL */
2900 
2901     str = read_string(fd, cnt);
2902     if (str == NULL)
2903 	*cntp = SP_OTHERERROR;
2904     return str;
2905 }
2906 
2907 /*
2908  * Read a string of length "cnt" from "fd" into allocated memory.
2909  * Returns NULL when out of memory.
2910  */
2911     static char_u *
2912 read_string(fd, cnt)
2913     FILE	*fd;
2914     int		cnt;
2915 {
2916     char_u	*str;
2917     int		i;
2918 
2919     /* allocate memory */
2920     str = alloc((unsigned)cnt + 1);
2921     if (str != NULL)
2922     {
2923 	/* Read the string.  Doesn't check for truncated file. */
2924 	for (i = 0; i < cnt; ++i)
2925 	    str[i] = getc(fd);
2926 	str[i] = NUL;
2927     }
2928     return str;
2929 }
2930 
2931 /*
2932  * Read SN_REGION: <regionname> ...
2933  * Return SP_*ERROR flags.
2934  */
2935     static int
2936 read_region_section(fd, lp, len)
2937     FILE	*fd;
2938     slang_T	*lp;
2939     int		len;
2940 {
2941     int		i;
2942 
2943     if (len > 16)
2944 	return SP_FORMERROR;
2945     for (i = 0; i < len; ++i)
2946 	lp->sl_regions[i] = getc(fd);			/* <regionname> */
2947     lp->sl_regions[len] = NUL;
2948     return 0;
2949 }
2950 
2951 /*
2952  * Read SN_CHARFLAGS section: <charflagslen> <charflags>
2953  *				<folcharslen> <folchars>
2954  * Return SP_*ERROR flags.
2955  */
2956     static int
2957 read_charflags_section(fd)
2958     FILE	*fd;
2959 {
2960     char_u	*flags;
2961     char_u	*fol;
2962     int		flagslen, follen;
2963 
2964     /* <charflagslen> <charflags> */
2965     flags = read_cnt_string(fd, 1, &flagslen);
2966     if (flagslen < 0)
2967 	return flagslen;
2968 
2969     /* <folcharslen> <folchars> */
2970     fol = read_cnt_string(fd, 2, &follen);
2971     if (follen < 0)
2972     {
2973 	vim_free(flags);
2974 	return follen;
2975     }
2976 
2977     /* Set the word-char flags and fill SPELL_ISUPPER() table. */
2978     if (flags != NULL && fol != NULL)
2979 	set_spell_charflags(flags, flagslen, fol);
2980 
2981     vim_free(flags);
2982     vim_free(fol);
2983 
2984     /* When <charflagslen> is zero then <fcharlen> must also be zero. */
2985     if ((flags == NULL) != (fol == NULL))
2986 	return SP_FORMERROR;
2987     return 0;
2988 }
2989 
2990 /*
2991  * Read SN_PREFCOND section.
2992  * Return SP_*ERROR flags.
2993  */
2994     static int
2995 read_prefcond_section(fd, lp)
2996     FILE	*fd;
2997     slang_T	*lp;
2998 {
2999     int		cnt;
3000     int		i;
3001     int		n;
3002     char_u	*p;
3003     char_u	buf[MAXWLEN + 1];
3004 
3005     /* <prefcondcnt> <prefcond> ... */
3006     cnt = get2c(fd);					/* <prefcondcnt> */
3007     if (cnt <= 0)
3008 	return SP_FORMERROR;
3009 
3010     lp->sl_prefprog = (regprog_T **)alloc_clear(
3011 					 (unsigned)sizeof(regprog_T *) * cnt);
3012     if (lp->sl_prefprog == NULL)
3013 	return SP_OTHERERROR;
3014     lp->sl_prefixcnt = cnt;
3015 
3016     for (i = 0; i < cnt; ++i)
3017     {
3018 	/* <prefcond> : <condlen> <condstr> */
3019 	n = getc(fd);					/* <condlen> */
3020 	if (n < 0 || n >= MAXWLEN)
3021 	    return SP_FORMERROR;
3022 
3023 	/* When <condlen> is zero we have an empty condition.  Otherwise
3024 	 * compile the regexp program used to check for the condition. */
3025 	if (n > 0)
3026 	{
3027 	    buf[0] = '^';	    /* always match at one position only */
3028 	    p = buf + 1;
3029 	    while (n-- > 0)
3030 		*p++ = getc(fd);			/* <condstr> */
3031 	    *p = NUL;
3032 	    lp->sl_prefprog[i] = vim_regcomp(buf, RE_MAGIC + RE_STRING);
3033 	}
3034     }
3035     return 0;
3036 }
3037 
3038 /*
3039  * Read REP or REPSAL items section from "fd": <repcount> <rep> ...
3040  * Return SP_*ERROR flags.
3041  */
3042     static int
3043 read_rep_section(fd, gap, first)
3044     FILE	*fd;
3045     garray_T	*gap;
3046     short	*first;
3047 {
3048     int		cnt;
3049     fromto_T	*ftp;
3050     int		i;
3051 
3052     cnt = get2c(fd);					/* <repcount> */
3053     if (cnt < 0)
3054 	return SP_TRUNCERROR;
3055 
3056     if (ga_grow(gap, cnt) == FAIL)
3057 	return SP_OTHERERROR;
3058 
3059     /* <rep> : <repfromlen> <repfrom> <reptolen> <repto> */
3060     for (; gap->ga_len < cnt; ++gap->ga_len)
3061     {
3062 	ftp = &((fromto_T *)gap->ga_data)[gap->ga_len];
3063 	ftp->ft_from = read_cnt_string(fd, 1, &i);
3064 	if (i < 0)
3065 	    return i;
3066 	if (i == 0)
3067 	    return SP_FORMERROR;
3068 	ftp->ft_to = read_cnt_string(fd, 1, &i);
3069 	if (i <= 0)
3070 	{
3071 	    vim_free(ftp->ft_from);
3072 	    if (i < 0)
3073 		return i;
3074 	    return SP_FORMERROR;
3075 	}
3076     }
3077 
3078     /* Fill the first-index table. */
3079     for (i = 0; i < 256; ++i)
3080 	first[i] = -1;
3081     for (i = 0; i < gap->ga_len; ++i)
3082     {
3083 	ftp = &((fromto_T *)gap->ga_data)[i];
3084 	if (first[*ftp->ft_from] == -1)
3085 	    first[*ftp->ft_from] = i;
3086     }
3087     return 0;
3088 }
3089 
/*
 * Read SN_SAL section: <salflags> <salcount> <sal> ...
 * The items are stored in growarray "slang->sl_sal" as salitem_T entries.
 * Each <salfrom> is split into up to three NUL-terminated parts that are
 * stored back to back in one allocation: the lead (sm_lead), the optional
 * "(abc)" alternatives (sm_oneof) and the remaining rules (sm_rules).
 * When at least one item was read an extra entry with an empty lead is
 * appended to mark the end.  Finally set_sal_first() is called.
 * Return SP_*ERROR flags.
 */
    static int
read_sal_section(fd, slang)
    FILE	*fd;
    slang_T	*slang;
{
    int		i;
    int		cnt;
    garray_T	*gap;
    salitem_T	*smp;
    int		ccnt;
    char_u	*p;
    int		c = NUL;	/* last char read from <salfrom> */

    slang->sl_sofo = FALSE;

    i = getc(fd);				/* <salflags> */
    if (i & SAL_F0LLOWUP)
	slang->sl_followup = TRUE;
    if (i & SAL_COLLAPSE)
	slang->sl_collapse = TRUE;
    if (i & SAL_REM_ACCENTS)
	slang->sl_rem_accents = TRUE;

    cnt = get2c(fd);				/* <salcount> */
    if (cnt < 0)
	return SP_TRUNCERROR;

    gap = &slang->sl_sal;
    ga_init2(gap, sizeof(salitem_T), 10);
    /* One more than "cnt": room for the end marker added below. */
    if (ga_grow(gap, cnt + 1) == FAIL)
	return SP_OTHERERROR;

    /* <sal> : <salfromlen> <salfrom> <saltolen> <salto> */
    for (; gap->ga_len < cnt; ++gap->ga_len)
    {
	smp = &((salitem_T *)gap->ga_data)[gap->ga_len];
	ccnt = getc(fd);			/* <salfromlen> */
	if (ccnt < 0)
	    return SP_TRUNCERROR;
	/* sm_lead, sm_oneof and sm_rules all point into this one buffer. */
	if ((p = alloc(ccnt + 2)) == NULL)
	    return SP_OTHERERROR;
	smp->sm_lead = p;

	/* Read up to the first special char into sm_lead.  The special char
	 * itself is kept in "c" and handled below. */
	for (i = 0; i < ccnt; ++i)
	{
	    c = getc(fd);			/* <salfrom> */
	    if (vim_strchr((char_u *)"0123456789(-<^$", c) != NULL)
		break;
	    *p++ = c;
	}
	smp->sm_leadlen = p - smp->sm_lead;
	*p++ = NUL;

	/* Put (abc) chars in sm_oneof, if any. */
	if (c == '(')
	{
	    smp->sm_oneof = p;
	    for (++i; i < ccnt; ++i)
	    {
		c = getc(fd);			/* <salfrom> */
		if (c == ')')
		    break;
		*p++ = c;
	    }
	    *p++ = NUL;
	    /* Get the char after the ')', if there is one; it is stored as
	     * the first char of sm_rules below. */
	    if (++i < ccnt)
		c = getc(fd);
	}
	else
	    smp->sm_oneof = NULL;

	/* Any following chars go in sm_rules. */
	smp->sm_rules = p;
	if (i < ccnt)
	    /* store the char we got while checking for end of sm_lead */
	    *p++ = c;
	for (++i; i < ccnt; ++i)
	    *p++ = getc(fd);			/* <salfrom> */
	*p++ = NUL;

	/* <saltolen> <salto> */
	smp->sm_to = read_cnt_string(fd, 1, &ccnt);
	if (ccnt < 0)
	{
	    /* This item is not counted in ga_len yet, free its buffer here. */
	    vim_free(smp->sm_lead);
	    return ccnt;
	}

#ifdef FEAT_MBYTE
	if (has_mbyte)
	{
	    /* convert the multi-byte strings to wide char strings */
	    smp->sm_lead_w = mb_str2wide(smp->sm_lead);
	    /* The length in characters replaces the byte length set above. */
	    smp->sm_leadlen = mb_charlen(smp->sm_lead);
	    if (smp->sm_oneof == NULL)
		smp->sm_oneof_w = NULL;
	    else
		smp->sm_oneof_w = mb_str2wide(smp->sm_oneof);
	    if (smp->sm_to == NULL)
		smp->sm_to_w = NULL;
	    else
		smp->sm_to_w = mb_str2wide(smp->sm_to);
	    /* A wide string that is NULL while its source exists means the
	     * conversion failed: give up on this item. */
	    if (smp->sm_lead_w == NULL
		    || (smp->sm_oneof_w == NULL && smp->sm_oneof != NULL)
		    || (smp->sm_to_w == NULL && smp->sm_to != NULL))
	    {
		vim_free(smp->sm_lead);
		vim_free(smp->sm_to);
		vim_free(smp->sm_lead_w);
		vim_free(smp->sm_oneof_w);
		vim_free(smp->sm_to_w);
		return SP_OTHERERROR;
	    }
	}
#endif
    }

    if (gap->ga_len > 0)
    {
	/* Add one extra entry to mark the end with an empty sm_lead.  Avoids
	 * that we need to check the index every time. */
	smp = &((salitem_T *)gap->ga_data)[gap->ga_len];
	if ((p = alloc(1)) == NULL)
	    return SP_OTHERERROR;
	p[0] = NUL;
	smp->sm_lead = p;
	smp->sm_leadlen = 0;
	smp->sm_oneof = NULL;
	smp->sm_rules = p;	/* the same empty string as sm_lead */
	smp->sm_to = NULL;
#ifdef FEAT_MBYTE
	if (has_mbyte)
	{
	    smp->sm_lead_w = mb_str2wide(smp->sm_lead);
	    smp->sm_leadlen = 0;
	    smp->sm_oneof_w = NULL;
	    smp->sm_to_w = NULL;
	}
#endif
	++gap->ga_len;
    }

    /* Fill the first-index table. */
    set_sal_first(slang);

    return 0;
}
3242 
3243 /*
3244  * Read SN_WORDS: <word> ...
3245  * Return SP_*ERROR flags.
3246  */
3247     static int
3248 read_words_section(fd, lp, len)
3249     FILE	*fd;
3250     slang_T	*lp;
3251     int		len;
3252 {
3253     int		done = 0;
3254     int		i;
3255     char_u	word[MAXWLEN];
3256 
3257     while (done < len)
3258     {
3259 	/* Read one word at a time. */
3260 	for (i = 0; ; ++i)
3261 	{
3262 	    word[i] = getc(fd);
3263 	    if (word[i] == NUL)
3264 		break;
3265 	    if (i == MAXWLEN - 1)
3266 		return SP_FORMERROR;
3267 	}
3268 
3269 	/* Init the count to 10. */
3270 	count_common_word(lp, word, -1, 10);
3271 	done += i + 1;
3272     }
3273     return 0;
3274 }
3275 
3276 /*
3277  * Add a word to the hashtable of common words.
3278  * If it's already there then the counter is increased.
3279  */
3280     static void
3281 count_common_word(lp, word, len, count)
3282     slang_T	*lp;
3283     char_u	*word;
3284     int		len;	    /* word length, -1 for upto NUL */
3285     int		count;	    /* 1 to count once, 10 to init */
3286 {
3287     hash_T	hash;
3288     hashitem_T	*hi;
3289     wordcount_T	*wc;
3290     char_u	buf[MAXWLEN];
3291     char_u	*p;
3292 
3293     if (len == -1)
3294 	p = word;
3295     else
3296     {
3297 	vim_strncpy(buf, word, len);
3298 	p = buf;
3299     }
3300 
3301     hash = hash_hash(p);
3302     hi = hash_lookup(&lp->sl_wordcount, p, hash);
3303     if (HASHITEM_EMPTY(hi))
3304     {
3305 	wc = (wordcount_T *)alloc(sizeof(wordcount_T) + STRLEN(p));
3306 	if (wc == NULL)
3307 	    return;
3308 	STRCPY(wc->wc_word, p);
3309 	wc->wc_count = count;
3310 	hash_add_item(&lp->sl_wordcount, hi, wc->wc_word, hash);
3311     }
3312     else
3313     {
3314 	wc = HI2WC(hi);
3315 	if ((wc->wc_count += count) < (unsigned)count)	/* check for overflow */
3316 	    wc->wc_count = MAXWORDCOUNT;
3317     }
3318 }
3319 
3320 /*
3321  * Adjust the score of common words.
3322  */
3323     static int
3324 score_wordcount_adj(slang, score, word, split)
3325     slang_T	*slang;
3326     int		score;
3327     char_u	*word;
3328     int		split;	    /* word was split, less bonus */
3329 {
3330     hashitem_T	*hi;
3331     wordcount_T	*wc;
3332     int		bonus;
3333     int		newscore;
3334 
3335     hi = hash_find(&slang->sl_wordcount, word);
3336     if (!HASHITEM_EMPTY(hi))
3337     {
3338 	wc = HI2WC(hi);
3339 	if (wc->wc_count < SCORE_THRES2)
3340 	    bonus = SCORE_COMMON1;
3341 	else if (wc->wc_count < SCORE_THRES3)
3342 	    bonus = SCORE_COMMON2;
3343 	else
3344 	    bonus = SCORE_COMMON3;
3345 	if (split)
3346 	    newscore = score - bonus / 2;
3347 	else
3348 	    newscore = score - bonus;
3349 	if (newscore < 0)
3350 	    return 0;
3351 	return newscore;
3352     }
3353     return score;
3354 }
3355 
3356 /*
3357  * SN_SOFO: <sofofromlen> <sofofrom> <sofotolen> <sofoto>
3358  * Return SP_*ERROR flags.
3359  */
3360     static int
3361 read_sofo_section(fd, slang)
3362     FILE	*fd;
3363     slang_T	*slang;
3364 {
3365     int		cnt;
3366     char_u	*from, *to;
3367     int		res;
3368 
3369     slang->sl_sofo = TRUE;
3370 
3371     /* <sofofromlen> <sofofrom> */
3372     from = read_cnt_string(fd, 2, &cnt);
3373     if (cnt < 0)
3374 	return cnt;
3375 
3376     /* <sofotolen> <sofoto> */
3377     to = read_cnt_string(fd, 2, &cnt);
3378     if (cnt < 0)
3379     {
3380 	vim_free(from);
3381 	return cnt;
3382     }
3383 
3384     /* Store the info in slang->sl_sal and/or slang->sl_sal_first. */
3385     if (from != NULL && to != NULL)
3386 	res = set_sofo(slang, from, to);
3387     else if (from != NULL || to != NULL)
3388 	res = SP_FORMERROR;    /* only one of two strings is an error */
3389     else
3390 	res = 0;
3391 
3392     vim_free(from);
3393     vim_free(to);
3394     return res;
3395 }
3396 
3397 /*
3398  * Read the compound section from the .spl file:
3399  *	<compmax> <compminlen> <compsylmax> <compoptions> <compflags>
3400  * Returns SP_*ERROR flags.
3401  */
3402     static int
3403 read_compound(fd, slang, len)
3404     FILE	*fd;
3405     slang_T	*slang;
3406     int		len;
3407 {
3408     int		todo = len;
3409     int		c;
3410     int		atstart;
3411     char_u	*pat;
3412     char_u	*pp;
3413     char_u	*cp;
3414     char_u	*ap;
3415     int		cnt;
3416     garray_T	*gap;
3417 
3418     if (todo < 2)
3419 	return SP_FORMERROR;	/* need at least two bytes */
3420 
3421     --todo;
3422     c = getc(fd);					/* <compmax> */
3423     if (c < 2)
3424 	c = MAXWLEN;
3425     slang->sl_compmax = c;
3426 
3427     --todo;
3428     c = getc(fd);					/* <compminlen> */
3429     if (c < 1)
3430 	c = 0;
3431     slang->sl_compminlen = c;
3432 
3433     --todo;
3434     c = getc(fd);					/* <compsylmax> */
3435     if (c < 1)
3436 	c = MAXWLEN;
3437     slang->sl_compsylmax = c;
3438 
3439     c = getc(fd);					/* <compoptions> */
3440     if (c != 0)
3441 	ungetc(c, fd);	    /* be backwards compatible with Vim 7.0b */
3442     else
3443     {
3444 	--todo;
3445 	c = getc(fd);	    /* only use the lower byte for now */
3446 	--todo;
3447 	slang->sl_compoptions = c;
3448 
3449 	gap = &slang->sl_comppat;
3450 	c = get2c(fd);					/* <comppatcount> */
3451 	todo -= 2;
3452 	ga_init2(gap, sizeof(char_u *), c);
3453 	if (ga_grow(gap, c) == OK)
3454 	    while (--c >= 0)
3455 	    {
3456 		((char_u **)(gap->ga_data))[gap->ga_len++] =
3457 						 read_cnt_string(fd, 1, &cnt);
3458 					    /* <comppatlen> <comppattext> */
3459 		if (cnt < 0)
3460 		    return cnt;
3461 		todo -= cnt + 2;
3462 	    }
3463     }
3464 
3465     /* Turn the COMPOUNDRULE items into a regexp pattern:
3466      * "a[bc]/a*b+" -> "^\(a[bc]\|a*b\+\)$".
3467      * Inserting backslashes may double the length, "^\(\)$<Nul>" is 7 bytes.
3468      * Conversion to utf-8 may double the size. */
3469     c = todo * 2 + 7;
3470 #ifdef FEAT_MBYTE
3471     if (enc_utf8)
3472 	c += todo * 2;
3473 #endif
3474     pat = alloc((unsigned)c);
3475     if (pat == NULL)
3476 	return SP_OTHERERROR;
3477 
3478     /* We also need a list of all flags that can appear at the start and one
3479      * for all flags. */
3480     cp = alloc(todo + 1);
3481     if (cp == NULL)
3482     {
3483 	vim_free(pat);
3484 	return SP_OTHERERROR;
3485     }
3486     slang->sl_compstartflags = cp;
3487     *cp = NUL;
3488 
3489     ap = alloc(todo + 1);
3490     if (ap == NULL)
3491     {
3492 	vim_free(pat);
3493 	return SP_OTHERERROR;
3494     }
3495     slang->sl_compallflags = ap;
3496     *ap = NUL;
3497 
3498     pp = pat;
3499     *pp++ = '^';
3500     *pp++ = '\\';
3501     *pp++ = '(';
3502 
3503     atstart = 1;
3504     while (todo-- > 0)
3505     {
3506 	c = getc(fd);					/* <compflags> */
3507 
3508 	/* Add all flags to "sl_compallflags". */
3509 	if (vim_strchr((char_u *)"+*[]/", c) == NULL
3510 		&& !byte_in_str(slang->sl_compallflags, c))
3511 	{
3512 	    *ap++ = c;
3513 	    *ap = NUL;
3514 	}
3515 
3516 	if (atstart != 0)
3517 	{
3518 	    /* At start of item: copy flags to "sl_compstartflags".  For a
3519 	     * [abc] item set "atstart" to 2 and copy up to the ']'. */
3520 	    if (c == '[')
3521 		atstart = 2;
3522 	    else if (c == ']')
3523 		atstart = 0;
3524 	    else
3525 	    {
3526 		if (!byte_in_str(slang->sl_compstartflags, c))
3527 		{
3528 		    *cp++ = c;
3529 		    *cp = NUL;
3530 		}
3531 		if (atstart == 1)
3532 		    atstart = 0;
3533 	    }
3534 	}
3535 	if (c == '/')	    /* slash separates two items */
3536 	{
3537 	    *pp++ = '\\';
3538 	    *pp++ = '|';
3539 	    atstart = 1;
3540 	}
3541 	else		    /* normal char, "[abc]" and '*' are copied as-is */
3542 	{
3543 	    if (c == '+' || c == '~')
3544 		*pp++ = '\\';	    /* "a+" becomes "a\+" */
3545 #ifdef FEAT_MBYTE
3546 	    if (enc_utf8)
3547 		pp += mb_char2bytes(c, pp);
3548 	    else
3549 #endif
3550 		*pp++ = c;
3551 	}
3552     }
3553 
3554     *pp++ = '\\';
3555     *pp++ = ')';
3556     *pp++ = '$';
3557     *pp = NUL;
3558 
3559     slang->sl_compprog = vim_regcomp(pat, RE_MAGIC + RE_STRING + RE_STRICT);
3560     vim_free(pat);
3561     if (slang->sl_compprog == NULL)
3562 	return SP_FORMERROR;
3563 
3564     return 0;
3565 }
3566 
3567 /*
3568  * Return TRUE if byte "n" appears in "str".
3569  * Like strchr() but independent of locale.
3570  */
3571     static int
3572 byte_in_str(str, n)
3573     char_u	*str;
3574     int		n;
3575 {
3576     char_u	*p;
3577 
3578     for (p = str; *p != NUL; ++p)
3579 	if (*p == n)
3580 	    return TRUE;
3581     return FALSE;
3582 }
3583 
/* Size of the buffer for one syllable item, including the terminating NUL.
 * init_syl_tab() refuses items of SY_MAXLEN bytes or more. */
#define SY_MAXLEN   30
/* One syllable item, an entry in the "sl_syl_items" growarray that is filled
 * by init_syl_tab() and used by count_syllables(). */
typedef struct syl_item_S
{
    char_u	sy_chars[SY_MAXLEN];	    /* the sequence of chars */
    int		sy_len;			    /* length of sy_chars in bytes */
} syl_item_T;
3590 
3591 /*
3592  * Truncate "slang->sl_syllable" at the first slash and put the following items
3593  * in "slang->sl_syl_items".
3594  */
3595     static int
3596 init_syl_tab(slang)
3597     slang_T	*slang;
3598 {
3599     char_u	*p;
3600     char_u	*s;
3601     int		l;
3602     syl_item_T	*syl;
3603 
3604     ga_init2(&slang->sl_syl_items, sizeof(syl_item_T), 4);
3605     p = vim_strchr(slang->sl_syllable, '/');
3606     while (p != NULL)
3607     {
3608 	*p++ = NUL;
3609 	if (*p == NUL)	    /* trailing slash */
3610 	    break;
3611 	s = p;
3612 	p = vim_strchr(p, '/');
3613 	if (p == NULL)
3614 	    l = STRLEN(s);
3615 	else
3616 	    l = p - s;
3617 	if (l >= SY_MAXLEN)
3618 	    return SP_FORMERROR;
3619 	if (ga_grow(&slang->sl_syl_items, 1) == FAIL)
3620 	    return SP_OTHERERROR;
3621 	syl = ((syl_item_T *)slang->sl_syl_items.ga_data)
3622 					       + slang->sl_syl_items.ga_len++;
3623 	vim_strncpy(syl->sy_chars, s, l);
3624 	syl->sy_len = l;
3625     }
3626     return OK;
3627 }
3628 
3629 /*
3630  * Count the number of syllables in "word".
3631  * When "word" contains spaces the syllables after the last space are counted.
3632  * Returns zero if syllables are not defines.
3633  */
3634     static int
3635 count_syllables(slang, word)
3636     slang_T	*slang;
3637     char_u	*word;
3638 {
3639     int		cnt = 0;
3640     int		skip = FALSE;
3641     char_u	*p;
3642     int		len;
3643     int		i;
3644     syl_item_T	*syl;
3645     int		c;
3646 
3647     if (slang->sl_syllable == NULL)
3648 	return 0;
3649 
3650     for (p = word; *p != NUL; p += len)
3651     {
3652 	/* When running into a space reset counter. */
3653 	if (*p == ' ')
3654 	{
3655 	    len = 1;
3656 	    cnt = 0;
3657 	    continue;
3658 	}
3659 
3660 	/* Find longest match of syllable items. */
3661 	len = 0;
3662 	for (i = 0; i < slang->sl_syl_items.ga_len; ++i)
3663 	{
3664 	    syl = ((syl_item_T *)slang->sl_syl_items.ga_data) + i;
3665 	    if (syl->sy_len > len
3666 			       && STRNCMP(p, syl->sy_chars, syl->sy_len) == 0)
3667 		len = syl->sy_len;
3668 	}
3669 	if (len != 0)	/* found a match, count syllable  */
3670 	{
3671 	    ++cnt;
3672 	    skip = FALSE;
3673 	}
3674 	else
3675 	{
3676 	    /* No recognized syllable item, at least a syllable char then? */
3677 #ifdef FEAT_MBYTE
3678 	    c = mb_ptr2char(p);
3679 	    len = (*mb_ptr2len)(p);
3680 #else
3681 	    c = *p;
3682 	    len = 1;
3683 #endif
3684 	    if (vim_strchr(slang->sl_syllable, c) == NULL)
3685 		skip = FALSE;	    /* No, search for next syllable */
3686 	    else if (!skip)
3687 	    {
3688 		++cnt;		    /* Yes, count it */
3689 		skip = TRUE;	    /* don't count following syllable chars */
3690 	    }
3691 	}
3692     }
3693     return cnt;
3694 }
3695 
3696 /*
3697  * Set the SOFOFROM and SOFOTO items in language "lp".
3698  * Returns SP_*ERROR flags when there is something wrong.
3699  */
3700     static int
3701 set_sofo(lp, from, to)
3702     slang_T	*lp;
3703     char_u	*from;
3704     char_u	*to;
3705 {
3706     int		i;
3707 
3708 #ifdef FEAT_MBYTE
3709     garray_T	*gap;
3710     char_u	*s;
3711     char_u	*p;
3712     int		c;
3713     int		*inp;
3714 
3715     if (has_mbyte)
3716     {
3717 	/* Use "sl_sal" as an array with 256 pointers to a list of wide
3718 	 * characters.  The index is the low byte of the character.
3719 	 * The list contains from-to pairs with a terminating NUL.
3720 	 * sl_sal_first[] is used for latin1 "from" characters. */
3721 	gap = &lp->sl_sal;
3722 	ga_init2(gap, sizeof(int *), 1);
3723 	if (ga_grow(gap, 256) == FAIL)
3724 	    return SP_OTHERERROR;
3725 	vim_memset(gap->ga_data, 0, sizeof(int *) * 256);
3726 	gap->ga_len = 256;
3727 
3728 	/* First count the number of items for each list.  Temporarily use
3729 	 * sl_sal_first[] for this. */
3730 	for (p = from, s = to; *p != NUL && *s != NUL; )
3731 	{
3732 	    c = mb_cptr2char_adv(&p);
3733 	    mb_cptr_adv(s);
3734 	    if (c >= 256)
3735 		++lp->sl_sal_first[c & 0xff];
3736 	}
3737 	if (*p != NUL || *s != NUL)	    /* lengths differ */
3738 	    return SP_FORMERROR;
3739 
3740 	/* Allocate the lists. */
3741 	for (i = 0; i < 256; ++i)
3742 	    if (lp->sl_sal_first[i] > 0)
3743 	    {
3744 		p = alloc(sizeof(int) * (lp->sl_sal_first[i] * 2 + 1));
3745 		if (p == NULL)
3746 		    return SP_OTHERERROR;
3747 		((int **)gap->ga_data)[i] = (int *)p;
3748 		*(int *)p = 0;
3749 	    }
3750 
3751 	/* Put the characters up to 255 in sl_sal_first[] the rest in a sl_sal
3752 	 * list. */
3753 	vim_memset(lp->sl_sal_first, 0, sizeof(salfirst_T) * 256);
3754 	for (p = from, s = to; *p != NUL && *s != NUL; )
3755 	{
3756 	    c = mb_cptr2char_adv(&p);
3757 	    i = mb_cptr2char_adv(&s);
3758 	    if (c >= 256)
3759 	    {
3760 		/* Append the from-to chars at the end of the list with
3761 		 * the low byte. */
3762 		inp = ((int **)gap->ga_data)[c & 0xff];
3763 		while (*inp != 0)
3764 		    ++inp;
3765 		*inp++ = c;		/* from char */
3766 		*inp++ = i;		/* to char */
3767 		*inp++ = NUL;		/* NUL at the end */
3768 	    }
3769 	    else
3770 		/* mapping byte to char is done in sl_sal_first[] */
3771 		lp->sl_sal_first[c] = i;
3772 	}
3773     }
3774     else
3775 #endif
3776     {
3777 	/* mapping bytes to bytes is done in sl_sal_first[] */
3778 	if (STRLEN(from) != STRLEN(to))
3779 	    return SP_FORMERROR;
3780 
3781 	for (i = 0; to[i] != NUL; ++i)
3782 	    lp->sl_sal_first[from[i]] = to[i];
3783 	lp->sl_sal.ga_len = 1;		/* indicates we have soundfolding */
3784     }
3785 
3786     return 0;
3787 }
3788 
/*
 * Fill the first-index table for "lp": for every byte value
 * "lp->sl_sal_first[]" gets the index of the first item in "lp->sl_sal"
 * that is found under that byte, or -1 when there is none.
 * With multi-byte characters the items are also reordered in place, so that
 * all items with the same index byte follow each other.
 */
    static void
set_sal_first(lp)
    slang_T	*lp;
{
    salfirst_T	*sfirst;
    int		i;
    salitem_T	*smp;
    int		c;
    garray_T	*gap = &lp->sl_sal;

    /* Start with "no item" for every byte. */
    sfirst = lp->sl_sal_first;
    for (i = 0; i < 256; ++i)
	sfirst[i] = -1;
    smp = (salitem_T *)gap->ga_data;
    for (i = 0; i < gap->ga_len; ++i)
    {
#ifdef FEAT_MBYTE
	if (has_mbyte)
	    /* Use the lowest byte of the first character.  For latin1 it's
	     * the character, for other encodings it should differ for most
	     * characters. */
	    c = *smp[i].sm_lead_w & 0xff;
	else
#endif
	    c = *smp[i].sm_lead;
	if (sfirst[c] == -1)
	{
	    /* First item seen for this byte. */
	    sfirst[c] = i;
#ifdef FEAT_MBYTE
	    if (has_mbyte)
	    {
		int		n;

		/* Make sure all entries with this byte are following each
		 * other.  Move the ones that are in the wrong position.  Do
		 * keep the same ordering! */
		while (i + 1 < gap->ga_len
				       && (*smp[i + 1].sm_lead_w & 0xff) == c)
		    /* Skip over entry with same index byte. */
		    ++i;

		/* "i" is now the last entry of the group, "i + n" the entry
		 * that is being inspected. */
		for (n = 1; i + n < gap->ga_len; ++n)
		    if ((*smp[i + n].sm_lead_w & 0xff) == c)
		    {
			salitem_T  tsal;

			/* Move entry with same index byte after the entries
			 * we already found.  After "++i; --n" the found entry
			 * is still at "i + n"; the "n" entries in between
			 * shift up one position to make room at "i". */
			++i;
			--n;
			tsal = smp[i + n];
			mch_memmove(smp + i + 1, smp + i,
						       sizeof(salitem_T) * n);
			smp[i] = tsal;
		    }
	    }
#endif
	}
    }
}
3852 
#ifdef FEAT_MBYTE
/*
 * Convert multi-byte string "s" to a string of wide characters: one int for
 * each character, followed by a terminating zero.
 * Returns the result in allocated memory, NULL when out of memory.
 */
    static int *
mb_str2wide(s)
    char_u	*s;
{
    int		*wide;
    int		*wp;
    char_u	*sp = s;

    wide = (int *)alloc(sizeof(int) * (mb_charlen(s) + 1));
    if (wide == NULL)
	return NULL;

    wp = wide;
    while (*sp != NUL)
	*wp++ = mb_ptr2char_adv(&sp);
    *wp = NUL;
    return wide;
}
#endif
3876 
3877 /*
3878  * Read a tree from the .spl or .sug file.
3879  * Allocates the memory and stores pointers in "bytsp" and "idxsp".
3880  * This is skipped when the tree has zero length.
3881  * Returns zero when OK, SP_ value for an error.
3882  */
3883     static int
3884 spell_read_tree(fd, bytsp, idxsp, prefixtree, prefixcnt)
3885     FILE	*fd;
3886     char_u	**bytsp;
3887     idx_T	**idxsp;
3888     int		prefixtree;	/* TRUE for the prefix tree */
3889     int		prefixcnt;	/* when "prefixtree" is TRUE: prefix count */
3890 {
3891     int		len;
3892     int		idx;
3893     char_u	*bp;
3894     idx_T	*ip;
3895 
3896     /* The tree size was computed when writing the file, so that we can
3897      * allocate it as one long block. <nodecount> */
3898     len = get4c(fd);
3899     if (len < 0)
3900 	return SP_TRUNCERROR;
3901     if (len > 0)
3902     {
3903 	/* Allocate the byte array. */
3904 	bp = lalloc((long_u)len, TRUE);
3905 	if (bp == NULL)
3906 	    return SP_OTHERERROR;
3907 	*bytsp = bp;
3908 
3909 	/* Allocate the index array. */
3910 	ip = (idx_T *)lalloc_clear((long_u)(len * sizeof(int)), TRUE);
3911 	if (ip == NULL)
3912 	    return SP_OTHERERROR;
3913 	*idxsp = ip;
3914 
3915 	/* Recursively read the tree and store it in the array. */
3916 	idx = read_tree_node(fd, bp, ip, len, 0, prefixtree, prefixcnt);
3917 	if (idx < 0)
3918 	    return idx;
3919     }
3920     return 0;
3921 }
3922 
3923 /*
3924  * Read one row of siblings from the spell file and store it in the byte array
3925  * "byts" and index array "idxs".  Recursively read the children.
3926  *
3927  * NOTE: The code here must match put_node()!
3928  *
3929  * Returns the index (>= 0) following the siblings.
3930  * Returns SP_TRUNCERROR if the file is shorter than expected.
3931  * Returns SP_FORMERROR if there is a format error.
3932  */
3933     static idx_T
3934 read_tree_node(fd, byts, idxs, maxidx, startidx, prefixtree, maxprefcondnr)
3935     FILE	*fd;
3936     char_u	*byts;
3937     idx_T	*idxs;
3938     int		maxidx;		    /* size of arrays */
3939     idx_T	startidx;	    /* current index in "byts" and "idxs" */
3940     int		prefixtree;	    /* TRUE for reading PREFIXTREE */
3941     int		maxprefcondnr;	    /* maximum for <prefcondnr> */
3942 {
3943     int		len;
3944     int		i;
3945     int		n;
3946     idx_T	idx = startidx;
3947     int		c;
3948     int		c2;
3949 #define SHARED_MASK	0x8000000
3950 
3951     len = getc(fd);					/* <siblingcount> */
3952     if (len <= 0)
3953 	return SP_TRUNCERROR;
3954 
3955     if (startidx + len >= maxidx)
3956 	return SP_FORMERROR;
3957     byts[idx++] = len;
3958 
3959     /* Read the byte values, flag/region bytes and shared indexes. */
3960     for (i = 1; i <= len; ++i)
3961     {
3962 	c = getc(fd);					/* <byte> */
3963 	if (c < 0)
3964 	    return SP_TRUNCERROR;
3965 	if (c <= BY_SPECIAL)
3966 	{
3967 	    if (c == BY_NOFLAGS && !prefixtree)
3968 	    {
3969 		/* No flags, all regions. */
3970 		idxs[idx] = 0;
3971 		c = 0;
3972 	    }
3973 	    else if (c != BY_INDEX)
3974 	    {
3975 		if (prefixtree)
3976 		{
3977 		    /* Read the optional pflags byte, the prefix ID and the
3978 		     * condition nr.  In idxs[] store the prefix ID in the low
3979 		     * byte, the condition index shifted up 8 bits, the flags
3980 		     * shifted up 24 bits. */
3981 		    if (c == BY_FLAGS)
3982 			c = getc(fd) << 24;		/* <pflags> */
3983 		    else
3984 			c = 0;
3985 
3986 		    c |= getc(fd);			/* <affixID> */
3987 
3988 		    n = get2c(fd);			/* <prefcondnr> */
3989 		    if (n >= maxprefcondnr)
3990 			return SP_FORMERROR;
3991 		    c |= (n << 8);
3992 		}
3993 		else /* c must be BY_FLAGS or BY_FLAGS2 */
3994 		{
3995 		    /* Read flags and optional region and prefix ID.  In
3996 		     * idxs[] the flags go in the low two bytes, region above
3997 		     * that and prefix ID above the region. */
3998 		    c2 = c;
3999 		    c = getc(fd);			/* <flags> */
4000 		    if (c2 == BY_FLAGS2)
4001 			c = (getc(fd) << 8) + c;	/* <flags2> */
4002 		    if (c & WF_REGION)
4003 			c = (getc(fd) << 16) + c;	/* <region> */
4004 		    if (c & WF_AFX)
4005 			c = (getc(fd) << 24) + c;	/* <affixID> */
4006 		}
4007 
4008 		idxs[idx] = c;
4009 		c = 0;
4010 	    }
4011 	    else /* c == BY_INDEX */
4012 	    {
4013 							/* <nodeidx> */
4014 		n = get3c(fd);
4015 		if (n < 0 || n >= maxidx)
4016 		    return SP_FORMERROR;
4017 		idxs[idx] = n + SHARED_MASK;
4018 		c = getc(fd);				/* <xbyte> */
4019 	    }
4020 	}
4021 	byts[idx++] = c;
4022     }
4023 
4024     /* Recursively read the children for non-shared siblings.
4025      * Skip the end-of-word ones (zero byte value) and the shared ones (and
4026      * remove SHARED_MASK) */
4027     for (i = 1; i <= len; ++i)
4028 	if (byts[startidx + i] != 0)
4029 	{
4030 	    if (idxs[startidx + i] & SHARED_MASK)
4031 		idxs[startidx + i] &= ~SHARED_MASK;
4032 	    else
4033 	    {
4034 		idxs[startidx + i] = idx;
4035 		idx = read_tree_node(fd, byts, idxs, maxidx, idx,
4036 						     prefixtree, maxprefcondnr);
4037 		if (idx < 0)
4038 		    break;
4039 	    }
4040 	}
4041 
4042     return idx;
4043 }
4044 
4045 /*
4046  * Parse 'spelllang' and set buf->b_langp accordingly.
4047  * Returns NULL if it's OK, an error message otherwise.
4048  */
4049     char_u *
4050 did_set_spelllang(buf)
4051     buf_T	*buf;
4052 {
4053     garray_T	ga;
4054     char_u	*splp;
4055     char_u	*region;
4056     char_u	region_cp[3];
4057     int		filename;
4058     int		region_mask;
4059     slang_T	*slang;
4060     int		c;
4061     char_u	lang[MAXWLEN + 1];
4062     char_u	spf_name[MAXPATHL];
4063     int		len;
4064     char_u	*p;
4065     int		round;
4066     char_u	*spf;
4067     char_u	*use_region = NULL;
4068     int		dont_use_region = FALSE;
4069     int		nobreak = FALSE;
4070     int		i, j;
4071     langp_T	*lp, *lp2;
4072 
4073     ga_init2(&ga, sizeof(langp_T), 2);
4074     clear_midword(buf);
4075 
4076     /* loop over comma separated language names. */
4077     for (splp = buf->b_p_spl; *splp != NUL; )
4078     {
4079 	/* Get one language name. */
4080 	copy_option_part(&splp, lang, MAXWLEN, ",");
4081 
4082 	region = NULL;
4083 	len = STRLEN(lang);
4084 
4085 	/* If the name ends in ".spl" use it as the name of the spell file.
4086 	 * If there is a region name let "region" point to it and remove it
4087 	 * from the name. */
4088 	if (len > 4 && fnamecmp(lang + len - 4, ".spl") == 0)
4089 	{
4090 	    filename = TRUE;
4091 
4092 	    /* Locate a region and remove it from the file name. */
4093 	    p = vim_strchr(gettail(lang), '_');
4094 	    if (p != NULL && ASCII_ISALPHA(p[1]) && ASCII_ISALPHA(p[2])
4095 						      && !ASCII_ISALPHA(p[3]))
4096 	    {
4097 		vim_strncpy(region_cp, p + 1, 2);
4098 		mch_memmove(p, p + 3, len - (p - lang) - 2);
4099 		len -= 3;
4100 		region = region_cp;
4101 	    }
4102 	    else
4103 		dont_use_region = TRUE;
4104 
4105 	    /* Check if we loaded this language before. */
4106 	    for (slang = first_lang; slang != NULL; slang = slang->sl_next)
4107 		if (fullpathcmp(lang, slang->sl_fname, FALSE) == FPC_SAME)
4108 		    break;
4109 	}
4110 	else
4111 	{
4112 	    filename = FALSE;
4113 	    if (len > 3 && lang[len - 3] == '_')
4114 	    {
4115 		region = lang + len - 2;
4116 		len -= 3;
4117 		lang[len] = NUL;
4118 	    }
4119 	    else
4120 		dont_use_region = TRUE;
4121 
4122 	    /* Check if we loaded this language before. */
4123 	    for (slang = first_lang; slang != NULL; slang = slang->sl_next)
4124 		if (STRICMP(lang, slang->sl_name) == 0)
4125 		    break;
4126 	}
4127 
4128 	if (region != NULL)
4129 	{
4130 	    /* If the region differs from what was used before then don't
4131 	     * use it for 'spellfile'. */
4132 	    if (use_region != NULL && STRCMP(region, use_region) != 0)
4133 		dont_use_region = TRUE;
4134 	    use_region = region;
4135 	}
4136 
4137 	/* If not found try loading the language now. */
4138 	if (slang == NULL)
4139 	{
4140 	    if (filename)
4141 		(void)spell_load_file(lang, lang, NULL, FALSE);
4142 	    else
4143 		spell_load_lang(lang);
4144 	}
4145 
4146 	/*
4147 	 * Loop over the languages, there can be several files for "lang".
4148 	 */
4149 	for (slang = first_lang; slang != NULL; slang = slang->sl_next)
4150 	    if (filename ? fullpathcmp(lang, slang->sl_fname, FALSE) == FPC_SAME
4151 			 : STRICMP(lang, slang->sl_name) == 0)
4152 	    {
4153 		region_mask = REGION_ALL;
4154 		if (!filename && region != NULL)
4155 		{
4156 		    /* find region in sl_regions */
4157 		    c = find_region(slang->sl_regions, region);
4158 		    if (c == REGION_ALL)
4159 		    {
4160 			if (slang->sl_add)
4161 			{
4162 			    if (*slang->sl_regions != NUL)
4163 				/* This addition file is for other regions. */
4164 				region_mask = 0;
4165 			}
4166 			else
4167 			    /* This is probably an error.  Give a warning and
4168 			     * accept the words anyway. */
4169 			    smsg((char_u *)
4170 				    _("Warning: region %s not supported"),
4171 								      region);
4172 		    }
4173 		    else
4174 			region_mask = 1 << c;
4175 		}
4176 
4177 		if (region_mask != 0)
4178 		{
4179 		    if (ga_grow(&ga, 1) == FAIL)
4180 		    {
4181 			ga_clear(&ga);
4182 			return e_outofmem;
4183 		    }
4184 		    LANGP_ENTRY(ga, ga.ga_len)->lp_slang = slang;
4185 		    LANGP_ENTRY(ga, ga.ga_len)->lp_region = region_mask;
4186 		    ++ga.ga_len;
4187 		    use_midword(slang, buf);
4188 		    if (slang->sl_nobreak)
4189 			nobreak = TRUE;
4190 		}
4191 	    }
4192     }
4193 
4194     /* round 0: load int_wordlist, if possible.
4195      * round 1: load first name in 'spellfile'.
4196      * round 2: load second name in 'spellfile.
4197      * etc. */
4198     spf = curbuf->b_p_spf;
4199     for (round = 0; round == 0 || *spf != NUL; ++round)
4200     {
4201 	if (round == 0)
4202 	{
4203 	    /* Internal wordlist, if there is one. */
4204 	    if (int_wordlist == NULL)
4205 		continue;
4206 	    int_wordlist_spl(spf_name);
4207 	}
4208 	else
4209 	{
4210 	    /* One entry in 'spellfile'. */
4211 	    copy_option_part(&spf, spf_name, MAXPATHL - 5, ",");
4212 	    STRCAT(spf_name, ".spl");
4213 
4214 	    /* If it was already found above then skip it. */
4215 	    for (c = 0; c < ga.ga_len; ++c)
4216 	    {
4217 		p = LANGP_ENTRY(ga, c)->lp_slang->sl_fname;
4218 		if (p != NULL && fullpathcmp(spf_name, p, FALSE) == FPC_SAME)
4219 		    break;
4220 	    }
4221 	    if (c < ga.ga_len)
4222 		continue;
4223 	}
4224 
4225 	/* Check if it was loaded already. */
4226 	for (slang = first_lang; slang != NULL; slang = slang->sl_next)
4227 	    if (fullpathcmp(spf_name, slang->sl_fname, FALSE) == FPC_SAME)
4228 		break;
4229 	if (slang == NULL)
4230 	{
4231 	    /* Not loaded, try loading it now.  The language name includes the
4232 	     * region name, the region is ignored otherwise.  for int_wordlist
4233 	     * use an arbitrary name. */
4234 	    if (round == 0)
4235 		STRCPY(lang, "internal wordlist");
4236 	    else
4237 	    {
4238 		vim_strncpy(lang, gettail(spf_name), MAXWLEN);
4239 		p = vim_strchr(lang, '.');
4240 		if (p != NULL)
4241 		    *p = NUL;	/* truncate at ".encoding.add" */
4242 	    }
4243 	    slang = spell_load_file(spf_name, lang, NULL, TRUE);
4244 
4245 	    /* If one of the languages has NOBREAK we assume the addition
4246 	     * files also have this. */
4247 	    if (slang != NULL && nobreak)
4248 		slang->sl_nobreak = TRUE;
4249 	}
4250 	if (slang != NULL && ga_grow(&ga, 1) == OK)
4251 	{
4252 	    region_mask = REGION_ALL;
4253 	    if (use_region != NULL && !dont_use_region)
4254 	    {
4255 		/* find region in sl_regions */
4256 		c = find_region(slang->sl_regions, use_region);
4257 		if (c != REGION_ALL)
4258 		    region_mask = 1 << c;
4259 		else if (*slang->sl_regions != NUL)
4260 		    /* This spell file is for other regions. */
4261 		    region_mask = 0;
4262 	    }
4263 
4264 	    if (region_mask != 0)
4265 	    {
4266 		LANGP_ENTRY(ga, ga.ga_len)->lp_slang = slang;
4267 		LANGP_ENTRY(ga, ga.ga_len)->lp_sallang = NULL;
4268 		LANGP_ENTRY(ga, ga.ga_len)->lp_replang = NULL;
4269 		LANGP_ENTRY(ga, ga.ga_len)->lp_region = region_mask;
4270 		++ga.ga_len;
4271 		use_midword(slang, buf);
4272 	    }
4273 	}
4274     }
4275 
4276     /* Everything is fine, store the new b_langp value. */
4277     ga_clear(&buf->b_langp);
4278     buf->b_langp = ga;
4279 
4280     /* For each language figure out what language to use for sound folding and
4281      * REP items.  If the language doesn't support it itself use another one
4282      * with the same name.  E.g. for "en-math" use "en". */
4283     for (i = 0; i < ga.ga_len; ++i)
4284     {
4285 	lp = LANGP_ENTRY(ga, i);
4286 
4287 	/* sound folding */
4288 	if (lp->lp_slang->sl_sal.ga_len > 0)
4289 	    /* language does sound folding itself */
4290 	    lp->lp_sallang = lp->lp_slang;
4291 	else
4292 	    /* find first similar language that does sound folding */
4293 	    for (j = 0; j < ga.ga_len; ++j)
4294 	    {
4295 		lp2 = LANGP_ENTRY(ga, j);
4296 		if (lp2->lp_slang->sl_sal.ga_len > 0
4297 			&& STRNCMP(lp->lp_slang->sl_name,
4298 					      lp2->lp_slang->sl_name, 2) == 0)
4299 		{
4300 		    lp->lp_sallang = lp2->lp_slang;
4301 		    break;
4302 		}
4303 	    }
4304 
4305 	/* REP items */
4306 	if (lp->lp_slang->sl_rep.ga_len > 0)
4307 	    /* language has REP items itself */
4308 	    lp->lp_replang = lp->lp_slang;
4309 	else
4310 	    /* find first similar language that has REP items */
4311 	    for (j = 0; j < ga.ga_len; ++j)
4312 	    {
4313 		lp2 = LANGP_ENTRY(ga, j);
4314 		if (lp2->lp_slang->sl_rep.ga_len > 0
4315 			&& STRNCMP(lp->lp_slang->sl_name,
4316 					      lp2->lp_slang->sl_name, 2) == 0)
4317 		{
4318 		    lp->lp_replang = lp2->lp_slang;
4319 		    break;
4320 		}
4321 	    }
4322     }
4323 
4324     return NULL;
4325 }
4326 
4327 /*
4328  * Clear the midword characters for buffer "buf".
4329  */
4330     static void
4331 clear_midword(buf)
4332     buf_T	*buf;
4333 {
4334     vim_memset(buf->b_spell_ismw, 0, 256);
4335 #ifdef FEAT_MBYTE
4336     vim_free(buf->b_spell_ismw_mb);
4337     buf->b_spell_ismw_mb = NULL;
4338 #endif
4339 }
4340 
4341 /*
4342  * Use the "sl_midword" field of language "lp" for buffer "buf".
4343  * They add up to any currently used midword characters.
4344  */
4345     static void
4346 use_midword(lp, buf)
4347     slang_T	*lp;
4348     buf_T	*buf;
4349 {
4350     char_u	*p;
4351 
4352     if (lp->sl_midword == NULL)	    /* there aren't any */
4353 	return;
4354 
4355     for (p = lp->sl_midword; *p != NUL; )
4356 #ifdef FEAT_MBYTE
4357 	if (has_mbyte)
4358 	{
4359 	    int	    c, l, n;
4360 	    char_u  *bp;
4361 
4362 	    c = mb_ptr2char(p);
4363 	    l = (*mb_ptr2len)(p);
4364 	    if (c < 256 && l <= 2)
4365 		buf->b_spell_ismw[c] = TRUE;
4366 	    else if (buf->b_spell_ismw_mb == NULL)
4367 		/* First multi-byte char in "b_spell_ismw_mb". */
4368 		buf->b_spell_ismw_mb = vim_strnsave(p, l);
4369 	    else
4370 	    {
4371 		/* Append multi-byte chars to "b_spell_ismw_mb". */
4372 		n = STRLEN(buf->b_spell_ismw_mb);
4373 		bp = vim_strnsave(buf->b_spell_ismw_mb, n + l);
4374 		if (bp != NULL)
4375 		{
4376 		    vim_free(buf->b_spell_ismw_mb);
4377 		    buf->b_spell_ismw_mb = bp;
4378 		    vim_strncpy(bp + n, p, l);
4379 		}
4380 	    }
4381 	    p += l;
4382 	}
4383 	else
4384 #endif
4385 	    buf->b_spell_ismw[*p++] = TRUE;
4386 }
4387 
4388 /*
4389  * Find the region "region[2]" in "rp" (points to "sl_regions").
4390  * Each region is simply stored as the two characters of it's name.
4391  * Returns the index if found (first is 0), REGION_ALL if not found.
4392  */
4393     static int
4394 find_region(rp, region)
4395     char_u	*rp;
4396     char_u	*region;
4397 {
4398     int		i;
4399 
4400     for (i = 0; ; i += 2)
4401     {
4402 	if (rp[i] == NUL)
4403 	    return REGION_ALL;
4404 	if (rp[i] == region[0] && rp[i + 1] == region[1])
4405 	    break;
4406     }
4407     return i / 2;
4408 }
4409 
4410 /*
4411  * Return case type of word:
4412  * w word	0
4413  * Word		WF_ONECAP
4414  * W WORD	WF_ALLCAP
4415  * WoRd	wOrd	WF_KEEPCAP
4416  */
4417     static int
4418 captype(word, end)
4419     char_u	*word;
4420     char_u	*end;	    /* When NULL use up to NUL byte. */
4421 {
4422     char_u	*p;
4423     int		c;
4424     int		firstcap;
4425     int		allcap;
4426     int		past_second = FALSE;	/* past second word char */
4427 
4428     /* find first letter */
4429     for (p = word; !spell_iswordp_nmw(p); mb_ptr_adv(p))
4430 	if (end == NULL ? *p == NUL : p >= end)
4431 	    return 0;	    /* only non-word characters, illegal word */
4432 #ifdef FEAT_MBYTE
4433     if (has_mbyte)
4434 	c = mb_ptr2char_adv(&p);
4435     else
4436 #endif
4437 	c = *p++;
4438     firstcap = allcap = SPELL_ISUPPER(c);
4439 
4440     /*
4441      * Need to check all letters to find a word with mixed upper/lower.
4442      * But a word with an upper char only at start is a ONECAP.
4443      */
4444     for ( ; end == NULL ? *p != NUL : p < end; mb_ptr_adv(p))
4445 	if (spell_iswordp_nmw(p))
4446 	{
4447 	    c = PTR2CHAR(p);
4448 	    if (!SPELL_ISUPPER(c))
4449 	    {
4450 		/* UUl -> KEEPCAP */
4451 		if (past_second && allcap)
4452 		    return WF_KEEPCAP;
4453 		allcap = FALSE;
4454 	    }
4455 	    else if (!allcap)
4456 		/* UlU -> KEEPCAP */
4457 		return WF_KEEPCAP;
4458 	    past_second = TRUE;
4459 	}
4460 
4461     if (allcap)
4462 	return WF_ALLCAP;
4463     if (firstcap)
4464 	return WF_ONECAP;
4465     return 0;
4466 }
4467 
4468 /*
4469  * Like captype() but for a KEEPCAP word add ONECAP if the word starts with a
4470  * capital.  So that make_case_word() can turn WOrd into Word.
4471  * Add ALLCAP for "WOrD".
4472  */
4473     static int
4474 badword_captype(word, end)
4475     char_u	*word;
4476     char_u	*end;
4477 {
4478     int		flags = captype(word, end);
4479     int		c;
4480     int		l, u;
4481     int		first;
4482     char_u	*p;
4483 
4484     if (flags & WF_KEEPCAP)
4485     {
4486 	/* Count the number of UPPER and lower case letters. */
4487 	l = u = 0;
4488 	first = FALSE;
4489 	for (p = word; p < end; mb_ptr_adv(p))
4490 	{
4491 	    c = PTR2CHAR(p);
4492 	    if (SPELL_ISUPPER(c))
4493 	    {
4494 		++u;
4495 		if (p == word)
4496 		    first = TRUE;
4497 	    }
4498 	    else
4499 		++l;
4500 	}
4501 
4502 	/* If there are more UPPER than lower case letters suggest an
4503 	 * ALLCAP word.  Otherwise, if the first letter is UPPER then
4504 	 * suggest ONECAP.  Exception: "ALl" most likely should be "All",
4505 	 * require three upper case letters. */
4506 	if (u > l && u > 2)
4507 	    flags |= WF_ALLCAP;
4508 	else if (first)
4509 	    flags |= WF_ONECAP;
4510 
4511 	if (u >= 2 && l >= 2)	/* maCARONI maCAroni */
4512 	    flags |= WF_MIXCAP;
4513     }
4514     return flags;
4515 }
4516 
4517 # if defined(FEAT_MBYTE) || defined(EXITFREE) || defined(PROTO)
4518 /*
4519  * Free all languages.
4520  */
4521     void
4522 spell_free_all()
4523 {
4524     slang_T	*slang;
4525     buf_T	*buf;
4526     char_u	fname[MAXPATHL];
4527 
4528     /* Go through all buffers and handle 'spelllang'. */
4529     for (buf = firstbuf; buf != NULL; buf = buf->b_next)
4530 	ga_clear(&buf->b_langp);
4531 
4532     while (first_lang != NULL)
4533     {
4534 	slang = first_lang;
4535 	first_lang = slang->sl_next;
4536 	slang_free(slang);
4537     }
4538 
4539     if (int_wordlist != NULL)
4540     {
4541 	/* Delete the internal wordlist and its .spl file */
4542 	mch_remove(int_wordlist);
4543 	int_wordlist_spl(fname);
4544 	mch_remove(fname);
4545 	vim_free(int_wordlist);
4546 	int_wordlist = NULL;
4547     }
4548 
4549     init_spell_chartab();
4550 
4551     vim_free(repl_to);
4552     repl_to = NULL;
4553     vim_free(repl_from);
4554     repl_from = NULL;
4555 }
4556 # endif
4557 
4558 # if defined(FEAT_MBYTE) || defined(PROTO)
4559 /*
4560  * Clear all spelling tables and reload them.
4561  * Used after 'encoding' is set and when ":mkspell" was used.
4562  */
4563     void
4564 spell_reload()
4565 {
4566     buf_T	*buf;
4567     win_T	*wp;
4568 
4569     /* Initialize the table for spell_iswordp(). */
4570     init_spell_chartab();
4571 
4572     /* Unload all allocated memory. */
4573     spell_free_all();
4574 
4575     /* Go through all buffers and handle 'spelllang'. */
4576     for (buf = firstbuf; buf != NULL; buf = buf->b_next)
4577     {
4578 	/* Only load the wordlists when 'spelllang' is set and there is a
4579 	 * window for this buffer in which 'spell' is set. */
4580 	if (*buf->b_p_spl != NUL)
4581 	{
4582 	    FOR_ALL_WINDOWS(wp)
4583 		if (wp->w_buffer == buf && wp->w_p_spell)
4584 		{
4585 		    (void)did_set_spelllang(buf);
4586 # ifdef FEAT_WINDOWS
4587 		    break;
4588 # endif
4589 		}
4590 	}
4591     }
4592 }
4593 # endif
4594 
4595 /*
4596  * Reload the spell file "fname" if it's loaded.
4597  */
4598     static void
4599 spell_reload_one(fname, added_word)
4600     char_u	*fname;
4601     int		added_word;	/* invoked through "zg" */
4602 {
4603     slang_T	*slang;
4604     int		didit = FALSE;
4605 
4606     for (slang = first_lang; slang != NULL; slang = slang->sl_next)
4607     {
4608 	if (fullpathcmp(fname, slang->sl_fname, FALSE) == FPC_SAME)
4609 	{
4610 	    slang_clear(slang);
4611 	    if (spell_load_file(fname, NULL, slang, FALSE) == NULL)
4612 		/* reloading failed, clear the language */
4613 		slang_clear(slang);
4614 	    redraw_all_later(SOME_VALID);
4615 	    didit = TRUE;
4616 	}
4617     }
4618 
4619     /* When "zg" was used and the file wasn't loaded yet, should redo
4620      * 'spelllang' to load it now. */
4621     if (added_word && !didit)
4622 	did_set_spelllang(curbuf);
4623 }
4624 
4625 
4626 /*
4627  * Functions for ":mkspell".
4628  */
4629 
4630 #define MAXLINELEN  500		/* Maximum length in bytes of a line in a .aff
4631 				   and .dic file. */
4632 /*
4633  * Main structure to store the contents of a ".aff" file.
4634  */
4635 typedef struct afffile_S
4636 {
4637     char_u	*af_enc;	/* "SET", normalized, alloc'ed string or NULL */
4638     int		af_flagtype;	/* AFT_CHAR, AFT_LONG, AFT_NUM or AFT_CAPLONG */
4639     unsigned	af_rare;	/* RARE ID for rare word */
4640     unsigned	af_keepcase;	/* KEEPCASE ID for keep-case word */
4641     unsigned	af_bad;		/* BAD ID for banned word */
4642     unsigned	af_needaffix;	/* NEEDAFFIX ID */
4643     unsigned	af_needcomp;	/* NEEDCOMPOUND ID */
4644     unsigned	af_comproot;	/* COMPOUNDROOT ID */
4645     unsigned	af_compforbid;	/* COMPOUNDFORBIDFLAG ID */
4646     unsigned	af_comppermit;	/* COMPOUNDPERMITFLAG ID */
4647     unsigned	af_nosuggest;	/* NOSUGGEST ID */
4648     int		af_pfxpostpone;	/* postpone prefixes without chop string and
4649 				   without flags */
4650     hashtab_T	af_pref;	/* hashtable for prefixes, affheader_T */
4651     hashtab_T	af_suff;	/* hashtable for suffixes, affheader_T */
4652     hashtab_T	af_comp;	/* hashtable for compound flags, compitem_T */
4653 } afffile_T;
4654 
4655 #define AFT_CHAR	0	/* flags are one character */
4656 #define AFT_LONG	1	/* flags are two characters */
4657 #define AFT_CAPLONG	2	/* flags are one or two characters */
4658 #define AFT_NUM		3	/* flags are numbers, comma separated */
4659 
4660 typedef struct affentry_S affentry_T;
4661 /* Affix entry from ".aff" file.  Used for prefixes and suffixes. */
4662 struct affentry_S
4663 {
4664     affentry_T	*ae_next;	/* next affix with same name/number */
4665     char_u	*ae_chop;	/* text to chop off basic word (can be NULL) */
4666     char_u	*ae_add;	/* text to add to basic word (can be NULL) */
4667     char_u	*ae_flags;	/* flags on the affix (can be NULL) */
4668     char_u	*ae_cond;	/* condition (NULL for ".") */
4669     regprog_T	*ae_prog;	/* regexp program for ae_cond or NULL */
4670     char_u	ae_nocomp;	/* word with affix not compoundable */
4671 };
4672 
4673 #ifdef FEAT_MBYTE
4674 # define AH_KEY_LEN 17		/* 2 x 8 bytes + NUL */
4675 #else
4676 # define AH_KEY_LEN 7		/* 6 digits + NUL */
4677 #endif
4678 
4679 /* Affix header from ".aff" file.  Used for af_pref and af_suff. */
4680 typedef struct affheader_S
4681 {
4682     char_u	ah_key[AH_KEY_LEN]; /* key for hashtab == name of affix */
4683     unsigned	ah_flag;	/* affix name as number, uses "af_flagtype" */
4684     int		ah_newID;	/* prefix ID after renumbering; 0 if not used */
4685     int		ah_combine;	/* suffix may combine with prefix */
4686     int		ah_follows;	/* another affix block should be following */
4687     affentry_T	*ah_first;	/* first affix entry */
4688 } affheader_T;
4689 
4690 #define HI2AH(hi)   ((affheader_T *)(hi)->hi_key)
4691 
4692 /* Flag used in compound items. */
4693 typedef struct compitem_S
4694 {
4695     char_u	ci_key[AH_KEY_LEN]; /* key for hashtab == name of compound */
4696     unsigned	ci_flag;	/* affix name as number, uses "af_flagtype" */
4697     int		ci_newID;	/* affix ID after renumbering. */
4698 } compitem_T;
4699 
4700 #define HI2CI(hi)   ((compitem_T *)(hi)->hi_key)
4701 
4702 /*
4703  * Structure that is used to store the items in the word tree.  This avoids
4704  * the need to keep track of each allocated thing, everything is freed all at
4705  * once after ":mkspell" is done.
4706  */
4707 #define  SBLOCKSIZE 16000	/* size of sb_data */
4708 typedef struct sblock_S sblock_T;
4709 struct sblock_S
4710 {
4711     sblock_T	*sb_next;	/* next block in list */
4712     int		sb_used;	/* nr of bytes already in use */
4713     char_u	sb_data[1];	/* data, actually longer */
4714 };
4715 
4716 /*
4717  * A node in the tree.
4718  */
4719 typedef struct wordnode_S wordnode_T;
4720 struct wordnode_S
4721 {
4722     union   /* shared to save space */
4723     {
4724 	char_u	hashkey[6];	/* the hash key, only used while compressing */
4725 	int	index;		/* index in written nodes (valid after first
4726 				   round) */
4727     } wn_u1;
4728     union   /* shared to save space */
4729     {
4730 	wordnode_T *next;	/* next node with same hash key */
4731 	wordnode_T *wnode;	/* parent node that will write this node */
4732     } wn_u2;
4733     wordnode_T	*wn_child;	/* child (next byte in word) */
4734     wordnode_T  *wn_sibling;	/* next sibling (alternate byte in word,
4735 				   always sorted) */
4736     int		wn_refs;	/* Nr. of references to this node.  Only
4737 				   relevant for first node in a list of
4738 				   siblings, in following siblings it is
4739 				   always one. */
4740     char_u	wn_byte;	/* Byte for this node. NUL for word end */
4741 
4742     /* Info for when "wn_byte" is NUL.
4743      * In PREFIXTREE "wn_region" is used for the prefcondnr.
4744      * In the soundfolded word tree "wn_flags" has the MSW of the wordnr and
4745      * "wn_region" the LSW of the wordnr. */
4746     char_u	wn_affixID;	/* supported/required prefix ID or 0 */
4747     short_u	wn_flags;	/* WF_ flags */
4748     short	wn_region;	/* region mask */
4749 
4750 #ifdef SPELL_PRINTTREE
4751     int		wn_nr;		/* sequence nr for printing */
4752 #endif
4753 };
4754 
4755 #define WN_MASK	 0xffff		/* mask relevant bits of "wn_flags" */
4756 
4757 #define HI2WN(hi)    (wordnode_T *)((hi)->hi_key)
4758 
4759 /*
4760  * Info used while reading the spell files.
4761  */
4762 typedef struct spellinfo_S
4763 {
4764     wordnode_T	*si_foldroot;	/* tree with case-folded words */
4765     long	si_foldwcount;	/* nr of words in si_foldroot */
4766 
4767     wordnode_T	*si_keeproot;	/* tree with keep-case words */
4768     long	si_keepwcount;	/* nr of words in si_keeproot */
4769 
4770     wordnode_T	*si_prefroot;	/* tree with postponed prefixes */
4771 
4772     long	si_sugtree;	/* creating the soundfolding trie */
4773 
4774     sblock_T	*si_blocks;	/* memory blocks used */
4775     long	si_blocks_cnt;	/* memory blocks allocated */
4776     long	si_compress_cnt;    /* words to add before lowering
4777 				       compression limit */
4778     wordnode_T	*si_first_free; /* List of nodes that have been freed during
4779 				   compression, linked by "wn_child" field. */
4780     long	si_free_count;	/* number of nodes in si_first_free */
4781 #ifdef SPELL_PRINTTREE
4782     int		si_wordnode_nr;	/* sequence nr for nodes */
4783 #endif
4784     buf_T	*si_spellbuf;	/* buffer used to store soundfold word table */
4785 
4786     int		si_ascii;	/* handling only ASCII words */
4787     int		si_add;		/* addition file */
4788     int		si_clear_chartab;   /* when TRUE clear char tables */
4789     int		si_region;	/* region mask */
4790     vimconv_T	si_conv;	/* for conversion to 'encoding' */
4791     int		si_memtot;	/* runtime memory used */
4792     int		si_verbose;	/* verbose messages */
4793     int		si_msg_count;	/* number of words added since last message */
4794     char_u	*si_info;	/* info text chars or NULL  */
4795     int		si_region_count; /* number of regions supported (1 when there
4796 				    are no regions) */
4797     char_u	si_region_name[16]; /* region names; used only if
4798 				     * si_region_count > 1) */
4799 
4800     garray_T	si_rep;		/* list of fromto_T entries from REP lines */
4801     garray_T	si_repsal;	/* list of fromto_T entries from REPSAL lines */
4802     garray_T	si_sal;		/* list of fromto_T entries from SAL lines */
4803     char_u	*si_sofofr;	/* SOFOFROM text */
4804     char_u	*si_sofoto;	/* SOFOTO text */
4805     int		si_nosugfile;	/* NOSUGFILE item found */
4806     int		si_nosplitsugs;	/* NOSPLITSUGS item found */
4807     int		si_followup;	/* soundsalike: ? */
4808     int		si_collapse;	/* soundsalike: ? */
4809     hashtab_T	si_commonwords;	/* hashtable for common words */
4810     time_t	si_sugtime;	/* timestamp for .sug file */
4811     int		si_rem_accents;	/* soundsalike: remove accents */
4812     garray_T	si_map;		/* MAP info concatenated */
4813     char_u	*si_midword;	/* MIDWORD chars or NULL  */
4814     int		si_compmax;	/* max nr of words for compounding */
4815     int		si_compminlen;	/* minimal length for compounding */
4816     int		si_compsylmax;	/* max nr of syllables for compounding */
4817     int		si_compoptions;	/* COMP_ flags */
4818     garray_T	si_comppat;	/* CHECKCOMPOUNDPATTERN items, each stored as
4819 				   a string */
4820     char_u	*si_compflags;	/* flags used for compounding */
4821     char_u	si_nobreak;	/* NOBREAK */
4822     char_u	*si_syllable;	/* syllable string */
4823     garray_T	si_prefcond;	/* table with conditions for postponed
4824 				 * prefixes, each stored as a string */
4825     int		si_newprefID;	/* current value for ah_newID */
4826     int		si_newcompID;	/* current value for compound ID */
4827 } spellinfo_T;
4828 
4829 static afffile_T *spell_read_aff __ARGS((spellinfo_T *spin, char_u *fname));
4830 static int spell_info_item __ARGS((char_u *s));
4831 static unsigned affitem2flag __ARGS((int flagtype, char_u *item, char_u	*fname, int lnum));
4832 static unsigned get_affitem __ARGS((int flagtype, char_u **pp));
4833 static void process_compflags __ARGS((spellinfo_T *spin, afffile_T *aff, char_u *compflags));
4834 static void check_renumber __ARGS((spellinfo_T *spin));
/* Prototypes for file-local (static) helper functions.  Each parameter list
 * is wrapped in __ARGS(()).
 * NOTE(review): presumably __ARGS() exists for compilers without prototype
 * support -- confirm against the header that defines it. */
4835 static int flag_in_afflist __ARGS((int flagtype, char_u *afflist, unsigned flag));
4836 static void aff_check_number __ARGS((int spinval, int affval, char *name));
4837 static void aff_check_string __ARGS((char_u *spinval, char_u *affval, char *name));
4838 static int str_equal __ARGS((char_u *s1, char_u	*s2));
4839 static void add_fromto __ARGS((spellinfo_T *spin, garray_T *gap, char_u	*from, char_u *to));
4840 static int sal_to_bool __ARGS((char_u *s));
4841 static int has_non_ascii __ARGS((char_u *s));
4842 static void spell_free_aff __ARGS((afffile_T *aff));
4843 static int spell_read_dic __ARGS((spellinfo_T *spin, char_u *fname, afffile_T *affile));
4844 static int get_pfxlist __ARGS((afffile_T *affile, char_u *afflist, char_u *store_afflist));
4845 static void get_compflags __ARGS((afffile_T *affile, char_u *afflist, char_u *store_afflist));
4846 static int store_aff_word __ARGS((spellinfo_T *spin, char_u *word, char_u *afflist, afffile_T *affile, hashtab_T *ht, hashtab_T *xht, int comb, int flags, char_u *pfxlist, int pfxlen));
4847 static int spell_read_wordfile __ARGS((spellinfo_T *spin, char_u *fname));
/* spell_read_aff() passes TRUE for "align" when allocating structures and
 * FALSE when allocating room for strings. */
4848 static void *getroom __ARGS((spellinfo_T *spin, size_t len, int align));
4849 static char_u *getroom_save __ARGS((spellinfo_T *spin, char_u *s));
4850 static void free_blocks __ARGS((sblock_T *bl));
4851 static wordnode_T *wordtree_alloc __ARGS((spellinfo_T *spin));
4852 static int store_word __ARGS((spellinfo_T *spin, char_u *word, int flags, int region, char_u *pfxlist, int need_affix));
/* A negative "flags" value (built from PFX_FLAGS) tells tree_add_word() that
 * the word is added to the postponed prefixes tree. */
4853 static int tree_add_word __ARGS((spellinfo_T *spin, char_u *word, wordnode_T *tree, int flags, int region, int affixID));
4854 static wordnode_T *get_wordnode __ARGS((spellinfo_T *spin));
4855 static int deref_wordnode __ARGS((spellinfo_T *spin, wordnode_T *node));
4856 static void free_wordnode __ARGS((spellinfo_T *spin, wordnode_T *n));
4857 static void wordtree_compress __ARGS((spellinfo_T *spin, wordnode_T *root));
4858 static int node_compress __ARGS((spellinfo_T *spin, wordnode_T *node, hashtab_T *ht, int *tot));
4859 static int node_equal __ARGS((wordnode_T *n1, wordnode_T *n2));
4860 static void put_sugtime __ARGS((spellinfo_T *spin, FILE *fd));
4861 static int write_vim_spell __ARGS((spellinfo_T *spin, char_u *fname));
4862 static void clear_node __ARGS((wordnode_T *node));
4863 static int put_node __ARGS((FILE *fd, wordnode_T *node, int index, int regionmask, int prefixtree));
4864 static void spell_make_sugfile __ARGS((spellinfo_T *spin, char_u *wfname));
4865 static int sug_filltree __ARGS((spellinfo_T *spin, slang_T *slang));
4866 static int sug_maketable __ARGS((spellinfo_T *spin));
4867 static int sug_filltable __ARGS((spellinfo_T *spin, wordnode_T *node, int startwordnr, garray_T *gap));
4868 static int offset2bytes __ARGS((int nr, char_u *buf));
4869 static int bytes2offset __ARGS((char_u **pp));
4870 static void sug_write __ARGS((spellinfo_T *spin, char_u *fname));
4871 static void mkspell __ARGS((int fcount, char_u **fnames, int ascii, int overwrite, int added_word));
4872 static void spell_message __ARGS((spellinfo_T *spin, char_u *str));
4873 static void init_spellfile __ARGS((void));
4874 
/* In the postponed prefixes tree wn_flags is used to store the WFP_ flags,
 * but it must be negative to indicate the prefix tree to tree_add_word().
 * Use a negative number with the lower 8 bits zero, so that WFP_ flags can
 * be OR-ed into it (e.g. "PFX_FLAGS | WFP_NC").
 * The value is parenthesized so that the macro expands safely inside any
 * expression. */
#define PFX_FLAGS	(-256)

4879 
4880 /*
4881  * Tunable parameters for when the tree is compressed.  See 'mkspellmem'.
 * The values given here are the initial values.  The first two are counted
 * in units of SBLOCKSIZE, the third one is a number of words.
 * NOTE(review): presumably these correspond to the {start}, {inc} and
 * {added} fields of the 'mkspellmem' option and are overwritten when that
 * option is parsed -- confirm against the option handling code.
4882  */
4883 static long compress_start = 30000;	/* memory / SBLOCKSIZE; presumably the amount used before the first compression -- TODO confirm */
4884 static long compress_inc = 100;		/* memory / SBLOCKSIZE; presumably the extra amount allowed before compressing again -- TODO confirm */
4885 static long compress_added = 500000;	/* word count; presumably words added before compressing -- TODO confirm */
4886 
4887 #ifdef SPELL_PRINTTREE
/*
 * For debugging the tree code: print the current tree in a (more or less)
 * readable format, so that we can see what happens when adding a word and/or
 * compressing the tree.
 * Based on code from Olaf Seibert.
 *
 * Every tree level gets a column of PRINTWIDTH characters: the text for level
 * "depth" starts at offset depth * PRINTWIDTH in the line buffers below.
 */
#define PRINTLINESIZE	1000
#define PRINTWIDTH	6

/*
 * Format "fmt" with the arguments "a1" and "a2" into line buffer "l", in the
 * column that belongs to "depth".
 * All macro arguments are parenthesized, so that an expression such as
 * "depth + 1" can be passed.
 * When the column would start at or beyond the end of the buffer nothing is
 * written, instead of passing a negative size to vim_snprintf().
 * Note that "depth" is evaluated more than once.
 */
#define PRINTSOME(l, depth, fmt, a1, a2) \
	do { \
	    if ((depth) * PRINTWIDTH < PRINTLINESIZE) \
		vim_snprintf((l) + (depth) * PRINTWIDTH, \
			(size_t)(PRINTLINESIZE - (depth) * PRINTWIDTH), \
			(fmt), (a1), (a2)); \
	} while (0)

/* The three text lines that are filled by PRINTSOME() and then displayed
 * together. */
static char line1[PRINTLINESIZE];
static char line2[PRINTLINESIZE];
static char line3[PRINTLINESIZE];
4903 
4904     static void
4905 spell_clear_flags(wordnode_T *node)
4906 {
4907     wordnode_T	*np;
4908 
4909     for (np = node; np != NULL; np = np->wn_sibling)
4910     {
4911 	np->wn_u1.index = FALSE;
4912 	spell_clear_flags(np->wn_child);
4913     }
4914 }
4915 
4916     static void
4917 spell_print_node(wordnode_T *node, int depth)
4918 {
4919     if (node->wn_u1.index)
4920     {
4921 	/* Done this node before, print the reference. */
4922 	PRINTSOME(line1, depth, "(%d)", node->wn_nr, 0);
4923 	PRINTSOME(line2, depth, "    ", 0, 0);
4924 	PRINTSOME(line3, depth, "    ", 0, 0);
4925 	msg(line1);
4926 	msg(line2);
4927 	msg(line3);
4928     }
4929     else
4930     {
4931 	node->wn_u1.index = TRUE;
4932 
4933 	if (node->wn_byte != NUL)
4934 	{
4935 	    if (node->wn_child != NULL)
4936 		PRINTSOME(line1, depth, " %c -> ", node->wn_byte, 0);
4937 	    else
4938 		/* Cannot happen? */
4939 		PRINTSOME(line1, depth, " %c ???", node->wn_byte, 0);
4940 	}
4941 	else
4942 	    PRINTSOME(line1, depth, " $    ", 0, 0);
4943 
4944 	PRINTSOME(line2, depth, "%d/%d    ", node->wn_nr, node->wn_refs);
4945 
4946 	if (node->wn_sibling != NULL)
4947 	    PRINTSOME(line3, depth, " |    ", 0, 0);
4948 	else
4949 	    PRINTSOME(line3, depth, "      ", 0, 0);
4950 
4951 	if (node->wn_byte == NUL)
4952 	{
4953 	    msg(line1);
4954 	    msg(line2);
4955 	    msg(line3);
4956 	}
4957 
4958 	/* do the children */
4959 	if (node->wn_byte != NUL && node->wn_child != NULL)
4960 	    spell_print_node(node->wn_child, depth + 1);
4961 
4962 	/* do the siblings */
4963 	if (node->wn_sibling != NULL)
4964 	{
4965 	    /* get rid of all parent details except | */
4966 	    STRCPY(line1, line3);
4967 	    STRCPY(line2, line3);
4968 	    spell_print_node(node->wn_sibling, depth);
4969 	}
4970     }
4971 }
4972 
4973     static void
4974 spell_print_tree(wordnode_T *root)
4975 {
4976     if (root != NULL)
4977     {
4978 	/* Clear the "wn_u1.index" fields, used to remember what has been
4979 	 * done. */
4980 	spell_clear_flags(root);
4981 
4982 	/* Recursively print the tree. */
4983 	spell_print_node(root, 0);
4984     }
4985 }
4986 #endif /* SPELL_PRINTTREE */
4987 
4988 /*
4989  * Read the affix file "fname".
4990  * Returns an afffile_T, NULL for complete failure.
4991  */
4992     static afffile_T *
4993 spell_read_aff(spin, fname)
4994     spellinfo_T	*spin;
4995     char_u	*fname;
4996 {
4997     FILE	*fd;
4998     afffile_T	*aff;
4999     char_u	rline[MAXLINELEN];
5000     char_u	*line;
5001     char_u	*pc = NULL;
5002 #define MAXITEMCNT  30
5003     char_u	*(items[MAXITEMCNT]);
5004     int		itemcnt;
5005     char_u	*p;
5006     int		lnum = 0;
5007     affheader_T	*cur_aff = NULL;
5008     int		did_postpone_prefix = FALSE;
5009     int		aff_todo = 0;
5010     hashtab_T	*tp;
5011     char_u	*low = NULL;
5012     char_u	*fol = NULL;
5013     char_u	*upp = NULL;
5014     int		do_rep;
5015     int		do_repsal;
5016     int		do_sal;
5017     int		do_map;
5018     int		found_map = FALSE;
5019     hashitem_T	*hi;
5020     int		l;
5021     int		compminlen = 0;		/* COMPOUNDMIN value */
5022     int		compsylmax = 0;		/* COMPOUNDSYLMAX value */
5023     int		compoptions = 0;	/* COMP_ flags */
5024     int		compmax = 0;		/* COMPOUNDWORDMAX value */
5025     char_u	*compflags = NULL;	/* COMPOUNDFLAG and COMPOUNDRULE
5026 					   concatenated */
5027     char_u	*midword = NULL;	/* MIDWORD value */
5028     char_u	*syllable = NULL;	/* SYLLABLE value */
5029     char_u	*sofofrom = NULL;	/* SOFOFROM value */
5030     char_u	*sofoto = NULL;		/* SOFOTO value */
5031 
5032     /*
5033      * Open the file.
5034      */
5035     fd = mch_fopen((char *)fname, "r");
5036     if (fd == NULL)
5037     {
5038 	EMSG2(_(e_notopen), fname);
5039 	return NULL;
5040     }
5041 
5042     vim_snprintf((char *)IObuff, IOSIZE, _("Reading affix file %s ..."), fname);
5043     spell_message(spin, IObuff);
5044 
5045     /* Only do REP lines when not done in another .aff file already. */
5046     do_rep = spin->si_rep.ga_len == 0;
5047 
5048     /* Only do REPSAL lines when not done in another .aff file already. */
5049     do_repsal = spin->si_repsal.ga_len == 0;
5050 
5051     /* Only do SAL lines when not done in another .aff file already. */
5052     do_sal = spin->si_sal.ga_len == 0;
5053 
5054     /* Only do MAP lines when not done in another .aff file already. */
5055     do_map = spin->si_map.ga_len == 0;
5056 
5057     /*
5058      * Allocate and init the afffile_T structure.
5059      */
5060     aff = (afffile_T *)getroom(spin, sizeof(afffile_T), TRUE);
5061     if (aff == NULL)
5062 	return NULL;
5063     hash_init(&aff->af_pref);
5064     hash_init(&aff->af_suff);
5065     hash_init(&aff->af_comp);
5066 
5067     /*
5068      * Read all the lines in the file one by one.
5069      */
5070     while (!vim_fgets(rline, MAXLINELEN, fd) && !got_int)
5071     {
5072 	line_breakcheck();
5073 	++lnum;
5074 
5075 	/* Skip comment lines. */
5076 	if (*rline == '#')
5077 	    continue;
5078 
5079 	/* Convert from "SET" to 'encoding' when needed. */
5080 	vim_free(pc);
5081 #ifdef FEAT_MBYTE
5082 	if (spin->si_conv.vc_type != CONV_NONE)
5083 	{
5084 	    pc = string_convert(&spin->si_conv, rline, NULL);
5085 	    if (pc == NULL)
5086 	    {
5087 		smsg((char_u *)_("Conversion failure for word in %s line %d: %s"),
5088 							   fname, lnum, rline);
5089 		continue;
5090 	    }
5091 	    line = pc;
5092 	}
5093 	else
5094 #endif
5095 	{
5096 	    pc = NULL;
5097 	    line = rline;
5098 	}
5099 
5100 	/* Split the line up in white separated items.  Put a NUL after each
5101 	 * item. */
5102 	itemcnt = 0;
5103 	for (p = line; ; )
5104 	{
5105 	    while (*p != NUL && *p <= ' ')  /* skip white space and CR/NL */
5106 		++p;
5107 	    if (*p == NUL)
5108 		break;
5109 	    if (itemcnt == MAXITEMCNT)	    /* too many items */
5110 		break;
5111 	    items[itemcnt++] = p;
5112 	    /* A few items have arbitrary text argument, don't split them. */
5113 	    if (itemcnt == 2 && spell_info_item(items[0]))
5114 		while (*p >= ' ' || *p == TAB)    /* skip until CR/NL */
5115 		    ++p;
5116 	    else
5117 		while (*p > ' ')    /* skip until white space or CR/NL */
5118 		    ++p;
5119 	    if (*p == NUL)
5120 		break;
5121 	    *p++ = NUL;
5122 	}
5123 
5124 	/* Handle non-empty lines. */
5125 	if (itemcnt > 0)
5126 	{
5127 	    if (STRCMP(items[0], "SET") == 0 && itemcnt == 2
5128 						       && aff->af_enc == NULL)
5129 	    {
5130 #ifdef FEAT_MBYTE
5131 		/* Setup for conversion from "ENC" to 'encoding'. */
5132 		aff->af_enc = enc_canonize(items[1]);
5133 		if (aff->af_enc != NULL && !spin->si_ascii
5134 			&& convert_setup(&spin->si_conv, aff->af_enc,
5135 							       p_enc) == FAIL)
5136 		    smsg((char_u *)_("Conversion in %s not supported: from %s to %s"),
5137 					       fname, aff->af_enc, p_enc);
5138 		spin->si_conv.vc_fail = TRUE;
5139 #else
5140 		    smsg((char_u *)_("Conversion in %s not supported"), fname);
5141 #endif
5142 	    }
5143 	    else if (STRCMP(items[0], "FLAG") == 0 && itemcnt == 2
5144 					      && aff->af_flagtype == AFT_CHAR)
5145 	    {
5146 		if (STRCMP(items[1], "long") == 0)
5147 		    aff->af_flagtype = AFT_LONG;
5148 		else if (STRCMP(items[1], "num") == 0)
5149 		    aff->af_flagtype = AFT_NUM;
5150 		else if (STRCMP(items[1], "caplong") == 0)
5151 		    aff->af_flagtype = AFT_CAPLONG;
5152 		else
5153 		    smsg((char_u *)_("Invalid value for FLAG in %s line %d: %s"),
5154 			    fname, lnum, items[1]);
5155 		if (aff->af_rare != 0
5156 			|| aff->af_keepcase != 0
5157 			|| aff->af_bad != 0
5158 			|| aff->af_needaffix != 0
5159 			|| aff->af_needcomp != 0
5160 			|| aff->af_comproot != 0
5161 			|| aff->af_nosuggest != 0
5162 			|| compflags != NULL
5163 			|| aff->af_suff.ht_used > 0
5164 			|| aff->af_pref.ht_used > 0)
5165 		    smsg((char_u *)_("FLAG after using flags in %s line %d: %s"),
5166 			    fname, lnum, items[1]);
5167 	    }
5168 	    else if (spell_info_item(items[0]))
5169 	    {
5170 		    p = (char_u *)getroom(spin,
5171 			    (spin->si_info == NULL ? 0 : STRLEN(spin->si_info))
5172 			    + STRLEN(items[0])
5173 			    + STRLEN(items[1]) + 3, FALSE);
5174 		    if (p != NULL)
5175 		    {
5176 			if (spin->si_info != NULL)
5177 			{
5178 			    STRCPY(p, spin->si_info);
5179 			    STRCAT(p, "\n");
5180 			}
5181 			STRCAT(p, items[0]);
5182 			STRCAT(p, " ");
5183 			STRCAT(p, items[1]);
5184 			spin->si_info = p;
5185 		    }
5186 	    }
5187 	    else if (STRCMP(items[0], "MIDWORD") == 0 && itemcnt == 2
5188 							   && midword == NULL)
5189 	    {
5190 		midword = getroom_save(spin, items[1]);
5191 	    }
5192 	    else if (STRCMP(items[0], "TRY") == 0 && itemcnt == 2)
5193 	    {
5194 		/* ignored, we look in the tree for what chars may appear */
5195 	    }
5196 	    /* TODO: remove "RAR" later */
5197 	    else if ((STRCMP(items[0], "RAR") == 0
5198 			|| STRCMP(items[0], "RARE") == 0) && itemcnt == 2
5199 						       && aff->af_rare == 0)
5200 	    {
5201 		aff->af_rare = affitem2flag(aff->af_flagtype, items[1],
5202 								 fname, lnum);
5203 	    }
5204 	    /* TODO: remove "KEP" later */
5205 	    else if ((STRCMP(items[0], "KEP") == 0
5206 		    || STRCMP(items[0], "KEEPCASE") == 0) && itemcnt == 2
5207 						     && aff->af_keepcase == 0)
5208 	    {
5209 		aff->af_keepcase = affitem2flag(aff->af_flagtype, items[1],
5210 								 fname, lnum);
5211 	    }
5212 	    else if (STRCMP(items[0], "BAD") == 0 && itemcnt == 2
5213 						       && aff->af_bad == 0)
5214 	    {
5215 		aff->af_bad = affitem2flag(aff->af_flagtype, items[1],
5216 								 fname, lnum);
5217 	    }
5218 	    else if (STRCMP(items[0], "NEEDAFFIX") == 0 && itemcnt == 2
5219 						    && aff->af_needaffix == 0)
5220 	    {
5221 		aff->af_needaffix = affitem2flag(aff->af_flagtype, items[1],
5222 								 fname, lnum);
5223 	    }
5224 	    else if (STRCMP(items[0], "NOSUGGEST") == 0 && itemcnt == 2
5225 						    && aff->af_nosuggest == 0)
5226 	    {
5227 		aff->af_nosuggest = affitem2flag(aff->af_flagtype, items[1],
5228 								 fname, lnum);
5229 	    }
5230 	    else if (STRCMP(items[0], "NEEDCOMPOUND") == 0 && itemcnt == 2
5231 						     && aff->af_needcomp == 0)
5232 	    {
5233 		aff->af_needcomp = affitem2flag(aff->af_flagtype, items[1],
5234 								 fname, lnum);
5235 	    }
5236 	    else if (STRCMP(items[0], "COMPOUNDROOT") == 0 && itemcnt == 2
5237 						     && aff->af_comproot == 0)
5238 	    {
5239 		aff->af_comproot = affitem2flag(aff->af_flagtype, items[1],
5240 								 fname, lnum);
5241 	    }
5242 	    else if (STRCMP(items[0], "COMPOUNDFORBIDFLAG") == 0
5243 				   && itemcnt == 2 && aff->af_compforbid == 0)
5244 	    {
5245 		aff->af_compforbid = affitem2flag(aff->af_flagtype, items[1],
5246 								 fname, lnum);
5247 	    }
5248 	    else if (STRCMP(items[0], "COMPOUNDPERMITFLAG") == 0
5249 				   && itemcnt == 2 && aff->af_comppermit == 0)
5250 	    {
5251 		aff->af_comppermit = affitem2flag(aff->af_flagtype, items[1],
5252 								 fname, lnum);
5253 	    }
5254 	    else if (STRCMP(items[0], "COMPOUNDFLAG") == 0 && itemcnt == 2
5255 							 && compflags == NULL)
5256 	    {
5257 		/* Turn flag "c" into COMPOUNDRULE compatible string "c+",
5258 		 * "Na" into "Na+", "1234" into "1234+". */
5259 		p = getroom(spin, STRLEN(items[1]) + 2, FALSE);
5260 		if (p != NULL)
5261 		{
5262 		    STRCPY(p, items[1]);
5263 		    STRCAT(p, "+");
5264 		    compflags = p;
5265 		}
5266 	    }
5267 	    else if (STRCMP(items[0], "COMPOUNDRULE") == 0 && itemcnt == 2)
5268 	    {
5269 		/* Concatenate this string to previously defined ones, using a
5270 		 * slash to separate them. */
5271 		l = STRLEN(items[1]) + 1;
5272 		if (compflags != NULL)
5273 		    l += STRLEN(compflags) + 1;
5274 		p = getroom(spin, l, FALSE);
5275 		if (p != NULL)
5276 		{
5277 		    if (compflags != NULL)
5278 		    {
5279 			STRCPY(p, compflags);
5280 			STRCAT(p, "/");
5281 		    }
5282 		    STRCAT(p, items[1]);
5283 		    compflags = p;
5284 		}
5285 	    }
5286 	    else if (STRCMP(items[0], "COMPOUNDWORDMAX") == 0 && itemcnt == 2
5287 							      && compmax == 0)
5288 	    {
5289 		compmax = atoi((char *)items[1]);
5290 		if (compmax == 0)
5291 		    smsg((char_u *)_("Wrong COMPOUNDWORDMAX value in %s line %d: %s"),
5292 						       fname, lnum, items[1]);
5293 	    }
5294 	    else if (STRCMP(items[0], "COMPOUNDMIN") == 0 && itemcnt == 2
5295 							   && compminlen == 0)
5296 	    {
5297 		compminlen = atoi((char *)items[1]);
5298 		if (compminlen == 0)
5299 		    smsg((char_u *)_("Wrong COMPOUNDMIN value in %s line %d: %s"),
5300 						       fname, lnum, items[1]);
5301 	    }
5302 	    else if (STRCMP(items[0], "COMPOUNDSYLMAX") == 0 && itemcnt == 2
5303 							   && compsylmax == 0)
5304 	    {
5305 		compsylmax = atoi((char *)items[1]);
5306 		if (compsylmax == 0)
5307 		    smsg((char_u *)_("Wrong COMPOUNDSYLMAX value in %s line %d: %s"),
5308 						       fname, lnum, items[1]);
5309 	    }
5310 	    else if (STRCMP(items[0], "CHECKCOMPOUNDDUP") == 0 && itemcnt == 1)
5311 	    {
5312 		compoptions |= COMP_CHECKDUP;
5313 	    }
5314 	    else if (STRCMP(items[0], "CHECKCOMPOUNDREP") == 0 && itemcnt == 1)
5315 	    {
5316 		compoptions |= COMP_CHECKREP;
5317 	    }
5318 	    else if (STRCMP(items[0], "CHECKCOMPOUNDCASE") == 0 && itemcnt == 1)
5319 	    {
5320 		compoptions |= COMP_CHECKCASE;
5321 	    }
5322 	    else if (STRCMP(items[0], "CHECKCOMPOUNDTRIPLE") == 0
5323 							      && itemcnt == 1)
5324 	    {
5325 		compoptions |= COMP_CHECKTRIPLE;
5326 	    }
5327 	    else if (STRCMP(items[0], "CHECKCOMPOUNDPATTERN") == 0
5328 							      && itemcnt == 2)
5329 	    {
5330 		if (atoi((char *)items[1]) == 0)
5331 		    smsg((char_u *)_("Wrong CHECKCOMPOUNDPATTERN value in %s line %d: %s"),
5332 						       fname, lnum, items[1]);
5333 	    }
5334 	    else if (STRCMP(items[0], "CHECKCOMPOUNDPATTERN") == 0
5335 							      && itemcnt == 3)
5336 	    {
5337 		garray_T    *gap = &spin->si_comppat;
5338 		int	    i;
5339 
5340 		/* Only add the couple if it isn't already there. */
5341 		for (i = 0; i < gap->ga_len - 1; i += 2)
5342 		    if (STRCMP(((char_u **)(gap->ga_data))[i], items[1]) == 0
5343 			    && STRCMP(((char_u **)(gap->ga_data))[i + 1],
5344 							       items[2]) == 0)
5345 			break;
5346 		if (i >= gap->ga_len && ga_grow(gap, 2) == OK)
5347 		{
5348 		    ((char_u **)(gap->ga_data))[gap->ga_len++]
5349 					       = getroom_save(spin, items[1]);
5350 		    ((char_u **)(gap->ga_data))[gap->ga_len++]
5351 					       = getroom_save(spin, items[2]);
5352 		}
5353 	    }
5354 	    else if (STRCMP(items[0], "SYLLABLE") == 0 && itemcnt == 2
5355 							  && syllable == NULL)
5356 	    {
5357 		syllable = getroom_save(spin, items[1]);
5358 	    }
5359 	    else if (STRCMP(items[0], "NOBREAK") == 0 && itemcnt == 1)
5360 	    {
5361 		spin->si_nobreak = TRUE;
5362 	    }
5363 	    else if (STRCMP(items[0], "NOSPLITSUGS") == 0 && itemcnt == 1)
5364 	    {
5365 		spin->si_nosplitsugs = TRUE;
5366 	    }
5367 	    else if (STRCMP(items[0], "NOSUGFILE") == 0 && itemcnt == 1)
5368 	    {
5369 		spin->si_nosugfile = TRUE;
5370 	    }
5371 	    else if (STRCMP(items[0], "PFXPOSTPONE") == 0 && itemcnt == 1)
5372 	    {
5373 		aff->af_pfxpostpone = TRUE;
5374 	    }
5375 	    else if ((STRCMP(items[0], "PFX") == 0
5376 					      || STRCMP(items[0], "SFX") == 0)
5377 		    && aff_todo == 0
5378 		    && itemcnt >= 4)
5379 	    {
5380 		int	lasti = 4;
5381 		char_u	key[AH_KEY_LEN];
5382 
5383 		if (*items[0] == 'P')
5384 		    tp = &aff->af_pref;
5385 		else
5386 		    tp = &aff->af_suff;
5387 
5388 		/* Myspell allows the same affix name to be used multiple
5389 		 * times.  The affix files that do this have an undocumented
5390 		 * "S" flag on all but the last block, thus we check for that
5391 		 * and store it in ah_follows. */
5392 		vim_strncpy(key, items[1], AH_KEY_LEN - 1);
5393 		hi = hash_find(tp, key);
5394 		if (!HASHITEM_EMPTY(hi))
5395 		{
5396 		    cur_aff = HI2AH(hi);
5397 		    if (cur_aff->ah_combine != (*items[2] == 'Y'))
5398 			smsg((char_u *)_("Different combining flag in continued affix block in %s line %d: %s"),
5399 						   fname, lnum, items[1]);
5400 		    if (!cur_aff->ah_follows)
5401 			smsg((char_u *)_("Duplicate affix in %s line %d: %s"),
5402 						       fname, lnum, items[1]);
5403 		}
5404 		else
5405 		{
5406 		    /* New affix letter. */
5407 		    cur_aff = (affheader_T *)getroom(spin,
5408 						   sizeof(affheader_T), TRUE);
5409 		    if (cur_aff == NULL)
5410 			break;
5411 		    cur_aff->ah_flag = affitem2flag(aff->af_flagtype, items[1],
5412 								 fname, lnum);
5413 		    if (cur_aff->ah_flag == 0 || STRLEN(items[1]) >= AH_KEY_LEN)
5414 			break;
5415 		    if (cur_aff->ah_flag == aff->af_bad
5416 			    || cur_aff->ah_flag == aff->af_rare
5417 			    || cur_aff->ah_flag == aff->af_keepcase
5418 			    || cur_aff->ah_flag == aff->af_needaffix
5419 			    || cur_aff->ah_flag == aff->af_nosuggest
5420 			    || cur_aff->ah_flag == aff->af_needcomp
5421 			    || cur_aff->ah_flag == aff->af_comproot)
5422 			smsg((char_u *)_("Affix also used for BAD/RARE/KEEPCASE/NEEDAFFIX/NEEDCOMPOUND/NOSUGGEST in %s line %d: %s"),
5423 						       fname, lnum, items[1]);
5424 		    STRCPY(cur_aff->ah_key, items[1]);
5425 		    hash_add(tp, cur_aff->ah_key);
5426 
5427 		    cur_aff->ah_combine = (*items[2] == 'Y');
5428 		}
5429 
5430 		/* Check for the "S" flag, which apparently means that another
5431 		 * block with the same affix name is following. */
5432 		if (itemcnt > lasti && STRCMP(items[lasti], "S") == 0)
5433 		{
5434 		    ++lasti;
5435 		    cur_aff->ah_follows = TRUE;
5436 		}
5437 		else
5438 		    cur_aff->ah_follows = FALSE;
5439 
5440 		/* Myspell allows extra text after the item, but that might
5441 		 * mean mistakes go unnoticed.  Require a comment-starter. */
5442 		if (itemcnt > lasti && *items[lasti] != '#')
5443 		    smsg((char_u *)_(e_afftrailing), fname, lnum, items[lasti]);
5444 
5445 		if (STRCMP(items[2], "Y") != 0 && STRCMP(items[2], "N") != 0)
5446 		    smsg((char_u *)_("Expected Y or N in %s line %d: %s"),
5447 						       fname, lnum, items[2]);
5448 
5449 		if (*items[0] == 'P' && aff->af_pfxpostpone)
5450 		{
5451 		    if (cur_aff->ah_newID == 0)
5452 		    {
5453 			/* Use a new number in the .spl file later, to be able
5454 			 * to handle multiple .aff files. */
5455 			check_renumber(spin);
5456 			cur_aff->ah_newID = ++spin->si_newprefID;
5457 
5458 			/* We only really use ah_newID if the prefix is
5459 			 * postponed.  We know that only after handling all
5460 			 * the items. */
5461 			did_postpone_prefix = FALSE;
5462 		    }
5463 		    else
5464 			/* Did use the ID in a previous block. */
5465 			did_postpone_prefix = TRUE;
5466 		}
5467 
5468 		aff_todo = atoi((char *)items[3]);
5469 	    }
5470 	    else if ((STRCMP(items[0], "PFX") == 0
5471 					      || STRCMP(items[0], "SFX") == 0)
5472 		    && aff_todo > 0
5473 		    && STRCMP(cur_aff->ah_key, items[1]) == 0
5474 		    && itemcnt >= 5)
5475 	    {
5476 		affentry_T	*aff_entry;
5477 		int		nocomp = FALSE;
5478 		int		upper = FALSE;
5479 		int		lasti = 5;
5480 
5481 		/* Myspell allows extra text after the item, but that might
5482 		 * mean mistakes go unnoticed.  Require a comment-starter.
5483 		 * Hunspell uses a "-" item. */
5484 		if (itemcnt > lasti && *items[lasti] != '#'
5485 			&& (STRCMP(items[lasti], "-") != 0
5486 						     || itemcnt != lasti + 1))
5487 		    smsg((char_u *)_(e_afftrailing), fname, lnum, items[lasti]);
5488 
5489 		/* New item for an affix letter. */
5490 		--aff_todo;
5491 		aff_entry = (affentry_T *)getroom(spin,
5492 						    sizeof(affentry_T), TRUE);
5493 		if (aff_entry == NULL)
5494 		    break;
5495 		aff_entry->ae_nocomp = nocomp;
5496 
5497 		if (STRCMP(items[2], "0") != 0)
5498 		    aff_entry->ae_chop = getroom_save(spin, items[2]);
5499 		if (STRCMP(items[3], "0") != 0)
5500 		{
5501 		    aff_entry->ae_add = getroom_save(spin, items[3]);
5502 
5503 		    /* Recognize flags on the affix: abcd/1234 */
5504 		    aff_entry->ae_flags = vim_strchr(aff_entry->ae_add, '/');
5505 		    if (aff_entry->ae_flags != NULL)
5506 			*aff_entry->ae_flags++ = NUL;
5507 		}
5508 
5509 		/* Don't use an affix entry with non-ASCII characters when
5510 		 * "spin->si_ascii" is TRUE. */
5511 		if (!spin->si_ascii || !(has_non_ascii(aff_entry->ae_chop)
5512 					  || has_non_ascii(aff_entry->ae_add)))
5513 		{
5514 		    aff_entry->ae_next = cur_aff->ah_first;
5515 		    cur_aff->ah_first = aff_entry;
5516 
5517 		    if (STRCMP(items[4], ".") != 0)
5518 		    {
5519 			char_u	buf[MAXLINELEN];
5520 
5521 			aff_entry->ae_cond = getroom_save(spin, items[4]);
5522 			if (*items[0] == 'P')
5523 			    sprintf((char *)buf, "^%s", items[4]);
5524 			else
5525 			    sprintf((char *)buf, "%s$", items[4]);
5526 			aff_entry->ae_prog = vim_regcomp(buf,
5527 					    RE_MAGIC + RE_STRING + RE_STRICT);
5528 			if (aff_entry->ae_prog == NULL)
5529 			    smsg((char_u *)_("Broken condition in %s line %d: %s"),
5530 						       fname, lnum, items[4]);
5531 		    }
5532 
5533 		    /* For postponed prefixes we need an entry in si_prefcond
5534 		     * for the condition.  Use an existing one if possible.
5535 		     * Can't be done for an affix with flags. */
5536 		    if (*items[0] == 'P' && aff->af_pfxpostpone
5537 					       && aff_entry->ae_flags == NULL)
5538 		    {
5539 			/* When the chop string is one lower-case letter and
5540 			 * the add string ends in the upper-case letter we set
5541 			 * the "upper" flag, clear "ae_chop" and remove the
5542 			 * letters from "ae_add".  The condition must either
5543 			 * be empty or start with the same letter. */
5544 			if (aff_entry->ae_chop != NULL
5545 				&& aff_entry->ae_add != NULL
5546 #ifdef FEAT_MBYTE
5547 				&& aff_entry->ae_chop[(*mb_ptr2len)(
5548 						   aff_entry->ae_chop)] == NUL
5549 #else
5550 				&& aff_entry->ae_chop[1] == NUL
5551 #endif
5552 				)
5553 			{
5554 			    int		c, c_up;
5555 
5556 			    c = PTR2CHAR(aff_entry->ae_chop);
5557 			    c_up = SPELL_TOUPPER(c);
5558 			    if (c_up != c
5559 				    && (aff_entry->ae_cond == NULL
5560 					|| PTR2CHAR(aff_entry->ae_cond) == c))
5561 			    {
5562 				p = aff_entry->ae_add
5563 						  + STRLEN(aff_entry->ae_add);
5564 				mb_ptr_back(aff_entry->ae_add, p);
5565 				if (PTR2CHAR(p) == c_up)
5566 				{
5567 				    upper = TRUE;
5568 				    aff_entry->ae_chop = NULL;
5569 				    *p = NUL;
5570 
5571 				    /* The condition is matched with the
5572 				     * actual word, thus must check for the
5573 				     * upper-case letter. */
5574 				    if (aff_entry->ae_cond != NULL)
5575 				    {
5576 					char_u	buf[MAXLINELEN];
5577 #ifdef FEAT_MBYTE
5578 					if (has_mbyte)
5579 					{
5580 					    onecap_copy(items[4], buf, TRUE);
5581 					    aff_entry->ae_cond = getroom_save(
5582 								   spin, buf);
5583 					}
5584 					else
5585 #endif
5586 					    *aff_entry->ae_cond = c_up;
5587 					if (aff_entry->ae_cond != NULL)
5588 					{
5589 					    sprintf((char *)buf, "^%s",
5590 							  aff_entry->ae_cond);
5591 					    vim_free(aff_entry->ae_prog);
5592 					    aff_entry->ae_prog = vim_regcomp(
5593 						    buf, RE_MAGIC + RE_STRING);
5594 					}
5595 				    }
5596 				}
5597 			    }
5598 			}
5599 
5600 			if (aff_entry->ae_chop == NULL
5601 					       && aff_entry->ae_flags == NULL)
5602 			{
5603 			    int		idx;
5604 			    char_u	**pp;
5605 			    int		n;
5606 
5607 			    /* Find a previously used condition. */
5608 			    for (idx = spin->si_prefcond.ga_len - 1; idx >= 0;
5609 									--idx)
5610 			    {
5611 				p = ((char_u **)spin->si_prefcond.ga_data)[idx];
5612 				if (str_equal(p, aff_entry->ae_cond))
5613 				    break;
5614 			    }
5615 			    if (idx < 0 && ga_grow(&spin->si_prefcond, 1) == OK)
5616 			    {
5617 				/* Not found, add a new condition. */
5618 				idx = spin->si_prefcond.ga_len++;
5619 				pp = ((char_u **)spin->si_prefcond.ga_data)
5620 									+ idx;
5621 				if (aff_entry->ae_cond == NULL)
5622 				    *pp = NULL;
5623 				else
5624 				    *pp = getroom_save(spin,
5625 							  aff_entry->ae_cond);
5626 			    }
5627 
5628 			    if (aff_entry->ae_flags != NULL)
5629 				smsg((char_u *)_("Affix flags ignored when PFXPOSTPONE used in %s line %d: %s"),
5630 						       fname, lnum, items[4]);
5631 
5632 			    /* Add the prefix to the prefix tree. */
5633 			    if (aff_entry->ae_add == NULL)
5634 				p = (char_u *)"";
5635 			    else
5636 				p = aff_entry->ae_add;
5637 
5638 			    /* PFX_FLAGS is a negative number, so that
5639 			     * tree_add_word() knows this is the prefix tree. */
5640 			    n = PFX_FLAGS;
5641 			    if (!cur_aff->ah_combine)
5642 				n |= WFP_NC;
5643 			    if (upper)
5644 				n |= WFP_UP;
5645 			    tree_add_word(spin, p, spin->si_prefroot, n,
5646 						      idx, cur_aff->ah_newID);
5647 			    did_postpone_prefix = TRUE;
5648 			}
5649 
5650 			/* Didn't actually use ah_newID, backup si_newprefID. */
5651 			if (aff_todo == 0 && !did_postpone_prefix)
5652 			{
5653 			    --spin->si_newprefID;
5654 			    cur_aff->ah_newID = 0;
5655 			}
5656 		    }
5657 		}
5658 	    }
5659 	    else if (STRCMP(items[0], "FOL") == 0 && itemcnt == 2
5660 							       && fol == NULL)
5661 	    {
5662 		fol = vim_strsave(items[1]);
5663 	    }
5664 	    else if (STRCMP(items[0], "LOW") == 0 && itemcnt == 2
5665 							       && low == NULL)
5666 	    {
5667 		low = vim_strsave(items[1]);
5668 	    }
5669 	    else if (STRCMP(items[0], "UPP") == 0 && itemcnt == 2
5670 							       && upp == NULL)
5671 	    {
5672 		upp = vim_strsave(items[1]);
5673 	    }
5674 	    else if ((STRCMP(items[0], "REP") == 0
5675 			|| STRCMP(items[0], "REPSAL") == 0)
5676 		    && itemcnt == 2)
5677 	    {
5678 		/* Ignore REP/REPSAL count */;
5679 		if (!isdigit(*items[1]))
5680 		    smsg((char_u *)_("Expected REP(SAL) count in %s line %d"),
5681 								 fname, lnum);
5682 	    }
5683 	    else if ((STRCMP(items[0], "REP") == 0
5684 			|| STRCMP(items[0], "REPSAL") == 0)
5685 		    && itemcnt >= 3)
5686 	    {
5687 		/* REP/REPSAL item */
5688 		/* Myspell ignores extra arguments, we require it starts with
5689 		 * # to detect mistakes. */
5690 		if (itemcnt > 3 && items[3][0] != '#')
5691 		    smsg((char_u *)_(e_afftrailing), fname, lnum, items[3]);
5692 		if (items[0][3] == 'S' ? do_repsal : do_rep)
5693 		{
5694 		    /* Replace underscore with space (can't include a space
5695 		     * directly). */
5696 		    for (p = items[1]; *p != NUL; mb_ptr_adv(p))
5697 			if (*p == '_')
5698 			    *p = ' ';
5699 		    for (p = items[2]; *p != NUL; mb_ptr_adv(p))
5700 			if (*p == '_')
5701 			    *p = ' ';
5702 		    add_fromto(spin, items[0][3] == 'S'
5703 					 ? &spin->si_repsal
5704 					 : &spin->si_rep, items[1], items[2]);
5705 		}
5706 	    }
5707 	    else if (STRCMP(items[0], "MAP") == 0 && itemcnt == 2)
5708 	    {
5709 		/* MAP item or count */
5710 		if (!found_map)
5711 		{
5712 		    /* First line contains the count. */
5713 		    found_map = TRUE;
5714 		    if (!isdigit(*items[1]))
5715 			smsg((char_u *)_("Expected MAP count in %s line %d"),
5716 								 fname, lnum);
5717 		}
5718 		else if (do_map)
5719 		{
5720 		    int		c;
5721 
5722 		    /* Check that every character appears only once. */
5723 		    for (p = items[1]; *p != NUL; )
5724 		    {
5725 #ifdef FEAT_MBYTE
5726 			c = mb_ptr2char_adv(&p);
5727 #else
5728 			c = *p++;
5729 #endif
5730 			if ((spin->si_map.ga_len > 0
5731 				    && vim_strchr(spin->si_map.ga_data, c)
5732 								      != NULL)
5733 				|| vim_strchr(p, c) != NULL)
5734 			    smsg((char_u *)_("Duplicate character in MAP in %s line %d"),
5735 								 fname, lnum);
5736 		    }
5737 
5738 		    /* We simply concatenate all the MAP strings, separated by
5739 		     * slashes. */
5740 		    ga_concat(&spin->si_map, items[1]);
5741 		    ga_append(&spin->si_map, '/');
5742 		}
5743 	    }
5744 	    /* Accept "SAL from to" and "SAL from to # comment". */
5745 	    else if (STRCMP(items[0], "SAL") == 0
5746 		    && (itemcnt == 3 || (itemcnt > 3 && items[3][0] == '#')))
5747 	    {
5748 		if (do_sal)
5749 		{
5750 		    /* SAL item (sounds-a-like)
5751 		     * Either one of the known keys or a from-to pair. */
5752 		    if (STRCMP(items[1], "followup") == 0)
5753 			spin->si_followup = sal_to_bool(items[2]);
5754 		    else if (STRCMP(items[1], "collapse_result") == 0)
5755 			spin->si_collapse = sal_to_bool(items[2]);
5756 		    else if (STRCMP(items[1], "remove_accents") == 0)
5757 			spin->si_rem_accents = sal_to_bool(items[2]);
5758 		    else
5759 			/* when "to" is "_" it means empty */
5760 			add_fromto(spin, &spin->si_sal, items[1],
5761 				     STRCMP(items[2], "_") == 0 ? (char_u *)""
5762 								: items[2]);
5763 		}
5764 	    }
5765 	    else if (STRCMP(items[0], "SOFOFROM") == 0 && itemcnt == 2
5766 							  && sofofrom == NULL)
5767 	    {
5768 		sofofrom = getroom_save(spin, items[1]);
5769 	    }
5770 	    else if (STRCMP(items[0], "SOFOTO") == 0 && itemcnt == 2
5771 							    && sofoto == NULL)
5772 	    {
5773 		sofoto = getroom_save(spin, items[1]);
5774 	    }
5775 	    else if (STRCMP(items[0], "COMMON") == 0)
5776 	    {
5777 		int	i;
5778 
5779 		for (i = 1; i < itemcnt; ++i)
5780 		{
5781 		    if (HASHITEM_EMPTY(hash_find(&spin->si_commonwords,
5782 								   items[i])))
5783 		    {
5784 			p = vim_strsave(items[i]);
5785 			if (p == NULL)
5786 			    break;
5787 			hash_add(&spin->si_commonwords, p);
5788 		    }
5789 		}
5790 	    }
5791 	    else
5792 		smsg((char_u *)_("Unrecognized or duplicate item in %s line %d: %s"),
5793 						       fname, lnum, items[0]);
5794 	}
5795     }
5796 
5797     if (fol != NULL || low != NULL || upp != NULL)
5798     {
5799 	if (spin->si_clear_chartab)
5800 	{
5801 	    /* Clear the char type tables, don't want to use any of the
5802 	     * currently used spell properties. */
5803 	    init_spell_chartab();
5804 	    spin->si_clear_chartab = FALSE;
5805 	}
5806 
5807 	/*
5808 	 * Don't write a word table for an ASCII file, so that we don't check
5809 	 * for conflicts with a word table that matches 'encoding'.
5810 	 * Don't write one for utf-8 either, we use utf_*() and
5811 	 * mb_get_class(), the list of chars in the file will be incomplete.
5812 	 */
5813 	if (!spin->si_ascii
5814 #ifdef FEAT_MBYTE
5815 		&& !enc_utf8
5816 #endif
5817 		)
5818 	{
5819 	    if (fol == NULL || low == NULL || upp == NULL)
5820 		smsg((char_u *)_("Missing FOL/LOW/UPP line in %s"), fname);
5821 	    else
5822 		(void)set_spell_chartab(fol, low, upp);
5823 	}
5824 
5825 	vim_free(fol);
5826 	vim_free(low);
5827 	vim_free(upp);
5828     }
5829 
5830     /* Use compound specifications of the .aff file for the spell info. */
5831     if (compmax != 0)
5832     {
5833 	aff_check_number(spin->si_compmax, compmax, "COMPOUNDWORDMAX");
5834 	spin->si_compmax = compmax;
5835     }
5836 
5837     if (compminlen != 0)
5838     {
5839 	aff_check_number(spin->si_compminlen, compminlen, "COMPOUNDMIN");
5840 	spin->si_compminlen = compminlen;
5841     }
5842 
5843     if (compsylmax != 0)
5844     {
5845 	if (syllable == NULL)
5846 	    smsg((char_u *)_("COMPOUNDSYLMAX used without SYLLABLE"));
5847 	aff_check_number(spin->si_compsylmax, compsylmax, "COMPOUNDSYLMAX");
5848 	spin->si_compsylmax = compsylmax;
5849     }
5850 
5851     if (compoptions != 0)
5852     {
5853 	aff_check_number(spin->si_compoptions, compoptions, "COMPOUND options");
5854 	spin->si_compoptions |= compoptions;
5855     }
5856 
5857     if (compflags != NULL)
5858 	process_compflags(spin, aff, compflags);
5859 
5860     /* Check that we didn't use too many renumbered flags. */
5861     if (spin->si_newcompID < spin->si_newprefID)
5862     {
5863 	if (spin->si_newcompID == 127 || spin->si_newcompID == 255)
5864 	    MSG(_("Too many postponed prefixes"));
5865 	else if (spin->si_newprefID == 0 || spin->si_newprefID == 127)
5866 	    MSG(_("Too many compound flags"));
5867 	else
5868 	    MSG(_("Too many posponed prefixes and/or compound flags"));
5869     }
5870 
5871     if (syllable != NULL)
5872     {
5873 	aff_check_string(spin->si_syllable, syllable, "SYLLABLE");
5874 	spin->si_syllable = syllable;
5875     }
5876 
5877     if (sofofrom != NULL || sofoto != NULL)
5878     {
5879 	if (sofofrom == NULL || sofoto == NULL)
5880 	    smsg((char_u *)_("Missing SOFO%s line in %s"),
5881 				     sofofrom == NULL ? "FROM" : "TO", fname);
5882 	else if (spin->si_sal.ga_len > 0)
5883 	    smsg((char_u *)_("Both SAL and SOFO lines in %s"), fname);
5884 	else
5885 	{
5886 	    aff_check_string(spin->si_sofofr, sofofrom, "SOFOFROM");
5887 	    aff_check_string(spin->si_sofoto, sofoto, "SOFOTO");
5888 	    spin->si_sofofr = sofofrom;
5889 	    spin->si_sofoto = sofoto;
5890 	}
5891     }
5892 
5893     if (midword != NULL)
5894     {
5895 	aff_check_string(spin->si_midword, midword, "MIDWORD");
5896 	spin->si_midword = midword;
5897     }
5898 
5899     vim_free(pc);
5900     fclose(fd);
5901     return aff;
5902 }
5903 
5904 /*
5905  * Return TRUE if "s" is the name of an info item in the affix file.
5906  */
5907     static int
5908 spell_info_item(s)
5909     char_u	*s;
5910 {
5911     return STRCMP(s, "NAME") == 0
5912 	|| STRCMP(s, "HOME") == 0
5913 	|| STRCMP(s, "VERSION") == 0
5914 	|| STRCMP(s, "AUTHOR") == 0
5915 	|| STRCMP(s, "EMAIL") == 0
5916 	|| STRCMP(s, "COPYRIGHT") == 0;
5917 }
5918 
5919 /*
5920  * Turn an affix flag name into a number, according to the FLAG type.
5921  * returns zero for failure.
5922  */
5923     static unsigned
5924 affitem2flag(flagtype, item, fname, lnum)
5925     int		flagtype;
5926     char_u	*item;
5927     char_u	*fname;
5928     int		lnum;
5929 {
5930     unsigned	res;
5931     char_u	*p = item;
5932 
5933     res = get_affitem(flagtype, &p);
5934     if (res == 0)
5935     {
5936 	if (flagtype == AFT_NUM)
5937 	    smsg((char_u *)_("Flag is not a number in %s line %d: %s"),
5938 							   fname, lnum, item);
5939 	else
5940 	    smsg((char_u *)_("Illegal flag in %s line %d: %s"),
5941 							   fname, lnum, item);
5942     }
5943     if (*p != NUL)
5944     {
5945 	smsg((char_u *)_(e_affname), fname, lnum, item);
5946 	return 0;
5947     }
5948 
5949     return res;
5950 }
5951 
5952 /*
5953  * Get one affix name from "*pp" and advance the pointer.
5954  * Returns zero for an error, still advances the pointer then.
5955  */
5956     static unsigned
5957 get_affitem(flagtype, pp)
5958     int		flagtype;
5959     char_u	**pp;
5960 {
5961     int		res;
5962 
5963     if (flagtype == AFT_NUM)
5964     {
5965 	if (!VIM_ISDIGIT(**pp))
5966 	{
5967 	    ++*pp;	/* always advance, avoid getting stuck */
5968 	    return 0;
5969 	}
5970 	res = getdigits(pp);
5971     }
5972     else
5973     {
5974 #ifdef FEAT_MBYTE
5975 	res = mb_ptr2char_adv(pp);
5976 #else
5977 	res = *(*pp)++;
5978 #endif
5979 	if (flagtype == AFT_LONG || (flagtype == AFT_CAPLONG
5980 						 && res >= 'A' && res <= 'Z'))
5981 	{
5982 	    if (**pp == NUL)
5983 		return 0;
5984 #ifdef FEAT_MBYTE
5985 	    res = mb_ptr2char_adv(pp) + (res << 16);
5986 #else
5987 	    res = *(*pp)++ + (res << 16);
5988 #endif
5989 	}
5990     }
5991     return res;
5992 }
5993 
5994 /*
5995  * Process the "compflags" string used in an affix file and append it to
5996  * spin->si_compflags.
5997  * The processing involves changing the affix names to ID numbers, so that
5998  * they fit in one byte.
5999  */
6000     static void
6001 process_compflags(spin, aff, compflags)
6002     spellinfo_T	*spin;
6003     afffile_T	*aff;
6004     char_u	*compflags;
6005 {
6006     char_u	*p;
6007     char_u	*prevp;
6008     unsigned	flag;
6009     compitem_T	*ci;
6010     int		id;
6011     int		len;
6012     char_u	*tp;
6013     char_u	key[AH_KEY_LEN];
6014     hashitem_T	*hi;
6015 
6016     /* Make room for the old and the new compflags, concatenated with a / in
6017      * between.  Processing it makes it shorter, but we don't know by how
6018      * much, thus allocate the maximum. */
6019     len = STRLEN(compflags) + 1;
6020     if (spin->si_compflags != NULL)
6021 	len += STRLEN(spin->si_compflags) + 1;
6022     p = getroom(spin, len, FALSE);
6023     if (p == NULL)
6024 	return;
6025     if (spin->si_compflags != NULL)
6026     {
6027 	STRCPY(p, spin->si_compflags);
6028 	STRCAT(p, "/");
6029     }
6030     spin->si_compflags = p;
6031     tp = p + STRLEN(p);
6032 
6033     for (p = compflags; *p != NUL; )
6034     {
6035 	if (vim_strchr((char_u *)"/*+[]", *p) != NULL)
6036 	    /* Copy non-flag characters directly. */
6037 	    *tp++ = *p++;
6038 	else
6039 	{
6040 	    /* First get the flag number, also checks validity. */
6041 	    prevp = p;
6042 	    flag = get_affitem(aff->af_flagtype, &p);
6043 	    if (flag != 0)
6044 	    {
6045 		/* Find the flag in the hashtable.  If it was used before, use
6046 		 * the existing ID.  Otherwise add a new entry. */
6047 		vim_strncpy(key, prevp, p - prevp);
6048 		hi = hash_find(&aff->af_comp, key);
6049 		if (!HASHITEM_EMPTY(hi))
6050 		    id = HI2CI(hi)->ci_newID;
6051 		else
6052 		{
6053 		    ci = (compitem_T *)getroom(spin, sizeof(compitem_T), TRUE);
6054 		    if (ci == NULL)
6055 			break;
6056 		    STRCPY(ci->ci_key, key);
6057 		    ci->ci_flag = flag;
6058 		    /* Avoid using a flag ID that has a special meaning in a
6059 		     * regexp (also inside []). */
6060 		    do
6061 		    {
6062 			check_renumber(spin);
6063 			id = spin->si_newcompID--;
6064 		    } while (vim_strchr((char_u *)"/+*[]\\-^", id) != NULL);
6065 		    ci->ci_newID = id;
6066 		    hash_add(&aff->af_comp, ci->ci_key);
6067 		}
6068 		*tp++ = id;
6069 	    }
6070 	    if (aff->af_flagtype == AFT_NUM && *p == ',')
6071 		++p;
6072 	}
6073     }
6074 
6075     *tp = NUL;
6076 }
6077 
6078 /*
6079  * Check that the new IDs for postponed affixes and compounding don't overrun
6080  * each other.  We have almost 255 available, but start at 0-127 to avoid
6081  * using two bytes for utf-8.  When the 0-127 range is used up go to 128-255.
6082  * When that is used up an error message is given.
6083  */
6084     static void
6085 check_renumber(spin)
6086     spellinfo_T	*spin;
6087 {
6088     if (spin->si_newprefID == spin->si_newcompID && spin->si_newcompID < 128)
6089     {
6090 	spin->si_newprefID = 127;
6091 	spin->si_newcompID = 255;
6092     }
6093 }
6094 
6095 /*
6096  * Return TRUE if flag "flag" appears in affix list "afflist".
6097  */
6098     static int
6099 flag_in_afflist(flagtype, afflist, flag)
6100     int		flagtype;
6101     char_u	*afflist;
6102     unsigned	flag;
6103 {
6104     char_u	*p;
6105     unsigned	n;
6106 
6107     switch (flagtype)
6108     {
6109 	case AFT_CHAR:
6110 	    return vim_strchr(afflist, flag) != NULL;
6111 
6112 	case AFT_CAPLONG:
6113 	case AFT_LONG:
6114 	    for (p = afflist; *p != NUL; )
6115 	    {
6116 #ifdef FEAT_MBYTE
6117 		n = mb_ptr2char_adv(&p);
6118 #else
6119 		n = *p++;
6120 #endif
6121 		if ((flagtype == AFT_LONG || (n >= 'A' && n <= 'Z'))
6122 								 && *p != NUL)
6123 #ifdef FEAT_MBYTE
6124 		    n = mb_ptr2char_adv(&p) + (n << 16);
6125 #else
6126 		    n = *p++ + (n << 16);
6127 #endif
6128 		if (n == flag)
6129 		    return TRUE;
6130 	    }
6131 	    break;
6132 
6133 	case AFT_NUM:
6134 	    for (p = afflist; *p != NUL; )
6135 	    {
6136 		n = getdigits(&p);
6137 		if (n == flag)
6138 		    return TRUE;
6139 		if (*p != NUL)	/* skip over comma */
6140 		    ++p;
6141 	    }
6142 	    break;
6143     }
6144     return FALSE;
6145 }
6146 
6147 /*
6148  * Give a warning when "spinval" and "affval" numbers are set and not the same.
6149  */
6150     static void
6151 aff_check_number(spinval, affval, name)
6152     int	    spinval;
6153     int	    affval;
6154     char    *name;
6155 {
6156     if (spinval != 0 && spinval != affval)
6157 	smsg((char_u *)_("%s value differs from what is used in another .aff file"), name);
6158 }
6159 
6160 /*
6161  * Give a warning when "spinval" and "affval" strings are set and not the same.
6162  */
6163     static void
6164 aff_check_string(spinval, affval, name)
6165     char_u	*spinval;
6166     char_u	*affval;
6167     char	*name;
6168 {
6169     if (spinval != NULL && STRCMP(spinval, affval) != 0)
6170 	smsg((char_u *)_("%s value differs from what is used in another .aff file"), name);
6171 }
6172 
6173 /*
6174  * Return TRUE if strings "s1" and "s2" are equal.  Also consider both being
6175  * NULL as equal.
6176  */
6177     static int
6178 str_equal(s1, s2)
6179     char_u	*s1;
6180     char_u	*s2;
6181 {
6182     if (s1 == NULL || s2 == NULL)
6183 	return s1 == s2;
6184     return STRCMP(s1, s2) == 0;
6185 }
6186 
6187 /*
6188  * Add a from-to item to "gap".  Used for REP and SAL items.
6189  * They are stored case-folded.
6190  */
6191     static void
6192 add_fromto(spin, gap, from, to)
6193     spellinfo_T	*spin;
6194     garray_T	*gap;
6195     char_u	*from;
6196     char_u	*to;
6197 {
6198     fromto_T	*ftp;
6199     char_u	word[MAXWLEN];
6200 
6201     if (ga_grow(gap, 1) == OK)
6202     {
6203 	ftp = ((fromto_T *)gap->ga_data) + gap->ga_len;
6204 	(void)spell_casefold(from, STRLEN(from), word, MAXWLEN);
6205 	ftp->ft_from = getroom_save(spin, word);
6206 	(void)spell_casefold(to, STRLEN(to), word, MAXWLEN);
6207 	ftp->ft_to = getroom_save(spin, word);
6208 	++gap->ga_len;
6209     }
6210 }
6211 
6212 /*
6213  * Convert a boolean argument in a SAL line to TRUE or FALSE;
6214  */
6215     static int
6216 sal_to_bool(s)
6217     char_u	*s;
6218 {
6219     return STRCMP(s, "1") == 0 || STRCMP(s, "true") == 0;
6220 }
6221 
6222 /*
6223  * Return TRUE if string "s" contains a non-ASCII character (128 or higher).
6224  * When "s" is NULL FALSE is returned.
6225  */
6226     static int
6227 has_non_ascii(s)
6228     char_u	*s;
6229 {
6230     char_u	*p;
6231 
6232     if (s != NULL)
6233 	for (p = s; *p != NUL; ++p)
6234 	    if (*p >= 128)
6235 		return TRUE;
6236     return FALSE;
6237 }
6238 
6239 /*
6240  * Free the structure filled by spell_read_aff().
6241  */
6242     static void
6243 spell_free_aff(aff)
6244     afffile_T	*aff;
6245 {
6246     hashtab_T	*ht;
6247     hashitem_T	*hi;
6248     int		todo;
6249     affheader_T	*ah;
6250     affentry_T	*ae;
6251 
6252     vim_free(aff->af_enc);
6253 
6254     /* All this trouble to free the "ae_prog" items... */
6255     for (ht = &aff->af_pref; ; ht = &aff->af_suff)
6256     {
6257 	todo = ht->ht_used;
6258 	for (hi = ht->ht_array; todo > 0; ++hi)
6259 	{
6260 	    if (!HASHITEM_EMPTY(hi))
6261 	    {
6262 		--todo;
6263 		ah = HI2AH(hi);
6264 		for (ae = ah->ah_first; ae != NULL; ae = ae->ae_next)
6265 		    vim_free(ae->ae_prog);
6266 	    }
6267 	}
6268 	if (ht == &aff->af_suff)
6269 	    break;
6270     }
6271 
6272     hash_clear(&aff->af_pref);
6273     hash_clear(&aff->af_suff);
6274     hash_clear(&aff->af_comp);
6275 }
6276 
6277 /*
6278  * Read dictionary file "fname".
6279  * Returns OK or FAIL;
6280  */
6281     static int
6282 spell_read_dic(spin, fname, affile)
6283     spellinfo_T	*spin;
6284     char_u	*fname;
6285     afffile_T	*affile;
6286 {
6287     hashtab_T	ht;
6288     char_u	line[MAXLINELEN];
6289     char_u	*p;
6290     char_u	*afflist;
6291     char_u	store_afflist[MAXWLEN];
6292     int		pfxlen;
6293     int		need_affix;
6294     char_u	*dw;
6295     char_u	*pc;
6296     char_u	*w;
6297     int		l;
6298     hash_T	hash;
6299     hashitem_T	*hi;
6300     FILE	*fd;
6301     int		lnum = 1;
6302     int		non_ascii = 0;
6303     int		retval = OK;
6304     char_u	message[MAXLINELEN + MAXWLEN];
6305     int		flags;
6306     int		duplicate = 0;
6307 
6308     /*
6309      * Open the file.
6310      */
6311     fd = mch_fopen((char *)fname, "r");
6312     if (fd == NULL)
6313     {
6314 	EMSG2(_(e_notopen), fname);
6315 	return FAIL;
6316     }
6317 
6318     /* The hashtable is only used to detect duplicated words. */
6319     hash_init(&ht);
6320 
6321     vim_snprintf((char *)IObuff, IOSIZE,
6322 				  _("Reading dictionary file %s ..."), fname);
6323     spell_message(spin, IObuff);
6324 
6325     /* start with a message for the first line */
6326     spin->si_msg_count = 999999;
6327 
6328     /* Read and ignore the first line: word count. */
6329     (void)vim_fgets(line, MAXLINELEN, fd);
6330     if (!vim_isdigit(*skipwhite(line)))
6331 	EMSG2(_("E760: No word count in %s"), fname);
6332 
6333     /*
6334      * Read all the lines in the file one by one.
6335      * The words are converted to 'encoding' here, before being added to
6336      * the hashtable.
6337      */
6338     while (!vim_fgets(line, MAXLINELEN, fd) && !got_int)
6339     {
6340 	line_breakcheck();
6341 	++lnum;
6342 	if (line[0] == '#' || line[0] == '/')
6343 	    continue;	/* comment line */
6344 
6345 	/* Remove CR, LF and white space from the end.  White space halfway
6346 	 * the word is kept to allow e.g., "et al.". */
6347 	l = STRLEN(line);
6348 	while (l > 0 && line[l - 1] <= ' ')
6349 	    --l;
6350 	if (l == 0)
6351 	    continue;	/* empty line */
6352 	line[l] = NUL;
6353 
6354 	/* Truncate the word at the "/", set "afflist" to what follows.
6355 	 * Replace "\/" by "/" and "\\" by "\". */
6356 	afflist = NULL;
6357 	for (p = line; *p != NUL; mb_ptr_adv(p))
6358 	{
6359 	    if (*p == '\\' && (p[1] == '\\' || p[1] == '/'))
6360 		mch_memmove(p, p + 1, STRLEN(p));
6361 	    else if (*p == '/')
6362 	    {
6363 		*p = NUL;
6364 		afflist = p + 1;
6365 		break;
6366 	    }
6367 	}
6368 
6369 	/* Skip non-ASCII words when "spin->si_ascii" is TRUE. */
6370 	if (spin->si_ascii && has_non_ascii(line))
6371 	{
6372 	    ++non_ascii;
6373 	    continue;
6374 	}
6375 
6376 #ifdef FEAT_MBYTE
6377 	/* Convert from "SET" to 'encoding' when needed. */
6378 	if (spin->si_conv.vc_type != CONV_NONE)
6379 	{
6380 	    pc = string_convert(&spin->si_conv, line, NULL);
6381 	    if (pc == NULL)
6382 	    {
6383 		smsg((char_u *)_("Conversion failure for word in %s line %d: %s"),
6384 						       fname, lnum, line);
6385 		continue;
6386 	    }
6387 	    w = pc;
6388 	}
6389 	else
6390 #endif
6391 	{
6392 	    pc = NULL;
6393 	    w = line;
6394 	}
6395 
6396 	/* This takes time, print a message every 10000 words. */
6397 	if (spin->si_verbose && spin->si_msg_count > 10000)
6398 	{
6399 	    spin->si_msg_count = 0;
6400 	    vim_snprintf((char *)message, sizeof(message),
6401 		    _("line %6d, word %6d - %s"),
6402 		       lnum, spin->si_foldwcount + spin->si_keepwcount, w);
6403 	    msg_start();
6404 	    msg_puts_long_attr(message, 0);
6405 	    msg_clr_eos();
6406 	    msg_didout = FALSE;
6407 	    msg_col = 0;
6408 	    out_flush();
6409 	}
6410 
6411 	/* Store the word in the hashtable to be able to find duplicates. */
6412 	dw = (char_u *)getroom_save(spin, w);
6413 	if (dw == NULL)
6414 	    retval = FAIL;
6415 	vim_free(pc);
6416 	if (retval == FAIL)
6417 	    break;
6418 
6419 	hash = hash_hash(dw);
6420 	hi = hash_lookup(&ht, dw, hash);
6421 	if (!HASHITEM_EMPTY(hi))
6422 	{
6423 	    if (p_verbose > 0)
6424 		smsg((char_u *)_("Duplicate word in %s line %d: %s"),
6425 							     fname, lnum, dw);
6426 	    else if (duplicate == 0)
6427 		smsg((char_u *)_("First duplicate word in %s line %d: %s"),
6428 							     fname, lnum, dw);
6429 	    ++duplicate;
6430 	}
6431 	else
6432 	    hash_add_item(&ht, hi, dw, hash);
6433 
6434 	flags = 0;
6435 	store_afflist[0] = NUL;
6436 	pfxlen = 0;
6437 	need_affix = FALSE;
6438 	if (afflist != NULL)
6439 	{
6440 	    /* Check for affix name that stands for keep-case word and stands
6441 	     * for rare word (if defined). */
6442 	    if (affile->af_keepcase != 0 && flag_in_afflist(
6443 			   affile->af_flagtype, afflist, affile->af_keepcase))
6444 		flags |= WF_KEEPCAP | WF_FIXCAP;
6445 	    if (affile->af_rare != 0 && flag_in_afflist(
6446 				affile->af_flagtype, afflist, affile->af_rare))
6447 		flags |= WF_RARE;
6448 	    if (affile->af_bad != 0 && flag_in_afflist(
6449 				affile->af_flagtype, afflist, affile->af_bad))
6450 		flags |= WF_BANNED;
6451 	    if (affile->af_needaffix != 0 && flag_in_afflist(
6452 			  affile->af_flagtype, afflist, affile->af_needaffix))
6453 		need_affix = TRUE;
6454 	    if (affile->af_needcomp != 0 && flag_in_afflist(
6455 			   affile->af_flagtype, afflist, affile->af_needcomp))
6456 		flags |= WF_NEEDCOMP;
6457 	    if (affile->af_comproot != 0 && flag_in_afflist(
6458 			   affile->af_flagtype, afflist, affile->af_comproot))
6459 		flags |= WF_COMPROOT;
6460 	    if (affile->af_nosuggest != 0 && flag_in_afflist(
6461 			   affile->af_flagtype, afflist, affile->af_nosuggest))
6462 		flags |= WF_NOSUGGEST;
6463 
6464 	    if (affile->af_pfxpostpone)
6465 		/* Need to store the list of prefix IDs with the word. */
6466 		pfxlen = get_pfxlist(affile, afflist, store_afflist);
6467 
6468 	    if (spin->si_compflags != NULL)
6469 		/* Need to store the list of compound flags with the word.
6470 		 * Concatenate them to the list of prefix IDs. */
6471 		get_compflags(affile, afflist, store_afflist + pfxlen);
6472 	}
6473 
6474 	/* Add the word to the word tree(s). */
6475 	if (store_word(spin, dw, flags, spin->si_region,
6476 					   store_afflist, need_affix) == FAIL)
6477 	    retval = FAIL;
6478 
6479 	if (afflist != NULL)
6480 	{
6481 	    /* Find all matching suffixes and add the resulting words.
6482 	     * Additionally do matching prefixes that combine. */
6483 	    if (store_aff_word(spin, dw, afflist, affile,
6484 			   &affile->af_suff, &affile->af_pref,
6485 				 FALSE, flags, store_afflist, pfxlen) == FAIL)
6486 		retval = FAIL;
6487 
6488 	    /* Find all matching prefixes and add the resulting words. */
6489 	    if (store_aff_word(spin, dw, afflist, affile,
6490 			  &affile->af_pref, NULL,
6491 				 FALSE, flags, store_afflist, pfxlen) == FAIL)
6492 		retval = FAIL;
6493 	}
6494     }
6495 
6496     if (duplicate > 0)
6497 	smsg((char_u *)_("%d duplicate word(s) in %s"), duplicate, fname);
6498     if (spin->si_ascii && non_ascii > 0)
6499 	smsg((char_u *)_("Ignored %d word(s) with non-ASCII characters in %s"),
6500 							    non_ascii, fname);
6501     hash_clear(&ht);
6502 
6503     fclose(fd);
6504     return retval;
6505 }
6506 
6507 /*
6508  * Get the list of prefix IDs from the affix list "afflist".
6509  * Used for PFXPOSTPONE.
6510  * Put the resulting flags in "store_afflist[MAXWLEN]" with a terminating NUL
6511  * and return the number of affixes.
6512  */
6513     static int
6514 get_pfxlist(affile, afflist, store_afflist)
6515     afffile_T	*affile;
6516     char_u	*afflist;
6517     char_u	*store_afflist;
6518 {
6519     char_u	*p;
6520     char_u	*prevp;
6521     int		cnt = 0;
6522     int		id;
6523     char_u	key[AH_KEY_LEN];
6524     hashitem_T	*hi;
6525 
6526     for (p = afflist; *p != NUL; )
6527     {
6528 	prevp = p;
6529 	if (get_affitem(affile->af_flagtype, &p) != 0)
6530 	{
6531 	    /* A flag is a postponed prefix flag if it appears in "af_pref"
6532 	     * and it's ID is not zero. */
6533 	    vim_strncpy(key, prevp, p - prevp);
6534 	    hi = hash_find(&affile->af_pref, key);
6535 	    if (!HASHITEM_EMPTY(hi))
6536 	    {
6537 		id = HI2AH(hi)->ah_newID;
6538 		if (id != 0)
6539 		    store_afflist[cnt++] = id;
6540 	    }
6541 	}
6542 	if (affile->af_flagtype == AFT_NUM && *p == ',')
6543 	    ++p;
6544     }
6545 
6546     store_afflist[cnt] = NUL;
6547     return cnt;
6548 }
6549 
6550 /*
6551  * Get the list of compound IDs from the affix list "afflist" that are used
6552  * for compound words.
6553  * Puts the flags in "store_afflist[]".
6554  */
6555     static void
6556 get_compflags(affile, afflist, store_afflist)
6557     afffile_T	*affile;
6558     char_u	*afflist;
6559     char_u	*store_afflist;
6560 {
6561     char_u	*p;
6562     char_u	*prevp;
6563     int		cnt = 0;
6564     char_u	key[AH_KEY_LEN];
6565     hashitem_T	*hi;
6566 
6567     for (p = afflist; *p != NUL; )
6568     {
6569 	prevp = p;
6570 	if (get_affitem(affile->af_flagtype, &p) != 0)
6571 	{
6572 	    /* A flag is a compound flag if it appears in "af_comp". */
6573 	    vim_strncpy(key, prevp, p - prevp);
6574 	    hi = hash_find(&affile->af_comp, key);
6575 	    if (!HASHITEM_EMPTY(hi))
6576 		store_afflist[cnt++] = HI2CI(hi)->ci_newID;
6577 	}
6578 	if (affile->af_flagtype == AFT_NUM && *p == ',')
6579 	    ++p;
6580     }
6581 
6582     store_afflist[cnt] = NUL;
6583 }
6584 
6585 /*
6586  * Apply affixes to a word and store the resulting words.
6587  * "ht" is the hashtable with affentry_T that need to be applied, either
6588  * prefixes or suffixes.
6589  * "xht", when not NULL, is the prefix hashtable, to be used additionally on
6590  * the resulting words for combining affixes.
6591  *
6592  * Returns FAIL when out of memory.
6593  */
6594     static int
6595 store_aff_word(spin, word, afflist, affile, ht, xht, comb, flags,
6596 							      pfxlist, pfxlen)
6597     spellinfo_T	*spin;		/* spell info */
6598     char_u	*word;		/* basic word start */
6599     char_u	*afflist;	/* list of names of supported affixes */
6600     afffile_T	*affile;
6601     hashtab_T	*ht;
6602     hashtab_T	*xht;
6603     int		comb;		/* only use affixes that combine */
6604     int		flags;		/* flags for the word */
6605     char_u	*pfxlist;	/* list of prefix IDs */
6606     int		pfxlen;		/* nr of flags in "pfxlist" for prefixes, rest
6607 				 * is compound flags */
6608 {
6609     int		todo;
6610     hashitem_T	*hi;
6611     affheader_T	*ah;
6612     affentry_T	*ae;
6613     regmatch_T	regmatch;
6614     char_u	newword[MAXWLEN];
6615     int		retval = OK;
6616     int		i;
6617     char_u	*p;
6618     int		use_flags;
6619     char_u	*use_pfxlist;
6620     char_u	pfx_pfxlist[MAXWLEN];
6621     size_t	wordlen = STRLEN(word);
6622 
6623     todo = ht->ht_used;
6624     for (hi = ht->ht_array; todo > 0 && retval == OK; ++hi)
6625     {
6626 	if (!HASHITEM_EMPTY(hi))
6627 	{
6628 	    --todo;
6629 	    ah = HI2AH(hi);
6630 
6631 	    /* Check that the affix combines, if required, and that the word
6632 	     * supports this affix. */
6633 	    if ((!comb || ah->ah_combine) && flag_in_afflist(
6634 				   affile->af_flagtype, afflist, ah->ah_flag))
6635 	    {
6636 		/* Loop over all affix entries with this name. */
6637 		for (ae = ah->ah_first; ae != NULL; ae = ae->ae_next)
6638 		{
6639 		    /* Check the condition.  It's not logical to match case
6640 		     * here, but it is required for compatibility with
6641 		     * Myspell.
6642 		     * Another requirement from Myspell is that the chop
6643 		     * string is shorter than the word itself.
6644 		     * For prefixes, when "PFXPOSTPONE" was used, only do
6645 		     * prefixes with a chop string and/or flags. */
6646 		    regmatch.regprog = ae->ae_prog;
6647 		    regmatch.rm_ic = FALSE;
6648 		    if ((xht != NULL || !affile->af_pfxpostpone
6649 				|| ae->ae_chop != NULL
6650 				|| ae->ae_flags != NULL)
6651 			    && (ae->ae_chop == NULL
6652 				|| STRLEN(ae->ae_chop) < wordlen)
6653 			    && (ae->ae_prog == NULL
6654 				|| vim_regexec(&regmatch, word, (colnr_T)0)))
6655 		    {
6656 			/* Match.  Remove the chop and add the affix. */
6657 			if (xht == NULL)
6658 			{
6659 			    /* prefix: chop/add at the start of the word */
6660 			    if (ae->ae_add == NULL)
6661 				*newword = NUL;
6662 			    else
6663 				STRCPY(newword, ae->ae_add);
6664 			    p = word;
6665 			    if (ae->ae_chop != NULL)
6666 			    {
6667 				/* Skip chop string. */
6668 #ifdef FEAT_MBYTE
6669 				if (has_mbyte)
6670 				{
6671 				    i = mb_charlen(ae->ae_chop);
6672 				    for ( ; i > 0; --i)
6673 					mb_ptr_adv(p);
6674 				}
6675 				else
6676 #endif
6677 				    p += STRLEN(ae->ae_chop);
6678 			    }
6679 			    STRCAT(newword, p);
6680 			}
6681 			else
6682 			{
6683 			    /* suffix: chop/add at the end of the word */
6684 			    STRCPY(newword, word);
6685 			    if (ae->ae_chop != NULL)
6686 			    {
6687 				/* Remove chop string. */
6688 				p = newword + STRLEN(newword);
6689 				i = MB_CHARLEN(ae->ae_chop);
6690 				for ( ; i > 0; --i)
6691 				    mb_ptr_back(newword, p);
6692 				*p = NUL;
6693 			    }
6694 			    if (ae->ae_add != NULL)
6695 				STRCAT(newword, ae->ae_add);
6696 			}
6697 
6698 			/* Obey the "rare" flag of the affix. */
6699 			if (affile->af_rare != 0
6700 				&& ae->ae_flags != NULL
6701 				&& flag_in_afflist(
6702 				    affile->af_flagtype, ae->ae_flags,
6703 							     affile->af_rare))
6704 			    use_flags = flags | WF_RARE;
6705 			else
6706 			    use_flags = flags;
6707 
6708 			/* Obey a "COMPOUNDFORBID" flag of the affix: don't
6709 			 * use the compound flags. */
6710 			use_pfxlist = pfxlist;
6711 			if (pfxlist != NULL
6712 				&& affile->af_compforbid != 0
6713 				&& ae->ae_flags != NULL
6714 				&& flag_in_afflist(
6715 				    affile->af_flagtype, ae->ae_flags,
6716 						       affile->af_compforbid))
6717 			{
6718 			    vim_strncpy(pfx_pfxlist, pfxlist, pfxlen);
6719 			    use_pfxlist = pfx_pfxlist;
6720 			}
6721 
6722 			/* When there are postponed prefixes... */
6723 			if (spin->si_prefroot != NULL
6724 				&& spin->si_prefroot->wn_sibling != NULL)
6725 			{
6726 			    /* ... add a flag to indicate an affix was used. */
6727 			    use_flags |= WF_HAS_AFF;
6728 
6729 			    /* ... don't use a prefix list if combining
6730 			     * affixes is not allowed.  But do use the
6731 			     * compound flags after them. */
6732 			    if ((!ah->ah_combine || comb) && pfxlist != NULL)
6733 				use_pfxlist += pfxlen;
6734 			}
6735 
6736 			/* Store the modified word. */
6737 			if (store_word(spin, newword, use_flags,
6738 				 spin->si_region, use_pfxlist, FALSE) == FAIL)
6739 			    retval = FAIL;
6740 
6741 			/* When added a suffix and combining is allowed also
6742 			 * try adding prefixes additionally. */
6743 			if (xht != NULL && ah->ah_combine)
6744 			    if (store_aff_word(spin, newword, afflist, affile,
6745 					  xht, NULL, TRUE,
6746 				      use_flags, use_pfxlist, pfxlen) == FAIL)
6747 				retval = FAIL;
6748 		    }
6749 		}
6750 	    }
6751 	}
6752     }
6753 
6754     return retval;
6755 }
6756 
6757 /*
6758  * Read a file with a list of words.
6759  */
6760     static int
6761 spell_read_wordfile(spin, fname)
6762     spellinfo_T	*spin;
6763     char_u	*fname;
6764 {
6765     FILE	*fd;
6766     long	lnum = 0;
6767     char_u	rline[MAXLINELEN];
6768     char_u	*line;
6769     char_u	*pc = NULL;
6770     char_u	*p;
6771     int		l;
6772     int		retval = OK;
6773     int		did_word = FALSE;
6774     int		non_ascii = 0;
6775     int		flags;
6776     int		regionmask;
6777 
6778     /*
6779      * Open the file.
6780      */
6781     fd = mch_fopen((char *)fname, "r");
6782     if (fd == NULL)
6783     {
6784 	EMSG2(_(e_notopen), fname);
6785 	return FAIL;
6786     }
6787 
6788     vim_snprintf((char *)IObuff, IOSIZE, _("Reading word file %s ..."), fname);
6789     spell_message(spin, IObuff);
6790 
6791     /*
6792      * Read all the lines in the file one by one.
6793      */
6794     while (!vim_fgets(rline, MAXLINELEN, fd) && !got_int)
6795     {
6796 	line_breakcheck();
6797 	++lnum;
6798 
6799 	/* Skip comment lines. */
6800 	if (*rline == '#')
6801 	    continue;
6802 
6803 	/* Remove CR, LF and white space from the end. */
6804 	l = STRLEN(rline);
6805 	while (l > 0 && rline[l - 1] <= ' ')
6806 	    --l;
6807 	if (l == 0)
6808 	    continue;	/* empty or blank line */
6809 	rline[l] = NUL;
6810 
6811 	/* Convert from "=encoding={encoding}" to 'encoding' when needed. */
6812 	vim_free(pc);
6813 #ifdef FEAT_MBYTE
6814 	if (spin->si_conv.vc_type != CONV_NONE)
6815 	{
6816 	    pc = string_convert(&spin->si_conv, rline, NULL);
6817 	    if (pc == NULL)
6818 	    {
6819 		smsg((char_u *)_("Conversion failure for word in %s line %d: %s"),
6820 							   fname, lnum, rline);
6821 		continue;
6822 	    }
6823 	    line = pc;
6824 	}
6825 	else
6826 #endif
6827 	{
6828 	    pc = NULL;
6829 	    line = rline;
6830 	}
6831 
6832 	if (*line == '/')
6833 	{
6834 	    ++line;
6835 	    if (STRNCMP(line, "encoding=", 9) == 0)
6836 	    {
6837 		if (spin->si_conv.vc_type != CONV_NONE)
6838 		    smsg((char_u *)_("Duplicate /encoding= line ignored in %s line %d: %s"),
6839 						       fname, lnum, line - 1);
6840 		else if (did_word)
6841 		    smsg((char_u *)_("/encoding= line after word ignored in %s line %d: %s"),
6842 						       fname, lnum, line - 1);
6843 		else
6844 		{
6845 #ifdef FEAT_MBYTE
6846 		    char_u	*enc;
6847 
6848 		    /* Setup for conversion to 'encoding'. */
6849 		    line += 10;
6850 		    enc = enc_canonize(line);
6851 		    if (enc != NULL && !spin->si_ascii
6852 			    && convert_setup(&spin->si_conv, enc,
6853 							       p_enc) == FAIL)
6854 			smsg((char_u *)_("Conversion in %s not supported: from %s to %s"),
6855 							  fname, line, p_enc);
6856 		    vim_free(enc);
6857 		    spin->si_conv.vc_fail = TRUE;
6858 #else
6859 		    smsg((char_u *)_("Conversion in %s not supported"), fname);
6860 #endif
6861 		}
6862 		continue;
6863 	    }
6864 
6865 	    if (STRNCMP(line, "regions=", 8) == 0)
6866 	    {
6867 		if (spin->si_region_count > 1)
6868 		    smsg((char_u *)_("Duplicate /regions= line ignored in %s line %d: %s"),
6869 						       fname, lnum, line);
6870 		else
6871 		{
6872 		    line += 8;
6873 		    if (STRLEN(line) > 16)
6874 			smsg((char_u *)_("Too many regions in %s line %d: %s"),
6875 						       fname, lnum, line);
6876 		    else
6877 		    {
6878 			spin->si_region_count = STRLEN(line) / 2;
6879 			STRCPY(spin->si_region_name, line);
6880 
6881 			/* Adjust the mask for a word valid in all regions. */
6882 			spin->si_region = (1 << spin->si_region_count) - 1;
6883 		    }
6884 		}
6885 		continue;
6886 	    }
6887 
6888 	    smsg((char_u *)_("/ line ignored in %s line %d: %s"),
6889 						       fname, lnum, line - 1);
6890 	    continue;
6891 	}
6892 
6893 	flags = 0;
6894 	regionmask = spin->si_region;
6895 
6896 	/* Check for flags and region after a slash. */
6897 	p = vim_strchr(line, '/');
6898 	if (p != NULL)
6899 	{
6900 	    *p++ = NUL;
6901 	    while (*p != NUL)
6902 	    {
6903 		if (*p == '=')		/* keep-case word */
6904 		    flags |= WF_KEEPCAP | WF_FIXCAP;
6905 		else if (*p == '!')	/* Bad, bad, wicked word. */
6906 		    flags |= WF_BANNED;
6907 		else if (*p == '?')	/* Rare word. */
6908 		    flags |= WF_RARE;
6909 		else if (VIM_ISDIGIT(*p)) /* region number(s) */
6910 		{
6911 		    if ((flags & WF_REGION) == 0)   /* first one */
6912 			regionmask = 0;
6913 		    flags |= WF_REGION;
6914 
6915 		    l = *p - '0';
6916 		    if (l > spin->si_region_count)
6917 		    {
6918 			smsg((char_u *)_("Invalid region nr in %s line %d: %s"),
6919 							  fname, lnum, p);
6920 			break;
6921 		    }
6922 		    regionmask |= 1 << (l - 1);
6923 		}
6924 		else
6925 		{
6926 		    smsg((char_u *)_("Unrecognized flags in %s line %d: %s"),
6927 							      fname, lnum, p);
6928 		    break;
6929 		}
6930 		++p;
6931 	    }
6932 	}
6933 
6934 	/* Skip non-ASCII words when "spin->si_ascii" is TRUE. */
6935 	if (spin->si_ascii && has_non_ascii(line))
6936 	{
6937 	    ++non_ascii;
6938 	    continue;
6939 	}
6940 
6941 	/* Normal word: store it. */
6942 	if (store_word(spin, line, flags, regionmask, NULL, FALSE) == FAIL)
6943 	{
6944 	    retval = FAIL;
6945 	    break;
6946 	}
6947 	did_word = TRUE;
6948     }
6949 
6950     vim_free(pc);
6951     fclose(fd);
6952 
6953     if (spin->si_ascii && non_ascii > 0)
6954     {
6955 	vim_snprintf((char *)IObuff, IOSIZE,
6956 		  _("Ignored %d words with non-ASCII characters"), non_ascii);
6957 	spell_message(spin, IObuff);
6958     }
6959 
6960     return retval;
6961 }
6962 
6963 /*
6964  * Get part of an sblock_T, "len" bytes long.
6965  * This avoids calling free() for every little struct we use (and keeping
6966  * track of them).
6967  * The memory is cleared to all zeros.
6968  * Returns NULL when out of memory.
6969  */
6970     static void *
6971 getroom(spin, len, align)
6972     spellinfo_T *spin;
6973     size_t	len;		/* length needed */
6974     int		align;		/* align for pointer */
6975 {
6976     char_u	*p;
6977     sblock_T	*bl = spin->si_blocks;
6978 
6979     if (align && bl != NULL)
6980 	/* Round size up for alignment.  On some systems structures need to be
6981 	 * aligned to the size of a pointer (e.g., SPARC). */
6982 	bl->sb_used = (bl->sb_used + sizeof(char *) - 1)
6983 						      & ~(sizeof(char *) - 1);
6984 
6985     if (bl == NULL || bl->sb_used + len > SBLOCKSIZE)
6986     {
6987 	/* Allocate a block of memory. This is not freed until much later. */
6988 	bl = (sblock_T *)alloc_clear((unsigned)(sizeof(sblock_T) + SBLOCKSIZE));
6989 	if (bl == NULL)
6990 	    return NULL;
6991 	bl->sb_next = spin->si_blocks;
6992 	spin->si_blocks = bl;
6993 	bl->sb_used = 0;
6994 	++spin->si_blocks_cnt;
6995     }
6996 
6997     p = bl->sb_data + bl->sb_used;
6998     bl->sb_used += len;
6999 
7000     return p;
7001 }
7002 
7003 /*
7004  * Make a copy of a string into memory allocated with getroom().
7005  */
7006     static char_u *
7007 getroom_save(spin, s)
7008     spellinfo_T	*spin;
7009     char_u	*s;
7010 {
7011     char_u	*sc;
7012 
7013     sc = (char_u *)getroom(spin, STRLEN(s) + 1, FALSE);
7014     if (sc != NULL)
7015 	STRCPY(sc, s);
7016     return sc;
7017 }
7018 
7019 
7020 /*
7021  * Free the list of allocated sblock_T.
7022  */
7023     static void
7024 free_blocks(bl)
7025     sblock_T	*bl;
7026 {
7027     sblock_T	*next;
7028 
7029     while (bl != NULL)
7030     {
7031 	next = bl->sb_next;
7032 	vim_free(bl);
7033 	bl = next;
7034     }
7035 }
7036 
7037 /*
7038  * Allocate the root of a word tree.
7039  */
7040     static wordnode_T *
7041 wordtree_alloc(spin)
7042     spellinfo_T *spin;
7043 {
7044     return (wordnode_T *)getroom(spin, sizeof(wordnode_T), TRUE);
7045 }
7046 
7047 /*
7048  * Store a word in the tree(s).
7049  * Always store it in the case-folded tree.  For a keep-case word this is
7050  * useful when the word can also be used with all caps (no WF_FIXCAP flag) and
7051  * used to find suggestions.
7052  * For a keep-case word also store it in the keep-case tree.
7053  * When "pfxlist" is not NULL store the word for each postponed prefix ID and
7054  * compound flag.
7055  */
7056     static int
7057 store_word(spin, word, flags, region, pfxlist, need_affix)
7058     spellinfo_T	*spin;
7059     char_u	*word;
7060     int		flags;		/* extra flags, WF_BANNED */
7061     int		region;		/* supported region(s) */
7062     char_u	*pfxlist;	/* list of prefix IDs or NULL */
7063     int		need_affix;	/* only store word with affix ID */
7064 {
7065     int		len = STRLEN(word);
7066     int		ct = captype(word, word + len);
7067     char_u	foldword[MAXWLEN];
7068     int		res = OK;
7069     char_u	*p;
7070 
7071     (void)spell_casefold(word, len, foldword, MAXWLEN);
7072     for (p = pfxlist; res == OK; ++p)
7073     {
7074 	if (!need_affix || (p != NULL && *p != NUL))
7075 	    res = tree_add_word(spin, foldword, spin->si_foldroot, ct | flags,
7076 						  region, p == NULL ? 0 : *p);
7077 	if (p == NULL || *p == NUL)
7078 	    break;
7079     }
7080     ++spin->si_foldwcount;
7081 
7082     if (res == OK && (ct == WF_KEEPCAP || (flags & WF_KEEPCAP)))
7083     {
7084 	for (p = pfxlist; res == OK; ++p)
7085 	{
7086 	    if (!need_affix || (p != NULL && *p != NUL))
7087 		res = tree_add_word(spin, word, spin->si_keeproot, flags,
7088 						  region, p == NULL ? 0 : *p);
7089 	    if (p == NULL || *p == NUL)
7090 		break;
7091 	}
7092 	++spin->si_keepwcount;
7093     }
7094     return res;
7095 }
7096 
/*
 * Add word "word" to a word tree at "root".
 * When "flags" < 0 we are adding to the prefix tree where "flags" is used for
 * "rare" and "region" is the condition nr.
 *
 * The word is added byte by byte, each byte being looked up in (or inserted
 * into) the sorted sibling list of the current level.  The terminating NUL
 * gets its own node, which stores "flags", "region" and "affixID".
 * After adding the word the counters used for messages and for deciding when
 * to compress are updated and, when the conditions are met, the word tree(s)
 * are compressed.
 *
 * Returns FAIL when out of memory (get_wordnode() returned NULL), OK
 * otherwise.
 */
    static int
tree_add_word(spin, word, root, flags, region, affixID)
    spellinfo_T	*spin;
    char_u	*word;
    wordnode_T	*root;
    int		flags;
    int		region;
    int		affixID;
{
    wordnode_T	*node = root;
    wordnode_T	*np;
    wordnode_T	*copyp, **copyprev;
    wordnode_T	**prev = NULL;	    /* the pointer that points to "node" */
    int		i;

    /* Add each byte of the word to the tree, including the NUL at the end. */
    for (i = 0; ; ++i)
    {
	/* When there is more than one reference to this node we need to make
	 * a copy, so that we can modify it.  Copy the whole list of siblings
	 * (we don't optimize for a partly shared list of siblings). */
	if (node != NULL && node->wn_refs > 1)
	{
	    /* We stop using the shared list, drop our reference to it. */
	    --node->wn_refs;
	    copyprev = prev;
	    for (copyp = node; copyp != NULL; copyp = copyp->wn_sibling)
	    {
		/* Allocate a new node and copy the info. */
		np = get_wordnode(spin);
		if (np == NULL)
		    return FAIL;
		np->wn_child = copyp->wn_child;
		if (np->wn_child != NULL)
		    ++np->wn_child->wn_refs;	/* child gets extra ref */
		np->wn_byte = copyp->wn_byte;
		if (np->wn_byte == NUL)
		{
		    /* End-of-word node: also copy the word info. */
		    np->wn_flags = copyp->wn_flags;
		    np->wn_region = copyp->wn_region;
		    np->wn_affixID = copyp->wn_affixID;
		}

		/* Link the new node in the list, there will be one ref. */
		np->wn_refs = 1;
		*copyprev = np;
		copyprev = &np->wn_sibling;

		/* Let "node" point to the head of the copied list. */
		if (copyp == node)
		    node = np;
	    }
	}

	/* Look for the sibling that has the same character.  They are sorted
	 * on byte value, thus stop searching when a sibling is found with a
	 * higher byte value.  For zero bytes (end of word) the sorting is
	 * done on flags and then on affixID.
	 * For the prefix tree ("flags" < 0) the NUL nodes are ordered on
	 * affixID only; when "spin->si_sugtree" is set the region is used
	 * instead of the affixID. */
	while (node != NULL
		&& (node->wn_byte < word[i]
		    || (node->wn_byte == NUL
			&& (flags < 0
			    ? node->wn_affixID < (unsigned)affixID
			    : (node->wn_flags < (unsigned)(flags & WN_MASK)
				|| (node->wn_flags == (flags & WN_MASK)
				    && (spin->si_sugtree
					? (node->wn_region & 0xffff) < region
					: node->wn_affixID
						    < (unsigned)affixID)))))))
	{
	    prev = &node->wn_sibling;
	    node = *prev;
	}

	/* A new node is needed when no sibling has this byte.  For the NUL
	 * byte an existing node is only re-used when its flags and affixID
	 * match; for the prefix tree and the "si_sugtree" tree a new NUL node
	 * is always added. */
	if (node == NULL
		|| node->wn_byte != word[i]
		|| (word[i] == NUL
		    && (flags < 0
			|| spin->si_sugtree
			|| node->wn_flags != (flags & WN_MASK)
			|| node->wn_affixID != affixID)))
	{
	    /* Allocate a new node. */
	    np = get_wordnode(spin);
	    if (np == NULL)
		return FAIL;
	    np->wn_byte = word[i];

	    /* If "node" is NULL this is a new child or the end of the sibling
	     * list: ref count is one.  Otherwise use ref count of sibling and
	     * make ref count of sibling one (matters when inserting in front
	     * of the list of siblings). */
	    if (node == NULL)
		np->wn_refs = 1;
	    else
	    {
		np->wn_refs = node->wn_refs;
		node->wn_refs = 1;
	    }
	    /* Insert the new node before "node" in the sibling list. */
	    *prev = np;
	    np->wn_sibling = node;
	    node = np;
	}

	if (word[i] == NUL)
	{
	    /* End of the word: store the word info in the NUL node.  The
	     * region is OR'ed in, the other two are overwritten. */
	    node->wn_flags = flags;
	    node->wn_region |= region;
	    node->wn_affixID = affixID;
	    break;
	}

	/* Continue with the next byte one level down. */
	prev = &node->wn_child;
	node = *prev;
    }
#ifdef SPELL_PRINTTREE
    smsg("Added \"%s\"", word);
    spell_print_tree(root->wn_sibling);
#endif

    /* count nr of words added since last message */
    ++spin->si_msg_count;

    if (spin->si_compress_cnt > 1)
    {
	if (--spin->si_compress_cnt == 1)
	    /* Did enough words to lower the block count limit. */
	    spin->si_blocks_cnt += compress_inc;
    }

    /*
     * When we have allocated lots of memory we need to compress the word tree
     * to free up some room.  But compression is slow, and we might actually
     * need that room, thus only compress in the following situations:
     * 1. When not compressed before (si_compress_cnt == 0): when using
     *    "compress_start" blocks.
     * 2. When compressed before and used "compress_inc" blocks before
     *    adding "compress_added" words (si_compress_cnt > 1).
     * 3. When compressed before, added "compress_added" words
     *    (si_compress_cnt == 1) and the number of free nodes drops below the
     *    maximum word length.
     * With SPELL_PRINTTREE defined the condition is left out and the trees
     * are compressed after every added word.
     */
#ifndef SPELL_PRINTTREE
    if (spin->si_compress_cnt == 1
	    ? spin->si_free_count < MAXWLEN
	    : spin->si_blocks_cnt >= compress_start)
#endif
    {
	/* Decrement the block counter.  The effect is that we compress again
	 * when the freed up room has been used and another "compress_inc"
	 * blocks have been allocated.  Unless "compress_added" words have
	 * been added, then the limit is put back again. */
	spin->si_blocks_cnt -= compress_inc;
	spin->si_compress_cnt = compress_added;

	if (spin->si_verbose)
	{
	    /* Show the "compressing" message and leave the cursor at the
	     * start of the line, so that the next message overwrites it. */
	    msg_start();
	    msg_puts((char_u *)_(msg_compressing));
	    msg_clr_eos();
	    msg_didout = FALSE;
	    msg_col = 0;
	    out_flush();
	}

	/* Compress both trees.  Either they both have many nodes, which makes
	 * compression useful, or one of them is small, which means
	 * compression goes fast.  But when filling the soundfold word tree
	 * there is no keep-case tree. */
	wordtree_compress(spin, spin->si_foldroot);
	if (affixID >= 0)
	    wordtree_compress(spin, spin->si_keeproot);
    }

    return OK;
}
7276 
7277 /*
7278  * Check the 'mkspellmem' option.  Return FAIL if it's wrong.
7279  * Sets "sps_flags".
7280  */
7281     int
7282 spell_check_msm()
7283 {
7284     char_u	*p = p_msm;
7285     long	start = 0;
7286     long	inc = 0;
7287     long	added = 0;
7288 
7289     if (!VIM_ISDIGIT(*p))
7290 	return FAIL;
7291     /* block count = (value * 1024) / SBLOCKSIZE (but avoid overflow)*/
7292     start = (getdigits(&p) * 10) / (SBLOCKSIZE / 102);
7293     if (*p != ',')
7294 	return FAIL;
7295     ++p;
7296     if (!VIM_ISDIGIT(*p))
7297 	return FAIL;
7298     inc = (getdigits(&p) * 102) / (SBLOCKSIZE / 10);
7299     if (*p != ',')
7300 	return FAIL;
7301     ++p;
7302     if (!VIM_ISDIGIT(*p))
7303 	return FAIL;
7304     added = getdigits(&p) * 1024;
7305     if (*p != NUL)
7306 	return FAIL;
7307 
7308     if (start == 0 || inc == 0 || added == 0 || inc > start)
7309 	return FAIL;
7310 
7311     compress_start = start;
7312     compress_inc = inc;
7313     compress_added = added;
7314     return OK;
7315 }
7316 
7317 
7318 /*
7319  * Get a wordnode_T, either from the list of previously freed nodes or
7320  * allocate a new one.
7321  */
7322     static wordnode_T *
7323 get_wordnode(spin)
7324     spellinfo_T	    *spin;
7325 {
7326     wordnode_T *n;
7327 
7328     if (spin->si_first_free == NULL)
7329 	n = (wordnode_T *)getroom(spin, sizeof(wordnode_T), TRUE);
7330     else
7331     {
7332 	n = spin->si_first_free;
7333 	spin->si_first_free = n->wn_child;
7334 	vim_memset(n, 0, sizeof(wordnode_T));
7335 	--spin->si_free_count;
7336     }
7337 #ifdef SPELL_PRINTTREE
7338     n->wn_nr = ++spin->si_wordnode_nr;
7339 #endif
7340     return n;
7341 }
7342 
7343 /*
7344  * Decrement the reference count on a node (which is the head of a list of
7345  * siblings).  If the reference count becomes zero free the node and its
7346  * siblings.
7347  * Returns the number of nodes actually freed.
7348  */
7349     static int
7350 deref_wordnode(spin, node)
7351     spellinfo_T *spin;
7352     wordnode_T  *node;
7353 {
7354     wordnode_T	*np;
7355     int		cnt = 0;
7356 
7357     if (--node->wn_refs == 0)
7358     {
7359 	for (np = node; np != NULL; np = np->wn_sibling)
7360 	{
7361 	    if (np->wn_child != NULL)
7362 		cnt += deref_wordnode(spin, np->wn_child);
7363 	    free_wordnode(spin, np);
7364 	    ++cnt;
7365 	}
7366 	++cnt;	    /* length field */
7367     }
7368     return cnt;
7369 }
7370 
7371 /*
7372  * Free a wordnode_T for re-use later.
7373  * Only the "wn_child" field becomes invalid.
7374  */
7375     static void
7376 free_wordnode(spin, n)
7377     spellinfo_T	*spin;
7378     wordnode_T  *n;
7379 {
7380     n->wn_child = spin->si_first_free;
7381     spin->si_first_free = n;
7382     ++spin->si_free_count;
7383 }
7384 
7385 /*
7386  * Compress a tree: find tails that are identical and can be shared.
7387  */
7388     static void
7389 wordtree_compress(spin, root)
7390     spellinfo_T	    *spin;
7391     wordnode_T	    *root;
7392 {
7393     hashtab_T	    ht;
7394     int		    n;
7395     int		    tot = 0;
7396     int		    perc;
7397 
7398     /* Skip the root itself, it's not actually used.  The first sibling is the
7399      * start of the tree. */
7400     if (root->wn_sibling != NULL)
7401     {
7402 	hash_init(&ht);
7403 	n = node_compress(spin, root->wn_sibling, &ht, &tot);
7404 
7405 #ifndef SPELL_PRINTTREE
7406 	if (spin->si_verbose || p_verbose > 2)
7407 #endif
7408 	{
7409 	    if (tot > 1000000)
7410 		perc = (tot - n) / (tot / 100);
7411 	    else if (tot == 0)
7412 		perc = 0;
7413 	    else
7414 		perc = (tot - n) * 100 / tot;
7415 	    vim_snprintf((char *)IObuff, IOSIZE,
7416 			  _("Compressed %d of %d nodes; %d (%d%%) remaining"),
7417 						       n, tot, tot - n, perc);
7418 	    spell_message(spin, IObuff);
7419 	}
7420 #ifdef SPELL_PRINTTREE
7421 	spell_print_tree(root->wn_sibling);
7422 #endif
7423 	hash_clear(&ht);
7424     }
7425 }
7426 
/*
 * Compress a node, its siblings and its children, depth first.
 * For every child list an identical list that was seen before is looked up
 * in hashtable "ht"; when found the child pointer is redirected to it and the
 * duplicate is released with deref_wordnode().  Otherwise the child list is
 * remembered in "ht".
 * Before returning the hash key of "node" (for the whole sibling list) is
 * stored in "node->wn_u1.hashkey".
 * Returns the number of compressed nodes.
 */
    static int
node_compress(spin, node, ht, tot)
    spellinfo_T	*spin;
    wordnode_T	*node;
    hashtab_T	*ht;
    int		*tot;	    /* total count of nodes before compressing,
			       incremented while going through the tree */
{
    wordnode_T	*np;
    wordnode_T	*tp;
    wordnode_T	*child;
    hash_T	hash;
    hashitem_T	*hi;
    int		len = 0;	/* number of siblings visited */
    unsigned	nr, n;
    int		compressed = 0;

    /*
     * Go through the list of siblings.  Compress each child and then try
     * finding an identical child to replace it.
     * Note that with "child" we mean not just the node that is pointed to,
     * but the whole list of siblings of which the child node is the first.
     * The loop stops early when "got_int" is set.
     */
    for (np = node; np != NULL && !got_int; np = np->wn_sibling)
    {
	++len;
	if ((child = np->wn_child) != NULL)
	{
	    /* Compress the child first.  This fills hashkey. */
	    compressed += node_compress(spin, child, ht, tot);

	    /* Try to find an identical child. */
	    hash = hash_hash(child->wn_u1.hashkey);
	    hi = hash_lookup(ht, child->wn_u1.hashkey, hash);
	    if (!HASHITEM_EMPTY(hi))
	    {
		/* There are children we encountered before with a hash value
		 * identical to the current child.  Now check if there is one
		 * that is really identical. */
		for (tp = HI2WN(hi); tp != NULL; tp = tp->wn_u2.next)
		    if (node_equal(child, tp))
		    {
			/* Found one!  Now use that child in place of the
			 * current one.  This means the current child and all
			 * its siblings is unlinked from the tree. */
			++tp->wn_refs;
			compressed += deref_wordnode(spin, child);
			np->wn_child = tp;
			break;
		    }
		if (tp == NULL)
		{
		    /* No other child with this hash value equals the child of
		     * the node, add it to the linked list after the first
		     * item. */
		    tp = HI2WN(hi);
		    child->wn_u2.next = tp->wn_u2.next;
		    tp->wn_u2.next = child;
		}
	    }
	    else
		/* No other child has this hash value, add it to the
		 * hashtable. */
		hash_add_item(ht, hi, child->wn_u1.hashkey, hash);
	}
    }
    *tot += len + 1;	/* add one for the node that stores the length */

    /*
     * Make a hash key for the node and its siblings, so that we can quickly
     * find a lookalike node.  This must be done after compressing the sibling
     * list, otherwise the hash key would become invalid by the compression.
     * The first byte of the key is the number of siblings counted above, the
     * next four bytes come from a number computed over all the siblings.
     */
    node->wn_u1.hashkey[0] = len;
    nr = 0;
    for (np = node; np != NULL; np = np->wn_sibling)
    {
	if (np->wn_byte == NUL)
	    /* end node: use wn_flags, wn_region and wn_affixID */
	    n = np->wn_flags + (np->wn_region << 8) + (np->wn_affixID << 16);
	else
	    /* byte node: use the byte value and the child pointer */
	    n = np->wn_byte + ((long_u)np->wn_child << 8);
	nr = nr * 101 + n;
    }

    /* Avoid NUL bytes, it terminates the hash key.  A zero byte is replaced
     * by one. */
    n = nr & 0xff;
    node->wn_u1.hashkey[1] = n == 0 ? 1 : n;
    n = (nr >> 8) & 0xff;
    node->wn_u1.hashkey[2] = n == 0 ? 1 : n;
    n = (nr >> 16) & 0xff;
    node->wn_u1.hashkey[3] = n == 0 ? 1 : n;
    n = (nr >> 24) & 0xff;
    node->wn_u1.hashkey[4] = n == 0 ? 1 : n;
    node->wn_u1.hashkey[5] = NUL;

    /* Check for CTRL-C pressed now and then. */
    fast_breakcheck();

    return compressed;
}
7533 
7534 /*
7535  * Return TRUE when two nodes have identical siblings and children.
7536  */
7537     static int
7538 node_equal(n1, n2)
7539     wordnode_T	*n1;
7540     wordnode_T	*n2;
7541 {
7542     wordnode_T	*p1;
7543     wordnode_T	*p2;
7544 
7545     for (p1 = n1, p2 = n2; p1 != NULL && p2 != NULL;
7546 				     p1 = p1->wn_sibling, p2 = p2->wn_sibling)
7547 	if (p1->wn_byte != p2->wn_byte
7548 		|| (p1->wn_byte == NUL
7549 		    ? (p1->wn_flags != p2->wn_flags
7550 			|| p1->wn_region != p2->wn_region
7551 			|| p1->wn_affixID != p2->wn_affixID)
7552 		    : (p1->wn_child != p2->wn_child)))
7553 	    break;
7554 
7555     return p1 == NULL && p2 == NULL;
7556 }
7557 
7558 /*
7559  * Write a number to file "fd", MSB first, in "len" bytes.
7560  */
7561     void
7562 put_bytes(fd, nr, len)
7563     FILE    *fd;
7564     long_u  nr;
7565     int	    len;
7566 {
7567     int	    i;
7568 
7569     for (i = len - 1; i >= 0; --i)
7570 	putc((int)(nr >> (i * 8)), fd);
7571 }
7572 
7573 #ifdef _MSC_VER
7574 # if (_MSC_VER <= 1200)
7575 /* This line is required for VC6 without the service pack.  Also see the
7576  * matching #pragma below. */
7577 /* # pragma optimize("", off) */
7578 # endif
7579 #endif
7580 
7581 /*
7582  * Write spin->si_sugtime to file "fd".
7583  */
7584     static void
7585 put_sugtime(spin, fd)
7586     spellinfo_T *spin;
7587     FILE	*fd;
7588 {
7589     int		c;
7590     int		i;
7591 
7592     /* time_t can be up to 8 bytes in size, more than long_u, thus we
7593      * can't use put_bytes() here. */
7594     for (i = 7; i >= 0; --i)
7595 	if (i + 1 > sizeof(time_t))
7596 	    /* ">>" doesn't work well when shifting more bits than avail */
7597 	    putc(0, fd);
7598 	else
7599 	{
7600 	    c = (unsigned)spin->si_sugtime >> (i * 8);
7601 	    putc(c, fd);
7602 	}
7603 }
7604 
7605 #ifdef _MSC_VER
7606 # if (_MSC_VER <= 1200)
7607 /* # pragma optimize("", on) */
7608 # endif
7609 #endif
7610 
7611 static int
7612 #ifdef __BORLANDC__
7613 _RTLENTRYF
7614 #endif
7615 rep_compare __ARGS((const void *s1, const void *s2));
7616 
7617 /*
7618  * Function given to qsort() to sort the REP items on "from" string.
7619  */
7620     static int
7621 #ifdef __BORLANDC__
7622 _RTLENTRYF
7623 #endif
7624 rep_compare(s1, s2)
7625     const void	*s1;
7626     const void	*s2;
7627 {
7628     fromto_T	*p1 = (fromto_T *)s1;
7629     fromto_T	*p2 = (fromto_T *)s2;
7630 
7631     return STRCMP(p1->ft_from, p2->ft_from);
7632 }
7633 
7634 /*
7635  * Write the Vim .spl file "fname".
7636  * Return FAIL or OK;
7637  */
7638     static int
7639 write_vim_spell(spin, fname)
7640     spellinfo_T	*spin;
7641     char_u	*fname;
7642 {
7643     FILE	*fd;
7644     int		regionmask;
7645     int		round;
7646     wordnode_T	*tree;
7647     int		nodecount;
7648     int		i;
7649     int		l;
7650     garray_T	*gap;
7651     fromto_T	*ftp;
7652     char_u	*p;
7653     int		rr;
7654     int		retval = OK;
7655 
7656     fd = mch_fopen((char *)fname, "w");
7657     if (fd == NULL)
7658     {
7659 	EMSG2(_(e_notopen), fname);
7660 	return FAIL;
7661     }
7662 
7663     /* <HEADER>: <fileID> <versionnr> */
7664 							    /* <fileID> */
7665     if (fwrite(VIMSPELLMAGIC, VIMSPELLMAGICL, (size_t)1, fd) != 1)
7666     {
7667 	EMSG(_(e_write));
7668 	retval = FAIL;
7669     }
7670     putc(VIMSPELLVERSION, fd);				    /* <versionnr> */
7671 
7672     /*
7673      * <SECTIONS>: <section> ... <sectionend>
7674      */
7675 
7676     /* SN_INFO: <infotext> */
7677     if (spin->si_info != NULL)
7678     {
7679 	putc(SN_INFO, fd);				/* <sectionID> */
7680 	putc(0, fd);					/* <sectionflags> */
7681 
7682 	i = STRLEN(spin->si_info);
7683 	put_bytes(fd, (long_u)i, 4);			/* <sectionlen> */
7684 	fwrite(spin->si_info, (size_t)i, (size_t)1, fd); /* <infotext> */
7685     }
7686 
7687     /* SN_REGION: <regionname> ...
7688      * Write the region names only if there is more than one. */
7689     if (spin->si_region_count > 1)
7690     {
7691 	putc(SN_REGION, fd);				/* <sectionID> */
7692 	putc(SNF_REQUIRED, fd);				/* <sectionflags> */
7693 	l = spin->si_region_count * 2;
7694 	put_bytes(fd, (long_u)l, 4);			/* <sectionlen> */
7695 	fwrite(spin->si_region_name, (size_t)l, (size_t)1, fd);
7696 							/* <regionname> ... */
7697 	regionmask = (1 << spin->si_region_count) - 1;
7698     }
7699     else
7700 	regionmask = 0;
7701 
7702     /* SN_CHARFLAGS: <charflagslen> <charflags> <folcharslen> <folchars>
7703      *
7704      * The table with character flags and the table for case folding.
7705      * This makes sure the same characters are recognized as word characters
     * when generating and when using a spell file.
7707      * Skip this for ASCII, the table may conflict with the one used for
7708      * 'encoding'.
7709      * Also skip this for an .add.spl file, the main spell file must contain
7710      * the table (avoids that it conflicts).  File is shorter too.
7711      */
7712     if (!spin->si_ascii && !spin->si_add)
7713     {
7714 	char_u	folchars[128 * 8];
7715 	int	flags;
7716 
7717 	putc(SN_CHARFLAGS, fd);				/* <sectionID> */
7718 	putc(SNF_REQUIRED, fd);				/* <sectionflags> */
7719 
7720 	/* Form the <folchars> string first, we need to know its length. */
7721 	l = 0;
7722 	for (i = 128; i < 256; ++i)
7723 	{
7724 #ifdef FEAT_MBYTE
7725 	    if (has_mbyte)
7726 		l += mb_char2bytes(spelltab.st_fold[i], folchars + l);
7727 	    else
7728 #endif
7729 		folchars[l++] = spelltab.st_fold[i];
7730 	}
7731 	put_bytes(fd, (long_u)(1 + 128 + 2 + l), 4);	/* <sectionlen> */
7732 
7733 	fputc(128, fd);					/* <charflagslen> */
7734 	for (i = 128; i < 256; ++i)
7735 	{
7736 	    flags = 0;
7737 	    if (spelltab.st_isw[i])
7738 		flags |= CF_WORD;
7739 	    if (spelltab.st_isu[i])
7740 		flags |= CF_UPPER;
7741 	    fputc(flags, fd);				/* <charflags> */
7742 	}
7743 
7744 	put_bytes(fd, (long_u)l, 2);			/* <folcharslen> */
7745 	fwrite(folchars, (size_t)l, (size_t)1, fd);	/* <folchars> */
7746     }
7747 
7748     /* SN_MIDWORD: <midword> */
7749     if (spin->si_midword != NULL)
7750     {
7751 	putc(SN_MIDWORD, fd);				/* <sectionID> */
7752 	putc(SNF_REQUIRED, fd);				/* <sectionflags> */
7753 
7754 	i = STRLEN(spin->si_midword);
7755 	put_bytes(fd, (long_u)i, 4);			/* <sectionlen> */
7756 	fwrite(spin->si_midword, (size_t)i, (size_t)1, fd); /* <midword> */
7757     }
7758 
7759     /* SN_PREFCOND: <prefcondcnt> <prefcond> ... */
7760     if (spin->si_prefcond.ga_len > 0)
7761     {
7762 	putc(SN_PREFCOND, fd);				/* <sectionID> */
7763 	putc(SNF_REQUIRED, fd);				/* <sectionflags> */
7764 
7765 	l = write_spell_prefcond(NULL, &spin->si_prefcond);
7766 	put_bytes(fd, (long_u)l, 4);			/* <sectionlen> */
7767 
7768 	write_spell_prefcond(fd, &spin->si_prefcond);
7769     }
7770 
7771     /* SN_REP: <repcount> <rep> ...
7772      * SN_SAL: <salflags> <salcount> <sal> ...
7773      * SN_REPSAL: <repcount> <rep> ... */
7774 
7775     /* round 1: SN_REP section
7776      * round 2: SN_SAL section (unless SN_SOFO is used)
7777      * round 3: SN_REPSAL section */
7778     for (round = 1; round <= 3; ++round)
7779     {
7780 	if (round == 1)
7781 	    gap = &spin->si_rep;
7782 	else if (round == 2)
7783 	{
7784 	    /* Don't write SN_SAL when using a SN_SOFO section */
7785 	    if (spin->si_sofofr != NULL && spin->si_sofoto != NULL)
7786 		continue;
7787 	    gap = &spin->si_sal;
7788 	}
7789 	else
7790 	    gap = &spin->si_repsal;
7791 
7792 	/* Don't write the section if there are no items. */
7793 	if (gap->ga_len == 0)
7794 	    continue;
7795 
7796 	/* Sort the REP/REPSAL items. */
7797 	if (round != 2)
7798 	    qsort(gap->ga_data, (size_t)gap->ga_len,
7799 					       sizeof(fromto_T), rep_compare);
7800 
7801 	i = round == 1 ? SN_REP : (round == 2 ? SN_SAL : SN_REPSAL);
7802 	putc(i, fd);					/* <sectionID> */
7803 
7804 	/* This is for making suggestions, section is not required. */
7805 	putc(0, fd);					/* <sectionflags> */
7806 
7807 	/* Compute the length of what follows. */
7808 	l = 2;	    /* count <repcount> or <salcount> */
7809 	for (i = 0; i < gap->ga_len; ++i)
7810 	{
7811 	    ftp = &((fromto_T *)gap->ga_data)[i];
7812 	    l += 1 + STRLEN(ftp->ft_from);  /* count <*fromlen> and <*from> */
7813 	    l += 1 + STRLEN(ftp->ft_to);    /* count <*tolen> and <*to> */
7814 	}
7815 	if (round == 2)
7816 	    ++l;	/* count <salflags> */
7817 	put_bytes(fd, (long_u)l, 4);			/* <sectionlen> */
7818 
7819 	if (round == 2)
7820 	{
7821 	    i = 0;
7822 	    if (spin->si_followup)
7823 		i |= SAL_F0LLOWUP;
7824 	    if (spin->si_collapse)
7825 		i |= SAL_COLLAPSE;
7826 	    if (spin->si_rem_accents)
7827 		i |= SAL_REM_ACCENTS;
7828 	    putc(i, fd);			/* <salflags> */
7829 	}
7830 
7831 	put_bytes(fd, (long_u)gap->ga_len, 2);	/* <repcount> or <salcount> */
7832 	for (i = 0; i < gap->ga_len; ++i)
7833 	{
7834 	    /* <rep> : <repfromlen> <repfrom> <reptolen> <repto> */
7835 	    /* <sal> : <salfromlen> <salfrom> <saltolen> <salto> */
7836 	    ftp = &((fromto_T *)gap->ga_data)[i];
7837 	    for (rr = 1; rr <= 2; ++rr)
7838 	    {
7839 		p = rr == 1 ? ftp->ft_from : ftp->ft_to;
7840 		l = STRLEN(p);
7841 		putc(l, fd);
7842 		fwrite(p, l, (size_t)1, fd);
7843 	    }
7844 	}
7845 
7846     }
7847 
7848     /* SN_SOFO: <sofofromlen> <sofofrom> <sofotolen> <sofoto>
7849      * This is for making suggestions, section is not required. */
7850     if (spin->si_sofofr != NULL && spin->si_sofoto != NULL)
7851     {
7852 	putc(SN_SOFO, fd);				/* <sectionID> */
7853 	putc(0, fd);					/* <sectionflags> */
7854 
7855 	l = STRLEN(spin->si_sofofr);
7856 	put_bytes(fd, (long_u)(l + STRLEN(spin->si_sofoto) + 4), 4);
7857 							/* <sectionlen> */
7858 
7859 	put_bytes(fd, (long_u)l, 2);			/* <sofofromlen> */
7860 	fwrite(spin->si_sofofr, l, (size_t)1, fd);	/* <sofofrom> */
7861 
7862 	l = STRLEN(spin->si_sofoto);
7863 	put_bytes(fd, (long_u)l, 2);			/* <sofotolen> */
7864 	fwrite(spin->si_sofoto, l, (size_t)1, fd);	/* <sofoto> */
7865     }
7866 
7867     /* SN_WORDS: <word> ...
7868      * This is for making suggestions, section is not required. */
7869     if (spin->si_commonwords.ht_used > 0)
7870     {
7871 	putc(SN_WORDS, fd);				/* <sectionID> */
7872 	putc(0, fd);					/* <sectionflags> */
7873 
7874 	/* round 1: count the bytes
7875 	 * round 2: write the bytes */
7876 	for (round = 1; round <= 2; ++round)
7877 	{
7878 	    int		todo;
7879 	    int		len = 0;
7880 	    hashitem_T	*hi;
7881 
7882 	    todo = spin->si_commonwords.ht_used;
7883 	    for (hi = spin->si_commonwords.ht_array; todo > 0; ++hi)
7884 		if (!HASHITEM_EMPTY(hi))
7885 		{
7886 		    l = STRLEN(hi->hi_key) + 1;
7887 		    len += l;
7888 		    if (round == 2)			/* <word> */
7889 			fwrite(hi->hi_key, (size_t)l, (size_t)1, fd);
7890 		    --todo;
7891 		}
7892 	    if (round == 1)
7893 		put_bytes(fd, (long_u)len, 4);		/* <sectionlen> */
7894 	}
7895     }
7896 
7897     /* SN_MAP: <mapstr>
7898      * This is for making suggestions, section is not required. */
7899     if (spin->si_map.ga_len > 0)
7900     {
7901 	putc(SN_MAP, fd);				/* <sectionID> */
7902 	putc(0, fd);					/* <sectionflags> */
7903 	l = spin->si_map.ga_len;
7904 	put_bytes(fd, (long_u)l, 4);			/* <sectionlen> */
7905 	fwrite(spin->si_map.ga_data, (size_t)l, (size_t)1, fd);
7906 							/* <mapstr> */
7907     }
7908 
7909     /* SN_SUGFILE: <timestamp>
7910      * This is used to notify that a .sug file may be available and at the
7911      * same time allows for checking that a .sug file that is found matches
7912      * with this .spl file.  That's because the word numbers must be exactly
7913      * right. */
7914     if (!spin->si_nosugfile
7915 	    && (spin->si_sal.ga_len > 0
7916 		     || (spin->si_sofofr != NULL && spin->si_sofoto != NULL)))
7917     {
7918 	putc(SN_SUGFILE, fd);				/* <sectionID> */
7919 	putc(0, fd);					/* <sectionflags> */
7920 	put_bytes(fd, (long_u)8, 4);			/* <sectionlen> */
7921 
7922 	/* Set si_sugtime and write it to the file. */
7923 	spin->si_sugtime = time(NULL);
7924 	put_sugtime(spin, fd);				/* <timestamp> */
7925     }
7926 
7927     /* SN_NOSPLITSUGS: nothing
7928      * This is used to notify that no suggestions with word splits are to be
7929      * made. */
7930     if (spin->si_nosplitsugs)
7931     {
7932 	putc(SN_NOSPLITSUGS, fd);			/* <sectionID> */
7933 	putc(0, fd);					/* <sectionflags> */
7934 	put_bytes(fd, (long_u)0, 4);			/* <sectionlen> */
7935     }
7936 
7937     /* SN_COMPOUND: compound info.
7938      * We don't mark it required, when not supported all compound words will
7939      * be bad words. */
7940     if (spin->si_compflags != NULL)
7941     {
7942 	putc(SN_COMPOUND, fd);				/* <sectionID> */
7943 	putc(0, fd);					/* <sectionflags> */
7944 
7945 	l = STRLEN(spin->si_compflags);
7946 	for (i = 0; i < spin->si_comppat.ga_len; ++i)
7947 	    l += STRLEN(((char_u **)(spin->si_comppat.ga_data))[i]) + 1;
7948 	put_bytes(fd, (long_u)(l + 7), 4);		/* <sectionlen> */
7949 
7950 	putc(spin->si_compmax, fd);			/* <compmax> */
7951 	putc(spin->si_compminlen, fd);			/* <compminlen> */
7952 	putc(spin->si_compsylmax, fd);			/* <compsylmax> */
7953 	putc(0, fd);		/* for Vim 7.0b compatibility */
7954 	putc(spin->si_compoptions, fd);			/* <compoptions> */
7955 	put_bytes(fd, (long_u)spin->si_comppat.ga_len, 2);
7956 							/* <comppatcount> */
7957 	for (i = 0; i < spin->si_comppat.ga_len; ++i)
7958 	{
7959 	    p = ((char_u **)(spin->si_comppat.ga_data))[i];
7960 	    putc(STRLEN(p), fd);			/* <comppatlen> */
7961 	    fwrite(p, (size_t)STRLEN(p), (size_t)1, fd);/* <comppattext> */
7962 	}
7963 							/* <compflags> */
7964 	fwrite(spin->si_compflags, (size_t)STRLEN(spin->si_compflags),
7965 							       (size_t)1, fd);
7966     }
7967 
7968     /* SN_NOBREAK: NOBREAK flag */
7969     if (spin->si_nobreak)
7970     {
7971 	putc(SN_NOBREAK, fd);				/* <sectionID> */
7972 	putc(0, fd);					/* <sectionflags> */
7973 
7974 	/* It's empty, the precense of the section flags the feature. */
7975 	put_bytes(fd, (long_u)0, 4);			/* <sectionlen> */
7976     }
7977 
7978     /* SN_SYLLABLE: syllable info.
7979      * We don't mark it required, when not supported syllables will not be
7980      * counted. */
7981     if (spin->si_syllable != NULL)
7982     {
7983 	putc(SN_SYLLABLE, fd);				/* <sectionID> */
7984 	putc(0, fd);					/* <sectionflags> */
7985 
7986 	l = STRLEN(spin->si_syllable);
7987 	put_bytes(fd, (long_u)l, 4);			/* <sectionlen> */
7988 	fwrite(spin->si_syllable, (size_t)l, (size_t)1, fd); /* <syllable> */
7989     }
7990 
7991     /* end of <SECTIONS> */
7992     putc(SN_END, fd);					/* <sectionend> */
7993 
7994 
7995     /*
7996      * <LWORDTREE>  <KWORDTREE>  <PREFIXTREE>
7997      */
7998     spin->si_memtot = 0;
7999     for (round = 1; round <= 3; ++round)
8000     {
8001 	if (round == 1)
8002 	    tree = spin->si_foldroot->wn_sibling;
8003 	else if (round == 2)
8004 	    tree = spin->si_keeproot->wn_sibling;
8005 	else
8006 	    tree = spin->si_prefroot->wn_sibling;
8007 
8008 	/* Clear the index and wnode fields in the tree. */
8009 	clear_node(tree);
8010 
8011 	/* Count the number of nodes.  Needed to be able to allocate the
8012 	 * memory when reading the nodes.  Also fills in index for shared
8013 	 * nodes. */
8014 	nodecount = put_node(NULL, tree, 0, regionmask, round == 3);
8015 
8016 	/* number of nodes in 4 bytes */
8017 	put_bytes(fd, (long_u)nodecount, 4);	/* <nodecount> */
8018 	spin->si_memtot += nodecount + nodecount * sizeof(int);
8019 
8020 	/* Write the nodes. */
8021 	(void)put_node(fd, tree, 0, regionmask, round == 3);
8022     }
8023 
8024     /* Write another byte to check for errors. */
8025     if (putc(0, fd) == EOF)
8026 	retval = FAIL;
8027 
8028     if (fclose(fd) == EOF)
8029 	retval = FAIL;
8030 
8031     return retval;
8032 }
8033 
8034 /*
8035  * Clear the index and wnode fields of "node", it siblings and its
8036  * children.  This is needed because they are a union with other items to save
8037  * space.
8038  */
8039     static void
8040 clear_node(node)
8041     wordnode_T	*node;
8042 {
8043     wordnode_T	*np;
8044 
8045     if (node != NULL)
8046 	for (np = node; np != NULL; np = np->wn_sibling)
8047 	{
8048 	    np->wn_u1.index = 0;
8049 	    np->wn_u2.wnode = NULL;
8050 
8051 	    if (np->wn_byte != NUL)
8052 		clear_node(np->wn_child);
8053 	}
8054 }
8055 
8056 
8057 /*
8058  * Dump a word tree at node "node".
8059  *
8060  * This first writes the list of possible bytes (siblings).  Then for each
8061  * byte recursively write the children.
8062  *
8063  * NOTE: The code here must match the code in read_tree_node(), since
8064  * assumptions are made about the indexes (so that we don't have to write them
8065  * in the file).
8066  *
8067  * Returns the number of nodes used.
8068  */
8069     static int
8070 put_node(fd, node, index, regionmask, prefixtree)
8071     FILE	*fd;		/* NULL when only counting */
8072     wordnode_T	*node;
8073     int		index;
8074     int		regionmask;
8075     int		prefixtree;	/* TRUE for PREFIXTREE */
8076 {
8077     int		newindex = index;
8078     int		siblingcount = 0;
8079     wordnode_T	*np;
8080     int		flags;
8081 
8082     /* If "node" is zero the tree is empty. */
8083     if (node == NULL)
8084 	return 0;
8085 
8086     /* Store the index where this node is written. */
8087     node->wn_u1.index = index;
8088 
8089     /* Count the number of siblings. */
8090     for (np = node; np != NULL; np = np->wn_sibling)
8091 	++siblingcount;
8092 
8093     /* Write the sibling count. */
8094     if (fd != NULL)
8095 	putc(siblingcount, fd);				/* <siblingcount> */
8096 
8097     /* Write each sibling byte and optionally extra info. */
8098     for (np = node; np != NULL; np = np->wn_sibling)
8099     {
8100 	if (np->wn_byte == 0)
8101 	{
8102 	    if (fd != NULL)
8103 	    {
8104 		/* For a NUL byte (end of word) write the flags etc. */
8105 		if (prefixtree)
8106 		{
8107 		    /* In PREFIXTREE write the required affixID and the
8108 		     * associated condition nr (stored in wn_region).  The
8109 		     * byte value is misused to store the "rare" and "not
8110 		     * combining" flags */
8111 		    if (np->wn_flags == (short_u)PFX_FLAGS)
8112 			putc(BY_NOFLAGS, fd);		/* <byte> */
8113 		    else
8114 		    {
8115 			putc(BY_FLAGS, fd);		/* <byte> */
8116 			putc(np->wn_flags, fd);		/* <pflags> */
8117 		    }
8118 		    putc(np->wn_affixID, fd);		/* <affixID> */
8119 		    put_bytes(fd, (long_u)np->wn_region, 2); /* <prefcondnr> */
8120 		}
8121 		else
8122 		{
8123 		    /* For word trees we write the flag/region items. */
8124 		    flags = np->wn_flags;
8125 		    if (regionmask != 0 && np->wn_region != regionmask)
8126 			flags |= WF_REGION;
8127 		    if (np->wn_affixID != 0)
8128 			flags |= WF_AFX;
8129 		    if (flags == 0)
8130 		    {
8131 			/* word without flags or region */
8132 			putc(BY_NOFLAGS, fd);			/* <byte> */
8133 		    }
8134 		    else
8135 		    {
8136 			if (np->wn_flags >= 0x100)
8137 			{
8138 			    putc(BY_FLAGS2, fd);		/* <byte> */
8139 			    putc(flags, fd);			/* <flags> */
8140 			    putc((unsigned)flags >> 8, fd);	/* <flags2> */
8141 			}
8142 			else
8143 			{
8144 			    putc(BY_FLAGS, fd);			/* <byte> */
8145 			    putc(flags, fd);			/* <flags> */
8146 			}
8147 			if (flags & WF_REGION)
8148 			    putc(np->wn_region, fd);		/* <region> */
8149 			if (flags & WF_AFX)
8150 			    putc(np->wn_affixID, fd);		/* <affixID> */
8151 		    }
8152 		}
8153 	    }
8154 	}
8155 	else
8156 	{
8157 	    if (np->wn_child->wn_u1.index != 0
8158 					 && np->wn_child->wn_u2.wnode != node)
8159 	    {
8160 		/* The child is written elsewhere, write the reference. */
8161 		if (fd != NULL)
8162 		{
8163 		    putc(BY_INDEX, fd);			/* <byte> */
8164 							/* <nodeidx> */
8165 		    put_bytes(fd, (long_u)np->wn_child->wn_u1.index, 3);
8166 		}
8167 	    }
8168 	    else if (np->wn_child->wn_u2.wnode == NULL)
8169 		/* We will write the child below and give it an index. */
8170 		np->wn_child->wn_u2.wnode = node;
8171 
8172 	    if (fd != NULL)
8173 		if (putc(np->wn_byte, fd) == EOF) /* <byte> or <xbyte> */
8174 		{
8175 		    EMSG(_(e_write));
8176 		    return 0;
8177 		}
8178 	}
8179     }
8180 
8181     /* Space used in the array when reading: one for each sibling and one for
8182      * the count. */
8183     newindex += siblingcount + 1;
8184 
8185     /* Recursively dump the children of each sibling. */
8186     for (np = node; np != NULL; np = np->wn_sibling)
8187 	if (np->wn_byte != 0 && np->wn_child->wn_u2.wnode == node)
8188 	    newindex = put_node(fd, np->wn_child, newindex, regionmask,
8189 								  prefixtree);
8190 
8191     return newindex;
8192 }
8193 
8194 
8195 /*
8196  * ":mkspell [-ascii] outfile  infile ..."
8197  * ":mkspell [-ascii] addfile"
8198  */
8199     void
8200 ex_mkspell(eap)
8201     exarg_T *eap;
8202 {
8203     int		fcount;
8204     char_u	**fnames;
8205     char_u	*arg = eap->arg;
8206     int		ascii = FALSE;
8207 
8208     if (STRNCMP(arg, "-ascii", 6) == 0)
8209     {
8210 	ascii = TRUE;
8211 	arg = skipwhite(arg + 6);
8212     }
8213 
8214     /* Expand all the remaining arguments (e.g., $VIMRUNTIME). */
8215     if (get_arglist_exp(arg, &fcount, &fnames) == OK)
8216     {
8217 	mkspell(fcount, fnames, ascii, eap->forceit, FALSE);
8218 	FreeWild(fcount, fnames);
8219     }
8220 }
8221 
8222 /*
8223  * Create the .sug file.
8224  * Uses the soundfold info in "spin".
8225  * Writes the file with the name "wfname", with ".spl" changed to ".sug".
8226  */
8227     static void
8228 spell_make_sugfile(spin, wfname)
8229     spellinfo_T	*spin;
8230     char_u	*wfname;
8231 {
8232     char_u	fname[MAXPATHL];
8233     int		len;
8234     slang_T	*slang;
8235     int		free_slang = FALSE;
8236 
8237     /*
8238      * Read back the .spl file that was written.  This fills the required
8239      * info for soundfolding.  This also uses less memory than the
8240      * pointer-linked version of the trie.  And it avoids having two versions
8241      * of the code for the soundfolding stuff.
8242      * It might have been done already by spell_reload_one().
8243      */
8244     for (slang = first_lang; slang != NULL; slang = slang->sl_next)
8245 	if (fullpathcmp(wfname, slang->sl_fname, FALSE) == FPC_SAME)
8246 	    break;
8247     if (slang == NULL)
8248     {
8249 	spell_message(spin, (char_u *)_("Reading back spell file..."));
8250 	slang = spell_load_file(wfname, NULL, NULL, FALSE);
8251 	if (slang == NULL)
8252 	    return;
8253 	free_slang = TRUE;
8254     }
8255 
8256     /*
8257      * Clear the info in "spin" that is used.
8258      */
8259     spin->si_blocks = NULL;
8260     spin->si_blocks_cnt = 0;
8261     spin->si_compress_cnt = 0;	    /* will stay at 0 all the time*/
8262     spin->si_free_count = 0;
8263     spin->si_first_free = NULL;
8264     spin->si_foldwcount = 0;
8265 
8266     /*
8267      * Go through the trie of good words, soundfold each word and add it to
8268      * the soundfold trie.
8269      */
8270     spell_message(spin, (char_u *)_("Performing soundfolding..."));
8271     if (sug_filltree(spin, slang) == FAIL)
8272 	goto theend;
8273 
8274     /*
8275      * Create the table which links each soundfold word with a list of the
8276      * good words it may come from.  Creates buffer "spin->si_spellbuf".
8277      * This also removes the wordnr from the NUL byte entries to make
8278      * compression possible.
8279      */
8280     if (sug_maketable(spin) == FAIL)
8281 	goto theend;
8282 
8283     smsg((char_u *)_("Number of words after soundfolding: %ld"),
8284 				 (long)spin->si_spellbuf->b_ml.ml_line_count);
8285 
8286     /*
8287      * Compress the soundfold trie.
8288      */
8289     spell_message(spin, (char_u *)_(msg_compressing));
8290     wordtree_compress(spin, spin->si_foldroot);
8291 
8292     /*
8293      * Write the .sug file.
8294      * Make the file name by changing ".spl" to ".sug".
8295      */
8296     STRCPY(fname, wfname);
8297     len = STRLEN(fname);
8298     fname[len - 2] = 'u';
8299     fname[len - 1] = 'g';
8300     sug_write(spin, fname);
8301 
8302 theend:
8303     if (free_slang)
8304 	slang_free(slang);
8305     free_blocks(spin->si_blocks);
8306     close_spellbuf(spin->si_spellbuf);
8307 }
8308 
8309 /*
8310  * Build the soundfold trie for language "slang".
8311  */
8312     static int
8313 sug_filltree(spin, slang)
8314     spellinfo_T	*spin;
8315     slang_T	*slang;
8316 {
8317     char_u	*byts;
8318     idx_T	*idxs;
8319     int		depth;
8320     idx_T	arridx[MAXWLEN];
8321     int		curi[MAXWLEN];
8322     char_u	tword[MAXWLEN];
8323     char_u	tsalword[MAXWLEN];
8324     int		c;
8325     idx_T	n;
8326     unsigned	words_done = 0;
8327     int		wordcount[MAXWLEN];
8328 
8329     /* We use si_foldroot for the souldfolded trie. */
8330     spin->si_foldroot = wordtree_alloc(spin);
8331     if (spin->si_foldroot == NULL)
8332 	return FAIL;
8333 
8334     /* let tree_add_word() know we're adding to the soundfolded tree */
8335     spin->si_sugtree = TRUE;
8336 
8337     /*
8338      * Go through the whole case-folded tree, soundfold each word and put it
8339      * in the trie.
8340      */
8341     byts = slang->sl_fbyts;
8342     idxs = slang->sl_fidxs;
8343 
8344     arridx[0] = 0;
8345     curi[0] = 1;
8346     wordcount[0] = 0;
8347 
8348     depth = 0;
8349     while (depth >= 0 && !got_int)
8350     {
8351 	if (curi[depth] > byts[arridx[depth]])
8352 	{
8353 	    /* Done all bytes at this node, go up one level. */
8354 	    idxs[arridx[depth]] = wordcount[depth];
8355 	    if (depth > 0)
8356 		wordcount[depth - 1] += wordcount[depth];
8357 
8358 	    --depth;
8359 	    line_breakcheck();
8360 	}
8361 	else
8362 	{
8363 
8364 	    /* Do one more byte at this node. */
8365 	    n = arridx[depth] + curi[depth];
8366 	    ++curi[depth];
8367 
8368 	    c = byts[n];
8369 	    if (c == 0)
8370 	    {
8371 		/* Sound-fold the word. */
8372 		tword[depth] = NUL;
8373 		spell_soundfold(slang, tword, TRUE, tsalword);
8374 
8375 		/* We use the "flags" field for the MSB of the wordnr,
8376 		 * "region" for the LSB of the wordnr.  */
8377 		if (tree_add_word(spin, tsalword, spin->si_foldroot,
8378 				words_done >> 16, words_done & 0xffff,
8379 							   0) == FAIL)
8380 		    return FAIL;
8381 
8382 		++words_done;
8383 		++wordcount[depth];
8384 
8385 		/* Reset the block count each time to avoid compression
8386 		 * kicking in. */
8387 		spin->si_blocks_cnt = 0;
8388 
8389 		/* Skip over any other NUL bytes (same word with different
8390 		 * flags). */
8391 		while (byts[n + 1] == 0)
8392 		{
8393 		    ++n;
8394 		    ++curi[depth];
8395 		}
8396 	    }
8397 	    else
8398 	    {
8399 		/* Normal char, go one level deeper. */
8400 		tword[depth++] = c;
8401 		arridx[depth] = idxs[n];
8402 		curi[depth] = 1;
8403 		wordcount[depth] = 0;
8404 	    }
8405 	}
8406     }
8407 
8408     smsg((char_u *)_("Total number of words: %d"), words_done);
8409 
8410     return OK;
8411 }
8412 
8413 /*
8414  * Make the table that links each word in the soundfold trie to the words it
8415  * can be produced from.
8416  * This is not unlike lines in a file, thus use a memfile to be able to access
8417  * the table efficiently.
8418  * Returns FAIL when out of memory.
8419  */
8420     static int
8421 sug_maketable(spin)
8422     spellinfo_T	*spin;
8423 {
8424     garray_T	ga;
8425     int		res = OK;
8426 
8427     /* Allocate a buffer, open a memline for it and create the swap file
8428      * (uses a temp file, not a .swp file). */
8429     spin->si_spellbuf = open_spellbuf();
8430     if (spin->si_spellbuf == NULL)
8431 	return FAIL;
8432 
8433     /* Use a buffer to store the line info, avoids allocating many small
8434      * pieces of memory. */
8435     ga_init2(&ga, 1, 100);
8436 
8437     /* recursively go through the tree */
8438     if (sug_filltable(spin, spin->si_foldroot->wn_sibling, 0, &ga) == -1)
8439 	res = FAIL;
8440 
8441     ga_clear(&ga);
8442     return res;
8443 }
8444 
8445 /*
8446  * Fill the table for one node and its children.
8447  * Returns the wordnr at the start of the node.
8448  * Returns -1 when out of memory.
8449  */
8450     static int
8451 sug_filltable(spin, node, startwordnr, gap)
8452     spellinfo_T	*spin;
8453     wordnode_T	*node;
8454     int		startwordnr;
8455     garray_T	*gap;	    /* place to store line of numbers */
8456 {
8457     wordnode_T	*p, *np;
8458     int		wordnr = startwordnr;
8459     int		nr;
8460     int		prev_nr;
8461 
8462     for (p = node; p != NULL; p = p->wn_sibling)
8463     {
8464 	if (p->wn_byte == NUL)
8465 	{
8466 	    gap->ga_len = 0;
8467 	    prev_nr = 0;
8468 	    for (np = p; np != NULL && np->wn_byte == NUL; np = np->wn_sibling)
8469 	    {
8470 		if (ga_grow(gap, 10) == FAIL)
8471 		    return -1;
8472 
8473 		nr = (np->wn_flags << 16) + (np->wn_region & 0xffff);
8474 		/* Compute the offset from the previous nr and store the
8475 		 * offset in a way that it takes a minimum number of bytes.
8476 		 * It's a bit like utf-8, but without the need to mark
8477 		 * following bytes. */
8478 		nr -= prev_nr;
8479 		prev_nr += nr;
8480 		gap->ga_len += offset2bytes(nr,
8481 					 (char_u *)gap->ga_data + gap->ga_len);
8482 	    }
8483 
8484 	    /* add the NUL byte */
8485 	    ((char_u *)gap->ga_data)[gap->ga_len++] = NUL;
8486 
8487 	    if (ml_append_buf(spin->si_spellbuf, (linenr_T)wordnr,
8488 				     gap->ga_data, gap->ga_len, TRUE) == FAIL)
8489 		return -1;
8490 	    ++wordnr;
8491 
8492 	    /* Remove extra NUL entries, we no longer need them. We don't
8493 	     * bother freeing the nodes, the won't be reused anyway. */
8494 	    while (p->wn_sibling != NULL && p->wn_sibling->wn_byte == NUL)
8495 		p->wn_sibling = p->wn_sibling->wn_sibling;
8496 
8497 	    /* Clear the flags on the remaining NUL node, so that compression
8498 	     * works a lot better. */
8499 	    p->wn_flags = 0;
8500 	    p->wn_region = 0;
8501 	}
8502 	else
8503 	{
8504 	    wordnr = sug_filltable(spin, p->wn_child, wordnr, gap);
8505 	    if (wordnr == -1)
8506 		return -1;
8507 	}
8508     }
8509     return wordnr;
8510 }
8511 
8512 /*
8513  * Convert an offset into a minimal number of bytes.
8514  * Similar to utf_char2byters, but use 8 bits in followup bytes and avoid NUL
8515  * bytes.
8516  */
8517     static int
8518 offset2bytes(nr, buf)
8519     int	    nr;
8520     char_u  *buf;
8521 {
8522     int	    rem;
8523     int	    b1, b2, b3, b4;
8524 
8525     /* Split the number in parts of base 255.  We need to avoid NUL bytes. */
8526     b1 = nr % 255 + 1;
8527     rem = nr / 255;
8528     b2 = rem % 255 + 1;
8529     rem = rem / 255;
8530     b3 = rem % 255 + 1;
8531     b4 = rem / 255 + 1;
8532 
8533     if (b4 > 1 || b3 > 0x1f)	/* 4 bytes */
8534     {
8535 	buf[0] = 0xe0 + b4;
8536 	buf[1] = b3;
8537 	buf[2] = b2;
8538 	buf[3] = b1;
8539 	return 4;
8540     }
8541     if (b3 > 1 || b2 > 0x3f )	/* 3 bytes */
8542     {
8543 	buf[0] = 0xc0 + b3;
8544 	buf[1] = b2;
8545 	buf[2] = b1;
8546 	return 3;
8547     }
8548     if (b2 > 1 || b1 > 0x7f )	/* 2 bytes */
8549     {
8550 	buf[0] = 0x80 + b2;
8551 	buf[1] = b1;
8552 	return 2;
8553     }
8554 				/* 1 byte */
8555     buf[0] = b1;
8556     return 1;
8557 }
8558 
8559 /*
8560  * Opposite of offset2bytes().
8561  * "pp" points to the bytes and is advanced over it.
8562  * Returns the offset.
8563  */
8564     static int
8565 bytes2offset(pp)
8566     char_u	**pp;
8567 {
8568     char_u	*p = *pp;
8569     int		nr;
8570     int		c;
8571 
8572     c = *p++;
8573     if ((c & 0x80) == 0x00)		/* 1 byte */
8574     {
8575 	nr = c - 1;
8576     }
8577     else if ((c & 0xc0) == 0x80)	/* 2 bytes */
8578     {
8579 	nr = (c & 0x3f) - 1;
8580 	nr = nr * 255 + (*p++ - 1);
8581     }
8582     else if ((c & 0xe0) == 0xc0)	/* 3 bytes */
8583     {
8584 	nr = (c & 0x1f) - 1;
8585 	nr = nr * 255 + (*p++ - 1);
8586 	nr = nr * 255 + (*p++ - 1);
8587     }
8588     else				/* 4 bytes */
8589     {
8590 	nr = (c & 0x0f) - 1;
8591 	nr = nr * 255 + (*p++ - 1);
8592 	nr = nr * 255 + (*p++ - 1);
8593 	nr = nr * 255 + (*p++ - 1);
8594     }
8595 
8596     *pp = p;
8597     return nr;
8598 }
8599 
8600 /*
8601  * Write the .sug file in "fname".
8602  */
8603     static void
8604 sug_write(spin, fname)
8605     spellinfo_T	*spin;
8606     char_u	*fname;
8607 {
8608     FILE	*fd;
8609     wordnode_T	*tree;
8610     int		nodecount;
8611     int		wcount;
8612     char_u	*line;
8613     linenr_T	lnum;
8614     int		len;
8615 
8616     /* Create the file.  Note that an existing file is silently overwritten! */
8617     fd = mch_fopen((char *)fname, "w");
8618     if (fd == NULL)
8619     {
8620 	EMSG2(_(e_notopen), fname);
8621 	return;
8622     }
8623 
8624     vim_snprintf((char *)IObuff, IOSIZE,
8625 				  _("Writing suggestion file %s ..."), fname);
8626     spell_message(spin, IObuff);
8627 
8628     /*
8629      * <SUGHEADER>: <fileID> <versionnr> <timestamp>
8630      */
8631     if (fwrite(VIMSUGMAGIC, VIMSUGMAGICL, (size_t)1, fd) != 1) /* <fileID> */
8632     {
8633 	EMSG(_(e_write));
8634 	goto theend;
8635     }
8636     putc(VIMSUGVERSION, fd);				/* <versionnr> */
8637 
8638     /* Write si_sugtime to the file. */
8639     put_sugtime(spin, fd);				/* <timestamp> */
8640 
8641     /*
8642      * <SUGWORDTREE>
8643      */
8644     spin->si_memtot = 0;
8645     tree = spin->si_foldroot->wn_sibling;
8646 
8647     /* Clear the index and wnode fields in the tree. */
8648     clear_node(tree);
8649 
8650     /* Count the number of nodes.  Needed to be able to allocate the
8651      * memory when reading the nodes.  Also fills in index for shared
8652      * nodes. */
8653     nodecount = put_node(NULL, tree, 0, 0, FALSE);
8654 
8655     /* number of nodes in 4 bytes */
8656     put_bytes(fd, (long_u)nodecount, 4);	/* <nodecount> */
8657     spin->si_memtot += nodecount + nodecount * sizeof(int);
8658 
8659     /* Write the nodes. */
8660     (void)put_node(fd, tree, 0, 0, FALSE);
8661 
8662     /*
8663      * <SUGTABLE>: <sugwcount> <sugline> ...
8664      */
8665     wcount = spin->si_spellbuf->b_ml.ml_line_count;
8666     put_bytes(fd, (long_u)wcount, 4);	/* <sugwcount> */
8667 
8668     for (lnum = 1; lnum <= (linenr_T)wcount; ++lnum)
8669     {
8670 	/* <sugline>: <sugnr> ... NUL */
8671 	line = ml_get_buf(spin->si_spellbuf, lnum, FALSE);
8672 	len = STRLEN(line) + 1;
8673 	if (fwrite(line, (size_t)len, (size_t)1, fd) == 0)
8674 	{
8675 	    EMSG(_(e_write));
8676 	    goto theend;
8677 	}
8678 	spin->si_memtot += len;
8679     }
8680 
8681     /* Write another byte to check for errors. */
8682     if (putc(0, fd) == EOF)
8683 	EMSG(_(e_write));
8684 
8685     vim_snprintf((char *)IObuff, IOSIZE,
8686 		 _("Estimated runtime memory use: %d bytes"), spin->si_memtot);
8687     spell_message(spin, IObuff);
8688 
8689 theend:
8690     /* close the file */
8691     fclose(fd);
8692 }
8693 
8694 /*
8695  * Open a spell buffer.  This is a nameless buffer that is not in the buffer
8696  * list and only contains text lines.  Can use a swapfile to reduce memory
8697  * use.
8698  * Most other fields are invalid!  Esp. watch out for string options being
8699  * NULL and there is no undo info.
8700  * Returns NULL when out of memory.
8701  */
8702     static buf_T *
8703 open_spellbuf()
8704 {
8705     buf_T	*buf;
8706 
8707     buf = (buf_T *)alloc_clear(sizeof(buf_T));
8708     if (buf != NULL)
8709     {
8710 	buf->b_spell = TRUE;
8711 	buf->b_p_swf = TRUE;	/* may create a swap file */
8712 	ml_open(buf);
8713 	ml_open_file(buf);	/* create swap file now */
8714     }
8715     return buf;
8716 }
8717 
8718 /*
8719  * Close the buffer used for spell info.
8720  */
8721     static void
8722 close_spellbuf(buf)
8723     buf_T	*buf;
8724 {
8725     if (buf != NULL)
8726     {
8727 	ml_close(buf, TRUE);
8728 	vim_free(buf);
8729     }
8730 }
8731 
8732 
8733 /*
8734  * Create a Vim spell file from one or more word lists.
8735  * "fnames[0]" is the output file name.
8736  * "fnames[fcount - 1]" is the last input file name.
8737  * Exception: when "fnames[0]" ends in ".add" it's used as the input file name
8738  * and ".spl" is appended to make the output file name.
8739  */
8740     static void
8741 mkspell(fcount, fnames, ascii, overwrite, added_word)
8742     int		fcount;
8743     char_u	**fnames;
8744     int		ascii;		    /* -ascii argument given */
8745     int		overwrite;	    /* overwrite existing output file */
8746     int		added_word;	    /* invoked through "zg" */
8747 {
8748     char_u	fname[MAXPATHL];
8749     char_u	wfname[MAXPATHL];
8750     char_u	**innames;
8751     int		incount;
8752     afffile_T	*(afile[8]);
8753     int		i;
8754     int		len;
8755     struct stat	st;
8756     int		error = FALSE;
8757     spellinfo_T spin;
8758 
8759     vim_memset(&spin, 0, sizeof(spin));
8760     spin.si_verbose = !added_word;
8761     spin.si_ascii = ascii;
8762     spin.si_followup = TRUE;
8763     spin.si_rem_accents = TRUE;
8764     ga_init2(&spin.si_rep, (int)sizeof(fromto_T), 20);
8765     ga_init2(&spin.si_repsal, (int)sizeof(fromto_T), 20);
8766     ga_init2(&spin.si_sal, (int)sizeof(fromto_T), 20);
8767     ga_init2(&spin.si_map, (int)sizeof(char_u), 100);
8768     ga_init2(&spin.si_comppat, (int)sizeof(char_u *), 20);
8769     ga_init2(&spin.si_prefcond, (int)sizeof(char_u *), 50);
8770     hash_init(&spin.si_commonwords);
8771     spin.si_newcompID = 127;	/* start compound ID at first maximum */
8772 
8773     /* default: fnames[0] is output file, following are input files */
8774     innames = &fnames[1];
8775     incount = fcount - 1;
8776 
8777     if (fcount >= 1)
8778     {
8779 	len = STRLEN(fnames[0]);
8780 	if (fcount == 1 && len > 4 && STRCMP(fnames[0] + len - 4, ".add") == 0)
8781 	{
8782 	    /* For ":mkspell path/en.latin1.add" output file is
8783 	     * "path/en.latin1.add.spl". */
8784 	    innames = &fnames[0];
8785 	    incount = 1;
8786 	    vim_snprintf((char *)wfname, sizeof(wfname), "%s.spl", fnames[0]);
8787 	}
8788 	else if (fcount == 1)
8789 	{
8790 	    /* For ":mkspell path/vim" output file is "path/vim.latin1.spl". */
8791 	    innames = &fnames[0];
8792 	    incount = 1;
8793 	    vim_snprintf((char *)wfname, sizeof(wfname), "%s.%s.spl", fnames[0],
8794 			     spin.si_ascii ? (char_u *)"ascii" : spell_enc());
8795 	}
8796 	else if (len > 4 && STRCMP(fnames[0] + len - 4, ".spl") == 0)
8797 	{
8798 	    /* Name ends in ".spl", use as the file name. */
8799 	    vim_strncpy(wfname, fnames[0], sizeof(wfname) - 1);
8800 	}
8801 	else
8802 	    /* Name should be language, make the file name from it. */
8803 	    vim_snprintf((char *)wfname, sizeof(wfname), "%s.%s.spl", fnames[0],
8804 			     spin.si_ascii ? (char_u *)"ascii" : spell_enc());
8805 
8806 	/* Check for .ascii.spl. */
8807 	if (strstr((char *)gettail(wfname), ".ascii.") != NULL)
8808 	    spin.si_ascii = TRUE;
8809 
8810 	/* Check for .add.spl. */
8811 	if (strstr((char *)gettail(wfname), ".add.") != NULL)
8812 	    spin.si_add = TRUE;
8813     }
8814 
8815     if (incount <= 0)
8816 	EMSG(_(e_invarg));	/* need at least output and input names */
8817     else if (vim_strchr(gettail(wfname), '_') != NULL)
8818 	EMSG(_("E751: Output file name must not have region name"));
8819     else if (incount > 8)
8820 	EMSG(_("E754: Only up to 8 regions supported"));
8821     else
8822     {
8823 	/* Check for overwriting before doing things that may take a lot of
8824 	 * time. */
8825 	if (!overwrite && mch_stat((char *)wfname, &st) >= 0)
8826 	{
8827 	    EMSG(_(e_exists));
8828 	    return;
8829 	}
8830 	if (mch_isdir(wfname))
8831 	{
8832 	    EMSG2(_(e_isadir2), wfname);
8833 	    return;
8834 	}
8835 
8836 	/*
8837 	 * Init the aff and dic pointers.
8838 	 * Get the region names if there are more than 2 arguments.
8839 	 */
8840 	for (i = 0; i < incount; ++i)
8841 	{
8842 	    afile[i] = NULL;
8843 
8844 	    if (incount > 1)
8845 	    {
8846 		len = STRLEN(innames[i]);
8847 		if (STRLEN(gettail(innames[i])) < 5
8848 						|| innames[i][len - 3] != '_')
8849 		{
8850 		    EMSG2(_("E755: Invalid region in %s"), innames[i]);
8851 		    return;
8852 		}
8853 		spin.si_region_name[i * 2] = TOLOWER_ASC(innames[i][len - 2]);
8854 		spin.si_region_name[i * 2 + 1] =
8855 					     TOLOWER_ASC(innames[i][len - 1]);
8856 	    }
8857 	}
8858 	spin.si_region_count = incount;
8859 
8860 	spin.si_foldroot = wordtree_alloc(&spin);
8861 	spin.si_keeproot = wordtree_alloc(&spin);
8862 	spin.si_prefroot = wordtree_alloc(&spin);
8863 	if (spin.si_foldroot == NULL
8864 		|| spin.si_keeproot == NULL
8865 		|| spin.si_prefroot == NULL)
8866 	{
8867 	    free_blocks(spin.si_blocks);
8868 	    return;
8869 	}
8870 
8871 	/* When not producing a .add.spl file clear the character table when
8872 	 * we encounter one in the .aff file.  This means we dump the current
8873 	 * one in the .spl file if the .aff file doesn't define one.  That's
8874 	 * better than guessing the contents, the table will match a
8875 	 * previously loaded spell file. */
8876 	if (!spin.si_add)
8877 	    spin.si_clear_chartab = TRUE;
8878 
8879 	/*
8880 	 * Read all the .aff and .dic files.
8881 	 * Text is converted to 'encoding'.
8882 	 * Words are stored in the case-folded and keep-case trees.
8883 	 */
8884 	for (i = 0; i < incount && !error; ++i)
8885 	{
8886 	    spin.si_conv.vc_type = CONV_NONE;
8887 	    spin.si_region = 1 << i;
8888 
8889 	    vim_snprintf((char *)fname, sizeof(fname), "%s.aff", innames[i]);
8890 	    if (mch_stat((char *)fname, &st) >= 0)
8891 	    {
8892 		/* Read the .aff file.  Will init "spin->si_conv" based on the
8893 		 * "SET" line. */
8894 		afile[i] = spell_read_aff(&spin, fname);
8895 		if (afile[i] == NULL)
8896 		    error = TRUE;
8897 		else
8898 		{
8899 		    /* Read the .dic file and store the words in the trees. */
8900 		    vim_snprintf((char *)fname, sizeof(fname), "%s.dic",
8901 								  innames[i]);
8902 		    if (spell_read_dic(&spin, fname, afile[i]) == FAIL)
8903 			error = TRUE;
8904 		}
8905 	    }
8906 	    else
8907 	    {
8908 		/* No .aff file, try reading the file as a word list.  Store
8909 		 * the words in the trees. */
8910 		if (spell_read_wordfile(&spin, innames[i]) == FAIL)
8911 		    error = TRUE;
8912 	    }
8913 
8914 #ifdef FEAT_MBYTE
8915 	    /* Free any conversion stuff. */
8916 	    convert_setup(&spin.si_conv, NULL, NULL);
8917 #endif
8918 	}
8919 
8920 	if (spin.si_compflags != NULL && spin.si_nobreak)
8921 	    MSG(_("Warning: both compounding and NOBREAK specified"));
8922 
8923 	if (!error && !got_int)
8924 	{
8925 	    /*
8926 	     * Combine tails in the tree.
8927 	     */
8928 	    spell_message(&spin, (char_u *)_(msg_compressing));
8929 	    wordtree_compress(&spin, spin.si_foldroot);
8930 	    wordtree_compress(&spin, spin.si_keeproot);
8931 	    wordtree_compress(&spin, spin.si_prefroot);
8932 	}
8933 
8934 	if (!error && !got_int)
8935 	{
8936 	    /*
8937 	     * Write the info in the spell file.
8938 	     */
8939 	    vim_snprintf((char *)IObuff, IOSIZE,
8940 				      _("Writing spell file %s ..."), wfname);
8941 	    spell_message(&spin, IObuff);
8942 
8943 	    error = write_vim_spell(&spin, wfname) == FAIL;
8944 
8945 	    spell_message(&spin, (char_u *)_("Done!"));
8946 	    vim_snprintf((char *)IObuff, IOSIZE,
8947 		 _("Estimated runtime memory use: %d bytes"), spin.si_memtot);
8948 	    spell_message(&spin, IObuff);
8949 
8950 	    /*
8951 	     * If the file is loaded need to reload it.
8952 	     */
8953 	    if (!error)
8954 		spell_reload_one(wfname, added_word);
8955 	}
8956 
8957 	/* Free the allocated memory. */
8958 	ga_clear(&spin.si_rep);
8959 	ga_clear(&spin.si_repsal);
8960 	ga_clear(&spin.si_sal);
8961 	ga_clear(&spin.si_map);
8962 	ga_clear(&spin.si_comppat);
8963 	ga_clear(&spin.si_prefcond);
8964 	hash_clear_all(&spin.si_commonwords, 0);
8965 
8966 	/* Free the .aff file structures. */
8967 	for (i = 0; i < incount; ++i)
8968 	    if (afile[i] != NULL)
8969 		spell_free_aff(afile[i]);
8970 
8971 	/* Free all the bits and pieces at once. */
8972 	free_blocks(spin.si_blocks);
8973 
8974 	/*
8975 	 * If there is soundfolding info and no NOSUGFILE item create the
8976 	 * .sug file with the soundfolded word trie.
8977 	 */
8978 	if (spin.si_sugtime != 0 && !error && !got_int)
8979 	    spell_make_sugfile(&spin, wfname);
8980 
8981     }
8982 }
8983 
8984 /*
8985  * Display a message for spell file processing when 'verbose' is set or using
8986  * ":mkspell".  "str" can be IObuff.
8987  */
8988     static void
8989 spell_message(spin, str)
8990     spellinfo_T *spin;
8991     char_u	*str;
8992 {
8993     if (spin->si_verbose || p_verbose > 2)
8994     {
8995 	if (!spin->si_verbose)
8996 	    verbose_enter();
8997 	MSG(str);
8998 	out_flush();
8999 	if (!spin->si_verbose)
9000 	    verbose_leave();
9001     }
9002 }
9003 
9004 /*
9005  * ":[count]spellgood  {word}"
9006  * ":[count]spellwrong  {word}"
9007  * ":[count]spellundo  {word}"
9008  */
9009     void
9010 ex_spell(eap)
9011     exarg_T *eap;
9012 {
9013     spell_add_word(eap->arg, STRLEN(eap->arg), eap->cmdidx == CMD_spellwrong,
9014 				   eap->forceit ? 0 : (int)eap->line2,
9015 				   eap->cmdidx == CMD_spellundo);
9016 }
9017 
9018 /*
9019  * Add "word[len]" to 'spellfile' as a good or bad word.
9020  */
9021     void
9022 spell_add_word(word, len, bad, index, undo)
9023     char_u	*word;
9024     int		len;
9025     int		bad;
9026     int		index;	    /* "zG" and "zW": zero, otherwise index in
9027 			       'spellfile' */
9028     int		undo;	    /* TRUE for "zug", "zuG", "zuw" and "zuW" */
9029 {
9030     FILE	*fd;
9031     buf_T	*buf = NULL;
9032     int		new_spf = FALSE;
9033     char_u	*fname;
9034     char_u	fnamebuf[MAXPATHL];
9035     char_u	line[MAXWLEN * 2];
9036     long	fpos, fpos_next = 0;
9037     int		i;
9038     char_u	*spf;
9039 
9040     if (index == 0)	    /* use internal wordlist */
9041     {
9042 	if (int_wordlist == NULL)
9043 	{
9044 	    int_wordlist = vim_tempname('s');
9045 	    if (int_wordlist == NULL)
9046 		return;
9047 	}
9048 	fname = int_wordlist;
9049     }
9050     else
9051     {
9052 	/* If 'spellfile' isn't set figure out a good default value. */
9053 	if (*curbuf->b_p_spf == NUL)
9054 	{
9055 	    init_spellfile();
9056 	    new_spf = TRUE;
9057 	}
9058 
9059 	if (*curbuf->b_p_spf == NUL)
9060 	{
9061 	    EMSG2(_(e_notset), "spellfile");
9062 	    return;
9063 	}
9064 
9065 	for (spf = curbuf->b_p_spf, i = 1; *spf != NUL; ++i)
9066 	{
9067 	    copy_option_part(&spf, fnamebuf, MAXPATHL, ",");
9068 	    if (i == index)
9069 		break;
9070 	    if (*spf == NUL)
9071 	    {
9072 		EMSGN(_("E765: 'spellfile' does not have %ld entries"), index);
9073 		return;
9074 	    }
9075 	}
9076 
9077 	/* Check that the user isn't editing the .add file somewhere. */
9078 	buf = buflist_findname_exp(fnamebuf);
9079 	if (buf != NULL && buf->b_ml.ml_mfp == NULL)
9080 	    buf = NULL;
9081 	if (buf != NULL && bufIsChanged(buf))
9082 	{
9083 	    EMSG(_(e_bufloaded));
9084 	    return;
9085 	}
9086 
9087 	fname = fnamebuf;
9088     }
9089 
9090     if (bad || undo)
9091     {
9092 	/* When the word appears as good word we need to remove that one,
9093 	 * since its flags sort before the one with WF_BANNED. */
9094 	fd = mch_fopen((char *)fname, "r");
9095 	if (fd != NULL)
9096 	{
9097 	    while (!vim_fgets(line, MAXWLEN * 2, fd))
9098 	    {
9099 		fpos = fpos_next;
9100 		fpos_next = ftell(fd);
9101 		if (STRNCMP(word, line, len) == 0
9102 			&& (line[len] == '/' || line[len] < ' '))
9103 		{
9104 		    /* Found duplicate word.  Remove it by writing a '#' at
9105 		     * the start of the line.  Mixing reading and writing
9106 		     * doesn't work for all systems, close the file first. */
9107 		    fclose(fd);
9108 		    fd = mch_fopen((char *)fname, "r+");
9109 		    if (fd == NULL)
9110 			break;
9111 		    if (fseek(fd, fpos, SEEK_SET) == 0)
9112 		    {
9113 			fputc('#', fd);
9114 			if (undo)
9115 			smsg((char_u *)_("Word removed from %s"), NameBuff);
9116 		    }
9117 		    fseek(fd, fpos_next, SEEK_SET);
9118 		}
9119 	    }
9120 	    fclose(fd);
9121 	}
9122     }
9123 
9124     if (!undo)
9125     {
9126 	fd = mch_fopen((char *)fname, "a");
9127 	if (fd == NULL && new_spf)
9128 	{
9129 	    /* We just initialized the 'spellfile' option and can't open the
9130 	     * file.  We may need to create the "spell" directory first.  We
9131 	     * already checked the runtime directory is writable in
9132 	     * init_spellfile(). */
9133 	    if (!dir_of_file_exists(fname))
9134 	    {
9135 		/* The directory doesn't exist.  Try creating it and opening
9136 		 * the file again. */
9137 		vim_mkdir(NameBuff, 0755);
9138 		fd = mch_fopen((char *)fname, "a");
9139 	    }
9140 	}
9141 
9142 	if (fd == NULL)
9143 	    EMSG2(_(e_notopen), fname);
9144 	else
9145 	{
9146 	    if (bad)
9147 		fprintf(fd, "%.*s/!\n", len, word);
9148 	    else
9149 		fprintf(fd, "%.*s\n", len, word);
9150 	    fclose(fd);
9151 
9152 	    home_replace(NULL, fname, NameBuff, MAXPATHL, TRUE);
9153 	    smsg((char_u *)_("Word added to %s"), NameBuff);
9154 	}
9155     }
9156 
9157     if (fd != NULL)
9158     {
9159 	/* Update the .add.spl file. */
9160 	mkspell(1, &fname, FALSE, TRUE, TRUE);
9161 
9162 	/* If the .add file is edited somewhere, reload it. */
9163 	if (buf != NULL)
9164 	    buf_reload(buf, buf->b_orig_mode);
9165 
9166 	redraw_all_later(SOME_VALID);
9167     }
9168 }
9169 
9170 /*
9171  * Initialize 'spellfile' for the current buffer.
9172  */
9173     static void
9174 init_spellfile()
9175 {
9176     char_u	buf[MAXPATHL];
9177     int		l;
9178     char_u	*fname;
9179     char_u	*rtp;
9180     char_u	*lend;
9181     int		aspath = FALSE;
9182     char_u	*lstart = curbuf->b_p_spl;
9183 
9184     if (*curbuf->b_p_spl != NUL && curbuf->b_langp.ga_len > 0)
9185     {
9186 	/* Find the end of the language name.  Exclude the region.  If there
9187 	 * is a path separator remember the start of the tail. */
9188 	for (lend = curbuf->b_p_spl; *lend != NUL
9189 			&& vim_strchr((char_u *)",._", *lend) == NULL; ++lend)
9190 	    if (vim_ispathsep(*lend))
9191 	    {
9192 		aspath = TRUE;
9193 		lstart = lend + 1;
9194 	    }
9195 
9196 	/* Loop over all entries in 'runtimepath'.  Use the first one where we
9197 	 * are allowed to write. */
9198 	rtp = p_rtp;
9199 	while (*rtp != NUL)
9200 	{
9201 	    if (aspath)
9202 		/* Use directory of an entry with path, e.g., for
9203 		 * "/dir/lg.utf-8.spl" use "/dir". */
9204 		vim_strncpy(buf, curbuf->b_p_spl, lstart - curbuf->b_p_spl - 1);
9205 	    else
9206 		/* Copy the path from 'runtimepath' to buf[]. */
9207 		copy_option_part(&rtp, buf, MAXPATHL, ",");
9208 	    if (filewritable(buf) == 2)
9209 	    {
9210 		/* Use the first language name from 'spelllang' and the
9211 		 * encoding used in the first loaded .spl file. */
9212 		if (aspath)
9213 		    vim_strncpy(buf, curbuf->b_p_spl, lend - curbuf->b_p_spl);
9214 		else
9215 		{
9216 		    l = STRLEN(buf);
9217 		    vim_snprintf((char *)buf + l, MAXPATHL - l,
9218 				 "/spell/%.*s", (int)(lend - lstart), lstart);
9219 		}
9220 		l = STRLEN(buf);
9221 		fname = LANGP_ENTRY(curbuf->b_langp, 0)->lp_slang->sl_fname;
9222 		vim_snprintf((char *)buf + l, MAXPATHL - l, ".%s.add",
9223 			fname != NULL
9224 			  && strstr((char *)gettail(fname), ".ascii.") != NULL
9225 				       ? (char_u *)"ascii" : spell_enc());
9226 		set_option_value((char_u *)"spellfile", 0L, buf, OPT_LOCAL);
9227 		break;
9228 	    }
9229 	    aspath = FALSE;
9230 	}
9231     }
9232 }
9233 
9234 
9235 /*
9236  * Init the chartab used for spelling for ASCII.
9237  * EBCDIC is not supported!
9238  */
9239     static void
9240 clear_spell_chartab(sp)
9241     spelltab_T	*sp;
9242 {
9243     int		i;
9244 
9245     /* Init everything to FALSE. */
9246     vim_memset(sp->st_isw, FALSE, sizeof(sp->st_isw));
9247     vim_memset(sp->st_isu, FALSE, sizeof(sp->st_isu));
9248     for (i = 0; i < 256; ++i)
9249     {
9250 	sp->st_fold[i] = i;
9251 	sp->st_upper[i] = i;
9252     }
9253 
9254     /* We include digits.  A word shouldn't start with a digit, but handling
9255      * that is done separately. */
9256     for (i = '0'; i <= '9'; ++i)
9257 	sp->st_isw[i] = TRUE;
9258     for (i = 'A'; i <= 'Z'; ++i)
9259     {
9260 	sp->st_isw[i] = TRUE;
9261 	sp->st_isu[i] = TRUE;
9262 	sp->st_fold[i] = i + 0x20;
9263     }
9264     for (i = 'a'; i <= 'z'; ++i)
9265     {
9266 	sp->st_isw[i] = TRUE;
9267 	sp->st_upper[i] = i - 0x20;
9268     }
9269 }
9270 
9271 /*
9272  * Init the chartab used for spelling.  Only depends on 'encoding'.
9273  * Called once while starting up and when 'encoding' changes.
9274  * The default is to use isalpha(), but the spell file should define the word
9275  * characters to make it possible that 'encoding' differs from the current
9276  * locale.  For utf-8 we don't use isalpha() but our own functions.
9277  */
9278     void
9279 init_spell_chartab()
9280 {
9281     int	    i;
9282 
9283     did_set_spelltab = FALSE;
9284     clear_spell_chartab(&spelltab);
9285 #ifdef FEAT_MBYTE
9286     if (enc_dbcs)
9287     {
9288 	/* DBCS: assume double-wide characters are word characters. */
9289 	for (i = 128; i <= 255; ++i)
9290 	    if (MB_BYTE2LEN(i) == 2)
9291 		spelltab.st_isw[i] = TRUE;
9292     }
9293     else if (enc_utf8)
9294     {
9295 	for (i = 128; i < 256; ++i)
9296 	{
9297 	    spelltab.st_isu[i] = utf_isupper(i);
9298 	    spelltab.st_isw[i] = spelltab.st_isu[i] || utf_islower(i);
9299 	    spelltab.st_fold[i] = utf_fold(i);
9300 	    spelltab.st_upper[i] = utf_toupper(i);
9301 	}
9302     }
9303     else
9304 #endif
9305     {
9306 	/* Rough guess: use locale-dependent library functions. */
9307 	for (i = 128; i < 256; ++i)
9308 	{
9309 	    if (MB_ISUPPER(i))
9310 	    {
9311 		spelltab.st_isw[i] = TRUE;
9312 		spelltab.st_isu[i] = TRUE;
9313 		spelltab.st_fold[i] = MB_TOLOWER(i);
9314 	    }
9315 	    else if (MB_ISLOWER(i))
9316 	    {
9317 		spelltab.st_isw[i] = TRUE;
9318 		spelltab.st_upper[i] = MB_TOUPPER(i);
9319 	    }
9320 	}
9321     }
9322 }
9323 
9324 /*
9325  * Set the spell character tables from strings in the affix file.
9326  */
9327     static int
9328 set_spell_chartab(fol, low, upp)
9329     char_u	*fol;
9330     char_u	*low;
9331     char_u	*upp;
9332 {
9333     /* We build the new tables here first, so that we can compare with the
9334      * previous one. */
9335     spelltab_T	new_st;
9336     char_u	*pf = fol, *pl = low, *pu = upp;
9337     int		f, l, u;
9338 
9339     clear_spell_chartab(&new_st);
9340 
9341     while (*pf != NUL)
9342     {
9343 	if (*pl == NUL || *pu == NUL)
9344 	{
9345 	    EMSG(_(e_affform));
9346 	    return FAIL;
9347 	}
9348 #ifdef FEAT_MBYTE
9349 	f = mb_ptr2char_adv(&pf);
9350 	l = mb_ptr2char_adv(&pl);
9351 	u = mb_ptr2char_adv(&pu);
9352 #else
9353 	f = *pf++;
9354 	l = *pl++;
9355 	u = *pu++;
9356 #endif
9357 	/* Every character that appears is a word character. */
9358 	if (f < 256)
9359 	    new_st.st_isw[f] = TRUE;
9360 	if (l < 256)
9361 	    new_st.st_isw[l] = TRUE;
9362 	if (u < 256)
9363 	    new_st.st_isw[u] = TRUE;
9364 
9365 	/* if "LOW" and "FOL" are not the same the "LOW" char needs
9366 	 * case-folding */
9367 	if (l < 256 && l != f)
9368 	{
9369 	    if (f >= 256)
9370 	    {
9371 		EMSG(_(e_affrange));
9372 		return FAIL;
9373 	    }
9374 	    new_st.st_fold[l] = f;
9375 	}
9376 
9377 	/* if "UPP" and "FOL" are not the same the "UPP" char needs
9378 	 * case-folding, it's upper case and the "UPP" is the upper case of
9379 	 * "FOL" . */
9380 	if (u < 256 && u != f)
9381 	{
9382 	    if (f >= 256)
9383 	    {
9384 		EMSG(_(e_affrange));
9385 		return FAIL;
9386 	    }
9387 	    new_st.st_fold[u] = f;
9388 	    new_st.st_isu[u] = TRUE;
9389 	    new_st.st_upper[f] = u;
9390 	}
9391     }
9392 
9393     if (*pl != NUL || *pu != NUL)
9394     {
9395 	EMSG(_(e_affform));
9396 	return FAIL;
9397     }
9398 
9399     return set_spell_finish(&new_st);
9400 }
9401 
9402 /*
9403  * Set the spell character tables from strings in the .spl file.
9404  */
9405     static void
9406 set_spell_charflags(flags, cnt, fol)
9407     char_u	*flags;
9408     int		cnt;	    /* length of "flags" */
9409     char_u	*fol;
9410 {
9411     /* We build the new tables here first, so that we can compare with the
9412      * previous one. */
9413     spelltab_T	new_st;
9414     int		i;
9415     char_u	*p = fol;
9416     int		c;
9417 
9418     clear_spell_chartab(&new_st);
9419 
9420     for (i = 0; i < 128; ++i)
9421     {
9422 	if (i < cnt)
9423 	{
9424 	    new_st.st_isw[i + 128] = (flags[i] & CF_WORD) != 0;
9425 	    new_st.st_isu[i + 128] = (flags[i] & CF_UPPER) != 0;
9426 	}
9427 
9428 	if (*p != NUL)
9429 	{
9430 #ifdef FEAT_MBYTE
9431 	    c = mb_ptr2char_adv(&p);
9432 #else
9433 	    c = *p++;
9434 #endif
9435 	    new_st.st_fold[i + 128] = c;
9436 	    if (i + 128 != c && new_st.st_isu[i + 128] && c < 256)
9437 		new_st.st_upper[c] = i + 128;
9438 	}
9439     }
9440 
9441     (void)set_spell_finish(&new_st);
9442 }
9443 
9444     static int
9445 set_spell_finish(new_st)
9446     spelltab_T	*new_st;
9447 {
9448     int		i;
9449 
9450     if (did_set_spelltab)
9451     {
9452 	/* check that it's the same table */
9453 	for (i = 0; i < 256; ++i)
9454 	{
9455 	    if (spelltab.st_isw[i] != new_st->st_isw[i]
9456 		    || spelltab.st_isu[i] != new_st->st_isu[i]
9457 		    || spelltab.st_fold[i] != new_st->st_fold[i]
9458 		    || spelltab.st_upper[i] != new_st->st_upper[i])
9459 	    {
9460 		EMSG(_("E763: Word characters differ between spell files"));
9461 		return FAIL;
9462 	    }
9463 	}
9464     }
9465     else
9466     {
9467 	/* copy the new spelltab into the one being used */
9468 	spelltab = *new_st;
9469 	did_set_spelltab = TRUE;
9470     }
9471 
9472     return OK;
9473 }
9474 
9475 /*
9476  * Return TRUE if "p" points to a word character.
9477  * As a special case we see "midword" characters as word character when it is
9478  * followed by a word character.  This finds they'there but not 'they there'.
9479  * Thus this only works properly when past the first character of the word.
9480  */
9481     static int
9482 spell_iswordp(p, buf)
9483     char_u	*p;
9484     buf_T	*buf;	    /* buffer used */
9485 {
9486 #ifdef FEAT_MBYTE
9487     char_u	*s;
9488     int		l;
9489     int		c;
9490 
9491     if (has_mbyte)
9492     {
9493 	l = MB_BYTE2LEN(*p);
9494 	s = p;
9495 	if (l == 1)
9496 	{
9497 	    /* be quick for ASCII */
9498 	    if (buf->b_spell_ismw[*p])
9499 	    {
9500 		s = p + 1;		/* skip a mid-word character */
9501 		l = MB_BYTE2LEN(*s);
9502 	    }
9503 	}
9504 	else
9505 	{
9506 	    c = mb_ptr2char(p);
9507 	    if (c < 256 ? buf->b_spell_ismw[c]
9508 		    : (buf->b_spell_ismw_mb != NULL
9509 			   && vim_strchr(buf->b_spell_ismw_mb, c) != NULL))
9510 	    {
9511 		s = p + l;
9512 		l = MB_BYTE2LEN(*s);
9513 	    }
9514 	}
9515 
9516 	c = mb_ptr2char(s);
9517 	if (c > 255)
9518 	    return mb_get_class(s) >= 2;
9519 	return spelltab.st_isw[c];
9520     }
9521 #endif
9522 
9523     return spelltab.st_isw[buf->b_spell_ismw[*p] ? p[1] : p[0]];
9524 }
9525 
9526 /*
9527  * Return TRUE if "p" points to a word character.
9528  * Unlike spell_iswordp() this doesn't check for "midword" characters.
9529  */
9530     static int
9531 spell_iswordp_nmw(p)
9532     char_u	*p;
9533 {
9534 #ifdef FEAT_MBYTE
9535     int		c;
9536 
9537     if (has_mbyte)
9538     {
9539 	c = mb_ptr2char(p);
9540 	if (c > 255)
9541 	    return mb_get_class(p) >= 2;
9542 	return spelltab.st_isw[c];
9543     }
9544 #endif
9545     return spelltab.st_isw[*p];
9546 }
9547 
#ifdef FEAT_MBYTE
/*
 * Return TRUE if "p" points to a word character.
 * Wide version of spell_iswordp(): "p" points into an array of characters
 * instead of bytes.
 */
    static int
spell_iswordp_w(p, buf)
    int		*p;
    buf_T	*buf;
{
    int		*s = p;
    int		midword;

    if (*p < 256)
	midword = buf->b_spell_ismw[*p];
    else
	midword = (buf->b_spell_ismw_mb != NULL
			     && vim_strchr(buf->b_spell_ismw_mb, *p) != NULL);

    /* After a mid-word character the next character decides. */
    if (midword)
	s = p + 1;

    if (*s <= 255)
	return spelltab.st_isw[*s];

    if (enc_utf8)
	return utf_class(*s) >= 2;
    if (enc_dbcs)
	return dbcs_class((unsigned)*s >> 8, *s & 0xff) >= 2;
    return 0;
}
#endif
9578 
9579 /*
9580  * Write the table with prefix conditions to the .spl file.
9581  * When "fd" is NULL only count the length of what is written.
9582  */
9583     static int
9584 write_spell_prefcond(fd, gap)
9585     FILE	*fd;
9586     garray_T	*gap;
9587 {
9588     int		i;
9589     char_u	*p;
9590     int		len;
9591     int		totlen;
9592 
9593     if (fd != NULL)
9594 	put_bytes(fd, (long_u)gap->ga_len, 2);	    /* <prefcondcnt> */
9595 
9596     totlen = 2 + gap->ga_len; /* length of <prefcondcnt> and <condlen> bytes */
9597 
9598     for (i = 0; i < gap->ga_len; ++i)
9599     {
9600 	/* <prefcond> : <condlen> <condstr> */
9601 	p = ((char_u **)gap->ga_data)[i];
9602 	if (p != NULL)
9603 	{
9604 	    len = STRLEN(p);
9605 	    if (fd != NULL)
9606 	    {
9607 		fputc(len, fd);
9608 		fwrite(p, (size_t)len, (size_t)1, fd);
9609 	    }
9610 	    totlen += len;
9611 	}
9612 	else if (fd != NULL)
9613 	    fputc(0, fd);
9614     }
9615 
9616     return totlen;
9617 }
9618 
9619 /*
9620  * Case-fold "str[len]" into "buf[buflen]".  The result is NUL terminated.
9621  * Uses the character definitions from the .spl file.
9622  * When using a multi-byte 'encoding' the length may change!
9623  * Returns FAIL when something wrong.
9624  */
9625     static int
9626 spell_casefold(str, len, buf, buflen)
9627     char_u	*str;
9628     int		len;
9629     char_u	*buf;
9630     int		buflen;
9631 {
9632     int		i;
9633 
9634     if (len >= buflen)
9635     {
9636 	buf[0] = NUL;
9637 	return FAIL;		/* result will not fit */
9638     }
9639 
9640 #ifdef FEAT_MBYTE
9641     if (has_mbyte)
9642     {
9643 	int	outi = 0;
9644 	char_u	*p;
9645 	int	c;
9646 
9647 	/* Fold one character at a time. */
9648 	for (p = str; p < str + len; )
9649 	{
9650 	    if (outi + MB_MAXBYTES > buflen)
9651 	    {
9652 		buf[outi] = NUL;
9653 		return FAIL;
9654 	    }
9655 	    c = mb_cptr2char_adv(&p);
9656 	    outi += mb_char2bytes(SPELL_TOFOLD(c), buf + outi);
9657 	}
9658 	buf[outi] = NUL;
9659     }
9660     else
9661 #endif
9662     {
9663 	/* Be quick for non-multibyte encodings. */
9664 	for (i = 0; i < len; ++i)
9665 	    buf[i] = spelltab.st_fold[str[i]];
9666 	buf[i] = NUL;
9667     }
9668 
9669     return OK;
9670 }
9671 
9672 /* values for sps_flags */
9673 #define SPS_BEST    1
9674 #define SPS_FAST    2
9675 #define SPS_DOUBLE  4
9676 
9677 static int sps_flags = SPS_BEST;	/* flags from 'spellsuggest' */
9678 static int sps_limit = 9999;		/* max nr of suggestions given */
9679 
9680 /*
9681  * Check the 'spellsuggest' option.  Return FAIL if it's wrong.
9682  * Sets "sps_flags" and "sps_limit".
9683  */
9684     int
9685 spell_check_sps()
9686 {
9687     char_u	*p;
9688     char_u	*s;
9689     char_u	buf[MAXPATHL];
9690     int		f;
9691 
9692     sps_flags = 0;
9693     sps_limit = 9999;
9694 
9695     for (p = p_sps; *p != NUL; )
9696     {
9697 	copy_option_part(&p, buf, MAXPATHL, ",");
9698 
9699 	f = 0;
9700 	if (VIM_ISDIGIT(*buf))
9701 	{
9702 	    s = buf;
9703 	    sps_limit = getdigits(&s);
9704 	    if (*s != NUL && !VIM_ISDIGIT(*s))
9705 		f = -1;
9706 	}
9707 	else if (STRCMP(buf, "best") == 0)
9708 	    f = SPS_BEST;
9709 	else if (STRCMP(buf, "fast") == 0)
9710 	    f = SPS_FAST;
9711 	else if (STRCMP(buf, "double") == 0)
9712 	    f = SPS_DOUBLE;
9713 	else if (STRNCMP(buf, "expr:", 5) != 0
9714 		&& STRNCMP(buf, "file:", 5) != 0)
9715 	    f = -1;
9716 
9717 	if (f == -1 || (sps_flags != 0 && f != 0))
9718 	{
9719 	    sps_flags = SPS_BEST;
9720 	    sps_limit = 9999;
9721 	    return FAIL;
9722 	}
9723 	if (f != 0)
9724 	    sps_flags = f;
9725     }
9726 
9727     if (sps_flags == 0)
9728 	sps_flags = SPS_BEST;
9729 
9730     return OK;
9731 }
9732 
9733 /*
9734  * "z?": Find badly spelled word under or after the cursor.
9735  * Give suggestions for the properly spelled word.
9736  * In Visual mode use the highlighted word as the bad word.
9737  * When "count" is non-zero use that suggestion.
9738  */
9739     void
9740 spell_suggest(count)
9741     int		count;
9742 {
9743     char_u	*line;
9744     pos_T	prev_cursor = curwin->w_cursor;
9745     char_u	wcopy[MAXWLEN + 2];
9746     char_u	*p;
9747     int		i;
9748     int		c;
9749     suginfo_T	sug;
9750     suggest_T	*stp;
9751     int		mouse_used;
9752     int		need_cap;
9753     int		limit;
9754     int		selected = count;
9755     int		badlen = 0;
9756 
9757     if (no_spell_checking(curwin))
9758 	return;
9759 
9760 #ifdef FEAT_VISUAL
9761     if (VIsual_active)
9762     {
9763 	/* Use the Visually selected text as the bad word.  But reject
9764 	 * a multi-line selection. */
9765 	if (curwin->w_cursor.lnum != VIsual.lnum)
9766 	{
9767 	    vim_beep();
9768 	    return;
9769 	}
9770 	badlen = (int)curwin->w_cursor.col - (int)VIsual.col;
9771 	if (badlen < 0)
9772 	    badlen = -badlen;
9773 	else
9774 	    curwin->w_cursor.col = VIsual.col;
9775 	++badlen;
9776 	end_visual_mode();
9777     }
9778     else
9779 #endif
9780 	/* Find the start of the badly spelled word. */
9781 	if (spell_move_to(curwin, FORWARD, TRUE, TRUE, NULL) == 0
9782 	    || curwin->w_cursor.col > prev_cursor.col)
9783     {
9784 	/* No bad word or it starts after the cursor: use the word under the
9785 	 * cursor. */
9786 	curwin->w_cursor = prev_cursor;
9787 	line = ml_get_curline();
9788 	p = line + curwin->w_cursor.col;
9789 	/* Backup to before start of word. */
9790 	while (p > line && spell_iswordp_nmw(p))
9791 	    mb_ptr_back(line, p);
9792 	/* Forward to start of word. */
9793 	while (*p != NUL && !spell_iswordp_nmw(p))
9794 	    mb_ptr_adv(p);
9795 
9796 	if (!spell_iswordp_nmw(p))		/* No word found. */
9797 	{
9798 	    beep_flush();
9799 	    return;
9800 	}
9801 	curwin->w_cursor.col = p - line;
9802     }
9803 
9804     /* Get the word and its length. */
9805 
9806     /* Figure out if the word should be capitalised. */
9807     need_cap = check_need_cap(curwin->w_cursor.lnum, curwin->w_cursor.col);
9808 
9809     line = ml_get_curline();
9810 
9811     /* Get the list of suggestions.  Limit to 'lines' - 2 or the number in
9812      * 'spellsuggest', whatever is smaller. */
9813     if (sps_limit > (int)Rows - 2)
9814 	limit = (int)Rows - 2;
9815     else
9816 	limit = sps_limit;
9817     spell_find_suggest(line + curwin->w_cursor.col, badlen, &sug, limit,
9818 							TRUE, need_cap, TRUE);
9819 
9820     if (sug.su_ga.ga_len == 0)
9821 	MSG(_("Sorry, no suggestions"));
9822     else if (count > 0)
9823     {
9824 	if (count > sug.su_ga.ga_len)
9825 	    smsg((char_u *)_("Sorry, only %ld suggestions"),
9826 						      (long)sug.su_ga.ga_len);
9827     }
9828     else
9829     {
9830 	vim_free(repl_from);
9831 	repl_from = NULL;
9832 	vim_free(repl_to);
9833 	repl_to = NULL;
9834 
9835 #ifdef FEAT_RIGHTLEFT
9836 	/* When 'rightleft' is set the list is drawn right-left. */
9837 	cmdmsg_rl = curwin->w_p_rl;
9838 	if (cmdmsg_rl)
9839 	    msg_col = Columns - 1;
9840 #endif
9841 
9842 	/* List the suggestions. */
9843 	msg_start();
9844 	lines_left = Rows;	/* avoid more prompt */
9845 	vim_snprintf((char *)IObuff, IOSIZE, _("Change \"%.*s\" to:"),
9846 						sug.su_badlen, sug.su_badptr);
9847 #ifdef FEAT_RIGHTLEFT
9848 	if (cmdmsg_rl && STRNCMP(IObuff, "Change", 6) == 0)
9849 	{
9850 	    /* And now the rabbit from the high hat: Avoid showing the
9851 	     * untranslated message rightleft. */
9852 	    vim_snprintf((char *)IObuff, IOSIZE, ":ot \"%.*s\" egnahC",
9853 						sug.su_badlen, sug.su_badptr);
9854 	}
9855 #endif
9856 	msg_puts(IObuff);
9857 	msg_clr_eos();
9858 	msg_putchar('\n');
9859 
9860 	msg_scroll = TRUE;
9861 	for (i = 0; i < sug.su_ga.ga_len; ++i)
9862 	{
9863 	    stp = &SUG(sug.su_ga, i);
9864 
9865 	    /* The suggested word may replace only part of the bad word, add
9866 	     * the not replaced part. */
9867 	    STRCPY(wcopy, stp->st_word);
9868 	    if (sug.su_badlen > stp->st_orglen)
9869 		vim_strncpy(wcopy + stp->st_wordlen,
9870 					       sug.su_badptr + stp->st_orglen,
9871 					      sug.su_badlen - stp->st_orglen);
9872 	    vim_snprintf((char *)IObuff, IOSIZE, "%2d", i + 1);
9873 #ifdef FEAT_RIGHTLEFT
9874 	    if (cmdmsg_rl)
9875 		rl_mirror(IObuff);
9876 #endif
9877 	    msg_puts(IObuff);
9878 
9879 	    vim_snprintf((char *)IObuff, IOSIZE, " \"%s\"", wcopy);
9880 	    msg_puts(IObuff);
9881 
9882 	    /* The word may replace more than "su_badlen". */
9883 	    if (sug.su_badlen < stp->st_orglen)
9884 	    {
9885 		vim_snprintf((char *)IObuff, IOSIZE, _(" < \"%.*s\""),
9886 					       stp->st_orglen, sug.su_badptr);
9887 		msg_puts(IObuff);
9888 	    }
9889 
9890 	    if (p_verbose > 0)
9891 	    {
9892 		/* Add the score. */
9893 		if (sps_flags & (SPS_DOUBLE | SPS_BEST))
9894 		    vim_snprintf((char *)IObuff, IOSIZE, " (%s%d - %d)",
9895 			stp->st_salscore ? "s " : "",
9896 			stp->st_score, stp->st_altscore);
9897 		else
9898 		    vim_snprintf((char *)IObuff, IOSIZE, " (%d)",
9899 			    stp->st_score);
9900 #ifdef FEAT_RIGHTLEFT
9901 		if (cmdmsg_rl)
9902 		    /* Mirror the numbers, but keep the leading space. */
9903 		    rl_mirror(IObuff + 1);
9904 #endif
9905 		msg_advance(30);
9906 		msg_puts(IObuff);
9907 	    }
9908 	    msg_putchar('\n');
9909 	}
9910 
9911 #ifdef FEAT_RIGHTLEFT
9912 	cmdmsg_rl = FALSE;
9913 	msg_col = 0;
9914 #endif
9915 	/* Ask for choice. */
9916 	selected = prompt_for_number(&mouse_used);
9917 	if (mouse_used)
9918 	    selected -= lines_left;
9919 	lines_left = Rows;	/* avoid more prompt */
9920     }
9921 
9922     if (selected > 0 && selected <= sug.su_ga.ga_len && u_save_cursor() == OK)
9923     {
9924 	/* Save the from and to text for :spellrepall. */
9925 	stp = &SUG(sug.su_ga, selected - 1);
9926 	if (sug.su_badlen > stp->st_orglen)
9927 	{
9928 	    /* Replacing less than "su_badlen", append the remainder to
9929 	     * repl_to. */
9930 	    repl_from = vim_strnsave(sug.su_badptr, sug.su_badlen);
9931 	    vim_snprintf((char *)IObuff, IOSIZE, "%s%.*s", stp->st_word,
9932 		    sug.su_badlen - stp->st_orglen,
9933 					      sug.su_badptr + stp->st_orglen);
9934 	    repl_to = vim_strsave(IObuff);
9935 	}
9936 	else
9937 	{
9938 	    /* Replacing su_badlen or more, use the whole word. */
9939 	    repl_from = vim_strnsave(sug.su_badptr, stp->st_orglen);
9940 	    repl_to = vim_strsave(stp->st_word);
9941 	}
9942 
9943 	/* Replace the word. */
9944 	p = alloc(STRLEN(line) - stp->st_orglen + stp->st_wordlen + 1);
9945 	if (p != NULL)
9946 	{
9947 	    c = sug.su_badptr - line;
9948 	    mch_memmove(p, line, c);
9949 	    STRCPY(p + c, stp->st_word);
9950 	    STRCAT(p, sug.su_badptr + stp->st_orglen);
9951 	    ml_replace(curwin->w_cursor.lnum, p, FALSE);
9952 	    curwin->w_cursor.col = c;
9953 	    changed_bytes(curwin->w_cursor.lnum, c);
9954 
9955 	    /* For redo we use a change-word command. */
9956 	    ResetRedobuff();
9957 	    AppendToRedobuff((char_u *)"ciw");
9958 	    AppendToRedobuffLit(p + c,
9959 			    stp->st_wordlen + sug.su_badlen - stp->st_orglen);
9960 	    AppendCharToRedobuff(ESC);
9961 	}
9962     }
9963     else
9964 	curwin->w_cursor = prev_cursor;
9965 
9966     spell_find_cleanup(&sug);
9967 }
9968 
9969 /*
9970  * Check if the word at line "lnum" column "col" is required to start with a
9971  * capital.  This uses 'spellcapcheck' of the current buffer.
9972  */
9973     static int
9974 check_need_cap(lnum, col)
9975     linenr_T	lnum;
9976     colnr_T	col;
9977 {
9978     int		need_cap = FALSE;
9979     char_u	*line;
9980     char_u	*line_copy = NULL;
9981     char_u	*p;
9982     colnr_T	endcol;
9983     regmatch_T	regmatch;
9984 
9985     if (curbuf->b_cap_prog == NULL)
9986 	return FALSE;
9987 
9988     line = ml_get_curline();
9989     endcol = 0;
9990     if ((int)(skipwhite(line) - line) >= (int)col)
9991     {
9992 	/* At start of line, check if previous line is empty or sentence
9993 	 * ends there. */
9994 	if (lnum == 1)
9995 	    need_cap = TRUE;
9996 	else
9997 	{
9998 	    line = ml_get(lnum - 1);
9999 	    if (*skipwhite(line) == NUL)
10000 		need_cap = TRUE;
10001 	    else
10002 	    {
10003 		/* Append a space in place of the line break. */
10004 		line_copy = concat_str(line, (char_u *)" ");
10005 		line = line_copy;
10006 		endcol = STRLEN(line);
10007 	    }
10008 	}
10009     }
10010     else
10011 	endcol = col;
10012 
10013     if (endcol > 0)
10014     {
10015 	/* Check if sentence ends before the bad word. */
10016 	regmatch.regprog = curbuf->b_cap_prog;
10017 	regmatch.rm_ic = FALSE;
10018 	p = line + endcol;
10019 	for (;;)
10020 	{
10021 	    mb_ptr_back(line, p);
10022 	    if (p == line || spell_iswordp_nmw(p))
10023 		break;
10024 	    if (vim_regexec(&regmatch, p, 0)
10025 					 && regmatch.endp[0] == line + endcol)
10026 	    {
10027 		need_cap = TRUE;
10028 		break;
10029 	    }
10030 	}
10031     }
10032 
10033     vim_free(line_copy);
10034 
10035     return need_cap;
10036 }
10037 
10038 
10039 /*
10040  * ":spellrepall"
10041  */
10042 /*ARGSUSED*/
10043     void
10044 ex_spellrepall(eap)
10045     exarg_T *eap;
10046 {
10047     pos_T	pos = curwin->w_cursor;
10048     char_u	*frompat;
10049     int		addlen;
10050     char_u	*line;
10051     char_u	*p;
10052     int		save_ws = p_ws;
10053     linenr_T	prev_lnum = 0;
10054 
10055     if (repl_from == NULL || repl_to == NULL)
10056     {
10057 	EMSG(_("E752: No previous spell replacement"));
10058 	return;
10059     }
10060     addlen = STRLEN(repl_to) - STRLEN(repl_from);
10061 
10062     frompat = alloc(STRLEN(repl_from) + 7);
10063     if (frompat == NULL)
10064 	return;
10065     sprintf((char *)frompat, "\\V\\<%s\\>", repl_from);
10066     p_ws = FALSE;
10067 
10068     sub_nsubs = 0;
10069     sub_nlines = 0;
10070     curwin->w_cursor.lnum = 0;
10071     while (!got_int)
10072     {
10073 	if (do_search(NULL, '/', frompat, 1L, SEARCH_KEEP) == 0
10074 						   || u_save_cursor() == FAIL)
10075 	    break;
10076 
10077 	/* Only replace when the right word isn't there yet.  This happens
10078 	 * when changing "etc" to "etc.". */
10079 	line = ml_get_curline();
10080 	if (addlen <= 0 || STRNCMP(line + curwin->w_cursor.col,
10081 					       repl_to, STRLEN(repl_to)) != 0)
10082 	{
10083 	    p = alloc(STRLEN(line) + addlen + 1);
10084 	    if (p == NULL)
10085 		break;
10086 	    mch_memmove(p, line, curwin->w_cursor.col);
10087 	    STRCPY(p + curwin->w_cursor.col, repl_to);
10088 	    STRCAT(p, line + curwin->w_cursor.col + STRLEN(repl_from));
10089 	    ml_replace(curwin->w_cursor.lnum, p, FALSE);
10090 	    changed_bytes(curwin->w_cursor.lnum, curwin->w_cursor.col);
10091 
10092 	    if (curwin->w_cursor.lnum != prev_lnum)
10093 	    {
10094 		++sub_nlines;
10095 		prev_lnum = curwin->w_cursor.lnum;
10096 	    }
10097 	    ++sub_nsubs;
10098 	}
10099 	curwin->w_cursor.col += STRLEN(repl_to);
10100     }
10101 
10102     p_ws = save_ws;
10103     curwin->w_cursor = pos;
10104     vim_free(frompat);
10105 
10106     if (sub_nsubs == 0)
10107 	EMSG2(_("E753: Not found: %s"), repl_from);
10108     else
10109 	do_sub_msg(FALSE);
10110 }
10111 
10112 /*
10113  * Find spell suggestions for "word".  Return them in the growarray "*gap" as
10114  * a list of allocated strings.
10115  */
10116     void
10117 spell_suggest_list(gap, word, maxcount, need_cap, interactive)
10118     garray_T	*gap;
10119     char_u	*word;
10120     int		maxcount;	/* maximum nr of suggestions */
10121     int		need_cap;	/* 'spellcapcheck' matched */
10122     int		interactive;
10123 {
10124     suginfo_T	sug;
10125     int		i;
10126     suggest_T	*stp;
10127     char_u	*wcopy;
10128 
10129     spell_find_suggest(word, 0, &sug, maxcount, FALSE, need_cap, interactive);
10130 
10131     /* Make room in "gap". */
10132     ga_init2(gap, sizeof(char_u *), sug.su_ga.ga_len + 1);
10133     if (ga_grow(gap, sug.su_ga.ga_len) == OK)
10134     {
10135 	for (i = 0; i < sug.su_ga.ga_len; ++i)
10136 	{
10137 	    stp = &SUG(sug.su_ga, i);
10138 
10139 	    /* The suggested word may replace only part of "word", add the not
10140 	     * replaced part. */
10141 	    wcopy = alloc(stp->st_wordlen
10142 				+ STRLEN(sug.su_badptr + stp->st_orglen) + 1);
10143 	    if (wcopy == NULL)
10144 		break;
10145 	    STRCPY(wcopy, stp->st_word);
10146 	    STRCPY(wcopy + stp->st_wordlen, sug.su_badptr + stp->st_orglen);
10147 	    ((char_u **)gap->ga_data)[gap->ga_len++] = wcopy;
10148 	}
10149     }
10150 
10151     spell_find_cleanup(&sug);
10152 }
10153 
10154 /*
10155  * Find spell suggestions for the word at the start of "badptr".
10156  * Return the suggestions in "su->su_ga".
10157  * The maximum number of suggestions is "maxcount".
10158  * Note: does use info for the current window.
10159  * This is based on the mechanisms of Aspell, but completely reimplemented.
10160  */
10161     static void
10162 spell_find_suggest(badptr, badlen, su, maxcount, banbadword, need_cap, interactive)
10163     char_u	*badptr;
10164     int		badlen;		/* length of bad word or 0 if unknown */
10165     suginfo_T	*su;
10166     int		maxcount;
10167     int		banbadword;	/* don't include badword in suggestions */
10168     int		need_cap;	/* word should start with capital */
10169     int		interactive;
10170 {
10171     hlf_T	attr = HLF_COUNT;
10172     char_u	buf[MAXPATHL];
10173     char_u	*p;
10174     int		do_combine = FALSE;
10175     char_u	*sps_copy;
10176 #ifdef FEAT_EVAL
10177     static int	expr_busy = FALSE;
10178 #endif
10179     int		c;
10180     int		i;
10181     langp_T	*lp;
10182 
10183     /*
10184      * Set the info in "*su".
10185      */
10186     vim_memset(su, 0, sizeof(suginfo_T));
10187     ga_init2(&su->su_ga, (int)sizeof(suggest_T), 10);
10188     ga_init2(&su->su_sga, (int)sizeof(suggest_T), 10);
10189     if (*badptr == NUL)
10190 	return;
10191     hash_init(&su->su_banned);
10192 
10193     su->su_badptr = badptr;
10194     if (badlen != 0)
10195 	su->su_badlen = badlen;
10196     else
10197 	su->su_badlen = spell_check(curwin, su->su_badptr, &attr, NULL, FALSE);
10198     su->su_maxcount = maxcount;
10199     su->su_maxscore = SCORE_MAXINIT;
10200 
10201     if (su->su_badlen >= MAXWLEN)
10202 	su->su_badlen = MAXWLEN - 1;	/* just in case */
10203     vim_strncpy(su->su_badword, su->su_badptr, su->su_badlen);
10204     (void)spell_casefold(su->su_badptr, su->su_badlen,
10205 						    su->su_fbadword, MAXWLEN);
10206     /* get caps flags for bad word */
10207     su->su_badflags = badword_captype(su->su_badptr,
10208 					       su->su_badptr + su->su_badlen);
10209     if (need_cap)
10210 	su->su_badflags |= WF_ONECAP;
10211 
10212     /* Find the default language for sound folding.  We simply use the first
10213      * one in 'spelllang' that supports sound folding.  That's good for when
10214      * using multiple files for one language, it's not that bad when mixing
10215      * languages (e.g., "pl,en"). */
10216     for (i = 0; i < curbuf->b_langp.ga_len; ++i)
10217     {
10218 	lp = LANGP_ENTRY(curbuf->b_langp, i);
10219 	if (lp->lp_sallang != NULL)
10220 	{
10221 	    su->su_sallang = lp->lp_sallang;
10222 	    break;
10223 	}
10224     }
10225 
10226     /* Soundfold the bad word with the default sound folding, so that we don't
10227      * have to do this many times. */
10228     if (su->su_sallang != NULL)
10229 	spell_soundfold(su->su_sallang, su->su_fbadword, TRUE,
10230 							  su->su_sal_badword);
10231 
10232     /* If the word is not capitalised and spell_check() doesn't consider the
10233      * word to be bad then it might need to be capitalised.  Add a suggestion
10234      * for that. */
10235     c = PTR2CHAR(su->su_badptr);
10236     if (!SPELL_ISUPPER(c) && attr == HLF_COUNT)
10237     {
10238 	make_case_word(su->su_badword, buf, WF_ONECAP);
10239 	add_suggestion(su, &su->su_ga, buf, su->su_badlen, SCORE_ICASE,
10240 					      0, TRUE, su->su_sallang, FALSE);
10241     }
10242 
10243     /* Ban the bad word itself.  It may appear in another region. */
10244     if (banbadword)
10245 	add_banned(su, su->su_badword);
10246 
10247     /* Make a copy of 'spellsuggest', because the expression may change it. */
10248     sps_copy = vim_strsave(p_sps);
10249     if (sps_copy == NULL)
10250 	return;
10251 
10252     /* Loop over the items in 'spellsuggest'. */
10253     for (p = sps_copy; *p != NUL; )
10254     {
10255 	copy_option_part(&p, buf, MAXPATHL, ",");
10256 
10257 	if (STRNCMP(buf, "expr:", 5) == 0)
10258 	{
10259 #ifdef FEAT_EVAL
10260 	    /* Evaluate an expression.  Skip this when called recursively,
10261 	     * when using spellsuggest() in the expression. */
10262 	    if (!expr_busy)
10263 	    {
10264 		expr_busy = TRUE;
10265 		spell_suggest_expr(su, buf + 5);
10266 		expr_busy = FALSE;
10267 	    }
10268 #endif
10269 	}
10270 	else if (STRNCMP(buf, "file:", 5) == 0)
10271 	    /* Use list of suggestions in a file. */
10272 	    spell_suggest_file(su, buf + 5);
10273 	else
10274 	{
10275 	    /* Use internal method. */
10276 	    spell_suggest_intern(su, interactive);
10277 	    if (sps_flags & SPS_DOUBLE)
10278 		do_combine = TRUE;
10279 	}
10280     }
10281 
10282     vim_free(sps_copy);
10283 
10284     if (do_combine)
10285 	/* Combine the two list of suggestions.  This must be done last,
10286 	 * because sorting changes the order again. */
10287 	score_combine(su);
10288 }
10289 
#ifdef FEAT_EVAL
/*
 * Find suggestions by evaluating expression "expr" for the bad word in "su".
 * Only items of the resulting list that are a list themselves are used; the
 * word and score are taken from them with get_spellword().
 */
    static void
spell_suggest_expr(su, expr)
    suginfo_T	*su;
    char_u	*expr;
{
    list_T	*result;
    listitem_T	*item;
    char_u	*word;
    int		score;

    /* The work is split up in a few parts to avoid having to export
     * suginfo_T.  The first part is evaluating the expression, which gives
     * us a list. */
    result = eval_spell_expr(su->su_badword, expr);
    if (result != NULL)
    {
	for (item = result->lv_first; item != NULL; item = item->li_next)
	{
	    if (item->li_tv.v_type != VAR_LIST)
		continue;

	    /* A negative score or one above the maximum is not used. */
	    score = get_spellword(item->li_tv.vval.v_list, &word);
	    if (score < 0 || score > su->su_maxscore)
		continue;

	    add_suggestion(su, &su->su_ga, word, su->su_badlen,
				       score, 0, TRUE, su->su_sallang, FALSE);
	}
	list_unref(result);
    }

    /* Remove bogus suggestions, sort and truncate at "maxcount". */
    check_suggestions(su, &su->su_ga);
    (void)cleanup_suggestions(&su->su_ga, su->su_maxscore, su->su_maxcount);
}
#endif
10328 
10329 /*
10330  * Find suggestions in file "fname".  Used for "file:" in 'spellsuggest'.
10331  */
10332     static void
10333 spell_suggest_file(su, fname)
10334     suginfo_T	*su;
10335     char_u	*fname;
10336 {
10337     FILE	*fd;
10338     char_u	line[MAXWLEN * 2];
10339     char_u	*p;
10340     int		len;
10341     char_u	cword[MAXWLEN];
10342 
10343     /* Open the file. */
10344     fd = mch_fopen((char *)fname, "r");
10345     if (fd == NULL)
10346     {
10347 	EMSG2(_(e_notopen), fname);
10348 	return;
10349     }
10350 
10351     /* Read it line by line. */
10352     while (!vim_fgets(line, MAXWLEN * 2, fd) && !got_int)
10353     {
10354 	line_breakcheck();
10355 
10356 	p = vim_strchr(line, '/');
10357 	if (p == NULL)
10358 	    continue;	    /* No Tab found, just skip the line. */
10359 	*p++ = NUL;
10360 	if (STRICMP(su->su_badword, line) == 0)
10361 	{
10362 	    /* Match!  Isolate the good word, until CR or NL. */
10363 	    for (len = 0; p[len] >= ' '; ++len)
10364 		;
10365 	    p[len] = NUL;
10366 
10367 	    /* If the suggestion doesn't have specific case duplicate the case
10368 	     * of the bad word. */
10369 	    if (captype(p, NULL) == 0)
10370 	    {
10371 		make_case_word(p, cword, su->su_badflags);
10372 		p = cword;
10373 	    }
10374 
10375 	    add_suggestion(su, &su->su_ga, p, su->su_badlen,
10376 				  SCORE_FILE, 0, TRUE, su->su_sallang, FALSE);
10377 	}
10378     }
10379 
10380     fclose(fd);
10381 
10382     /* Remove bogus suggestions, sort and truncate at "maxcount". */
10383     check_suggestions(su, &su->su_ga);
10384     (void)cleanup_suggestions(&su->su_ga, su->su_maxscore, su->su_maxcount);
10385 }
10386 
10387 /*
10388  * Find suggestions for the internal method indicated by "sps_flags".
10389  */
10390     static void
10391 spell_suggest_intern(su, interactive)
10392     suginfo_T	*su;
10393     int		interactive;
10394 {
10395     /*
10396      * Load the .sug file(s) that are available and not done yet.
10397      */
10398     suggest_load_files();
10399 
10400     /*
10401      * 1. Try special cases, such as repeating a word: "the the" -> "the".
10402      *
10403      * Set a maximum score to limit the combination of operations that is
10404      * tried.
10405      */
10406     suggest_try_special(su);
10407 
10408     /*
10409      * 2. Try inserting/deleting/swapping/changing a letter, use REP entries
10410      *    from the .aff file and inserting a space (split the word).
10411      */
10412     suggest_try_change(su);
10413 
10414     /* For the resulting top-scorers compute the sound-a-like score. */
10415     if (sps_flags & SPS_DOUBLE)
10416 	score_comp_sal(su);
10417 
10418     /*
10419      * 3. Try finding sound-a-like words.
10420      */
10421     if ((sps_flags & SPS_FAST) == 0)
10422     {
10423 	if (sps_flags & SPS_BEST)
10424 	    /* Adjust the word score for the suggestions found so far for how
10425 	     * they sounds like. */
10426 	    rescore_suggestions(su);
10427 
10428 	/*
10429 	 * While going throught the soundfold tree "su_maxscore" is the score
10430 	 * for the soundfold word, limits the changes that are being tried,
10431 	 * and "su_sfmaxscore" the rescored score, which is set by
10432 	 * cleanup_suggestions().
10433 	 * First find words with a small edit distance, because this is much
10434 	 * faster and often already finds the top-N suggestions.  If we didn't
10435 	 * find many suggestions try again with a higher edit distance.
10436 	 * "sl_sounddone" is used to avoid doing the same word twice.
10437 	 */
10438 	suggest_try_soundalike_prep();
10439 	su->su_maxscore = SCORE_SFMAX1;
10440 	su->su_sfmaxscore = SCORE_MAXINIT * 3;
10441 	suggest_try_soundalike(su);
10442 	if (su->su_ga.ga_len < SUG_CLEAN_COUNT(su))
10443 	{
10444 	    /* We didn't find enough matches, try again, allowing more
10445 	     * changes to the soundfold word. */
10446 	    su->su_maxscore = SCORE_SFMAX2;
10447 	    suggest_try_soundalike(su);
10448 	    if (su->su_ga.ga_len < SUG_CLEAN_COUNT(su))
10449 	    {
10450 		/* Still didn't find enough matches, try again, allowing even
10451 		 * more changes to the soundfold word. */
10452 		su->su_maxscore = SCORE_SFMAX3;
10453 		suggest_try_soundalike(su);
10454 	    }
10455 	}
10456 	su->su_maxscore = su->su_sfmaxscore;
10457 	suggest_try_soundalike_finish();
10458     }
10459 
10460     /* When CTRL-C was hit while searching do show the results.  Only clear
10461      * got_int when using a command, not for spellsuggest(). */
10462     ui_breakcheck();
10463     if (interactive && got_int)
10464     {
10465 	(void)vgetc();
10466 	got_int = FALSE;
10467     }
10468 
10469     if ((sps_flags & SPS_DOUBLE) == 0 && su->su_ga.ga_len != 0)
10470     {
10471 	if (sps_flags & SPS_BEST)
10472 	    /* Adjust the word score for how it sounds like. */
10473 	    rescore_suggestions(su);
10474 
10475 	/* Remove bogus suggestions, sort and truncate at "maxcount". */
10476 	check_suggestions(su, &su->su_ga);
10477 	(void)cleanup_suggestions(&su->su_ga, su->su_maxscore, su->su_maxcount);
10478     }
10479 }
10480 
10481 /*
10482  * Load the .sug files for languages that have one and weren't loaded yet.
10483  */
10484     static void
10485 suggest_load_files()
10486 {
10487     langp_T	*lp;
10488     int		lpi;
10489     slang_T	*slang;
10490     char_u	*dotp;
10491     FILE	*fd;
10492     char_u	buf[MAXWLEN];
10493     int		i;
10494     time_t	timestamp;
10495     int		wcount;
10496     int		wordnr;
10497     garray_T	ga;
10498     int		c;
10499 
10500     /* Do this for all languages that support sound folding. */
10501     for (lpi = 0; lpi < curbuf->b_langp.ga_len; ++lpi)
10502     {
10503 	lp = LANGP_ENTRY(curbuf->b_langp, lpi);
10504 	slang = lp->lp_slang;
10505 	if (slang->sl_sugtime != 0 && !slang->sl_sugloaded)
10506 	{
10507 	    /* Change ".spl" to ".sug" and open the file.  When the file isn't
10508 	     * found silently skip it.  Do set "sl_sugloaded" so that we
10509 	     * don't try again and again. */
10510 	    slang->sl_sugloaded = TRUE;
10511 
10512 	    dotp = vim_strrchr(slang->sl_fname, '.');
10513 	    if (dotp == NULL || fnamecmp(dotp, ".spl") != 0)
10514 		continue;
10515 	    STRCPY(dotp, ".sug");
10516 	    fd = fopen((char *)slang->sl_fname, "r");
10517 	    if (fd == NULL)
10518 		goto nextone;
10519 
10520 	    /*
10521 	     * <SUGHEADER>: <fileID> <versionnr> <timestamp>
10522 	     */
10523 	    for (i = 0; i < VIMSUGMAGICL; ++i)
10524 		buf[i] = getc(fd);			/* <fileID> */
10525 	    if (STRNCMP(buf, VIMSUGMAGIC, VIMSUGMAGICL) != 0)
10526 	    {
10527 		EMSG2(_("E778: This does not look like a .sug file: %s"),
10528 							     slang->sl_fname);
10529 		goto nextone;
10530 	    }
10531 	    c = getc(fd);				/* <versionnr> */
10532 	    if (c < VIMSUGVERSION)
10533 	    {
10534 		EMSG2(_("E779: Old .sug file, needs to be updated: %s"),
10535 							     slang->sl_fname);
10536 		goto nextone;
10537 	    }
10538 	    else if (c > VIMSUGVERSION)
10539 	    {
10540 		EMSG2(_("E780: .sug file is for newer version of Vim: %s"),
10541 							     slang->sl_fname);
10542 		goto nextone;
10543 	    }
10544 
10545 	    /* Check the timestamp, it must be exactly the same as the one in
10546 	     * the .spl file.  Otherwise the word numbers won't match. */
10547 	    timestamp = get8c(fd);			/* <timestamp> */
10548 	    if (timestamp != slang->sl_sugtime)
10549 	    {
10550 		EMSG2(_("E781: .sug file doesn't match .spl file: %s"),
10551 							     slang->sl_fname);
10552 		goto nextone;
10553 	    }
10554 
10555 	    /*
10556 	     * <SUGWORDTREE>: <wordtree>
10557 	     * Read the trie with the soundfolded words.
10558 	     */
10559 	    if (spell_read_tree(fd, &slang->sl_sbyts, &slang->sl_sidxs,
10560 							       FALSE, 0) != 0)
10561 	    {
10562 someerror:
10563 		EMSG2(_("E782: error while reading .sug file: %s"),
10564 							     slang->sl_fname);
10565 		slang_clear_sug(slang);
10566 		goto nextone;
10567 	    }
10568 
10569 	    /*
10570 	     * <SUGTABLE>: <sugwcount> <sugline> ...
10571 	     *
10572 	     * Read the table with word numbers.  We use a file buffer for
10573 	     * this, because it's so much like a file with lines.  Makes it
10574 	     * possible to swap the info and save on memory use.
10575 	     */
10576 	    slang->sl_sugbuf = open_spellbuf();
10577 	    if (slang->sl_sugbuf == NULL)
10578 		goto someerror;
10579 							    /* <sugwcount> */
10580 	    wcount = get4c(fd);
10581 	    if (wcount < 0)
10582 		goto someerror;
10583 
10584 	    /* Read all the wordnr lists into the buffer, one NUL terminated
10585 	     * list per line. */
10586 	    ga_init2(&ga, 1, 100);
10587 	    for (wordnr = 0; wordnr < wcount; ++wordnr)
10588 	    {
10589 		ga.ga_len = 0;
10590 		for (;;)
10591 		{
10592 		    c = getc(fd);			    /* <sugline> */
10593 		    if (c < 0 || ga_grow(&ga, 1) == FAIL)
10594 			goto someerror;
10595 		    ((char_u *)ga.ga_data)[ga.ga_len++] = c;
10596 		    if (c == NUL)
10597 			break;
10598 		}
10599 		if (ml_append_buf(slang->sl_sugbuf, (linenr_T)wordnr,
10600 					 ga.ga_data, ga.ga_len, TRUE) == FAIL)
10601 		    goto someerror;
10602 	    }
10603 	    ga_clear(&ga);
10604 
10605 	    /*
10606 	     * Need to put word counts in the word tries, so that we can find
10607 	     * a word by its number.
10608 	     */
10609 	    tree_count_words(slang->sl_fbyts, slang->sl_fidxs);
10610 	    tree_count_words(slang->sl_sbyts, slang->sl_sidxs);
10611 
10612 nextone:
10613 	    if (fd != NULL)
10614 		fclose(fd);
10615 	    STRCPY(dotp, ".spl");
10616 	}
10617     }
10618 }
10619 
10620 
10621 /*
10622  * Fill in the wordcount fields for a trie.
10623  * Returns the total number of words.
10624  */
10625     static void
10626 tree_count_words(byts, idxs)
10627     char_u	*byts;
10628     idx_T	*idxs;
10629 {
10630     int		depth;
10631     idx_T	arridx[MAXWLEN];
10632     int		curi[MAXWLEN];
10633     int		c;
10634     idx_T	n;
10635     int		wordcount[MAXWLEN];
10636 
10637     arridx[0] = 0;
10638     curi[0] = 1;
10639     wordcount[0] = 0;
10640     depth = 0;
10641     while (depth >= 0 && !got_int)
10642     {
10643 	if (curi[depth] > byts[arridx[depth]])
10644 	{
10645 	    /* Done all bytes at this node, go up one level. */
10646 	    idxs[arridx[depth]] = wordcount[depth];
10647 	    if (depth > 0)
10648 		wordcount[depth - 1] += wordcount[depth];
10649 
10650 	    --depth;
10651 	    fast_breakcheck();
10652 	}
10653 	else
10654 	{
10655 	    /* Do one more byte at this node. */
10656 	    n = arridx[depth] + curi[depth];
10657 	    ++curi[depth];
10658 
10659 	    c = byts[n];
10660 	    if (c == 0)
10661 	    {
10662 		/* End of word, count it. */
10663 		++wordcount[depth];
10664 
10665 		/* Skip over any other NUL bytes (same word with different
10666 		 * flags). */
10667 		while (byts[n + 1] == 0)
10668 		{
10669 		    ++n;
10670 		    ++curi[depth];
10671 		}
10672 	    }
10673 	    else
10674 	    {
10675 		/* Normal char, go one level deeper to count the words. */
10676 		++depth;
10677 		arridx[depth] = idxs[n];
10678 		curi[depth] = 1;
10679 		wordcount[depth] = 0;
10680 	    }
10681 	}
10682     }
10683 }
10684 
10685 /*
10686  * Free the info put in "*su" by spell_find_suggest().
10687  */
10688     static void
10689 spell_find_cleanup(su)
10690     suginfo_T	*su;
10691 {
10692     int		i;
10693 
10694     /* Free the suggestions. */
10695     for (i = 0; i < su->su_ga.ga_len; ++i)
10696 	vim_free(SUG(su->su_ga, i).st_word);
10697     ga_clear(&su->su_ga);
10698     for (i = 0; i < su->su_sga.ga_len; ++i)
10699 	vim_free(SUG(su->su_sga, i).st_word);
10700     ga_clear(&su->su_sga);
10701 
10702     /* Free the banned words. */
10703     hash_clear_all(&su->su_banned, 0);
10704 }
10705 
10706 /*
10707  * Make a copy of "word", with the first letter upper or lower cased, to
10708  * "wcopy[MAXWLEN]".  "word" must not be empty.
10709  * The result is NUL terminated.
10710  */
10711     static void
10712 onecap_copy(word, wcopy, upper)
10713     char_u	*word;
10714     char_u	*wcopy;
10715     int		upper;	    /* TRUE: first letter made upper case */
10716 {
10717     char_u	*p;
10718     int		c;
10719     int		l;
10720 
10721     p = word;
10722 #ifdef FEAT_MBYTE
10723     if (has_mbyte)
10724 	c = mb_cptr2char_adv(&p);
10725     else
10726 #endif
10727 	c = *p++;
10728     if (upper)
10729 	c = SPELL_TOUPPER(c);
10730     else
10731 	c = SPELL_TOFOLD(c);
10732 #ifdef FEAT_MBYTE
10733     if (has_mbyte)
10734 	l = mb_char2bytes(c, wcopy);
10735     else
10736 #endif
10737     {
10738 	l = 1;
10739 	wcopy[0] = c;
10740     }
10741     vim_strncpy(wcopy + l, p, MAXWLEN - l - 1);
10742 }
10743 
10744 /*
10745  * Make a copy of "word" with all the letters upper cased into
10746  * "wcopy[MAXWLEN]".  The result is NUL terminated.
10747  */
10748     static void
10749 allcap_copy(word, wcopy)
10750     char_u	*word;
10751     char_u	*wcopy;
10752 {
10753     char_u	*s;
10754     char_u	*d;
10755     int		c;
10756 
10757     d = wcopy;
10758     for (s = word; *s != NUL; )
10759     {
10760 #ifdef FEAT_MBYTE
10761 	if (has_mbyte)
10762 	    c = mb_cptr2char_adv(&s);
10763 	else
10764 #endif
10765 	    c = *s++;
10766 
10767 #ifdef FEAT_MBYTE
10768 	/* We only change � to SS when we are certain latin1 is used.  It
10769 	 * would cause weird errors in other 8-bit encodings. */
10770 	if (enc_latin1like && c == 0xdf)
10771 	{
10772 	    c = 'S';
10773 	    if (d - wcopy >= MAXWLEN - 1)
10774 		break;
10775 	    *d++ = c;
10776 	}
10777 	else
10778 #endif
10779 	    c = SPELL_TOUPPER(c);
10780 
10781 #ifdef FEAT_MBYTE
10782 	if (has_mbyte)
10783 	{
10784 	    if (d - wcopy >= MAXWLEN - MB_MAXBYTES)
10785 		break;
10786 	    d += mb_char2bytes(c, d);
10787 	}
10788 	else
10789 #endif
10790 	{
10791 	    if (d - wcopy >= MAXWLEN - 1)
10792 		break;
10793 	    *d++ = c;
10794 	}
10795     }
10796     *d = NUL;
10797 }
10798 
10799 /*
10800  * Try finding suggestions by recognizing specific situations.
10801  */
10802     static void
10803 suggest_try_special(su)
10804     suginfo_T	*su;
10805 {
10806     char_u	*p;
10807     size_t	len;
10808     int		c;
10809     char_u	word[MAXWLEN];
10810 
10811     /*
10812      * Recognize a word that is repeated: "the the".
10813      */
10814     p = skiptowhite(su->su_fbadword);
10815     len = p - su->su_fbadword;
10816     p = skipwhite(p);
10817     if (STRLEN(p) == len && STRNCMP(su->su_fbadword, p, len) == 0)
10818     {
10819 	/* Include badflags: if the badword is onecap or allcap
10820 	 * use that for the goodword too: "The the" -> "The". */
10821 	c = su->su_fbadword[len];
10822 	su->su_fbadword[len] = NUL;
10823 	make_case_word(su->su_fbadword, word, su->su_badflags);
10824 	su->su_fbadword[len] = c;
10825 
10826 	/* Give a soundalike score of 0, compute the score as if deleting one
10827 	 * character. */
10828 	add_suggestion(su, &su->su_ga, word, su->su_badlen,
10829 		       RESCORE(SCORE_REP, 0), 0, TRUE, su->su_sallang, FALSE);
10830     }
10831 }
10832 
10833 /*
10834  * Try finding suggestions by adding/removing/swapping letters.
10835  */
10836     static void
10837 suggest_try_change(su)
10838     suginfo_T	*su;
10839 {
10840     char_u	fword[MAXWLEN];	    /* copy of the bad word, case-folded */
10841     int		n;
10842     char_u	*p;
10843     int		lpi;
10844     langp_T	*lp;
10845 
10846     /* We make a copy of the case-folded bad word, so that we can modify it
10847      * to find matches (esp. REP items).  Append some more text, changing
10848      * chars after the bad word may help. */
10849     STRCPY(fword, su->su_fbadword);
10850     n = STRLEN(fword);
10851     p = su->su_badptr + su->su_badlen;
10852     (void)spell_casefold(p, STRLEN(p), fword + n, MAXWLEN - n);
10853 
10854     for (lpi = 0; lpi < curbuf->b_langp.ga_len; ++lpi)
10855     {
10856 	lp = LANGP_ENTRY(curbuf->b_langp, lpi);
10857 
10858 	/* If reloading a spell file fails it's still in the list but
10859 	 * everything has been cleared. */
10860 	if (lp->lp_slang->sl_fbyts == NULL)
10861 	    continue;
10862 
10863 	/* Try it for this language.  Will add possible suggestions. */
10864 	suggest_trie_walk(su, lp, fword, FALSE);
10865     }
10866 }
10867 
/* Check the maximum score, if we go over it we won't try this change.
 * Evaluates to non-zero when the score at "stack[depth]" plus "add" is still
 * below "su->su_maxscore".  All arguments are put in parentheses, so that
 * any expression can be passed. */
#define TRY_DEEPER(su, stack, depth, add) \
		((stack)[(depth)].ts_score + (add) < (su)->su_maxscore)
10871 
10872 /*
10873  * Try finding suggestions by adding/removing/swapping letters.
10874  *
10875  * This uses a state machine.  At each node in the tree we try various
10876  * operations.  When trying if an operation works "depth" is increased and the
10877  * stack[] is used to store info.  This allows combinations, thus insert one
10878  * character, replace one and delete another.  The number of changes is
10879  * limited by su->su_maxscore.
10880  *
10881  * After implementing this I noticed an article by Kemal Oflazer that
10882  * describes something similar: "Error-tolerant Finite State Recognition with
10883  * Applications to Morphological Analysis and Spelling Correction" (1996).
10884  * The implementation in the article is simplified and requires a stack of
10885  * unknown depth.  The implementation here only needs a stack depth equal to
10886  * the length of the word.
10887  *
10888  * This is also used for the sound-folded word, "soundfold" is TRUE then.
10889  * The mechanism is the same, but we find a match with a sound-folded word
10890  * that comes from one or more original words.  Each of these words may be
10891  * added, this is done by add_sound_suggest().
10892  * Don't use:
10893  *	the prefix tree or the keep-case tree
10894  *	"su->su_badlen"
10895  *	anything to do with upper and lower case
10896  *	anything to do with word or non-word characters ("spell_iswordp()")
10897  *	banned words
10898  *	word flags (rare, region, compounding)
10899  *	word splitting for now
10900  *	"similar_chars()"
10901  *	use "slang->sl_repsal" instead of "lp->lp_replang->sl_rep"
10902  */
10903     static void
10904 suggest_trie_walk(su, lp, fword, soundfold)
10905     suginfo_T	*su;
10906     langp_T	*lp;
10907     char_u	*fword;
10908     int		soundfold;
10909 {
10910     char_u	tword[MAXWLEN];	    /* good word collected so far */
10911     trystate_T	stack[MAXWLEN];
10912     char_u	preword[MAXWLEN * 3]; /* word found with proper case;
10913 				       * concatanation of prefix compound
10914 				       * words and split word.  NUL terminated
10915 				       * when going deeper but not when coming
10916 				       * back. */
10917     char_u	compflags[MAXWLEN];	/* compound flags, one for each word */
10918     trystate_T	*sp;
10919     int		newscore;
10920     int		score;
10921     char_u	*byts, *fbyts, *pbyts;
10922     idx_T	*idxs, *fidxs, *pidxs;
10923     int		depth;
10924     int		c, c2, c3;
10925     int		n = 0;
10926     int		flags;
10927     garray_T	*gap;
10928     idx_T	arridx;
10929     int		len;
10930     char_u	*p;
10931     fromto_T	*ftp;
10932     int		fl = 0, tl;
10933     int		repextra = 0;	    /* extra bytes in fword[] from REP item */
10934     slang_T	*slang = lp->lp_slang;
10935     int		fword_ends;
10936     int		goodword_ends;
10937 #ifdef DEBUG_TRIEWALK
10938     /* Stores the name of the change made at each level. */
10939     char_u	changename[MAXWLEN][80];
10940 #endif
10941     int		breakcheckcount = 1000;
10942     int		compound_ok;
10943 
10944     /*
10945      * Go through the whole case-fold tree, try changes at each node.
10946      * "tword[]" contains the word collected from nodes in the tree.
10947      * "fword[]" the word we are trying to match with (initially the bad
10948      * word).
10949      */
10950     depth = 0;
10951     sp = &stack[0];
10952     vim_memset(sp, 0, sizeof(trystate_T));
10953     sp->ts_curi = 1;
10954 
10955     if (soundfold)
10956     {
10957 	/* Going through the soundfold tree. */
10958 	byts = fbyts = slang->sl_sbyts;
10959 	idxs = fidxs = slang->sl_sidxs;
10960 	pbyts = NULL;
10961 	pidxs = NULL;
10962 	sp->ts_prefixdepth = PFD_NOPREFIX;
10963 	sp->ts_state = STATE_START;
10964     }
10965     else
10966     {
10967 	/*
10968 	 * When there are postponed prefixes we need to use these first.  At
10969 	 * the end of the prefix we continue in the case-fold tree.
10970 	 */
10971 	fbyts = slang->sl_fbyts;
10972 	fidxs = slang->sl_fidxs;
10973 	pbyts = slang->sl_pbyts;
10974 	pidxs = slang->sl_pidxs;
10975 	if (pbyts != NULL)
10976 	{
10977 	    byts = pbyts;
10978 	    idxs = pidxs;
10979 	    sp->ts_prefixdepth = PFD_PREFIXTREE;
10980 	    sp->ts_state = STATE_NOPREFIX;	/* try without prefix first */
10981 	}
10982 	else
10983 	{
10984 	    byts = fbyts;
10985 	    idxs = fidxs;
10986 	    sp->ts_prefixdepth = PFD_NOPREFIX;
10987 	    sp->ts_state = STATE_START;
10988 	}
10989     }
10990 
10991     /*
10992      * Loop to find all suggestions.  At each round we either:
10993      * - For the current state try one operation, advance "ts_curi",
10994      *   increase "depth".
10995      * - When a state is done go to the next, set "ts_state".
10996      * - When all states are tried decrease "depth".
10997      */
10998     while (depth >= 0 && !got_int)
10999     {
11000 	sp = &stack[depth];
11001 	switch (sp->ts_state)
11002 	{
11003 	case STATE_START:
11004 	case STATE_NOPREFIX:
11005 	    /*
11006 	     * Start of node: Deal with NUL bytes, which means
11007 	     * tword[] may end here.
11008 	     */
11009 	    arridx = sp->ts_arridx;	    /* current node in the tree */
11010 	    len = byts[arridx];		    /* bytes in this node */
11011 	    arridx += sp->ts_curi;	    /* index of current byte */
11012 
11013 	    if (sp->ts_prefixdepth == PFD_PREFIXTREE)
11014 	    {
11015 		/* Skip over the NUL bytes, we use them later. */
11016 		for (n = 0; n < len && byts[arridx + n] == 0; ++n)
11017 		    ;
11018 		sp->ts_curi += n;
11019 
11020 		/* Always past NUL bytes now. */
11021 		n = (int)sp->ts_state;
11022 		sp->ts_state = STATE_ENDNUL;
11023 		sp->ts_save_badflags = su->su_badflags;
11024 
11025 		/* At end of a prefix or at start of prefixtree: check for
11026 		 * following word. */
11027 		if (byts[arridx] == 0 || n == (int)STATE_NOPREFIX)
11028 		{
11029 		    /* Set su->su_badflags to the caps type at this position.
11030 		     * Use the caps type until here for the prefix itself. */
11031 #ifdef FEAT_MBYTE
11032 		    if (has_mbyte)
11033 			n = nofold_len(fword, sp->ts_fidx, su->su_badptr);
11034 		    else
11035 #endif
11036 			n = sp->ts_fidx;
11037 		    flags = badword_captype(su->su_badptr, su->su_badptr + n);
11038 		    su->su_badflags = badword_captype(su->su_badptr + n,
11039 					       su->su_badptr + su->su_badlen);
11040 #ifdef DEBUG_TRIEWALK
11041 		    sprintf(changename[depth], "prefix");
11042 #endif
11043 		    go_deeper(stack, depth, 0);
11044 		    ++depth;
11045 		    sp = &stack[depth];
11046 		    sp->ts_prefixdepth = depth - 1;
11047 		    byts = fbyts;
11048 		    idxs = fidxs;
11049 		    sp->ts_arridx = 0;
11050 
11051 		    /* Move the prefix to preword[] with the right case
11052 		     * and make find_keepcap_word() works. */
11053 		    tword[sp->ts_twordlen] = NUL;
11054 		    make_case_word(tword + sp->ts_splitoff,
11055 					  preword + sp->ts_prewordlen, flags);
11056 		    sp->ts_prewordlen = STRLEN(preword);
11057 		    sp->ts_splitoff = sp->ts_twordlen;
11058 		}
11059 		break;
11060 	    }
11061 
11062 	    if (sp->ts_curi > len || byts[arridx] != 0)
11063 	    {
11064 		/* Past bytes in node and/or past NUL bytes. */
11065 		sp->ts_state = STATE_ENDNUL;
11066 		sp->ts_save_badflags = su->su_badflags;
11067 		break;
11068 	    }
11069 
11070 	    /*
11071 	     * End of word in tree.
11072 	     */
11073 	    ++sp->ts_curi;		/* eat one NUL byte */
11074 
11075 	    flags = (int)idxs[arridx];
11076 
11077 	    /* Skip words with the NOSUGGEST flag. */
11078 	    if (flags & WF_NOSUGGEST)
11079 		break;
11080 
11081 	    fword_ends = (fword[sp->ts_fidx] == NUL
11082 			   || (soundfold
11083 			       ? vim_iswhite(fword[sp->ts_fidx])
11084 			       : !spell_iswordp(fword + sp->ts_fidx, curbuf)));
11085 	    tword[sp->ts_twordlen] = NUL;
11086 
11087 	    if (sp->ts_prefixdepth <= PFD_NOTSPECIAL
11088 					&& (sp->ts_flags & TSF_PREFIXOK) == 0)
11089 	    {
11090 		/* There was a prefix before the word.  Check that the prefix
11091 		 * can be used with this word. */
11092 		/* Count the length of the NULs in the prefix.  If there are
11093 		 * none this must be the first try without a prefix.  */
11094 		n = stack[sp->ts_prefixdepth].ts_arridx;
11095 		len = pbyts[n++];
11096 		for (c = 0; c < len && pbyts[n + c] == 0; ++c)
11097 		    ;
11098 		if (c > 0)
11099 		{
11100 		    c = valid_word_prefix(c, n, flags,
11101 				       tword + sp->ts_splitoff, slang, FALSE);
11102 		    if (c == 0)
11103 			break;
11104 
11105 		    /* Use the WF_RARE flag for a rare prefix. */
11106 		    if (c & WF_RAREPFX)
11107 			flags |= WF_RARE;
11108 
11109 		    /* Tricky: when checking for both prefix and compounding
11110 		     * we run into the prefix flag first.
11111 		     * Remember that it's OK, so that we accept the prefix
11112 		     * when arriving at a compound flag. */
11113 		    sp->ts_flags |= TSF_PREFIXOK;
11114 		}
11115 	    }
11116 
11117 	    /* Check NEEDCOMPOUND: can't use word without compounding.  Do try
11118 	     * appending another compound word below. */
11119 	    if (sp->ts_complen == sp->ts_compsplit && fword_ends
11120 						     && (flags & WF_NEEDCOMP))
11121 		goodword_ends = FALSE;
11122 	    else
11123 		goodword_ends = TRUE;
11124 
11125 	    p = NULL;
11126 	    compound_ok = TRUE;
11127 	    if (sp->ts_complen > sp->ts_compsplit)
11128 	    {
11129 		if (slang->sl_nobreak)
11130 		{
11131 		    /* There was a word before this word.  When there was no
11132 		     * change in this word (it was correct) add the first word
11133 		     * as a suggestion.  If this word was corrected too, we
11134 		     * need to check if a correct word follows. */
11135 		    if (sp->ts_fidx - sp->ts_splitfidx
11136 					  == sp->ts_twordlen - sp->ts_splitoff
11137 			    && STRNCMP(fword + sp->ts_splitfidx,
11138 					tword + sp->ts_splitoff,
11139 					 sp->ts_fidx - sp->ts_splitfidx) == 0)
11140 		    {
11141 			preword[sp->ts_prewordlen] = NUL;
11142 			newscore = score_wordcount_adj(slang, sp->ts_score,
11143 						 preword + sp->ts_prewordlen,
11144 						 sp->ts_prewordlen > 0);
11145 			/* Add the suggestion if the score isn't too bad. */
11146 			if (newscore <= su->su_maxscore)
11147 			    add_suggestion(su, &su->su_ga, preword,
11148 				    sp->ts_splitfidx - repextra,
11149 				    newscore, 0, FALSE,
11150 				    lp->lp_sallang, FALSE);
11151 			break;
11152 		    }
11153 		}
11154 		else
11155 		{
11156 		    /* There was a compound word before this word.  If this
11157 		     * word does not support compounding then give up
11158 		     * (splitting is tried for the word without compound
11159 		     * flag). */
11160 		    if (((unsigned)flags >> 24) == 0
11161 			    || sp->ts_twordlen - sp->ts_splitoff
11162 						       < slang->sl_compminlen)
11163 			break;
11164 #ifdef FEAT_MBYTE
11165 		    /* For multi-byte chars check character length against
11166 		     * COMPOUNDMIN. */
11167 		    if (has_mbyte
11168 			    && slang->sl_compminlen > 0
11169 			    && mb_charlen(tword + sp->ts_splitoff)
11170 						       < slang->sl_compminlen)
11171 			break;
11172 #endif
11173 
11174 		    compflags[sp->ts_complen] = ((unsigned)flags >> 24);
11175 		    compflags[sp->ts_complen + 1] = NUL;
11176 		    vim_strncpy(preword + sp->ts_prewordlen,
11177 			    tword + sp->ts_splitoff,
11178 			    sp->ts_twordlen - sp->ts_splitoff);
11179 		    p = preword;
11180 		    while (*skiptowhite(p) != NUL)
11181 			p = skipwhite(skiptowhite(p));
11182 		    if (fword_ends && !can_compound(slang, p,
11183 						compflags + sp->ts_compsplit))
11184 			/* Compound is not allowed.  But it may still be
11185 			 * possible if we add another (short) word. */
11186 			compound_ok = FALSE;
11187 
11188 		    /* Get pointer to last char of previous word. */
11189 		    p = preword + sp->ts_prewordlen;
11190 		    mb_ptr_back(preword, p);
11191 		}
11192 	    }
11193 
11194 	    /*
11195 	     * Form the word with proper case in preword.
11196 	     * If there is a word from a previous split, append.
11197 	     * For the soundfold tree don't change the case, simply append.
11198 	     */
11199 	    if (soundfold)
11200 		STRCPY(preword + sp->ts_prewordlen, tword + sp->ts_splitoff);
11201 	    else if (flags & WF_KEEPCAP)
11202 		/* Must find the word in the keep-case tree. */
11203 		find_keepcap_word(slang, tword + sp->ts_splitoff,
11204 						 preword + sp->ts_prewordlen);
11205 	    else
11206 	    {
11207 		/* Include badflags: If the badword is onecap or allcap
11208 		 * use that for the goodword too.  But if the badword is
11209 		 * allcap and it's only one char long use onecap. */
11210 		c = su->su_badflags;
11211 		if ((c & WF_ALLCAP)
11212 #ifdef FEAT_MBYTE
11213 			&& su->su_badlen == (*mb_ptr2len)(su->su_badptr)
11214 #else
11215 			&& su->su_badlen == 1
11216 #endif
11217 			)
11218 		    c = WF_ONECAP;
11219 		c |= flags;
11220 
11221 		/* When appending a compound word after a word character don't
11222 		 * use Onecap. */
11223 		if (p != NULL && spell_iswordp_nmw(p))
11224 		    c &= ~WF_ONECAP;
11225 		make_case_word(tword + sp->ts_splitoff,
11226 					      preword + sp->ts_prewordlen, c);
11227 	    }
11228 
11229 	    if (!soundfold)
11230 	    {
11231 		/* Don't use a banned word.  It may appear again as a good
11232 		 * word, thus remember it. */
11233 		if (flags & WF_BANNED)
11234 		{
11235 		    add_banned(su, preword + sp->ts_prewordlen);
11236 		    break;
11237 		}
11238 		if ((sp->ts_complen == sp->ts_compsplit
11239 			    && WAS_BANNED(su, preword + sp->ts_prewordlen))
11240 						   || WAS_BANNED(su, preword))
11241 		{
11242 		    if (slang->sl_compprog == NULL)
11243 			break;
11244 		    /* the word so far was banned but we may try compounding */
11245 		    goodword_ends = FALSE;
11246 		}
11247 	    }
11248 
11249 	    newscore = 0;
11250 	    if (!soundfold)	/* soundfold words don't have flags */
11251 	    {
11252 		if ((flags & WF_REGION)
11253 			    && (((unsigned)flags >> 16) & lp->lp_region) == 0)
11254 		    newscore += SCORE_REGION;
11255 		if (flags & WF_RARE)
11256 		    newscore += SCORE_RARE;
11257 
11258 		if (!spell_valid_case(su->su_badflags,
11259 				  captype(preword + sp->ts_prewordlen, NULL)))
11260 		    newscore += SCORE_ICASE;
11261 	    }
11262 
11263 	    /* TODO: how about splitting in the soundfold tree? */
11264 	    if (fword_ends
11265 		    && goodword_ends
11266 		    && sp->ts_fidx >= sp->ts_fidxtry
11267 		    && compound_ok)
11268 	    {
11269 		/* The badword also ends: add suggestions. */
11270 #ifdef DEBUG_TRIEWALK
11271 		if (soundfold && STRCMP(preword, "smwrd") == 0)
11272 		{
11273 		    int	    j;
11274 
11275 		    /* print the stack of changes that brought us here */
11276 		    smsg("------ %s -------", fword);
11277 		    for (j = 0; j < depth; ++j)
11278 			smsg("%s", changename[j]);
11279 		}
11280 #endif
11281 		if (soundfold)
11282 		{
11283 		    /* For soundfolded words we need to find the original
11284 		     * words, the edit distance and then add them. */
11285 		    add_sound_suggest(su, preword, sp->ts_score, lp);
11286 		}
11287 		else
11288 		{
11289 		    /* Give a penalty when changing non-word char to word
11290 		     * char, e.g., "thes," -> "these". */
11291 		    p = fword + sp->ts_fidx;
11292 		    mb_ptr_back(fword, p);
11293 		    if (!spell_iswordp(p, curbuf))
11294 		    {
11295 			p = preword + STRLEN(preword);
11296 			mb_ptr_back(preword, p);
11297 			if (spell_iswordp(p, curbuf))
11298 			    newscore += SCORE_NONWORD;
11299 		    }
11300 
11301 		    /* Give a bonus to words seen before. */
11302 		    score = score_wordcount_adj(slang,
11303 						sp->ts_score + newscore,
11304 						preword + sp->ts_prewordlen,
11305 						sp->ts_prewordlen > 0);
11306 
11307 		    /* Add the suggestion if the score isn't too bad. */
11308 		    if (score <= su->su_maxscore)
11309 		    {
11310 			add_suggestion(su, &su->su_ga, preword,
11311 				    sp->ts_fidx - repextra,
11312 				    score, 0, FALSE, lp->lp_sallang, FALSE);
11313 
11314 			if (su->su_badflags & WF_MIXCAP)
11315 			{
11316 			    /* We really don't know if the word should be
11317 			     * upper or lower case, add both. */
11318 			    c = captype(preword, NULL);
11319 			    if (c == 0 || c == WF_ALLCAP)
11320 			    {
11321 				make_case_word(tword + sp->ts_splitoff,
11322 					      preword + sp->ts_prewordlen,
11323 						      c == 0 ? WF_ALLCAP : 0);
11324 
11325 				add_suggestion(su, &su->su_ga, preword,
11326 					sp->ts_fidx - repextra,
11327 					score + SCORE_ICASE, 0, FALSE,
11328 					lp->lp_sallang, FALSE);
11329 			    }
11330 			}
11331 		    }
11332 		}
11333 	    }
11334 
11335 	    /*
11336 	     * Try word split and/or compounding.
11337 	     */
11338 	    if ((sp->ts_fidx >= sp->ts_fidxtry || fword_ends)
11339 #ifdef FEAT_MBYTE
11340 		    /* Don't split halfway a character. */
11341 		    && (!has_mbyte || sp->ts_tcharlen == 0)
11342 #endif
11343 		    )
11344 	    {
11345 		int	try_compound;
11346 		int	try_split;
11347 
11348 		/* If past the end of the bad word don't try a split.
11349 		 * Otherwise try changing the next word.  E.g., find
11350 		 * suggestions for "the the" where the second "the" is
11351 		 * different.  It's done like a split.
11352 		 * TODO: word split for soundfold words */
11353 		try_split = (sp->ts_fidx - repextra < su->su_badlen)
11354 								&& !soundfold;
11355 
11356 		/* Get here in several situations:
11357 		 * 1. The word in the tree ends:
11358 		 *    If the word allows compounding try that.  Otherwise try
11359 		 *    a split by inserting a space.  For both check that a
11360 		 *    valid word starts at fword[sp->ts_fidx].
11361 		 *    For NOBREAK do like compounding to be able to check if
11362 		 *    the next word is valid.
11363 		 * 2. The badword does end, but it was due to a change (e.g.,
11364 		 *    a swap).  No need to split, but do check that the
11365 		 *    following word is valid.
11366 		 * 3. The badword and the word in the tree end.  It may still
11367 		 *    be possible to compound another (short) word.
11368 		 */
11369 		try_compound = FALSE;
11370 		if (!soundfold
11371 			&& slang->sl_compprog != NULL
11372 			&& ((unsigned)flags >> 24) != 0
11373 			&& sp->ts_twordlen - sp->ts_splitoff
11374 						       >= slang->sl_compminlen
11375 #ifdef FEAT_MBYTE
11376 			&& (!has_mbyte
11377 			    || slang->sl_compminlen == 0
11378 			    || mb_charlen(tword + sp->ts_splitoff)
11379 						      >= slang->sl_compminlen)
11380 #endif
11381 			&& (slang->sl_compsylmax < MAXWLEN
11382 			    || sp->ts_complen + 1 - sp->ts_compsplit
11383 							  < slang->sl_compmax)
11384 			&& (byte_in_str(sp->ts_complen == sp->ts_compsplit
11385 					    ? slang->sl_compstartflags
11386 					    : slang->sl_compallflags,
11387 						    ((unsigned)flags >> 24))))
11388 		{
11389 		    try_compound = TRUE;
11390 		    compflags[sp->ts_complen] = ((unsigned)flags >> 24);
11391 		    compflags[sp->ts_complen + 1] = NUL;
11392 		}
11393 
11394 		/* For NOBREAK we never try splitting, it won't make any word
11395 		 * valid. */
11396 		if (slang->sl_nobreak)
11397 		    try_compound = TRUE;
11398 
11399 		/* If we could add a compound word, and it's also possible to
11400 		 * split at this point, do the split first and set
11401 		 * TSF_DIDSPLIT to avoid doing it again. */
11402 		else if (!fword_ends
11403 			&& try_compound
11404 			&& (sp->ts_flags & TSF_DIDSPLIT) == 0)
11405 		{
11406 		    try_compound = FALSE;
11407 		    sp->ts_flags |= TSF_DIDSPLIT;
11408 		    --sp->ts_curi;	    /* do the same NUL again */
11409 		    compflags[sp->ts_complen] = NUL;
11410 		}
11411 		else
11412 		    sp->ts_flags &= ~TSF_DIDSPLIT;
11413 
11414 		if (try_split || try_compound)
11415 		{
11416 		    if (!try_compound && (!fword_ends || !goodword_ends))
11417 		    {
11418 			/* If we're going to split need to check that the
11419 			 * words so far are valid for compounding.  If there
11420 			 * is only one word it must not have the NEEDCOMPOUND
11421 			 * flag. */
11422 			if (sp->ts_complen == sp->ts_compsplit
11423 						     && (flags & WF_NEEDCOMP))
11424 			    break;
11425 			p = preword;
11426 			while (*skiptowhite(p) != NUL)
11427 			    p = skipwhite(skiptowhite(p));
11428 			if (sp->ts_complen > sp->ts_compsplit
11429 				&& !can_compound(slang, p,
11430 						compflags + sp->ts_compsplit))
11431 			    break;
11432 
11433 			if (slang->sl_nosplitsugs)
11434 			    newscore += SCORE_SPLIT_NO;
11435 			else
11436 			    newscore += SCORE_SPLIT;
11437 
11438 			/* Give a bonus to words seen before. */
11439 			newscore = score_wordcount_adj(slang, newscore,
11440 					   preword + sp->ts_prewordlen, TRUE);
11441 		    }
11442 
11443 		    if (TRY_DEEPER(su, stack, depth, newscore))
11444 		    {
11445 			go_deeper(stack, depth, newscore);
11446 #ifdef DEBUG_TRIEWALK
11447 			if (!try_compound && !fword_ends)
11448 			    sprintf(changename[depth], "%.*s-%s: split",
11449 				 sp->ts_twordlen, tword, fword + sp->ts_fidx);
11450 			else
11451 			    sprintf(changename[depth], "%.*s-%s: compound",
11452 				 sp->ts_twordlen, tword, fword + sp->ts_fidx);
11453 #endif
11454 			/* Save things to be restored at STATE_SPLITUNDO. */
11455 			sp->ts_save_badflags = su->su_badflags;
11456 			sp->ts_state = STATE_SPLITUNDO;
11457 
11458 			++depth;
11459 			sp = &stack[depth];
11460 
11461 			/* Append a space to preword when splitting. */
11462 			if (!try_compound && !fword_ends)
11463 			    STRCAT(preword, " ");
11464 			sp->ts_prewordlen = STRLEN(preword);
11465 			sp->ts_splitoff = sp->ts_twordlen;
11466 			sp->ts_splitfidx = sp->ts_fidx;
11467 
11468 			/* If the badword has a non-word character at this
11469 			 * position skip it.  That means replacing the
11470 			 * non-word character with a space.  Always skip a
11471 			 * character when the word ends.  But only when the
11472 			 * good word can end. */
11473 			if (((!try_compound && !spell_iswordp_nmw(fword
11474 							       + sp->ts_fidx))
11475 				    || fword_ends)
11476 				&& fword[sp->ts_fidx] != NUL
11477 				&& goodword_ends)
11478 			{
11479 			    int	    l;
11480 
11481 #ifdef FEAT_MBYTE
11482 			    if (has_mbyte)
11483 				l = MB_BYTE2LEN(fword[sp->ts_fidx]);
11484 			    else
11485 #endif
11486 				l = 1;
11487 			    if (fword_ends)
11488 			    {
11489 				/* Copy the skipped character to preword. */
11490 				mch_memmove(preword + sp->ts_prewordlen,
11491 						      fword + sp->ts_fidx, l);
11492 				sp->ts_prewordlen += l;
11493 				preword[sp->ts_prewordlen] = NUL;
11494 			    }
11495 			    else
11496 				sp->ts_score -= SCORE_SPLIT - SCORE_SUBST;
11497 			    sp->ts_fidx += l;
11498 			}
11499 
11500 			/* When compounding include compound flag in
11501 			 * compflags[] (already set above).  When splitting we
11502 			 * may start compounding over again.  */
11503 			if (try_compound)
11504 			    ++sp->ts_complen;
11505 			else
11506 			    sp->ts_compsplit = sp->ts_complen;
11507 			sp->ts_prefixdepth = PFD_NOPREFIX;
11508 
11509 			/* set su->su_badflags to the caps type at this
11510 			 * position */
11511 #ifdef FEAT_MBYTE
11512 			if (has_mbyte)
11513 			    n = nofold_len(fword, sp->ts_fidx, su->su_badptr);
11514 			else
11515 #endif
11516 			    n = sp->ts_fidx;
11517 			su->su_badflags = badword_captype(su->su_badptr + n,
11518 					       su->su_badptr + su->su_badlen);
11519 
11520 			/* Restart at top of the tree. */
11521 			sp->ts_arridx = 0;
11522 
11523 			/* If there are postponed prefixes, try these too. */
11524 			if (pbyts != NULL)
11525 			{
11526 			    byts = pbyts;
11527 			    idxs = pidxs;
11528 			    sp->ts_prefixdepth = PFD_PREFIXTREE;
11529 			    sp->ts_state = STATE_NOPREFIX;
11530 			}
11531 		    }
11532 		}
11533 	    }
11534 	    break;
11535 
11536 	case STATE_SPLITUNDO:
11537 	    /* Undo the changes done for word split or compound word. */
11538 	    su->su_badflags = sp->ts_save_badflags;
11539 
11540 	    /* Continue looking for NUL bytes. */
11541 	    sp->ts_state = STATE_START;
11542 
11543 	    /* In case we went into the prefix tree. */
11544 	    byts = fbyts;
11545 	    idxs = fidxs;
11546 	    break;
11547 
11548 	case STATE_ENDNUL:
11549 	    /* Past the NUL bytes in the node. */
11550 	    su->su_badflags = sp->ts_save_badflags;
11551 	    if (fword[sp->ts_fidx] == NUL
11552 #ifdef FEAT_MBYTE
11553 		    && sp->ts_tcharlen == 0
11554 #endif
11555 	       )
11556 	    {
11557 		/* The badword ends, can't use STATE_PLAIN. */
11558 		sp->ts_state = STATE_DEL;
11559 		break;
11560 	    }
11561 	    sp->ts_state = STATE_PLAIN;
11562 	    /*FALLTHROUGH*/
11563 
11564 	case STATE_PLAIN:
11565 	    /*
11566 	     * Go over all possible bytes at this node, add each to tword[]
11567 	     * and use child node.  "ts_curi" is the index.
11568 	     */
11569 	    arridx = sp->ts_arridx;
11570 	    if (sp->ts_curi > byts[arridx])
11571 	    {
11572 		/* Done all bytes at this node, do next state.  When still at
11573 		 * already changed bytes skip the other tricks. */
11574 		if (sp->ts_fidx >= sp->ts_fidxtry)
11575 		    sp->ts_state = STATE_DEL;
11576 		else
11577 		    sp->ts_state = STATE_FINAL;
11578 	    }
11579 	    else
11580 	    {
11581 		arridx += sp->ts_curi++;
11582 		c = byts[arridx];
11583 
11584 		/* Normal byte, go one level deeper.  If it's not equal to the
11585 		 * byte in the bad word adjust the score.  But don't even try
11586 		 * when the byte was already changed.  And don't try when we
11587 		 * just deleted this byte, accepting it is always cheaper than
11588 		 * delete + substitute. */
11589 		if (c == fword[sp->ts_fidx]
11590 #ifdef FEAT_MBYTE
11591 			|| (sp->ts_tcharlen > 0 && sp->ts_isdiff != DIFF_NONE)
11592 #endif
11593 			)
11594 		    newscore = 0;
11595 		else
11596 		    newscore = SCORE_SUBST;
11597 		if ((newscore == 0
11598 			    || (sp->ts_fidx >= sp->ts_fidxtry
11599 				&& ((sp->ts_flags & TSF_DIDDEL) == 0
11600 				    || c != fword[sp->ts_delidx])))
11601 			&& TRY_DEEPER(su, stack, depth, newscore))
11602 		{
11603 		    go_deeper(stack, depth, newscore);
11604 #ifdef DEBUG_TRIEWALK
11605 		    if (newscore > 0)
11606 			sprintf(changename[depth], "%.*s-%s: subst %c to %c",
11607 				sp->ts_twordlen, tword, fword + sp->ts_fidx,
11608 				fword[sp->ts_fidx], c);
11609 		    else
11610 			sprintf(changename[depth], "%.*s-%s: accept %c",
11611 				sp->ts_twordlen, tword, fword + sp->ts_fidx,
11612 				fword[sp->ts_fidx]);
11613 #endif
11614 		    ++depth;
11615 		    sp = &stack[depth];
11616 		    ++sp->ts_fidx;
11617 		    tword[sp->ts_twordlen++] = c;
11618 		    sp->ts_arridx = idxs[arridx];
11619 #ifdef FEAT_MBYTE
11620 		    if (newscore == SCORE_SUBST)
11621 			sp->ts_isdiff = DIFF_YES;
11622 		    if (has_mbyte)
11623 		    {
11624 			/* Multi-byte characters are a bit complicated to
11625 			 * handle: They differ when any of the bytes differ
11626 			 * and then their length may also differ. */
11627 			if (sp->ts_tcharlen == 0)
11628 			{
11629 			    /* First byte. */
11630 			    sp->ts_tcharidx = 0;
11631 			    sp->ts_tcharlen = MB_BYTE2LEN(c);
11632 			    sp->ts_fcharstart = sp->ts_fidx - 1;
11633 			    sp->ts_isdiff = (newscore != 0)
11634 						       ? DIFF_YES : DIFF_NONE;
11635 			}
11636 			else if (sp->ts_isdiff == DIFF_INSERT)
11637 			    /* When inserting trail bytes don't advance in the
11638 			     * bad word. */
11639 			    --sp->ts_fidx;
11640 			if (++sp->ts_tcharidx == sp->ts_tcharlen)
11641 			{
11642 			    /* Last byte of character. */
11643 			    if (sp->ts_isdiff == DIFF_YES)
11644 			    {
11645 				/* Correct ts_fidx for the byte length of the
11646 				 * character (we didn't check that before). */
11647 				sp->ts_fidx = sp->ts_fcharstart
11648 					    + MB_BYTE2LEN(
11649 						    fword[sp->ts_fcharstart]);
11650 
11651 				/* For changing a composing character adjust
11652 				 * the score from SCORE_SUBST to
11653 				 * SCORE_SUBCOMP. */
11654 				if (enc_utf8
11655 					&& utf_iscomposing(
11656 					    mb_ptr2char(tword
11657 						+ sp->ts_twordlen
11658 							   - sp->ts_tcharlen))
11659 					&& utf_iscomposing(
11660 					    mb_ptr2char(fword
11661 							+ sp->ts_fcharstart)))
11662 				    sp->ts_score -=
11663 						  SCORE_SUBST - SCORE_SUBCOMP;
11664 
11665 				/* For a similar character adjust score from
11666 				 * SCORE_SUBST to SCORE_SIMILAR. */
11667 				else if (!soundfold
11668 					&& slang->sl_has_map
11669 					&& similar_chars(slang,
11670 					    mb_ptr2char(tword
11671 						+ sp->ts_twordlen
11672 							   - sp->ts_tcharlen),
11673 					    mb_ptr2char(fword
11674 							+ sp->ts_fcharstart)))
11675 				    sp->ts_score -=
11676 						  SCORE_SUBST - SCORE_SIMILAR;
11677 			    }
11678 			    else if (sp->ts_isdiff == DIFF_INSERT
11679 					 && sp->ts_twordlen > sp->ts_tcharlen)
11680 			    {
11681 				p = tword + sp->ts_twordlen - sp->ts_tcharlen;
11682 				c = mb_ptr2char(p);
11683 				if (enc_utf8 && utf_iscomposing(c))
11684 				{
11685 				    /* Inserting a composing char doesn't
11686 				     * count that much. */
11687 				    sp->ts_score -= SCORE_INS - SCORE_INSCOMP;
11688 				}
11689 				else
11690 				{
11691 				    /* If the previous character was the same,
11692 				     * thus doubling a character, give a bonus
11693 				     * to the score.  Also for the soundfold
11694 				     * tree (might seem illogical but does
11695 				     * give better scores). */
11696 				    mb_ptr_back(tword, p);
11697 				    if (c == mb_ptr2char(p))
11698 					sp->ts_score -= SCORE_INS
11699 							       - SCORE_INSDUP;
11700 				}
11701 			    }
11702 
11703 			    /* Starting a new char, reset the length. */
11704 			    sp->ts_tcharlen = 0;
11705 			}
11706 		    }
11707 		    else
11708 #endif
11709 		    {
11710 			/* If we found a similar char adjust the score.
11711 			 * We do this after calling go_deeper() because
11712 			 * it's slow. */
11713 			if (newscore != 0
11714 				&& !soundfold
11715 				&& slang->sl_has_map
11716 				&& similar_chars(slang,
11717 						   c, fword[sp->ts_fidx - 1]))
11718 			    sp->ts_score -= SCORE_SUBST - SCORE_SIMILAR;
11719 		    }
11720 		}
11721 	    }
11722 	    break;
11723 
11724 	case STATE_DEL:
11725 #ifdef FEAT_MBYTE
11726 	    /* When past the first byte of a multi-byte char don't try
11727 	     * delete/insert/swap a character. */
11728 	    if (has_mbyte && sp->ts_tcharlen > 0)
11729 	    {
11730 		sp->ts_state = STATE_FINAL;
11731 		break;
11732 	    }
11733 #endif
11734 	    /*
11735 	     * Try skipping one character in the bad word (delete it).
11736 	     */
11737 	    sp->ts_state = STATE_INS_PREP;
11738 	    sp->ts_curi = 1;
11739 	    if (soundfold && sp->ts_fidx == 0 && fword[sp->ts_fidx] == '*')
11740 		/* Deleting a vowel at the start of a word counts less, see
11741 		 * soundalike_score(). */
11742 		newscore = 2 * SCORE_DEL / 3;
11743 	    else
11744 		newscore = SCORE_DEL;
11745 	    if (fword[sp->ts_fidx] != NUL
11746 				    && TRY_DEEPER(su, stack, depth, newscore))
11747 	    {
11748 		go_deeper(stack, depth, newscore);
11749 #ifdef DEBUG_TRIEWALK
11750 		sprintf(changename[depth], "%.*s-%s: delete %c",
11751 			sp->ts_twordlen, tword, fword + sp->ts_fidx,
11752 			fword[sp->ts_fidx]);
11753 #endif
11754 		++depth;
11755 
11756 		/* Remember what character we deleted, so that we can avoid
11757 		 * inserting it again. */
11758 		stack[depth].ts_flags |= TSF_DIDDEL;
11759 		stack[depth].ts_delidx = sp->ts_fidx;
11760 
11761 		/* Advance over the character in fword[].  Give a bonus to the
11762 		 * score if the same character is following "nn" -> "n".  It's
11763 		 * a bit illogical for soundfold tree but it does give better
11764 		 * results. */
11765 #ifdef FEAT_MBYTE
11766 		if (has_mbyte)
11767 		{
11768 		    c = mb_ptr2char(fword + sp->ts_fidx);
11769 		    stack[depth].ts_fidx += MB_BYTE2LEN(fword[sp->ts_fidx]);
11770 		    if (enc_utf8 && utf_iscomposing(c))
11771 			stack[depth].ts_score -= SCORE_DEL - SCORE_DELCOMP;
11772 		    else if (c == mb_ptr2char(fword + stack[depth].ts_fidx))
11773 			stack[depth].ts_score -= SCORE_DEL - SCORE_DELDUP;
11774 		}
11775 		else
11776 #endif
11777 		{
11778 		    ++stack[depth].ts_fidx;
11779 		    if (fword[sp->ts_fidx] == fword[sp->ts_fidx + 1])
11780 			stack[depth].ts_score -= SCORE_DEL - SCORE_DELDUP;
11781 		}
11782 		break;
11783 	    }
11784 	    /*FALLTHROUGH*/
11785 
11786 	case STATE_INS_PREP:
11787 	    if (sp->ts_flags & TSF_DIDDEL)
11788 	    {
11789 		/* If we just deleted a byte then inserting won't make sense,
11790 		 * a substitute is always cheaper. */
11791 		sp->ts_state = STATE_SWAP;
11792 		break;
11793 	    }
11794 
11795 	    /* skip over NUL bytes */
11796 	    n = sp->ts_arridx;
11797 	    for (;;)
11798 	    {
11799 		if (sp->ts_curi > byts[n])
11800 		{
11801 		    /* Only NUL bytes at this node, go to next state. */
11802 		    sp->ts_state = STATE_SWAP;
11803 		    break;
11804 		}
11805 		if (byts[n + sp->ts_curi] != NUL)
11806 		{
11807 		    /* Found a byte to insert. */
11808 		    sp->ts_state = STATE_INS;
11809 		    break;
11810 		}
11811 		++sp->ts_curi;
11812 	    }
11813 	    break;
11814 
11815 	    /*FALLTHROUGH*/
11816 
11817 	case STATE_INS:
11818 	    /* Insert one byte.  Repeat this for each possible byte at this
11819 	     * node. */
11820 	    n = sp->ts_arridx;
11821 	    if (sp->ts_curi > byts[n])
11822 	    {
11823 		/* Done all bytes at this node, go to next state. */
11824 		sp->ts_state = STATE_SWAP;
11825 		break;
11826 	    }
11827 
11828 	    /* Do one more byte at this node, but:
11829 	     * - Skip NUL bytes.
11830 	     * - Skip the byte if it's equal to the byte in the word,
11831 	     *   accepting that byte is always better.
11832 	     */
11833 	    n += sp->ts_curi++;
11834 	    c = byts[n];
11835 	    if (soundfold && sp->ts_twordlen == 0 && c == '*')
11836 		/* Inserting a vowel at the start of a word counts less,
11837 		 * see soundalike_score(). */
11838 		newscore = 2 * SCORE_INS / 3;
11839 	    else
11840 		newscore = SCORE_INS;
11841 	    if (c != fword[sp->ts_fidx]
11842 				    && TRY_DEEPER(su, stack, depth, newscore))
11843 	    {
11844 		go_deeper(stack, depth, newscore);
11845 #ifdef DEBUG_TRIEWALK
11846 		sprintf(changename[depth], "%.*s-%s: insert %c",
11847 			sp->ts_twordlen, tword, fword + sp->ts_fidx,
11848 			c);
11849 #endif
11850 		++depth;
11851 		sp = &stack[depth];
11852 		tword[sp->ts_twordlen++] = c;
11853 		sp->ts_arridx = idxs[n];
11854 #ifdef FEAT_MBYTE
11855 		if (has_mbyte)
11856 		{
11857 		    fl = MB_BYTE2LEN(c);
11858 		    if (fl > 1)
11859 		    {
11860 			/* There are following bytes for the same character.
11861 			 * We must find all bytes before trying
11862 			 * delete/insert/swap/etc. */
11863 			sp->ts_tcharlen = fl;
11864 			sp->ts_tcharidx = 1;
11865 			sp->ts_isdiff = DIFF_INSERT;
11866 		    }
11867 		}
11868 		else
11869 		    fl = 1;
11870 		if (fl == 1)
11871 #endif
11872 		{
11873 		    /* If the previous character was the same, thus doubling a
11874 		     * character, give a bonus to the score.  Also for
11875 		     * soundfold words (illogical but does give a better
11876 		     * score). */
11877 		    if (sp->ts_twordlen >= 2
11878 					   && tword[sp->ts_twordlen - 2] == c)
11879 			sp->ts_score -= SCORE_INS - SCORE_INSDUP;
11880 		}
11881 	    }
11882 	    break;
11883 
11884 	case STATE_SWAP:
11885 	    /*
11886 	     * Swap two bytes in the bad word: "12" -> "21".
11887 	     * We change "fword" here, it's changed back afterwards at
11888 	     * STATE_UNSWAP.
11889 	     */
11890 	    p = fword + sp->ts_fidx;
11891 	    c = *p;
11892 	    if (c == NUL)
11893 	    {
11894 		/* End of word, can't swap or replace. */
11895 		sp->ts_state = STATE_FINAL;
11896 		break;
11897 	    }
11898 
11899 	    /* Don't swap if the first character is not a word character.
11900 	     * SWAP3 etc. also don't make sense then. */
11901 	    if (!soundfold && !spell_iswordp(p, curbuf))
11902 	    {
11903 		sp->ts_state = STATE_REP_INI;
11904 		break;
11905 	    }
11906 
11907 #ifdef FEAT_MBYTE
11908 	    if (has_mbyte)
11909 	    {
11910 		n = mb_cptr2len(p);
11911 		c = mb_ptr2char(p);
11912 		if (!soundfold && !spell_iswordp(p + n, curbuf))
11913 		    c2 = c; /* don't swap non-word char */
11914 		else
11915 		    c2 = mb_ptr2char(p + n);
11916 	    }
11917 	    else
11918 #endif
11919 	    {
11920 		if (!soundfold && !spell_iswordp(p + 1, curbuf))
11921 		    c2 = c; /* don't swap non-word char */
11922 		else
11923 		    c2 = p[1];
11924 	    }
11925 
11926 	    /* When characters are identical, swap won't do anything.
11927 	     * Also get here if the second char is not a word character. */
11928 	    if (c == c2)
11929 	    {
11930 		sp->ts_state = STATE_SWAP3;
11931 		break;
11932 	    }
11933 	    if (c2 != NUL && TRY_DEEPER(su, stack, depth, SCORE_SWAP))
11934 	    {
11935 		go_deeper(stack, depth, SCORE_SWAP);
11936 #ifdef DEBUG_TRIEWALK
11937 		sprintf(changename[depth], "%.*s-%s: swap %c and %c",
11938 			sp->ts_twordlen, tword, fword + sp->ts_fidx,
11939 			c, c2);
11940 #endif
11941 		sp->ts_state = STATE_UNSWAP;
11942 		++depth;
11943 #ifdef FEAT_MBYTE
11944 		if (has_mbyte)
11945 		{
11946 		    fl = mb_char2len(c2);
11947 		    mch_memmove(p, p + n, fl);
11948 		    mb_char2bytes(c, p + fl);
11949 		    stack[depth].ts_fidxtry = sp->ts_fidx + n + fl;
11950 		}
11951 		else
11952 #endif
11953 		{
11954 		    p[0] = c2;
11955 		    p[1] = c;
11956 		    stack[depth].ts_fidxtry = sp->ts_fidx + 2;
11957 		}
11958 	    }
11959 	    else
11960 		/* If this swap doesn't work then SWAP3 won't either. */
11961 		sp->ts_state = STATE_REP_INI;
11962 	    break;
11963 
11964 	case STATE_UNSWAP:
11965 	    /* Undo the STATE_SWAP swap: "21" -> "12". */
11966 	    p = fword + sp->ts_fidx;
11967 #ifdef FEAT_MBYTE
11968 	    if (has_mbyte)
11969 	    {
11970 		n = MB_BYTE2LEN(*p);
11971 		c = mb_ptr2char(p + n);
11972 		mch_memmove(p + MB_BYTE2LEN(p[n]), p, n);
11973 		mb_char2bytes(c, p);
11974 	    }
11975 	    else
11976 #endif
11977 	    {
11978 		c = *p;
11979 		*p = p[1];
11980 		p[1] = c;
11981 	    }
11982 	    /*FALLTHROUGH*/
11983 
11984 	case STATE_SWAP3:
11985 	    /* Swap two bytes, skipping one: "123" -> "321".  We change
11986 	     * "fword" here, it's changed back afterwards at STATE_UNSWAP3. */
11987 	    p = fword + sp->ts_fidx;
11988 #ifdef FEAT_MBYTE
11989 	    if (has_mbyte)
11990 	    {
11991 		n = mb_cptr2len(p);
11992 		c = mb_ptr2char(p);
11993 		fl = mb_cptr2len(p + n);
11994 		c2 = mb_ptr2char(p + n);
11995 		if (!soundfold && !spell_iswordp(p + n + fl, curbuf))
11996 		    c3 = c;	/* don't swap non-word char */
11997 		else
11998 		    c3 = mb_ptr2char(p + n + fl);
11999 	    }
12000 	    else
12001 #endif
12002 	    {
12003 		c = *p;
12004 		c2 = p[1];
12005 		if (!soundfold && !spell_iswordp(p + 2, curbuf))
12006 		    c3 = c;	/* don't swap non-word char */
12007 		else
12008 		    c3 = p[2];
12009 	    }
12010 
12011 	    /* When characters are identical: "121" then SWAP3 result is
12012 	     * identical, ROT3L result is same as SWAP: "211", ROT3R result is
12013 	     * same as SWAP on next char: "112".  Thus skip all swapping.
12014 	     * Also skip when c3 is NUL.
12015 	     * Also get here when the third character is not a word character.
12016 	     * Second character may be any char: "a.b" -> "b.a" */
12017 	    if (c == c3 || c3 == NUL)
12018 	    {
12019 		sp->ts_state = STATE_REP_INI;
12020 		break;
12021 	    }
12022 	    if (TRY_DEEPER(su, stack, depth, SCORE_SWAP3))
12023 	    {
12024 		go_deeper(stack, depth, SCORE_SWAP3);
12025 #ifdef DEBUG_TRIEWALK
12026 		sprintf(changename[depth], "%.*s-%s: swap3 %c and %c",
12027 			sp->ts_twordlen, tword, fword + sp->ts_fidx,
12028 			c, c3);
12029 #endif
12030 		sp->ts_state = STATE_UNSWAP3;
12031 		++depth;
12032 #ifdef FEAT_MBYTE
12033 		if (has_mbyte)
12034 		{
12035 		    tl = mb_char2len(c3);
12036 		    mch_memmove(p, p + n + fl, tl);
12037 		    mb_char2bytes(c2, p + tl);
12038 		    mb_char2bytes(c, p + fl + tl);
12039 		    stack[depth].ts_fidxtry = sp->ts_fidx + n + fl + tl;
12040 		}
12041 		else
12042 #endif
12043 		{
12044 		    p[0] = p[2];
12045 		    p[2] = c;
12046 		    stack[depth].ts_fidxtry = sp->ts_fidx + 3;
12047 		}
12048 	    }
12049 	    else
12050 		sp->ts_state = STATE_REP_INI;
12051 	    break;
12052 
12053 	case STATE_UNSWAP3:
12054 	    /* Undo STATE_SWAP3: "321" -> "123" */
12055 	    p = fword + sp->ts_fidx;
12056 #ifdef FEAT_MBYTE
12057 	    if (has_mbyte)
12058 	    {
12059 		n = MB_BYTE2LEN(*p);
12060 		c2 = mb_ptr2char(p + n);
12061 		fl = MB_BYTE2LEN(p[n]);
12062 		c = mb_ptr2char(p + n + fl);
12063 		tl = MB_BYTE2LEN(p[n + fl]);
12064 		mch_memmove(p + fl + tl, p, n);
12065 		mb_char2bytes(c, p);
12066 		mb_char2bytes(c2, p + tl);
12067 		p = p + tl;
12068 	    }
12069 	    else
12070 #endif
12071 	    {
12072 		c = *p;
12073 		*p = p[2];
12074 		p[2] = c;
12075 		++p;
12076 	    }
12077 
12078 	    if (!soundfold && !spell_iswordp(p, curbuf))
12079 	    {
12080 		/* Middle char is not a word char, skip the rotate.  First and
12081 		 * third char were already checked at swap and swap3. */
12082 		sp->ts_state = STATE_REP_INI;
12083 		break;
12084 	    }
12085 
12086 	    /* Rotate three characters left: "123" -> "231".  We change
12087 	     * "fword" here, it's changed back afterwards at STATE_UNROT3L. */
12088 	    if (TRY_DEEPER(su, stack, depth, SCORE_SWAP3))
12089 	    {
12090 		go_deeper(stack, depth, SCORE_SWAP3);
12091 #ifdef DEBUG_TRIEWALK
12092 		p = fword + sp->ts_fidx;
12093 		sprintf(changename[depth], "%.*s-%s: rotate left %c%c%c",
12094 			sp->ts_twordlen, tword, fword + sp->ts_fidx,
12095 			p[0], p[1], p[2]);
12096 #endif
12097 		sp->ts_state = STATE_UNROT3L;
12098 		++depth;
12099 		p = fword + sp->ts_fidx;
12100 #ifdef FEAT_MBYTE
12101 		if (has_mbyte)
12102 		{
12103 		    n = mb_cptr2len(p);
12104 		    c = mb_ptr2char(p);
12105 		    fl = mb_cptr2len(p + n);
12106 		    fl += mb_cptr2len(p + n + fl);
12107 		    mch_memmove(p, p + n, fl);
12108 		    mb_char2bytes(c, p + fl);
12109 		    stack[depth].ts_fidxtry = sp->ts_fidx + n + fl;
12110 		}
12111 		else
12112 #endif
12113 		{
12114 		    c = *p;
12115 		    *p = p[1];
12116 		    p[1] = p[2];
12117 		    p[2] = c;
12118 		    stack[depth].ts_fidxtry = sp->ts_fidx + 3;
12119 		}
12120 	    }
12121 	    else
12122 		sp->ts_state = STATE_REP_INI;
12123 	    break;
12124 
12125 	case STATE_UNROT3L:
12126 	    /* Undo ROT3L: "231" -> "123" */
12127 	    p = fword + sp->ts_fidx;
12128 #ifdef FEAT_MBYTE
12129 	    if (has_mbyte)
12130 	    {
12131 		n = MB_BYTE2LEN(*p);
12132 		n += MB_BYTE2LEN(p[n]);
12133 		c = mb_ptr2char(p + n);
12134 		tl = MB_BYTE2LEN(p[n]);
12135 		mch_memmove(p + tl, p, n);
12136 		mb_char2bytes(c, p);
12137 	    }
12138 	    else
12139 #endif
12140 	    {
12141 		c = p[2];
12142 		p[2] = p[1];
12143 		p[1] = *p;
12144 		*p = c;
12145 	    }
12146 
12147 	    /* Rotate three bytes right: "123" -> "312".  We change "fword"
12148 	     * here, it's changed back afterwards at STATE_UNROT3R. */
12149 	    if (TRY_DEEPER(su, stack, depth, SCORE_SWAP3))
12150 	    {
12151 		go_deeper(stack, depth, SCORE_SWAP3);
12152 #ifdef DEBUG_TRIEWALK
12153 		p = fword + sp->ts_fidx;
12154 		sprintf(changename[depth], "%.*s-%s: rotate right %c%c%c",
12155 			sp->ts_twordlen, tword, fword + sp->ts_fidx,
12156 			p[0], p[1], p[2]);
12157 #endif
12158 		sp->ts_state = STATE_UNROT3R;
12159 		++depth;
12160 		p = fword + sp->ts_fidx;
12161 #ifdef FEAT_MBYTE
12162 		if (has_mbyte)
12163 		{
12164 		    n = mb_cptr2len(p);
12165 		    n += mb_cptr2len(p + n);
12166 		    c = mb_ptr2char(p + n);
12167 		    tl = mb_cptr2len(p + n);
12168 		    mch_memmove(p + tl, p, n);
12169 		    mb_char2bytes(c, p);
12170 		    stack[depth].ts_fidxtry = sp->ts_fidx + n + tl;
12171 		}
12172 		else
12173 #endif
12174 		{
12175 		    c = p[2];
12176 		    p[2] = p[1];
12177 		    p[1] = *p;
12178 		    *p = c;
12179 		    stack[depth].ts_fidxtry = sp->ts_fidx + 3;
12180 		}
12181 	    }
12182 	    else
12183 		sp->ts_state = STATE_REP_INI;
12184 	    break;
12185 
12186 	case STATE_UNROT3R:
12187 	    /* Undo ROT3R: "312" -> "123" */
12188 	    p = fword + sp->ts_fidx;
12189 #ifdef FEAT_MBYTE
12190 	    if (has_mbyte)
12191 	    {
12192 		c = mb_ptr2char(p);
12193 		tl = MB_BYTE2LEN(*p);
12194 		n = MB_BYTE2LEN(p[tl]);
12195 		n += MB_BYTE2LEN(p[tl + n]);
12196 		mch_memmove(p, p + tl, n);
12197 		mb_char2bytes(c, p + n);
12198 	    }
12199 	    else
12200 #endif
12201 	    {
12202 		c = *p;
12203 		*p = p[1];
12204 		p[1] = p[2];
12205 		p[2] = c;
12206 	    }
12207 	    /*FALLTHROUGH*/
12208 
12209 	case STATE_REP_INI:
12210 	    /* Check if matching with REP items from the .aff file would work.
12211 	     * Quickly skip if:
12212 	     * - there are no REP items and we are not in the soundfold trie
12213 	     * - the score is going to be too high anyway
12214 	     * - already applied a REP item or swapped here  */
12215 	    if ((lp->lp_replang == NULL && !soundfold)
12216 		    || sp->ts_score + SCORE_REP >= su->su_maxscore
12217 		    || sp->ts_fidx < sp->ts_fidxtry)
12218 	    {
12219 		sp->ts_state = STATE_FINAL;
12220 		break;
12221 	    }
12222 
12223 	    /* Use the first byte to quickly find the first entry that may
12224 	     * match.  If the index is -1 there is none. */
12225 	    if (soundfold)
12226 		sp->ts_curi = slang->sl_repsal_first[fword[sp->ts_fidx]];
12227 	    else
12228 		sp->ts_curi = lp->lp_replang->sl_rep_first[fword[sp->ts_fidx]];
12229 
12230 	    if (sp->ts_curi < 0)
12231 	    {
12232 		sp->ts_state = STATE_FINAL;
12233 		break;
12234 	    }
12235 
12236 	    sp->ts_state = STATE_REP;
12237 	    /*FALLTHROUGH*/
12238 
12239 	case STATE_REP:
12240 	    /* Try matching with REP items from the .aff file.  For each match
12241 	     * replace the characters and check if the resulting word is
12242 	     * valid. */
12243 	    p = fword + sp->ts_fidx;
12244 
12245 	    if (soundfold)
12246 		gap = &slang->sl_repsal;
12247 	    else
12248 		gap = &lp->lp_replang->sl_rep;
12249 	    while (sp->ts_curi < gap->ga_len)
12250 	    {
12251 		ftp = (fromto_T *)gap->ga_data + sp->ts_curi++;
12252 		if (*ftp->ft_from != *p)
12253 		{
12254 		    /* past possible matching entries */
12255 		    sp->ts_curi = gap->ga_len;
12256 		    break;
12257 		}
12258 		if (STRNCMP(ftp->ft_from, p, STRLEN(ftp->ft_from)) == 0
12259 			&& TRY_DEEPER(su, stack, depth, SCORE_REP))
12260 		{
12261 		    go_deeper(stack, depth, SCORE_REP);
12262 #ifdef DEBUG_TRIEWALK
12263 		    sprintf(changename[depth], "%.*s-%s: replace %s with %s",
12264 			    sp->ts_twordlen, tword, fword + sp->ts_fidx,
12265 			    ftp->ft_from, ftp->ft_to);
12266 #endif
12267 		    /* Need to undo this afterwards. */
12268 		    sp->ts_state = STATE_REP_UNDO;
12269 
12270 		    /* Change the "from" to the "to" string. */
12271 		    ++depth;
12272 		    fl = STRLEN(ftp->ft_from);
12273 		    tl = STRLEN(ftp->ft_to);
12274 		    if (fl != tl)
12275 		    {
12276 			mch_memmove(p + tl, p + fl, STRLEN(p + fl) + 1);
12277 			repextra += tl - fl;
12278 		    }
12279 		    mch_memmove(p, ftp->ft_to, tl);
12280 		    stack[depth].ts_fidxtry = sp->ts_fidx + tl;
12281 #ifdef FEAT_MBYTE
12282 		    stack[depth].ts_tcharlen = 0;
12283 #endif
12284 		    break;
12285 		}
12286 	    }
12287 
12288 	    if (sp->ts_curi >= gap->ga_len && sp->ts_state == STATE_REP)
12289 		/* No (more) matches. */
12290 		sp->ts_state = STATE_FINAL;
12291 
12292 	    break;
12293 
12294 	case STATE_REP_UNDO:
12295 	    /* Undo a REP replacement and continue with the next one. */
12296 	    if (soundfold)
12297 		gap = &slang->sl_repsal;
12298 	    else
12299 		gap = &lp->lp_replang->sl_rep;
12300 	    ftp = (fromto_T *)gap->ga_data + sp->ts_curi - 1;
12301 	    fl = STRLEN(ftp->ft_from);
12302 	    tl = STRLEN(ftp->ft_to);
12303 	    p = fword + sp->ts_fidx;
12304 	    if (fl != tl)
12305 	    {
12306 		mch_memmove(p + fl, p + tl, STRLEN(p + tl) + 1);
12307 		repextra -= tl - fl;
12308 	    }
12309 	    mch_memmove(p, ftp->ft_from, fl);
12310 	    sp->ts_state = STATE_REP;
12311 	    break;
12312 
12313 	default:
12314 	    /* Did all possible states at this level, go up one level. */
12315 	    --depth;
12316 
12317 	    if (depth >= 0 && stack[depth].ts_prefixdepth == PFD_PREFIXTREE)
12318 	    {
12319 		/* Continue in or go back to the prefix tree. */
12320 		byts = pbyts;
12321 		idxs = pidxs;
12322 	    }
12323 
12324 	    /* Don't check for CTRL-C too often, it takes time. */
12325 	    if (--breakcheckcount == 0)
12326 	    {
12327 		ui_breakcheck();
12328 		breakcheckcount = 1000;
12329 	    }
12330 	}
12331     }
12332 }
12333 
12334 
12335 /*
12336  * Go one level deeper in the tree.
12337  */
12338     static void
12339 go_deeper(stack, depth, score_add)
12340     trystate_T	*stack;
12341     int		depth;
12342     int		score_add;
12343 {
12344     stack[depth + 1] = stack[depth];
12345     stack[depth + 1].ts_state = STATE_START;
12346     stack[depth + 1].ts_score = stack[depth].ts_score + score_add;
12347     stack[depth + 1].ts_curi = 1;	/* start just after length byte */
12348     stack[depth + 1].ts_flags = 0;
12349 }
12350 
#ifdef FEAT_MBYTE
/*
 * Case-folding can alter the number of bytes a character takes.  Count the
 * characters found in the first "flen" bytes of "fword" and return the
 * number of bytes occupied by that many characters at the start of "word".
 */
    static int
nofold_len(fword, flen, word)
    char_u	*fword;
    int		flen;
    char_u	*word;
{
    char_u	*fend = fword + flen;
    char_u	*s;
    int		nchars = 0;

    /* Count the characters in the folded text. */
    s = fword;
    while (s < fend)
    {
	++nchars;
	mb_ptr_adv(s);
    }

    /* Step over the same number of characters in the unfolded text. */
    s = word;
    while (nchars > 0)
    {
	--nchars;
	mb_ptr_adv(s);
    }

    return (int)(s - word);
}
#endif
12372 
12373 /*
12374  * "fword" is a good word with case folded.  Find the matching keep-case
12375  * words and put it in "kword".
12376  * Theoretically there could be several keep-case words that result in the
12377  * same case-folded word, but we only find one...
12378  */
12379     static void
12380 find_keepcap_word(slang, fword, kword)
12381     slang_T	*slang;
12382     char_u	*fword;
12383     char_u	*kword;
12384 {
12385     char_u	uword[MAXWLEN];		/* "fword" in upper-case */
12386     int		depth;
12387     idx_T	tryidx;
12388 
12389     /* The following arrays are used at each depth in the tree. */
12390     idx_T	arridx[MAXWLEN];
12391     int		round[MAXWLEN];
12392     int		fwordidx[MAXWLEN];
12393     int		uwordidx[MAXWLEN];
12394     int		kwordlen[MAXWLEN];
12395 
12396     int		flen, ulen;
12397     int		l;
12398     int		len;
12399     int		c;
12400     idx_T	lo, hi, m;
12401     char_u	*p;
12402     char_u	*byts = slang->sl_kbyts;    /* array with bytes of the words */
12403     idx_T	*idxs = slang->sl_kidxs;    /* array with indexes */
12404 
12405     if (byts == NULL)
12406     {
12407 	/* array is empty: "cannot happen" */
12408 	*kword = NUL;
12409 	return;
12410     }
12411 
12412     /* Make an all-cap version of "fword". */
12413     allcap_copy(fword, uword);
12414 
12415     /*
12416      * Each character needs to be tried both case-folded and upper-case.
12417      * All this gets very complicated if we keep in mind that changing case
12418      * may change the byte length of a multi-byte character...
12419      */
12420     depth = 0;
12421     arridx[0] = 0;
12422     round[0] = 0;
12423     fwordidx[0] = 0;
12424     uwordidx[0] = 0;
12425     kwordlen[0] = 0;
12426     while (depth >= 0)
12427     {
12428 	if (fword[fwordidx[depth]] == NUL)
12429 	{
12430 	    /* We are at the end of "fword".  If the tree allows a word to end
12431 	     * here we have found a match. */
12432 	    if (byts[arridx[depth] + 1] == 0)
12433 	    {
12434 		kword[kwordlen[depth]] = NUL;
12435 		return;
12436 	    }
12437 
12438 	    /* kword is getting too long, continue one level up */
12439 	    --depth;
12440 	}
12441 	else if (++round[depth] > 2)
12442 	{
12443 	    /* tried both fold-case and upper-case character, continue one
12444 	     * level up */
12445 	    --depth;
12446 	}
12447 	else
12448 	{
12449 	    /*
12450 	     * round[depth] == 1: Try using the folded-case character.
12451 	     * round[depth] == 2: Try using the upper-case character.
12452 	     */
12453 #ifdef FEAT_MBYTE
12454 	    if (has_mbyte)
12455 	    {
12456 		flen = mb_cptr2len(fword + fwordidx[depth]);
12457 		ulen = mb_cptr2len(uword + uwordidx[depth]);
12458 	    }
12459 	    else
12460 #endif
12461 		ulen = flen = 1;
12462 	    if (round[depth] == 1)
12463 	    {
12464 		p = fword + fwordidx[depth];
12465 		l = flen;
12466 	    }
12467 	    else
12468 	    {
12469 		p = uword + uwordidx[depth];
12470 		l = ulen;
12471 	    }
12472 
12473 	    for (tryidx = arridx[depth]; l > 0; --l)
12474 	    {
12475 		/* Perform a binary search in the list of accepted bytes. */
12476 		len = byts[tryidx++];
12477 		c = *p++;
12478 		lo = tryidx;
12479 		hi = tryidx + len - 1;
12480 		while (lo < hi)
12481 		{
12482 		    m = (lo + hi) / 2;
12483 		    if (byts[m] > c)
12484 			hi = m - 1;
12485 		    else if (byts[m] < c)
12486 			lo = m + 1;
12487 		    else
12488 		    {
12489 			lo = hi = m;
12490 			break;
12491 		    }
12492 		}
12493 
12494 		/* Stop if there is no matching byte. */
12495 		if (hi < lo || byts[lo] != c)
12496 		    break;
12497 
12498 		/* Continue at the child (if there is one). */
12499 		tryidx = idxs[lo];
12500 	    }
12501 
12502 	    if (l == 0)
12503 	    {
12504 		/*
12505 		 * Found the matching char.  Copy it to "kword" and go a
12506 		 * level deeper.
12507 		 */
12508 		if (round[depth] == 1)
12509 		{
12510 		    STRNCPY(kword + kwordlen[depth], fword + fwordidx[depth],
12511 									flen);
12512 		    kwordlen[depth + 1] = kwordlen[depth] + flen;
12513 		}
12514 		else
12515 		{
12516 		    STRNCPY(kword + kwordlen[depth], uword + uwordidx[depth],
12517 									ulen);
12518 		    kwordlen[depth + 1] = kwordlen[depth] + ulen;
12519 		}
12520 		fwordidx[depth + 1] = fwordidx[depth] + flen;
12521 		uwordidx[depth + 1] = uwordidx[depth] + ulen;
12522 
12523 		++depth;
12524 		arridx[depth] = tryidx;
12525 		round[depth] = 0;
12526 	    }
12527 	}
12528     }
12529 
12530     /* Didn't find it: "cannot happen". */
12531     *kword = NUL;
12532 }
12533 
12534 /*
12535  * Compute the sound-a-like score for suggestions in su->su_ga and add them to
12536  * su->su_sga.
12537  */
12538     static void
12539 score_comp_sal(su)
12540     suginfo_T	*su;
12541 {
12542     langp_T	*lp;
12543     char_u	badsound[MAXWLEN];
12544     int		i;
12545     suggest_T   *stp;
12546     suggest_T   *sstp;
12547     int		score;
12548     int		lpi;
12549 
12550     if (ga_grow(&su->su_sga, su->su_ga.ga_len) == FAIL)
12551 	return;
12552 
12553     /*	Use the sound-folding of the first language that supports it. */
12554     for (lpi = 0; lpi < curbuf->b_langp.ga_len; ++lpi)
12555     {
12556 	lp = LANGP_ENTRY(curbuf->b_langp, lpi);
12557 	if (lp->lp_slang->sl_sal.ga_len > 0)
12558 	{
12559 	    /* soundfold the bad word */
12560 	    spell_soundfold(lp->lp_slang, su->su_fbadword, TRUE, badsound);
12561 
12562 	    for (i = 0; i < su->su_ga.ga_len; ++i)
12563 	    {
12564 		stp = &SUG(su->su_ga, i);
12565 
12566 		/* Case-fold the suggested word, sound-fold it and compute the
12567 		 * sound-a-like score. */
12568 		score = stp_sal_score(stp, su, lp->lp_slang, badsound);
12569 		if (score < SCORE_MAXMAX)
12570 		{
12571 		    /* Add the suggestion. */
12572 		    sstp = &SUG(su->su_sga, su->su_sga.ga_len);
12573 		    sstp->st_word = vim_strsave(stp->st_word);
12574 		    if (sstp->st_word != NULL)
12575 		    {
12576 			sstp->st_wordlen = stp->st_wordlen;
12577 			sstp->st_score = score;
12578 			sstp->st_altscore = 0;
12579 			sstp->st_orglen = stp->st_orglen;
12580 			++su->su_sga.ga_len;
12581 		    }
12582 		}
12583 	    }
12584 	    break;
12585 	}
12586     }
12587 }
12588 
12589 /*
12590  * Combine the list of suggestions in su->su_ga and su->su_sga.
12591  * They are intwined.
12592  */
12593     static void
12594 score_combine(su)
12595     suginfo_T	*su;
12596 {
12597     int		i;
12598     int		j;
12599     garray_T	ga;
12600     garray_T	*gap;
12601     langp_T	*lp;
12602     suggest_T	*stp;
12603     char_u	*p;
12604     char_u	badsound[MAXWLEN];
12605     int		round;
12606     int		lpi;
12607     slang_T	*slang = NULL;
12608 
12609     /* Add the alternate score to su_ga. */
12610     for (lpi = 0; lpi < curbuf->b_langp.ga_len; ++lpi)
12611     {
12612 	lp = LANGP_ENTRY(curbuf->b_langp, lpi);
12613 	if (lp->lp_slang->sl_sal.ga_len > 0)
12614 	{
12615 	    /* soundfold the bad word */
12616 	    slang = lp->lp_slang;
12617 	    spell_soundfold(slang, su->su_fbadword, TRUE, badsound);
12618 
12619 	    for (i = 0; i < su->su_ga.ga_len; ++i)
12620 	    {
12621 		stp = &SUG(su->su_ga, i);
12622 		stp->st_altscore = stp_sal_score(stp, su, slang, badsound);
12623 		if (stp->st_altscore == SCORE_MAXMAX)
12624 		    stp->st_score = (stp->st_score * 3 + SCORE_BIG) / 4;
12625 		else
12626 		    stp->st_score = (stp->st_score * 3
12627 						  + stp->st_altscore) / 4;
12628 		stp->st_salscore = FALSE;
12629 	    }
12630 	    break;
12631 	}
12632     }
12633 
12634     if (slang == NULL)	/* just in case */
12635 	return;
12636 
12637     /* Add the alternate score to su_sga. */
12638     for (i = 0; i < su->su_sga.ga_len; ++i)
12639     {
12640 	stp = &SUG(su->su_sga, i);
12641 	stp->st_altscore = spell_edit_score(slang,
12642 						su->su_badword, stp->st_word);
12643 	if (stp->st_score == SCORE_MAXMAX)
12644 	    stp->st_score = (SCORE_BIG * 7 + stp->st_altscore) / 8;
12645 	else
12646 	    stp->st_score = (stp->st_score * 7 + stp->st_altscore) / 8;
12647 	stp->st_salscore = TRUE;
12648     }
12649 
12650     /* Remove bad suggestions, sort the suggestions and truncate at "maxcount"
12651      * for both lists. */
12652     check_suggestions(su, &su->su_ga);
12653     (void)cleanup_suggestions(&su->su_ga, su->su_maxscore, su->su_maxcount);
12654     check_suggestions(su, &su->su_sga);
12655     (void)cleanup_suggestions(&su->su_sga, su->su_maxscore, su->su_maxcount);
12656 
12657     ga_init2(&ga, (int)sizeof(suginfo_T), 1);
12658     if (ga_grow(&ga, su->su_ga.ga_len + su->su_sga.ga_len) == FAIL)
12659 	return;
12660 
12661     stp = &SUG(ga, 0);
12662     for (i = 0; i < su->su_ga.ga_len || i < su->su_sga.ga_len; ++i)
12663     {
12664 	/* round 1: get a suggestion from su_ga
12665 	 * round 2: get a suggestion from su_sga */
12666 	for (round = 1; round <= 2; ++round)
12667 	{
12668 	    gap = round == 1 ? &su->su_ga : &su->su_sga;
12669 	    if (i < gap->ga_len)
12670 	    {
12671 		/* Don't add a word if it's already there. */
12672 		p = SUG(*gap, i).st_word;
12673 		for (j = 0; j < ga.ga_len; ++j)
12674 		    if (STRCMP(stp[j].st_word, p) == 0)
12675 			break;
12676 		if (j == ga.ga_len)
12677 		    stp[ga.ga_len++] = SUG(*gap, i);
12678 		else
12679 		    vim_free(p);
12680 	    }
12681 	}
12682     }
12683 
12684     ga_clear(&su->su_ga);
12685     ga_clear(&su->su_sga);
12686 
12687     /* Truncate the list to the number of suggestions that will be displayed. */
12688     if (ga.ga_len > su->su_maxcount)
12689     {
12690 	for (i = su->su_maxcount; i < ga.ga_len; ++i)
12691 	    vim_free(stp[i].st_word);
12692 	ga.ga_len = su->su_maxcount;
12693     }
12694 
12695     su->su_ga = ga;
12696 }
12697 
12698 /*
12699  * For the goodword in "stp" compute the soundalike score compared to the
12700  * badword.
12701  */
12702     static int
12703 stp_sal_score(stp, su, slang, badsound)
12704     suggest_T	*stp;
12705     suginfo_T	*su;
12706     slang_T	*slang;
12707     char_u	*badsound;	/* sound-folded badword */
12708 {
12709     char_u	*p;
12710     char_u	*pbad;
12711     char_u	*pgood;
12712     char_u	badsound2[MAXWLEN];
12713     char_u	fword[MAXWLEN];
12714     char_u	goodsound[MAXWLEN];
12715     char_u	goodword[MAXWLEN];
12716     int		lendiff;
12717 
12718     lendiff = (int)(su->su_badlen - stp->st_orglen);
12719     if (lendiff >= 0)
12720 	pbad = badsound;
12721     else
12722     {
12723 	/* soundfold the bad word with more characters following */
12724 	(void)spell_casefold(su->su_badptr, stp->st_orglen, fword, MAXWLEN);
12725 
12726 	/* When joining two words the sound often changes a lot.  E.g., "t he"
12727 	 * sounds like "t h" while "the" sounds like "@".  Avoid that by
12728 	 * removing the space.  Don't do it when the good word also contains a
12729 	 * space. */
12730 	if (vim_iswhite(su->su_badptr[su->su_badlen])
12731 					 && *skiptowhite(stp->st_word) == NUL)
12732 	    for (p = fword; *(p = skiptowhite(p)) != NUL; )
12733 		mch_memmove(p, p + 1, STRLEN(p));
12734 
12735 	spell_soundfold(slang, fword, TRUE, badsound2);
12736 	pbad = badsound2;
12737     }
12738 
12739     if (lendiff > 0)
12740     {
12741 	/* Add part of the bad word to the good word, so that we soundfold
12742 	 * what replaces the bad word. */
12743 	STRCPY(goodword, stp->st_word);
12744 	vim_strncpy(goodword + stp->st_wordlen,
12745 			    su->su_badptr + su->su_badlen - lendiff, lendiff);
12746 	pgood = goodword;
12747     }
12748     else
12749 	pgood = stp->st_word;
12750 
12751     /* Sound-fold the word and compute the score for the difference. */
12752     spell_soundfold(slang, pgood, FALSE, goodsound);
12753 
12754     return soundalike_score(goodsound, pbad);
12755 }
12756 
12757 /* structure used to store soundfolded words that add_sound_suggest() has
12758  * handled already. */
12759 typedef struct
12760 {
12761     short	sft_score;	/* lowest score used */
12762     char_u	sft_word[1];    /* soundfolded word, actually longer */
12763 } sftword_T;
12764 
12765 static sftword_T dumsft;
12766 #define HIKEY2SFT(p)  ((sftword_T *)(p - (dumsft.sft_word - (char_u *)&dumsft)))
12767 #define HI2SFT(hi)     HIKEY2SFT((hi)->hi_key)
12768 
12769 /*
12770  * Prepare for calling suggest_try_soundalike().
12771  */
12772     static void
12773 suggest_try_soundalike_prep()
12774 {
12775     langp_T	*lp;
12776     int		lpi;
12777     slang_T	*slang;
12778 
12779     /* Do this for all languages that support sound folding and for which a
12780      * .sug file has been loaded. */
12781     for (lpi = 0; lpi < curbuf->b_langp.ga_len; ++lpi)
12782     {
12783 	lp = LANGP_ENTRY(curbuf->b_langp, lpi);
12784 	slang = lp->lp_slang;
12785 	if (slang->sl_sal.ga_len > 0 && slang->sl_sbyts != NULL)
12786 	    /* prepare the hashtable used by add_sound_suggest() */
12787 	    hash_init(&slang->sl_sounddone);
12788     }
12789 }
12790 
12791 /*
12792  * Find suggestions by comparing the word in a sound-a-like form.
12793  * Note: This doesn't support postponed prefixes.
12794  */
12795     static void
12796 suggest_try_soundalike(su)
12797     suginfo_T	*su;
12798 {
12799     char_u	salword[MAXWLEN];
12800     langp_T	*lp;
12801     int		lpi;
12802     slang_T	*slang;
12803 
12804     /* Do this for all languages that support sound folding and for which a
12805      * .sug file has been loaded. */
12806     for (lpi = 0; lpi < curbuf->b_langp.ga_len; ++lpi)
12807     {
12808 	lp = LANGP_ENTRY(curbuf->b_langp, lpi);
12809 	slang = lp->lp_slang;
12810 	if (slang->sl_sal.ga_len > 0 && slang->sl_sbyts != NULL)
12811 	{
12812 	    /* soundfold the bad word */
12813 	    spell_soundfold(slang, su->su_fbadword, TRUE, salword);
12814 
12815 	    /* try all kinds of inserts/deletes/swaps/etc. */
12816 	    /* TODO: also soundfold the next words, so that we can try joining
12817 	     * and splitting */
12818 	    suggest_trie_walk(su, lp, salword, TRUE);
12819 	}
12820     }
12821 }
12822 
12823 /*
12824  * Finish up after calling suggest_try_soundalike().
12825  */
12826     static void
12827 suggest_try_soundalike_finish()
12828 {
12829     langp_T	*lp;
12830     int		lpi;
12831     slang_T	*slang;
12832     int		todo;
12833     hashitem_T	*hi;
12834 
12835     /* Do this for all languages that support sound folding and for which a
12836      * .sug file has been loaded. */
12837     for (lpi = 0; lpi < curbuf->b_langp.ga_len; ++lpi)
12838     {
12839 	lp = LANGP_ENTRY(curbuf->b_langp, lpi);
12840 	slang = lp->lp_slang;
12841 	if (slang->sl_sal.ga_len > 0 && slang->sl_sbyts != NULL)
12842 	{
12843 	    /* Free the info about handled words. */
12844 	    todo = slang->sl_sounddone.ht_used;
12845 	    for (hi = slang->sl_sounddone.ht_array; todo > 0; ++hi)
12846 		if (!HASHITEM_EMPTY(hi))
12847 		{
12848 		    vim_free(HI2SFT(hi));
12849 		    --todo;
12850 		}
12851 	    hash_clear(&slang->sl_sounddone);
12852 	}
12853     }
12854 }
12855 
12856 /*
12857  * A match with a soundfolded word is found.  Add the good word(s) that
12858  * produce this soundfolded word.
12859  */
12860     static void
12861 add_sound_suggest(su, goodword, score, lp)
12862     suginfo_T	*su;
12863     char_u	*goodword;
12864     int		score;		/* soundfold score  */
12865     langp_T	*lp;
12866 {
12867     slang_T	*slang = lp->lp_slang;	/* language for sound folding */
12868     int		sfwordnr;
12869     char_u	*nrline;
12870     int		orgnr;
12871     char_u	theword[MAXWLEN];
12872     int		i;
12873     int		wlen;
12874     char_u	*byts;
12875     idx_T	*idxs;
12876     int		n;
12877     int		wordcount;
12878     int		wc;
12879     int		goodscore;
12880     hash_T	hash;
12881     hashitem_T  *hi;
12882     sftword_T	*sft;
12883     int		bc, gc;
12884     int		limit;
12885 
12886     /*
12887      * It's very well possible that the same soundfold word is found several
12888      * times with different scores.  Since the following is quite slow only do
12889      * the words that have a better score than before.  Use a hashtable to
12890      * remember the words that have been done.
12891      */
12892     hash = hash_hash(goodword);
12893     hi = hash_lookup(&slang->sl_sounddone, goodword, hash);
12894     if (HASHITEM_EMPTY(hi))
12895     {
12896 	sft = (sftword_T *)alloc(sizeof(sftword_T) + STRLEN(goodword));
12897 	if (sft != NULL)
12898 	{
12899 	    sft->sft_score = score;
12900 	    STRCPY(sft->sft_word, goodword);
12901 	    hash_add_item(&slang->sl_sounddone, hi, sft->sft_word, hash);
12902 	}
12903     }
12904     else
12905     {
12906 	sft = HI2SFT(hi);
12907 	if (score >= sft->sft_score)
12908 	    return;
12909 	sft->sft_score = score;
12910     }
12911 
12912     /*
12913      * Find the word nr in the soundfold tree.
12914      */
12915     sfwordnr = soundfold_find(slang, goodword);
12916     if (sfwordnr < 0)
12917     {
12918 	EMSG2(_(e_intern2), "add_sound_suggest()");
12919 	return;
12920     }
12921 
12922     /*
12923      * go over the list of good words that produce this soundfold word
12924      */
12925     nrline = ml_get_buf(slang->sl_sugbuf, (linenr_T)(sfwordnr + 1), FALSE);
12926     orgnr = 0;
12927     while (*nrline != NUL)
12928     {
12929 	/* The wordnr was stored in a minimal nr of bytes as an offset to the
12930 	 * previous wordnr. */
12931 	orgnr += bytes2offset(&nrline);
12932 
12933 	byts = slang->sl_fbyts;
12934 	idxs = slang->sl_fidxs;
12935 
12936 	/* Lookup the word "orgnr" one of the two tries. */
12937 	n = 0;
12938 	wlen = 0;
12939 	wordcount = 0;
12940 	for (;;)
12941 	{
12942 	    i = 1;
12943 	    if (wordcount == orgnr && byts[n + 1] == NUL)
12944 		break;	/* found end of word */
12945 
12946 	    if (byts[n + 1] == NUL)
12947 		++wordcount;
12948 
12949 	    /* skip over the NUL bytes */
12950 	    for ( ; byts[n + i] == NUL; ++i)
12951 		if (i > byts[n])	/* safety check */
12952 		{
12953 		    STRCPY(theword + wlen, "BAD");
12954 		    goto badword;
12955 		}
12956 
12957 	    /* One of the siblings must have the word. */
12958 	    for ( ; i < byts[n]; ++i)
12959 	    {
12960 		wc = idxs[idxs[n + i]];	/* nr of words under this byte */
12961 		if (wordcount + wc > orgnr)
12962 		    break;
12963 		wordcount += wc;
12964 	    }
12965 
12966 	    theword[wlen++] = byts[n + i];
12967 	    n = idxs[n + i];
12968 	}
12969 badword:
12970 	theword[wlen] = NUL;
12971 
12972 	/* Go over the possible flags and regions. */
12973 	for (; i <= byts[n] && byts[n + i] == NUL; ++i)
12974 	{
12975 	    char_u	cword[MAXWLEN];
12976 	    char_u	*p;
12977 	    int		flags = (int)idxs[n + i];
12978 
12979 	    /* Skip words with the NOSUGGEST flag */
12980 	    if (flags & WF_NOSUGGEST)
12981 		continue;
12982 
12983 	    if (flags & WF_KEEPCAP)
12984 	    {
12985 		/* Must find the word in the keep-case tree. */
12986 		find_keepcap_word(slang, theword, cword);
12987 		p = cword;
12988 	    }
12989 	    else
12990 	    {
12991 		flags |= su->su_badflags;
12992 		if ((flags & WF_CAPMASK) != 0)
12993 		{
12994 		    /* Need to fix case according to "flags". */
12995 		    make_case_word(theword, cword, flags);
12996 		    p = cword;
12997 		}
12998 		else
12999 		    p = theword;
13000 	    }
13001 
13002 	    /* Add the suggestion. */
13003 	    if (sps_flags & SPS_DOUBLE)
13004 	    {
13005 		/* Add the suggestion if the score isn't too bad. */
13006 		if (score <= su->su_maxscore)
13007 		    add_suggestion(su, &su->su_sga, p, su->su_badlen,
13008 					       score, 0, FALSE, slang, FALSE);
13009 	    }
13010 	    else
13011 	    {
13012 		/* Add a penalty for words in another region. */
13013 		if ((flags & WF_REGION)
13014 			    && (((unsigned)flags >> 16) & lp->lp_region) == 0)
13015 		    goodscore = SCORE_REGION;
13016 		else
13017 		    goodscore = 0;
13018 
13019 		/* Add a small penalty for changing the first letter from
13020 		 * lower to upper case.  Helps for "tath" -> "Kath", which is
13021 		 * less common thatn "tath" -> "path".  Don't do it when the
13022 		 * letter is the same, that has already been counted. */
13023 		gc = PTR2CHAR(p);
13024 		if (SPELL_ISUPPER(gc))
13025 		{
13026 		    bc = PTR2CHAR(su->su_badword);
13027 		    if (!SPELL_ISUPPER(bc)
13028 				      && SPELL_TOFOLD(bc) != SPELL_TOFOLD(gc))
13029 			goodscore += SCORE_ICASE / 2;
13030 		}
13031 
13032 		/* Compute the score for the good word.  This only does letter
13033 		 * insert/delete/swap/replace.  REP items are not considered,
13034 		 * which may make the score a bit higher.
13035 		 * Use a limit for the score to make it work faster.  Use
13036 		 * MAXSCORE(), because RESCORE() will change the score.
13037 		 * If the limit is very high then the iterative method is
13038 		 * inefficient, using an array is quicker. */
13039 		limit = MAXSCORE(su->su_sfmaxscore - goodscore, score);
13040 		if (limit > SCORE_LIMITMAX)
13041 		    goodscore += spell_edit_score(slang, su->su_badword, p);
13042 		else
13043 		    goodscore += spell_edit_score_limit(slang, su->su_badword,
13044 								    p, limit);
13045 
13046 		/* When going over the limit don't bother to do the rest. */
13047 		if (goodscore < SCORE_MAXMAX)
13048 		{
13049 		    /* Give a bonus to words seen before. */
13050 		    goodscore = score_wordcount_adj(slang, goodscore, p, FALSE);
13051 
13052 		    /* Add the suggestion if the score isn't too bad. */
13053 		    goodscore = RESCORE(goodscore, score);
13054 		    if (goodscore <= su->su_sfmaxscore)
13055 			add_suggestion(su, &su->su_ga, p, su->su_badlen,
13056 					 goodscore, score, TRUE, slang, TRUE);
13057 		}
13058 	    }
13059 	}
13060 	/* smsg("word %s (%d): %s (%d)", sftword, sftnr, theword, orgnr); */
13061     }
13062 }
13063 
13064 /*
13065  * Find word "word" in fold-case tree for "slang" and return the word number.
13066  */
13067     static int
13068 soundfold_find(slang, word)
13069     slang_T	*slang;
13070     char_u	*word;
13071 {
13072     idx_T	arridx = 0;
13073     int		len;
13074     int		wlen = 0;
13075     int		c;
13076     char_u	*ptr = word;
13077     char_u	*byts;
13078     idx_T	*idxs;
13079     int		wordnr = 0;
13080 
13081     byts = slang->sl_sbyts;
13082     idxs = slang->sl_sidxs;
13083 
13084     for (;;)
13085     {
13086 	/* First byte is the number of possible bytes. */
13087 	len = byts[arridx++];
13088 
13089 	/* If the first possible byte is a zero the word could end here.
13090 	 * If the word ends we found the word.  If not skip the NUL bytes. */
13091 	c = ptr[wlen];
13092 	if (byts[arridx] == NUL)
13093 	{
13094 	    if (c == NUL)
13095 		break;
13096 
13097 	    /* Skip over the zeros, there can be several. */
13098 	    while (len > 0 && byts[arridx] == NUL)
13099 	    {
13100 		++arridx;
13101 		--len;
13102 	    }
13103 	    if (len == 0)
13104 		return -1;    /* no children, word should have ended here */
13105 	    ++wordnr;
13106 	}
13107 
13108 	/* If the word ends we didn't find it. */
13109 	if (c == NUL)
13110 	    return -1;
13111 
13112 	/* Perform a binary search in the list of accepted bytes. */
13113 	if (c == TAB)	    /* <Tab> is handled like <Space> */
13114 	    c = ' ';
13115 	while (byts[arridx] < c)
13116 	{
13117 	    /* The word count is in the first idxs[] entry of the child. */
13118 	    wordnr += idxs[idxs[arridx]];
13119 	    ++arridx;
13120 	    if (--len == 0)	/* end of the bytes, didn't find it */
13121 		return -1;
13122 	}
13123 	if (byts[arridx] != c)	/* didn't find the byte */
13124 	    return -1;
13125 
13126 	/* Continue at the child (if there is one). */
13127 	arridx = idxs[arridx];
13128 	++wlen;
13129 
13130 	/* One space in the good word may stand for several spaces in the
13131 	 * checked word. */
13132 	if (c == ' ')
13133 	    while (ptr[wlen] == ' ' || ptr[wlen] == TAB)
13134 		++wlen;
13135     }
13136 
13137     return wordnr;
13138 }
13139 
13140 /*
13141  * Copy "fword" to "cword", fixing case according to "flags".
13142  */
13143     static void
13144 make_case_word(fword, cword, flags)
13145     char_u	*fword;
13146     char_u	*cword;
13147     int		flags;
13148 {
13149     if (flags & WF_ALLCAP)
13150 	/* Make it all upper-case */
13151 	allcap_copy(fword, cword);
13152     else if (flags & WF_ONECAP)
13153 	/* Make the first letter upper-case */
13154 	onecap_copy(fword, cword, TRUE);
13155     else
13156 	/* Use goodword as-is. */
13157 	STRCPY(cword, fword);
13158 }
13159 
13160 /*
13161  * Use map string "map" for languages "lp".
13162  */
13163     static void
13164 set_map_str(lp, map)
13165     slang_T	*lp;
13166     char_u	*map;
13167 {
13168     char_u	*p;
13169     int		headc = 0;
13170     int		c;
13171     int		i;
13172 
13173     if (*map == NUL)
13174     {
13175 	lp->sl_has_map = FALSE;
13176 	return;
13177     }
13178     lp->sl_has_map = TRUE;
13179 
13180     /* Init the array and hash tables empty. */
13181     for (i = 0; i < 256; ++i)
13182 	lp->sl_map_array[i] = 0;
13183 #ifdef FEAT_MBYTE
13184     hash_init(&lp->sl_map_hash);
13185 #endif
13186 
13187     /*
13188      * The similar characters are stored separated with slashes:
13189      * "aaa/bbb/ccc/".  Fill sl_map_array[c] with the character before c and
13190      * before the same slash.  For characters above 255 sl_map_hash is used.
13191      */
13192     for (p = map; *p != NUL; )
13193     {
13194 #ifdef FEAT_MBYTE
13195 	c = mb_cptr2char_adv(&p);
13196 #else
13197 	c = *p++;
13198 #endif
13199 	if (c == '/')
13200 	    headc = 0;
13201 	else
13202 	{
13203 	    if (headc == 0)
13204 		 headc = c;
13205 
13206 #ifdef FEAT_MBYTE
13207 	    /* Characters above 255 don't fit in sl_map_array[], put them in
13208 	     * the hash table.  Each entry is the char, a NUL the headchar and
13209 	     * a NUL. */
13210 	    if (c >= 256)
13211 	    {
13212 		int	    cl = mb_char2len(c);
13213 		int	    headcl = mb_char2len(headc);
13214 		char_u	    *b;
13215 		hash_T	    hash;
13216 		hashitem_T  *hi;
13217 
13218 		b = alloc((unsigned)(cl + headcl + 2));
13219 		if (b == NULL)
13220 		    return;
13221 		mb_char2bytes(c, b);
13222 		b[cl] = NUL;
13223 		mb_char2bytes(headc, b + cl + 1);
13224 		b[cl + 1 + headcl] = NUL;
13225 		hash = hash_hash(b);
13226 		hi = hash_lookup(&lp->sl_map_hash, b, hash);
13227 		if (HASHITEM_EMPTY(hi))
13228 		    hash_add_item(&lp->sl_map_hash, hi, b, hash);
13229 		else
13230 		{
13231 		    /* This should have been checked when generating the .spl
13232 		     * file. */
13233 		    EMSG(_("E783: duplicate char in MAP entry"));
13234 		    vim_free(b);
13235 		}
13236 	    }
13237 	    else
13238 #endif
13239 		lp->sl_map_array[c] = headc;
13240 	}
13241     }
13242 }
13243 
13244 /*
13245  * Return TRUE if "c1" and "c2" are similar characters according to the MAP
13246  * lines in the .aff file.
13247  */
13248     static int
13249 similar_chars(slang, c1, c2)
13250     slang_T	*slang;
13251     int		c1;
13252     int		c2;
13253 {
13254     int		m1, m2;
13255 #ifdef FEAT_MBYTE
13256     char_u	buf[MB_MAXBYTES];
13257     hashitem_T  *hi;
13258 
13259     if (c1 >= 256)
13260     {
13261 	buf[mb_char2bytes(c1, buf)] = 0;
13262 	hi = hash_find(&slang->sl_map_hash, buf);
13263 	if (HASHITEM_EMPTY(hi))
13264 	    m1 = 0;
13265 	else
13266 	    m1 = mb_ptr2char(hi->hi_key + STRLEN(hi->hi_key) + 1);
13267     }
13268     else
13269 #endif
13270 	m1 = slang->sl_map_array[c1];
13271     if (m1 == 0)
13272 	return FALSE;
13273 
13274 
13275 #ifdef FEAT_MBYTE
13276     if (c2 >= 256)
13277     {
13278 	buf[mb_char2bytes(c2, buf)] = 0;
13279 	hi = hash_find(&slang->sl_map_hash, buf);
13280 	if (HASHITEM_EMPTY(hi))
13281 	    m2 = 0;
13282 	else
13283 	    m2 = mb_ptr2char(hi->hi_key + STRLEN(hi->hi_key) + 1);
13284     }
13285     else
13286 #endif
13287 	m2 = slang->sl_map_array[c2];
13288 
13289     return m1 == m2;
13290 }
13291 
13292 /*
13293  * Add a suggestion to the list of suggestions.
13294  * For a suggestion that is already in the list the lowest score is remembered.
13295  */
13296     static void
13297 add_suggestion(su, gap, goodword, badlenarg, score, altscore, had_bonus,
13298 								 slang, maxsf)
13299     suginfo_T	*su;
13300     garray_T	*gap;		/* either su_ga or su_sga */
13301     char_u	*goodword;
13302     int		badlenarg;	/* len of bad word replaced with "goodword" */
13303     int		score;
13304     int		altscore;
13305     int		had_bonus;	/* value for st_had_bonus */
13306     slang_T	*slang;		/* language for sound folding */
13307     int		maxsf;		/* su_maxscore applies to soundfold score,
13308 				   su_sfmaxscore to the total score. */
13309 {
13310     int		goodlen;	/* len of goodword changed */
13311     int		badlen;		/* len of bad word changed */
13312     suggest_T   *stp;
13313     suggest_T   new_sug;
13314     int		i;
13315     char_u	*pgood, *pbad;
13316 
13317     /* Minimize "badlen" for consistency.  Avoids that changing "the the" to
13318      * "thee the" is added next to changing the first "the" the "thee".  */
13319     pgood = goodword + STRLEN(goodword);
13320     pbad = su->su_badptr + badlenarg;
13321     for (;;)
13322     {
13323 	goodlen = pgood - goodword;
13324 	badlen = pbad - su->su_badptr;
13325 	if (goodlen <= 0 || badlen <= 0)
13326 	    break;
13327 	mb_ptr_back(goodword, pgood);
13328 	mb_ptr_back(su->su_badptr, pbad);
13329 #ifdef FEAT_MBYTE
13330 	if (has_mbyte)
13331 	{
13332 	    if (mb_ptr2char(pgood) != mb_ptr2char(pbad))
13333 		break;
13334 	}
13335 	else
13336 #endif
13337 	    if (*pgood != *pbad)
13338 		break;
13339     }
13340 
13341     if (badlen == 0 && goodlen == 0)
13342 	/* goodword doesn't change anything; may happen for "the the" changing
13343 	 * the first "the" to itself. */
13344 	return;
13345 
13346     /* Check if the word is already there.  Also check the length that is
13347      * being replaced "thes," -> "these" is a different suggestion from
13348      * "thes" -> "these". */
13349     stp = &SUG(*gap, 0);
13350     for (i = gap->ga_len; --i >= 0; ++stp)
13351 	if (stp->st_wordlen == goodlen
13352 		&& stp->st_orglen == badlen
13353 		&& STRNCMP(stp->st_word, goodword, goodlen) == 0)
13354 	{
13355 	    /*
13356 	     * Found it.  Remember the word with the lowest score.
13357 	     */
13358 	    if (stp->st_slang == NULL)
13359 		stp->st_slang = slang;
13360 
13361 	    new_sug.st_score = score;
13362 	    new_sug.st_altscore = altscore;
13363 	    new_sug.st_had_bonus = had_bonus;
13364 
13365 	    if (stp->st_had_bonus != had_bonus)
13366 	    {
13367 		/* Only one of the two had the soundalike score computed.
13368 		 * Need to do that for the other one now, otherwise the
13369 		 * scores can't be compared.  This happens because
13370 		 * suggest_try_change() doesn't compute the soundalike
13371 		 * word to keep it fast, while some special methods set
13372 		 * the soundalike score to zero. */
13373 		if (had_bonus)
13374 		    rescore_one(su, stp);
13375 		else
13376 		{
13377 		    new_sug.st_word = stp->st_word;
13378 		    new_sug.st_wordlen = stp->st_wordlen;
13379 		    new_sug.st_slang = stp->st_slang;
13380 		    new_sug.st_orglen = badlen;
13381 		    rescore_one(su, &new_sug);
13382 		}
13383 	    }
13384 
13385 	    if (stp->st_score > new_sug.st_score)
13386 	    {
13387 		stp->st_score = new_sug.st_score;
13388 		stp->st_altscore = new_sug.st_altscore;
13389 		stp->st_had_bonus = new_sug.st_had_bonus;
13390 	    }
13391 	    break;
13392 	}
13393 
13394     if (i < 0 && ga_grow(gap, 1) == OK)
13395     {
13396 	/* Add a suggestion. */
13397 	stp = &SUG(*gap, gap->ga_len);
13398 	stp->st_word = vim_strnsave(goodword, goodlen);
13399 	if (stp->st_word != NULL)
13400 	{
13401 	    stp->st_wordlen = goodlen;
13402 	    stp->st_score = score;
13403 	    stp->st_altscore = altscore;
13404 	    stp->st_had_bonus = had_bonus;
13405 	    stp->st_orglen = badlen;
13406 	    stp->st_slang = slang;
13407 	    ++gap->ga_len;
13408 
13409 	    /* If we have too many suggestions now, sort the list and keep
13410 	     * the best suggestions. */
13411 	    if (gap->ga_len > SUG_MAX_COUNT(su))
13412 	    {
13413 		if (maxsf)
13414 		    su->su_sfmaxscore = cleanup_suggestions(gap,
13415 				      su->su_sfmaxscore, SUG_CLEAN_COUNT(su));
13416 		else
13417 		{
13418 		    i = su->su_maxscore;
13419 		    su->su_maxscore = cleanup_suggestions(gap,
13420 					su->su_maxscore, SUG_CLEAN_COUNT(su));
13421 		}
13422 	    }
13423 	}
13424     }
13425 }
13426 
13427 /*
13428  * Suggestions may in fact be flagged as errors.  Esp. for banned words and
13429  * for split words, such as "the the".  Remove these from the list here.
13430  */
13431     static void
13432 check_suggestions(su, gap)
13433     suginfo_T	*su;
13434     garray_T	*gap;		    /* either su_ga or su_sga */
13435 {
13436     suggest_T   *stp;
13437     int		i;
13438     char_u	longword[MAXWLEN + 1];
13439     int		len;
13440     hlf_T	attr;
13441 
13442     stp = &SUG(*gap, 0);
13443     for (i = gap->ga_len - 1; i >= 0; --i)
13444     {
13445 	/* Need to append what follows to check for "the the". */
13446 	STRCPY(longword, stp[i].st_word);
13447 	len = stp[i].st_wordlen;
13448 	vim_strncpy(longword + len, su->su_badptr + stp[i].st_orglen,
13449 							       MAXWLEN - len);
13450 	attr = HLF_COUNT;
13451 	(void)spell_check(curwin, longword, &attr, NULL, FALSE);
13452 	if (attr != HLF_COUNT)
13453 	{
13454 	    /* Remove this entry. */
13455 	    vim_free(stp[i].st_word);
13456 	    --gap->ga_len;
13457 	    if (i < gap->ga_len)
13458 		mch_memmove(stp + i, stp + i + 1,
13459 				       sizeof(suggest_T) * (gap->ga_len - i));
13460 	}
13461     }
13462 }
13463 
13464 
13465 /*
13466  * Add a word to be banned.
13467  */
13468     static void
13469 add_banned(su, word)
13470     suginfo_T	*su;
13471     char_u	*word;
13472 {
13473     char_u	*s;
13474     hash_T	hash;
13475     hashitem_T	*hi;
13476 
13477     hash = hash_hash(word);
13478     hi = hash_lookup(&su->su_banned, word, hash);
13479     if (HASHITEM_EMPTY(hi))
13480     {
13481 	s = vim_strsave(word);
13482 	if (s != NULL)
13483 	    hash_add_item(&su->su_banned, hi, s, hash);
13484     }
13485 }
13486 
13487 /*
13488  * Recompute the score for all suggestions if sound-folding is possible.  This
13489  * is slow, thus only done for the final results.
13490  */
13491     static void
13492 rescore_suggestions(su)
13493     suginfo_T	*su;
13494 {
13495     int		i;
13496 
13497     if (su->su_sallang != NULL)
13498 	for (i = 0; i < su->su_ga.ga_len; ++i)
13499 	    rescore_one(su, &SUG(su->su_ga, i));
13500 }
13501 
13502 /*
13503  * Recompute the score for one suggestion if sound-folding is possible.
13504  */
13505     static void
13506 rescore_one(su, stp)
13507     suginfo_T	*su;
13508     suggest_T	*stp;
13509 {
13510     slang_T	*slang = stp->st_slang;
13511     char_u	sal_badword[MAXWLEN];
13512     char_u	*p;
13513 
13514     /* Only rescore suggestions that have no sal score yet and do have a
13515      * language. */
13516     if (slang != NULL && slang->sl_sal.ga_len > 0 && !stp->st_had_bonus)
13517     {
13518 	if (slang == su->su_sallang)
13519 	    p = su->su_sal_badword;
13520 	else
13521 	{
13522 	    spell_soundfold(slang, su->su_fbadword, TRUE, sal_badword);
13523 	    p = sal_badword;
13524 	}
13525 
13526 	stp->st_altscore = stp_sal_score(stp, su, slang, p);
13527 	if (stp->st_altscore == SCORE_MAXMAX)
13528 	    stp->st_altscore = SCORE_BIG;
13529 	stp->st_score = RESCORE(stp->st_score, stp->st_altscore);
13530 	stp->st_had_bonus = TRUE;
13531     }
13532 }
13533 
13534 static int
13535 #ifdef __BORLANDC__
13536 _RTLENTRYF
13537 #endif
13538 sug_compare __ARGS((const void *s1, const void *s2));
13539 
13540 /*
13541  * Function given to qsort() to sort the suggestions on st_score.
13542  * First on "st_score", then "st_altscore" then alphabetically.
13543  */
13544     static int
13545 #ifdef __BORLANDC__
13546 _RTLENTRYF
13547 #endif
13548 sug_compare(s1, s2)
13549     const void	*s1;
13550     const void	*s2;
13551 {
13552     suggest_T	*p1 = (suggest_T *)s1;
13553     suggest_T	*p2 = (suggest_T *)s2;
13554     int		n = p1->st_score - p2->st_score;
13555 
13556     if (n == 0)
13557     {
13558 	n = p1->st_altscore - p2->st_altscore;
13559 	if (n == 0)
13560 	    n = STRICMP(p1->st_word, p2->st_word);
13561     }
13562     return n;
13563 }
13564 
13565 /*
13566  * Cleanup the suggestions:
13567  * - Sort on score.
13568  * - Remove words that won't be displayed.
13569  * Returns the maximum score in the list or "maxscore" unmodified.
13570  */
13571     static int
13572 cleanup_suggestions(gap, maxscore, keep)
13573     garray_T	*gap;
13574     int		maxscore;
13575     int		keep;		/* nr of suggestions to keep */
13576 {
13577     suggest_T   *stp = &SUG(*gap, 0);
13578     int		i;
13579 
13580     /* Sort the list. */
13581     qsort(gap->ga_data, (size_t)gap->ga_len, sizeof(suggest_T), sug_compare);
13582 
13583     /* Truncate the list to the number of suggestions that will be displayed. */
13584     if (gap->ga_len > keep)
13585     {
13586 	for (i = keep; i < gap->ga_len; ++i)
13587 	    vim_free(stp[i].st_word);
13588 	gap->ga_len = keep;
13589 	return stp[keep - 1].st_score;
13590     }
13591     return maxscore;
13592 }
13593 
#if defined(FEAT_EVAL) || defined(PROTO)
/*
 * Soundfold "word", for soundfold().
 * When spell checking is enabled in the current window and 'spelllang' of
 * the current buffer is not empty, the first language that has SAL items
 * (sl_sal is not empty) is used.  Otherwise a copy of "word" is returned.
 * Result is in allocated memory, NULL for an error.
 */
    char_u *
eval_soundfold(word)
    char_u	*word;
{
    langp_T	*lp;
    char_u	sound[MAXWLEN];
    int		idx;

    if (curwin->w_p_spell && *curbuf->b_p_spl != NUL)
    {
	for (idx = 0; idx < curbuf->b_langp.ga_len; ++idx)
	{
	    lp = LANGP_ENTRY(curbuf->b_langp, idx);
	    if (lp->lp_slang->sl_sal.ga_len <= 0)
		continue;	/* this language can't do it */

	    /* soundfold the word and return a copy of the result */
	    spell_soundfold(lp->lp_slang, word, FALSE, sound);
	    return vim_strsave(sound);
	}
    }

    /* No language with sound folding, return word as-is. */
    return vim_strsave(word);
}
#endif
13624 
13625 /*
13626  * Turn "inword" into its sound-a-like equivalent in "res[MAXWLEN]".
13627  *
13628  * There are many ways to turn a word into a sound-a-like representation.  The
13629  * oldest is Soundex (1918!).   A nice overview can be found in "Approximate
13630  * swedish name matching - survey and test of different algorithms" by Klas
13631  * Erikson.
13632  *
13633  * We support two methods:
13634  * 1. SOFOFROM/SOFOTO do a simple character mapping.
13635  * 2. SAL items define a more advanced sound-folding (and much slower).
13636  */
13637     static void
13638 spell_soundfold(slang, inword, folded, res)
13639     slang_T	*slang;
13640     char_u	*inword;
13641     int		folded;	    /* "inword" is already case-folded */
13642     char_u	*res;
13643 {
13644     char_u	fword[MAXWLEN];
13645     char_u	*word;
13646 
13647     if (slang->sl_sofo)
13648 	/* SOFOFROM and SOFOTO used */
13649 	spell_soundfold_sofo(slang, inword, res);
13650     else
13651     {
13652 	/* SAL items used.  Requires the word to be case-folded. */
13653 	if (folded)
13654 	    word = inword;
13655 	else
13656 	{
13657 	    (void)spell_casefold(inword, STRLEN(inword), fword, MAXWLEN);
13658 	    word = fword;
13659 	}
13660 
13661 #ifdef FEAT_MBYTE
13662 	if (has_mbyte)
13663 	    spell_soundfold_wsal(slang, word, res);
13664 	else
13665 #endif
13666 	    spell_soundfold_sal(slang, word, res);
13667     }
13668 }
13669 
13670 /*
13671  * Perform sound folding of "inword" into "res" according to SOFOFROM and
13672  * SOFOTO lines.
13673  */
13674     static void
13675 spell_soundfold_sofo(slang, inword, res)
13676     slang_T	*slang;
13677     char_u	*inword;
13678     char_u	*res;
13679 {
13680     char_u	*s;
13681     int		ri = 0;
13682     int		c;
13683 
13684 #ifdef FEAT_MBYTE
13685     if (has_mbyte)
13686     {
13687 	int	prevc = 0;
13688 	int	*ip;
13689 
13690 	/* The sl_sal_first[] table contains the translation for chars up to
13691 	 * 255, sl_sal the rest. */
13692 	for (s = inword; *s != NUL; )
13693 	{
13694 	    c = mb_cptr2char_adv(&s);
13695 	    if (enc_utf8 ? utf_class(c) == 0 : vim_iswhite(c))
13696 		c = ' ';
13697 	    else if (c < 256)
13698 		c = slang->sl_sal_first[c];
13699 	    else
13700 	    {
13701 		ip = ((int **)slang->sl_sal.ga_data)[c & 0xff];
13702 		if (ip == NULL)		/* empty list, can't match */
13703 		    c = NUL;
13704 		else
13705 		    for (;;)		/* find "c" in the list */
13706 		    {
13707 			if (*ip == 0)	/* not found */
13708 			{
13709 			    c = NUL;
13710 			    break;
13711 			}
13712 			if (*ip == c)	/* match! */
13713 			{
13714 			    c = ip[1];
13715 			    break;
13716 			}
13717 			ip += 2;
13718 		    }
13719 	    }
13720 
13721 	    if (c != NUL && c != prevc)
13722 	    {
13723 		ri += mb_char2bytes(c, res + ri);
13724 		if (ri + MB_MAXBYTES > MAXWLEN)
13725 		    break;
13726 		prevc = c;
13727 	    }
13728 	}
13729     }
13730     else
13731 #endif
13732     {
13733 	/* The sl_sal_first[] table contains the translation. */
13734 	for (s = inword; (c = *s) != NUL; ++s)
13735 	{
13736 	    if (vim_iswhite(c))
13737 		c = ' ';
13738 	    else
13739 		c = slang->sl_sal_first[c];
13740 	    if (c != NUL && (ri == 0 || res[ri - 1] != c))
13741 		res[ri++] = c;
13742 	}
13743     }
13744 
13745     res[ri] = NUL;
13746 }
13747 
13748     static void
13749 spell_soundfold_sal(slang, inword, res)
13750     slang_T	*slang;
13751     char_u	*inword;
13752     char_u	*res;
13753 {
13754     salitem_T	*smp;
13755     char_u	word[MAXWLEN];
13756     char_u	*s = inword;
13757     char_u	*t;
13758     char_u	*pf;
13759     int		i, j, z;
13760     int		reslen;
13761     int		n, k = 0;
13762     int		z0;
13763     int		k0;
13764     int		n0;
13765     int		c;
13766     int		pri;
13767     int		p0 = -333;
13768     int		c0;
13769 
13770     /* Remove accents, if wanted.  We actually remove all non-word characters.
13771      * But keep white space.  We need a copy, the word may be changed here. */
13772     if (slang->sl_rem_accents)
13773     {
13774 	t = word;
13775 	while (*s != NUL)
13776 	{
13777 	    if (vim_iswhite(*s))
13778 	    {
13779 		*t++ = ' ';
13780 		s = skipwhite(s);
13781 	    }
13782 	    else
13783 	    {
13784 		if (spell_iswordp_nmw(s))
13785 		    *t++ = *s;
13786 		++s;
13787 	    }
13788 	}
13789 	*t = NUL;
13790     }
13791     else
13792 	STRCPY(word, s);
13793 
13794     smp = (salitem_T *)slang->sl_sal.ga_data;
13795 
13796     /*
13797      * This comes from Aspell phonet.cpp.  Converted from C++ to C.
13798      * Changed to keep spaces.
13799      */
13800     i = reslen = z = 0;
13801     while ((c = word[i]) != NUL)
13802     {
13803 	/* Start with the first rule that has the character in the word. */
13804 	n = slang->sl_sal_first[c];
13805 	z0 = 0;
13806 
13807 	if (n >= 0)
13808 	{
13809 	    /* check all rules for the same letter */
13810 	    for (; (s = smp[n].sm_lead)[0] == c; ++n)
13811 	    {
13812 		/* Quickly skip entries that don't match the word.  Most
13813 		 * entries are less then three chars, optimize for that. */
13814 		k = smp[n].sm_leadlen;
13815 		if (k > 1)
13816 		{
13817 		    if (word[i + 1] != s[1])
13818 			continue;
13819 		    if (k > 2)
13820 		    {
13821 			for (j = 2; j < k; ++j)
13822 			    if (word[i + j] != s[j])
13823 				break;
13824 			if (j < k)
13825 			    continue;
13826 		    }
13827 		}
13828 
13829 		if ((pf = smp[n].sm_oneof) != NULL)
13830 		{
13831 		    /* Check for match with one of the chars in "sm_oneof". */
13832 		    while (*pf != NUL && *pf != word[i + k])
13833 			++pf;
13834 		    if (*pf == NUL)
13835 			continue;
13836 		    ++k;
13837 		}
13838 		s = smp[n].sm_rules;
13839 		pri = 5;    /* default priority */
13840 
13841 		p0 = *s;
13842 		k0 = k;
13843 		while (*s == '-' && k > 1)
13844 		{
13845 		    k--;
13846 		    s++;
13847 		}
13848 		if (*s == '<')
13849 		    s++;
13850 		if (VIM_ISDIGIT(*s))
13851 		{
13852 		    /* determine priority */
13853 		    pri = *s - '0';
13854 		    s++;
13855 		}
13856 		if (*s == '^' && *(s + 1) == '^')
13857 		    s++;
13858 
13859 		if (*s == NUL
13860 			|| (*s == '^'
13861 			    && (i == 0 || !(word[i - 1] == ' '
13862 				      || spell_iswordp(word + i - 1, curbuf)))
13863 			    && (*(s + 1) != '$'
13864 				|| (!spell_iswordp(word + i + k0, curbuf))))
13865 			|| (*s == '$' && i > 0
13866 			    && spell_iswordp(word + i - 1, curbuf)
13867 			    && (!spell_iswordp(word + i + k0, curbuf))))
13868 		{
13869 		    /* search for followup rules, if:    */
13870 		    /* followup and k > 1  and  NO '-' in searchstring */
13871 		    c0 = word[i + k - 1];
13872 		    n0 = slang->sl_sal_first[c0];
13873 
13874 		    if (slang->sl_followup && k > 1 && n0 >= 0
13875 					   && p0 != '-' && word[i + k] != NUL)
13876 		    {
13877 			/* test follow-up rule for "word[i + k]" */
13878 			for ( ; (s = smp[n0].sm_lead)[0] == c0; ++n0)
13879 			{
13880 			    /* Quickly skip entries that don't match the word.
13881 			     * */
13882 			    k0 = smp[n0].sm_leadlen;
13883 			    if (k0 > 1)
13884 			    {
13885 				if (word[i + k] != s[1])
13886 				    continue;
13887 				if (k0 > 2)
13888 				{
13889 				    pf = word + i + k + 1;
13890 				    for (j = 2; j < k0; ++j)
13891 					if (*pf++ != s[j])
13892 					    break;
13893 				    if (j < k0)
13894 					continue;
13895 				}
13896 			    }
13897 			    k0 += k - 1;
13898 
13899 			    if ((pf = smp[n0].sm_oneof) != NULL)
13900 			    {
13901 				/* Check for match with one of the chars in
13902 				 * "sm_oneof". */
13903 				while (*pf != NUL && *pf != word[i + k0])
13904 				    ++pf;
13905 				if (*pf == NUL)
13906 				    continue;
13907 				++k0;
13908 			    }
13909 
13910 			    p0 = 5;
13911 			    s = smp[n0].sm_rules;
13912 			    while (*s == '-')
13913 			    {
13914 				/* "k0" gets NOT reduced because
13915 				 * "if (k0 == k)" */
13916 				s++;
13917 			    }
13918 			    if (*s == '<')
13919 				s++;
13920 			    if (VIM_ISDIGIT(*s))
13921 			    {
13922 				p0 = *s - '0';
13923 				s++;
13924 			    }
13925 
13926 			    if (*s == NUL
13927 				    /* *s == '^' cuts */
13928 				    || (*s == '$'
13929 					    && !spell_iswordp(word + i + k0,
13930 								     curbuf)))
13931 			    {
13932 				if (k0 == k)
13933 				    /* this is just a piece of the string */
13934 				    continue;
13935 
13936 				if (p0 < pri)
13937 				    /* priority too low */
13938 				    continue;
13939 				/* rule fits; stop search */
13940 				break;
13941 			    }
13942 			}
13943 
13944 			if (p0 >= pri && smp[n0].sm_lead[0] == c0)
13945 			    continue;
13946 		    }
13947 
13948 		    /* replace string */
13949 		    s = smp[n].sm_to;
13950 		    if (s == NULL)
13951 			s = (char_u *)"";
13952 		    pf = smp[n].sm_rules;
13953 		    p0 = (vim_strchr(pf, '<') != NULL) ? 1 : 0;
13954 		    if (p0 == 1 && z == 0)
13955 		    {
13956 			/* rule with '<' is used */
13957 			if (reslen > 0 && *s != NUL && (res[reslen - 1] == c
13958 						    || res[reslen - 1] == *s))
13959 			    reslen--;
13960 			z0 = 1;
13961 			z = 1;
13962 			k0 = 0;
13963 			while (*s != NUL && word[i + k0] != NUL)
13964 			{
13965 			    word[i + k0] = *s;
13966 			    k0++;
13967 			    s++;
13968 			}
13969 			if (k > k0)
13970 			    mch_memmove(word + i + k0, word + i + k,
13971 						    STRLEN(word + i + k) + 1);
13972 
13973 			/* new "actual letter" */
13974 			c = word[i];
13975 		    }
13976 		    else
13977 		    {
13978 			/* no '<' rule used */
13979 			i += k - 1;
13980 			z = 0;
13981 			while (*s != NUL && s[1] != NUL && reslen < MAXWLEN)
13982 			{
13983 			    if (reslen == 0 || res[reslen - 1] != *s)
13984 				res[reslen++] = *s;
13985 			    s++;
13986 			}
13987 			/* new "actual letter" */
13988 			c = *s;
13989 			if (strstr((char *)pf, "^^") != NULL)
13990 			{
13991 			    if (c != NUL)
13992 				res[reslen++] = c;
13993 			    mch_memmove(word, word + i + 1,
13994 						    STRLEN(word + i + 1) + 1);
13995 			    i = 0;
13996 			    z0 = 1;
13997 			}
13998 		    }
13999 		    break;
14000 		}
14001 	    }
14002 	}
14003 	else if (vim_iswhite(c))
14004 	{
14005 	    c = ' ';
14006 	    k = 1;
14007 	}
14008 
14009 	if (z0 == 0)
14010 	{
14011 	    if (k && !p0 && reslen < MAXWLEN && c != NUL
14012 		    && (!slang->sl_collapse || reslen == 0
14013 						     || res[reslen - 1] != c))
14014 		/* condense only double letters */
14015 		res[reslen++] = c;
14016 
14017 	    i++;
14018 	    z = 0;
14019 	    k = 0;
14020 	}
14021     }
14022 
14023     res[reslen] = NUL;
14024 }
14025 
14026 #ifdef FEAT_MBYTE
14027 /*
14028  * Turn "inword" into its sound-a-like equivalent in "res[MAXWLEN]".
14029  * Multi-byte version of spell_soundfold().
14030  */
14031     static void
14032 spell_soundfold_wsal(slang, inword, res)
14033     slang_T	*slang;
14034     char_u	*inword;
14035     char_u	*res;
14036 {
14037     salitem_T	*smp = (salitem_T *)slang->sl_sal.ga_data;
14038     int		word[MAXWLEN];
14039     int		wres[MAXWLEN];
14040     int		l;
14041     char_u	*s;
14042     int		*ws;
14043     char_u	*t;
14044     int		*pf;
14045     int		i, j, z;
14046     int		reslen;
14047     int		n, k = 0;
14048     int		z0;
14049     int		k0;
14050     int		n0;
14051     int		c;
14052     int		pri;
14053     int		p0 = -333;
14054     int		c0;
14055     int		did_white = FALSE;
14056 
14057     /*
14058      * Convert the multi-byte string to a wide-character string.
14059      * Remove accents, if wanted.  We actually remove all non-word characters.
14060      * But keep white space.
14061      */
14062     n = 0;
14063     for (s = inword; *s != NUL; )
14064     {
14065 	t = s;
14066 	c = mb_cptr2char_adv(&s);
14067 	if (slang->sl_rem_accents)
14068 	{
14069 	    if (enc_utf8 ? utf_class(c) == 0 : vim_iswhite(c))
14070 	    {
14071 		if (did_white)
14072 		    continue;
14073 		c = ' ';
14074 		did_white = TRUE;
14075 	    }
14076 	    else
14077 	    {
14078 		did_white = FALSE;
14079 		if (!spell_iswordp_nmw(t))
14080 		    continue;
14081 	    }
14082 	}
14083 	word[n++] = c;
14084     }
14085     word[n] = NUL;
14086 
14087     /*
14088      * This comes from Aspell phonet.cpp.
14089      * Converted from C++ to C.  Added support for multi-byte chars.
14090      * Changed to keep spaces.
14091      */
14092     i = reslen = z = 0;
14093     while ((c = word[i]) != NUL)
14094     {
14095 	/* Start with the first rule that has the character in the word. */
14096 	n = slang->sl_sal_first[c & 0xff];
14097 	z0 = 0;
14098 
14099 	if (n >= 0)
14100 	{
14101 	    /* check all rules for the same index byte */
14102 	    for (; ((ws = smp[n].sm_lead_w)[0] & 0xff) == (c & 0xff); ++n)
14103 	    {
14104 		/* Quickly skip entries that don't match the word.  Most
14105 		 * entries are less then three chars, optimize for that. */
14106 		if (c != ws[0])
14107 		    continue;
14108 		k = smp[n].sm_leadlen;
14109 		if (k > 1)
14110 		{
14111 		    if (word[i + 1] != ws[1])
14112 			continue;
14113 		    if (k > 2)
14114 		    {
14115 			for (j = 2; j < k; ++j)
14116 			    if (word[i + j] != ws[j])
14117 				break;
14118 			if (j < k)
14119 			    continue;
14120 		    }
14121 		}
14122 
14123 		if ((pf = smp[n].sm_oneof_w) != NULL)
14124 		{
14125 		    /* Check for match with one of the chars in "sm_oneof". */
14126 		    while (*pf != NUL && *pf != word[i + k])
14127 			++pf;
14128 		    if (*pf == NUL)
14129 			continue;
14130 		    ++k;
14131 		}
14132 		s = smp[n].sm_rules;
14133 		pri = 5;    /* default priority */
14134 
14135 		p0 = *s;
14136 		k0 = k;
14137 		while (*s == '-' && k > 1)
14138 		{
14139 		    k--;
14140 		    s++;
14141 		}
14142 		if (*s == '<')
14143 		    s++;
14144 		if (VIM_ISDIGIT(*s))
14145 		{
14146 		    /* determine priority */
14147 		    pri = *s - '0';
14148 		    s++;
14149 		}
14150 		if (*s == '^' && *(s + 1) == '^')
14151 		    s++;
14152 
14153 		if (*s == NUL
14154 			|| (*s == '^'
14155 			    && (i == 0 || !(word[i - 1] == ' '
14156 				    || spell_iswordp_w(word + i - 1, curbuf)))
14157 			    && (*(s + 1) != '$'
14158 				|| (!spell_iswordp_w(word + i + k0, curbuf))))
14159 			|| (*s == '$' && i > 0
14160 			    && spell_iswordp_w(word + i - 1, curbuf)
14161 			    && (!spell_iswordp_w(word + i + k0, curbuf))))
14162 		{
14163 		    /* search for followup rules, if:    */
14164 		    /* followup and k > 1  and  NO '-' in searchstring */
14165 		    c0 = word[i + k - 1];
14166 		    n0 = slang->sl_sal_first[c0 & 0xff];
14167 
14168 		    if (slang->sl_followup && k > 1 && n0 >= 0
14169 					   && p0 != '-' && word[i + k] != NUL)
14170 		    {
14171 			/* Test follow-up rule for "word[i + k]"; loop over
14172 			 * all entries with the same index byte. */
14173 			for ( ; ((ws = smp[n0].sm_lead_w)[0] & 0xff)
14174 							 == (c0 & 0xff); ++n0)
14175 			{
14176 			    /* Quickly skip entries that don't match the word.
14177 			     */
14178 			    if (c0 != ws[0])
14179 				continue;
14180 			    k0 = smp[n0].sm_leadlen;
14181 			    if (k0 > 1)
14182 			    {
14183 				if (word[i + k] != ws[1])
14184 				    continue;
14185 				if (k0 > 2)
14186 				{
14187 				    pf = word + i + k + 1;
14188 				    for (j = 2; j < k0; ++j)
14189 					if (*pf++ != ws[j])
14190 					    break;
14191 				    if (j < k0)
14192 					continue;
14193 				}
14194 			    }
14195 			    k0 += k - 1;
14196 
14197 			    if ((pf = smp[n0].sm_oneof_w) != NULL)
14198 			    {
14199 				/* Check for match with one of the chars in
14200 				 * "sm_oneof". */
14201 				while (*pf != NUL && *pf != word[i + k0])
14202 				    ++pf;
14203 				if (*pf == NUL)
14204 				    continue;
14205 				++k0;
14206 			    }
14207 
14208 			    p0 = 5;
14209 			    s = smp[n0].sm_rules;
14210 			    while (*s == '-')
14211 			    {
14212 				/* "k0" gets NOT reduced because
14213 				 * "if (k0 == k)" */
14214 				s++;
14215 			    }
14216 			    if (*s == '<')
14217 				s++;
14218 			    if (VIM_ISDIGIT(*s))
14219 			    {
14220 				p0 = *s - '0';
14221 				s++;
14222 			    }
14223 
14224 			    if (*s == NUL
14225 				    /* *s == '^' cuts */
14226 				    || (*s == '$'
14227 					 && !spell_iswordp_w(word + i + k0,
14228 								     curbuf)))
14229 			    {
14230 				if (k0 == k)
14231 				    /* this is just a piece of the string */
14232 				    continue;
14233 
14234 				if (p0 < pri)
14235 				    /* priority too low */
14236 				    continue;
14237 				/* rule fits; stop search */
14238 				break;
14239 			    }
14240 			}
14241 
14242 			if (p0 >= pri && (smp[n0].sm_lead_w[0] & 0xff)
14243 							       == (c0 & 0xff))
14244 			    continue;
14245 		    }
14246 
14247 		    /* replace string */
14248 		    ws = smp[n].sm_to_w;
14249 		    s = smp[n].sm_rules;
14250 		    p0 = (vim_strchr(s, '<') != NULL) ? 1 : 0;
14251 		    if (p0 == 1 && z == 0)
14252 		    {
14253 			/* rule with '<' is used */
14254 			if (reslen > 0 && ws != NULL && *ws != NUL
14255 				&& (wres[reslen - 1] == c
14256 						    || wres[reslen - 1] == *ws))
14257 			    reslen--;
14258 			z0 = 1;
14259 			z = 1;
14260 			k0 = 0;
14261 			if (ws != NULL)
14262 			    while (*ws != NUL && word[i + k0] != NUL)
14263 			    {
14264 				word[i + k0] = *ws;
14265 				k0++;
14266 				ws++;
14267 			    }
14268 			if (k > k0)
14269 			    mch_memmove(word + i + k0, word + i + k,
14270 				    sizeof(int) * (STRLEN(word + i + k) + 1));
14271 
14272 			/* new "actual letter" */
14273 			c = word[i];
14274 		    }
14275 		    else
14276 		    {
14277 			/* no '<' rule used */
14278 			i += k - 1;
14279 			z = 0;
14280 			if (ws != NULL)
14281 			    while (*ws != NUL && ws[1] != NUL
14282 							  && reslen < MAXWLEN)
14283 			    {
14284 				if (reslen == 0 || wres[reslen - 1] != *ws)
14285 				    wres[reslen++] = *ws;
14286 				ws++;
14287 			    }
14288 			/* new "actual letter" */
14289 			if (ws == NULL)
14290 			    c = NUL;
14291 			else
14292 			    c = *ws;
14293 			if (strstr((char *)s, "^^") != NULL)
14294 			{
14295 			    if (c != NUL)
14296 				wres[reslen++] = c;
14297 			    mch_memmove(word, word + i + 1,
14298 				    sizeof(int) * (STRLEN(word + i + 1) + 1));
14299 			    i = 0;
14300 			    z0 = 1;
14301 			}
14302 		    }
14303 		    break;
14304 		}
14305 	    }
14306 	}
14307 	else if (vim_iswhite(c))
14308 	{
14309 	    c = ' ';
14310 	    k = 1;
14311 	}
14312 
14313 	if (z0 == 0)
14314 	{
14315 	    if (k && !p0 && reslen < MAXWLEN && c != NUL
14316 		    && (!slang->sl_collapse || reslen == 0
14317 						     || wres[reslen - 1] != c))
14318 		/* condense only double letters */
14319 		wres[reslen++] = c;
14320 
14321 	    i++;
14322 	    z = 0;
14323 	    k = 0;
14324 	}
14325     }
14326 
14327     /* Convert wide characters in "wres" to a multi-byte string in "res". */
14328     l = 0;
14329     for (n = 0; n < reslen; ++n)
14330     {
14331 	l += mb_char2bytes(wres[n], res + l);
14332 	if (l + MB_MAXBYTES > MAXWLEN)
14333 	    break;
14334     }
14335     res[l] = NUL;
14336 }
14337 #endif
14338 
14339 /*
14340  * Compute a score for two sound-a-like words.
14341  * This permits up to two inserts/deletes/swaps/etc. to keep things fast.
14342  * Instead of a generic loop we write out the code.  That keeps it fast by
14343  * avoiding checks that will not be possible.
14344  */
14345     static int
14346 soundalike_score(goodstart, badstart)
14347     char_u	*goodstart;	/* sound-folded good word */
14348     char_u	*badstart;	/* sound-folded bad word */
14349 {
14350     char_u	*goodsound = goodstart;
14351     char_u	*badsound = badstart;
14352     int		goodlen;
14353     int		badlen;
14354     int		n;
14355     char_u	*pl, *ps;
14356     char_u	*pl2, *ps2;
14357     int		score = 0;
14358 
14359     /* adding/inserting "*" at the start (word starts with vowel) shouldn't be
14360      * counted so much, vowels halfway the word aren't counted at all. */
14361     if ((*badsound == '*' || *goodsound == '*') && *badsound != *goodsound)
14362     {
14363 	if (badsound[1] == goodsound[1]
14364 		|| (badsound[1] != NUL
14365 		    && goodsound[1] != NUL
14366 		    && badsound[2] == goodsound[2]))
14367 	{
14368 	    /* handle like a substitute */
14369 	}
14370 	else
14371 	{
14372 	    score = 2 * SCORE_DEL / 3;
14373 	    if (*badsound == '*')
14374 		++badsound;
14375 	    else
14376 		++goodsound;
14377 	}
14378     }
14379 
14380     goodlen = STRLEN(goodsound);
14381     badlen = STRLEN(badsound);
14382 
14383     /* Return quickly if the lenghts are too different to be fixed by two
14384      * changes. */
14385     n = goodlen - badlen;
14386     if (n < -2 || n > 2)
14387 	return SCORE_MAXMAX;
14388 
14389     if (n > 0)
14390     {
14391 	pl = goodsound;	    /* goodsound is longest */
14392 	ps = badsound;
14393     }
14394     else
14395     {
14396 	pl = badsound;	    /* badsound is longest */
14397 	ps = goodsound;
14398     }
14399 
14400     /* Skip over the identical part. */
14401     while (*pl == *ps && *pl != NUL)
14402     {
14403 	++pl;
14404 	++ps;
14405     }
14406 
14407     switch (n)
14408     {
14409 	case -2:
14410 	case 2:
14411 	    /*
14412 	     * Must delete two characters from "pl".
14413 	     */
14414 	    ++pl;	/* first delete */
14415 	    while (*pl == *ps)
14416 	    {
14417 		++pl;
14418 		++ps;
14419 	    }
14420 	    /* strings must be equal after second delete */
14421 	    if (STRCMP(pl + 1, ps) == 0)
14422 		return score + SCORE_DEL * 2;
14423 
14424 	    /* Failed to compare. */
14425 	    break;
14426 
14427 	case -1:
14428 	case 1:
14429 	    /*
14430 	     * Minimal one delete from "pl" required.
14431 	     */
14432 
14433 	    /* 1: delete */
14434 	    pl2 = pl + 1;
14435 	    ps2 = ps;
14436 	    while (*pl2 == *ps2)
14437 	    {
14438 		if (*pl2 == NUL)	/* reached the end */
14439 		    return score + SCORE_DEL;
14440 		++pl2;
14441 		++ps2;
14442 	    }
14443 
14444 	    /* 2: delete then swap, then rest must be equal */
14445 	    if (pl2[0] == ps2[1] && pl2[1] == ps2[0]
14446 					     && STRCMP(pl2 + 2, ps2 + 2) == 0)
14447 		return score + SCORE_DEL + SCORE_SWAP;
14448 
14449 	    /* 3: delete then substitute, then the rest must be equal */
14450 	    if (STRCMP(pl2 + 1, ps2 + 1) == 0)
14451 		return score + SCORE_DEL + SCORE_SUBST;
14452 
14453 	    /* 4: first swap then delete */
14454 	    if (pl[0] == ps[1] && pl[1] == ps[0])
14455 	    {
14456 		pl2 = pl + 2;	    /* swap, skip two chars */
14457 		ps2 = ps + 2;
14458 		while (*pl2 == *ps2)
14459 		{
14460 		    ++pl2;
14461 		    ++ps2;
14462 		}
14463 		/* delete a char and then strings must be equal */
14464 		if (STRCMP(pl2 + 1, ps2) == 0)
14465 		    return score + SCORE_SWAP + SCORE_DEL;
14466 	    }
14467 
14468 	    /* 5: first substitute then delete */
14469 	    pl2 = pl + 1;	    /* substitute, skip one char */
14470 	    ps2 = ps + 1;
14471 	    while (*pl2 == *ps2)
14472 	    {
14473 		++pl2;
14474 		++ps2;
14475 	    }
14476 	    /* delete a char and then strings must be equal */
14477 	    if (STRCMP(pl2 + 1, ps2) == 0)
14478 		return score + SCORE_SUBST + SCORE_DEL;
14479 
14480 	    /* Failed to compare. */
14481 	    break;
14482 
14483 	case 0:
14484 	    /*
14485 	     * Lenghts are equal, thus changes must result in same length: An
14486 	     * insert is only possible in combination with a delete.
14487 	     * 1: check if for identical strings
14488 	     */
14489 	    if (*pl == NUL)
14490 		return score;
14491 
14492 	    /* 2: swap */
14493 	    if (pl[0] == ps[1] && pl[1] == ps[0])
14494 	    {
14495 		pl2 = pl + 2;	    /* swap, skip two chars */
14496 		ps2 = ps + 2;
14497 		while (*pl2 == *ps2)
14498 		{
14499 		    if (*pl2 == NUL)	/* reached the end */
14500 			return score + SCORE_SWAP;
14501 		    ++pl2;
14502 		    ++ps2;
14503 		}
14504 		/* 3: swap and swap again */
14505 		if (pl2[0] == ps2[1] && pl2[1] == ps2[0]
14506 					     && STRCMP(pl2 + 2, ps2 + 2) == 0)
14507 		    return score + SCORE_SWAP + SCORE_SWAP;
14508 
14509 		/* 4: swap and substitute */
14510 		if (STRCMP(pl2 + 1, ps2 + 1) == 0)
14511 		    return score + SCORE_SWAP + SCORE_SUBST;
14512 	    }
14513 
14514 	    /* 5: substitute */
14515 	    pl2 = pl + 1;
14516 	    ps2 = ps + 1;
14517 	    while (*pl2 == *ps2)
14518 	    {
14519 		if (*pl2 == NUL)	/* reached the end */
14520 		    return score + SCORE_SUBST;
14521 		++pl2;
14522 		++ps2;
14523 	    }
14524 
14525 	    /* 6: substitute and swap */
14526 	    if (pl2[0] == ps2[1] && pl2[1] == ps2[0]
14527 					     && STRCMP(pl2 + 2, ps2 + 2) == 0)
14528 		return score + SCORE_SUBST + SCORE_SWAP;
14529 
14530 	    /* 7: substitute and substitute */
14531 	    if (STRCMP(pl2 + 1, ps2 + 1) == 0)
14532 		return score + SCORE_SUBST + SCORE_SUBST;
14533 
14534 	    /* 8: insert then delete */
14535 	    pl2 = pl;
14536 	    ps2 = ps + 1;
14537 	    while (*pl2 == *ps2)
14538 	    {
14539 		++pl2;
14540 		++ps2;
14541 	    }
14542 	    if (STRCMP(pl2 + 1, ps2) == 0)
14543 		return score + SCORE_INS + SCORE_DEL;
14544 
14545 	    /* 9: delete then insert */
14546 	    pl2 = pl + 1;
14547 	    ps2 = ps;
14548 	    while (*pl2 == *ps2)
14549 	    {
14550 		++pl2;
14551 		++ps2;
14552 	    }
14553 	    if (STRCMP(pl2, ps2 + 1) == 0)
14554 		return score + SCORE_INS + SCORE_DEL;
14555 
14556 	    /* Failed to compare. */
14557 	    break;
14558     }
14559 
14560     return SCORE_MAXMAX;
14561 }
14562 
14563 /*
14564  * Compute the "edit distance" to turn "badword" into "goodword".  The less
14565  * deletes/inserts/substitutes/swaps are required the lower the score.
14566  *
14567  * The algorithm is described by Du and Chang, 1992.
14568  * The implementation of the algorithm comes from Aspell editdist.cpp,
14569  * edit_distance().  It has been converted from C++ to C and modified to
14570  * support multi-byte characters.
14571  */
14572     static int
14573 spell_edit_score(slang, badword, goodword)
14574     slang_T	*slang;
14575     char_u	*badword;
14576     char_u	*goodword;
14577 {
14578     int		*cnt;
14579     int		badlen, goodlen;	/* lenghts including NUL */
14580     int		j, i;
14581     int		t;
14582     int		bc, gc;
14583     int		pbc, pgc;
14584 #ifdef FEAT_MBYTE
14585     char_u	*p;
14586     int		wbadword[MAXWLEN];
14587     int		wgoodword[MAXWLEN];
14588 
14589     if (has_mbyte)
14590     {
14591 	/* Get the characters from the multi-byte strings and put them in an
14592 	 * int array for easy access. */
14593 	for (p = badword, badlen = 0; *p != NUL; )
14594 	    wbadword[badlen++] = mb_cptr2char_adv(&p);
14595 	wbadword[badlen++] = 0;
14596 	for (p = goodword, goodlen = 0; *p != NUL; )
14597 	    wgoodword[goodlen++] = mb_cptr2char_adv(&p);
14598 	wgoodword[goodlen++] = 0;
14599     }
14600     else
14601 #endif
14602     {
14603 	badlen = STRLEN(badword) + 1;
14604 	goodlen = STRLEN(goodword) + 1;
14605     }
14606 
14607     /* We use "cnt" as an array: CNT(badword_idx, goodword_idx). */
14608 #define CNT(a, b)   cnt[(a) + (b) * (badlen + 1)]
14609     cnt = (int *)lalloc((long_u)(sizeof(int) * (badlen + 1) * (goodlen + 1)),
14610 									TRUE);
14611     if (cnt == NULL)
14612 	return 0;	/* out of memory */
14613 
14614     CNT(0, 0) = 0;
14615     for (j = 1; j <= goodlen; ++j)
14616 	CNT(0, j) = CNT(0, j - 1) + SCORE_INS;
14617 
14618     for (i = 1; i <= badlen; ++i)
14619     {
14620 	CNT(i, 0) = CNT(i - 1, 0) + SCORE_DEL;
14621 	for (j = 1; j <= goodlen; ++j)
14622 	{
14623 #ifdef FEAT_MBYTE
14624 	    if (has_mbyte)
14625 	    {
14626 		bc = wbadword[i - 1];
14627 		gc = wgoodword[j - 1];
14628 	    }
14629 	    else
14630 #endif
14631 	    {
14632 		bc = badword[i - 1];
14633 		gc = goodword[j - 1];
14634 	    }
14635 	    if (bc == gc)
14636 		CNT(i, j) = CNT(i - 1, j - 1);
14637 	    else
14638 	    {
14639 		/* Use a better score when there is only a case difference. */
14640 		if (SPELL_TOFOLD(bc) == SPELL_TOFOLD(gc))
14641 		    CNT(i, j) = SCORE_ICASE + CNT(i - 1, j - 1);
14642 		else
14643 		{
14644 		    /* For a similar character use SCORE_SIMILAR. */
14645 		    if (slang != NULL
14646 			    && slang->sl_has_map
14647 			    && similar_chars(slang, gc, bc))
14648 			CNT(i, j) = SCORE_SIMILAR + CNT(i - 1, j - 1);
14649 		    else
14650 			CNT(i, j) = SCORE_SUBST + CNT(i - 1, j - 1);
14651 		}
14652 
14653 		if (i > 1 && j > 1)
14654 		{
14655 #ifdef FEAT_MBYTE
14656 		    if (has_mbyte)
14657 		    {
14658 			pbc = wbadword[i - 2];
14659 			pgc = wgoodword[j - 2];
14660 		    }
14661 		    else
14662 #endif
14663 		    {
14664 			pbc = badword[i - 2];
14665 			pgc = goodword[j - 2];
14666 		    }
14667 		    if (bc == pgc && pbc == gc)
14668 		    {
14669 			t = SCORE_SWAP + CNT(i - 2, j - 2);
14670 			if (t < CNT(i, j))
14671 			    CNT(i, j) = t;
14672 		    }
14673 		}
14674 		t = SCORE_DEL + CNT(i - 1, j);
14675 		if (t < CNT(i, j))
14676 		    CNT(i, j) = t;
14677 		t = SCORE_INS + CNT(i, j - 1);
14678 		if (t < CNT(i, j))
14679 		    CNT(i, j) = t;
14680 	    }
14681 	}
14682     }
14683 
14684     i = CNT(badlen - 1, goodlen - 1);
14685     vim_free(cnt);
14686     return i;
14687 }
14688 
/*
 * An alternative that spell_edit_score_limit() has postponed: the position
 * in both words to continue at and the score collected to get there.
 */
typedef struct
{
    int         badi;       /* index in the bad word to continue at */
    int         goodi;      /* index in the good word to continue at */
    int         score;      /* score for the edits done before this point */
} limitscore_T;
14695 
14696 /*
14697  * Like spell_edit_score(), but with a limit on the score to make it faster.
14698  * May return SCORE_MAXMAX when the score is higher than "limit".
14699  *
14700  * This uses a stack for the edits still to be tried.
14701  * The idea comes from Aspell leditdist.cpp.  Rewritten in C and added support
14702  * for multi-byte characters.
14703  */
14704     static int
14705 spell_edit_score_limit(slang, badword, goodword, limit)
14706     slang_T	*slang;
14707     char_u	*badword;
14708     char_u	*goodword;
14709     int		limit;
14710 {
14711     limitscore_T    stack[10];		/* allow for over 3 * 2 edits */
14712     int		    stackidx;
14713     int		    bi, gi;
14714     int		    bi2, gi2;
14715     int		    bc, gc;
14716     int		    score;
14717     int		    score_off;
14718     int		    minscore;
14719     int		    round;
14720 
14721 #ifdef FEAT_MBYTE
14722     /* Multi-byte characters require a bit more work, use a different function
14723      * to avoid testing "has_mbyte" quite often. */
14724     if (has_mbyte)
14725 	return spell_edit_score_limit_w(slang, badword, goodword, limit);
14726 #endif
14727 
14728     /*
14729      * The idea is to go from start to end over the words.  So long as
14730      * characters are equal just continue, this always gives the lowest score.
14731      * When there is a difference try several alternatives.  Each alternative
14732      * increases "score" for the edit distance.  Some of the alternatives are
14733      * pushed unto a stack and tried later, some are tried right away.  At the
14734      * end of the word the score for one alternative is known.  The lowest
14735      * possible score is stored in "minscore".
14736      */
14737     stackidx = 0;
14738     bi = 0;
14739     gi = 0;
14740     score = 0;
14741     minscore = limit + 1;
14742 
14743     for (;;)
14744     {
14745 	/* Skip over an equal part, score remains the same. */
14746 	for (;;)
14747 	{
14748 	    bc = badword[bi];
14749 	    gc = goodword[gi];
14750 	    if (bc != gc)	/* stop at a char that's different */
14751 		break;
14752 	    if (bc == NUL)	/* both words end */
14753 	    {
14754 		if (score < minscore)
14755 		    minscore = score;
14756 		goto pop;	/* do next alternative */
14757 	    }
14758 	    ++bi;
14759 	    ++gi;
14760 	}
14761 
14762 	if (gc == NUL)    /* goodword ends, delete badword chars */
14763 	{
14764 	    do
14765 	    {
14766 		if ((score += SCORE_DEL) >= minscore)
14767 		    goto pop;	    /* do next alternative */
14768 	    } while (badword[++bi] != NUL);
14769 	    minscore = score;
14770 	}
14771 	else if (bc == NUL) /* badword ends, insert badword chars */
14772 	{
14773 	    do
14774 	    {
14775 		if ((score += SCORE_INS) >= minscore)
14776 		    goto pop;	    /* do next alternative */
14777 	    } while (goodword[++gi] != NUL);
14778 	    minscore = score;
14779 	}
14780 	else			/* both words continue */
14781 	{
14782 	    /* If not close to the limit, perform a change.  Only try changes
14783 	     * that may lead to a lower score than "minscore".
14784 	     * round 0: try deleting a char from badword
14785 	     * round 1: try inserting a char in badword */
14786 	    for (round = 0; round <= 1; ++round)
14787 	    {
14788 		score_off = score + (round == 0 ? SCORE_DEL : SCORE_INS);
14789 		if (score_off < minscore)
14790 		{
14791 		    if (score_off + SCORE_EDIT_MIN >= minscore)
14792 		    {
14793 			/* Near the limit, rest of the words must match.  We
14794 			 * can check that right now, no need to push an item
14795 			 * onto the stack. */
14796 			bi2 = bi + 1 - round;
14797 			gi2 = gi + round;
14798 			while (goodword[gi2] == badword[bi2])
14799 			{
14800 			    if (goodword[gi2] == NUL)
14801 			    {
14802 				minscore = score_off;
14803 				break;
14804 			    }
14805 			    ++bi2;
14806 			    ++gi2;
14807 			}
14808 		    }
14809 		    else
14810 		    {
14811 			/* try deleting/inserting a character later */
14812 			stack[stackidx].badi = bi + 1 - round;
14813 			stack[stackidx].goodi = gi + round;
14814 			stack[stackidx].score = score_off;
14815 			++stackidx;
14816 		    }
14817 		}
14818 	    }
14819 
14820 	    if (score + SCORE_SWAP < minscore)
14821 	    {
14822 		/* If swapping two characters makes a match then the
14823 		 * substitution is more expensive, thus there is no need to
14824 		 * try both. */
14825 		if (gc == badword[bi + 1] && bc == goodword[gi + 1])
14826 		{
14827 		    /* Swap two characters, that is: skip them. */
14828 		    gi += 2;
14829 		    bi += 2;
14830 		    score += SCORE_SWAP;
14831 		    continue;
14832 		}
14833 	    }
14834 
14835 	    /* Substitute one character for another which is the same
14836 	     * thing as deleting a character from both goodword and badword.
14837 	     * Use a better score when there is only a case difference. */
14838 	    if (SPELL_TOFOLD(bc) == SPELL_TOFOLD(gc))
14839 		score += SCORE_ICASE;
14840 	    else
14841 	    {
14842 		/* For a similar character use SCORE_SIMILAR. */
14843 		if (slang != NULL
14844 			&& slang->sl_has_map
14845 			&& similar_chars(slang, gc, bc))
14846 		    score += SCORE_SIMILAR;
14847 		else
14848 		    score += SCORE_SUBST;
14849 	    }
14850 
14851 	    if (score < minscore)
14852 	    {
14853 		/* Do the substitution. */
14854 		++gi;
14855 		++bi;
14856 		continue;
14857 	    }
14858 	}
14859 pop:
14860 	/*
14861 	 * Get here to try the next alternative, pop it from the stack.
14862 	 */
14863 	if (stackidx == 0)		/* stack is empty, finished */
14864 	    break;
14865 
14866 	/* pop an item from the stack */
14867 	--stackidx;
14868 	gi = stack[stackidx].goodi;
14869 	bi = stack[stackidx].badi;
14870 	score = stack[stackidx].score;
14871     }
14872 
14873     /* When the score goes over "limit" it may actually be much higher.
14874      * Return a very large number to avoid going below the limit when giving a
14875      * bonus. */
14876     if (minscore > limit)
14877 	return SCORE_MAXMAX;
14878     return minscore;
14879 }
14880 
#ifdef FEAT_MBYTE
/*
 * Multi-byte version of spell_edit_score_limit().
 * Keep it in sync with the above!
 *
 * Both words are first converted to arrays of character values, after that
 * the same search is done on those arrays.
 */
    static int
spell_edit_score_limit_w(slang, badword, goodword, limit)
    slang_T     *slang;
    char_u      *badword;
    char_u      *goodword;
    int         limit;
{
    limitscore_T    stack[10];          /* allow for over 3 * 2 edits */
    int             stackidx;
    int             bi, gi;
    int             bi2, gi2;
    int             bc, gc;
    int             score;
    int             score_off;
    int             minscore;
    int             round;
    char_u          *p;
    int             wbadword[MAXWLEN];
    int             wgoodword[MAXWLEN];

    /* Get the characters from the multi-byte strings and put them in an
     * int array for easy access.  Stop at MAXWLEN - 1 characters, so that the
     * terminating zero always fits and an overlong word is truncated instead
     * of written past the end of the array. */
    bi = 0;
    for (p = badword; *p != NUL && bi < MAXWLEN - 1; )
        wbadword[bi++] = mb_cptr2char_adv(&p);
    wbadword[bi++] = 0;
    gi = 0;
    for (p = goodword; *p != NUL && gi < MAXWLEN - 1; )
        wgoodword[gi++] = mb_cptr2char_adv(&p);
    wgoodword[gi++] = 0;

    /*
     * The idea is to go from start to end over the words.  So long as
     * characters are equal just continue, this always gives the lowest score.
     * When there is a difference try several alternatives.  Each alternative
     * increases "score" for the edit distance.  Some of the alternatives are
     * pushed unto a stack and tried later, some are tried right away.  At the
     * end of the word the score for one alternative is known.  The lowest
     * possible score is stored in "minscore".
     */
    stackidx = 0;
    bi = 0;
    gi = 0;
    score = 0;
    minscore = limit + 1;

    for (;;)
    {
        /* Skip over an equal part, score remains the same. */
        for (;;)
        {
            bc = wbadword[bi];
            gc = wgoodword[gi];

            if (bc != gc)       /* stop at a char that's different */
                break;
            if (bc == NUL)      /* both words end */
            {
                if (score < minscore)
                    minscore = score;
                goto pop;       /* do next alternative */
            }
            ++bi;
            ++gi;
        }

        if (gc == NUL)    /* goodword ends, delete badword chars */
        {
            do
            {
                if ((score += SCORE_DEL) >= minscore)
                    goto pop;       /* do next alternative */
            } while (wbadword[++bi] != NUL);
            minscore = score;
        }
        else if (bc == NUL) /* badword ends, insert remaining goodword chars */
        {
            do
            {
                if ((score += SCORE_INS) >= minscore)
                    goto pop;       /* do next alternative */
            } while (wgoodword[++gi] != NUL);
            minscore = score;
        }
        else                    /* both words continue */
        {
            /* If not close to the limit, perform a change.  Only try changes
             * that may lead to a lower score than "minscore".
             * round 0: try deleting a char from badword
             * round 1: try inserting a char in badword */
            for (round = 0; round <= 1; ++round)
            {
                score_off = score + (round == 0 ? SCORE_DEL : SCORE_INS);
                if (score_off < minscore)
                {
                    if (score_off + SCORE_EDIT_MIN >= minscore)
                    {
                        /* Near the limit, rest of the words must match.  We
                         * can check that right now, no need to push an item
                         * onto the stack. */
                        bi2 = bi + 1 - round;
                        gi2 = gi + round;
                        while (wgoodword[gi2] == wbadword[bi2])
                        {
                            if (wgoodword[gi2] == NUL)
                            {
                                minscore = score_off;
                                break;
                            }
                            ++bi2;
                            ++gi2;
                        }
                    }
                    else if (stackidx
                                < (int)(sizeof(stack) / sizeof(stack[0])))
                    {
                        /* try deleting/inserting a character later */
                        stack[stackidx].badi = bi + 1 - round;
                        stack[stackidx].goodi = gi + round;
                        stack[stackidx].score = score_off;
                        ++stackidx;
                    }
                    /* else: the stack is full.  Drop this alternative rather
                     * than writing past the end of "stack"; the result may
                     * then be higher than the real minimum. */
                }
            }

            if (score + SCORE_SWAP < minscore)
            {
                /* If swapping two characters makes a match then the
                 * substitution is more expensive, thus there is no need to
                 * try both. */
                if (gc == wbadword[bi + 1] && bc == wgoodword[gi + 1])
                {
                    /* Swap two characters, that is: skip them. */
                    gi += 2;
                    bi += 2;
                    score += SCORE_SWAP;
                    continue;
                }
            }

            /* Substitute one character for another which is the same
             * thing as deleting a character from both goodword and badword.
             * Use a better score when there is only a case difference. */
            if (SPELL_TOFOLD(bc) == SPELL_TOFOLD(gc))
                score += SCORE_ICASE;
            else
            {
                /* For a similar character use SCORE_SIMILAR. */
                if (slang != NULL
                        && slang->sl_has_map
                        && similar_chars(slang, gc, bc))
                    score += SCORE_SIMILAR;
                else
                    score += SCORE_SUBST;
            }

            if (score < minscore)
            {
                /* Do the substitution. */
                ++gi;
                ++bi;
                continue;
            }
        }
pop:
        /*
         * Get here to try the next alternative, pop it from the stack.
         */
        if (stackidx == 0)              /* stack is empty, finished */
            break;

        /* pop an item from the stack */
        --stackidx;
        gi = stack[stackidx].goodi;
        bi = stack[stackidx].badi;
        score = stack[stackidx].score;
    }

    /* When the score goes over "limit" it may actually be much higher.
     * Return a very large number to avoid going below the limit when giving a
     * bonus. */
    if (minscore > limit)
        return SCORE_MAXMAX;
    return minscore;
}
#endif
15071 
15072 /*
15073  * ":spellinfo"
15074  */
15075 /*ARGSUSED*/
15076     void
15077 ex_spellinfo(eap)
15078     exarg_T *eap;
15079 {
15080     int		lpi;
15081     langp_T	*lp;
15082     char_u	*p;
15083 
15084     if (no_spell_checking(curwin))
15085 	return;
15086 
15087     msg_start();
15088     for (lpi = 0; lpi < curbuf->b_langp.ga_len && !got_int; ++lpi)
15089     {
15090 	lp = LANGP_ENTRY(curbuf->b_langp, lpi);
15091 	msg_puts((char_u *)"file: ");
15092 	msg_puts(lp->lp_slang->sl_fname);
15093 	msg_putchar('\n');
15094 	p = lp->lp_slang->sl_info;
15095 	if (p != NULL)
15096 	{
15097 	    msg_puts(p);
15098 	    msg_putchar('\n');
15099 	}
15100     }
15101     msg_end();
15102 }
15103 
/* Bit flags for the "dumpflags" argument used when dumping words. */
#define DUMPFLAG_KEEPCASE   0x01    /* round 2: keep-case tree */
#define DUMPFLAG_COUNT      0x02    /* include word count */
#define DUMPFLAG_ICASE      0x04    /* ignore case when finding matches */
#define DUMPFLAG_ONECAP     0x08    /* pattern starts with capital */
#define DUMPFLAG_ALLCAP     0x10    /* pattern is all capitals */
15109 
15110 /*
15111  * ":spelldump"
15112  */
15113     void
15114 ex_spelldump(eap)
15115     exarg_T *eap;
15116 {
15117     buf_T	*buf = curbuf;
15118 
15119     if (no_spell_checking(curwin))
15120 	return;
15121 
15122     /* Create a new empty buffer by splitting the window. */
15123     do_cmdline_cmd((char_u *)"new");
15124     if (!bufempty() || !buf_valid(buf))
15125 	return;
15126 
15127     spell_dump_compl(buf, NULL, 0, NULL, eap->forceit ? DUMPFLAG_COUNT : 0);
15128 
15129     /* Delete the empty line that we started with. */
15130     if (curbuf->b_ml.ml_line_count > 1)
15131 	ml_delete(curbuf->b_ml.ml_line_count, FALSE);
15132 
15133     redraw_later(NOT_VALID);
15134 }
15135 
15136 /*
15137  * Go through all possible words and:
15138  * 1. When "pat" is NULL: dump a list of all words in the current buffer.
15139  *	"ic" and "dir" are not used.
15140  * 2. When "pat" is not NULL: add matching words to insert mode completion.
15141  */
15142     void
15143 spell_dump_compl(buf, pat, ic, dir, dumpflags_arg)
15144     buf_T	*buf;	    /* buffer with spell checking */
15145     char_u	*pat;	    /* leading part of the word */
15146     int		ic;	    /* ignore case */
15147     int		*dir;	    /* direction for adding matches */
15148     int		dumpflags_arg;	/* DUMPFLAG_* */
15149 {
15150     langp_T	*lp;
15151     slang_T	*slang;
15152     idx_T	arridx[MAXWLEN];
15153     int		curi[MAXWLEN];
15154     char_u	word[MAXWLEN];
15155     int		c;
15156     char_u	*byts;
15157     idx_T	*idxs;
15158     linenr_T	lnum = 0;
15159     int		round;
15160     int		depth;
15161     int		n;
15162     int		flags;
15163     char_u	*region_names = NULL;	    /* region names being used */
15164     int		do_region = TRUE;	    /* dump region names and numbers */
15165     char_u	*p;
15166     int		lpi;
15167     int		dumpflags = dumpflags_arg;
15168     int		patlen;
15169 
15170     /* When ignoring case or when the pattern starts with capital pass this on
15171      * to dump_word(). */
15172     if (pat != NULL)
15173     {
15174 	if (ic)
15175 	    dumpflags |= DUMPFLAG_ICASE;
15176 	else
15177 	{
15178 	    n = captype(pat, NULL);
15179 	    if (n == WF_ONECAP)
15180 		dumpflags |= DUMPFLAG_ONECAP;
15181 	    else if (n == WF_ALLCAP
15182 #ifdef FEAT_MBYTE
15183 		    && (int)STRLEN(pat) > mb_ptr2len(pat)
15184 #else
15185 		    && (int)STRLEN(pat) > 1
15186 #endif
15187 		    )
15188 		dumpflags |= DUMPFLAG_ALLCAP;
15189 	}
15190     }
15191 
15192     /* Find out if we can support regions: All languages must support the same
15193      * regions or none at all. */
15194     for (lpi = 0; lpi < buf->b_langp.ga_len; ++lpi)
15195     {
15196 	lp = LANGP_ENTRY(buf->b_langp, lpi);
15197 	p = lp->lp_slang->sl_regions;
15198 	if (p[0] != 0)
15199 	{
15200 	    if (region_names == NULL)	    /* first language with regions */
15201 		region_names = p;
15202 	    else if (STRCMP(region_names, p) != 0)
15203 	    {
15204 		do_region = FALSE;	    /* region names are different */
15205 		break;
15206 	    }
15207 	}
15208     }
15209 
15210     if (do_region && region_names != NULL)
15211     {
15212 	if (pat == NULL)
15213 	{
15214 	    vim_snprintf((char *)IObuff, IOSIZE, "/regions=%s", region_names);
15215 	    ml_append(lnum++, IObuff, (colnr_T)0, FALSE);
15216 	}
15217     }
15218     else
15219 	do_region = FALSE;
15220 
15221     /*
15222      * Loop over all files loaded for the entries in 'spelllang'.
15223      */
15224     for (lpi = 0; lpi < buf->b_langp.ga_len; ++lpi)
15225     {
15226 	lp = LANGP_ENTRY(buf->b_langp, lpi);
15227 	slang = lp->lp_slang;
15228 	if (slang->sl_fbyts == NULL)	    /* reloading failed */
15229 	    continue;
15230 
15231 	if (pat == NULL)
15232 	{
15233 	    vim_snprintf((char *)IObuff, IOSIZE, "# file: %s", slang->sl_fname);
15234 	    ml_append(lnum++, IObuff, (colnr_T)0, FALSE);
15235 	}
15236 
15237 	/* When matching with a pattern and there are no prefixes only use
15238 	 * parts of the tree that match "pat". */
15239 	if (pat != NULL && slang->sl_pbyts == NULL)
15240 	    patlen = STRLEN(pat);
15241 	else
15242 	    patlen = 0;
15243 
15244 	/* round 1: case-folded tree
15245 	 * round 2: keep-case tree */
15246 	for (round = 1; round <= 2; ++round)
15247 	{
15248 	    if (round == 1)
15249 	    {
15250 		dumpflags &= ~DUMPFLAG_KEEPCASE;
15251 		byts = slang->sl_fbyts;
15252 		idxs = slang->sl_fidxs;
15253 	    }
15254 	    else
15255 	    {
15256 		dumpflags |= DUMPFLAG_KEEPCASE;
15257 		byts = slang->sl_kbyts;
15258 		idxs = slang->sl_kidxs;
15259 	    }
15260 	    if (byts == NULL)
15261 		continue;		/* array is empty */
15262 
15263 	    depth = 0;
15264 	    arridx[0] = 0;
15265 	    curi[0] = 1;
15266 	    while (depth >= 0 && !got_int
15267 				       && (pat == NULL || !compl_interrupted))
15268 	    {
15269 		if (curi[depth] > byts[arridx[depth]])
15270 		{
15271 		    /* Done all bytes at this node, go up one level. */
15272 		    --depth;
15273 		    line_breakcheck();
15274 		    ins_compl_check_keys(50);
15275 		}
15276 		else
15277 		{
15278 		    /* Do one more byte at this node. */
15279 		    n = arridx[depth] + curi[depth];
15280 		    ++curi[depth];
15281 		    c = byts[n];
15282 		    if (c == 0)
15283 		    {
15284 			/* End of word, deal with the word.
15285 			 * Don't use keep-case words in the fold-case tree,
15286 			 * they will appear in the keep-case tree.
15287 			 * Only use the word when the region matches. */
15288 			flags = (int)idxs[n];
15289 			if ((round == 2 || (flags & WF_KEEPCAP) == 0)
15290 				&& (flags & WF_NEEDCOMP) == 0
15291 				&& (do_region
15292 				    || (flags & WF_REGION) == 0
15293 				    || (((unsigned)flags >> 16)
15294 						       & lp->lp_region) != 0))
15295 			{
15296 			    word[depth] = NUL;
15297 			    if (!do_region)
15298 				flags &= ~WF_REGION;
15299 
15300 			    /* Dump the basic word if there is no prefix or
15301 			     * when it's the first one. */
15302 			    c = (unsigned)flags >> 24;
15303 			    if (c == 0 || curi[depth] == 2)
15304 			    {
15305 				dump_word(slang, word, pat, dir,
15306 						      dumpflags, flags, lnum);
15307 				if (pat == NULL)
15308 				    ++lnum;
15309 			    }
15310 
15311 			    /* Apply the prefix, if there is one. */
15312 			    if (c != 0)
15313 				lnum = dump_prefixes(slang, word, pat, dir,
15314 						      dumpflags, flags, lnum);
15315 			}
15316 		    }
15317 		    else
15318 		    {
15319 			/* Normal char, go one level deeper. */
15320 			word[depth++] = c;
15321 			arridx[depth] = idxs[n];
15322 			curi[depth] = 1;
15323 
15324 			/* Check if this characters matches with the pattern.
15325 			 * If not skip the whole tree below it.
15326 			 * Always ignore case here, dump_word() will check
15327 			 * proper case later.  This isn't exactly right when
15328 			 * length changes for multi-byte characters with
15329 			 * ignore case... */
15330 			if (depth <= patlen
15331 					&& MB_STRNICMP(word, pat, depth) != 0)
15332 			    --depth;
15333 		    }
15334 		}
15335 	    }
15336 	}
15337     }
15338 }
15339 
15340 /*
15341  * Dump one word: apply case modifications and append a line to the buffer.
15342  * When "lnum" is zero add insert mode completion.
15343  */
15344     static void
15345 dump_word(slang, word, pat, dir, dumpflags, wordflags, lnum)
15346     slang_T	*slang;
15347     char_u	*word;
15348     char_u	*pat;
15349     int		*dir;
15350     int		dumpflags;
15351     int		wordflags;
15352     linenr_T	lnum;
15353 {
15354     int		keepcap = FALSE;
15355     char_u	*p;
15356     char_u	*tw;
15357     char_u	cword[MAXWLEN];
15358     char_u	badword[MAXWLEN + 10];
15359     int		i;
15360     int		flags = wordflags;
15361 
15362     if (dumpflags & DUMPFLAG_ONECAP)
15363 	flags |= WF_ONECAP;
15364     if (dumpflags & DUMPFLAG_ALLCAP)
15365 	flags |= WF_ALLCAP;
15366 
15367     if ((dumpflags & DUMPFLAG_KEEPCASE) == 0 && (flags & WF_CAPMASK) != 0)
15368     {
15369 	/* Need to fix case according to "flags". */
15370 	make_case_word(word, cword, flags);
15371 	p = cword;
15372     }
15373     else
15374     {
15375 	p = word;
15376 	if ((dumpflags & DUMPFLAG_KEEPCASE)
15377 		&& ((captype(word, NULL) & WF_KEEPCAP) == 0
15378 						 || (flags & WF_FIXCAP) != 0))
15379 	    keepcap = TRUE;
15380     }
15381     tw = p;
15382 
15383     if (pat == NULL)
15384     {
15385 	/* Add flags and regions after a slash. */
15386 	if ((flags & (WF_BANNED | WF_RARE | WF_REGION)) || keepcap)
15387 	{
15388 	    STRCPY(badword, p);
15389 	    STRCAT(badword, "/");
15390 	    if (keepcap)
15391 		STRCAT(badword, "=");
15392 	    if (flags & WF_BANNED)
15393 		STRCAT(badword, "!");
15394 	    else if (flags & WF_RARE)
15395 		STRCAT(badword, "?");
15396 	    if (flags & WF_REGION)
15397 		for (i = 0; i < 7; ++i)
15398 		    if (flags & (0x10000 << i))
15399 			sprintf((char *)badword + STRLEN(badword), "%d", i + 1);
15400 	    p = badword;
15401 	}
15402 
15403 	if (dumpflags & DUMPFLAG_COUNT)
15404 	{
15405 	    hashitem_T  *hi;
15406 
15407 	    /* Include the word count for ":spelldump!". */
15408 	    hi = hash_find(&slang->sl_wordcount, tw);
15409 	    if (!HASHITEM_EMPTY(hi))
15410 	    {
15411 		vim_snprintf((char *)IObuff, IOSIZE, "%s\t%d",
15412 						     tw, HI2WC(hi)->wc_count);
15413 		p = IObuff;
15414 	    }
15415 	}
15416 
15417 	ml_append(lnum, p, (colnr_T)0, FALSE);
15418     }
15419     else if (((dumpflags & DUMPFLAG_ICASE)
15420 		    ? MB_STRNICMP(p, pat, STRLEN(pat)) == 0
15421 		    : STRNCMP(p, pat, STRLEN(pat)) == 0)
15422 		&& ins_compl_add_infercase(p, (int)STRLEN(p),
15423 					  dumpflags & DUMPFLAG_ICASE,
15424 					  NULL, *dir, 0) == OK)
15425 	/* if dir was BACKWARD then honor it just once */
15426 	*dir = FORWARD;
15427 }
15428 
15429 /*
15430  * For ":spelldump": Find matching prefixes for "word".  Prepend each to
15431  * "word" and append a line to the buffer.
15432  * When "lnum" is zero add insert mode completion.
15433  * Return the updated line number.
15434  */
15435     static linenr_T
15436 dump_prefixes(slang, word, pat, dir, dumpflags, flags, startlnum)
15437     slang_T	*slang;
15438     char_u	*word;	    /* case-folded word */
15439     char_u	*pat;
15440     int		*dir;
15441     int		dumpflags;
15442     int		flags;	    /* flags with prefix ID */
15443     linenr_T	startlnum;
15444 {
15445     idx_T	arridx[MAXWLEN];
15446     int		curi[MAXWLEN];
15447     char_u	prefix[MAXWLEN];
15448     char_u	word_up[MAXWLEN];
15449     int		has_word_up = FALSE;
15450     int		c;
15451     char_u	*byts;
15452     idx_T	*idxs;
15453     linenr_T	lnum = startlnum;
15454     int		depth;
15455     int		n;
15456     int		len;
15457     int		i;
15458 
15459     /* If the word starts with a lower-case letter make the word with an
15460      * upper-case letter in word_up[]. */
15461     c = PTR2CHAR(word);
15462     if (SPELL_TOUPPER(c) != c)
15463     {
15464 	onecap_copy(word, word_up, TRUE);
15465 	has_word_up = TRUE;
15466     }
15467 
15468     byts = slang->sl_pbyts;
15469     idxs = slang->sl_pidxs;
15470     if (byts != NULL)		/* array not is empty */
15471     {
15472 	/*
15473 	 * Loop over all prefixes, building them byte-by-byte in prefix[].
15474 	 * When at the end of a prefix check that it supports "flags".
15475 	 */
15476 	depth = 0;
15477 	arridx[0] = 0;
15478 	curi[0] = 1;
15479 	while (depth >= 0 && !got_int)
15480 	{
15481 	    n = arridx[depth];
15482 	    len = byts[n];
15483 	    if (curi[depth] > len)
15484 	    {
15485 		/* Done all bytes at this node, go up one level. */
15486 		--depth;
15487 		line_breakcheck();
15488 	    }
15489 	    else
15490 	    {
15491 		/* Do one more byte at this node. */
15492 		n += curi[depth];
15493 		++curi[depth];
15494 		c = byts[n];
15495 		if (c == 0)
15496 		{
15497 		    /* End of prefix, find out how many IDs there are. */
15498 		    for (i = 1; i < len; ++i)
15499 			if (byts[n + i] != 0)
15500 			    break;
15501 		    curi[depth] += i - 1;
15502 
15503 		    c = valid_word_prefix(i, n, flags, word, slang, FALSE);
15504 		    if (c != 0)
15505 		    {
15506 			vim_strncpy(prefix + depth, word, MAXWLEN - depth - 1);
15507 			dump_word(slang, prefix, pat, dir, dumpflags,
15508 				(c & WF_RAREPFX) ? (flags | WF_RARE)
15509 							       : flags, lnum);
15510 			if (lnum != 0)
15511 			    ++lnum;
15512 		    }
15513 
15514 		    /* Check for prefix that matches the word when the
15515 		     * first letter is upper-case, but only if the prefix has
15516 		     * a condition. */
15517 		    if (has_word_up)
15518 		    {
15519 			c = valid_word_prefix(i, n, flags, word_up, slang,
15520 									TRUE);
15521 			if (c != 0)
15522 			{
15523 			    vim_strncpy(prefix + depth, word_up,
15524 							 MAXWLEN - depth - 1);
15525 			    dump_word(slang, prefix, pat, dir, dumpflags,
15526 				    (c & WF_RAREPFX) ? (flags | WF_RARE)
15527 							       : flags, lnum);
15528 			    if (lnum != 0)
15529 				++lnum;
15530 			}
15531 		    }
15532 		}
15533 		else
15534 		{
15535 		    /* Normal char, go one level deeper. */
15536 		    prefix[depth++] = c;
15537 		    arridx[depth] = idxs[n];
15538 		    curi[depth] = 1;
15539 		}
15540 	    }
15541 	}
15542     }
15543 
15544     return lnum;
15545 }
15546 
15547 /*
15548  * Move "p" to the end of word "start".
15549  * Uses the spell-checking word characters.
15550  */
15551     char_u *
15552 spell_to_word_end(start, buf)
15553     char_u  *start;
15554     buf_T   *buf;
15555 {
15556     char_u  *p = start;
15557 
15558     while (*p != NUL && spell_iswordp(p, buf))
15559 	mb_ptr_adv(p);
15560     return p;
15561 }
15562 
15563 #if defined(FEAT_INS_EXPAND) || defined(PROTO)
15564 /*
15565  * For Insert mode completion CTRL-X s:
15566  * Find start of the word in front of column "startcol".
15567  * We don't check if it is badly spelled, with completion we can only change
15568  * the word in front of the cursor.
15569  * Returns the column number of the word.
15570  */
15571     int
15572 spell_word_start(startcol)
15573     int		startcol;
15574 {
15575     char_u	*line;
15576     char_u	*p;
15577     int		col = 0;
15578 
15579     if (no_spell_checking(curwin))
15580 	return startcol;
15581 
15582     /* Find a word character before "startcol". */
15583     line = ml_get_curline();
15584     for (p = line + startcol; p > line; )
15585     {
15586 	mb_ptr_back(line, p);
15587 	if (spell_iswordp_nmw(p))
15588 	    break;
15589     }
15590 
15591     /* Go back to start of the word. */
15592     while (p > line)
15593     {
15594 	col = p - line;
15595 	mb_ptr_back(line, p);
15596 	if (!spell_iswordp(p, curbuf))
15597 	    break;
15598 	col = 0;
15599     }
15600 
15601     return col;
15602 }
15603 
15604 /*
15605  * Need to check for 'spellcapcheck' now, the word is removed before
15606  * expand_spelling() is called.  Therefore the ugly global variable.
15607  */
15608 static int spell_expand_need_cap;
15609 
15610     void
15611 spell_expand_check_cap(col)
15612     colnr_T col;
15613 {
15614     spell_expand_need_cap = check_need_cap(curwin->w_cursor.lnum, col);
15615 }
15616 
15617 /*
15618  * Get list of spelling suggestions.
15619  * Used for Insert mode completion CTRL-X ?.
15620  * Returns the number of matches.  The matches are in "matchp[]", array of
15621  * allocated strings.
15622  */
15623 /*ARGSUSED*/
15624     int
15625 expand_spelling(lnum, col, pat, matchp)
15626     linenr_T	lnum;
15627     int		col;
15628     char_u	*pat;
15629     char_u	***matchp;
15630 {
15631     garray_T	ga;
15632 
15633     spell_suggest_list(&ga, pat, 100, spell_expand_need_cap, TRUE);
15634     *matchp = ga.ga_data;
15635     return ga.ga_len;
15636 }
15637 #endif
15638 
15639 #endif  /* FEAT_SPELL */
15640