xref: /vim-8.2.3635/src/spellfile.c (revision ea2d8d25)
1 /* vi:set ts=8 sts=4 sw=4 noet:
2  *
3  * VIM - Vi IMproved	by Bram Moolenaar
4  *
5  * Do ":help uganda"  in Vim to read copying and usage conditions.
6  * Do ":help credits" in Vim to see a list of people who contributed.
7  * See README.txt for an overview of the Vim source code.
8  */
9 
10 /*
11  * spellfile.c: code for reading and writing spell files.
12  *
13  * See spell.c for information about spell checking.
14  */
15 
16 /*
17  * Vim spell file format: <HEADER>
18  *			  <SECTIONS>
19  *			  <LWORDTREE>
20  *			  <KWORDTREE>
21  *			  <PREFIXTREE>
22  *
23  * <HEADER>: <fileID> <versionnr>
24  *
25  * <fileID>     8 bytes    "VIMspell"
26  * <versionnr>  1 byte	    VIMSPELLVERSION
27  *
28  *
29  * Sections make it possible to add information to the .spl file without
30  * making it incompatible with previous versions.  There are two kinds of
31  * sections:
32  * 1. Not essential for correct spell checking.  E.g. for making suggestions.
33  *    These are skipped when not supported.
34  * 2. Optional information, but essential for spell checking when present.
35  *    E.g. conditions for affixes.  When this section is present but not
36  *    supported an error message is given.
37  *
38  * <SECTIONS>: <section> ... <sectionend>
39  *
40  * <section>: <sectionID> <sectionflags> <sectionlen> (section contents)
41  *
42  * <sectionID>	  1 byte    number from 0 to 254 identifying the section
43  *
44  * <sectionflags> 1 byte    SNF_REQUIRED: this section is required for correct
45  *					    spell checking
46  *
47  * <sectionlen>   4 bytes   length of section contents, MSB first
48  *
49  * <sectionend>	  1 byte    SN_END
50  *
51  *
52  * sectionID == SN_INFO: <infotext>
53  * <infotext>	 N bytes    free format text with spell file info (version,
54  *			    website, etc)
55  *
56  * sectionID == SN_REGION: <regionname> ...
57  * <regionname>	 2 bytes    Up to MAXREGIONS region names: ca, au, etc.  Lower
58  *			    case.  First <regionname> is region 1.
59  *
60  * sectionID == SN_CHARFLAGS: <charflagslen> <charflags>
61  *				<folcharslen> <folchars>
62  * <charflagslen> 1 byte    Number of bytes in <charflags> (should be 128).
63  * <charflags>  N bytes     List of flags (first one is for character 128):
64  *			    0x01  word character	CF_WORD
65  *			    0x02  upper-case character	CF_UPPER
66  * <folcharslen>  2 bytes   Number of bytes in <folchars>.
67  * <folchars>     N bytes   Folded characters, first one is for character 128.
68  *
69  * sectionID == SN_MIDWORD: <midword>
70  * <midword>     N bytes    Characters that are word characters only when used
71  *			    in the middle of a word.
72  *
73  * sectionID == SN_PREFCOND: <prefcondcnt> <prefcond> ...
74  * <prefcondcnt> 2 bytes    Number of <prefcond> items following.
75  * <prefcond> : <condlen> <condstr>
76  * <condlen>	1 byte	    Length of <condstr>.
77  * <condstr>	N bytes	    Condition for the prefix.
78  *
79  * sectionID == SN_REP: <repcount> <rep> ...
80  * <repcount>	 2 bytes    number of <rep> items, MSB first.
81  * <rep> : <repfromlen> <repfrom> <reptolen> <repto>
82  * <repfromlen>	 1 byte	    length of <repfrom>
83  * <repfrom>	 N bytes    "from" part of replacement
84  * <reptolen>	 1 byte	    length of <repto>
85  * <repto>	 N bytes    "to" part of replacement
86  *
87  * sectionID == SN_REPSAL: <repcount> <rep> ...
88  *   just like SN_REP but for soundfolded words
89  *
90  * sectionID == SN_SAL: <salflags> <salcount> <sal> ...
91  * <salflags>	 1 byte	    flags for soundsalike conversion:
92  *			    SAL_F0LLOWUP
93  *			    SAL_COLLAPSE
94  *			    SAL_REM_ACCENTS
95  * <salcount>    2 bytes    number of <sal> items following
96  * <sal> : <salfromlen> <salfrom> <saltolen> <salto>
97  * <salfromlen>	 1 byte	    length of <salfrom>
98  * <salfrom>	 N bytes    "from" part of soundsalike
99  * <saltolen>	 1 byte	    length of <salto>
100  * <salto>	 N bytes    "to" part of soundsalike
101  *
102  * sectionID == SN_SOFO: <sofofromlen> <sofofrom> <sofotolen> <sofoto>
103  * <sofofromlen> 2 bytes    length of <sofofrom>
104  * <sofofrom>	 N bytes    "from" part of soundfold
105  * <sofotolen>	 2 bytes    length of <sofoto>
106  * <sofoto>	 N bytes    "to" part of soundfold
107  *
108  * sectionID == SN_SUGFILE: <timestamp>
109  * <timestamp>   8 bytes    time in seconds that must match with .sug file
110  *
111  * sectionID == SN_NOSPLITSUGS: nothing
112 	 *
113  * sectionID == SN_NOCOMPOUNDSUGS: nothing
114  *
115  * sectionID == SN_WORDS: <word> ...
116  * <word>	 N bytes    NUL terminated common word
117  *
118  * sectionID == SN_MAP: <mapstr>
119  * <mapstr>	 N bytes    String with sequences of similar characters,
120  *			    separated by slashes.
121  *
122  * sectionID == SN_COMPOUND: <compmax> <compminlen> <compsylmax> <compoptions>
123  *				<comppatcount> <comppattern> ... <compflags>
124  * <compmax>     1 byte	    Maximum nr of words in compound word.
125  * <compminlen>  1 byte	    Minimal word length for compounding.
126  * <compsylmax>  1 byte	    Maximum nr of syllables in compound word.
127  * <compoptions> 2 bytes    COMP_ flags.
128  * <comppatcount> 2 bytes   number of <comppattern> following
129  * <compflags>   N bytes    Flags from COMPOUNDRULE items, separated by
130  *			    slashes.
131  *
132  * <comppattern>: <comppatlen> <comppattext>
133  * <comppatlen>	 1 byte	    length of <comppattext>
134  * <comppattext> N bytes    end or begin chars from CHECKCOMPOUNDPATTERN
135  *
136  * sectionID == SN_NOBREAK: (empty, its presence is what matters)
137  *
138  * sectionID == SN_SYLLABLE: <syllable>
139  * <syllable>    N bytes    String from SYLLABLE item.
140  *
141  * <LWORDTREE>: <wordtree>
142  *
143  * <KWORDTREE>: <wordtree>
144  *
145  * <PREFIXTREE>: <wordtree>
146  *
147  *
148  * <wordtree>: <nodecount> <nodedata> ...
149  *
150  * <nodecount>	4 bytes	    Number of nodes following.  MSB first.
151  *
152  * <nodedata>: <siblingcount> <sibling> ...
153  *
154  * <siblingcount> 1 byte    Number of siblings in this node.  The siblings
155  *			    follow in sorted order.
156  *
157  * <sibling>: <byte> [ <nodeidx> <xbyte>
158  *		      | <flags> [<flags2>] [<region>] [<affixID>]
159  *		      | [<pflags>] <affixID> <prefcondnr> ]
160  *
161  * <byte>	1 byte	    Byte value of the sibling.  Special cases:
162  *			    BY_NOFLAGS: End of word without flags and for all
163  *					regions.
164  *					For PREFIXTREE <affixID> and
165  *					<prefcondnr> follow.
166  *			    BY_FLAGS:   End of word, <flags> follow.
167  *					For PREFIXTREE <pflags>, <affixID>
168  *					and <prefcondnr> follow.
169  *			    BY_FLAGS2:  End of word, <flags> and <flags2>
170  *					follow.  Not used in PREFIXTREE.
171  *			    BY_INDEX:   Child of sibling is shared, <nodeidx>
172  *					and <xbyte> follow.
173  *
174  * <nodeidx>	3 bytes	    Index of child for this sibling, MSB first.
175  *
176  * <xbyte>	1 byte	    byte value of the sibling.
177  *
178  * <flags>	1 byte	    bitmask of:
179  *			    WF_ALLCAP	word must have only capitals
180  *			    WF_ONECAP   first char of word must be capital
181  *			    WF_KEEPCAP	keep-case word
182  *			    WF_FIXCAP   keep-case word, all caps not allowed
183  *			    WF_RARE	rare word
184  *			    WF_BANNED	bad word
185  *			    WF_REGION	<region> follows
186  *			    WF_AFX	<affixID> follows
187  *
188  * <flags2>	1 byte	    Bitmask of:
189  *			    WF_HAS_AFF >> 8   word includes affix
190  *			    WF_NEEDCOMP >> 8  word only valid in compound
191  *			    WF_NOSUGGEST >> 8  word not used for suggestions
192  *			    WF_COMPROOT >> 8  word already a compound
193  *			    WF_NOCOMPBEF >> 8 no compounding before this word
194  *			    WF_NOCOMPAFT >> 8 no compounding after this word
195  *
196  * <pflags>	1 byte	    bitmask of:
197  *			    WFP_RARE	rare prefix
198  *			    WFP_NC	non-combining prefix
199  *			    WFP_UP	letter after prefix made upper case
200  *
201  * <region>	1 byte	    Bitmask for regions in which word is valid.  When
202  *			    omitted it's valid in all regions.
203  *			    Lowest bit is for region 1.
204  *
205  * <affixID>	1 byte	    ID of affix that can be used with this word.  In
206  *			    PREFIXTREE used for the required prefix ID.
207  *
208  * <prefcondnr>	2 bytes	    Prefix condition number, index in <prefcond> list
209  *			    from the SN_PREFCOND section.
210  *
211  * All text characters are in 'encoding', but stored as single bytes.
212  */
213 
214 /*
215  * Vim .sug file format:  <SUGHEADER>
216  *			  <SUGWORDTREE>
217  *			  <SUGTABLE>
218  *
219  * <SUGHEADER>: <fileID> <versionnr> <timestamp>
220  *
221  * <fileID>     6 bytes     "VIMsug"
222  * <versionnr>  1 byte      VIMSUGVERSION
223  * <timestamp>  8 bytes     timestamp that must match with .spl file
224  *
225  *
226  * <SUGWORDTREE>: <wordtree>  (see above, no flags or region used)
227  *
228  *
229  * <SUGTABLE>: <sugwcount> <sugline> ...
230  *
231  * <sugwcount>	4 bytes	    number of <sugline> following
232  *
233  * <sugline>: <sugnr> ... NUL
234  *
235  * <sugnr>:     X bytes     word number that results in this soundfolded word,
236  *			    stored as an offset to the previous number in as
237  *			    few bytes as possible, see offset2bytes())
238  */
239 
240 #include "vim.h"
241 
242 #if defined(FEAT_SPELL) || defined(PROTO)
243 
244 #ifndef UNIX		// it's in os_unix.h for Unix
245 # include <time.h>	// for time_t
246 #endif
247 
248 #ifndef UNIX		// it's in os_unix.h for Unix
249 # include <time.h>	// for time_t
250 #endif
251 
252 // Special byte values for <byte>.  Some are only used in the tree for
253 // postponed prefixes, some only in the other trees.  This is a bit messy...
254 #define BY_NOFLAGS	0	// end of word without flags or region; for
255 				// postponed prefix: no <pflags>
256 #define BY_INDEX	1	// child is shared, index follows
257 #define BY_FLAGS	2	// end of word, <flags> byte follows; for
258 				// postponed prefix: <pflags> follows
259 #define BY_FLAGS2	3	// end of word, <flags> and <flags2> bytes
260 				// follow; never used in prefix tree
261 #define BY_SPECIAL  BY_FLAGS2	// highest special byte value
262 
263 #define ZERO_FLAG	65009	// used when flag is zero: "0"
264 
265 // Flags used in .spl file for soundsalike flags.
    // These are the bits of <salflags> in the SN_SAL section.
    // NOTE: SAL_F0LLOWUP is spelled with a digit zero, not the letter O.
266 #define SAL_F0LLOWUP		1
267 #define SAL_COLLAPSE		2
268 #define SAL_REM_ACCENTS		4
269 
270 #define VIMSPELLMAGIC "VIMspell"  // string at start of Vim spell file
271 #define VIMSPELLMAGICL 8	// number of <fileID> bytes read and compared
272 #define VIMSPELLVERSION 50	// the only <versionnr> spell_load_file() accepts
273 
274 // Section IDs.  Only renumber them when VIMSPELLVERSION changes!
275 #define SN_REGION	0	// <regionname> section
276 #define SN_CHARFLAGS	1	// charflags section
277 #define SN_MIDWORD	2	// <midword> section
278 #define SN_PREFCOND	3	// <prefcond> section
279 #define SN_REP		4	// REP items section
280 #define SN_SAL		5	// SAL items section
281 #define SN_SOFO		6	// soundfolding section
282 #define SN_MAP		7	// MAP items section
283 #define SN_COMPOUND	8	// compound words section
284 #define SN_SYLLABLE	9	// syllable section
285 #define SN_NOBREAK	10	// NOBREAK section
286 #define SN_SUGFILE	11	// timestamp for .sug file
287 #define SN_REPSAL	12	// REPSAL items section
288 #define SN_WORDS	13	// common words
289 #define SN_NOSPLITSUGS	14	// don't split word for suggestions
290 #define SN_INFO		15	// info section
291 #define SN_NOCOMPOUNDSUGS 16	// don't compound for suggestions
292 #define SN_END		255	// end of sections
293 
294 #define SNF_REQUIRED	1	// <sectionflags>: required section
295 
    // Bits in each <charflags> byte of the SN_CHARFLAGS section.
296 #define CF_WORD		0x01	// word character
297 #define CF_UPPER	0x02	// upper-case character
298 
299 /*
300  * Loop through all the siblings of a node (including the node)
     * "np" is the loop variable; it walks the wn_sibling links starting at
     * "node" and is NULL once the loop has run to completion.
301  */
302 #define FOR_ALL_NODE_SIBLINGS(node, np) \
303     for ((np) = (node); (np) != NULL; (np) = (np)->wn_sibling)
304 
    // Forward declarations of file-local (static) functions.  Several of them
    // are called from spell_load_file() before their definition appears.
305 static int set_spell_finish(spelltab_T	*new_st);
306 static int write_spell_prefcond(FILE *fd, garray_T *gap);
307 static int read_region_section(FILE *fd, slang_T *slang, int len);
308 static int read_charflags_section(FILE *fd);
309 static int read_prefcond_section(FILE *fd, slang_T *lp);
310 static int read_rep_section(FILE *fd, garray_T *gap, short *first);
311 static int read_sal_section(FILE *fd, slang_T *slang);
312 static int read_words_section(FILE *fd, slang_T *lp, int len);
313 static int read_sofo_section(FILE *fd, slang_T *slang);
314 static int read_compound(FILE *fd, slang_T *slang, int len);
315 static int set_sofo(slang_T *lp, char_u *from, char_u *to);
316 static void set_sal_first(slang_T *lp);
317 static int *mb_str2wide(char_u *s);
318 static int spell_read_tree(FILE *fd, char_u **bytsp, idx_T **idxsp, int prefixtree, int prefixcnt);
319 static idx_T read_tree_node(FILE *fd, char_u *byts, idx_T *idxs, int maxidx, idx_T startidx, int prefixtree, int maxprefcondnr);
320 static void set_spell_charflags(char_u *flags, int cnt, char_u *upp);
321 static int set_spell_chartab(char_u *fol, char_u *low, char_u *upp);
322 static void set_map_str(slang_T *lp, char_u *map);
323 
324 
    // Message strings used in this file.  e_spell_trunc is reported by
    // spell_load_file() when a section or tree reader returns SP_TRUNCERROR.
325 static char *e_spell_trunc = N_("E758: Truncated spell file");
326 static char *e_afftrailing = N_("Trailing text in %s line %d: %s");
327 static char *e_affname = N_("Affix name too long in %s line %d: %s");
328 static char *e_affform = N_("E761: Format error in affix file FOL, LOW or UPP");
329 static char *e_affrange = N_("E762: Character in FOL, LOW or UPP is out of range");
330 static char *msg_compressing = N_("Compressing word tree...");
331 
332 /*
333  * Load one spell file and store the info into a slang_T.
334  *
335  * This is invoked in three ways:
336  * - From spell_load_cb() to load a spell file for the first time.  "lang" is
337  *   the language name, "old_lp" is NULL.  Will allocate an slang_T.
338  * - To reload a spell file that was changed.  "lang" is NULL and "old_lp"
339  *   points to the existing slang_T.
340  * - Just after writing a .spl file; it's read back to produce the .sug file.
341  *   "old_lp" is NULL and "lang" is NULL.  Will allocate an slang_T.
342  *
343  * Returns the slang_T the spell file was loaded into.  NULL for error.
     *
     * The file is read in this order: <fileID>, <versionnr>, the sections up
     * to SN_END, then the three word trees.
     *
     * On failure:
     * - when "lang" is not NULL its first byte is set to NUL;
     * - a slang_T allocated by this call is freed with slang_free(); "old_lp"
     *   is never freed here.
     * On success a newly allocated slang_T is linked at the head of
     * "first_lang", but only when "lang" is not NULL.
     *
     * When "silent" is TRUE a file that cannot be opened gives no error; the
     * message is then only shown when p_verbose > 2.
344  */
345     slang_T *
346 spell_load_file(
347     char_u	*fname,
348     char_u	*lang,
349     slang_T	*old_lp,
350     int		silent)		// no error if file doesn't exist
351 {
352     FILE	*fd;
353     char_u	buf[VIMSPELLMAGICL];
354     char_u	*p;
355     int		i;
356     int		n;
357     int		len;
358     slang_T	*lp = NULL;
359     int		c = 0;
360     int		res;
361     int		did_estack_push = FALSE;
362     ESTACK_CHECK_DECLARATION
363 
364     fd = mch_fopen((char *)fname, "r");
365     if (fd == NULL)
366     {
367 	if (!silent)
368 	    semsg(_(e_notopen), fname);
369 	else if (p_verbose > 2)
370 	{
371 	    verbose_enter();
372 	    smsg((const char *)e_notopen, fname);
373 	    verbose_leave();
374 	}
375 	goto endFAIL;
376     }
377     if (p_verbose > 2)
378     {
379 	verbose_enter();
380 	smsg(_("Reading spell file \"%s\""), fname);
381 	verbose_leave();
382     }
383 
384     if (old_lp == NULL)
385     {
386 	lp = slang_alloc(lang);
387 	if (lp == NULL)
388 	    goto endFAIL;
389 
390 	// Remember the file name, used to reload the file when it's updated.
391 	lp->sl_fname = vim_strsave(fname);
392 	if (lp->sl_fname == NULL)
393 	    goto endFAIL;
394 
395 	// Check for .add.spl (_add.spl for VMS).
396 	lp->sl_add = strstr((char *)gettail(fname), SPL_FNAME_ADD) != NULL;
397     }
398     else
399 	lp = old_lp;
400 
401     // Set sourcing_name, so that error messages mention the file name.
402     estack_push(ETYPE_SPELL, fname, 0);
403     ESTACK_CHECK_SETUP
    	// remembered so that the shared exit code pops only what was pushed
404     did_estack_push = TRUE;
405 
406     /*
407      * <HEADER>: <fileID>
408      */
409     for (i = 0; i < VIMSPELLMAGICL; ++i)
410 	buf[i] = getc(fd);				// <fileID>
411     if (STRNCMP(buf, VIMSPELLMAGIC, VIMSPELLMAGICL) != 0)
412     {
413 	emsg(_("E757: This does not look like a spell file"));
414 	goto endFAIL;
415     }
416     c = getc(fd);					// <versionnr>
417     if (c < VIMSPELLVERSION)
418     {
419 	emsg(_("E771: Old spell file, needs to be updated"));
420 	goto endFAIL;
421     }
422     else if (c > VIMSPELLVERSION)
423     {
424 	emsg(_("E772: Spell file is for newer version of Vim"));
425 	goto endFAIL;
426     }
427 
428 
429     /*
430      * <SECTIONS>: <section> ... <sectionend>
431      * <section>: <sectionID> <sectionflags> <sectionlen> (section contents)
432      */
433     for (;;)
434     {
435 	n = getc(fd);			    // <sectionID> or <sectionend>
436 	if (n == SN_END)
437 	    break;
438 	c = getc(fd);					// <sectionflags>
439 	len = get4c(fd);				// <sectionlen>
440 	if (len < 0)
441 	    goto truncerr;
442 
    	// Cases below that do not assign "res" leave it at zero, which passes
    	// all three error checks after the switch.
443 	res = 0;
444 	switch (n)
445 	{
446 	    case SN_INFO:
447 		lp->sl_info = read_string(fd, len);	// <infotext>
448 		if (lp->sl_info == NULL)
449 		    goto endFAIL;
450 		break;
451 
452 	    case SN_REGION:
453 		res = read_region_section(fd, lp, len);
454 		break;
455 
456 	    case SN_CHARFLAGS:
457 		res = read_charflags_section(fd);
458 		break;
459 
460 	    case SN_MIDWORD:
461 		lp->sl_midword = read_string(fd, len);	// <midword>
462 		if (lp->sl_midword == NULL)
463 		    goto endFAIL;
464 		break;
465 
466 	    case SN_PREFCOND:
467 		res = read_prefcond_section(fd, lp);
468 		break;
469 
470 	    case SN_REP:
471 		res = read_rep_section(fd, &lp->sl_rep, lp->sl_rep_first);
472 		break;
473 
474 	    case SN_REPSAL:
475 		res = read_rep_section(fd, &lp->sl_repsal, lp->sl_repsal_first);
476 		break;
477 
478 	    case SN_SAL:
479 		res = read_sal_section(fd, lp);
480 		break;
481 
482 	    case SN_SOFO:
483 		res = read_sofo_section(fd, lp);
484 		break;
485 
486 	    case SN_MAP:
487 		p = read_string(fd, len);		// <mapstr>
488 		if (p == NULL)
489 		    goto endFAIL;
490 		set_map_str(lp, p);
491 		vim_free(p);
492 		break;
493 
494 	    case SN_WORDS:
495 		res = read_words_section(fd, lp, len);
496 		break;
497 
498 	    case SN_SUGFILE:
499 		lp->sl_sugtime = get8ctime(fd);		// <timestamp>
500 		break;
501 
502 	    case SN_NOSPLITSUGS:
503 		lp->sl_nosplitsugs = TRUE;
504 		break;
505 
506 	    case SN_NOCOMPOUNDSUGS:
507 		lp->sl_nocompoundsugs = TRUE;
508 		break;
509 
510 	    case SN_COMPOUND:
511 		res = read_compound(fd, lp, len);
512 		break;
513 
514 	    case SN_NOBREAK:
515 		lp->sl_nobreak = TRUE;
516 		break;
517 
518 	    case SN_SYLLABLE:
519 		lp->sl_syllable = read_string(fd, len);	// <syllable>
520 		if (lp->sl_syllable == NULL)
521 		    goto endFAIL;
522 		if (init_syl_tab(lp) == FAIL)
523 		    goto endFAIL;
524 		break;
525 
526 	    default:
527 		// Unsupported section.  When it's required give an error
528 		// message.  When it's not required skip the contents.
529 		if (c & SNF_REQUIRED)
530 		{
531 		    emsg(_("E770: Unsupported section in spell file"));
532 		    goto endFAIL;
533 		}
534 		while (--len >= 0)
535 		    if (getc(fd) < 0)
536 			goto truncerr;
537 		break;
538 	}
    	// This label is inside the section loop but is also the target of the
    	// "goto someerror" statements that follow the loop, used when reading
    	// a word tree gave a non-zero "res".  Each of the three SP_* values
    	// tested here ends in "goto endFAIL".
539 someerror:
540 	if (res == SP_FORMERROR)
541 	{
542 	    emsg(_(e_format));
543 	    goto endFAIL;
544 	}
545 	if (res == SP_TRUNCERROR)
546 	{
    	    // Also reached directly when <sectionlen> could not be read or an
    	    // unsupported section ended early.
547 truncerr:
548 	    emsg(_(e_spell_trunc));
549 	    goto endFAIL;
550 	}
551 	if (res == SP_OTHERERROR)
552 	    goto endFAIL;
553     }
554 
555     // <LWORDTREE>
556     res = spell_read_tree(fd, &lp->sl_fbyts, &lp->sl_fidxs, FALSE, 0);
557     if (res != 0)
558 	goto someerror;
559 
560     // <KWORDTREE>
561     res = spell_read_tree(fd, &lp->sl_kbyts, &lp->sl_kidxs, FALSE, 0);
562     if (res != 0)
563 	goto someerror;
564 
565     // <PREFIXTREE>
566     res = spell_read_tree(fd, &lp->sl_pbyts, &lp->sl_pidxs, TRUE,
567 							    lp->sl_prefixcnt);
568     if (res != 0)
569 	goto someerror;
570 
571     // For a new file link it in the list of spell files.
572     if (old_lp == NULL && lang != NULL)
573     {
574 	lp->sl_next = first_lang;
575 	first_lang = lp;
576     }
577 
578     goto endOK;
579 
580 endFAIL:
581     if (lang != NULL)
582 	// truncating the name signals the error to spell_load_lang()
583 	*lang = NUL;
    	// Only free what this call allocated; a caller-supplied "old_lp" is
    	// left alone.
584     if (lp != NULL && old_lp == NULL)
585 	slang_free(lp);
586     lp = NULL;
587 
    // Cleanup shared by the success and failure paths.
588 endOK:
589     if (fd != NULL)
590 	fclose(fd);
591     if (did_estack_push)
592     {
593 	ESTACK_CHECK_NOW
594 	estack_pop();
595     }
596 
597     return lp;
598 }
599 
600 /*
601  * Fill in the wordcount fields for a trie.
602  * Returns the total number of words.
603  */
604     static void
605 tree_count_words(char_u *byts, idx_T *idxs)
606 {
607     int		depth;
608     idx_T	arridx[MAXWLEN];
609     int		curi[MAXWLEN];
610     int		c;
611     idx_T	n;
612     int		wordcount[MAXWLEN];
613 
614     arridx[0] = 0;
615     curi[0] = 1;
616     wordcount[0] = 0;
617     depth = 0;
618     while (depth >= 0 && !got_int)
619     {
620 	if (curi[depth] > byts[arridx[depth]])
621 	{
622 	    // Done all bytes at this node, go up one level.
623 	    idxs[arridx[depth]] = wordcount[depth];
624 	    if (depth > 0)
625 		wordcount[depth - 1] += wordcount[depth];
626 
627 	    --depth;
628 	    fast_breakcheck();
629 	}
630 	else
631 	{
632 	    // Do one more byte at this node.
633 	    n = arridx[depth] + curi[depth];
634 	    ++curi[depth];
635 
636 	    c = byts[n];
637 	    if (c == 0)
638 	    {
639 		// End of word, count it.
640 		++wordcount[depth];
641 
642 		// Skip over any other NUL bytes (same word with different
643 		// flags).
644 		while (byts[n + 1] == 0)
645 		{
646 		    ++n;
647 		    ++curi[depth];
648 		}
649 	    }
650 	    else
651 	    {
652 		// Normal char, go one level deeper to count the words.
653 		++depth;
654 		arridx[depth] = idxs[n];
655 		curi[depth] = 1;
656 		wordcount[depth] = 0;
657 	    }
658 	}
659     }
660 }
661 
662 /*
663  * Load the .sug files for languages that have one and weren't loaded yet.
664  */
665     void
666 suggest_load_files(void)
667 {
668     langp_T	*lp;
669     int		lpi;
670     slang_T	*slang;
671     char_u	*dotp;
672     FILE	*fd;
673     char_u	buf[MAXWLEN];
674     int		i;
675     time_t	timestamp;
676     int		wcount;
677     int		wordnr;
678     garray_T	ga;
679     int		c;
680 
681     // Do this for all languages that support sound folding.
682     for (lpi = 0; lpi < curwin->w_s->b_langp.ga_len; ++lpi)
683     {
684 	lp = LANGP_ENTRY(curwin->w_s->b_langp, lpi);
685 	slang = lp->lp_slang;
686 	if (slang->sl_sugtime != 0 && !slang->sl_sugloaded)
687 	{
688 	    // Change ".spl" to ".sug" and open the file.  When the file isn't
689 	    // found silently skip it.  Do set "sl_sugloaded" so that we
690 	    // don't try again and again.
691 	    slang->sl_sugloaded = TRUE;
692 
693 	    dotp = vim_strrchr(slang->sl_fname, '.');
694 	    if (dotp == NULL || fnamecmp(dotp, ".spl") != 0)
695 		continue;
696 	    STRCPY(dotp, ".sug");
697 	    fd = mch_fopen((char *)slang->sl_fname, "r");
698 	    if (fd == NULL)
699 		goto nextone;
700 
701 	    /*
702 	     * <SUGHEADER>: <fileID> <versionnr> <timestamp>
703 	     */
704 	    for (i = 0; i < VIMSUGMAGICL; ++i)
705 		buf[i] = getc(fd);			// <fileID>
706 	    if (STRNCMP(buf, VIMSUGMAGIC, VIMSUGMAGICL) != 0)
707 	    {
708 		semsg(_("E778: This does not look like a .sug file: %s"),
709 							     slang->sl_fname);
710 		goto nextone;
711 	    }
712 	    c = getc(fd);				// <versionnr>
713 	    if (c < VIMSUGVERSION)
714 	    {
715 		semsg(_("E779: Old .sug file, needs to be updated: %s"),
716 							     slang->sl_fname);
717 		goto nextone;
718 	    }
719 	    else if (c > VIMSUGVERSION)
720 	    {
721 		semsg(_("E780: .sug file is for newer version of Vim: %s"),
722 							     slang->sl_fname);
723 		goto nextone;
724 	    }
725 
726 	    // Check the timestamp, it must be exactly the same as the one in
727 	    // the .spl file.  Otherwise the word numbers won't match.
728 	    timestamp = get8ctime(fd);			// <timestamp>
729 	    if (timestamp != slang->sl_sugtime)
730 	    {
731 		semsg(_("E781: .sug file doesn't match .spl file: %s"),
732 							     slang->sl_fname);
733 		goto nextone;
734 	    }
735 
736 	    /*
737 	     * <SUGWORDTREE>: <wordtree>
738 	     * Read the trie with the soundfolded words.
739 	     */
740 	    if (spell_read_tree(fd, &slang->sl_sbyts, &slang->sl_sidxs,
741 							       FALSE, 0) != 0)
742 	    {
743 someerror:
744 		semsg(_("E782: error while reading .sug file: %s"),
745 							     slang->sl_fname);
746 		slang_clear_sug(slang);
747 		goto nextone;
748 	    }
749 
750 	    /*
751 	     * <SUGTABLE>: <sugwcount> <sugline> ...
752 	     *
753 	     * Read the table with word numbers.  We use a file buffer for
754 	     * this, because it's so much like a file with lines.  Makes it
755 	     * possible to swap the info and save on memory use.
756 	     */
757 	    slang->sl_sugbuf = open_spellbuf();
758 	    if (slang->sl_sugbuf == NULL)
759 		goto someerror;
760 							    // <sugwcount>
761 	    wcount = get4c(fd);
762 	    if (wcount < 0)
763 		goto someerror;
764 
765 	    // Read all the wordnr lists into the buffer, one NUL terminated
766 	    // list per line.
767 	    ga_init2(&ga, 1, 100);
768 	    for (wordnr = 0; wordnr < wcount; ++wordnr)
769 	    {
770 		ga.ga_len = 0;
771 		for (;;)
772 		{
773 		    c = getc(fd);			    // <sugline>
774 		    if (c < 0 || ga_grow(&ga, 1) == FAIL)
775 			goto someerror;
776 		    ((char_u *)ga.ga_data)[ga.ga_len++] = c;
777 		    if (c == NUL)
778 			break;
779 		}
780 		if (ml_append_buf(slang->sl_sugbuf, (linenr_T)wordnr,
781 					 ga.ga_data, ga.ga_len, TRUE) == FAIL)
782 		    goto someerror;
783 	    }
784 	    ga_clear(&ga);
785 
786 	    /*
787 	     * Need to put word counts in the word tries, so that we can find
788 	     * a word by its number.
789 	     */
790 	    tree_count_words(slang->sl_fbyts, slang->sl_fidxs);
791 	    tree_count_words(slang->sl_sbyts, slang->sl_sidxs);
792 
793 nextone:
794 	    if (fd != NULL)
795 		fclose(fd);
796 	    STRCPY(dotp, ".spl");
797 	}
798     }
799 }
800 
801 
802 /*
803  * Read a length field from "fd" in "cnt_bytes" bytes.
804  * Allocate memory, read the string into it and add a NUL at the end.
805  * Returns NULL when the count is zero.
806  * Sets "*cntp" to SP_*ERROR when there is an error, length of the result
807  * otherwise.
808  */
809     static char_u *
810 read_cnt_string(FILE *fd, int cnt_bytes, int *cntp)
811 {
812     int		cnt = 0;
813     int		i;
814     char_u	*str;
815 
816     // read the length bytes, MSB first
817     for (i = 0; i < cnt_bytes; ++i)
818 	cnt = (cnt << 8) + getc(fd);
819     if (cnt < 0)
820     {
821 	*cntp = SP_TRUNCERROR;
822 	return NULL;
823     }
824     *cntp = cnt;
825     if (cnt == 0)
826 	return NULL;	    // nothing to read, return NULL
827 
828     str = read_string(fd, cnt);
829     if (str == NULL)
830 	*cntp = SP_OTHERERROR;
831     return str;
832 }
833 
834 /*
835  * Read SN_REGION: <regionname> ...
836  * Return SP_*ERROR flags.
837  */
838     static int
839 read_region_section(FILE *fd, slang_T *lp, int len)
840 {
841     int		i;
842 
843     if (len > MAXREGIONS * 2)
844 	return SP_FORMERROR;
845     for (i = 0; i < len; ++i)
846 	lp->sl_regions[i] = getc(fd);			// <regionname>
847     lp->sl_regions[len] = NUL;
848     return 0;
849 }
850 
851 /*
852  * Read SN_CHARFLAGS section: <charflagslen> <charflags>
853  *				<folcharslen> <folchars>
854  * Return SP_*ERROR flags.
855  */
856     static int
857 read_charflags_section(FILE *fd)
858 {
859     char_u	*flags;
860     char_u	*fol;
861     int		flagslen, follen;
862 
863     // <charflagslen> <charflags>
864     flags = read_cnt_string(fd, 1, &flagslen);
865     if (flagslen < 0)
866 	return flagslen;
867 
868     // <folcharslen> <folchars>
869     fol = read_cnt_string(fd, 2, &follen);
870     if (follen < 0)
871     {
872 	vim_free(flags);
873 	return follen;
874     }
875 
876     // Set the word-char flags and fill SPELL_ISUPPER() table.
877     if (flags != NULL && fol != NULL)
878 	set_spell_charflags(flags, flagslen, fol);
879 
880     vim_free(flags);
881     vim_free(fol);
882 
883     // When <charflagslen> is zero then <fcharlen> must also be zero.
884     if ((flags == NULL) != (fol == NULL))
885 	return SP_FORMERROR;
886     return 0;
887 }
888 
889 /*
890  * Read SN_PREFCOND section.
891  * Return SP_*ERROR flags.
892  */
893     static int
894 read_prefcond_section(FILE *fd, slang_T *lp)
895 {
896     int		cnt;
897     int		i;
898     int		n;
899     char_u	*p;
900     char_u	buf[MAXWLEN + 1];
901 
902     // <prefcondcnt> <prefcond> ...
903     cnt = get2c(fd);					// <prefcondcnt>
904     if (cnt <= 0)
905 	return SP_FORMERROR;
906 
907     lp->sl_prefprog = ALLOC_CLEAR_MULT(regprog_T *, cnt);
908     if (lp->sl_prefprog == NULL)
909 	return SP_OTHERERROR;
910     lp->sl_prefixcnt = cnt;
911 
912     for (i = 0; i < cnt; ++i)
913     {
914 	// <prefcond> : <condlen> <condstr>
915 	n = getc(fd);					// <condlen>
916 	if (n < 0 || n >= MAXWLEN)
917 	    return SP_FORMERROR;
918 
919 	// When <condlen> is zero we have an empty condition.  Otherwise
920 	// compile the regexp program used to check for the condition.
921 	if (n > 0)
922 	{
923 	    buf[0] = '^';	    // always match at one position only
924 	    p = buf + 1;
925 	    while (n-- > 0)
926 		*p++ = getc(fd);			// <condstr>
927 	    *p = NUL;
928 	    lp->sl_prefprog[i] = vim_regcomp(buf, RE_MAGIC + RE_STRING);
929 	}
930     }
931     return 0;
932 }
933 
934 /*
935  * Read REP or REPSAL items section from "fd": <repcount> <rep> ...
936  * Return SP_*ERROR flags.
937  */
938     static int
939 read_rep_section(FILE *fd, garray_T *gap, short *first)
940 {
941     int		cnt;
942     fromto_T	*ftp;
943     int		i;
944 
945     cnt = get2c(fd);					// <repcount>
946     if (cnt < 0)
947 	return SP_TRUNCERROR;
948 
949     if (ga_grow(gap, cnt) == FAIL)
950 	return SP_OTHERERROR;
951 
952     // <rep> : <repfromlen> <repfrom> <reptolen> <repto>
953     for (; gap->ga_len < cnt; ++gap->ga_len)
954     {
955 	ftp = &((fromto_T *)gap->ga_data)[gap->ga_len];
956 	ftp->ft_from = read_cnt_string(fd, 1, &i);
957 	if (i < 0)
958 	    return i;
959 	if (i == 0)
960 	    return SP_FORMERROR;
961 	ftp->ft_to = read_cnt_string(fd, 1, &i);
962 	if (i <= 0)
963 	{
964 	    vim_free(ftp->ft_from);
965 	    if (i < 0)
966 		return i;
967 	    return SP_FORMERROR;
968 	}
969     }
970 
971     // Fill the first-index table.
972     for (i = 0; i < 256; ++i)
973 	first[i] = -1;
974     for (i = 0; i < gap->ga_len; ++i)
975     {
976 	ftp = &((fromto_T *)gap->ga_data)[i];
977 	if (first[*ftp->ft_from] == -1)
978 	    first[*ftp->ft_from] = i;
979     }
980     return 0;
981 }
982 
983 /*
984  * Read SN_SAL section: <salflags> <salcount> <sal> ...
985  * Return SP_*ERROR flags.
986  */
987     static int
988 read_sal_section(FILE *fd, slang_T *slang)
989 {
990     int		i;
991     int		cnt;
992     garray_T	*gap;
993     salitem_T	*smp;
994     int		ccnt;
995     char_u	*p;
996 
997     slang->sl_sofo = FALSE;
998 
999     i = getc(fd);				// <salflags>
1000     if (i & SAL_F0LLOWUP)
1001 	slang->sl_followup = TRUE;
1002     if (i & SAL_COLLAPSE)
1003 	slang->sl_collapse = TRUE;
1004     if (i & SAL_REM_ACCENTS)
1005 	slang->sl_rem_accents = TRUE;
1006 
1007     cnt = get2c(fd);				// <salcount>
1008     if (cnt < 0)
1009 	return SP_TRUNCERROR;
1010 
1011     gap = &slang->sl_sal;
1012     ga_init2(gap, sizeof(salitem_T), 10);
1013     if (ga_grow(gap, cnt + 1) == FAIL)
1014 	return SP_OTHERERROR;
1015 
1016     // <sal> : <salfromlen> <salfrom> <saltolen> <salto>
1017     for (; gap->ga_len < cnt; ++gap->ga_len)
1018     {
1019 	int	c = NUL;
1020 
1021 	smp = &((salitem_T *)gap->ga_data)[gap->ga_len];
1022 	ccnt = getc(fd);			// <salfromlen>
1023 	if (ccnt < 0)
1024 	    return SP_TRUNCERROR;
1025 	if ((p = alloc(ccnt + 2)) == NULL)
1026 	    return SP_OTHERERROR;
1027 	smp->sm_lead = p;
1028 
1029 	// Read up to the first special char into sm_lead.
1030 	for (i = 0; i < ccnt; ++i)
1031 	{
1032 	    c = getc(fd);			// <salfrom>
1033 	    if (vim_strchr((char_u *)"0123456789(-<^$", c) != NULL)
1034 		break;
1035 	    *p++ = c;
1036 	}
1037 	smp->sm_leadlen = (int)(p - smp->sm_lead);
1038 	*p++ = NUL;
1039 
1040 	// Put (abc) chars in sm_oneof, if any.
1041 	if (c == '(')
1042 	{
1043 	    smp->sm_oneof = p;
1044 	    for (++i; i < ccnt; ++i)
1045 	    {
1046 		c = getc(fd);			// <salfrom>
1047 		if (c == ')')
1048 		    break;
1049 		*p++ = c;
1050 	    }
1051 	    *p++ = NUL;
1052 	    if (++i < ccnt)
1053 		c = getc(fd);
1054 	}
1055 	else
1056 	    smp->sm_oneof = NULL;
1057 
1058 	// Any following chars go in sm_rules.
1059 	smp->sm_rules = p;
1060 	if (i < ccnt)
1061 	    // store the char we got while checking for end of sm_lead
1062 	    *p++ = c;
1063 	for (++i; i < ccnt; ++i)
1064 	    *p++ = getc(fd);			// <salfrom>
1065 	*p++ = NUL;
1066 
1067 	// <saltolen> <salto>
1068 	smp->sm_to = read_cnt_string(fd, 1, &ccnt);
1069 	if (ccnt < 0)
1070 	{
1071 	    vim_free(smp->sm_lead);
1072 	    return ccnt;
1073 	}
1074 
1075 	if (has_mbyte)
1076 	{
1077 	    // convert the multi-byte strings to wide char strings
1078 	    smp->sm_lead_w = mb_str2wide(smp->sm_lead);
1079 	    smp->sm_leadlen = mb_charlen(smp->sm_lead);
1080 	    if (smp->sm_oneof == NULL)
1081 		smp->sm_oneof_w = NULL;
1082 	    else
1083 		smp->sm_oneof_w = mb_str2wide(smp->sm_oneof);
1084 	    if (smp->sm_to == NULL)
1085 		smp->sm_to_w = NULL;
1086 	    else
1087 		smp->sm_to_w = mb_str2wide(smp->sm_to);
1088 	    if (smp->sm_lead_w == NULL
1089 		    || (smp->sm_oneof_w == NULL && smp->sm_oneof != NULL)
1090 		    || (smp->sm_to_w == NULL && smp->sm_to != NULL))
1091 	    {
1092 		vim_free(smp->sm_lead);
1093 		vim_free(smp->sm_to);
1094 		vim_free(smp->sm_lead_w);
1095 		vim_free(smp->sm_oneof_w);
1096 		vim_free(smp->sm_to_w);
1097 		return SP_OTHERERROR;
1098 	    }
1099 	}
1100     }
1101 
1102     if (gap->ga_len > 0)
1103     {
1104 	// Add one extra entry to mark the end with an empty sm_lead.  Avoids
1105 	// that we need to check the index every time.
1106 	smp = &((salitem_T *)gap->ga_data)[gap->ga_len];
1107 	if ((p = alloc(1)) == NULL)
1108 	    return SP_OTHERERROR;
1109 	p[0] = NUL;
1110 	smp->sm_lead = p;
1111 	smp->sm_leadlen = 0;
1112 	smp->sm_oneof = NULL;
1113 	smp->sm_rules = p;
1114 	smp->sm_to = NULL;
1115 	if (has_mbyte)
1116 	{
1117 	    smp->sm_lead_w = mb_str2wide(smp->sm_lead);
1118 	    smp->sm_leadlen = 0;
1119 	    smp->sm_oneof_w = NULL;
1120 	    smp->sm_to_w = NULL;
1121 	}
1122 	++gap->ga_len;
1123     }
1124 
1125     // Fill the first-index table.
1126     set_sal_first(slang);
1127 
1128     return 0;
1129 }
1130 
1131 /*
1132  * Read SN_WORDS: <word> ...
1133  * Return SP_*ERROR flags.
1134  */
1135     static int
1136 read_words_section(FILE *fd, slang_T *lp, int len)
1137 {
1138     int		done = 0;
1139     int		i;
1140     int		c;
1141     char_u	word[MAXWLEN];
1142 
1143     while (done < len)
1144     {
1145 	// Read one word at a time.
1146 	for (i = 0; ; ++i)
1147 	{
1148 	    c = getc(fd);
1149 	    if (c == EOF)
1150 		return SP_TRUNCERROR;
1151 	    word[i] = c;
1152 	    if (word[i] == NUL)
1153 		break;
1154 	    if (i == MAXWLEN - 1)
1155 		return SP_FORMERROR;
1156 	}
1157 
1158 	// Init the count to 10.
1159 	count_common_word(lp, word, -1, 10);
1160 	done += i + 1;
1161     }
1162     return 0;
1163 }
1164 
1165 /*
1166  * SN_SOFO: <sofofromlen> <sofofrom> <sofotolen> <sofoto>
1167  * Return SP_*ERROR flags.
1168  */
1169     static int
1170 read_sofo_section(FILE *fd, slang_T *slang)
1171 {
1172     int		cnt;
1173     char_u	*from, *to;
1174     int		res;
1175 
1176     slang->sl_sofo = TRUE;
1177 
1178     // <sofofromlen> <sofofrom>
1179     from = read_cnt_string(fd, 2, &cnt);
1180     if (cnt < 0)
1181 	return cnt;
1182 
1183     // <sofotolen> <sofoto>
1184     to = read_cnt_string(fd, 2, &cnt);
1185     if (cnt < 0)
1186     {
1187 	vim_free(from);
1188 	return cnt;
1189     }
1190 
1191     // Store the info in slang->sl_sal and/or slang->sl_sal_first.
1192     if (from != NULL && to != NULL)
1193 	res = set_sofo(slang, from, to);
1194     else if (from != NULL || to != NULL)
1195 	res = SP_FORMERROR;    // only one of two strings is an error
1196     else
1197 	res = 0;
1198 
1199     vim_free(from);
1200     vim_free(to);
1201     return res;
1202 }
1203 
1204 /*
1205  * Read the compound section from the .spl file:
1206  *	<compmax> <compminlen> <compsylmax> <compoptions> <compflags>
1207  * Returns SP_*ERROR flags.
1208  */
1209     static int
1210 read_compound(FILE *fd, slang_T *slang, int len)
1211 {
1212     int		todo = len;
1213     int		c;
1214     int		atstart;
1215     char_u	*pat;
1216     char_u	*pp;
1217     char_u	*cp;
1218     char_u	*ap;
1219     char_u	*crp;
1220     int		cnt;
1221     garray_T	*gap;
1222 
1223     if (todo < 2)
1224 	return SP_FORMERROR;	// need at least two bytes
1225 
1226     --todo;
1227     c = getc(fd);					// <compmax>
1228     if (c < 2)
1229 	c = MAXWLEN;
1230     slang->sl_compmax = c;
1231 
1232     --todo;
1233     c = getc(fd);					// <compminlen>
1234     if (c < 1)
1235 	c = 0;
1236     slang->sl_compminlen = c;
1237 
1238     --todo;
1239     c = getc(fd);					// <compsylmax>
1240     if (c < 1)
1241 	c = MAXWLEN;
1242     slang->sl_compsylmax = c;
1243 
1244     c = getc(fd);					// <compoptions>
1245     if (c != 0)
1246 	ungetc(c, fd);	    // be backwards compatible with Vim 7.0b
1247     else
1248     {
1249 	--todo;
1250 	c = getc(fd);	    // only use the lower byte for now
1251 	--todo;
1252 	slang->sl_compoptions = c;
1253 
1254 	gap = &slang->sl_comppat;
1255 	c = get2c(fd);					// <comppatcount>
1256 	todo -= 2;
1257 	ga_init2(gap, sizeof(char_u *), c);
1258 	if (ga_grow(gap, c) == OK)
1259 	    while (--c >= 0)
1260 	    {
1261 		((char_u **)(gap->ga_data))[gap->ga_len++] =
1262 						 read_cnt_string(fd, 1, &cnt);
1263 					    // <comppatlen> <comppattext>
1264 		if (cnt < 0)
1265 		    return cnt;
1266 		todo -= cnt + 1;
1267 	    }
1268     }
1269     if (todo < 0)
1270 	return SP_FORMERROR;
1271 
1272     // Turn the COMPOUNDRULE items into a regexp pattern:
1273     // "a[bc]/a*b+" -> "^\(a[bc]\|a*b\+\)$".
1274     // Inserting backslashes may double the length, "^\(\)$<Nul>" is 7 bytes.
1275     // Conversion to utf-8 may double the size.
1276     c = todo * 2 + 7;
1277     if (enc_utf8)
1278 	c += todo * 2;
1279     pat = alloc(c);
1280     if (pat == NULL)
1281 	return SP_OTHERERROR;
1282 
1283     // We also need a list of all flags that can appear at the start and one
1284     // for all flags.
1285     cp = alloc(todo + 1);
1286     if (cp == NULL)
1287     {
1288 	vim_free(pat);
1289 	return SP_OTHERERROR;
1290     }
1291     slang->sl_compstartflags = cp;
1292     *cp = NUL;
1293 
1294     ap = alloc(todo + 1);
1295     if (ap == NULL)
1296     {
1297 	vim_free(pat);
1298 	return SP_OTHERERROR;
1299     }
1300     slang->sl_compallflags = ap;
1301     *ap = NUL;
1302 
1303     // And a list of all patterns in their original form, for checking whether
1304     // compounding may work in match_compoundrule().  This is freed when we
1305     // encounter a wildcard, the check doesn't work then.
1306     crp = alloc(todo + 1);
1307     slang->sl_comprules = crp;
1308 
1309     pp = pat;
1310     *pp++ = '^';
1311     *pp++ = '\\';
1312     *pp++ = '(';
1313 
1314     atstart = 1;
1315     while (todo-- > 0)
1316     {
1317 	c = getc(fd);					// <compflags>
1318 	if (c == EOF)
1319 	{
1320 	    vim_free(pat);
1321 	    return SP_TRUNCERROR;
1322 	}
1323 
1324 	// Add all flags to "sl_compallflags".
1325 	if (vim_strchr((char_u *)"?*+[]/", c) == NULL
1326 		&& !byte_in_str(slang->sl_compallflags, c))
1327 	{
1328 	    *ap++ = c;
1329 	    *ap = NUL;
1330 	}
1331 
1332 	if (atstart != 0)
1333 	{
1334 	    // At start of item: copy flags to "sl_compstartflags".  For a
1335 	    // [abc] item set "atstart" to 2 and copy up to the ']'.
1336 	    if (c == '[')
1337 		atstart = 2;
1338 	    else if (c == ']')
1339 		atstart = 0;
1340 	    else
1341 	    {
1342 		if (!byte_in_str(slang->sl_compstartflags, c))
1343 		{
1344 		    *cp++ = c;
1345 		    *cp = NUL;
1346 		}
1347 		if (atstart == 1)
1348 		    atstart = 0;
1349 	    }
1350 	}
1351 
1352 	// Copy flag to "sl_comprules", unless we run into a wildcard.
1353 	if (crp != NULL)
1354 	{
1355 	    if (c == '?' || c == '+' || c == '*')
1356 	    {
1357 		VIM_CLEAR(slang->sl_comprules);
1358 		crp = NULL;
1359 	    }
1360 	    else
1361 		*crp++ = c;
1362 	}
1363 
1364 	if (c == '/')	    // slash separates two items
1365 	{
1366 	    *pp++ = '\\';
1367 	    *pp++ = '|';
1368 	    atstart = 1;
1369 	}
1370 	else		    // normal char, "[abc]" and '*' are copied as-is
1371 	{
1372 	    if (c == '?' || c == '+' || c == '~')
1373 		*pp++ = '\\';	    // "a?" becomes "a\?", "a+" becomes "a\+"
1374 	    if (enc_utf8)
1375 		pp += mb_char2bytes(c, pp);
1376 	    else
1377 		*pp++ = c;
1378 	}
1379     }
1380 
1381     *pp++ = '\\';
1382     *pp++ = ')';
1383     *pp++ = '$';
1384     *pp = NUL;
1385 
1386     if (crp != NULL)
1387 	*crp = NUL;
1388 
1389     slang->sl_compprog = vim_regcomp(pat, RE_MAGIC + RE_STRING + RE_STRICT);
1390     vim_free(pat);
1391     if (slang->sl_compprog == NULL)
1392 	return SP_FORMERROR;
1393 
1394     return 0;
1395 }
1396 
1397 /*
1398  * Set the SOFOFROM and SOFOTO items in language "lp".
1399  * Returns SP_*ERROR flags when there is something wrong.
1400  */
1401     static int
1402 set_sofo(slang_T *lp, char_u *from, char_u *to)
1403 {
1404     int		i;
1405 
1406     garray_T	*gap;
1407     char_u	*s;
1408     char_u	*p;
1409     int		c;
1410     int		*inp;
1411 
1412     if (has_mbyte)
1413     {
1414 	// Use "sl_sal" as an array with 256 pointers to a list of wide
1415 	// characters.  The index is the low byte of the character.
1416 	// The list contains from-to pairs with a terminating NUL.
1417 	// sl_sal_first[] is used for latin1 "from" characters.
1418 	gap = &lp->sl_sal;
1419 	ga_init2(gap, sizeof(int *), 1);
1420 	if (ga_grow(gap, 256) == FAIL)
1421 	    return SP_OTHERERROR;
1422 	vim_memset(gap->ga_data, 0, sizeof(int *) * 256);
1423 	gap->ga_len = 256;
1424 
1425 	// First count the number of items for each list.  Temporarily use
1426 	// sl_sal_first[] for this.
1427 	for (p = from, s = to; *p != NUL && *s != NUL; )
1428 	{
1429 	    c = mb_cptr2char_adv(&p);
1430 	    MB_CPTR_ADV(s);
1431 	    if (c >= 256)
1432 		++lp->sl_sal_first[c & 0xff];
1433 	}
1434 	if (*p != NUL || *s != NUL)	    // lengths differ
1435 	    return SP_FORMERROR;
1436 
1437 	// Allocate the lists.
1438 	for (i = 0; i < 256; ++i)
1439 	    if (lp->sl_sal_first[i] > 0)
1440 	    {
1441 		p = alloc(sizeof(int) * (lp->sl_sal_first[i] * 2 + 1));
1442 		if (p == NULL)
1443 		    return SP_OTHERERROR;
1444 		((int **)gap->ga_data)[i] = (int *)p;
1445 		*(int *)p = 0;
1446 	    }
1447 
1448 	// Put the characters up to 255 in sl_sal_first[] the rest in a sl_sal
1449 	// list.
1450 	vim_memset(lp->sl_sal_first, 0, sizeof(salfirst_T) * 256);
1451 	for (p = from, s = to; *p != NUL && *s != NUL; )
1452 	{
1453 	    c = mb_cptr2char_adv(&p);
1454 	    i = mb_cptr2char_adv(&s);
1455 	    if (c >= 256)
1456 	    {
1457 		// Append the from-to chars at the end of the list with
1458 		// the low byte.
1459 		inp = ((int **)gap->ga_data)[c & 0xff];
1460 		while (*inp != 0)
1461 		    ++inp;
1462 		*inp++ = c;		// from char
1463 		*inp++ = i;		// to char
1464 		*inp++ = NUL;		// NUL at the end
1465 	    }
1466 	    else
1467 		// mapping byte to char is done in sl_sal_first[]
1468 		lp->sl_sal_first[c] = i;
1469 	}
1470     }
1471     else
1472     {
1473 	// mapping bytes to bytes is done in sl_sal_first[]
1474 	if (STRLEN(from) != STRLEN(to))
1475 	    return SP_FORMERROR;
1476 
1477 	for (i = 0; to[i] != NUL; ++i)
1478 	    lp->sl_sal_first[from[i]] = to[i];
1479 	lp->sl_sal.ga_len = 1;		// indicates we have soundfolding
1480     }
1481 
1482     return 0;
1483 }
1484 
1485 /*
1486  * Fill the first-index table for "lp".
1487  */
1488     static void
1489 set_sal_first(slang_T *lp)
1490 {
1491     salfirst_T	*sfirst;
1492     int		i;
1493     salitem_T	*smp;
1494     int		c;
1495     garray_T	*gap = &lp->sl_sal;
1496 
1497     sfirst = lp->sl_sal_first;
1498     for (i = 0; i < 256; ++i)
1499 	sfirst[i] = -1;
1500     smp = (salitem_T *)gap->ga_data;
1501     for (i = 0; i < gap->ga_len; ++i)
1502     {
1503 	if (has_mbyte)
1504 	    // Use the lowest byte of the first character.  For latin1 it's
1505 	    // the character, for other encodings it should differ for most
1506 	    // characters.
1507 	    c = *smp[i].sm_lead_w & 0xff;
1508 	else
1509 	    c = *smp[i].sm_lead;
1510 	if (sfirst[c] == -1)
1511 	{
1512 	    sfirst[c] = i;
1513 	    if (has_mbyte)
1514 	    {
1515 		int		n;
1516 
1517 		// Make sure all entries with this byte are following each
1518 		// other.  Move the ones that are in the wrong position.  Do
1519 		// keep the same ordering!
1520 		while (i + 1 < gap->ga_len
1521 				       && (*smp[i + 1].sm_lead_w & 0xff) == c)
1522 		    // Skip over entry with same index byte.
1523 		    ++i;
1524 
1525 		for (n = 1; i + n < gap->ga_len; ++n)
1526 		    if ((*smp[i + n].sm_lead_w & 0xff) == c)
1527 		    {
1528 			salitem_T  tsal;
1529 
1530 			// Move entry with same index byte after the entries
1531 			// we already found.
1532 			++i;
1533 			--n;
1534 			tsal = smp[i + n];
1535 			mch_memmove(smp + i + 1, smp + i,
1536 						       sizeof(salitem_T) * n);
1537 			smp[i] = tsal;
1538 		    }
1539 	    }
1540 	}
1541     }
1542 }
1543 
1544 /*
1545  * Turn a multi-byte string into a wide character string.
1546  * Return it in allocated memory (NULL for out-of-memory)
1547  */
1548     static int *
1549 mb_str2wide(char_u *s)
1550 {
1551     int		*res;
1552     char_u	*p;
1553     int		i = 0;
1554 
1555     res = ALLOC_MULT(int, mb_charlen(s) + 1);
1556     if (res != NULL)
1557     {
1558 	for (p = s; *p != NUL; )
1559 	    res[i++] = mb_ptr2char_adv(&p);
1560 	res[i] = NUL;
1561     }
1562     return res;
1563 }
1564 
1565 /*
1566  * Read a tree from the .spl or .sug file.
1567  * Allocates the memory and stores pointers in "bytsp" and "idxsp".
1568  * This is skipped when the tree has zero length.
1569  * Returns zero when OK, SP_ value for an error.
1570  */
1571     static int
1572 spell_read_tree(
1573     FILE	*fd,
1574     char_u	**bytsp,
1575     idx_T	**idxsp,
1576     int		prefixtree,	// TRUE for the prefix tree
1577     int		prefixcnt)	// when "prefixtree" is TRUE: prefix count
1578 {
1579     long	len;
1580     int		idx;
1581     char_u	*bp;
1582     idx_T	*ip;
1583 
1584     // The tree size was computed when writing the file, so that we can
1585     // allocate it as one long block. <nodecount>
1586     len = get4c(fd);
1587     if (len < 0)
1588 	return SP_TRUNCERROR;
1589     if (len >= LONG_MAX / (long)sizeof(int))
1590 	// Invalid length, multiply with sizeof(int) would overflow.
1591 	return SP_FORMERROR;
1592     if (len > 0)
1593     {
1594 	// Allocate the byte array.
1595 	bp = alloc(len);
1596 	if (bp == NULL)
1597 	    return SP_OTHERERROR;
1598 	*bytsp = bp;
1599 
1600 	// Allocate the index array.
1601 	ip = lalloc_clear(len * sizeof(int), TRUE);
1602 	if (ip == NULL)
1603 	    return SP_OTHERERROR;
1604 	*idxsp = ip;
1605 
1606 	// Recursively read the tree and store it in the array.
1607 	idx = read_tree_node(fd, bp, ip, len, 0, prefixtree, prefixcnt);
1608 	if (idx < 0)
1609 	    return idx;
1610     }
1611     return 0;
1612 }
1613 
1614 /*
1615  * Read one row of siblings from the spell file and store it in the byte array
1616  * "byts" and index array "idxs".  Recursively read the children.
1617  *
1618  * NOTE: The code here must match put_node()!
1619  *
1620  * Returns the index (>= 0) following the siblings.
1621  * Returns SP_TRUNCERROR if the file is shorter than expected.
1622  * Returns SP_FORMERROR if there is a format error.
1623  */
1624     static idx_T
1625 read_tree_node(
1626     FILE	*fd,
1627     char_u	*byts,
1628     idx_T	*idxs,
1629     int		maxidx,		    // size of arrays
1630     idx_T	startidx,	    // current index in "byts" and "idxs"
1631     int		prefixtree,	    // TRUE for reading PREFIXTREE
1632     int		maxprefcondnr)	    // maximum for <prefcondnr>
1633 {
1634     int		len;
1635     int		i;
1636     int		n;
1637     idx_T	idx = startidx;
1638     int		c;
1639     int		c2;
1640 #define SHARED_MASK	0x8000000
1641 
1642     len = getc(fd);					// <siblingcount>
1643     if (len <= 0)
1644 	return SP_TRUNCERROR;
1645 
1646     if (startidx + len >= maxidx)
1647 	return SP_FORMERROR;
1648     byts[idx++] = len;
1649 
1650     // Read the byte values, flag/region bytes and shared indexes.
1651     for (i = 1; i <= len; ++i)
1652     {
1653 	c = getc(fd);					// <byte>
1654 	if (c < 0)
1655 	    return SP_TRUNCERROR;
1656 	if (c <= BY_SPECIAL)
1657 	{
1658 	    if (c == BY_NOFLAGS && !prefixtree)
1659 	    {
1660 		// No flags, all regions.
1661 		idxs[idx] = 0;
1662 		c = 0;
1663 	    }
1664 	    else if (c != BY_INDEX)
1665 	    {
1666 		if (prefixtree)
1667 		{
1668 		    // Read the optional pflags byte, the prefix ID and the
1669 		    // condition nr.  In idxs[] store the prefix ID in the low
1670 		    // byte, the condition index shifted up 8 bits, the flags
1671 		    // shifted up 24 bits.
1672 		    if (c == BY_FLAGS)
1673 			c = getc(fd) << 24;		// <pflags>
1674 		    else
1675 			c = 0;
1676 
1677 		    c |= getc(fd);			// <affixID>
1678 
1679 		    n = get2c(fd);			// <prefcondnr>
1680 		    if (n >= maxprefcondnr)
1681 			return SP_FORMERROR;
1682 		    c |= (n << 8);
1683 		}
1684 		else // c must be BY_FLAGS or BY_FLAGS2
1685 		{
1686 		    // Read flags and optional region and prefix ID.  In
1687 		    // idxs[] the flags go in the low two bytes, region above
1688 		    // that and prefix ID above the region.
1689 		    c2 = c;
1690 		    c = getc(fd);			// <flags>
1691 		    if (c2 == BY_FLAGS2)
1692 			c = (getc(fd) << 8) + c;	// <flags2>
1693 		    if (c & WF_REGION)
1694 			c = (getc(fd) << 16) + c;	// <region>
1695 		    if (c & WF_AFX)
1696 			c = (getc(fd) << 24) + c;	// <affixID>
1697 		}
1698 
1699 		idxs[idx] = c;
1700 		c = 0;
1701 	    }
1702 	    else // c == BY_INDEX
1703 	    {
1704 							// <nodeidx>
1705 		n = get3c(fd);
1706 		if (n < 0 || n >= maxidx)
1707 		    return SP_FORMERROR;
1708 		idxs[idx] = n + SHARED_MASK;
1709 		c = getc(fd);				// <xbyte>
1710 	    }
1711 	}
1712 	byts[idx++] = c;
1713     }
1714 
1715     // Recursively read the children for non-shared siblings.
1716     // Skip the end-of-word ones (zero byte value) and the shared ones (and
1717     // remove SHARED_MASK)
1718     for (i = 1; i <= len; ++i)
1719 	if (byts[startidx + i] != 0)
1720 	{
1721 	    if (idxs[startidx + i] & SHARED_MASK)
1722 		idxs[startidx + i] &= ~SHARED_MASK;
1723 	    else
1724 	    {
1725 		idxs[startidx + i] = idx;
1726 		idx = read_tree_node(fd, byts, idxs, maxidx, idx,
1727 						     prefixtree, maxprefcondnr);
1728 		if (idx < 0)
1729 		    break;
1730 	    }
1731 	}
1732 
1733     return idx;
1734 }
1735 
1736 /*
1737  * Reload the spell file "fname" if it's loaded.
1738  */
1739     static void
1740 spell_reload_one(
1741     char_u	*fname,
1742     int		added_word)	// invoked through "zg"
1743 {
1744     slang_T	*slang;
1745     int		didit = FALSE;
1746 
1747     FOR_ALL_SPELL_LANGS(slang)
1748     {
1749 	if (fullpathcmp(fname, slang->sl_fname, FALSE, TRUE) == FPC_SAME)
1750 	{
1751 	    slang_clear(slang);
1752 	    if (spell_load_file(fname, NULL, slang, FALSE) == NULL)
1753 		// reloading failed, clear the language
1754 		slang_clear(slang);
1755 	    redraw_all_later(SOME_VALID);
1756 	    didit = TRUE;
1757 	}
1758     }
1759 
1760     // When "zg" was used and the file wasn't loaded yet, should redo
1761     // 'spelllang' to load it now.
1762     if (added_word && !didit)
1763 	did_set_spelllang(curwin);
1764 }
1765 
1766 
1767 /*
1768  * Functions for ":mkspell".
1769  */
1770 
#define MAXLINELEN  500		// Maximum length in bytes of a line in a .aff
				// and .dic file.
/*
 * Main structure to store the contents of a ".aff" file.
 * The special flag IDs (af_rare, af_keepcase, etc.) are stored as numbers.
 * "af_flagtype" is one of the AFT_ values defined below.
 */
typedef struct afffile_S
{
    char_u	*af_enc;	// "SET", normalized, alloc'ed string or NULL
    int		af_flagtype;	// AFT_CHAR, AFT_LONG, AFT_NUM or AFT_CAPLONG
    unsigned	af_rare;	// RARE ID for rare word
    unsigned	af_keepcase;	// KEEPCASE ID for keep-case word
    unsigned	af_bad;		// BAD ID for banned word
    unsigned	af_needaffix;	// NEEDAFFIX ID
    unsigned	af_circumfix;	// CIRCUMFIX ID
    unsigned	af_needcomp;	// NEEDCOMPOUND ID
    unsigned	af_comproot;	// COMPOUNDROOT ID
    unsigned	af_compforbid;	// COMPOUNDFORBIDFLAG ID
    unsigned	af_comppermit;	// COMPOUNDPERMITFLAG ID
    unsigned	af_nosuggest;	// NOSUGGEST ID
    int		af_pfxpostpone;	// postpone prefixes without chop string and
				// without flags
    int		af_ignoreextra;	// IGNOREEXTRA present
    hashtab_T	af_pref;	// hashtable for prefixes, affheader_T
    hashtab_T	af_suff;	// hashtable for suffixes, affheader_T
    hashtab_T	af_comp;	// hashtable for compound flags, compitem_T
} afffile_T;

// Values for "af_flagtype": the form of the flag names in the .aff file.
#define AFT_CHAR	0	// flags are one character
#define AFT_LONG	1	// flags are two characters
#define AFT_CAPLONG	2	// flags are one or two characters
#define AFT_NUM		3	// flags are numbers, comma separated
1802 
typedef struct affentry_S affentry_T;
// Affix entry from ".aff" file.  Used for prefixes and suffixes.
// Entries with the same name/number form a list linked through "ae_next",
// the list starts at "ah_first" of the affix header.
struct affentry_S
{
    affentry_T	*ae_next;	// next affix with same name/number
    char_u	*ae_chop;	// text to chop off basic word (can be NULL)
    char_u	*ae_add;	// text to add to basic word (can be NULL)
    char_u	*ae_flags;	// flags on the affix (can be NULL)
    char_u	*ae_cond;	// condition (NULL for ".")
    regprog_T	*ae_prog;	// regexp program for ae_cond or NULL
    char	ae_compforbid;	// COMPOUNDFORBIDFLAG found
    char	ae_comppermit;	// COMPOUNDPERMITFLAG found
};
1816 
#define AH_KEY_LEN 17		// 2 x 8 bytes + NUL

// Affix header from ".aff" file.  Used for af_pref and af_suff.
// "ah_key" must be the first member: HI2AH() casts the hashtab key pointer to
// a pointer to this struct.
typedef struct affheader_S
{
    char_u	ah_key[AH_KEY_LEN]; // key for hashtab == name of affix
    unsigned	ah_flag;	// affix name as number, uses "af_flagtype"
    int		ah_newID;	// prefix ID after renumbering; 0 if not used
    int		ah_combine;	// suffix may combine with prefix
    int		ah_follows;	// another affix block should be following
    affentry_T	*ah_first;	// first affix entry
} affheader_T;

// Get the affheader_T pointer from hashtab item "hi".
#define HI2AH(hi)   ((affheader_T *)(hi)->hi_key)
1831 
// Flag used in compound items.
// "ci_key" must be the first member: HI2CI() casts the hashtab key pointer to
// a pointer to this struct.
typedef struct compitem_S
{
    char_u	ci_key[AH_KEY_LEN]; // key for hashtab == name of compound
    unsigned	ci_flag;	// affix name as number, uses "af_flagtype"
    int		ci_newID;	// affix ID after renumbering.
} compitem_T;

// Get the compitem_T pointer from hashtab item "hi".
#define HI2CI(hi)   ((compitem_T *)(hi)->hi_key)
1841 
/*
 * Structure that is used to store the items in the word tree.  This avoids
 * the need to keep track of each allocated thing, everything is freed all at
 * once after ":mkspell" is done.
 * The blocks form a list linked through "sb_next".
 * Note: "sb_next" must be just before "sb_data" to make sure the alignment of
 * "sb_data" is correct for systems where pointers must be aligned on
 * pointer-size boundaries and sizeof(pointer) > sizeof(int) (e.g., Sparc).
 */
#define  SBLOCKSIZE 16000	// size of sb_data
typedef struct sblock_S sblock_T;
struct sblock_S
{
    int		sb_used;	// nr of bytes already in use
    sblock_T	*sb_next;	// next block in list
    char_u	sb_data[1];	// data, actually longer
};
1858 
/*
 * A node in the tree.
 * A node stands for one byte of a word.  "wn_sibling" links the alternatives
 * for this byte, "wn_child" points to the nodes for the next byte.  A node
 * with a NUL "wn_byte" marks the end of a word and holds the info about it.
 */
typedef struct wordnode_S wordnode_T;
struct wordnode_S
{
    union   // shared to save space
    {
	char_u	hashkey[6];	// the hash key, only used while compressing
	int	index;		// index in written nodes (valid after first
				// round)
    } wn_u1;
    union   // shared to save space
    {
	wordnode_T *next;	// next node with same hash key
	wordnode_T *wnode;	// parent node that will write this node
    } wn_u2;
    wordnode_T	*wn_child;	// child (next byte in word)
    wordnode_T  *wn_sibling;	// next sibling (alternate byte in word,
				// always sorted)
    int		wn_refs;	// Nr. of references to this node.  Only
				// relevant for first node in a list of
				// siblings, in following siblings it is
				// always one.
    char_u	wn_byte;	// Byte for this node. NUL for word end

    // Info for when "wn_byte" is NUL.
    // In PREFIXTREE "wn_region" is used for the prefcondnr.
    // In the soundfolded word tree "wn_flags" has the MSW of the wordnr and
    // "wn_region" the LSW of the wordnr.
    char_u	wn_affixID;	// supported/required prefix ID or 0
    short_u	wn_flags;	// WF_ flags
    short	wn_region;	// region mask

#ifdef SPELL_PRINTTREE
    int		wn_nr;		// sequence nr for printing
#endif
};

#define WN_MASK	 0xffff		// mask relevant bits of "wn_flags"

// Get the wordnode_T pointer from hashtab item "hi".  The key pointer is cast
// as-is, thus the key is at the start of the node.
#define HI2WN(hi)    (wordnode_T *)((hi)->hi_key)
1901 
/*
 * Info used while reading the spell files.
 * Holds the word trees that are being built, the memory blocks they are
 * stored in and the items found in the .aff file.
 */
typedef struct spellinfo_S
{
    wordnode_T	*si_foldroot;	// tree with case-folded words
    long	si_foldwcount;	// nr of words in si_foldroot

    wordnode_T	*si_keeproot;	// tree with keep-case words
    long	si_keepwcount;	// nr of words in si_keeproot

    wordnode_T	*si_prefroot;	// tree with postponed prefixes

    long	si_sugtree;	// creating the soundfolding trie

    sblock_T	*si_blocks;	// memory blocks used
    long	si_blocks_cnt;	// memory blocks allocated
    int		si_did_emsg;	// TRUE when ran out of memory

    long	si_compress_cnt;    // words to add before lowering
				    // compression limit
    wordnode_T	*si_first_free; // List of nodes that have been freed during
				// compression, linked by "wn_child" field.
    long	si_free_count;	// number of nodes in si_first_free
#ifdef SPELL_PRINTTREE
    int		si_wordnode_nr;	// sequence nr for nodes
#endif
    buf_T	*si_spellbuf;	// buffer used to store soundfold word table

    int		si_ascii;	// handling only ASCII words
    int		si_add;		// addition file
    int		si_clear_chartab;   // when TRUE clear char tables
    int		si_region;	// region mask
    vimconv_T	si_conv;	// for conversion to 'encoding'
    int		si_memtot;	// runtime memory used
    int		si_verbose;	// verbose messages
    int		si_msg_count;	// number of words added since last message
    char_u	*si_info;	// info text chars or NULL
    int		si_region_count; // number of regions supported (1 when there
				 // are no regions)
    char_u	si_region_name[MAXREGIONS * 2 + 1];
				// region names; used only if
				// si_region_count > 1)

    garray_T	si_rep;		// list of fromto_T entries from REP lines
    garray_T	si_repsal;	// list of fromto_T entries from REPSAL lines
    garray_T	si_sal;		// list of fromto_T entries from SAL lines
    char_u	*si_sofofr;	// SOFOFROM text
    char_u	*si_sofoto;	// SOFOTO text
    int		si_nosugfile;	// NOSUGFILE item found
    int		si_nosplitsugs;	// NOSPLITSUGS item found
    int		si_nocompoundsugs; // NOCOMPOUNDSUGS item found
    int		si_followup;	// soundsalike: ? (presumably what ends up in
				// the SAL_F0LLOWUP flag, TODO confirm)
    int		si_collapse;	// soundsalike: ? (presumably what ends up in
				// the SAL_COLLAPSE flag, TODO confirm)
    hashtab_T	si_commonwords;	// hashtable for common words
    time_t	si_sugtime;	// timestamp for .sug file
    int		si_rem_accents;	// soundsalike: remove accents
    garray_T	si_map;		// MAP info concatenated
    char_u	*si_midword;	// MIDWORD chars or NULL
    int		si_compmax;	// max nr of words for compounding
    int		si_compminlen;	// minimal length for compounding
    int		si_compsylmax;	// max nr of syllables for compounding
    int		si_compoptions;	// COMP_ flags
    garray_T	si_comppat;	// CHECKCOMPOUNDPATTERN items, each stored as
				// a string
    char_u	*si_compflags;	// flags used for compounding
    char_u	si_nobreak;	// NOBREAK
    char_u	*si_syllable;	// syllable string
    garray_T	si_prefcond;	// table with conditions for postponed
				// prefixes, each stored as a string
    int		si_newprefID;	// current value for ah_newID
    int		si_newcompID;	// current value for compound ID
} spellinfo_T;
1975 
1976 static int is_aff_rule(char_u **items, int itemcnt, char *rulename, int	 mincount);
1977 static void aff_process_flags(afffile_T *affile, affentry_T *entry);
1978 static int spell_info_item(char_u *s);
1979 static unsigned affitem2flag(int flagtype, char_u *item, char_u	*fname, int lnum);
1980 static unsigned get_affitem(int flagtype, char_u **pp);
1981 static void process_compflags(spellinfo_T *spin, afffile_T *aff, char_u *compflags);
1982 static void check_renumber(spellinfo_T *spin);
1983 static void aff_check_number(int spinval, int affval, char *name);
1984 static void aff_check_string(char_u *spinval, char_u *affval, char *name);
1985 static int str_equal(char_u *s1, char_u	*s2);
1986 static void add_fromto(spellinfo_T *spin, garray_T *gap, char_u	*from, char_u *to);
1987 static int sal_to_bool(char_u *s);
1988 static int get_affix_flags(afffile_T *affile, char_u *afflist);
1989 static int get_pfxlist(afffile_T *affile, char_u *afflist, char_u *store_afflist);
1990 static void get_compflags(afffile_T *affile, char_u *afflist, char_u *store_afflist);
1991 static int store_aff_word(spellinfo_T *spin, char_u *word, char_u *afflist, afffile_T *affile, hashtab_T *ht, hashtab_T *xht, int condit, int flags, char_u *pfxlist, int pfxlen);
1992 static void *getroom(spellinfo_T *spin, size_t len, int align);
1993 static char_u *getroom_save(spellinfo_T *spin, char_u *s);
1994 static int store_word(spellinfo_T *spin, char_u *word, int flags, int region, char_u *pfxlist, int need_affix);
1995 static int tree_add_word(spellinfo_T *spin, char_u *word, wordnode_T *tree, int flags, int region, int affixID);
1996 static wordnode_T *get_wordnode(spellinfo_T *spin);
1997 static void free_wordnode(spellinfo_T *spin, wordnode_T *n);
1998 static void wordtree_compress(spellinfo_T *spin, wordnode_T *root, char *name);
1999 static long node_compress(spellinfo_T *spin, wordnode_T *node, hashtab_T *ht, long *tot);
2000 static int node_equal(wordnode_T *n1, wordnode_T *n2);
2001 static void clear_node(wordnode_T *node);
2002 static int put_node(FILE *fd, wordnode_T *node, int idx, int regionmask, int prefixtree);
2003 static int sug_filltree(spellinfo_T *spin, slang_T *slang);
2004 static int sug_maketable(spellinfo_T *spin);
2005 static int sug_filltable(spellinfo_T *spin, wordnode_T *node, int startwordnr, garray_T *gap);
2006 static int offset2bytes(int nr, char_u *buf);
2007 static void sug_write(spellinfo_T *spin, char_u *fname);
2008 static void spell_message(spellinfo_T *spin, char_u *str);
2009 static void init_spellfile(void);
2010 
2011 // In the postponed prefixes tree wn_flags is used to store the WFP_ flags,
2012 // but it must be negative to indicate the prefix tree to tree_add_word().
2013 // Use a negative number with the lower 8 bits zero.  spell_read_aff() starts from this value and ORs WFP_ flags into it.
2014 #define PFX_FLAGS	-256	// NOTE(review): the expansion is not parenthesized
2015 
2016 // flags for "condit" argument of store_aff_word(), each one a separate bit
2017 #define CONDIT_COMB	1	// affix must combine
2018 #define CONDIT_CFIX	2	// affix must have CIRCUMFIX flag
2019 #define CONDIT_SUF	4	// add a suffix for matching flags
2020 #define CONDIT_AFF	8	// word already has an affix
2021 
2022 /*
2023  * Tunable parameters for when the tree is compressed.  Filled from the
2024  * 'mkspellmem' option by spell_check_msm(); the initializers below are the values before that has accepted an option value.
2025  */
2026 static long compress_start = 30000;	// block count: memory / SBLOCKSIZE
2027 static long compress_inc = 100;		// block count: memory / SBLOCKSIZE
2028 static long compress_added = 500000;	// word count (spell_check_msm() stores the option number * 1024)
2029 
2030 /*
2031  * Check the 'mkspellmem' option.  Return FAIL if it's wrong.
2032  * Sets "compress_start", "compress_inc" and "compress_added" when returning OK; they are left unchanged on FAIL.
2033  */
2034     int
2035 spell_check_msm(void)
2036 {
2037     char_u	*p = p_msm;	// the option value being checked, parsed as "{start},{inc},{added}"
2038     long	start = 0;
2039     long	incr = 0;
2040     long	added = 0;
2041 
2042     if (!VIM_ISDIGIT(*p))
2043 	return FAIL;
2044     // block count = (value * 1024) / SBLOCKSIZE (but avoid overflow)
2045     start = (getdigits(&p) * 10) / (SBLOCKSIZE / 102);
2046     if (*p != ',')	// getdigits() is expected to leave "p" just after the number - TODO confirm
2047 	return FAIL;
2048     ++p;
2049     if (!VIM_ISDIGIT(*p))
2050 	return FAIL;
2051     incr = (getdigits(&p) * 102) / (SBLOCKSIZE / 10);	// also a block count; same ratio as "start", factors split differently
2052     if (*p != ',')
2053 	return FAIL;
2054     ++p;
2055     if (!VIM_ISDIGIT(*p))
2056 	return FAIL;
2057     added = getdigits(&p) * 1024;	// NOTE(review): this multiplication is not guarded against overflow
2058     if (*p != NUL)	// anything after the third number is an error
2059 	return FAIL;
2060 
2061     if (start == 0 || incr == 0 || added == 0 || incr > start)	// reject zero results and an increment above the start value
2062 	return FAIL;
2063 
2064     compress_start = start;	// stored only after every check passed
2065     compress_inc = incr;
2066     compress_added = added;
2067     return OK;
2068 }
2069 
2070 #ifdef SPELL_PRINTTREE
2071 /*
2072  * For debugging the tree code: print the current tree in a (more or less)
2073  * readable format, so that we can see what happens when adding a word and/or
2074  * compressing the tree.
2075  * Based on code from Olaf Seibert.
2076  */
2077 #define PRINTLINESIZE	1000	// size of each of the line1/line2/line3 buffers
2078 #define PRINTWIDTH	6	// characters used per tree level ("depth")
2079 
2080 #define PRINTSOME(l, depth, fmt, a1, a2) vim_snprintf(l + depth * PRINTWIDTH, \
2081 	    PRINTLINESIZE - PRINTWIDTH * depth, fmt, a1, a2)	// formats into "l" at offset depth * PRINTWIDTH; NOTE(review): nothing here bounds "depth", the size is <= 0 once depth * PRINTWIDTH >= PRINTLINESIZE
2082 
2083 static char line1[PRINTLINESIZE];	// the three text lines built by spell_print_node()
2084 static char line2[PRINTLINESIZE];
2085 static char line3[PRINTLINESIZE];
2086 
/*
 * Set "wn_u1.index" to FALSE in every node that FOR_ALL_NODE_SIBLINGS()
 * visits starting at "node" and, recursively, in the "wn_child" subtree of
 * each of them.  spell_print_node() uses that field to remember which nodes
 * it already printed.
 * NOTE(review): the recursion passes "wn_child" unchecked; presumably the
 * macro visits nothing for a NULL node, which ends the recursion - confirm.
 */
2087     static void
2088 spell_clear_flags(wordnode_T *node)
2089 {
2090     wordnode_T	*np;
2091 
2092     FOR_ALL_NODE_SIBLINGS(node, np)
2093     {
2094 	np->wn_u1.index = FALSE;	// not printed yet
2095 	spell_clear_flags(np->wn_child);
2096     }
2097 }
2098 
/*
 * Print "node" and, recursively, its child and its siblings with msg().
 * The output is built in line1, line2 and line3; tree level "depth" is
 * written at offset depth * PRINTWIDTH in each of them (see PRINTSOME()).
 * A node with "wn_u1.index" already set was handled before: only a
 * "(wn_nr)" reference is printed for it.  Otherwise the field is set and
 * the node is printed in full.
 */
2099     static void
2100 spell_print_node(wordnode_T *node, int depth)
2101 {
2102     if (node->wn_u1.index)
2103     {
2104 	// Done this node before, print the reference.
2105 	PRINTSOME(line1, depth, "(%d)", node->wn_nr, 0);
2106 	PRINTSOME(line2, depth, "    ", 0, 0);
2107 	PRINTSOME(line3, depth, "    ", 0, 0);
2108 	msg(line1);
2109 	msg(line2);
2110 	msg(line3);
2111     }
2112     else
2113     {
2114 	node->wn_u1.index = TRUE;	// remember this node was printed
2115 
2116 	if (node->wn_byte != NUL)
2117 	{
2118 	    if (node->wn_child != NULL)
2119 		PRINTSOME(line1, depth, " %c -> ", node->wn_byte, 0);	// the byte, with an arrow towards the child
2120 	    else
2121 		// Cannot happen?  NOTE(review): msg() is not called below for such a node.
2122 		PRINTSOME(line1, depth, " %c ???", node->wn_byte, 0);
2123 	}
2124 	else
2125 	    PRINTSOME(line1, depth, " $    ", 0, 0);	// a NUL byte is shown as "$"
2126 
2127 	PRINTSOME(line2, depth, "%d/%d    ", node->wn_nr, node->wn_refs);	// wn_nr/wn_refs
2128 
2129 	if (node->wn_sibling != NULL)
2130 	    PRINTSOME(line3, depth, " |    ", 0, 0);	// a sibling follows below
2131 	else
2132 	    PRINTSOME(line3, depth, "      ", 0, 0);
2133 
2134 	if (node->wn_byte == NUL)	// the lines are only output at a NUL byte node
2135 	{
2136 	    msg(line1);
2137 	    msg(line2);
2138 	    msg(line3);
2139 	}
2140 
2141 	// do the children: one level deeper, thus PRINTWIDTH further into the same lines
2142 	if (node->wn_byte != NUL && node->wn_child != NULL)
2143 	    spell_print_node(node->wn_child, depth + 1);
2144 
2145 	// do the siblings
2146 	if (node->wn_sibling != NULL)
2147 	{
2148 	    // get rid of all parent details except |
2149 	    STRCPY(line1, line3);
2150 	    STRCPY(line2, line3);
2151 	    spell_print_node(node->wn_sibling, depth);	// same depth, so the same offset is overwritten
2152 	}
2153     }
2154 }
2155 
/*
 * Print the tree that starts at "root" with msg(), for debugging (only
 * compiled when SPELL_PRINTTREE is defined).
 * Does nothing when "root" is NULL.
 */
2156     static void
2157 spell_print_tree(wordnode_T *root)
2158 {
2159     if (root != NULL)
2160     {
2161 	// Clear the "wn_u1.index" fields, used to remember what has been
2162 	// done.
2163 	spell_clear_flags(root);
2164 
2165 	// Recursively print the tree.
2166 	spell_print_node(root, 0);	// depth 0: offset 0 in the output lines
2167     }
2168 }
2169 #endif // SPELL_PRINTTREE
2170 
2171 /*
2172  * Read the affix file "fname".
2173  * Returns an afffile_T, NULL for complete failure.
2174  */
2175     static afffile_T *
2176 spell_read_aff(spellinfo_T *spin, char_u *fname)
2177 {
2178     FILE	*fd;
2179     afffile_T	*aff;
2180     char_u	rline[MAXLINELEN];
2181     char_u	*line;
2182     char_u	*pc = NULL;
2183 #define MAXITEMCNT  30
2184     char_u	*(items[MAXITEMCNT]);
2185     int		itemcnt;
2186     char_u	*p;
2187     int		lnum = 0;
2188     affheader_T	*cur_aff = NULL;
2189     int		did_postpone_prefix = FALSE;
2190     int		aff_todo = 0;
2191     hashtab_T	*tp;
2192     char_u	*low = NULL;
2193     char_u	*fol = NULL;
2194     char_u	*upp = NULL;
2195     int		do_rep;
2196     int		do_repsal;
2197     int		do_sal;
2198     int		do_mapline;
2199     int		found_map = FALSE;
2200     hashitem_T	*hi;
2201     int		l;
2202     int		compminlen = 0;		// COMPOUNDMIN value
2203     int		compsylmax = 0;		// COMPOUNDSYLMAX value
2204     int		compoptions = 0;	// COMP_ flags
2205     int		compmax = 0;		// COMPOUNDWORDMAX value
2206     char_u	*compflags = NULL;	// COMPOUNDFLAG and COMPOUNDRULE
2207 					// concatenated
2208     char_u	*midword = NULL;	// MIDWORD value
2209     char_u	*syllable = NULL;	// SYLLABLE value
2210     char_u	*sofofrom = NULL;	// SOFOFROM value
2211     char_u	*sofoto = NULL;		// SOFOTO value
2212 
2213     /*
2214      * Open the file.
2215      */
2216     fd = mch_fopen((char *)fname, "r");
2217     if (fd == NULL)
2218     {
2219 	semsg(_(e_notopen), fname);
2220 	return NULL;
2221     }
2222 
2223     vim_snprintf((char *)IObuff, IOSIZE, _("Reading affix file %s..."), fname);
2224     spell_message(spin, IObuff);
2225 
2226     // Only do REP lines when not done in another .aff file already.
2227     do_rep = spin->si_rep.ga_len == 0;
2228 
2229     // Only do REPSAL lines when not done in another .aff file already.
2230     do_repsal = spin->si_repsal.ga_len == 0;
2231 
2232     // Only do SAL lines when not done in another .aff file already.
2233     do_sal = spin->si_sal.ga_len == 0;
2234 
2235     // Only do MAP lines when not done in another .aff file already.
2236     do_mapline = spin->si_map.ga_len == 0;
2237 
2238     /*
2239      * Allocate and init the afffile_T structure.
2240      */
2241     aff = (afffile_T *)getroom(spin, sizeof(afffile_T), TRUE);
2242     if (aff == NULL)
2243     {
2244 	fclose(fd);
2245 	return NULL;
2246     }
2247     hash_init(&aff->af_pref);
2248     hash_init(&aff->af_suff);
2249     hash_init(&aff->af_comp);
2250 
2251     /*
2252      * Read all the lines in the file one by one.
2253      */
2254     while (!vim_fgets(rline, MAXLINELEN, fd) && !got_int)
2255     {
2256 	line_breakcheck();
2257 	++lnum;
2258 
2259 	// Skip comment lines.
2260 	if (*rline == '#')
2261 	    continue;
2262 
2263 	// Convert from "SET" to 'encoding' when needed.
2264 	vim_free(pc);
2265 	if (spin->si_conv.vc_type != CONV_NONE)
2266 	{
2267 	    pc = string_convert(&spin->si_conv, rline, NULL);
2268 	    if (pc == NULL)
2269 	    {
2270 		smsg(_("Conversion failure for word in %s line %d: %s"),
2271 							   fname, lnum, rline);
2272 		continue;
2273 	    }
2274 	    line = pc;
2275 	}
2276 	else
2277 	{
2278 	    pc = NULL;
2279 	    line = rline;
2280 	}
2281 
2282 	// Split the line up in white separated items.  Put a NUL after each
2283 	// item.
2284 	itemcnt = 0;
2285 	for (p = line; ; )
2286 	{
2287 	    while (*p != NUL && *p <= ' ')  // skip white space and CR/NL
2288 		++p;
2289 	    if (*p == NUL)
2290 		break;
2291 	    if (itemcnt == MAXITEMCNT)	    // too many items
2292 		break;
2293 	    items[itemcnt++] = p;
2294 	    // A few items have arbitrary text argument, don't split them.
2295 	    if (itemcnt == 2 && spell_info_item(items[0]))
2296 		while (*p >= ' ' || *p == TAB)    // skip until CR/NL
2297 		    ++p;
2298 	    else
2299 		while (*p > ' ')    // skip until white space or CR/NL
2300 		    ++p;
2301 	    if (*p == NUL)
2302 		break;
2303 	    *p++ = NUL;
2304 	}
2305 
2306 	// Handle non-empty lines.
2307 	if (itemcnt > 0)
2308 	{
2309 	    if (is_aff_rule(items, itemcnt, "SET", 2) && aff->af_enc == NULL)
2310 	    {
2311 		// Setup for conversion from "ENC" to 'encoding'.
2312 		aff->af_enc = enc_canonize(items[1]);
2313 		if (aff->af_enc != NULL && !spin->si_ascii
2314 			&& convert_setup(&spin->si_conv, aff->af_enc,
2315 							       p_enc) == FAIL)
2316 		    smsg(_("Conversion in %s not supported: from %s to %s"),
2317 					       fname, aff->af_enc, p_enc);
2318 		spin->si_conv.vc_fail = TRUE;
2319 	    }
2320 	    else if (is_aff_rule(items, itemcnt, "FLAG", 2)
2321 					      && aff->af_flagtype == AFT_CHAR)
2322 	    {
2323 		if (STRCMP(items[1], "long") == 0)
2324 		    aff->af_flagtype = AFT_LONG;
2325 		else if (STRCMP(items[1], "num") == 0)
2326 		    aff->af_flagtype = AFT_NUM;
2327 		else if (STRCMP(items[1], "caplong") == 0)
2328 		    aff->af_flagtype = AFT_CAPLONG;
2329 		else
2330 		    smsg(_("Invalid value for FLAG in %s line %d: %s"),
2331 			    fname, lnum, items[1]);
2332 		if (aff->af_rare != 0
2333 			|| aff->af_keepcase != 0
2334 			|| aff->af_bad != 0
2335 			|| aff->af_needaffix != 0
2336 			|| aff->af_circumfix != 0
2337 			|| aff->af_needcomp != 0
2338 			|| aff->af_comproot != 0
2339 			|| aff->af_nosuggest != 0
2340 			|| compflags != NULL
2341 			|| aff->af_suff.ht_used > 0
2342 			|| aff->af_pref.ht_used > 0)
2343 		    smsg(_("FLAG after using flags in %s line %d: %s"),
2344 			    fname, lnum, items[1]);
2345 	    }
2346 	    else if (spell_info_item(items[0]))
2347 	    {
2348 		    p = (char_u *)getroom(spin,
2349 			    (spin->si_info == NULL ? 0 : STRLEN(spin->si_info))
2350 			    + STRLEN(items[0])
2351 			    + STRLEN(items[1]) + 3, FALSE);
2352 		    if (p != NULL)
2353 		    {
2354 			if (spin->si_info != NULL)
2355 			{
2356 			    STRCPY(p, spin->si_info);
2357 			    STRCAT(p, "\n");
2358 			}
2359 			STRCAT(p, items[0]);
2360 			STRCAT(p, " ");
2361 			STRCAT(p, items[1]);
2362 			spin->si_info = p;
2363 		    }
2364 	    }
2365 	    else if (is_aff_rule(items, itemcnt, "MIDWORD", 2)
2366 							   && midword == NULL)
2367 	    {
2368 		midword = getroom_save(spin, items[1]);
2369 	    }
2370 	    else if (is_aff_rule(items, itemcnt, "TRY", 2))
2371 	    {
2372 		// ignored, we look in the tree for what chars may appear
2373 	    }
2374 	    // TODO: remove "RAR" later
2375 	    else if ((is_aff_rule(items, itemcnt, "RAR", 2)
2376 			|| is_aff_rule(items, itemcnt, "RARE", 2))
2377 							 && aff->af_rare == 0)
2378 	    {
2379 		aff->af_rare = affitem2flag(aff->af_flagtype, items[1],
2380 								 fname, lnum);
2381 	    }
2382 	    // TODO: remove "KEP" later
2383 	    else if ((is_aff_rule(items, itemcnt, "KEP", 2)
2384 			|| is_aff_rule(items, itemcnt, "KEEPCASE", 2))
2385 						     && aff->af_keepcase == 0)
2386 	    {
2387 		aff->af_keepcase = affitem2flag(aff->af_flagtype, items[1],
2388 								 fname, lnum);
2389 	    }
2390 	    else if ((is_aff_rule(items, itemcnt, "BAD", 2)
2391 			|| is_aff_rule(items, itemcnt, "FORBIDDENWORD", 2))
2392 							  && aff->af_bad == 0)
2393 	    {
2394 		aff->af_bad = affitem2flag(aff->af_flagtype, items[1],
2395 								 fname, lnum);
2396 	    }
2397 	    else if (is_aff_rule(items, itemcnt, "NEEDAFFIX", 2)
2398 						    && aff->af_needaffix == 0)
2399 	    {
2400 		aff->af_needaffix = affitem2flag(aff->af_flagtype, items[1],
2401 								 fname, lnum);
2402 	    }
2403 	    else if (is_aff_rule(items, itemcnt, "CIRCUMFIX", 2)
2404 						    && aff->af_circumfix == 0)
2405 	    {
2406 		aff->af_circumfix = affitem2flag(aff->af_flagtype, items[1],
2407 								 fname, lnum);
2408 	    }
2409 	    else if (is_aff_rule(items, itemcnt, "NOSUGGEST", 2)
2410 						    && aff->af_nosuggest == 0)
2411 	    {
2412 		aff->af_nosuggest = affitem2flag(aff->af_flagtype, items[1],
2413 								 fname, lnum);
2414 	    }
2415 	    else if ((is_aff_rule(items, itemcnt, "NEEDCOMPOUND", 2)
2416 			|| is_aff_rule(items, itemcnt, "ONLYINCOMPOUND", 2))
2417 						     && aff->af_needcomp == 0)
2418 	    {
2419 		aff->af_needcomp = affitem2flag(aff->af_flagtype, items[1],
2420 								 fname, lnum);
2421 	    }
2422 	    else if (is_aff_rule(items, itemcnt, "COMPOUNDROOT", 2)
2423 						     && aff->af_comproot == 0)
2424 	    {
2425 		aff->af_comproot = affitem2flag(aff->af_flagtype, items[1],
2426 								 fname, lnum);
2427 	    }
2428 	    else if (is_aff_rule(items, itemcnt, "COMPOUNDFORBIDFLAG", 2)
2429 						   && aff->af_compforbid == 0)
2430 	    {
2431 		aff->af_compforbid = affitem2flag(aff->af_flagtype, items[1],
2432 								 fname, lnum);
2433 		if (aff->af_pref.ht_used > 0)
2434 		    smsg(_("Defining COMPOUNDFORBIDFLAG after PFX item may give wrong results in %s line %d"),
2435 			    fname, lnum);
2436 	    }
2437 	    else if (is_aff_rule(items, itemcnt, "COMPOUNDPERMITFLAG", 2)
2438 						   && aff->af_comppermit == 0)
2439 	    {
2440 		aff->af_comppermit = affitem2flag(aff->af_flagtype, items[1],
2441 								 fname, lnum);
2442 		if (aff->af_pref.ht_used > 0)
2443 		    smsg(_("Defining COMPOUNDPERMITFLAG after PFX item may give wrong results in %s line %d"),
2444 			    fname, lnum);
2445 	    }
2446 	    else if (is_aff_rule(items, itemcnt, "COMPOUNDFLAG", 2)
2447 							 && compflags == NULL)
2448 	    {
2449 		// Turn flag "c" into COMPOUNDRULE compatible string "c+",
2450 		// "Na" into "Na+", "1234" into "1234+".
2451 		p = getroom(spin, STRLEN(items[1]) + 2, FALSE);
2452 		if (p != NULL)
2453 		{
2454 		    STRCPY(p, items[1]);
2455 		    STRCAT(p, "+");
2456 		    compflags = p;
2457 		}
2458 	    }
2459 	    else if (is_aff_rule(items, itemcnt, "COMPOUNDRULES", 2))
2460 	    {
2461 		// We don't use the count, but do check that it's a number and
2462 		// not COMPOUNDRULE mistyped.
2463 		if (atoi((char *)items[1]) == 0)
2464 		    smsg(_("Wrong COMPOUNDRULES value in %s line %d: %s"),
2465 						       fname, lnum, items[1]);
2466 	    }
2467 	    else if (is_aff_rule(items, itemcnt, "COMPOUNDRULE", 2))
2468 	    {
2469 		// Don't use the first rule if it is a number.
2470 		if (compflags != NULL || *skipdigits(items[1]) != NUL)
2471 		{
2472 		    // Concatenate this string to previously defined ones,
2473 		    // using a slash to separate them.
2474 		    l = (int)STRLEN(items[1]) + 1;
2475 		    if (compflags != NULL)
2476 			l += (int)STRLEN(compflags) + 1;
2477 		    p = getroom(spin, l, FALSE);
2478 		    if (p != NULL)
2479 		    {
2480 			if (compflags != NULL)
2481 			{
2482 			    STRCPY(p, compflags);
2483 			    STRCAT(p, "/");
2484 			}
2485 			STRCAT(p, items[1]);
2486 			compflags = p;
2487 		    }
2488 		}
2489 	    }
2490 	    else if (is_aff_rule(items, itemcnt, "COMPOUNDWORDMAX", 2)
2491 							      && compmax == 0)
2492 	    {
2493 		compmax = atoi((char *)items[1]);
2494 		if (compmax == 0)
2495 		    smsg(_("Wrong COMPOUNDWORDMAX value in %s line %d: %s"),
2496 						       fname, lnum, items[1]);
2497 	    }
2498 	    else if (is_aff_rule(items, itemcnt, "COMPOUNDMIN", 2)
2499 							   && compminlen == 0)
2500 	    {
2501 		compminlen = atoi((char *)items[1]);
2502 		if (compminlen == 0)
2503 		    smsg(_("Wrong COMPOUNDMIN value in %s line %d: %s"),
2504 						       fname, lnum, items[1]);
2505 	    }
2506 	    else if (is_aff_rule(items, itemcnt, "COMPOUNDSYLMAX", 2)
2507 							   && compsylmax == 0)
2508 	    {
2509 		compsylmax = atoi((char *)items[1]);
2510 		if (compsylmax == 0)
2511 		    smsg(_("Wrong COMPOUNDSYLMAX value in %s line %d: %s"),
2512 						       fname, lnum, items[1]);
2513 	    }
2514 	    else if (is_aff_rule(items, itemcnt, "CHECKCOMPOUNDDUP", 1))
2515 	    {
2516 		compoptions |= COMP_CHECKDUP;
2517 	    }
2518 	    else if (is_aff_rule(items, itemcnt, "CHECKCOMPOUNDREP", 1))
2519 	    {
2520 		compoptions |= COMP_CHECKREP;
2521 	    }
2522 	    else if (is_aff_rule(items, itemcnt, "CHECKCOMPOUNDCASE", 1))
2523 	    {
2524 		compoptions |= COMP_CHECKCASE;
2525 	    }
2526 	    else if (is_aff_rule(items, itemcnt, "CHECKCOMPOUNDTRIPLE", 1))
2527 	    {
2528 		compoptions |= COMP_CHECKTRIPLE;
2529 	    }
2530 	    else if (is_aff_rule(items, itemcnt, "CHECKCOMPOUNDPATTERN", 2))
2531 	    {
2532 		if (atoi((char *)items[1]) == 0)
2533 		    smsg(_("Wrong CHECKCOMPOUNDPATTERN value in %s line %d: %s"),
2534 						       fname, lnum, items[1]);
2535 	    }
2536 	    else if (is_aff_rule(items, itemcnt, "CHECKCOMPOUNDPATTERN", 3))
2537 	    {
2538 		garray_T    *gap = &spin->si_comppat;
2539 		int	    i;
2540 
2541 		// Only add the couple if it isn't already there.
2542 		for (i = 0; i < gap->ga_len - 1; i += 2)
2543 		    if (STRCMP(((char_u **)(gap->ga_data))[i], items[1]) == 0
2544 			    && STRCMP(((char_u **)(gap->ga_data))[i + 1],
2545 							       items[2]) == 0)
2546 			break;
2547 		if (i >= gap->ga_len && ga_grow(gap, 2) == OK)
2548 		{
2549 		    ((char_u **)(gap->ga_data))[gap->ga_len++]
2550 					       = getroom_save(spin, items[1]);
2551 		    ((char_u **)(gap->ga_data))[gap->ga_len++]
2552 					       = getroom_save(spin, items[2]);
2553 		}
2554 	    }
2555 	    else if (is_aff_rule(items, itemcnt, "SYLLABLE", 2)
2556 							  && syllable == NULL)
2557 	    {
2558 		syllable = getroom_save(spin, items[1]);
2559 	    }
2560 	    else if (is_aff_rule(items, itemcnt, "NOBREAK", 1))
2561 	    {
2562 		spin->si_nobreak = TRUE;
2563 	    }
2564 	    else if (is_aff_rule(items, itemcnt, "NOSPLITSUGS", 1))
2565 	    {
2566 		spin->si_nosplitsugs = TRUE;
2567 	    }
2568 	    else if (is_aff_rule(items, itemcnt, "NOCOMPOUNDSUGS", 1))
2569 	    {
2570 		spin->si_nocompoundsugs = TRUE;
2571 	    }
2572 	    else if (is_aff_rule(items, itemcnt, "NOSUGFILE", 1))
2573 	    {
2574 		spin->si_nosugfile = TRUE;
2575 	    }
2576 	    else if (is_aff_rule(items, itemcnt, "PFXPOSTPONE", 1))
2577 	    {
2578 		aff->af_pfxpostpone = TRUE;
2579 	    }
2580 	    else if (is_aff_rule(items, itemcnt, "IGNOREEXTRA", 1))
2581 	    {
2582 		aff->af_ignoreextra = TRUE;
2583 	    }
2584 	    else if ((STRCMP(items[0], "PFX") == 0
2585 					      || STRCMP(items[0], "SFX") == 0)
2586 		    && aff_todo == 0
2587 		    && itemcnt >= 4)
2588 	    {
2589 		int	lasti = 4;
2590 		char_u	key[AH_KEY_LEN];
2591 
2592 		if (*items[0] == 'P')
2593 		    tp = &aff->af_pref;
2594 		else
2595 		    tp = &aff->af_suff;
2596 
2597 		// Myspell allows the same affix name to be used multiple
2598 		// times.  The affix files that do this have an undocumented
2599 		// "S" flag on all but the last block, thus we check for that
2600 		// and store it in ah_follows.
2601 		vim_strncpy(key, items[1], AH_KEY_LEN - 1);
2602 		hi = hash_find(tp, key);
2603 		if (!HASHITEM_EMPTY(hi))
2604 		{
2605 		    cur_aff = HI2AH(hi);
2606 		    if (cur_aff->ah_combine != (*items[2] == 'Y'))
2607 			smsg(_("Different combining flag in continued affix block in %s line %d: %s"),
2608 						   fname, lnum, items[1]);
2609 		    if (!cur_aff->ah_follows)
2610 			smsg(_("Duplicate affix in %s line %d: %s"),
2611 						       fname, lnum, items[1]);
2612 		}
2613 		else
2614 		{
2615 		    // New affix letter.
2616 		    cur_aff = (affheader_T *)getroom(spin,
2617 						   sizeof(affheader_T), TRUE);
2618 		    if (cur_aff == NULL)
2619 			break;
2620 		    cur_aff->ah_flag = affitem2flag(aff->af_flagtype, items[1],
2621 								 fname, lnum);
2622 		    if (cur_aff->ah_flag == 0 || STRLEN(items[1]) >= AH_KEY_LEN)
2623 			break;
2624 		    if (cur_aff->ah_flag == aff->af_bad
2625 			    || cur_aff->ah_flag == aff->af_rare
2626 			    || cur_aff->ah_flag == aff->af_keepcase
2627 			    || cur_aff->ah_flag == aff->af_needaffix
2628 			    || cur_aff->ah_flag == aff->af_circumfix
2629 			    || cur_aff->ah_flag == aff->af_nosuggest
2630 			    || cur_aff->ah_flag == aff->af_needcomp
2631 			    || cur_aff->ah_flag == aff->af_comproot)
2632 			smsg(_("Affix also used for BAD/RARE/KEEPCASE/NEEDAFFIX/NEEDCOMPOUND/NOSUGGEST in %s line %d: %s"),
2633 						       fname, lnum, items[1]);
2634 		    STRCPY(cur_aff->ah_key, items[1]);
2635 		    hash_add(tp, cur_aff->ah_key);
2636 
2637 		    cur_aff->ah_combine = (*items[2] == 'Y');
2638 		}
2639 
2640 		// Check for the "S" flag, which apparently means that another
2641 		// block with the same affix name is following.
2642 		if (itemcnt > lasti && STRCMP(items[lasti], "S") == 0)
2643 		{
2644 		    ++lasti;
2645 		    cur_aff->ah_follows = TRUE;
2646 		}
2647 		else
2648 		    cur_aff->ah_follows = FALSE;
2649 
2650 		// Myspell allows extra text after the item, but that might
2651 		// mean mistakes go unnoticed.  Require a comment-starter.
2652 		if (itemcnt > lasti && *items[lasti] != '#')
2653 		    smsg(_(e_afftrailing), fname, lnum, items[lasti]);
2654 
2655 		if (STRCMP(items[2], "Y") != 0 && STRCMP(items[2], "N") != 0)
2656 		    smsg(_("Expected Y or N in %s line %d: %s"),
2657 						       fname, lnum, items[2]);
2658 
2659 		if (*items[0] == 'P' && aff->af_pfxpostpone)
2660 		{
2661 		    if (cur_aff->ah_newID == 0)
2662 		    {
2663 			// Use a new number in the .spl file later, to be able
2664 			// to handle multiple .aff files.
2665 			check_renumber(spin);
2666 			cur_aff->ah_newID = ++spin->si_newprefID;
2667 
2668 			// We only really use ah_newID if the prefix is
2669 			// postponed.  We know that only after handling all
2670 			// the items.
2671 			did_postpone_prefix = FALSE;
2672 		    }
2673 		    else
2674 			// Did use the ID in a previous block.
2675 			did_postpone_prefix = TRUE;
2676 		}
2677 
2678 		aff_todo = atoi((char *)items[3]);
2679 	    }
2680 	    else if ((STRCMP(items[0], "PFX") == 0
2681 					      || STRCMP(items[0], "SFX") == 0)
2682 		    && aff_todo > 0
2683 		    && STRCMP(cur_aff->ah_key, items[1]) == 0
2684 		    && itemcnt >= 5)
2685 	    {
2686 		affentry_T	*aff_entry;
2687 		int		upper = FALSE;
2688 		int		lasti = 5;
2689 
2690 		// Myspell allows extra text after the item, but that might
2691 		// mean mistakes go unnoticed.  Require a comment-starter,
2692 		// unless IGNOREEXTRA is used.  Hunspell uses a "-" item.
2693 		if (itemcnt > lasti
2694 			&& !aff->af_ignoreextra
2695 			&& *items[lasti] != '#'
2696 			&& (STRCMP(items[lasti], "-") != 0
2697 						     || itemcnt != lasti + 1))
2698 		    smsg(_(e_afftrailing), fname, lnum, items[lasti]);
2699 
2700 		// New item for an affix letter.
2701 		--aff_todo;
2702 		aff_entry = (affentry_T *)getroom(spin,
2703 						    sizeof(affentry_T), TRUE);
2704 		if (aff_entry == NULL)
2705 		    break;
2706 
2707 		if (STRCMP(items[2], "0") != 0)
2708 		    aff_entry->ae_chop = getroom_save(spin, items[2]);
2709 		if (STRCMP(items[3], "0") != 0)
2710 		{
2711 		    aff_entry->ae_add = getroom_save(spin, items[3]);
2712 
2713 		    // Recognize flags on the affix: abcd/XYZ
2714 		    aff_entry->ae_flags = vim_strchr(aff_entry->ae_add, '/');
2715 		    if (aff_entry->ae_flags != NULL)
2716 		    {
2717 			*aff_entry->ae_flags++ = NUL;
2718 			aff_process_flags(aff, aff_entry);
2719 		    }
2720 		}
2721 
2722 		// Don't use an affix entry with non-ASCII characters when
2723 		// "spin->si_ascii" is TRUE.
2724 		if (!spin->si_ascii || !(has_non_ascii(aff_entry->ae_chop)
2725 					  || has_non_ascii(aff_entry->ae_add)))
2726 		{
2727 		    aff_entry->ae_next = cur_aff->ah_first;
2728 		    cur_aff->ah_first = aff_entry;
2729 
2730 		    if (STRCMP(items[4], ".") != 0)
2731 		    {
2732 			char_u	buf[MAXLINELEN];
2733 
2734 			aff_entry->ae_cond = getroom_save(spin, items[4]);
2735 			if (*items[0] == 'P')
2736 			    sprintf((char *)buf, "^%s", items[4]);
2737 			else
2738 			    sprintf((char *)buf, "%s$", items[4]);
2739 			aff_entry->ae_prog = vim_regcomp(buf,
2740 					    RE_MAGIC + RE_STRING + RE_STRICT);
2741 			if (aff_entry->ae_prog == NULL)
2742 			    smsg(_("Broken condition in %s line %d: %s"),
2743 						       fname, lnum, items[4]);
2744 		    }
2745 
2746 		    // For postponed prefixes we need an entry in si_prefcond
2747 		    // for the condition.  Use an existing one if possible.
2748 		    // Can't be done for an affix with flags, ignoring
2749 		    // COMPOUNDFORBIDFLAG and COMPOUNDPERMITFLAG.
2750 		    if (*items[0] == 'P' && aff->af_pfxpostpone
2751 					       && aff_entry->ae_flags == NULL)
2752 		    {
2753 			// When the chop string is one lower-case letter and
2754 			// the add string ends in the upper-case letter we set
2755 			// the "upper" flag, clear "ae_chop" and remove the
2756 			// letters from "ae_add".  The condition must either
2757 			// be empty or start with the same letter.
2758 			if (aff_entry->ae_chop != NULL
2759 				&& aff_entry->ae_add != NULL
2760 				&& aff_entry->ae_chop[(*mb_ptr2len)(
2761 						   aff_entry->ae_chop)] == NUL)
2762 			{
2763 			    int		c, c_up;
2764 
2765 			    c = PTR2CHAR(aff_entry->ae_chop);
2766 			    c_up = SPELL_TOUPPER(c);
2767 			    if (c_up != c
2768 				    && (aff_entry->ae_cond == NULL
2769 					|| PTR2CHAR(aff_entry->ae_cond) == c))
2770 			    {
2771 				p = aff_entry->ae_add
2772 						  + STRLEN(aff_entry->ae_add);
2773 				MB_PTR_BACK(aff_entry->ae_add, p);
2774 				if (PTR2CHAR(p) == c_up)
2775 				{
2776 				    upper = TRUE;
2777 				    aff_entry->ae_chop = NULL;
2778 				    *p = NUL;
2779 
2780 				    // The condition is matched with the
2781 				    // actual word, thus must check for the
2782 				    // upper-case letter.
2783 				    if (aff_entry->ae_cond != NULL)
2784 				    {
2785 					char_u	buf[MAXLINELEN];
2786 
2787 					if (has_mbyte)
2788 					{
2789 					    onecap_copy(items[4], buf, TRUE);
2790 					    aff_entry->ae_cond = getroom_save(
2791 								   spin, buf);
2792 					}
2793 					else
2794 					    *aff_entry->ae_cond = c_up;
2795 					if (aff_entry->ae_cond != NULL)
2796 					{
2797 					    sprintf((char *)buf, "^%s",
2798 							  aff_entry->ae_cond);
2799 					    vim_regfree(aff_entry->ae_prog);
2800 					    aff_entry->ae_prog = vim_regcomp(
2801 						    buf, RE_MAGIC + RE_STRING);
2802 					}
2803 				    }
2804 				}
2805 			    }
2806 			}
2807 
2808 			if (aff_entry->ae_chop == NULL
2809 					       && aff_entry->ae_flags == NULL)
2810 			{
2811 			    int		idx;
2812 			    char_u	**pp;
2813 			    int		n;
2814 
2815 			    // Find a previously used condition.
2816 			    for (idx = spin->si_prefcond.ga_len - 1; idx >= 0;
2817 									--idx)
2818 			    {
2819 				p = ((char_u **)spin->si_prefcond.ga_data)[idx];
2820 				if (str_equal(p, aff_entry->ae_cond))
2821 				    break;
2822 			    }
2823 			    if (idx < 0 && ga_grow(&spin->si_prefcond, 1) == OK)
2824 			    {
2825 				// Not found, add a new condition.
2826 				idx = spin->si_prefcond.ga_len++;
2827 				pp = ((char_u **)spin->si_prefcond.ga_data)
2828 									+ idx;
2829 				if (aff_entry->ae_cond == NULL)
2830 				    *pp = NULL;
2831 				else
2832 				    *pp = getroom_save(spin,
2833 							  aff_entry->ae_cond);
2834 			    }
2835 
2836 			    // Add the prefix to the prefix tree.
2837 			    if (aff_entry->ae_add == NULL)
2838 				p = (char_u *)"";
2839 			    else
2840 				p = aff_entry->ae_add;
2841 
2842 			    // PFX_FLAGS is a negative number, so that
2843 			    // tree_add_word() knows this is the prefix tree.
2844 			    n = PFX_FLAGS;
2845 			    if (!cur_aff->ah_combine)
2846 				n |= WFP_NC;
2847 			    if (upper)
2848 				n |= WFP_UP;
2849 			    if (aff_entry->ae_comppermit)
2850 				n |= WFP_COMPPERMIT;
2851 			    if (aff_entry->ae_compforbid)
2852 				n |= WFP_COMPFORBID;
2853 			    tree_add_word(spin, p, spin->si_prefroot, n,
2854 						      idx, cur_aff->ah_newID);
2855 			    did_postpone_prefix = TRUE;
2856 			}
2857 
2858 			// Didn't actually use ah_newID, backup si_newprefID.
2859 			if (aff_todo == 0 && !did_postpone_prefix)
2860 			{
2861 			    --spin->si_newprefID;
2862 			    cur_aff->ah_newID = 0;
2863 			}
2864 		    }
2865 		}
2866 	    }
2867 	    else if (is_aff_rule(items, itemcnt, "FOL", 2) && fol == NULL)
2868 	    {
2869 		fol = vim_strsave(items[1]);
2870 	    }
2871 	    else if (is_aff_rule(items, itemcnt, "LOW", 2) && low == NULL)
2872 	    {
2873 		low = vim_strsave(items[1]);
2874 	    }
2875 	    else if (is_aff_rule(items, itemcnt, "UPP", 2) && upp == NULL)
2876 	    {
2877 		upp = vim_strsave(items[1]);
2878 	    }
2879 	    else if (is_aff_rule(items, itemcnt, "REP", 2)
2880 		     || is_aff_rule(items, itemcnt, "REPSAL", 2))
2881 	    {
2882 		// Ignore REP/REPSAL count
2883 		if (!isdigit(*items[1]))
2884 		    smsg(_("Expected REP(SAL) count in %s line %d"),
2885 								 fname, lnum);
2886 	    }
2887 	    else if ((STRCMP(items[0], "REP") == 0
2888 			|| STRCMP(items[0], "REPSAL") == 0)
2889 		    && itemcnt >= 3)
2890 	    {
2891 		// REP/REPSAL item
2892 		// Myspell ignores extra arguments, we require it starts with
2893 		// # to detect mistakes.
2894 		if (itemcnt > 3 && items[3][0] != '#')
2895 		    smsg(_(e_afftrailing), fname, lnum, items[3]);
2896 		if (items[0][3] == 'S' ? do_repsal : do_rep)
2897 		{
2898 		    // Replace underscore with space (can't include a space
2899 		    // directly).
2900 		    for (p = items[1]; *p != NUL; MB_PTR_ADV(p))
2901 			if (*p == '_')
2902 			    *p = ' ';
2903 		    for (p = items[2]; *p != NUL; MB_PTR_ADV(p))
2904 			if (*p == '_')
2905 			    *p = ' ';
2906 		    add_fromto(spin, items[0][3] == 'S'
2907 					 ? &spin->si_repsal
2908 					 : &spin->si_rep, items[1], items[2]);
2909 		}
2910 	    }
2911 	    else if (is_aff_rule(items, itemcnt, "MAP", 2))
2912 	    {
2913 		// MAP item or count
2914 		if (!found_map)
2915 		{
2916 		    // First line contains the count.
2917 		    found_map = TRUE;
2918 		    if (!isdigit(*items[1]))
2919 			smsg(_("Expected MAP count in %s line %d"),
2920 								 fname, lnum);
2921 		}
2922 		else if (do_mapline)
2923 		{
2924 		    int		c;
2925 
2926 		    // Check that every character appears only once.
2927 		    for (p = items[1]; *p != NUL; )
2928 		    {
2929 			c = mb_ptr2char_adv(&p);
2930 			if ((spin->si_map.ga_len > 0
2931 				    && vim_strchr(spin->si_map.ga_data, c)
2932 								      != NULL)
2933 				|| vim_strchr(p, c) != NULL)
2934 			    smsg(_("Duplicate character in MAP in %s line %d"),
2935 								 fname, lnum);
2936 		    }
2937 
2938 		    // We simply concatenate all the MAP strings, separated by
2939 		    // slashes.
2940 		    ga_concat(&spin->si_map, items[1]);
2941 		    ga_append(&spin->si_map, '/');
2942 		}
2943 	    }
2944 	    // Accept "SAL from to" and "SAL from to  #comment".
2945 	    else if (is_aff_rule(items, itemcnt, "SAL", 3))
2946 	    {
2947 		if (do_sal)
2948 		{
2949 		    // SAL item (sounds-a-like)
2950 		    // Either one of the known keys or a from-to pair.
2951 		    if (STRCMP(items[1], "followup") == 0)
2952 			spin->si_followup = sal_to_bool(items[2]);
2953 		    else if (STRCMP(items[1], "collapse_result") == 0)
2954 			spin->si_collapse = sal_to_bool(items[2]);
2955 		    else if (STRCMP(items[1], "remove_accents") == 0)
2956 			spin->si_rem_accents = sal_to_bool(items[2]);
2957 		    else
2958 			// when "to" is "_" it means empty
2959 			add_fromto(spin, &spin->si_sal, items[1],
2960 				     STRCMP(items[2], "_") == 0 ? (char_u *)""
2961 								: items[2]);
2962 		}
2963 	    }
2964 	    else if (is_aff_rule(items, itemcnt, "SOFOFROM", 2)
2965 							  && sofofrom == NULL)
2966 	    {
2967 		sofofrom = getroom_save(spin, items[1]);
2968 	    }
2969 	    else if (is_aff_rule(items, itemcnt, "SOFOTO", 2)
2970 							    && sofoto == NULL)
2971 	    {
2972 		sofoto = getroom_save(spin, items[1]);
2973 	    }
2974 	    else if (STRCMP(items[0], "COMMON") == 0)
2975 	    {
2976 		int	i;
2977 
2978 		for (i = 1; i < itemcnt; ++i)
2979 		{
2980 		    if (HASHITEM_EMPTY(hash_find(&spin->si_commonwords,
2981 								   items[i])))
2982 		    {
2983 			p = vim_strsave(items[i]);
2984 			if (p == NULL)
2985 			    break;
2986 			hash_add(&spin->si_commonwords, p);
2987 		    }
2988 		}
2989 	    }
2990 	    else
2991 		smsg(_("Unrecognized or duplicate item in %s line %d: %s"),
2992 						       fname, lnum, items[0]);
2993 	}
2994     }
2995 
2996     if (fol != NULL || low != NULL || upp != NULL)
2997     {
2998 	if (spin->si_clear_chartab)
2999 	{
3000 	    // Clear the char type tables, don't want to use any of the
3001 	    // currently used spell properties.
3002 	    init_spell_chartab();
3003 	    spin->si_clear_chartab = FALSE;
3004 	}
3005 
3006 	/*
3007 	 * Don't write a word table for an ASCII file, so that we don't check
3008 	 * for conflicts with a word table that matches 'encoding'.
3009 	 * Don't write one for utf-8 either, we use utf_*() and
3010 	 * mb_get_class(), the list of chars in the file will be incomplete.
3011 	 */
3012 	if (!spin->si_ascii && !enc_utf8)
3013 	{
3014 	    if (fol == NULL || low == NULL || upp == NULL)
3015 		smsg(_("Missing FOL/LOW/UPP line in %s"), fname);
3016 	    else
3017 		(void)set_spell_chartab(fol, low, upp);
3018 	}
3019 
3020 	vim_free(fol);
3021 	vim_free(low);
3022 	vim_free(upp);
3023     }
3024 
3025     // Use compound specifications of the .aff file for the spell info.
3026     if (compmax != 0)
3027     {
3028 	aff_check_number(spin->si_compmax, compmax, "COMPOUNDWORDMAX");
3029 	spin->si_compmax = compmax;
3030     }
3031 
3032     if (compminlen != 0)
3033     {
3034 	aff_check_number(spin->si_compminlen, compminlen, "COMPOUNDMIN");
3035 	spin->si_compminlen = compminlen;
3036     }
3037 
3038     if (compsylmax != 0)
3039     {
3040 	if (syllable == NULL)
3041 	    smsg(_("COMPOUNDSYLMAX used without SYLLABLE"));
3042 	aff_check_number(spin->si_compsylmax, compsylmax, "COMPOUNDSYLMAX");
3043 	spin->si_compsylmax = compsylmax;
3044     }
3045 
3046     if (compoptions != 0)
3047     {
3048 	aff_check_number(spin->si_compoptions, compoptions, "COMPOUND options");
3049 	spin->si_compoptions |= compoptions;
3050     }
3051 
3052     if (compflags != NULL)
3053 	process_compflags(spin, aff, compflags);
3054 
3055     // Check that we didn't use too many renumbered flags.
3056     if (spin->si_newcompID < spin->si_newprefID)
3057     {
3058 	if (spin->si_newcompID == 127 || spin->si_newcompID == 255)
3059 	    msg(_("Too many postponed prefixes"));
3060 	else if (spin->si_newprefID == 0 || spin->si_newprefID == 127)
3061 	    msg(_("Too many compound flags"));
3062 	else
3063 	    msg(_("Too many postponed prefixes and/or compound flags"));
3064     }
3065 
3066     if (syllable != NULL)
3067     {
3068 	aff_check_string(spin->si_syllable, syllable, "SYLLABLE");
3069 	spin->si_syllable = syllable;
3070     }
3071 
3072     if (sofofrom != NULL || sofoto != NULL)
3073     {
3074 	if (sofofrom == NULL || sofoto == NULL)
3075 	    smsg(_("Missing SOFO%s line in %s"),
3076 				     sofofrom == NULL ? "FROM" : "TO", fname);
3077 	else if (spin->si_sal.ga_len > 0)
3078 	    smsg(_("Both SAL and SOFO lines in %s"), fname);
3079 	else
3080 	{
3081 	    aff_check_string(spin->si_sofofr, sofofrom, "SOFOFROM");
3082 	    aff_check_string(spin->si_sofoto, sofoto, "SOFOTO");
3083 	    spin->si_sofofr = sofofrom;
3084 	    spin->si_sofoto = sofoto;
3085 	}
3086     }
3087 
3088     if (midword != NULL)
3089     {
3090 	aff_check_string(spin->si_midword, midword, "MIDWORD");
3091 	spin->si_midword = midword;
3092     }
3093 
3094     vim_free(pc);
3095     fclose(fd);
3096     return aff;
3097 }
3098 
3099 /*
3100  * Return TRUE when items[0] equals "rulename", there are "mincount" items or
3101  * a comment is following after item "mincount".
3102  */
3103     static int
3104 is_aff_rule(
3105     char_u	**items,
3106     int		itemcnt,
3107     char	*rulename,
3108     int		mincount)
3109 {
3110     return (STRCMP(items[0], rulename) == 0
3111 	    && (itemcnt == mincount
3112 		|| (itemcnt > mincount && items[mincount][0] == '#')));
3113 }
3114 
3115 /*
3116  * For affix "entry" move COMPOUNDFORBIDFLAG and COMPOUNDPERMITFLAG from
3117  * ae_flags to ae_comppermit and ae_compforbid.
3118  */
3119     static void
3120 aff_process_flags(afffile_T *affile, affentry_T *entry)
3121 {
3122     char_u	*p;
3123     char_u	*prevp;
3124     unsigned	flag;
3125 
3126     if (entry->ae_flags != NULL
3127 		&& (affile->af_compforbid != 0 || affile->af_comppermit != 0))
3128     {
3129 	for (p = entry->ae_flags; *p != NUL; )
3130 	{
3131 	    prevp = p;
3132 	    flag = get_affitem(affile->af_flagtype, &p);
3133 	    if (flag == affile->af_comppermit || flag == affile->af_compforbid)
3134 	    {
3135 		STRMOVE(prevp, p);
3136 		p = prevp;
3137 		if (flag == affile->af_comppermit)
3138 		    entry->ae_comppermit = TRUE;
3139 		else
3140 		    entry->ae_compforbid = TRUE;
3141 	    }
3142 	    if (affile->af_flagtype == AFT_NUM && *p == ',')
3143 		++p;
3144 	}
3145 	if (*entry->ae_flags == NUL)
3146 	    entry->ae_flags = NULL;	// nothing left
3147     }
3148 }
3149 
3150 /*
3151  * Return TRUE if "s" is the name of an info item in the affix file.
3152  */
3153     static int
3154 spell_info_item(char_u *s)
3155 {
3156     return STRCMP(s, "NAME") == 0
3157 	|| STRCMP(s, "HOME") == 0
3158 	|| STRCMP(s, "VERSION") == 0
3159 	|| STRCMP(s, "AUTHOR") == 0
3160 	|| STRCMP(s, "EMAIL") == 0
3161 	|| STRCMP(s, "COPYRIGHT") == 0;
3162 }
3163 
3164 /*
3165  * Turn an affix flag name into a number, according to the FLAG type.
3166  * returns zero for failure.
3167  */
3168     static unsigned
3169 affitem2flag(
3170     int		flagtype,
3171     char_u	*item,
3172     char_u	*fname,
3173     int		lnum)
3174 {
3175     unsigned	res;
3176     char_u	*p = item;
3177 
3178     res = get_affitem(flagtype, &p);
3179     if (res == 0)
3180     {
3181 	if (flagtype == AFT_NUM)
3182 	    smsg(_("Flag is not a number in %s line %d: %s"),
3183 							   fname, lnum, item);
3184 	else
3185 	    smsg(_("Illegal flag in %s line %d: %s"),
3186 							   fname, lnum, item);
3187     }
3188     if (*p != NUL)
3189     {
3190 	smsg(_(e_affname), fname, lnum, item);
3191 	return 0;
3192     }
3193 
3194     return res;
3195 }
3196 
3197 /*
3198  * Get one affix name from "*pp" and advance the pointer.
3199  * Returns ZERO_FLAG for "0".
3200  * Returns zero for an error, still advances the pointer then.
3201  */
3202     static unsigned
3203 get_affitem(int flagtype, char_u **pp)
3204 {
3205     int		res;
3206 
3207     if (flagtype == AFT_NUM)
3208     {
3209 	if (!VIM_ISDIGIT(**pp))
3210 	{
3211 	    ++*pp;	// always advance, avoid getting stuck
3212 	    return 0;
3213 	}
3214 	res = getdigits(pp);
3215 	if (res == 0)
3216 	    res = ZERO_FLAG;
3217     }
3218     else
3219     {
3220 	res = mb_ptr2char_adv(pp);
3221 	if (flagtype == AFT_LONG || (flagtype == AFT_CAPLONG
3222 						 && res >= 'A' && res <= 'Z'))
3223 	{
3224 	    if (**pp == NUL)
3225 		return 0;
3226 	    res = mb_ptr2char_adv(pp) + (res << 16);
3227 	}
3228     }
3229     return res;
3230 }
3231 
3232 /*
3233  * Process the "compflags" string used in an affix file and append it to
3234  * spin->si_compflags.
3235  * The processing involves changing the affix names to ID numbers, so that
3236  * they fit in one byte.
3237  */
3238     static void
3239 process_compflags(
3240     spellinfo_T	*spin,
3241     afffile_T	*aff,
3242     char_u	*compflags)
3243 {
3244     char_u	*p;
3245     char_u	*prevp;
3246     unsigned	flag;
3247     compitem_T	*ci;
3248     int		id;
3249     int		len;
3250     char_u	*tp;
3251     char_u	key[AH_KEY_LEN];
3252     hashitem_T	*hi;
3253 
3254     // Make room for the old and the new compflags, concatenated with a / in
3255     // between.  Processing it makes it shorter, but we don't know by how
3256     // much, thus allocate the maximum.
3257     len = (int)STRLEN(compflags) + 1;
3258     if (spin->si_compflags != NULL)
3259 	len += (int)STRLEN(spin->si_compflags) + 1;
3260     p = getroom(spin, len, FALSE);
3261     if (p == NULL)
3262 	return;
3263     if (spin->si_compflags != NULL)
3264     {
3265 	STRCPY(p, spin->si_compflags);
3266 	STRCAT(p, "/");
3267     }
3268     spin->si_compflags = p;
3269     tp = p + STRLEN(p);
3270 
3271     for (p = compflags; *p != NUL; )
3272     {
3273 	if (vim_strchr((char_u *)"/?*+[]", *p) != NULL)
3274 	    // Copy non-flag characters directly.
3275 	    *tp++ = *p++;
3276 	else
3277 	{
3278 	    // First get the flag number, also checks validity.
3279 	    prevp = p;
3280 	    flag = get_affitem(aff->af_flagtype, &p);
3281 	    if (flag != 0)
3282 	    {
3283 		// Find the flag in the hashtable.  If it was used before, use
3284 		// the existing ID.  Otherwise add a new entry.
3285 		vim_strncpy(key, prevp, p - prevp);
3286 		hi = hash_find(&aff->af_comp, key);
3287 		if (!HASHITEM_EMPTY(hi))
3288 		    id = HI2CI(hi)->ci_newID;
3289 		else
3290 		{
3291 		    ci = (compitem_T *)getroom(spin, sizeof(compitem_T), TRUE);
3292 		    if (ci == NULL)
3293 			break;
3294 		    STRCPY(ci->ci_key, key);
3295 		    ci->ci_flag = flag;
3296 		    // Avoid using a flag ID that has a special meaning in a
3297 		    // regexp (also inside []).
3298 		    do
3299 		    {
3300 			check_renumber(spin);
3301 			id = spin->si_newcompID--;
3302 		    } while (vim_strchr((char_u *)"/?*+[]\\-^", id) != NULL);
3303 		    ci->ci_newID = id;
3304 		    hash_add(&aff->af_comp, ci->ci_key);
3305 		}
3306 		*tp++ = id;
3307 	    }
3308 	    if (aff->af_flagtype == AFT_NUM && *p == ',')
3309 		++p;
3310 	}
3311     }
3312 
3313     *tp = NUL;
3314 }
3315 
3316 /*
3317  * Check that the new IDs for postponed affixes and compounding don't overrun
3318  * each other.  We have almost 255 available, but start at 0-127 to avoid
3319  * using two bytes for utf-8.  When the 0-127 range is used up go to 128-255.
3320  * When that is used up an error message is given.
3321  */
3322     static void
3323 check_renumber(spellinfo_T *spin)
3324 {
3325     if (spin->si_newprefID == spin->si_newcompID && spin->si_newcompID < 128)
3326     {
3327 	spin->si_newprefID = 127;
3328 	spin->si_newcompID = 255;
3329     }
3330 }
3331 
3332 /*
3333  * Return TRUE if flag "flag" appears in affix list "afflist".
3334  */
3335     static int
3336 flag_in_afflist(int flagtype, char_u *afflist, unsigned flag)
3337 {
3338     char_u	*p;
3339     unsigned	n;
3340 
3341     switch (flagtype)
3342     {
3343 	case AFT_CHAR:
3344 	    return vim_strchr(afflist, flag) != NULL;
3345 
3346 	case AFT_CAPLONG:
3347 	case AFT_LONG:
3348 	    for (p = afflist; *p != NUL; )
3349 	    {
3350 		n = mb_ptr2char_adv(&p);
3351 		if ((flagtype == AFT_LONG || (n >= 'A' && n <= 'Z'))
3352 								 && *p != NUL)
3353 		    n = mb_ptr2char_adv(&p) + (n << 16);
3354 		if (n == flag)
3355 		    return TRUE;
3356 	    }
3357 	    break;
3358 
3359 	case AFT_NUM:
3360 	    for (p = afflist; *p != NUL; )
3361 	    {
3362 		n = getdigits(&p);
3363 		if (n == 0)
3364 		    n = ZERO_FLAG;
3365 		if (n == flag)
3366 		    return TRUE;
3367 		if (*p != NUL)	// skip over comma
3368 		    ++p;
3369 	    }
3370 	    break;
3371     }
3372     return FALSE;
3373 }
3374 
/*
 * Give a warning when number "spinval" was set before (is not zero) and
 * differs from "affval".  "name" is the item name used in the message.
 */
    static void
aff_check_number(int spinval, int affval, char *name)
{
    if (spinval == 0 || spinval == affval)
	return;
    smsg(_("%s value differs from what is used in another .aff file"), name);
}
3384 
3385 /*
3386  * Give a warning when "spinval" and "affval" strings are set and not the same.
3387  */
3388     static void
3389 aff_check_string(char_u *spinval, char_u *affval, char *name)
3390 {
3391     if (spinval != NULL && STRCMP(spinval, affval) != 0)
3392 	smsg(_("%s value differs from what is used in another .aff file"), name);
3393 }
3394 
3395 /*
3396  * Return TRUE if strings "s1" and "s2" are equal.  Also consider both being
3397  * NULL as equal.
3398  */
3399     static int
3400 str_equal(char_u *s1, char_u *s2)
3401 {
3402     if (s1 == NULL || s2 == NULL)
3403 	return s1 == s2;
3404     return STRCMP(s1, s2) == 0;
3405 }
3406 
3407 /*
3408  * Add a from-to item to "gap".  Used for REP and SAL items.
3409  * They are stored case-folded.
3410  */
3411     static void
3412 add_fromto(
3413     spellinfo_T	*spin,
3414     garray_T	*gap,
3415     char_u	*from,
3416     char_u	*to)
3417 {
3418     fromto_T	*ftp;
3419     char_u	word[MAXWLEN];
3420 
3421     if (ga_grow(gap, 1) == OK)
3422     {
3423 	ftp = ((fromto_T *)gap->ga_data) + gap->ga_len;
3424 	(void)spell_casefold(from, (int)STRLEN(from), word, MAXWLEN);
3425 	ftp->ft_from = getroom_save(spin, word);
3426 	(void)spell_casefold(to, (int)STRLEN(to), word, MAXWLEN);
3427 	ftp->ft_to = getroom_save(spin, word);
3428 	++gap->ga_len;
3429     }
3430 }
3431 
3432 /*
3433  * Convert a boolean argument in a SAL line to TRUE or FALSE;
3434  */
3435     static int
3436 sal_to_bool(char_u *s)
3437 {
3438     return STRCMP(s, "1") == 0 || STRCMP(s, "true") == 0;
3439 }
3440 
3441 /*
3442  * Free the structure filled by spell_read_aff().
3443  */
3444     static void
3445 spell_free_aff(afffile_T *aff)
3446 {
3447     hashtab_T	*ht;
3448     hashitem_T	*hi;
3449     int		todo;
3450     affheader_T	*ah;
3451     affentry_T	*ae;
3452 
3453     vim_free(aff->af_enc);
3454 
3455     // All this trouble to free the "ae_prog" items...
3456     for (ht = &aff->af_pref; ; ht = &aff->af_suff)
3457     {
3458 	todo = (int)ht->ht_used;
3459 	for (hi = ht->ht_array; todo > 0; ++hi)
3460 	{
3461 	    if (!HASHITEM_EMPTY(hi))
3462 	    {
3463 		--todo;
3464 		ah = HI2AH(hi);
3465 		for (ae = ah->ah_first; ae != NULL; ae = ae->ae_next)
3466 		    vim_regfree(ae->ae_prog);
3467 	    }
3468 	}
3469 	if (ht == &aff->af_suff)
3470 	    break;
3471     }
3472 
3473     hash_clear(&aff->af_pref);
3474     hash_clear(&aff->af_suff);
3475     hash_clear(&aff->af_comp);
3476 }
3477 
3478 /*
3479  * Read dictionary file "fname".
3480  * Returns OK or FAIL;
3481  */
3482     static int
3483 spell_read_dic(spellinfo_T *spin, char_u *fname, afffile_T *affile)
3484 {
3485     hashtab_T	ht;
3486     char_u	line[MAXLINELEN];
3487     char_u	*p;
3488     char_u	*afflist;
3489     char_u	store_afflist[MAXWLEN];
3490     int		pfxlen;
3491     int		need_affix;
3492     char_u	*dw;
3493     char_u	*pc;
3494     char_u	*w;
3495     int		l;
3496     hash_T	hash;
3497     hashitem_T	*hi;
3498     FILE	*fd;
3499     int		lnum = 1;
3500     int		non_ascii = 0;
3501     int		retval = OK;
3502     char_u	message[MAXLINELEN + MAXWLEN];
3503     int		flags;
3504     int		duplicate = 0;
3505     time_T	last_msg_time = 0;
3506 
3507     /*
3508      * Open the file.
3509      */
3510     fd = mch_fopen((char *)fname, "r");
3511     if (fd == NULL)
3512     {
3513 	semsg(_(e_notopen), fname);
3514 	return FAIL;
3515     }
3516 
3517     // The hashtable is only used to detect duplicated words.
3518     hash_init(&ht);
3519 
3520     vim_snprintf((char *)IObuff, IOSIZE,
3521 				  _("Reading dictionary file %s..."), fname);
3522     spell_message(spin, IObuff);
3523 
3524     // start with a message for the first line
3525     spin->si_msg_count = 999999;
3526 
3527     // Read and ignore the first line: word count.
3528     (void)vim_fgets(line, MAXLINELEN, fd);
3529     if (!vim_isdigit(*skipwhite(line)))
3530 	semsg(_("E760: No word count in %s"), fname);
3531 
3532     /*
3533      * Read all the lines in the file one by one.
3534      * The words are converted to 'encoding' here, before being added to
3535      * the hashtable.
3536      */
3537     while (!vim_fgets(line, MAXLINELEN, fd) && !got_int)
3538     {
3539 	line_breakcheck();
3540 	++lnum;
3541 	if (line[0] == '#' || line[0] == '/')
3542 	    continue;	// comment line
3543 
3544 	// Remove CR, LF and white space from the end.  White space halfway
3545 	// the word is kept to allow e.g., "et al.".
3546 	l = (int)STRLEN(line);
3547 	while (l > 0 && line[l - 1] <= ' ')
3548 	    --l;
3549 	if (l == 0)
3550 	    continue;	// empty line
3551 	line[l] = NUL;
3552 
3553 	// Convert from "SET" to 'encoding' when needed.
3554 	if (spin->si_conv.vc_type != CONV_NONE)
3555 	{
3556 	    pc = string_convert(&spin->si_conv, line, NULL);
3557 	    if (pc == NULL)
3558 	    {
3559 		smsg(_("Conversion failure for word in %s line %d: %s"),
3560 						       fname, lnum, line);
3561 		continue;
3562 	    }
3563 	    w = pc;
3564 	}
3565 	else
3566 	{
3567 	    pc = NULL;
3568 	    w = line;
3569 	}
3570 
3571 	// Truncate the word at the "/", set "afflist" to what follows.
3572 	// Replace "\/" by "/" and "\\" by "\".
3573 	afflist = NULL;
3574 	for (p = w; *p != NUL; MB_PTR_ADV(p))
3575 	{
3576 	    if (*p == '\\' && (p[1] == '\\' || p[1] == '/'))
3577 		STRMOVE(p, p + 1);
3578 	    else if (*p == '/')
3579 	    {
3580 		*p = NUL;
3581 		afflist = p + 1;
3582 		break;
3583 	    }
3584 	}
3585 
3586 	// Skip non-ASCII words when "spin->si_ascii" is TRUE.
3587 	if (spin->si_ascii && has_non_ascii(w))
3588 	{
3589 	    ++non_ascii;
3590 	    vim_free(pc);
3591 	    continue;
3592 	}
3593 
3594 	// This takes time, print a message every 10000 words, but not more
3595 	// often than once per second.
3596 	if (spin->si_verbose && spin->si_msg_count > 10000)
3597 	{
3598 	    spin->si_msg_count = 0;
3599 	    if (vim_time() > last_msg_time)
3600 	    {
3601 		last_msg_time = vim_time();
3602 		vim_snprintf((char *)message, sizeof(message),
3603 			_("line %6d, word %6ld - %s"),
3604 			   lnum, spin->si_foldwcount + spin->si_keepwcount, w);
3605 		msg_start();
3606 		msg_outtrans_long_attr(message, 0);
3607 		msg_clr_eos();
3608 		msg_didout = FALSE;
3609 		msg_col = 0;
3610 		out_flush();
3611 	    }
3612 	}
3613 
3614 	// Store the word in the hashtable to be able to find duplicates.
3615 	dw = (char_u *)getroom_save(spin, w);
3616 	if (dw == NULL)
3617 	{
3618 	    retval = FAIL;
3619 	    vim_free(pc);
3620 	    break;
3621 	}
3622 
3623 	hash = hash_hash(dw);
3624 	hi = hash_lookup(&ht, dw, hash);
3625 	if (!HASHITEM_EMPTY(hi))
3626 	{
3627 	    if (p_verbose > 0)
3628 		smsg(_("Duplicate word in %s line %d: %s"),
3629 							     fname, lnum, dw);
3630 	    else if (duplicate == 0)
3631 		smsg(_("First duplicate word in %s line %d: %s"),
3632 							     fname, lnum, dw);
3633 	    ++duplicate;
3634 	}
3635 	else
3636 	    hash_add_item(&ht, hi, dw, hash);
3637 
3638 	flags = 0;
3639 	store_afflist[0] = NUL;
3640 	pfxlen = 0;
3641 	need_affix = FALSE;
3642 	if (afflist != NULL)
3643 	{
3644 	    // Extract flags from the affix list.
3645 	    flags |= get_affix_flags(affile, afflist);
3646 
3647 	    if (affile->af_needaffix != 0 && flag_in_afflist(
3648 			  affile->af_flagtype, afflist, affile->af_needaffix))
3649 		need_affix = TRUE;
3650 
3651 	    if (affile->af_pfxpostpone)
3652 		// Need to store the list of prefix IDs with the word.
3653 		pfxlen = get_pfxlist(affile, afflist, store_afflist);
3654 
3655 	    if (spin->si_compflags != NULL)
3656 		// Need to store the list of compound flags with the word.
3657 		// Concatenate them to the list of prefix IDs.
3658 		get_compflags(affile, afflist, store_afflist + pfxlen);
3659 	}
3660 
3661 	// Add the word to the word tree(s).
3662 	if (store_word(spin, dw, flags, spin->si_region,
3663 					   store_afflist, need_affix) == FAIL)
3664 	    retval = FAIL;
3665 
3666 	if (afflist != NULL)
3667 	{
3668 	    // Find all matching suffixes and add the resulting words.
3669 	    // Additionally do matching prefixes that combine.
3670 	    if (store_aff_word(spin, dw, afflist, affile,
3671 			   &affile->af_suff, &affile->af_pref,
3672 			    CONDIT_SUF, flags, store_afflist, pfxlen) == FAIL)
3673 		retval = FAIL;
3674 
3675 	    // Find all matching prefixes and add the resulting words.
3676 	    if (store_aff_word(spin, dw, afflist, affile,
3677 			  &affile->af_pref, NULL,
3678 			    CONDIT_SUF, flags, store_afflist, pfxlen) == FAIL)
3679 		retval = FAIL;
3680 	}
3681 
3682 	vim_free(pc);
3683     }
3684 
3685     if (duplicate > 0)
3686 	smsg(_("%d duplicate word(s) in %s"), duplicate, fname);
3687     if (spin->si_ascii && non_ascii > 0)
3688 	smsg(_("Ignored %d word(s) with non-ASCII characters in %s"),
3689 							    non_ascii, fname);
3690     hash_clear(&ht);
3691 
3692     fclose(fd);
3693     return retval;
3694 }
3695 
3696 /*
3697  * Check for affix flags in "afflist" that are turned into word flags.
3698  * Return WF_ flags.
3699  */
3700     static int
3701 get_affix_flags(afffile_T *affile, char_u *afflist)
3702 {
3703     int		flags = 0;
3704 
3705     if (affile->af_keepcase != 0 && flag_in_afflist(
3706 			   affile->af_flagtype, afflist, affile->af_keepcase))
3707 	flags |= WF_KEEPCAP | WF_FIXCAP;
3708     if (affile->af_rare != 0 && flag_in_afflist(
3709 			       affile->af_flagtype, afflist, affile->af_rare))
3710 	flags |= WF_RARE;
3711     if (affile->af_bad != 0 && flag_in_afflist(
3712 				affile->af_flagtype, afflist, affile->af_bad))
3713 	flags |= WF_BANNED;
3714     if (affile->af_needcomp != 0 && flag_in_afflist(
3715 			   affile->af_flagtype, afflist, affile->af_needcomp))
3716 	flags |= WF_NEEDCOMP;
3717     if (affile->af_comproot != 0 && flag_in_afflist(
3718 			   affile->af_flagtype, afflist, affile->af_comproot))
3719 	flags |= WF_COMPROOT;
3720     if (affile->af_nosuggest != 0 && flag_in_afflist(
3721 			  affile->af_flagtype, afflist, affile->af_nosuggest))
3722 	flags |= WF_NOSUGGEST;
3723     return flags;
3724 }
3725 
3726 /*
3727  * Get the list of prefix IDs from the affix list "afflist".
3728  * Used for PFXPOSTPONE.
3729  * Put the resulting flags in "store_afflist[MAXWLEN]" with a terminating NUL
3730  * and return the number of affixes.
3731  */
3732     static int
3733 get_pfxlist(
3734     afffile_T	*affile,
3735     char_u	*afflist,
3736     char_u	*store_afflist)
3737 {
3738     char_u	*p;
3739     char_u	*prevp;
3740     int		cnt = 0;
3741     int		id;
3742     char_u	key[AH_KEY_LEN];
3743     hashitem_T	*hi;
3744 
3745     for (p = afflist; *p != NUL; )
3746     {
3747 	prevp = p;
3748 	if (get_affitem(affile->af_flagtype, &p) != 0)
3749 	{
3750 	    // A flag is a postponed prefix flag if it appears in "af_pref"
3751 	    // and its ID is not zero.
3752 	    vim_strncpy(key, prevp, p - prevp);
3753 	    hi = hash_find(&affile->af_pref, key);
3754 	    if (!HASHITEM_EMPTY(hi))
3755 	    {
3756 		id = HI2AH(hi)->ah_newID;
3757 		if (id != 0)
3758 		    store_afflist[cnt++] = id;
3759 	    }
3760 	}
3761 	if (affile->af_flagtype == AFT_NUM && *p == ',')
3762 	    ++p;
3763     }
3764 
3765     store_afflist[cnt] = NUL;
3766     return cnt;
3767 }
3768 
3769 /*
3770  * Get the list of compound IDs from the affix list "afflist" that are used
3771  * for compound words.
3772  * Puts the flags in "store_afflist[]".
3773  */
3774     static void
3775 get_compflags(
3776     afffile_T	*affile,
3777     char_u	*afflist,
3778     char_u	*store_afflist)
3779 {
3780     char_u	*p;
3781     char_u	*prevp;
3782     int		cnt = 0;
3783     char_u	key[AH_KEY_LEN];
3784     hashitem_T	*hi;
3785 
3786     for (p = afflist; *p != NUL; )
3787     {
3788 	prevp = p;
3789 	if (get_affitem(affile->af_flagtype, &p) != 0)
3790 	{
3791 	    // A flag is a compound flag if it appears in "af_comp".
3792 	    vim_strncpy(key, prevp, p - prevp);
3793 	    hi = hash_find(&affile->af_comp, key);
3794 	    if (!HASHITEM_EMPTY(hi))
3795 		store_afflist[cnt++] = HI2CI(hi)->ci_newID;
3796 	}
3797 	if (affile->af_flagtype == AFT_NUM && *p == ',')
3798 	    ++p;
3799     }
3800 
3801     store_afflist[cnt] = NUL;
3802 }
3803 
3804 /*
3805  * Apply affixes to a word and store the resulting words.
3806  * "ht" is the hashtable with affentry_T that need to be applied, either
3807  * prefixes or suffixes.
3808  * "xht", when not NULL, is the prefix hashtable, to be used additionally on
3809  * the resulting words for combining affixes.
3810  *
3811  * Returns FAIL when out of memory.
3812  */
3813     static int
3814 store_aff_word(
3815     spellinfo_T	*spin,		// spell info
3816     char_u	*word,		// basic word start
3817     char_u	*afflist,	// list of names of supported affixes
3818     afffile_T	*affile,
3819     hashtab_T	*ht,
3820     hashtab_T	*xht,
3821     int		condit,		// CONDIT_SUF et al.
3822     int		flags,		// flags for the word
3823     char_u	*pfxlist,	// list of prefix IDs
3824     int		pfxlen)		// nr of flags in "pfxlist" for prefixes, rest
3825 				// is compound flags
3826 {
3827     int		todo;
3828     hashitem_T	*hi;
3829     affheader_T	*ah;
3830     affentry_T	*ae;
3831     char_u	newword[MAXWLEN];
3832     int		retval = OK;
3833     int		i, j;
3834     char_u	*p;
3835     int		use_flags;
3836     char_u	*use_pfxlist;
3837     int		use_pfxlen;
3838     int		need_affix;
3839     char_u	store_afflist[MAXWLEN];
3840     char_u	pfx_pfxlist[MAXWLEN];
3841     size_t	wordlen = STRLEN(word);
3842     int		use_condit;
3843 
3844     todo = (int)ht->ht_used;
3845     for (hi = ht->ht_array; todo > 0 && retval == OK; ++hi)
3846     {
3847 	if (!HASHITEM_EMPTY(hi))
3848 	{
3849 	    --todo;
3850 	    ah = HI2AH(hi);
3851 
3852 	    // Check that the affix combines, if required, and that the word
3853 	    // supports this affix.
3854 	    if (((condit & CONDIT_COMB) == 0 || ah->ah_combine)
3855 		    && flag_in_afflist(affile->af_flagtype, afflist,
3856 								 ah->ah_flag))
3857 	    {
3858 		// Loop over all affix entries with this name.
3859 		for (ae = ah->ah_first; ae != NULL; ae = ae->ae_next)
3860 		{
3861 		    // Check the condition.  It's not logical to match case
3862 		    // here, but it is required for compatibility with
3863 		    // Myspell.
3864 		    // Another requirement from Myspell is that the chop
3865 		    // string is shorter than the word itself.
3866 		    // For prefixes, when "PFXPOSTPONE" was used, only do
3867 		    // prefixes with a chop string and/or flags.
3868 		    // When a previously added affix had CIRCUMFIX this one
3869 		    // must have it too, if it had not then this one must not
3870 		    // have one either.
3871 		    if ((xht != NULL || !affile->af_pfxpostpone
3872 				|| ae->ae_chop != NULL
3873 				|| ae->ae_flags != NULL)
3874 			    && (ae->ae_chop == NULL
3875 				|| STRLEN(ae->ae_chop) < wordlen)
3876 			    && (ae->ae_prog == NULL
3877 				|| vim_regexec_prog(&ae->ae_prog, FALSE,
3878 							    word, (colnr_T)0))
3879 			    && (((condit & CONDIT_CFIX) == 0)
3880 				== ((condit & CONDIT_AFF) == 0
3881 				    || ae->ae_flags == NULL
3882 				    || !flag_in_afflist(affile->af_flagtype,
3883 					ae->ae_flags, affile->af_circumfix))))
3884 		    {
3885 			// Match.  Remove the chop and add the affix.
3886 			if (xht == NULL)
3887 			{
3888 			    // prefix: chop/add at the start of the word
3889 			    if (ae->ae_add == NULL)
3890 				*newword = NUL;
3891 			    else
3892 				vim_strncpy(newword, ae->ae_add, MAXWLEN - 1);
3893 			    p = word;
3894 			    if (ae->ae_chop != NULL)
3895 			    {
3896 				// Skip chop string.
3897 				if (has_mbyte)
3898 				{
3899 				    i = mb_charlen(ae->ae_chop);
3900 				    for ( ; i > 0; --i)
3901 					MB_PTR_ADV(p);
3902 				}
3903 				else
3904 				    p += STRLEN(ae->ae_chop);
3905 			    }
3906 			    STRCAT(newword, p);
3907 			}
3908 			else
3909 			{
3910 			    // suffix: chop/add at the end of the word
3911 			    vim_strncpy(newword, word, MAXWLEN - 1);
3912 			    if (ae->ae_chop != NULL)
3913 			    {
3914 				// Remove chop string.
3915 				p = newword + STRLEN(newword);
3916 				i = (int)MB_CHARLEN(ae->ae_chop);
3917 				for ( ; i > 0; --i)
3918 				    MB_PTR_BACK(newword, p);
3919 				*p = NUL;
3920 			    }
3921 			    if (ae->ae_add != NULL)
3922 				STRCAT(newword, ae->ae_add);
3923 			}
3924 
3925 			use_flags = flags;
3926 			use_pfxlist = pfxlist;
3927 			use_pfxlen = pfxlen;
3928 			need_affix = FALSE;
3929 			use_condit = condit | CONDIT_COMB | CONDIT_AFF;
3930 			if (ae->ae_flags != NULL)
3931 			{
3932 			    // Extract flags from the affix list.
3933 			    use_flags |= get_affix_flags(affile, ae->ae_flags);
3934 
3935 			    if (affile->af_needaffix != 0 && flag_in_afflist(
3936 					affile->af_flagtype, ae->ae_flags,
3937 							affile->af_needaffix))
3938 				need_affix = TRUE;
3939 
3940 			    // When there is a CIRCUMFIX flag the other affix
3941 			    // must also have it and we don't add the word
3942 			    // with one affix.
3943 			    if (affile->af_circumfix != 0 && flag_in_afflist(
3944 					affile->af_flagtype, ae->ae_flags,
3945 							affile->af_circumfix))
3946 			    {
3947 				use_condit |= CONDIT_CFIX;
3948 				if ((condit & CONDIT_CFIX) == 0)
3949 				    need_affix = TRUE;
3950 			    }
3951 
3952 			    if (affile->af_pfxpostpone
3953 						|| spin->si_compflags != NULL)
3954 			    {
3955 				if (affile->af_pfxpostpone)
3956 				    // Get prefix IDS from the affix list.
3957 				    use_pfxlen = get_pfxlist(affile,
3958 						 ae->ae_flags, store_afflist);
3959 				else
3960 				    use_pfxlen = 0;
3961 				use_pfxlist = store_afflist;
3962 
3963 				// Combine the prefix IDs. Avoid adding the
3964 				// same ID twice.
3965 				for (i = 0; i < pfxlen; ++i)
3966 				{
3967 				    for (j = 0; j < use_pfxlen; ++j)
3968 					if (pfxlist[i] == use_pfxlist[j])
3969 					    break;
3970 				    if (j == use_pfxlen)
3971 					use_pfxlist[use_pfxlen++] = pfxlist[i];
3972 				}
3973 
3974 				if (spin->si_compflags != NULL)
3975 				    // Get compound IDS from the affix list.
3976 				    get_compflags(affile, ae->ae_flags,
3977 						  use_pfxlist + use_pfxlen);
3978 
3979 				// Combine the list of compound flags.
3980 				// Concatenate them to the prefix IDs list.
3981 				// Avoid adding the same ID twice.
3982 				for (i = pfxlen; pfxlist[i] != NUL; ++i)
3983 				{
3984 				    for (j = use_pfxlen;
3985 						   use_pfxlist[j] != NUL; ++j)
3986 					if (pfxlist[i] == use_pfxlist[j])
3987 					    break;
3988 				    if (use_pfxlist[j] == NUL)
3989 				    {
3990 					use_pfxlist[j++] = pfxlist[i];
3991 					use_pfxlist[j] = NUL;
3992 				    }
3993 				}
3994 			    }
3995 			}
3996 
3997 			// Obey a "COMPOUNDFORBIDFLAG" of the affix: don't
3998 			// use the compound flags.
3999 			if (use_pfxlist != NULL && ae->ae_compforbid)
4000 			{
4001 			    vim_strncpy(pfx_pfxlist, use_pfxlist, use_pfxlen);
4002 			    use_pfxlist = pfx_pfxlist;
4003 			}
4004 
4005 			// When there are postponed prefixes...
4006 			if (spin->si_prefroot != NULL
4007 				&& spin->si_prefroot->wn_sibling != NULL)
4008 			{
4009 			    // ... add a flag to indicate an affix was used.
4010 			    use_flags |= WF_HAS_AFF;
4011 
4012 			    // ... don't use a prefix list if combining
4013 			    // affixes is not allowed.  But do use the
4014 			    // compound flags after them.
4015 			    if (!ah->ah_combine && use_pfxlist != NULL)
4016 				use_pfxlist += use_pfxlen;
4017 			}
4018 
4019 			// When compounding is supported and there is no
4020 			// "COMPOUNDPERMITFLAG" then forbid compounding on the
4021 			// side where the affix is applied.
4022 			if (spin->si_compflags != NULL && !ae->ae_comppermit)
4023 			{
4024 			    if (xht != NULL)
4025 				use_flags |= WF_NOCOMPAFT;
4026 			    else
4027 				use_flags |= WF_NOCOMPBEF;
4028 			}
4029 
4030 			// Store the modified word.
4031 			if (store_word(spin, newword, use_flags,
4032 						 spin->si_region, use_pfxlist,
4033 							  need_affix) == FAIL)
4034 			    retval = FAIL;
4035 
4036 			// When added a prefix or a first suffix and the affix
4037 			// has flags may add a(nother) suffix.  RECURSIVE!
4038 			if ((condit & CONDIT_SUF) && ae->ae_flags != NULL)
4039 			    if (store_aff_word(spin, newword, ae->ae_flags,
4040 					affile, &affile->af_suff, xht,
4041 					   use_condit & (xht == NULL
4042 							? ~0 :  ~CONDIT_SUF),
4043 				      use_flags, use_pfxlist, pfxlen) == FAIL)
4044 				retval = FAIL;
4045 
4046 			// When added a suffix and combining is allowed also
4047 			// try adding a prefix additionally.  Both for the
4048 			// word flags and for the affix flags.  RECURSIVE!
4049 			if (xht != NULL && ah->ah_combine)
4050 			{
4051 			    if (store_aff_word(spin, newword,
4052 					afflist, affile,
4053 					xht, NULL, use_condit,
4054 					use_flags, use_pfxlist,
4055 					pfxlen) == FAIL
4056 				    || (ae->ae_flags != NULL
4057 					&& store_aff_word(spin, newword,
4058 					    ae->ae_flags, affile,
4059 					    xht, NULL, use_condit,
4060 					    use_flags, use_pfxlist,
4061 					    pfxlen) == FAIL))
4062 				retval = FAIL;
4063 			}
4064 		    }
4065 		}
4066 	    }
4067 	}
4068     }
4069 
4070     return retval;
4071 }
4072 
4073 /*
4074  * Read a file with a list of words.
4075  */
4076     static int
4077 spell_read_wordfile(spellinfo_T *spin, char_u *fname)
4078 {
4079     FILE	*fd;
4080     long	lnum = 0;
4081     char_u	rline[MAXLINELEN];
4082     char_u	*line;
4083     char_u	*pc = NULL;
4084     char_u	*p;
4085     int		l;
4086     int		retval = OK;
4087     int		did_word = FALSE;
4088     int		non_ascii = 0;
4089     int		flags;
4090     int		regionmask;
4091 
4092     /*
4093      * Open the file.
4094      */
4095     fd = mch_fopen((char *)fname, "r");
4096     if (fd == NULL)
4097     {
4098 	semsg(_(e_notopen), fname);
4099 	return FAIL;
4100     }
4101 
4102     vim_snprintf((char *)IObuff, IOSIZE, _("Reading word file %s..."), fname);
4103     spell_message(spin, IObuff);
4104 
4105     /*
4106      * Read all the lines in the file one by one.
4107      */
4108     while (!vim_fgets(rline, MAXLINELEN, fd) && !got_int)
4109     {
4110 	line_breakcheck();
4111 	++lnum;
4112 
4113 	// Skip comment lines.
4114 	if (*rline == '#')
4115 	    continue;
4116 
4117 	// Remove CR, LF and white space from the end.
4118 	l = (int)STRLEN(rline);
4119 	while (l > 0 && rline[l - 1] <= ' ')
4120 	    --l;
4121 	if (l == 0)
4122 	    continue;	// empty or blank line
4123 	rline[l] = NUL;
4124 
4125 	// Convert from "/encoding={encoding}" to 'encoding' when needed.
4126 	vim_free(pc);
4127 	if (spin->si_conv.vc_type != CONV_NONE)
4128 	{
4129 	    pc = string_convert(&spin->si_conv, rline, NULL);
4130 	    if (pc == NULL)
4131 	    {
4132 		smsg(_("Conversion failure for word in %s line %ld: %s"),
4133 							   fname, lnum, rline);
4134 		continue;
4135 	    }
4136 	    line = pc;
4137 	}
4138 	else
4139 	{
4140 	    pc = NULL;
4141 	    line = rline;
4142 	}
4143 
4144 	if (*line == '/')
4145 	{
4146 	    ++line;
4147 	    if (STRNCMP(line, "encoding=", 9) == 0)
4148 	    {
4149 		if (spin->si_conv.vc_type != CONV_NONE)
4150 		    smsg(_("Duplicate /encoding= line ignored in %s line %ld: %s"),
4151 						       fname, lnum, line - 1);
4152 		else if (did_word)
4153 		    smsg(_("/encoding= line after word ignored in %s line %ld: %s"),
4154 						       fname, lnum, line - 1);
4155 		else
4156 		{
4157 		    char_u	*enc;
4158 
4159 		    // Setup for conversion to 'encoding'.
4160 		    line += 9;
4161 		    enc = enc_canonize(line);
4162 		    if (enc != NULL && !spin->si_ascii
4163 			    && convert_setup(&spin->si_conv, enc,
4164 							       p_enc) == FAIL)
4165 			smsg(_("Conversion in %s not supported: from %s to %s"),
4166 							  fname, line, p_enc);
4167 		    vim_free(enc);
4168 		    spin->si_conv.vc_fail = TRUE;
4169 		}
4170 		continue;
4171 	    }
4172 
4173 	    if (STRNCMP(line, "regions=", 8) == 0)
4174 	    {
4175 		if (spin->si_region_count > 1)
4176 		    smsg(_("Duplicate /regions= line ignored in %s line %ld: %s"),
4177 						       fname, lnum, line);
4178 		else
4179 		{
4180 		    line += 8;
4181 		    if (STRLEN(line) > MAXREGIONS * 2)
4182 			smsg(_("Too many regions in %s line %ld: %s"),
4183 						       fname, lnum, line);
4184 		    else
4185 		    {
4186 			spin->si_region_count = (int)STRLEN(line) / 2;
4187 			STRCPY(spin->si_region_name, line);
4188 
4189 			// Adjust the mask for a word valid in all regions.
4190 			spin->si_region = (1 << spin->si_region_count) - 1;
4191 		    }
4192 		}
4193 		continue;
4194 	    }
4195 
4196 	    smsg(_("/ line ignored in %s line %ld: %s"),
4197 						       fname, lnum, line - 1);
4198 	    continue;
4199 	}
4200 
4201 	flags = 0;
4202 	regionmask = spin->si_region;
4203 
4204 	// Check for flags and region after a slash.
4205 	p = vim_strchr(line, '/');
4206 	if (p != NULL)
4207 	{
4208 	    *p++ = NUL;
4209 	    while (*p != NUL)
4210 	    {
4211 		if (*p == '=')		// keep-case word
4212 		    flags |= WF_KEEPCAP | WF_FIXCAP;
4213 		else if (*p == '!')	// Bad, bad, wicked word.
4214 		    flags |= WF_BANNED;
4215 		else if (*p == '?')	// Rare word.
4216 		    flags |= WF_RARE;
4217 		else if (VIM_ISDIGIT(*p)) // region number(s)
4218 		{
4219 		    if ((flags & WF_REGION) == 0)   // first one
4220 			regionmask = 0;
4221 		    flags |= WF_REGION;
4222 
4223 		    l = *p - '0';
4224 		    if (l == 0 || l > spin->si_region_count)
4225 		    {
4226 			smsg(_("Invalid region nr in %s line %ld: %s"),
4227 							  fname, lnum, p);
4228 			break;
4229 		    }
4230 		    regionmask |= 1 << (l - 1);
4231 		}
4232 		else
4233 		{
4234 		    smsg(_("Unrecognized flags in %s line %ld: %s"),
4235 							      fname, lnum, p);
4236 		    break;
4237 		}
4238 		++p;
4239 	    }
4240 	}
4241 
4242 	// Skip non-ASCII words when "spin->si_ascii" is TRUE.
4243 	if (spin->si_ascii && has_non_ascii(line))
4244 	{
4245 	    ++non_ascii;
4246 	    continue;
4247 	}
4248 
4249 	// Normal word: store it.
4250 	if (store_word(spin, line, flags, regionmask, NULL, FALSE) == FAIL)
4251 	{
4252 	    retval = FAIL;
4253 	    break;
4254 	}
4255 	did_word = TRUE;
4256     }
4257 
4258     vim_free(pc);
4259     fclose(fd);
4260 
4261     if (spin->si_ascii && non_ascii > 0)
4262     {
4263 	vim_snprintf((char *)IObuff, IOSIZE,
4264 		  _("Ignored %d words with non-ASCII characters"), non_ascii);
4265 	spell_message(spin, IObuff);
4266     }
4267 
4268     return retval;
4269 }
4270 
4271 /*
4272  * Get part of an sblock_T, "len" bytes long.
4273  * This avoids calling free() for every little struct we use (and keeping
4274  * track of them).
4275  * The memory is cleared to all zeros.
4276  * Returns NULL when out of memory.
4277  */
4278     static void *
4279 getroom(
4280     spellinfo_T *spin,
4281     size_t	len,		// length needed
4282     int		align)		// align for pointer
4283 {
4284     char_u	*p;
4285     sblock_T	*bl = spin->si_blocks;
4286 
4287     if (align && bl != NULL)
4288 	// Round size up for alignment.  On some systems structures need to be
4289 	// aligned to the size of a pointer (e.g., SPARC).
4290 	bl->sb_used = (bl->sb_used + sizeof(char *) - 1)
4291 						      & ~(sizeof(char *) - 1);
4292 
4293     if (bl == NULL || bl->sb_used + len > SBLOCKSIZE)
4294     {
4295 	if (len >= SBLOCKSIZE)
4296 	    bl = NULL;
4297 	else
4298 	    // Allocate a block of memory. It is not freed until much later.
4299 	    bl = alloc_clear(sizeof(sblock_T) + SBLOCKSIZE);
4300 	if (bl == NULL)
4301 	{
4302 	    if (!spin->si_did_emsg)
4303 	    {
4304 		emsg(_("E845: Insufficient memory, word list will be incomplete"));
4305 		spin->si_did_emsg = TRUE;
4306 	    }
4307 	    return NULL;
4308 	}
4309 	bl->sb_next = spin->si_blocks;
4310 	spin->si_blocks = bl;
4311 	bl->sb_used = 0;
4312 	++spin->si_blocks_cnt;
4313     }
4314 
4315     p = bl->sb_data + bl->sb_used;
4316     bl->sb_used += (int)len;
4317 
4318     return p;
4319 }
4320 
4321 /*
4322  * Make a copy of a string into memory allocated with getroom().
4323  * Returns NULL when out of memory.
4324  */
4325     static char_u *
4326 getroom_save(spellinfo_T *spin, char_u *s)
4327 {
4328     char_u	*sc;
4329 
4330     sc = (char_u *)getroom(spin, STRLEN(s) + 1, FALSE);
4331     if (sc != NULL)
4332 	STRCPY(sc, s);
4333     return sc;
4334 }
4335 
4336 
4337 /*
4338  * Free the list of allocated sblock_T.
4339  */
4340     static void
4341 free_blocks(sblock_T *bl)
4342 {
4343     sblock_T	*next;
4344 
4345     while (bl != NULL)
4346     {
4347 	next = bl->sb_next;
4348 	vim_free(bl);
4349 	bl = next;
4350     }
4351 }
4352 
4353 /*
4354  * Allocate the root of a word tree.
4355  * Returns NULL when out of memory.
4356  */
4357     static wordnode_T *
4358 wordtree_alloc(spellinfo_T *spin)
4359 {
4360     return (wordnode_T *)getroom(spin, sizeof(wordnode_T), TRUE);
4361 }
4362 
4363 /*
4364  * Store a word in the tree(s).
4365  * Always store it in the case-folded tree.  For a keep-case word this is
4366  * useful when the word can also be used with all caps (no WF_FIXCAP flag) and
4367  * used to find suggestions.
4368  * For a keep-case word also store it in the keep-case tree.
4369  * When "pfxlist" is not NULL store the word for each postponed prefix ID and
4370  * compound flag.
4371  */
4372     static int
4373 store_word(
4374     spellinfo_T	*spin,
4375     char_u	*word,
4376     int		flags,		// extra flags, WF_BANNED
4377     int		region,		// supported region(s)
4378     char_u	*pfxlist,	// list of prefix IDs or NULL
4379     int		need_affix)	// only store word with affix ID
4380 {
4381     int		len = (int)STRLEN(word);
4382     int		ct = captype(word, word + len);
4383     char_u	foldword[MAXWLEN];
4384     int		res = OK;
4385     char_u	*p;
4386 
4387     (void)spell_casefold(word, len, foldword, MAXWLEN);
4388     for (p = pfxlist; res == OK; ++p)
4389     {
4390 	if (!need_affix || (p != NULL && *p != NUL))
4391 	    res = tree_add_word(spin, foldword, spin->si_foldroot, ct | flags,
4392 						  region, p == NULL ? 0 : *p);
4393 	if (p == NULL || *p == NUL)
4394 	    break;
4395     }
4396     ++spin->si_foldwcount;
4397 
4398     if (res == OK && (ct == WF_KEEPCAP || (flags & WF_KEEPCAP)))
4399     {
4400 	for (p = pfxlist; res == OK; ++p)
4401 	{
4402 	    if (!need_affix || (p != NULL && *p != NUL))
4403 		res = tree_add_word(spin, word, spin->si_keeproot, flags,
4404 						  region, p == NULL ? 0 : *p);
4405 	    if (p == NULL || *p == NUL)
4406 		break;
4407 	}
4408 	++spin->si_keepwcount;
4409     }
4410     return res;
4411 }
4412 
4413 /*
4414  * Add word "word" to a word tree at "root".
4415  * When "flags" < 0 we are adding to the prefix tree where "flags" is used for
4416  * "rare" and "region" is the condition nr.
4417  * Returns FAIL when out of memory.
4418  */
4419     static int
4420 tree_add_word(
4421     spellinfo_T	*spin,
4422     char_u	*word,
4423     wordnode_T	*root,
4424     int		flags,
4425     int		region,
4426     int		affixID)
4427 {
4428     wordnode_T	*node = root;
4429     wordnode_T	*np;
4430     wordnode_T	*copyp, **copyprev;
4431     wordnode_T	**prev = NULL;
4432     int		i;
4433 
4434     // Add each byte of the word to the tree, including the NUL at the end.
4435     for (i = 0; ; ++i)
4436     {
4437 	// When there is more than one reference to this node we need to make
4438 	// a copy, so that we can modify it.  Copy the whole list of siblings
4439 	// (we don't optimize for a partly shared list of siblings).
4440 	if (node != NULL && node->wn_refs > 1)
4441 	{
4442 	    --node->wn_refs;
4443 	    copyprev = prev;
4444 	    FOR_ALL_NODE_SIBLINGS(node, copyp)
4445 	    {
4446 		// Allocate a new node and copy the info.
4447 		np = get_wordnode(spin);
4448 		if (np == NULL)
4449 		    return FAIL;
4450 		np->wn_child = copyp->wn_child;
4451 		if (np->wn_child != NULL)
4452 		    ++np->wn_child->wn_refs;	// child gets extra ref
4453 		np->wn_byte = copyp->wn_byte;
4454 		if (np->wn_byte == NUL)
4455 		{
4456 		    np->wn_flags = copyp->wn_flags;
4457 		    np->wn_region = copyp->wn_region;
4458 		    np->wn_affixID = copyp->wn_affixID;
4459 		}
4460 
4461 		// Link the new node in the list, there will be one ref.
4462 		np->wn_refs = 1;
4463 		if (copyprev != NULL)
4464 		    *copyprev = np;
4465 		copyprev = &np->wn_sibling;
4466 
4467 		// Let "node" point to the head of the copied list.
4468 		if (copyp == node)
4469 		    node = np;
4470 	    }
4471 	}
4472 
4473 	// Look for the sibling that has the same character.  They are sorted
4474 	// on byte value, thus stop searching when a sibling is found with a
4475 	// higher byte value.  For zero bytes (end of word) the sorting is
4476 	// done on flags and then on affixID.
4477 	while (node != NULL
4478 		&& (node->wn_byte < word[i]
4479 		    || (node->wn_byte == NUL
4480 			&& (flags < 0
4481 			    ? node->wn_affixID < (unsigned)affixID
4482 			    : (node->wn_flags < (unsigned)(flags & WN_MASK)
4483 				|| (node->wn_flags == (flags & WN_MASK)
4484 				    && (spin->si_sugtree
4485 					? (node->wn_region & 0xffff) < region
4486 					: node->wn_affixID
4487 						    < (unsigned)affixID)))))))
4488 	{
4489 	    prev = &node->wn_sibling;
4490 	    node = *prev;
4491 	}
4492 	if (node == NULL
4493 		|| node->wn_byte != word[i]
4494 		|| (word[i] == NUL
4495 		    && (flags < 0
4496 			|| spin->si_sugtree
4497 			|| node->wn_flags != (flags & WN_MASK)
4498 			|| node->wn_affixID != affixID)))
4499 	{
4500 	    // Allocate a new node.
4501 	    np = get_wordnode(spin);
4502 	    if (np == NULL)
4503 		return FAIL;
4504 	    np->wn_byte = word[i];
4505 
4506 	    // If "node" is NULL this is a new child or the end of the sibling
4507 	    // list: ref count is one.  Otherwise use ref count of sibling and
4508 	    // make ref count of sibling one (matters when inserting in front
4509 	    // of the list of siblings).
4510 	    if (node == NULL)
4511 		np->wn_refs = 1;
4512 	    else
4513 	    {
4514 		np->wn_refs = node->wn_refs;
4515 		node->wn_refs = 1;
4516 	    }
4517 	    if (prev != NULL)
4518 		*prev = np;
4519 	    np->wn_sibling = node;
4520 	    node = np;
4521 	}
4522 
4523 	if (word[i] == NUL)
4524 	{
4525 	    node->wn_flags = flags;
4526 	    node->wn_region |= region;
4527 	    node->wn_affixID = affixID;
4528 	    break;
4529 	}
4530 	prev = &node->wn_child;
4531 	node = *prev;
4532     }
4533 #ifdef SPELL_PRINTTREE
4534     smsg("Added \"%s\"", word);
4535     spell_print_tree(root->wn_sibling);
4536 #endif
4537 
4538     // count nr of words added since last message
4539     ++spin->si_msg_count;
4540 
4541     if (spin->si_compress_cnt > 1)
4542     {
4543 	if (--spin->si_compress_cnt == 1)
4544 	    // Did enough words to lower the block count limit.
4545 	    spin->si_blocks_cnt += compress_inc;
4546     }
4547 
4548     /*
4549      * When we have allocated lots of memory we need to compress the word tree
4550      * to free up some room.  But compression is slow, and we might actually
4551      * need that room, thus only compress in the following situations:
4552      * 1. When not compressed before (si_compress_cnt == 0): when using
4553      *    "compress_start" blocks.
4554      * 2. When compressed before and used "compress_inc" blocks before
4555      *    adding "compress_added" words (si_compress_cnt > 1).
4556      * 3. When compressed before, added "compress_added" words
4557      *    (si_compress_cnt == 1) and the number of free nodes drops below the
4558      *    maximum word length.
4559      */
4560 #ifndef SPELL_COMPRESS_ALLWAYS
4561     if (spin->si_compress_cnt == 1
4562 	    ? spin->si_free_count < MAXWLEN
4563 	    : spin->si_blocks_cnt >= compress_start)
4564 #endif
4565     {
4566 	// Decrement the block counter.  The effect is that we compress again
4567 	// when the freed up room has been used and another "compress_inc"
4568 	// blocks have been allocated.  Unless "compress_added" words have
4569 	// been added, then the limit is put back again.
4570 	spin->si_blocks_cnt -= compress_inc;
4571 	spin->si_compress_cnt = compress_added;
4572 
4573 	if (spin->si_verbose)
4574 	{
4575 	    msg_start();
4576 	    msg_puts(_(msg_compressing));
4577 	    msg_clr_eos();
4578 	    msg_didout = FALSE;
4579 	    msg_col = 0;
4580 	    out_flush();
4581 	}
4582 
4583 	// Compress both trees.  Either they both have many nodes, which makes
4584 	// compression useful, or one of them is small, which means
4585 	// compression goes fast.  But when filling the soundfold word tree
4586 	// there is no keep-case tree.
4587 	wordtree_compress(spin, spin->si_foldroot, "case-folded");
4588 	if (affixID >= 0)
4589 	    wordtree_compress(spin, spin->si_keeproot, "keep-case");
4590     }
4591 
4592     return OK;
4593 }
4594 
4595 /*
4596  * Get a wordnode_T, either from the list of previously freed nodes or
4597  * allocate a new one.
4598  * Returns NULL when out of memory.
4599  */
4600     static wordnode_T *
4601 get_wordnode(spellinfo_T *spin)
4602 {
4603     wordnode_T *n;
4604 
4605     if (spin->si_first_free == NULL)
4606 	n = (wordnode_T *)getroom(spin, sizeof(wordnode_T), TRUE);
4607     else
4608     {
4609 	n = spin->si_first_free;
4610 	spin->si_first_free = n->wn_child;
4611 	CLEAR_POINTER(n);
4612 	--spin->si_free_count;
4613     }
4614 #ifdef SPELL_PRINTTREE
4615     if (n != NULL)
4616 	n->wn_nr = ++spin->si_wordnode_nr;
4617 #endif
4618     return n;
4619 }
4620 
4621 /*
4622  * Decrement the reference count on a node (which is the head of a list of
4623  * siblings).  If the reference count becomes zero free the node and its
4624  * siblings.
4625  * Returns the number of nodes actually freed.
4626  */
4627     static int
4628 deref_wordnode(spellinfo_T *spin, wordnode_T *node)
4629 {
4630     wordnode_T	*np;
4631     int		cnt = 0;
4632 
4633     if (--node->wn_refs == 0)
4634     {
4635 	FOR_ALL_NODE_SIBLINGS(node, np)
4636 	{
4637 	    if (np->wn_child != NULL)
4638 		cnt += deref_wordnode(spin, np->wn_child);
4639 	    free_wordnode(spin, np);
4640 	    ++cnt;
4641 	}
4642 	++cnt;	    // length field
4643     }
4644     return cnt;
4645 }
4646 
4647 /*
4648  * Free a wordnode_T for re-use later.
4649  * Only the "wn_child" field becomes invalid.
4650  */
4651     static void
4652 free_wordnode(spellinfo_T *spin, wordnode_T *n)
4653 {
4654     n->wn_child = spin->si_first_free;
4655     spin->si_first_free = n;
4656     ++spin->si_free_count;
4657 }
4658 
4659 /*
4660  * Compress a tree: find tails that are identical and can be shared.
4661  */
4662     static void
4663 wordtree_compress(spellinfo_T *spin, wordnode_T *root, char *name)
4664 {
4665     hashtab_T	    ht;
4666     long	    n;
4667     long	    tot = 0;
4668     long	    perc;
4669 
4670     // Skip the root itself, it's not actually used.  The first sibling is the
4671     // start of the tree.
4672     if (root->wn_sibling != NULL)
4673     {
4674 	hash_init(&ht);
4675 	n = node_compress(spin, root->wn_sibling, &ht, &tot);
4676 
4677 #ifndef SPELL_PRINTTREE
4678 	if (spin->si_verbose || p_verbose > 2)
4679 #endif
4680 	{
4681 	    if (tot > 1000000)
4682 		perc = (tot - n) / (tot / 100);
4683 	    else if (tot == 0)
4684 		perc = 0;
4685 	    else
4686 		perc = (tot - n) * 100 / tot;
4687 	    vim_snprintf((char *)IObuff, IOSIZE,
4688 		       _("Compressed %s: %ld of %ld nodes; %ld (%ld%%) remaining"),
4689 						       name, n, tot, tot - n, perc);
4690 	    spell_message(spin, IObuff);
4691 	}
4692 #ifdef SPELL_PRINTTREE
4693 	spell_print_tree(root->wn_sibling);
4694 #endif
4695 	hash_clear(&ht);
4696     }
4697 }
4698 
4699 /*
4700  * Compress a node, its siblings and its children, depth first.
4701  * Returns the number of compressed nodes.
4702  */
4703     static long
4704 node_compress(
4705     spellinfo_T	*spin,
4706     wordnode_T	*node,
4707     hashtab_T	*ht,
4708     long	*tot)	    // total count of nodes before compressing,
4709 			    // incremented while going through the tree
4710 {
4711     wordnode_T	*np;
4712     wordnode_T	*tp;
4713     wordnode_T	*child;
4714     hash_T	hash;
4715     hashitem_T	*hi;
4716     long	len = 0;
4717     unsigned	nr, n;
4718     long	compressed = 0;
4719 
4720     /*
4721      * Go through the list of siblings.  Compress each child and then try
4722      * finding an identical child to replace it.
4723      * Note that with "child" we mean not just the node that is pointed to,
4724      * but the whole list of siblings of which the child node is the first.
4725      */
4726     for (np = node; np != NULL && !got_int; np = np->wn_sibling)
4727     {
4728 	++len;
4729 	if ((child = np->wn_child) != NULL)
4730 	{
4731 	    // Compress the child first.  This fills hashkey.
4732 	    compressed += node_compress(spin, child, ht, tot);
4733 
4734 	    // Try to find an identical child.
4735 	    hash = hash_hash(child->wn_u1.hashkey);
4736 	    hi = hash_lookup(ht, child->wn_u1.hashkey, hash);
4737 	    if (!HASHITEM_EMPTY(hi))
4738 	    {
4739 		// There are children we encountered before with a hash value
4740 		// identical to the current child.  Now check if there is one
4741 		// that is really identical.
4742 		for (tp = HI2WN(hi); tp != NULL; tp = tp->wn_u2.next)
4743 		    if (node_equal(child, tp))
4744 		    {
4745 			// Found one!  Now use that child in place of the
4746 			// current one.  This means the current child and all
4747 			// its siblings is unlinked from the tree.
4748 			++tp->wn_refs;
4749 			compressed += deref_wordnode(spin, child);
4750 			np->wn_child = tp;
4751 			break;
4752 		    }
4753 		if (tp == NULL)
4754 		{
4755 		    // No other child with this hash value equals the child of
4756 		    // the node, add it to the linked list after the first
4757 		    // item.
4758 		    tp = HI2WN(hi);
4759 		    child->wn_u2.next = tp->wn_u2.next;
4760 		    tp->wn_u2.next = child;
4761 		}
4762 	    }
4763 	    else
4764 		// No other child has this hash value, add it to the
4765 		// hashtable.
4766 		hash_add_item(ht, hi, child->wn_u1.hashkey, hash);
4767 	}
4768     }
4769     *tot += len + 1;	// add one for the node that stores the length
4770 
4771     /*
4772      * Make a hash key for the node and its siblings, so that we can quickly
4773      * find a lookalike node.  This must be done after compressing the sibling
4774      * list, otherwise the hash key would become invalid by the compression.
4775      */
4776     node->wn_u1.hashkey[0] = len;
4777     nr = 0;
4778     FOR_ALL_NODE_SIBLINGS(node, np)
4779     {
4780 	if (np->wn_byte == NUL)
4781 	    // end node: use wn_flags, wn_region and wn_affixID
4782 	    n = np->wn_flags + (np->wn_region << 8) + (np->wn_affixID << 16);
4783 	else
4784 	    // byte node: use the byte value and the child pointer
4785 	    n = (unsigned)(np->wn_byte + ((long_u)np->wn_child << 8));
4786 	nr = nr * 101 + n;
4787     }
4788 
4789     // Avoid NUL bytes, it terminates the hash key.
4790     n = nr & 0xff;
4791     node->wn_u1.hashkey[1] = n == 0 ? 1 : n;
4792     n = (nr >> 8) & 0xff;
4793     node->wn_u1.hashkey[2] = n == 0 ? 1 : n;
4794     n = (nr >> 16) & 0xff;
4795     node->wn_u1.hashkey[3] = n == 0 ? 1 : n;
4796     n = (nr >> 24) & 0xff;
4797     node->wn_u1.hashkey[4] = n == 0 ? 1 : n;
4798     node->wn_u1.hashkey[5] = NUL;
4799 
4800     // Check for CTRL-C pressed now and then.
4801     veryfast_breakcheck();
4802 
4803     return compressed;
4804 }
4805 
4806 /*
4807  * Return TRUE when two nodes have identical siblings and children.
4808  */
4809     static int
4810 node_equal(wordnode_T *n1, wordnode_T *n2)
4811 {
4812     wordnode_T	*p1;
4813     wordnode_T	*p2;
4814 
4815     for (p1 = n1, p2 = n2; p1 != NULL && p2 != NULL;
4816 				     p1 = p1->wn_sibling, p2 = p2->wn_sibling)
4817 	if (p1->wn_byte != p2->wn_byte
4818 		|| (p1->wn_byte == NUL
4819 		    ? (p1->wn_flags != p2->wn_flags
4820 			|| p1->wn_region != p2->wn_region
4821 			|| p1->wn_affixID != p2->wn_affixID)
4822 		    : (p1->wn_child != p2->wn_child)))
4823 	    break;
4824 
4825     return p1 == NULL && p2 == NULL;
4826 }
4827 
4828 static int rep_compare(const void *s1, const void *s2);
4829 
4830 /*
4831  * Function given to qsort() to sort the REP items on "from" string.
4832  */
4833     static int
4834 rep_compare(const void *s1, const void *s2)
4835 {
4836     fromto_T	*p1 = (fromto_T *)s1;
4837     fromto_T	*p2 = (fromto_T *)s2;
4838 
4839     return STRCMP(p1->ft_from, p2->ft_from);
4840 }
4841 
4842 /*
4843  * Write the Vim .spl file "fname".
4844  * Return FAIL or OK;
4845  */
4846     static int
4847 write_vim_spell(spellinfo_T *spin, char_u *fname)
4848 {
4849     FILE	*fd;
4850     int		regionmask;
4851     int		round;
4852     wordnode_T	*tree;
4853     int		nodecount;
4854     int		i;
4855     int		l;
4856     garray_T	*gap;
4857     fromto_T	*ftp;
4858     char_u	*p;
4859     int		rr;
4860     int		retval = OK;
4861     size_t	fwv = 1;  // collect return value of fwrite() to avoid
4862 			  // warnings from picky compiler
4863 
4864     fd = mch_fopen((char *)fname, "w");
4865     if (fd == NULL)
4866     {
4867 	semsg(_(e_notopen), fname);
4868 	return FAIL;
4869     }
4870 
4871     // <HEADER>: <fileID> <versionnr>
4872 							    // <fileID>
4873     fwv &= fwrite(VIMSPELLMAGIC, VIMSPELLMAGICL, (size_t)1, fd);
4874     if (fwv != (size_t)1)
4875 	// Catch first write error, don't try writing more.
4876 	goto theend;
4877 
4878     putc(VIMSPELLVERSION, fd);				    // <versionnr>
4879 
4880     /*
4881      * <SECTIONS>: <section> ... <sectionend>
4882      */
4883 
4884     // SN_INFO: <infotext>
4885     if (spin->si_info != NULL)
4886     {
4887 	putc(SN_INFO, fd);				// <sectionID>
4888 	putc(0, fd);					// <sectionflags>
4889 
4890 	i = (int)STRLEN(spin->si_info);
4891 	put_bytes(fd, (long_u)i, 4);			// <sectionlen>
4892 	fwv &= fwrite(spin->si_info, (size_t)i, (size_t)1, fd); // <infotext>
4893     }
4894 
4895     // SN_REGION: <regionname> ...
4896     // Write the region names only if there is more than one.
4897     if (spin->si_region_count > 1)
4898     {
4899 	putc(SN_REGION, fd);				// <sectionID>
4900 	putc(SNF_REQUIRED, fd);				// <sectionflags>
4901 	l = spin->si_region_count * 2;
4902 	put_bytes(fd, (long_u)l, 4);			// <sectionlen>
4903 	fwv &= fwrite(spin->si_region_name, (size_t)l, (size_t)1, fd);
4904 							// <regionname> ...
4905 	regionmask = (1 << spin->si_region_count) - 1;
4906     }
4907     else
4908 	regionmask = 0;
4909 
4910     // SN_CHARFLAGS: <charflagslen> <charflags> <folcharslen> <folchars>
4911     //
4912     // The table with character flags and the table for case folding.
    // This makes sure the same characters are recognized as word characters
    // when generating and when using a spell file.
4915     // Skip this for ASCII, the table may conflict with the one used for
4916     // 'encoding'.
4917     // Also skip this for an .add.spl file, the main spell file must contain
4918     // the table (avoids that it conflicts).  File is shorter too.
4919     if (!spin->si_ascii && !spin->si_add)
4920     {
4921 	char_u	folchars[128 * 8];
4922 	int	flags;
4923 
4924 	putc(SN_CHARFLAGS, fd);				// <sectionID>
4925 	putc(SNF_REQUIRED, fd);				// <sectionflags>
4926 
4927 	// Form the <folchars> string first, we need to know its length.
4928 	l = 0;
4929 	for (i = 128; i < 256; ++i)
4930 	{
4931 	    if (has_mbyte)
4932 		l += mb_char2bytes(spelltab.st_fold[i], folchars + l);
4933 	    else
4934 		folchars[l++] = spelltab.st_fold[i];
4935 	}
4936 	put_bytes(fd, (long_u)(1 + 128 + 2 + l), 4);	// <sectionlen>
4937 
4938 	fputc(128, fd);					// <charflagslen>
4939 	for (i = 128; i < 256; ++i)
4940 	{
4941 	    flags = 0;
4942 	    if (spelltab.st_isw[i])
4943 		flags |= CF_WORD;
4944 	    if (spelltab.st_isu[i])
4945 		flags |= CF_UPPER;
4946 	    fputc(flags, fd);				// <charflags>
4947 	}
4948 
4949 	put_bytes(fd, (long_u)l, 2);			// <folcharslen>
4950 	fwv &= fwrite(folchars, (size_t)l, (size_t)1, fd); // <folchars>
4951     }
4952 
4953     // SN_MIDWORD: <midword>
4954     if (spin->si_midword != NULL)
4955     {
4956 	putc(SN_MIDWORD, fd);				// <sectionID>
4957 	putc(SNF_REQUIRED, fd);				// <sectionflags>
4958 
4959 	i = (int)STRLEN(spin->si_midword);
4960 	put_bytes(fd, (long_u)i, 4);			// <sectionlen>
4961 	fwv &= fwrite(spin->si_midword, (size_t)i, (size_t)1, fd);
4962 							// <midword>
4963     }
4964 
4965     // SN_PREFCOND: <prefcondcnt> <prefcond> ...
4966     if (spin->si_prefcond.ga_len > 0)
4967     {
4968 	putc(SN_PREFCOND, fd);				// <sectionID>
4969 	putc(SNF_REQUIRED, fd);				// <sectionflags>
4970 
4971 	l = write_spell_prefcond(NULL, &spin->si_prefcond);
4972 	put_bytes(fd, (long_u)l, 4);			// <sectionlen>
4973 
4974 	write_spell_prefcond(fd, &spin->si_prefcond);
4975     }
4976 
4977     // SN_REP: <repcount> <rep> ...
4978     // SN_SAL: <salflags> <salcount> <sal> ...
4979     // SN_REPSAL: <repcount> <rep> ...
4980 
4981     // round 1: SN_REP section
4982     // round 2: SN_SAL section (unless SN_SOFO is used)
4983     // round 3: SN_REPSAL section
4984     for (round = 1; round <= 3; ++round)
4985     {
4986 	if (round == 1)
4987 	    gap = &spin->si_rep;
4988 	else if (round == 2)
4989 	{
4990 	    // Don't write SN_SAL when using a SN_SOFO section
4991 	    if (spin->si_sofofr != NULL && spin->si_sofoto != NULL)
4992 		continue;
4993 	    gap = &spin->si_sal;
4994 	}
4995 	else
4996 	    gap = &spin->si_repsal;
4997 
4998 	// Don't write the section if there are no items.
4999 	if (gap->ga_len == 0)
5000 	    continue;
5001 
5002 	// Sort the REP/REPSAL items.
5003 	if (round != 2)
5004 	    qsort(gap->ga_data, (size_t)gap->ga_len,
5005 					       sizeof(fromto_T), rep_compare);
5006 
5007 	i = round == 1 ? SN_REP : (round == 2 ? SN_SAL : SN_REPSAL);
5008 	putc(i, fd);					// <sectionID>
5009 
5010 	// This is for making suggestions, section is not required.
5011 	putc(0, fd);					// <sectionflags>
5012 
5013 	// Compute the length of what follows.
5014 	l = 2;	    // count <repcount> or <salcount>
5015 	for (i = 0; i < gap->ga_len; ++i)
5016 	{
5017 	    ftp = &((fromto_T *)gap->ga_data)[i];
5018 	    l += 1 + (int)STRLEN(ftp->ft_from);  // count <*fromlen> and <*from>
5019 	    l += 1 + (int)STRLEN(ftp->ft_to);    // count <*tolen> and <*to>
5020 	}
5021 	if (round == 2)
5022 	    ++l;	// count <salflags>
5023 	put_bytes(fd, (long_u)l, 4);			// <sectionlen>
5024 
5025 	if (round == 2)
5026 	{
5027 	    i = 0;
5028 	    if (spin->si_followup)
5029 		i |= SAL_F0LLOWUP;
5030 	    if (spin->si_collapse)
5031 		i |= SAL_COLLAPSE;
5032 	    if (spin->si_rem_accents)
5033 		i |= SAL_REM_ACCENTS;
5034 	    putc(i, fd);			// <salflags>
5035 	}
5036 
5037 	put_bytes(fd, (long_u)gap->ga_len, 2);	// <repcount> or <salcount>
5038 	for (i = 0; i < gap->ga_len; ++i)
5039 	{
5040 	    // <rep> : <repfromlen> <repfrom> <reptolen> <repto>
5041 	    // <sal> : <salfromlen> <salfrom> <saltolen> <salto>
5042 	    ftp = &((fromto_T *)gap->ga_data)[i];
5043 	    for (rr = 1; rr <= 2; ++rr)
5044 	    {
5045 		p = rr == 1 ? ftp->ft_from : ftp->ft_to;
5046 		l = (int)STRLEN(p);
5047 		putc(l, fd);
5048 		if (l > 0)
5049 		    fwv &= fwrite(p, l, (size_t)1, fd);
5050 	    }
5051 	}
5052 
5053     }
5054 
5055     // SN_SOFO: <sofofromlen> <sofofrom> <sofotolen> <sofoto>
5056     // This is for making suggestions, section is not required.
5057     if (spin->si_sofofr != NULL && spin->si_sofoto != NULL)
5058     {
5059 	putc(SN_SOFO, fd);				// <sectionID>
5060 	putc(0, fd);					// <sectionflags>
5061 
5062 	l = (int)STRLEN(spin->si_sofofr);
5063 	put_bytes(fd, (long_u)(l + STRLEN(spin->si_sofoto) + 4), 4);
5064 							// <sectionlen>
5065 
5066 	put_bytes(fd, (long_u)l, 2);			// <sofofromlen>
5067 	fwv &= fwrite(spin->si_sofofr, l, (size_t)1, fd); // <sofofrom>
5068 
5069 	l = (int)STRLEN(spin->si_sofoto);
5070 	put_bytes(fd, (long_u)l, 2);			// <sofotolen>
5071 	fwv &= fwrite(spin->si_sofoto, l, (size_t)1, fd); // <sofoto>
5072     }
5073 
5074     // SN_WORDS: <word> ...
5075     // This is for making suggestions, section is not required.
5076     if (spin->si_commonwords.ht_used > 0)
5077     {
5078 	putc(SN_WORDS, fd);				// <sectionID>
5079 	putc(0, fd);					// <sectionflags>
5080 
5081 	// round 1: count the bytes
5082 	// round 2: write the bytes
5083 	for (round = 1; round <= 2; ++round)
5084 	{
5085 	    int		todo;
5086 	    int		len = 0;
5087 	    hashitem_T	*hi;
5088 
5089 	    todo = (int)spin->si_commonwords.ht_used;
5090 	    for (hi = spin->si_commonwords.ht_array; todo > 0; ++hi)
5091 		if (!HASHITEM_EMPTY(hi))
5092 		{
5093 		    l = (int)STRLEN(hi->hi_key) + 1;
5094 		    len += l;
5095 		    if (round == 2)			// <word>
5096 			fwv &= fwrite(hi->hi_key, (size_t)l, (size_t)1, fd);
5097 		    --todo;
5098 		}
5099 	    if (round == 1)
5100 		put_bytes(fd, (long_u)len, 4);		// <sectionlen>
5101 	}
5102     }
5103 
5104     // SN_MAP: <mapstr>
5105     // This is for making suggestions, section is not required.
5106     if (spin->si_map.ga_len > 0)
5107     {
5108 	putc(SN_MAP, fd);				// <sectionID>
5109 	putc(0, fd);					// <sectionflags>
5110 	l = spin->si_map.ga_len;
5111 	put_bytes(fd, (long_u)l, 4);			// <sectionlen>
5112 	fwv &= fwrite(spin->si_map.ga_data, (size_t)l, (size_t)1, fd);
5113 							// <mapstr>
5114     }
5115 
5116     // SN_SUGFILE: <timestamp>
5117     // This is used to notify that a .sug file may be available and at the
5118     // same time allows for checking that a .sug file that is found matches
5119     // with this .spl file.  That's because the word numbers must be exactly
5120     // right.
5121     if (!spin->si_nosugfile
5122 	    && (spin->si_sal.ga_len > 0
5123 		     || (spin->si_sofofr != NULL && spin->si_sofoto != NULL)))
5124     {
5125 	putc(SN_SUGFILE, fd);				// <sectionID>
5126 	putc(0, fd);					// <sectionflags>
5127 	put_bytes(fd, (long_u)8, 4);			// <sectionlen>
5128 
5129 	// Set si_sugtime and write it to the file.
5130 	spin->si_sugtime = time(NULL);
5131 	put_time(fd, spin->si_sugtime);			// <timestamp>
5132     }
5133 
5134     // SN_NOSPLITSUGS: nothing
5135     // This is used to notify that no suggestions with word splits are to be
5136     // made.
5137     if (spin->si_nosplitsugs)
5138     {
5139 	putc(SN_NOSPLITSUGS, fd);			// <sectionID>
5140 	putc(0, fd);					// <sectionflags>
5141 	put_bytes(fd, (long_u)0, 4);			// <sectionlen>
5142     }
5143 
5144     // SN_NOCOMPOUNDSUGS: nothing
5145     // This is used to notify that no suggestions with compounds are to be
5146     // made.
5147     if (spin->si_nocompoundsugs)
5148     {
5149 	putc(SN_NOCOMPOUNDSUGS, fd);			// <sectionID>
5150 	putc(0, fd);					// <sectionflags>
5151 	put_bytes(fd, (long_u)0, 4);			// <sectionlen>
5152     }
5153 
5154     // SN_COMPOUND: compound info.
5155     // We don't mark it required, when not supported all compound words will
5156     // be bad words.
5157     if (spin->si_compflags != NULL)
5158     {
5159 	putc(SN_COMPOUND, fd);				// <sectionID>
5160 	putc(0, fd);					// <sectionflags>
5161 
5162 	l = (int)STRLEN(spin->si_compflags);
5163 	for (i = 0; i < spin->si_comppat.ga_len; ++i)
5164 	    l += (int)STRLEN(((char_u **)(spin->si_comppat.ga_data))[i]) + 1;
5165 	put_bytes(fd, (long_u)(l + 7), 4);		// <sectionlen>
5166 
5167 	putc(spin->si_compmax, fd);			// <compmax>
5168 	putc(spin->si_compminlen, fd);			// <compminlen>
5169 	putc(spin->si_compsylmax, fd);			// <compsylmax>
5170 	putc(0, fd);		// for Vim 7.0b compatibility
5171 	putc(spin->si_compoptions, fd);			// <compoptions>
5172 	put_bytes(fd, (long_u)spin->si_comppat.ga_len, 2);
5173 							// <comppatcount>
5174 	for (i = 0; i < spin->si_comppat.ga_len; ++i)
5175 	{
5176 	    p = ((char_u **)(spin->si_comppat.ga_data))[i];
5177 	    putc((int)STRLEN(p), fd);			// <comppatlen>
5178 	    fwv &= fwrite(p, (size_t)STRLEN(p), (size_t)1, fd);
5179 							// <comppattext>
5180 	}
5181 							// <compflags>
5182 	fwv &= fwrite(spin->si_compflags, (size_t)STRLEN(spin->si_compflags),
5183 							       (size_t)1, fd);
5184     }
5185 
5186     // SN_NOBREAK: NOBREAK flag
5187     if (spin->si_nobreak)
5188     {
5189 	putc(SN_NOBREAK, fd);				// <sectionID>
5190 	putc(0, fd);					// <sectionflags>
5191 
5192 	// It's empty, the presence of the section flags the feature.
5193 	put_bytes(fd, (long_u)0, 4);			// <sectionlen>
5194     }
5195 
5196     // SN_SYLLABLE: syllable info.
5197     // We don't mark it required, when not supported syllables will not be
5198     // counted.
5199     if (spin->si_syllable != NULL)
5200     {
5201 	putc(SN_SYLLABLE, fd);				// <sectionID>
5202 	putc(0, fd);					// <sectionflags>
5203 
5204 	l = (int)STRLEN(spin->si_syllable);
5205 	put_bytes(fd, (long_u)l, 4);			// <sectionlen>
5206 	fwv &= fwrite(spin->si_syllable, (size_t)l, (size_t)1, fd);
5207 							// <syllable>
5208     }
5209 
5210     // end of <SECTIONS>
5211     putc(SN_END, fd);					// <sectionend>
5212 
5213 
5214     /*
5215      * <LWORDTREE>  <KWORDTREE>  <PREFIXTREE>
5216      */
5217     spin->si_memtot = 0;
5218     for (round = 1; round <= 3; ++round)
5219     {
5220 	if (round == 1)
5221 	    tree = spin->si_foldroot->wn_sibling;
5222 	else if (round == 2)
5223 	    tree = spin->si_keeproot->wn_sibling;
5224 	else
5225 	    tree = spin->si_prefroot->wn_sibling;
5226 
5227 	// Clear the index and wnode fields in the tree.
5228 	clear_node(tree);
5229 
5230 	// Count the number of nodes.  Needed to be able to allocate the
5231 	// memory when reading the nodes.  Also fills in index for shared
5232 	// nodes.
5233 	nodecount = put_node(NULL, tree, 0, regionmask, round == 3);
5234 
5235 	// number of nodes in 4 bytes
5236 	put_bytes(fd, (long_u)nodecount, 4);	// <nodecount>
5237 	spin->si_memtot += nodecount + nodecount * sizeof(int);
5238 
5239 	// Write the nodes.
5240 	(void)put_node(fd, tree, 0, regionmask, round == 3);
5241     }
5242 
5243     // Write another byte to check for errors (file system full).
5244     if (putc(0, fd) == EOF)
5245 	retval = FAIL;
5246 theend:
5247     if (fclose(fd) == EOF)
5248 	retval = FAIL;
5249 
5250     if (fwv != (size_t)1)
5251 	retval = FAIL;
5252     if (retval == FAIL)
5253 	emsg(_(e_write));
5254 
5255     return retval;
5256 }
5257 
5258 /*
5259  * Clear the index and wnode fields of "node", it siblings and its
5260  * children.  This is needed because they are a union with other items to save
5261  * space.
5262  */
5263     static void
5264 clear_node(wordnode_T *node)
5265 {
5266     wordnode_T	*np;
5267 
5268     if (node != NULL)
5269 	FOR_ALL_NODE_SIBLINGS(node, np)
5270 	{
5271 	    np->wn_u1.index = 0;
5272 	    np->wn_u2.wnode = NULL;
5273 
5274 	    if (np->wn_byte != NUL)
5275 		clear_node(np->wn_child);
5276 	}
5277 }
5278 
5279 
5280 /*
5281  * Dump a word tree at node "node".
5282  *
5283  * This first writes the list of possible bytes (siblings).  Then for each
5284  * byte recursively write the children.
5285  *
5286  * NOTE: The code here must match the code in read_tree_node(), since
5287  * assumptions are made about the indexes (so that we don't have to write them
5288  * in the file).
5289  *
5290  * Returns the number of nodes used.
5291  */
5292     static int
5293 put_node(
5294     FILE	*fd,		// NULL when only counting
5295     wordnode_T	*node,
5296     int		idx,
5297     int		regionmask,
5298     int		prefixtree)	// TRUE for PREFIXTREE
5299 {
5300     int		newindex = idx;
5301     int		siblingcount = 0;
5302     wordnode_T	*np;
5303     int		flags;
5304 
5305     // If "node" is zero the tree is empty.
5306     if (node == NULL)
5307 	return 0;
5308 
5309     // Store the index where this node is written.
5310     node->wn_u1.index = idx;
5311 
5312     // Count the number of siblings.
5313     FOR_ALL_NODE_SIBLINGS(node, np)
5314 	++siblingcount;
5315 
5316     // Write the sibling count.
5317     if (fd != NULL)
5318 	putc(siblingcount, fd);				// <siblingcount>
5319 
5320     // Write each sibling byte and optionally extra info.
5321     FOR_ALL_NODE_SIBLINGS(node, np)
5322     {
5323 	if (np->wn_byte == 0)
5324 	{
5325 	    if (fd != NULL)
5326 	    {
5327 		// For a NUL byte (end of word) write the flags etc.
5328 		if (prefixtree)
5329 		{
5330 		    // In PREFIXTREE write the required affixID and the
5331 		    // associated condition nr (stored in wn_region).  The
5332 		    // byte value is misused to store the "rare" and "not
5333 		    // combining" flags
5334 		    if (np->wn_flags == (short_u)PFX_FLAGS)
5335 			putc(BY_NOFLAGS, fd);		// <byte>
5336 		    else
5337 		    {
5338 			putc(BY_FLAGS, fd);		// <byte>
5339 			putc(np->wn_flags, fd);		// <pflags>
5340 		    }
5341 		    putc(np->wn_affixID, fd);		// <affixID>
5342 		    put_bytes(fd, (long_u)np->wn_region, 2); // <prefcondnr>
5343 		}
5344 		else
5345 		{
5346 		    // For word trees we write the flag/region items.
5347 		    flags = np->wn_flags;
5348 		    if (regionmask != 0 && np->wn_region != regionmask)
5349 			flags |= WF_REGION;
5350 		    if (np->wn_affixID != 0)
5351 			flags |= WF_AFX;
5352 		    if (flags == 0)
5353 		    {
5354 			// word without flags or region
5355 			putc(BY_NOFLAGS, fd);			// <byte>
5356 		    }
5357 		    else
5358 		    {
5359 			if (np->wn_flags >= 0x100)
5360 			{
5361 			    putc(BY_FLAGS2, fd);		// <byte>
5362 			    putc(flags, fd);			// <flags>
5363 			    putc((unsigned)flags >> 8, fd);	// <flags2>
5364 			}
5365 			else
5366 			{
5367 			    putc(BY_FLAGS, fd);			// <byte>
5368 			    putc(flags, fd);			// <flags>
5369 			}
5370 			if (flags & WF_REGION)
5371 			    putc(np->wn_region, fd);		// <region>
5372 			if (flags & WF_AFX)
5373 			    putc(np->wn_affixID, fd);		// <affixID>
5374 		    }
5375 		}
5376 	    }
5377 	}
5378 	else
5379 	{
5380 	    if (np->wn_child->wn_u1.index != 0
5381 					 && np->wn_child->wn_u2.wnode != node)
5382 	    {
5383 		// The child is written elsewhere, write the reference.
5384 		if (fd != NULL)
5385 		{
5386 		    putc(BY_INDEX, fd);			// <byte>
5387 							// <nodeidx>
5388 		    put_bytes(fd, (long_u)np->wn_child->wn_u1.index, 3);
5389 		}
5390 	    }
5391 	    else if (np->wn_child->wn_u2.wnode == NULL)
5392 		// We will write the child below and give it an index.
5393 		np->wn_child->wn_u2.wnode = node;
5394 
5395 	    if (fd != NULL)
5396 		if (putc(np->wn_byte, fd) == EOF) // <byte> or <xbyte>
5397 		{
5398 		    emsg(_(e_write));
5399 		    return 0;
5400 		}
5401 	}
5402     }
5403 
5404     // Space used in the array when reading: one for each sibling and one for
5405     // the count.
5406     newindex += siblingcount + 1;
5407 
5408     // Recursively dump the children of each sibling.
5409     FOR_ALL_NODE_SIBLINGS(node, np)
5410 	if (np->wn_byte != 0 && np->wn_child->wn_u2.wnode == node)
5411 	    newindex = put_node(fd, np->wn_child, newindex, regionmask,
5412 								  prefixtree);
5413 
5414     return newindex;
5415 }
5416 
5417 
5418 /*
5419  * ":mkspell [-ascii] outfile  infile ..."
5420  * ":mkspell [-ascii] addfile"
5421  */
5422     void
5423 ex_mkspell(exarg_T *eap)
5424 {
5425     int		fcount;
5426     char_u	**fnames;
5427     char_u	*arg = eap->arg;
5428     int		ascii = FALSE;
5429 
5430     if (STRNCMP(arg, "-ascii", 6) == 0)
5431     {
5432 	ascii = TRUE;
5433 	arg = skipwhite(arg + 6);
5434     }
5435 
5436     // Expand all the remaining arguments (e.g., $VIMRUNTIME).
5437     if (get_arglist_exp(arg, &fcount, &fnames, FALSE) == OK)
5438     {
5439 	mkspell(fcount, fnames, ascii, eap->forceit, FALSE);
5440 	FreeWild(fcount, fnames);
5441     }
5442 }
5443 
5444 /*
5445  * Create the .sug file.
5446  * Uses the soundfold info in "spin".
5447  * Writes the file with the name "wfname", with ".spl" changed to ".sug".
5448  */
5449     static void
5450 spell_make_sugfile(spellinfo_T *spin, char_u *wfname)
5451 {
5452     char_u	*fname = NULL;
5453     int		len;
5454     slang_T	*slang;
5455     int		free_slang = FALSE;
5456 
5457     /*
5458      * Read back the .spl file that was written.  This fills the required
5459      * info for soundfolding.  This also uses less memory than the
5460      * pointer-linked version of the trie.  And it avoids having two versions
5461      * of the code for the soundfolding stuff.
5462      * It might have been done already by spell_reload_one().
5463      */
5464     FOR_ALL_SPELL_LANGS(slang)
5465 	if (fullpathcmp(wfname, slang->sl_fname, FALSE, TRUE) == FPC_SAME)
5466 	    break;
5467     if (slang == NULL)
5468     {
5469 	spell_message(spin, (char_u *)_("Reading back spell file..."));
5470 	slang = spell_load_file(wfname, NULL, NULL, FALSE);
5471 	if (slang == NULL)
5472 	    return;
5473 	free_slang = TRUE;
5474     }
5475 
5476     /*
5477      * Clear the info in "spin" that is used.
5478      */
5479     spin->si_blocks = NULL;
5480     spin->si_blocks_cnt = 0;
5481     spin->si_compress_cnt = 0;	    // will stay at 0 all the time
5482     spin->si_free_count = 0;
5483     spin->si_first_free = NULL;
5484     spin->si_foldwcount = 0;
5485 
5486     /*
5487      * Go through the trie of good words, soundfold each word and add it to
5488      * the soundfold trie.
5489      */
5490     spell_message(spin, (char_u *)_("Performing soundfolding..."));
5491     if (sug_filltree(spin, slang) == FAIL)
5492 	goto theend;
5493 
5494     /*
5495      * Create the table which links each soundfold word with a list of the
5496      * good words it may come from.  Creates buffer "spin->si_spellbuf".
5497      * This also removes the wordnr from the NUL byte entries to make
5498      * compression possible.
5499      */
5500     if (sug_maketable(spin) == FAIL)
5501 	goto theend;
5502 
5503     smsg(_("Number of words after soundfolding: %ld"),
5504 				 (long)spin->si_spellbuf->b_ml.ml_line_count);
5505 
5506     /*
5507      * Compress the soundfold trie.
5508      */
5509     spell_message(spin, (char_u *)_(msg_compressing));
5510     wordtree_compress(spin, spin->si_foldroot, "case-folded");
5511 
5512     /*
5513      * Write the .sug file.
5514      * Make the file name by changing ".spl" to ".sug".
5515      */
5516     fname = alloc(MAXPATHL);
5517     if (fname == NULL)
5518 	goto theend;
5519     vim_strncpy(fname, wfname, MAXPATHL - 1);
5520     len = (int)STRLEN(fname);
5521     fname[len - 2] = 'u';
5522     fname[len - 1] = 'g';
5523     sug_write(spin, fname);
5524 
5525 theend:
5526     vim_free(fname);
5527     if (free_slang)
5528 	slang_free(slang);
5529     free_blocks(spin->si_blocks);
5530     close_spellbuf(spin->si_spellbuf);
5531 }
5532 
5533 /*
5534  * Build the soundfold trie for language "slang".
5535  */
5536     static int
5537 sug_filltree(spellinfo_T *spin, slang_T *slang)
5538 {
5539     char_u	*byts;
5540     idx_T	*idxs;
5541     int		depth;
5542     idx_T	arridx[MAXWLEN];
5543     int		curi[MAXWLEN];
5544     char_u	tword[MAXWLEN];
5545     char_u	tsalword[MAXWLEN];
5546     int		c;
5547     idx_T	n;
5548     unsigned	words_done = 0;
5549     int		wordcount[MAXWLEN];
5550 
5551     // We use si_foldroot for the soundfolded trie.
5552     spin->si_foldroot = wordtree_alloc(spin);
5553     if (spin->si_foldroot == NULL)
5554 	return FAIL;
5555 
5556     // let tree_add_word() know we're adding to the soundfolded tree
5557     spin->si_sugtree = TRUE;
5558 
5559     /*
5560      * Go through the whole case-folded tree, soundfold each word and put it
5561      * in the trie.
5562      */
5563     byts = slang->sl_fbyts;
5564     idxs = slang->sl_fidxs;
5565 
5566     arridx[0] = 0;
5567     curi[0] = 1;
5568     wordcount[0] = 0;
5569 
5570     depth = 0;
5571     while (depth >= 0 && !got_int)
5572     {
5573 	if (curi[depth] > byts[arridx[depth]])
5574 	{
5575 	    // Done all bytes at this node, go up one level.
5576 	    idxs[arridx[depth]] = wordcount[depth];
5577 	    if (depth > 0)
5578 		wordcount[depth - 1] += wordcount[depth];
5579 
5580 	    --depth;
5581 	    line_breakcheck();
5582 	}
5583 	else
5584 	{
5585 
5586 	    // Do one more byte at this node.
5587 	    n = arridx[depth] + curi[depth];
5588 	    ++curi[depth];
5589 
5590 	    c = byts[n];
5591 	    if (c == 0)
5592 	    {
5593 		// Sound-fold the word.
5594 		tword[depth] = NUL;
5595 		spell_soundfold(slang, tword, TRUE, tsalword);
5596 
5597 		// We use the "flags" field for the MSB of the wordnr,
5598 		// "region" for the LSB of the wordnr.
5599 		if (tree_add_word(spin, tsalword, spin->si_foldroot,
5600 				words_done >> 16, words_done & 0xffff,
5601 							   0) == FAIL)
5602 		    return FAIL;
5603 
5604 		++words_done;
5605 		++wordcount[depth];
5606 
5607 		// Reset the block count each time to avoid compression
5608 		// kicking in.
5609 		spin->si_blocks_cnt = 0;
5610 
5611 		// Skip over any other NUL bytes (same word with different
5612 		// flags).
5613 		while (byts[n + 1] == 0)
5614 		{
5615 		    ++n;
5616 		    ++curi[depth];
5617 		}
5618 	    }
5619 	    else
5620 	    {
5621 		// Normal char, go one level deeper.
5622 		tword[depth++] = c;
5623 		arridx[depth] = idxs[n];
5624 		curi[depth] = 1;
5625 		wordcount[depth] = 0;
5626 	    }
5627 	}
5628     }
5629 
5630     smsg(_("Total number of words: %d"), words_done);
5631 
5632     return OK;
5633 }
5634 
5635 /*
5636  * Make the table that links each word in the soundfold trie to the words it
5637  * can be produced from.
5638  * This is not unlike lines in a file, thus use a memfile to be able to access
5639  * the table efficiently.
5640  * Returns FAIL when out of memory.
5641  */
5642     static int
5643 sug_maketable(spellinfo_T *spin)
5644 {
5645     garray_T	ga;
5646     int		res = OK;
5647 
5648     // Allocate a buffer, open a memline for it and create the swap file
5649     // (uses a temp file, not a .swp file).
5650     spin->si_spellbuf = open_spellbuf();
5651     if (spin->si_spellbuf == NULL)
5652 	return FAIL;
5653 
5654     // Use a buffer to store the line info, avoids allocating many small
5655     // pieces of memory.
5656     ga_init2(&ga, 1, 100);
5657 
5658     // recursively go through the tree
5659     if (sug_filltable(spin, spin->si_foldroot->wn_sibling, 0, &ga) == -1)
5660 	res = FAIL;
5661 
5662     ga_clear(&ga);
5663     return res;
5664 }
5665 
5666 /*
5667  * Fill the table for one node and its children.
5668  * Returns the wordnr at the start of the node.
5669  * Returns -1 when out of memory.
5670  */
5671     static int
5672 sug_filltable(
5673     spellinfo_T	*spin,
5674     wordnode_T	*node,
5675     int		startwordnr,
5676     garray_T	*gap)	    // place to store line of numbers
5677 {
5678     wordnode_T	*p, *np;
5679     int		wordnr = startwordnr;
5680     int		nr;
5681     int		prev_nr;
5682 
5683     FOR_ALL_NODE_SIBLINGS(node, p)
5684     {
5685 	if (p->wn_byte == NUL)
5686 	{
5687 	    gap->ga_len = 0;
5688 	    prev_nr = 0;
5689 	    for (np = p; np != NULL && np->wn_byte == NUL; np = np->wn_sibling)
5690 	    {
5691 		if (ga_grow(gap, 10) == FAIL)
5692 		    return -1;
5693 
5694 		nr = (np->wn_flags << 16) + (np->wn_region & 0xffff);
5695 		// Compute the offset from the previous nr and store the
5696 		// offset in a way that it takes a minimum number of bytes.
5697 		// It's a bit like utf-8, but without the need to mark
5698 		// following bytes.
5699 		nr -= prev_nr;
5700 		prev_nr += nr;
5701 		gap->ga_len += offset2bytes(nr,
5702 					 (char_u *)gap->ga_data + gap->ga_len);
5703 	    }
5704 
5705 	    // add the NUL byte
5706 	    ((char_u *)gap->ga_data)[gap->ga_len++] = NUL;
5707 
5708 	    if (ml_append_buf(spin->si_spellbuf, (linenr_T)wordnr,
5709 				     gap->ga_data, gap->ga_len, TRUE) == FAIL)
5710 		return -1;
5711 	    ++wordnr;
5712 
5713 	    // Remove extra NUL entries, we no longer need them. We don't
5714 	    // bother freeing the nodes, the won't be reused anyway.
5715 	    while (p->wn_sibling != NULL && p->wn_sibling->wn_byte == NUL)
5716 		p->wn_sibling = p->wn_sibling->wn_sibling;
5717 
5718 	    // Clear the flags on the remaining NUL node, so that compression
5719 	    // works a lot better.
5720 	    p->wn_flags = 0;
5721 	    p->wn_region = 0;
5722 	}
5723 	else
5724 	{
5725 	    wordnr = sug_filltable(spin, p->wn_child, wordnr, gap);
5726 	    if (wordnr == -1)
5727 		return -1;
5728 	}
5729     }
5730     return wordnr;
5731 }
5732 
/*
 * Convert offset "nr" into a minimal number of bytes, stored in "buf".
 * Similar to utf_char2bytes(), but use 8 bits in followup bytes and avoid NUL
 * bytes.
 * The first byte tells how many bytes are used: it is below 0x80 for one
 * byte and has 0x80, 0xc0 or 0xe0 added for two, three or four bytes.
 * Returns the number of bytes stored, "buf" must have room for four.
 */
    static int
offset2bytes(int nr, char_u *buf)
{
    int	    digit[4];	// base 255 digits plus one, lowest first
    int	    rest = nr;
    int	    count;
    int	    lead;
    int	    k;

    // Split the number in parts of base 255.  One is added to each part,
    // because we need to avoid NUL bytes.
    for (k = 0; k < 3; ++k)
    {
	digit[k] = rest % 255 + 1;
	rest /= 255;
    }
    digit[3] = rest + 1;

    // Find the smallest form of which the first byte can hold the highest
    // part that is used.
    if (digit[3] > 1 || digit[2] > 0x1f)
    {
	count = 4;
	lead = 0xe0;
    }
    else if (digit[2] > 1 || digit[1] > 0x3f)
    {
	count = 3;
	lead = 0xc0;
    }
    else if (digit[1] > 1 || digit[0] > 0x7f)
    {
	count = 2;
	lead = 0x80;
    }
    else
    {
	count = 1;
	lead = 0;
    }

    // The highest part goes first, together with the marker.
    buf[0] = lead + digit[count - 1];
    for (k = 1; k < count; ++k)
	buf[k] = digit[count - 1 - k];

    return count;
}
5777 
5778 /*
5779  * Write the .sug file in "fname".
5780  */
5781     static void
5782 sug_write(spellinfo_T *spin, char_u *fname)
5783 {
5784     FILE	*fd;
5785     wordnode_T	*tree;
5786     int		nodecount;
5787     int		wcount;
5788     char_u	*line;
5789     linenr_T	lnum;
5790     int		len;
5791 
5792     // Create the file.  Note that an existing file is silently overwritten!
5793     fd = mch_fopen((char *)fname, "w");
5794     if (fd == NULL)
5795     {
5796 	semsg(_(e_notopen), fname);
5797 	return;
5798     }
5799 
5800     vim_snprintf((char *)IObuff, IOSIZE,
5801 				  _("Writing suggestion file %s..."), fname);
5802     spell_message(spin, IObuff);
5803 
5804     /*
5805      * <SUGHEADER>: <fileID> <versionnr> <timestamp>
5806      */
5807     if (fwrite(VIMSUGMAGIC, VIMSUGMAGICL, (size_t)1, fd) != 1) // <fileID>
5808     {
5809 	emsg(_(e_write));
5810 	goto theend;
5811     }
5812     putc(VIMSUGVERSION, fd);				// <versionnr>
5813 
5814     // Write si_sugtime to the file.
5815     put_time(fd, spin->si_sugtime);			// <timestamp>
5816 
5817     /*
5818      * <SUGWORDTREE>
5819      */
5820     spin->si_memtot = 0;
5821     tree = spin->si_foldroot->wn_sibling;
5822 
5823     // Clear the index and wnode fields in the tree.
5824     clear_node(tree);
5825 
5826     // Count the number of nodes.  Needed to be able to allocate the
5827     // memory when reading the nodes.  Also fills in index for shared
5828     // nodes.
5829     nodecount = put_node(NULL, tree, 0, 0, FALSE);
5830 
5831     // number of nodes in 4 bytes
5832     put_bytes(fd, (long_u)nodecount, 4);	// <nodecount>
5833     spin->si_memtot += nodecount + nodecount * sizeof(int);
5834 
5835     // Write the nodes.
5836     (void)put_node(fd, tree, 0, 0, FALSE);
5837 
5838     /*
5839      * <SUGTABLE>: <sugwcount> <sugline> ...
5840      */
5841     wcount = spin->si_spellbuf->b_ml.ml_line_count;
5842     put_bytes(fd, (long_u)wcount, 4);	// <sugwcount>
5843 
5844     for (lnum = 1; lnum <= (linenr_T)wcount; ++lnum)
5845     {
5846 	// <sugline>: <sugnr> ... NUL
5847 	line = ml_get_buf(spin->si_spellbuf, lnum, FALSE);
5848 	len = (int)STRLEN(line) + 1;
5849 	if (fwrite(line, (size_t)len, (size_t)1, fd) == 0)
5850 	{
5851 	    emsg(_(e_write));
5852 	    goto theend;
5853 	}
5854 	spin->si_memtot += len;
5855     }
5856 
5857     // Write another byte to check for errors.
5858     if (putc(0, fd) == EOF)
5859 	emsg(_(e_write));
5860 
5861     vim_snprintf((char *)IObuff, IOSIZE,
5862 		 _("Estimated runtime memory use: %d bytes"), spin->si_memtot);
5863     spell_message(spin, IObuff);
5864 
5865 theend:
5866     // close the file
5867     fclose(fd);
5868 }
5869 
5870 
5871 /*
5872  * Create a Vim spell file from one or more word lists.
5873  * "fnames[0]" is the output file name.
5874  * "fnames[fcount - 1]" is the last input file name.
5875  * Exception: when "fnames[0]" ends in ".add" it's used as the input file name
5876  * and ".spl" is appended to make the output file name.
5877  */
5878     void
5879 mkspell(
5880     int		fcount,
5881     char_u	**fnames,
5882     int		ascii,		    // -ascii argument given
5883     int		over_write,	    // overwrite existing output file
5884     int		added_word)	    // invoked through "zg"
5885 {
5886     char_u	*fname = NULL;
5887     char_u	*wfname;
5888     char_u	**innames;
5889     int		incount;
5890     afffile_T	*(afile[MAXREGIONS]);
5891     int		i;
5892     int		len;
5893     stat_T	st;
5894     int		error = FALSE;
5895     spellinfo_T spin;
5896 
5897     CLEAR_FIELD(spin);
5898     spin.si_verbose = !added_word;
5899     spin.si_ascii = ascii;
5900     spin.si_followup = TRUE;
5901     spin.si_rem_accents = TRUE;
5902     ga_init2(&spin.si_rep, (int)sizeof(fromto_T), 20);
5903     ga_init2(&spin.si_repsal, (int)sizeof(fromto_T), 20);
5904     ga_init2(&spin.si_sal, (int)sizeof(fromto_T), 20);
5905     ga_init2(&spin.si_map, (int)sizeof(char_u), 100);
5906     ga_init2(&spin.si_comppat, (int)sizeof(char_u *), 20);
5907     ga_init2(&spin.si_prefcond, (int)sizeof(char_u *), 50);
5908     hash_init(&spin.si_commonwords);
5909     spin.si_newcompID = 127;	// start compound ID at first maximum
5910 
5911     // default: fnames[0] is output file, following are input files
5912     // When "fcount" is 1 there is only one file.
5913     innames = &fnames[fcount == 1 ? 0 : 1];
5914     incount = fcount - 1;
5915 
5916     wfname = alloc(MAXPATHL);
5917     if (wfname == NULL)
5918 	return;
5919 
5920     if (fcount >= 1)
5921     {
5922 	len = (int)STRLEN(fnames[0]);
5923 	if (fcount == 1 && len > 4 && STRCMP(fnames[0] + len - 4, ".add") == 0)
5924 	{
5925 	    // For ":mkspell path/en.latin1.add" output file is
5926 	    // "path/en.latin1.add.spl".
5927 	    incount = 1;
5928 	    vim_snprintf((char *)wfname, MAXPATHL, "%s.spl", fnames[0]);
5929 	}
5930 	else if (fcount == 1)
5931 	{
5932 	    // For ":mkspell path/vim" output file is "path/vim.latin1.spl".
5933 	    incount = 1;
5934 	    vim_snprintf((char *)wfname, MAXPATHL, SPL_FNAME_TMPL,
5935 		  fnames[0], spin.si_ascii ? (char_u *)"ascii" : spell_enc());
5936 	}
5937 	else if (len > 4 && STRCMP(fnames[0] + len - 4, ".spl") == 0)
5938 	{
5939 	    // Name ends in ".spl", use as the file name.
5940 	    vim_strncpy(wfname, fnames[0], MAXPATHL - 1);
5941 	}
5942 	else
5943 	    // Name should be language, make the file name from it.
5944 	    vim_snprintf((char *)wfname, MAXPATHL, SPL_FNAME_TMPL,
5945 		  fnames[0], spin.si_ascii ? (char_u *)"ascii" : spell_enc());
5946 
5947 	// Check for .ascii.spl.
5948 	if (strstr((char *)gettail(wfname), SPL_FNAME_ASCII) != NULL)
5949 	    spin.si_ascii = TRUE;
5950 
5951 	// Check for .add.spl.
5952 	if (strstr((char *)gettail(wfname), SPL_FNAME_ADD) != NULL)
5953 	    spin.si_add = TRUE;
5954     }
5955 
5956     if (incount <= 0)
5957 	emsg(_(e_invarg));	// need at least output and input names
5958     else if (vim_strchr(gettail(wfname), '_') != NULL)
5959 	emsg(_("E751: Output file name must not have region name"));
5960     else if (incount > MAXREGIONS)
5961 	semsg(_("E754: Only up to %d regions supported"), MAXREGIONS);
5962     else
5963     {
5964 	// Check for overwriting before doing things that may take a lot of
5965 	// time.
5966 	if (!over_write && mch_stat((char *)wfname, &st) >= 0)
5967 	{
5968 	    emsg(_(e_exists));
5969 	    goto theend;
5970 	}
5971 	if (mch_isdir(wfname))
5972 	{
5973 	    semsg(_(e_isadir2), wfname);
5974 	    goto theend;
5975 	}
5976 
5977 	fname = alloc(MAXPATHL);
5978 	if (fname == NULL)
5979 	    goto theend;
5980 
5981 	/*
5982 	 * Init the aff and dic pointers.
5983 	 * Get the region names if there are more than 2 arguments.
5984 	 */
5985 	for (i = 0; i < incount; ++i)
5986 	{
5987 	    afile[i] = NULL;
5988 
5989 	    if (incount > 1)
5990 	    {
5991 		len = (int)STRLEN(innames[i]);
5992 		if (STRLEN(gettail(innames[i])) < 5
5993 						|| innames[i][len - 3] != '_')
5994 		{
5995 		    semsg(_("E755: Invalid region in %s"), innames[i]);
5996 		    goto theend;
5997 		}
5998 		spin.si_region_name[i * 2] = TOLOWER_ASC(innames[i][len - 2]);
5999 		spin.si_region_name[i * 2 + 1] =
6000 					     TOLOWER_ASC(innames[i][len - 1]);
6001 	    }
6002 	}
6003 	spin.si_region_count = incount;
6004 
6005 	spin.si_foldroot = wordtree_alloc(&spin);
6006 	spin.si_keeproot = wordtree_alloc(&spin);
6007 	spin.si_prefroot = wordtree_alloc(&spin);
6008 	if (spin.si_foldroot == NULL
6009 		|| spin.si_keeproot == NULL
6010 		|| spin.si_prefroot == NULL)
6011 	{
6012 	    free_blocks(spin.si_blocks);
6013 	    goto theend;
6014 	}
6015 
6016 	// When not producing a .add.spl file clear the character table when
6017 	// we encounter one in the .aff file.  This means we dump the current
6018 	// one in the .spl file if the .aff file doesn't define one.  That's
6019 	// better than guessing the contents, the table will match a
6020 	// previously loaded spell file.
6021 	if (!spin.si_add)
6022 	    spin.si_clear_chartab = TRUE;
6023 
6024 	/*
6025 	 * Read all the .aff and .dic files.
6026 	 * Text is converted to 'encoding'.
6027 	 * Words are stored in the case-folded and keep-case trees.
6028 	 */
6029 	for (i = 0; i < incount && !error; ++i)
6030 	{
6031 	    spin.si_conv.vc_type = CONV_NONE;
6032 	    spin.si_region = 1 << i;
6033 
6034 	    vim_snprintf((char *)fname, MAXPATHL, "%s.aff", innames[i]);
6035 	    if (mch_stat((char *)fname, &st) >= 0)
6036 	    {
6037 		// Read the .aff file.  Will init "spin->si_conv" based on the
6038 		// "SET" line.
6039 		afile[i] = spell_read_aff(&spin, fname);
6040 		if (afile[i] == NULL)
6041 		    error = TRUE;
6042 		else
6043 		{
6044 		    // Read the .dic file and store the words in the trees.
6045 		    vim_snprintf((char *)fname, MAXPATHL, "%s.dic",
6046 								  innames[i]);
6047 		    if (spell_read_dic(&spin, fname, afile[i]) == FAIL)
6048 			error = TRUE;
6049 		}
6050 	    }
6051 	    else
6052 	    {
6053 		// No .aff file, try reading the file as a word list.  Store
6054 		// the words in the trees.
6055 		if (spell_read_wordfile(&spin, innames[i]) == FAIL)
6056 		    error = TRUE;
6057 	    }
6058 
6059 	    // Free any conversion stuff.
6060 	    convert_setup(&spin.si_conv, NULL, NULL);
6061 	}
6062 
6063 	if (spin.si_compflags != NULL && spin.si_nobreak)
6064 	    msg(_("Warning: both compounding and NOBREAK specified"));
6065 
6066 	if (!error && !got_int)
6067 	{
6068 	    /*
6069 	     * Combine tails in the tree.
6070 	     */
6071 	    spell_message(&spin, (char_u *)_(msg_compressing));
6072 	    wordtree_compress(&spin, spin.si_foldroot, "case-folded");
6073 	    wordtree_compress(&spin, spin.si_keeproot, "keep-case");
6074 	    wordtree_compress(&spin, spin.si_prefroot, "prefixes");
6075 	}
6076 
6077 	if (!error && !got_int)
6078 	{
6079 	    /*
6080 	     * Write the info in the spell file.
6081 	     */
6082 	    vim_snprintf((char *)IObuff, IOSIZE,
6083 				      _("Writing spell file %s..."), wfname);
6084 	    spell_message(&spin, IObuff);
6085 
6086 	    error = write_vim_spell(&spin, wfname) == FAIL;
6087 
6088 	    spell_message(&spin, (char_u *)_("Done!"));
6089 	    vim_snprintf((char *)IObuff, IOSIZE,
6090 		 _("Estimated runtime memory use: %d bytes"), spin.si_memtot);
6091 	    spell_message(&spin, IObuff);
6092 
6093 	    /*
6094 	     * If the file is loaded need to reload it.
6095 	     */
6096 	    if (!error)
6097 		spell_reload_one(wfname, added_word);
6098 	}
6099 
6100 	// Free the allocated memory.
6101 	ga_clear(&spin.si_rep);
6102 	ga_clear(&spin.si_repsal);
6103 	ga_clear(&spin.si_sal);
6104 	ga_clear(&spin.si_map);
6105 	ga_clear(&spin.si_comppat);
6106 	ga_clear(&spin.si_prefcond);
6107 	hash_clear_all(&spin.si_commonwords, 0);
6108 
6109 	// Free the .aff file structures.
6110 	for (i = 0; i < incount; ++i)
6111 	    if (afile[i] != NULL)
6112 		spell_free_aff(afile[i]);
6113 
6114 	// Free all the bits and pieces at once.
6115 	free_blocks(spin.si_blocks);
6116 
6117 	/*
6118 	 * If there is soundfolding info and no NOSUGFILE item create the
6119 	 * .sug file with the soundfolded word trie.
6120 	 */
6121 	if (spin.si_sugtime != 0 && !error && !got_int)
6122 	    spell_make_sugfile(&spin, wfname);
6123 
6124     }
6125 
6126 theend:
6127     vim_free(fname);
6128     vim_free(wfname);
6129 }
6130 
6131 /*
6132  * Display a message for spell file processing when 'verbose' is set or using
6133  * ":mkspell".  "str" can be IObuff.
6134  */
6135     static void
6136 spell_message(spellinfo_T *spin, char_u *str)
6137 {
6138     if (spin->si_verbose || p_verbose > 2)
6139     {
6140 	if (!spin->si_verbose)
6141 	    verbose_enter();
6142 	msg((char *)str);
6143 	out_flush();
6144 	if (!spin->si_verbose)
6145 	    verbose_leave();
6146     }
6147 }
6148 
6149 /*
6150  * ":[count]spellgood  {word}"
6151  * ":[count]spellwrong {word}"
6152  * ":[count]spellundo  {word}"
6153  * ":[count]spellrare  {word}"
6154  */
6155     void
6156 ex_spell(exarg_T *eap)
6157 {
6158     spell_add_word(eap->arg, (int)STRLEN(eap->arg),
6159 		eap->cmdidx == CMD_spellwrong ? SPELL_ADD_BAD :
6160 		eap->cmdidx == CMD_spellrare ? SPELL_ADD_RARE : SPELL_ADD_GOOD,
6161 				   eap->forceit ? 0 : (int)eap->line2,
6162 				   eap->cmdidx == CMD_spellundo);
6163 }
6164 
6165 /*
6166  * Add "word[len]" to 'spellfile' as a good, rare or bad word.
6167  */
6168     void
6169 spell_add_word(
6170     char_u	*word,
6171     int		len,
6172     int		what,	    // SPELL_ADD_ values
6173     int		idx,	    // "zG" and "zW": zero, otherwise index in
6174 			    // 'spellfile'
6175     int		undo)	    // TRUE for "zug", "zuG", "zuw" and "zuW"
6176 {
6177     FILE	*fd = NULL;
6178     buf_T	*buf = NULL;
6179     int		new_spf = FALSE;
6180     char_u	*fname;
6181     char_u	*fnamebuf = NULL;
6182     char_u	line[MAXWLEN * 2];
6183     long	fpos, fpos_next = 0;
6184     int		i;
6185     char_u	*spf;
6186 
6187     if (idx == 0)	    // use internal wordlist
6188     {
6189 	if (int_wordlist == NULL)
6190 	{
6191 	    int_wordlist = vim_tempname('s', FALSE);
6192 	    if (int_wordlist == NULL)
6193 		return;
6194 	}
6195 	fname = int_wordlist;
6196     }
6197     else
6198     {
6199 	// If 'spellfile' isn't set figure out a good default value.
6200 	if (*curwin->w_s->b_p_spf == NUL)
6201 	{
6202 	    init_spellfile();
6203 	    new_spf = TRUE;
6204 	}
6205 
6206 	if (*curwin->w_s->b_p_spf == NUL)
6207 	{
6208 	    semsg(_(e_notset), "spellfile");
6209 	    return;
6210 	}
6211 	fnamebuf = alloc(MAXPATHL);
6212 	if (fnamebuf == NULL)
6213 	    return;
6214 
6215 	for (spf = curwin->w_s->b_p_spf, i = 1; *spf != NUL; ++i)
6216 	{
6217 	    copy_option_part(&spf, fnamebuf, MAXPATHL, ",");
6218 	    if (i == idx)
6219 		break;
6220 	    if (*spf == NUL)
6221 	    {
6222 		semsg(_("E765: 'spellfile' does not have %d entries"), idx);
6223 		vim_free(fnamebuf);
6224 		return;
6225 	    }
6226 	}
6227 
6228 	// Check that the user isn't editing the .add file somewhere.
6229 	buf = buflist_findname_exp(fnamebuf);
6230 	if (buf != NULL && buf->b_ml.ml_mfp == NULL)
6231 	    buf = NULL;
6232 	if (buf != NULL && bufIsChanged(buf))
6233 	{
6234 	    emsg(_(e_bufloaded));
6235 	    vim_free(fnamebuf);
6236 	    return;
6237 	}
6238 
6239 	fname = fnamebuf;
6240     }
6241 
6242     if (what == SPELL_ADD_BAD || undo)
6243     {
6244 	// When the word appears as good word we need to remove that one,
6245 	// since its flags sort before the one with WF_BANNED.
6246 	fd = mch_fopen((char *)fname, "r");
6247 	if (fd != NULL)
6248 	{
6249 	    while (!vim_fgets(line, MAXWLEN * 2, fd))
6250 	    {
6251 		fpos = fpos_next;
6252 		fpos_next = ftell(fd);
6253 		if (STRNCMP(word, line, len) == 0
6254 			&& (line[len] == '/' || line[len] < ' '))
6255 		{
6256 		    // Found duplicate word.  Remove it by writing a '#' at
6257 		    // the start of the line.  Mixing reading and writing
6258 		    // doesn't work for all systems, close the file first.
6259 		    fclose(fd);
6260 		    fd = mch_fopen((char *)fname, "r+");
6261 		    if (fd == NULL)
6262 			break;
6263 		    if (fseek(fd, fpos, SEEK_SET) == 0)
6264 		    {
6265 			fputc('#', fd);
6266 			if (undo)
6267 			{
6268 			    home_replace(NULL, fname, NameBuff, MAXPATHL, TRUE);
6269 			    smsg(_("Word '%.*s' removed from %s"),
6270 							 len, word, NameBuff);
6271 			}
6272 		    }
6273 		    fseek(fd, fpos_next, SEEK_SET);
6274 		}
6275 	    }
6276 	    if (fd != NULL)
6277 		fclose(fd);
6278 	}
6279     }
6280 
6281     if (!undo)
6282     {
6283 	fd = mch_fopen((char *)fname, "a");
6284 	if (fd == NULL && new_spf)
6285 	{
6286 	    char_u *p;
6287 
6288 	    // We just initialized the 'spellfile' option and can't open the
6289 	    // file.  We may need to create the "spell" directory first.  We
6290 	    // already checked the runtime directory is writable in
6291 	    // init_spellfile().
6292 	    if (!dir_of_file_exists(fname) && (p = gettail_sep(fname)) != fname)
6293 	    {
6294 		int c = *p;
6295 
6296 		// The directory doesn't exist.  Try creating it and opening
6297 		// the file again.
6298 		*p = NUL;
6299 		vim_mkdir(fname, 0755);
6300 		*p = c;
6301 		fd = mch_fopen((char *)fname, "a");
6302 	    }
6303 	}
6304 
6305 	if (fd == NULL)
6306 	    semsg(_(e_notopen), fname);
6307 	else
6308 	{
6309 	    if (what == SPELL_ADD_BAD)
6310 		fprintf(fd, "%.*s/!\n", len, word);
6311 	    else if (what == SPELL_ADD_RARE)
6312 		fprintf(fd, "%.*s/?\n", len, word);
6313 	    else
6314 		fprintf(fd, "%.*s\n", len, word);
6315 	    fclose(fd);
6316 
6317 	    home_replace(NULL, fname, NameBuff, MAXPATHL, TRUE);
6318 	    smsg(_("Word '%.*s' added to %s"), len, word, NameBuff);
6319 	}
6320     }
6321 
6322     if (fd != NULL)
6323     {
6324 	// Update the .add.spl file.
6325 	mkspell(1, &fname, FALSE, TRUE, TRUE);
6326 
6327 	// If the .add file is edited somewhere, reload it.
6328 	if (buf != NULL)
6329 	    buf_reload(buf, buf->b_orig_mode);
6330 
6331 	redraw_all_later(SOME_VALID);
6332     }
6333     vim_free(fnamebuf);
6334 }
6335 
6336 /*
6337  * Initialize 'spellfile' for the current buffer.
6338  */
6339     static void
6340 init_spellfile(void)
6341 {
6342     char_u	*buf;
6343     int		l;
6344     char_u	*fname;
6345     char_u	*rtp;
6346     char_u	*lend;
6347     int		aspath = FALSE;
6348     char_u	*lstart = curbuf->b_s.b_p_spl;
6349 
6350     if (*curwin->w_s->b_p_spl != NUL && curwin->w_s->b_langp.ga_len > 0)
6351     {
6352 	buf = alloc(MAXPATHL);
6353 	if (buf == NULL)
6354 	    return;
6355 
6356 	// Find the end of the language name.  Exclude the region.  If there
6357 	// is a path separator remember the start of the tail.
6358 	for (lend = curwin->w_s->b_p_spl; *lend != NUL
6359 			&& vim_strchr((char_u *)",._", *lend) == NULL; ++lend)
6360 	    if (vim_ispathsep(*lend))
6361 	    {
6362 		aspath = TRUE;
6363 		lstart = lend + 1;
6364 	    }
6365 
6366 	// Loop over all entries in 'runtimepath'.  Use the first one where we
6367 	// are allowed to write.
6368 	rtp = p_rtp;
6369 	while (*rtp != NUL)
6370 	{
6371 	    if (aspath)
6372 		// Use directory of an entry with path, e.g., for
6373 		// "/dir/lg.utf-8.spl" use "/dir".
6374 		vim_strncpy(buf, curbuf->b_s.b_p_spl,
6375 					    lstart - curbuf->b_s.b_p_spl - 1);
6376 	    else
6377 		// Copy the path from 'runtimepath' to buf[].
6378 		copy_option_part(&rtp, buf, MAXPATHL, ",");
6379 	    if (filewritable(buf) == 2)
6380 	    {
6381 		// Use the first language name from 'spelllang' and the
6382 		// encoding used in the first loaded .spl file.
6383 		if (aspath)
6384 		    vim_strncpy(buf, curbuf->b_s.b_p_spl,
6385 						  lend - curbuf->b_s.b_p_spl);
6386 		else
6387 		{
6388 		    // Create the "spell" directory if it doesn't exist yet.
6389 		    l = (int)STRLEN(buf);
6390 		    vim_snprintf((char *)buf + l, MAXPATHL - l, "/spell");
6391 		    if (filewritable(buf) != 2)
6392 			vim_mkdir(buf, 0755);
6393 
6394 		    l = (int)STRLEN(buf);
6395 		    vim_snprintf((char *)buf + l, MAXPATHL - l,
6396 				 "/%.*s", (int)(lend - lstart), lstart);
6397 		}
6398 		l = (int)STRLEN(buf);
6399 		fname = LANGP_ENTRY(curwin->w_s->b_langp, 0)
6400 							 ->lp_slang->sl_fname;
6401 		vim_snprintf((char *)buf + l, MAXPATHL - l, ".%s.add",
6402 			fname != NULL
6403 			  && strstr((char *)gettail(fname), ".ascii.") != NULL
6404 				       ? (char_u *)"ascii" : spell_enc());
6405 		set_option_value((char_u *)"spellfile", 0L, buf, OPT_LOCAL);
6406 		break;
6407 	    }
6408 	    aspath = FALSE;
6409 	}
6410 
6411 	vim_free(buf);
6412     }
6413 }
6414 
6415 
6416 
6417 /*
6418  * Set the spell character tables from strings in the affix file.
6419  */
6420     static int
6421 set_spell_chartab(char_u *fol, char_u *low, char_u *upp)
6422 {
6423     // We build the new tables here first, so that we can compare with the
6424     // previous one.
6425     spelltab_T	new_st;
6426     char_u	*pf = fol, *pl = low, *pu = upp;
6427     int		f, l, u;
6428 
6429     clear_spell_chartab(&new_st);
6430 
6431     while (*pf != NUL)
6432     {
6433 	if (*pl == NUL || *pu == NUL)
6434 	{
6435 	    emsg(_(e_affform));
6436 	    return FAIL;
6437 	}
6438 	f = mb_ptr2char_adv(&pf);
6439 	l = mb_ptr2char_adv(&pl);
6440 	u = mb_ptr2char_adv(&pu);
6441 
6442 	// Every character that appears is a word character.
6443 	if (f < 256)
6444 	    new_st.st_isw[f] = TRUE;
6445 	if (l < 256)
6446 	    new_st.st_isw[l] = TRUE;
6447 	if (u < 256)
6448 	    new_st.st_isw[u] = TRUE;
6449 
6450 	// if "LOW" and "FOL" are not the same the "LOW" char needs
6451 	// case-folding
6452 	if (l < 256 && l != f)
6453 	{
6454 	    if (f >= 256)
6455 	    {
6456 		emsg(_(e_affrange));
6457 		return FAIL;
6458 	    }
6459 	    new_st.st_fold[l] = f;
6460 	}
6461 
6462 	// if "UPP" and "FOL" are not the same the "UPP" char needs
6463 	// case-folding, it's upper case and the "UPP" is the upper case of
6464 	// "FOL" .
6465 	if (u < 256 && u != f)
6466 	{
6467 	    if (f >= 256)
6468 	    {
6469 		emsg(_(e_affrange));
6470 		return FAIL;
6471 	    }
6472 	    new_st.st_fold[u] = f;
6473 	    new_st.st_isu[u] = TRUE;
6474 	    new_st.st_upper[f] = u;
6475 	}
6476     }
6477 
6478     if (*pl != NUL || *pu != NUL)
6479     {
6480 	emsg(_(e_affform));
6481 	return FAIL;
6482     }
6483 
6484     return set_spell_finish(&new_st);
6485 }
6486 
6487 /*
6488  * Set the spell character tables from strings in the .spl file.
6489  */
6490     static void
6491 set_spell_charflags(
6492     char_u	*flags,
6493     int		cnt,	    // length of "flags"
6494     char_u	*fol)
6495 {
6496     // We build the new tables here first, so that we can compare with the
6497     // previous one.
6498     spelltab_T	new_st;
6499     int		i;
6500     char_u	*p = fol;
6501     int		c;
6502 
6503     clear_spell_chartab(&new_st);
6504 
6505     for (i = 0; i < 128; ++i)
6506     {
6507 	if (i < cnt)
6508 	{
6509 	    new_st.st_isw[i + 128] = (flags[i] & CF_WORD) != 0;
6510 	    new_st.st_isu[i + 128] = (flags[i] & CF_UPPER) != 0;
6511 	}
6512 
6513 	if (*p != NUL)
6514 	{
6515 	    c = mb_ptr2char_adv(&p);
6516 	    new_st.st_fold[i + 128] = c;
6517 	    if (i + 128 != c && new_st.st_isu[i + 128] && c < 256)
6518 		new_st.st_upper[c] = i + 128;
6519 	}
6520     }
6521 
6522     (void)set_spell_finish(&new_st);
6523 }
6524 
6525     static int
6526 set_spell_finish(spelltab_T *new_st)
6527 {
6528     int		i;
6529 
6530     if (did_set_spelltab)
6531     {
6532 	// check that it's the same table
6533 	for (i = 0; i < 256; ++i)
6534 	{
6535 	    if (spelltab.st_isw[i] != new_st->st_isw[i]
6536 		    || spelltab.st_isu[i] != new_st->st_isu[i]
6537 		    || spelltab.st_fold[i] != new_st->st_fold[i]
6538 		    || spelltab.st_upper[i] != new_st->st_upper[i])
6539 	    {
6540 		emsg(_("E763: Word characters differ between spell files"));
6541 		return FAIL;
6542 	    }
6543 	}
6544     }
6545     else
6546     {
6547 	// copy the new spelltab into the one being used
6548 	spelltab = *new_st;
6549 	did_set_spelltab = TRUE;
6550     }
6551 
6552     return OK;
6553 }
6554 
6555 /*
6556  * Write the table with prefix conditions to the .spl file.
6557  * When "fd" is NULL only count the length of what is written.
6558  */
6559     static int
6560 write_spell_prefcond(FILE *fd, garray_T *gap)
6561 {
6562     int		i;
6563     char_u	*p;
6564     int		len;
6565     int		totlen;
6566     size_t	x = 1;  // collect return value of fwrite()
6567 
6568     if (fd != NULL)
6569 	put_bytes(fd, (long_u)gap->ga_len, 2);	    // <prefcondcnt>
6570 
6571     totlen = 2 + gap->ga_len; // length of <prefcondcnt> and <condlen> bytes
6572 
6573     for (i = 0; i < gap->ga_len; ++i)
6574     {
6575 	// <prefcond> : <condlen> <condstr>
6576 	p = ((char_u **)gap->ga_data)[i];
6577 	if (p != NULL)
6578 	{
6579 	    len = (int)STRLEN(p);
6580 	    if (fd != NULL)
6581 	    {
6582 		fputc(len, fd);
6583 		x &= fwrite(p, (size_t)len, (size_t)1, fd);
6584 	    }
6585 	    totlen += len;
6586 	}
6587 	else if (fd != NULL)
6588 	    fputc(0, fd);
6589     }
6590 
6591     return totlen;
6592 }
6593 
6594 
6595 /*
6596  * Use map string "map" for languages "lp".
6597  */
6598     static void
6599 set_map_str(slang_T *lp, char_u *map)
6600 {
6601     char_u	*p;
6602     int		headc = 0;
6603     int		c;
6604     int		i;
6605 
6606     if (*map == NUL)
6607     {
6608 	lp->sl_has_map = FALSE;
6609 	return;
6610     }
6611     lp->sl_has_map = TRUE;
6612 
6613     // Init the array and hash tables empty.
6614     for (i = 0; i < 256; ++i)
6615 	lp->sl_map_array[i] = 0;
6616     hash_init(&lp->sl_map_hash);
6617 
6618     /*
6619      * The similar characters are stored separated with slashes:
6620      * "aaa/bbb/ccc/".  Fill sl_map_array[c] with the character before c and
6621      * before the same slash.  For characters above 255 sl_map_hash is used.
6622      */
6623     for (p = map; *p != NUL; )
6624     {
6625 	c = mb_cptr2char_adv(&p);
6626 	if (c == '/')
6627 	    headc = 0;
6628 	else
6629 	{
6630 	    if (headc == 0)
6631 		 headc = c;
6632 
6633 	    // Characters above 255 don't fit in sl_map_array[], put them in
6634 	    // the hash table.  Each entry is the char, a NUL the headchar and
6635 	    // a NUL.
6636 	    if (c >= 256)
6637 	    {
6638 		int	    cl = mb_char2len(c);
6639 		int	    headcl = mb_char2len(headc);
6640 		char_u	    *b;
6641 		hash_T	    hash;
6642 		hashitem_T  *hi;
6643 
6644 		b = alloc(cl + headcl + 2);
6645 		if (b == NULL)
6646 		    return;
6647 		mb_char2bytes(c, b);
6648 		b[cl] = NUL;
6649 		mb_char2bytes(headc, b + cl + 1);
6650 		b[cl + 1 + headcl] = NUL;
6651 		hash = hash_hash(b);
6652 		hi = hash_lookup(&lp->sl_map_hash, b, hash);
6653 		if (HASHITEM_EMPTY(hi))
6654 		    hash_add_item(&lp->sl_map_hash, hi, b, hash);
6655 		else
6656 		{
6657 		    // This should have been checked when generating the .spl
6658 		    // file.
6659 		    emsg(_("E783: duplicate char in MAP entry"));
6660 		    vim_free(b);
6661 		}
6662 	    }
6663 	    else
6664 		lp->sl_map_array[c] = headc;
6665 	}
6666     }
6667 }
6668 
6669 #endif  // FEAT_SPELL
6670