xref: /vim-8.2.3635/src/spell.c (revision 8ee52aff)
/* vi:set ts=8 sts=4 sw=4 noet:
 *
 * VIM - Vi IMproved	by Bram Moolenaar
 *
 * Do ":help uganda"  in Vim to read copying and usage conditions.
 * Do ":help credits" in Vim to see a list of people who contributed.
 * See README.txt for an overview of the Vim source code.
 */

/*
 * spell.c: code for spell checking
 *
 * See spellfile.c for the Vim spell file format.
 *
 * The spell checking mechanism uses a tree (aka trie).  Each node in the tree
 * has a list of bytes that can appear (siblings).  For each byte there is a
 * pointer to the node with the byte that follows in the word (child).
 *
 * A NUL byte is used where the word may end.  The bytes are sorted, so that
 * binary searching can be used and the NUL bytes are at the start.  The
 * number of possible bytes is stored before the list of bytes.
 *
 * The tree uses two arrays: "byts" stores the characters, "idxs" stores
 * either the next index or flags.  The tree starts at index 0.  For example,
 * to look up "vi" this sequence is followed:
 *	i = 0
 *	len = byts[i]
 *	n = where "v" appears in byts[i + 1] to byts[i + len]
 *	i = idxs[n]
 *	len = byts[i]
 *	n = where "i" appears in byts[i + 1] to byts[i + len]
 *	i = idxs[n]
 *	len = byts[i]
 *	find that byts[i + 1] is 0, idxs[i + 1] has flags for "vi".
 *
 * There are two word trees: one with case-folded words and one with words in
 * original case.  The second one is only used for keep-case words and is
 * usually small.
 *
 * There is one additional tree for when not all prefixes are applied when
 * generating the .spl file.  This tree stores all the possible prefixes, as
 * if they were words.  At each word (prefix) end the prefix nr is stored, the
 * following word must support this prefix nr.  And the condition nr is
 * stored, used to look up the condition that the word must match with.
 *
 * Thanks to Olaf Seibert for providing an example implementation of this tree
 * and the compression mechanism.
 * LZ trie ideas:
 *	http://www.irb.hr/hr/home/ristov/papers/RistovLZtrieRevision1.pdf
 * More papers: http://www-igm.univ-mlv.fr/~laporte/publi_en.html
 *
 * Matching involves checking the caps type: Onecap ALLCAP KeepCap.
 *
 * Why doesn't Vim use aspell/ispell/myspell/etc.?
 * See ":help develop-spell".
 */
57402d2feaSBram Moolenaar 
589ccfebddSBram Moolenaar #define IN_SPELL_C
59e19defe1SBram Moolenaar #include "vim.h"
60e19defe1SBram Moolenaar 
61f71a3db4SBram Moolenaar #if defined(FEAT_SPELL) || defined(PROTO)
62e19defe1SBram Moolenaar 
630d6f5d97SBram Moolenaar #ifndef UNIX		// it's in os_unix.h for Unix
640d6f5d97SBram Moolenaar # include <time.h>	// for time_t
654770d09aSBram Moolenaar #endif
664770d09aSBram Moolenaar 
670d6f5d97SBram Moolenaar #define REGION_ALL 0xff		// word valid in all regions
68cfc6c43cSBram Moolenaar 
690d6f5d97SBram Moolenaar // Result values.  Lower number is accepted over higher one.
70cfc6c43cSBram Moolenaar #define SP_BANNED	-1
71402d2feaSBram Moolenaar #define SP_OK		0
72cfc6c43cSBram Moolenaar #define SP_RARE		1
73cfc6c43cSBram Moolenaar #define SP_LOCAL	2
74cfc6c43cSBram Moolenaar #define SP_BAD		3
75402d2feaSBram Moolenaar 
769ba0eb85SBram Moolenaar /*
77402d2feaSBram Moolenaar  * Structure to store info for word matching.
78402d2feaSBram Moolenaar  */
79402d2feaSBram Moolenaar typedef struct matchinf_S
80402d2feaSBram Moolenaar {
810d6f5d97SBram Moolenaar     langp_T	*mi_lp;			// info for language and region
8263d5a1e5SBram Moolenaar 
830d6f5d97SBram Moolenaar     // pointers to original text to be checked
840d6f5d97SBram Moolenaar     char_u	*mi_word;		// start of word being checked
850d6f5d97SBram Moolenaar     char_u	*mi_end;		// end of matching word so far
860d6f5d97SBram Moolenaar     char_u	*mi_fend;		// next char to be added to mi_fword
870d6f5d97SBram Moolenaar     char_u	*mi_cend;		// char after what was used for
880d6f5d97SBram Moolenaar 					// mi_capflags
8963d5a1e5SBram Moolenaar 
900d6f5d97SBram Moolenaar     // case-folded text
910d6f5d97SBram Moolenaar     char_u	mi_fword[MAXWLEN + 1];	// mi_word case-folded
920d6f5d97SBram Moolenaar     int		mi_fwordlen;		// nr of valid bytes in mi_fword
9363d5a1e5SBram Moolenaar 
940d6f5d97SBram Moolenaar     // for when checking word after a prefix
950d6f5d97SBram Moolenaar     int		mi_prefarridx;		// index in sl_pidxs with list of
960d6f5d97SBram Moolenaar 					// affixID/condition
970d6f5d97SBram Moolenaar     int		mi_prefcnt;		// number of entries at mi_prefarridx
980d6f5d97SBram Moolenaar     int		mi_prefixlen;		// byte length of prefix
990d6f5d97SBram Moolenaar     int		mi_cprefixlen;		// byte length of prefix in original
1000d6f5d97SBram Moolenaar 					// case
1011d73c885SBram Moolenaar 
1020d6f5d97SBram Moolenaar     // for when checking a compound word
1030d6f5d97SBram Moolenaar     int		mi_compoff;		// start of following word offset
1040d6f5d97SBram Moolenaar     char_u	mi_compflags[MAXWLEN];	// flags for compound words used
1050d6f5d97SBram Moolenaar     int		mi_complen;		// nr of compound words used
1060d6f5d97SBram Moolenaar     int		mi_compextra;		// nr of COMPOUNDROOT words
107ae5bce1cSBram Moolenaar 
1080d6f5d97SBram Moolenaar     // others
1090d6f5d97SBram Moolenaar     int		mi_result;		// result so far: SP_BAD, SP_OK, etc.
1100d6f5d97SBram Moolenaar     int		mi_capflags;		// WF_ONECAP WF_ALLCAP WF_KEEPCAP
1110d6f5d97SBram Moolenaar     win_T	*mi_win;		// buffer being checked
1127862282fSBram Moolenaar 
1130d6f5d97SBram Moolenaar     // for NOBREAK
1140d6f5d97SBram Moolenaar     int		mi_result2;		// "mi_resul" without following word
1150d6f5d97SBram Moolenaar     char_u	*mi_end2;		// "mi_end" without following word
116402d2feaSBram Moolenaar } matchinf_T;
117402d2feaSBram Moolenaar 
118cfc6c43cSBram Moolenaar 
119baaa7e9eSBram Moolenaar static int spell_mb_isword_class(int cl, win_T *wp);
120cfc6c43cSBram Moolenaar 
1210d6f5d97SBram Moolenaar // mode values for find_word
1220d6f5d97SBram Moolenaar #define FIND_FOLDWORD	    0	// find word case-folded
1230d6f5d97SBram Moolenaar #define FIND_KEEPWORD	    1	// find keep-case word
1240d6f5d97SBram Moolenaar #define FIND_PREFIX	    2	// find word after prefix
1250d6f5d97SBram Moolenaar #define FIND_COMPOUND	    3	// find case-folded compound word
1260d6f5d97SBram Moolenaar #define FIND_KEEPCOMPOUND   4	// find keep-case compound word
1271d73c885SBram Moolenaar 
128baaa7e9eSBram Moolenaar static void find_word(matchinf_T *mip, int mode);
129baaa7e9eSBram Moolenaar static void find_prefix(matchinf_T *mip, int mode);
130baaa7e9eSBram Moolenaar static int fold_more(matchinf_T *mip);
131baaa7e9eSBram Moolenaar static void spell_load_cb(char_u *fname, void *cookie);
132baaa7e9eSBram Moolenaar static int count_syllables(slang_T *slang, char_u *word);
133baaa7e9eSBram Moolenaar static void clear_midword(win_T *buf);
134baaa7e9eSBram Moolenaar static void use_midword(slang_T *lp, win_T *buf);
135baaa7e9eSBram Moolenaar static int find_region(char_u *rp, char_u *region);
136baaa7e9eSBram Moolenaar static void spell_soundfold_sofo(slang_T *slang, char_u *inword, char_u *res);
137baaa7e9eSBram Moolenaar static void spell_soundfold_sal(slang_T *slang, char_u *inword, char_u *res);
138baaa7e9eSBram Moolenaar static void spell_soundfold_wsal(slang_T *slang, char_u *inword, char_u *res);
139baaa7e9eSBram Moolenaar static void dump_word(slang_T *slang, char_u *word, char_u *pat, int *dir, int round, int flags, linenr_T lnum);
140baaa7e9eSBram Moolenaar static linenr_T dump_prefixes(slang_T *slang, char_u *word, char_u *pat, int *dir, int round, int flags, linenr_T startlnum);
1419ba0eb85SBram Moolenaar 
142402d2feaSBram Moolenaar /*
143402d2feaSBram Moolenaar  * Main spell-checking function.
14451485f06SBram Moolenaar  * "ptr" points to a character that could be the start of a word.
145482aaeb0SBram Moolenaar  * "*attrp" is set to the highlight index for a badly spelled word.  For a
146482aaeb0SBram Moolenaar  * non-word or when it's OK it remains unchanged.
147402d2feaSBram Moolenaar  * This must only be called when 'spelllang' is not empty.
1489ba0eb85SBram Moolenaar  *
149f9184a1dSBram Moolenaar  * "capcol" is used to check for a Capitalised word after the end of a
150f9184a1dSBram Moolenaar  * sentence.  If it's zero then perform the check.  Return the column where to
151f9184a1dSBram Moolenaar  * check next, or -1 when no sentence end was found.  If it's NULL then don't
152f9184a1dSBram Moolenaar  * worry.
1539ba0eb85SBram Moolenaar  *
154402d2feaSBram Moolenaar  * Returns the length of the word in bytes, also when it's OK, so that the
155402d2feaSBram Moolenaar  * caller can skip over the word.
156402d2feaSBram Moolenaar  */
157402d2feaSBram Moolenaar     int
spell_check(win_T * wp,char_u * ptr,hlf_T * attrp,int * capcol,int docount)158764b23c8SBram Moolenaar spell_check(
1590d6f5d97SBram Moolenaar     win_T	*wp,		// current window
160764b23c8SBram Moolenaar     char_u	*ptr,
161764b23c8SBram Moolenaar     hlf_T	*attrp,
1620d6f5d97SBram Moolenaar     int		*capcol,	// column to check for Capital
1630d6f5d97SBram Moolenaar     int		docount)	// count good words
164402d2feaSBram Moolenaar {
1650d6f5d97SBram Moolenaar     matchinf_T	mi;		// Most things are put in "mi" so that it can
1660d6f5d97SBram Moolenaar 				// be passed to functions quickly.
1670d6f5d97SBram Moolenaar     int		nrlen = 0;	// found a number first
168f9184a1dSBram Moolenaar     int		c;
1695195e456SBram Moolenaar     int		wrongcaplen = 0;
170ac6e65f8SBram Moolenaar     int		lpi;
1714770d09aSBram Moolenaar     int		count_word = docount;
172e0ebeda4SBram Moolenaar     int		use_camel_case = *wp->w_s->b_p_spo != NUL;
173e0ebeda4SBram Moolenaar     int		camel_case = 0;
174402d2feaSBram Moolenaar 
1750d6f5d97SBram Moolenaar     // A word never starts at a space or a control character.  Return quickly
1760d6f5d97SBram Moolenaar     // then, skipping over the character.
177cfc6c43cSBram Moolenaar     if (*ptr <= ' ')
178cfc6c43cSBram Moolenaar 	return 1;
179a226a6ddSBram Moolenaar 
1800d6f5d97SBram Moolenaar     // Return here when loading language files failed.
181860cae1cSBram Moolenaar     if (wp->w_s->b_langp.ga_len == 0)
182a226a6ddSBram Moolenaar 	return 1;
183a226a6ddSBram Moolenaar 
184a80faa89SBram Moolenaar     CLEAR_FIELD(mi);
18551485f06SBram Moolenaar 
1860d6f5d97SBram Moolenaar     // A number is always OK.  Also skip hexadecimal numbers 0xFF99 and
1870d6f5d97SBram Moolenaar     // 0X99FF.  But always do check spelling to find "3GPP" and "11
1880d6f5d97SBram Moolenaar     // julifeest".
18951485f06SBram Moolenaar     if (*ptr >= '0' && *ptr <= '9')
19051485f06SBram Moolenaar     {
191887c1feaSBram Moolenaar 	if (*ptr == '0' && (ptr[1] == 'b' || ptr[1] == 'B'))
192887c1feaSBram Moolenaar 	    mi.mi_end = skipbin(ptr + 2);
193887c1feaSBram Moolenaar 	else if (*ptr == '0' && (ptr[1] == 'x' || ptr[1] == 'X'))
1943982c541SBram Moolenaar 	    mi.mi_end = skiphex(ptr + 2);
19551485f06SBram Moolenaar 	else
196d857f0e0SBram Moolenaar 	    mi.mi_end = skipdigits(ptr);
197a93fa7eeSBram Moolenaar 	nrlen = (int)(mi.mi_end - ptr);
198d857f0e0SBram Moolenaar     }
1999ba0eb85SBram Moolenaar 
2000d6f5d97SBram Moolenaar     // Find the normal end of the word (until the next non-word character).
2010c40586aSBram Moolenaar     mi.mi_word = ptr;
20243abc521SBram Moolenaar     mi.mi_fend = ptr;
203860cae1cSBram Moolenaar     if (spell_iswordp(mi.mi_fend, wp))
20451485f06SBram Moolenaar     {
205e0ebeda4SBram Moolenaar 	int prev_upper;
2062d4070d3SBram Moolenaar 	int this_upper = FALSE;  // init for gcc
207e0ebeda4SBram Moolenaar 
208e0ebeda4SBram Moolenaar 	if (use_camel_case)
209e0ebeda4SBram Moolenaar 	{
210e0ebeda4SBram Moolenaar 	    c = PTR2CHAR(mi.mi_fend);
211e0ebeda4SBram Moolenaar 	    this_upper = SPELL_ISUPPER(c);
212e0ebeda4SBram Moolenaar 	}
213e0ebeda4SBram Moolenaar 
214402d2feaSBram Moolenaar 	do
215e0ebeda4SBram Moolenaar 	{
21691acfffcSBram Moolenaar 	    MB_PTR_ADV(mi.mi_fend);
217e0ebeda4SBram Moolenaar 	    if (use_camel_case)
218e0ebeda4SBram Moolenaar 	    {
219e0ebeda4SBram Moolenaar 		prev_upper = this_upper;
220e0ebeda4SBram Moolenaar 		c = PTR2CHAR(mi.mi_fend);
221e0ebeda4SBram Moolenaar 		this_upper = SPELL_ISUPPER(c);
222e0ebeda4SBram Moolenaar 		camel_case = !prev_upper && this_upper;
223e0ebeda4SBram Moolenaar 	    }
224e0ebeda4SBram Moolenaar 	} while (*mi.mi_fend != NUL && spell_iswordp(mi.mi_fend, wp)
225e0ebeda4SBram Moolenaar 							       && !camel_case);
226f9184a1dSBram Moolenaar 
227860cae1cSBram Moolenaar 	if (capcol != NULL && *capcol == 0 && wp->w_s->b_cap_prog != NULL)
228f9184a1dSBram Moolenaar 	{
2290d6f5d97SBram Moolenaar 	    // Check word starting with capital letter.
23053805d1eSBram Moolenaar 	    c = PTR2CHAR(ptr);
231f9184a1dSBram Moolenaar 	    if (!SPELL_ISUPPER(c))
2325195e456SBram Moolenaar 		wrongcaplen = (int)(mi.mi_fend - ptr);
233f9184a1dSBram Moolenaar 	}
234f9184a1dSBram Moolenaar     }
235f9184a1dSBram Moolenaar     if (capcol != NULL)
236f9184a1dSBram Moolenaar 	*capcol = -1;
23751485f06SBram Moolenaar 
2380d6f5d97SBram Moolenaar     // We always use the characters up to the next non-word character,
2390d6f5d97SBram Moolenaar     // also for bad words.
24051485f06SBram Moolenaar     mi.mi_end = mi.mi_fend;
2419ba0eb85SBram Moolenaar 
2420d6f5d97SBram Moolenaar     // Check caps type later.
243860cae1cSBram Moolenaar     mi.mi_capflags = 0;
244860cae1cSBram Moolenaar     mi.mi_cend = NULL;
245860cae1cSBram Moolenaar     mi.mi_win = wp;
24663d5a1e5SBram Moolenaar 
2470d6f5d97SBram Moolenaar     // case-fold the word with one non-word character, so that we can check
2480d6f5d97SBram Moolenaar     // for the word end.
249cfc6c43cSBram Moolenaar     if (*mi.mi_fend != NUL)
25091acfffcSBram Moolenaar 	MB_PTR_ADV(mi.mi_fend);
251cfc6c43cSBram Moolenaar 
2524f135275SBram Moolenaar     (void)spell_casefold(wp, ptr, (int)(mi.mi_fend - ptr), mi.mi_fword,
253cfc6c43cSBram Moolenaar 							     MAXWLEN + 1);
254a93fa7eeSBram Moolenaar     mi.mi_fwordlen = (int)STRLEN(mi.mi_fword);
255cfc6c43cSBram Moolenaar 
256e0ebeda4SBram Moolenaar     if (camel_case)
257e0ebeda4SBram Moolenaar 	// Introduce a fake word end space into the folded word.
258e0ebeda4SBram Moolenaar 	mi.mi_fword[mi.mi_fwordlen - 1] = ' ';
259e0ebeda4SBram Moolenaar 
2600d6f5d97SBram Moolenaar     // The word is bad unless we recognize it.
261402d2feaSBram Moolenaar     mi.mi_result = SP_BAD;
2627862282fSBram Moolenaar     mi.mi_result2 = SP_BAD;
263402d2feaSBram Moolenaar 
264402d2feaSBram Moolenaar     /*
265402d2feaSBram Moolenaar      * Loop over the languages specified in 'spelllang'.
2664770d09aSBram Moolenaar      * We check them all, because a word may be matched longer in another
2674770d09aSBram Moolenaar      * language.
268402d2feaSBram Moolenaar      */
269860cae1cSBram Moolenaar     for (lpi = 0; lpi < wp->w_s->b_langp.ga_len; ++lpi)
270402d2feaSBram Moolenaar     {
271860cae1cSBram Moolenaar 	mi.mi_lp = LANGP_ENTRY(wp->w_s->b_langp, lpi);
272ac6e65f8SBram Moolenaar 
2730d6f5d97SBram Moolenaar 	// If reloading fails the language is still in the list but everything
2740d6f5d97SBram Moolenaar 	// has been cleared.
275ac6e65f8SBram Moolenaar 	if (mi.mi_lp->lp_slang->sl_fidxs == NULL)
276ac6e65f8SBram Moolenaar 	    continue;
277ac6e65f8SBram Moolenaar 
2780d6f5d97SBram Moolenaar 	// Check for a matching word in case-folded words.
2791d73c885SBram Moolenaar 	find_word(&mi, FIND_FOLDWORD);
28051485f06SBram Moolenaar 
2810d6f5d97SBram Moolenaar 	// Check for a matching word in keep-case words.
2821d73c885SBram Moolenaar 	find_word(&mi, FIND_KEEPWORD);
2831d73c885SBram Moolenaar 
2840d6f5d97SBram Moolenaar 	// Check for matching prefixes.
285d12a1326SBram Moolenaar 	find_prefix(&mi, FIND_FOLDWORD);
2867862282fSBram Moolenaar 
2870d6f5d97SBram Moolenaar 	// For a NOBREAK language, may want to use a word without a following
2880d6f5d97SBram Moolenaar 	// word as a backup.
2897862282fSBram Moolenaar 	if (mi.mi_lp->lp_slang->sl_nobreak && mi.mi_result == SP_BAD
2907862282fSBram Moolenaar 						   && mi.mi_result2 != SP_BAD)
2917862282fSBram Moolenaar 	{
2927862282fSBram Moolenaar 	    mi.mi_result = mi.mi_result2;
2937862282fSBram Moolenaar 	    mi.mi_end = mi.mi_end2;
2947862282fSBram Moolenaar 	}
2954770d09aSBram Moolenaar 
2960d6f5d97SBram Moolenaar 	// Count the word in the first language where it's found to be OK.
2974770d09aSBram Moolenaar 	if (count_word && mi.mi_result == SP_OK)
2984770d09aSBram Moolenaar 	{
2994770d09aSBram Moolenaar 	    count_common_word(mi.mi_lp->lp_slang, ptr,
3004770d09aSBram Moolenaar 						   (int)(mi.mi_end - ptr), 1);
3014770d09aSBram Moolenaar 	    count_word = FALSE;
3024770d09aSBram Moolenaar 	}
303402d2feaSBram Moolenaar     }
304402d2feaSBram Moolenaar 
305402d2feaSBram Moolenaar     if (mi.mi_result != SP_OK)
306402d2feaSBram Moolenaar     {
3070d6f5d97SBram Moolenaar 	// If we found a number skip over it.  Allows for "42nd".  Do flag
3080d6f5d97SBram Moolenaar 	// rare and local words, e.g., "3GPP".
309d857f0e0SBram Moolenaar 	if (nrlen > 0)
3100c40586aSBram Moolenaar 	{
3110c40586aSBram Moolenaar 	    if (mi.mi_result == SP_BAD || mi.mi_result == SP_BANNED)
312d857f0e0SBram Moolenaar 		return nrlen;
3130c40586aSBram Moolenaar 	}
314d857f0e0SBram Moolenaar 
3150d6f5d97SBram Moolenaar 	// When we are at a non-word character there is no error, just
3160d6f5d97SBram Moolenaar 	// skip over the character (try looking for a word after it).
317cc63c647SBram Moolenaar 	else if (!spell_iswordp_nmw(ptr, wp))
31851485f06SBram Moolenaar 	{
319860cae1cSBram Moolenaar 	    if (capcol != NULL && wp->w_s->b_cap_prog != NULL)
320f9184a1dSBram Moolenaar 	    {
321f9184a1dSBram Moolenaar 		regmatch_T	regmatch;
322dffa5b8eSBram Moolenaar 		int		r;
323f9184a1dSBram Moolenaar 
3240d6f5d97SBram Moolenaar 		// Check for end of sentence.
325860cae1cSBram Moolenaar 		regmatch.regprog = wp->w_s->b_cap_prog;
326f9184a1dSBram Moolenaar 		regmatch.rm_ic = FALSE;
327dffa5b8eSBram Moolenaar 		r = vim_regexec(&regmatch, ptr, 0);
328dffa5b8eSBram Moolenaar 		wp->w_s->b_cap_prog = regmatch.regprog;
329dffa5b8eSBram Moolenaar 		if (r)
330f9184a1dSBram Moolenaar 		    *capcol = (int)(regmatch.endp[0] - ptr);
331f9184a1dSBram Moolenaar 	    }
332f9184a1dSBram Moolenaar 
33351485f06SBram Moolenaar 	    if (has_mbyte)
3340fa313a7SBram Moolenaar 		return (*mb_ptr2len)(ptr);
33551485f06SBram Moolenaar 	    return 1;
33651485f06SBram Moolenaar 	}
3375195e456SBram Moolenaar 	else if (mi.mi_end == ptr)
3380d6f5d97SBram Moolenaar 	    // Always include at least one character.  Required for when there
3390d6f5d97SBram Moolenaar 	    // is a mixup in "midword".
34091acfffcSBram Moolenaar 	    MB_PTR_ADV(mi.mi_end);
3417862282fSBram Moolenaar 	else if (mi.mi_result == SP_BAD
342860cae1cSBram Moolenaar 		&& LANGP_ENTRY(wp->w_s->b_langp, 0)->lp_slang->sl_nobreak)
3437862282fSBram Moolenaar 	{
3447862282fSBram Moolenaar 	    char_u	*p, *fp;
3457862282fSBram Moolenaar 	    int		save_result = mi.mi_result;
3467862282fSBram Moolenaar 
3470d6f5d97SBram Moolenaar 	    // First language in 'spelllang' is NOBREAK.  Find first position
3480d6f5d97SBram Moolenaar 	    // at which any word would be valid.
349860cae1cSBram Moolenaar 	    mi.mi_lp = LANGP_ENTRY(wp->w_s->b_langp, 0);
350ac6e65f8SBram Moolenaar 	    if (mi.mi_lp->lp_slang->sl_fidxs != NULL)
351ac6e65f8SBram Moolenaar 	    {
3527862282fSBram Moolenaar 		p = mi.mi_word;
3537862282fSBram Moolenaar 		fp = mi.mi_fword;
3547862282fSBram Moolenaar 		for (;;)
3557862282fSBram Moolenaar 		{
35691acfffcSBram Moolenaar 		    MB_PTR_ADV(p);
35791acfffcSBram Moolenaar 		    MB_PTR_ADV(fp);
3587862282fSBram Moolenaar 		    if (p >= mi.mi_end)
3597862282fSBram Moolenaar 			break;
360a93fa7eeSBram Moolenaar 		    mi.mi_compoff = (int)(fp - mi.mi_fword);
3617862282fSBram Moolenaar 		    find_word(&mi, FIND_COMPOUND);
3627862282fSBram Moolenaar 		    if (mi.mi_result != SP_BAD)
3637862282fSBram Moolenaar 		    {
3647862282fSBram Moolenaar 			mi.mi_end = p;
3657862282fSBram Moolenaar 			break;
3667862282fSBram Moolenaar 		    }
3677862282fSBram Moolenaar 		}
3687862282fSBram Moolenaar 		mi.mi_result = save_result;
3697862282fSBram Moolenaar 	    }
370ac6e65f8SBram Moolenaar 	}
37151485f06SBram Moolenaar 
372cfc6c43cSBram Moolenaar 	if (mi.mi_result == SP_BAD || mi.mi_result == SP_BANNED)
373482aaeb0SBram Moolenaar 	    *attrp = HLF_SPB;
374402d2feaSBram Moolenaar 	else if (mi.mi_result == SP_RARE)
375482aaeb0SBram Moolenaar 	    *attrp = HLF_SPR;
376402d2feaSBram Moolenaar 	else
377482aaeb0SBram Moolenaar 	    *attrp = HLF_SPL;
378402d2feaSBram Moolenaar     }
379402d2feaSBram Moolenaar 
3805195e456SBram Moolenaar     if (wrongcaplen > 0 && (mi.mi_result == SP_OK || mi.mi_result == SP_RARE))
3815195e456SBram Moolenaar     {
3820d6f5d97SBram Moolenaar 	// Report SpellCap only when the word isn't badly spelled.
383482aaeb0SBram Moolenaar 	*attrp = HLF_SPC;
3845195e456SBram Moolenaar 	return wrongcaplen;
3855195e456SBram Moolenaar     }
3865195e456SBram Moolenaar 
38751485f06SBram Moolenaar     return (int)(mi.mi_end - ptr);
388402d2feaSBram Moolenaar }
389402d2feaSBram Moolenaar 
390402d2feaSBram Moolenaar /*
39151485f06SBram Moolenaar  * Check if the word at "mip->mi_word" is in the tree.
3921d73c885SBram Moolenaar  * When "mode" is FIND_FOLDWORD check in fold-case word tree.
3931d73c885SBram Moolenaar  * When "mode" is FIND_KEEPWORD check in keep-case word tree.
3941d73c885SBram Moolenaar  * When "mode" is FIND_PREFIX check for word after prefix in fold-case word
3951d73c885SBram Moolenaar  * tree.
39663d5a1e5SBram Moolenaar  *
39751485f06SBram Moolenaar  * For a match mip->mi_result is updated.
39863d5a1e5SBram Moolenaar  */
39963d5a1e5SBram Moolenaar     static void
find_word(matchinf_T * mip,int mode)400764b23c8SBram Moolenaar find_word(matchinf_T *mip, int mode)
40163d5a1e5SBram Moolenaar {
4029f30f504SBram Moolenaar     idx_T	arridx = 0;
4030d6f5d97SBram Moolenaar     int		endlen[MAXWLEN];    // length at possible word endings
4040d6f5d97SBram Moolenaar     idx_T	endidx[MAXWLEN];    // possible word endings
40551485f06SBram Moolenaar     int		endidxcnt = 0;
406402d2feaSBram Moolenaar     int		len;
40751485f06SBram Moolenaar     int		wlen = 0;
40851485f06SBram Moolenaar     int		flen;
40951485f06SBram Moolenaar     int		c;
41051485f06SBram Moolenaar     char_u	*ptr;
4119f30f504SBram Moolenaar     idx_T	lo, hi, m;
41251485f06SBram Moolenaar     char_u	*s;
413e52325c2SBram Moolenaar     char_u	*p;
414cfc6c43cSBram Moolenaar     int		res = SP_BAD;
41551485f06SBram Moolenaar     slang_T	*slang = mip->mi_lp->lp_slang;
41651485f06SBram Moolenaar     unsigned	flags;
41751485f06SBram Moolenaar     char_u	*byts;
4189f30f504SBram Moolenaar     idx_T	*idxs;
419ae5bce1cSBram Moolenaar     int		word_ends;
420d12a1326SBram Moolenaar     int		prefix_found;
4217862282fSBram Moolenaar     int		nobreak_result;
42251485f06SBram Moolenaar 
423ae5bce1cSBram Moolenaar     if (mode == FIND_KEEPWORD || mode == FIND_KEEPCOMPOUND)
424402d2feaSBram Moolenaar     {
4250d6f5d97SBram Moolenaar 	// Check for word with matching case in keep-case tree.
42651485f06SBram Moolenaar 	ptr = mip->mi_word;
4270d6f5d97SBram Moolenaar 	flen = 9999;		    // no case folding, always enough bytes
42851485f06SBram Moolenaar 	byts = slang->sl_kbyts;
42951485f06SBram Moolenaar 	idxs = slang->sl_kidxs;
430ae5bce1cSBram Moolenaar 
431ae5bce1cSBram Moolenaar 	if (mode == FIND_KEEPCOMPOUND)
4320d6f5d97SBram Moolenaar 	    // Skip over the previously found word(s).
433ae5bce1cSBram Moolenaar 	    wlen += mip->mi_compoff;
43451485f06SBram Moolenaar     }
435402d2feaSBram Moolenaar     else
436402d2feaSBram Moolenaar     {
4370d6f5d97SBram Moolenaar 	// Check for case-folded in case-folded tree.
43851485f06SBram Moolenaar 	ptr = mip->mi_fword;
4390d6f5d97SBram Moolenaar 	flen = mip->mi_fwordlen;    // available case-folded bytes
44051485f06SBram Moolenaar 	byts = slang->sl_fbyts;
44151485f06SBram Moolenaar 	idxs = slang->sl_fidxs;
4421d73c885SBram Moolenaar 
4431d73c885SBram Moolenaar 	if (mode == FIND_PREFIX)
4441d73c885SBram Moolenaar 	{
4450d6f5d97SBram Moolenaar 	    // Skip over the prefix.
4461d73c885SBram Moolenaar 	    wlen = mip->mi_prefixlen;
4471d73c885SBram Moolenaar 	    flen -= mip->mi_prefixlen;
4481d73c885SBram Moolenaar 	}
449ae5bce1cSBram Moolenaar 	else if (mode == FIND_COMPOUND)
450ae5bce1cSBram Moolenaar 	{
4510d6f5d97SBram Moolenaar 	    // Skip over the previously found word(s).
452ae5bce1cSBram Moolenaar 	    wlen = mip->mi_compoff;
453ae5bce1cSBram Moolenaar 	    flen -= mip->mi_compoff;
454ae5bce1cSBram Moolenaar 	}
455ae5bce1cSBram Moolenaar 
45651485f06SBram Moolenaar     }
45751485f06SBram Moolenaar 
45851485f06SBram Moolenaar     if (byts == NULL)
4590d6f5d97SBram Moolenaar 	return;			// array is empty
46051485f06SBram Moolenaar 
46151485f06SBram Moolenaar     /*
462cfc6c43cSBram Moolenaar      * Repeat advancing in the tree until:
463cfc6c43cSBram Moolenaar      * - there is a byte that doesn't match,
464cfc6c43cSBram Moolenaar      * - we reach the end of the tree,
465cfc6c43cSBram Moolenaar      * - or we reach the end of the line.
46651485f06SBram Moolenaar      */
46751485f06SBram Moolenaar     for (;;)
46851485f06SBram Moolenaar     {
4690c40586aSBram Moolenaar 	if (flen <= 0 && *mip->mi_fend != NUL)
4701d73c885SBram Moolenaar 	    flen = fold_more(mip);
47151485f06SBram Moolenaar 
47251485f06SBram Moolenaar 	len = byts[arridx++];
47351485f06SBram Moolenaar 
4740d6f5d97SBram Moolenaar 	// If the first possible byte is a zero the word could end here.
4750d6f5d97SBram Moolenaar 	// Remember this index, we first check for the longest word.
47651485f06SBram Moolenaar 	if (byts[arridx] == 0)
477402d2feaSBram Moolenaar 	{
478cfc6c43cSBram Moolenaar 	    if (endidxcnt == MAXWLEN)
479cfc6c43cSBram Moolenaar 	    {
4800d6f5d97SBram Moolenaar 		// Must be a corrupted spell file.
481f9e3e09fSBram Moolenaar 		emsg(_(e_format));
482cfc6c43cSBram Moolenaar 		return;
483cfc6c43cSBram Moolenaar 	    }
48451485f06SBram Moolenaar 	    endlen[endidxcnt] = wlen;
48551485f06SBram Moolenaar 	    endidx[endidxcnt++] = arridx++;
48651485f06SBram Moolenaar 	    --len;
48751485f06SBram Moolenaar 
4880d6f5d97SBram Moolenaar 	    // Skip over the zeros, there can be several flag/region
4890d6f5d97SBram Moolenaar 	    // combinations.
49051485f06SBram Moolenaar 	    while (len > 0 && byts[arridx] == 0)
491402d2feaSBram Moolenaar 	    {
49251485f06SBram Moolenaar 		++arridx;
49351485f06SBram Moolenaar 		--len;
49451485f06SBram Moolenaar 	    }
49551485f06SBram Moolenaar 	    if (len == 0)
4960d6f5d97SBram Moolenaar 		break;	    // no children, word must end here
49751485f06SBram Moolenaar 	}
49851485f06SBram Moolenaar 
4990d6f5d97SBram Moolenaar 	// Stop looking at end of the line.
50051485f06SBram Moolenaar 	if (ptr[wlen] == NUL)
50151485f06SBram Moolenaar 	    break;
50251485f06SBram Moolenaar 
5030d6f5d97SBram Moolenaar 	// Perform a binary search in the list of accepted bytes.
50451485f06SBram Moolenaar 	c = ptr[wlen];
5050d6f5d97SBram Moolenaar 	if (c == TAB)	    // <Tab> is handled like <Space>
5060c40586aSBram Moolenaar 	    c = ' ';
50751485f06SBram Moolenaar 	lo = arridx;
50851485f06SBram Moolenaar 	hi = arridx + len - 1;
50951485f06SBram Moolenaar 	while (lo < hi)
51051485f06SBram Moolenaar 	{
51151485f06SBram Moolenaar 	    m = (lo + hi) / 2;
51251485f06SBram Moolenaar 	    if (byts[m] > c)
51351485f06SBram Moolenaar 		hi = m - 1;
51451485f06SBram Moolenaar 	    else if (byts[m] < c)
51551485f06SBram Moolenaar 		lo = m + 1;
51651485f06SBram Moolenaar 	    else
51751485f06SBram Moolenaar 	    {
51851485f06SBram Moolenaar 		lo = hi = m;
519402d2feaSBram Moolenaar 		break;
520402d2feaSBram Moolenaar 	    }
521402d2feaSBram Moolenaar 	}
52251485f06SBram Moolenaar 
5230d6f5d97SBram Moolenaar 	// Stop if there is no matching byte.
52451485f06SBram Moolenaar 	if (hi < lo || byts[lo] != c)
52551485f06SBram Moolenaar 	    break;
52651485f06SBram Moolenaar 
5270d6f5d97SBram Moolenaar 	// Continue at the child (if there is one).
52851485f06SBram Moolenaar 	arridx = idxs[lo];
52951485f06SBram Moolenaar 	++wlen;
53051485f06SBram Moolenaar 	--flen;
5310c40586aSBram Moolenaar 
5320d6f5d97SBram Moolenaar 	// One space in the good word may stand for several spaces in the
5330d6f5d97SBram Moolenaar 	// checked word.
5340c40586aSBram Moolenaar 	if (c == ' ')
5350c40586aSBram Moolenaar 	{
5360c40586aSBram Moolenaar 	    for (;;)
5370c40586aSBram Moolenaar 	    {
5380c40586aSBram Moolenaar 		if (flen <= 0 && *mip->mi_fend != NUL)
5390c40586aSBram Moolenaar 		    flen = fold_more(mip);
5400c40586aSBram Moolenaar 		if (ptr[wlen] != ' ' && ptr[wlen] != TAB)
5410c40586aSBram Moolenaar 		    break;
5420c40586aSBram Moolenaar 		++wlen;
5430c40586aSBram Moolenaar 		--flen;
5440c40586aSBram Moolenaar 	    }
5450c40586aSBram Moolenaar 	}
54651485f06SBram Moolenaar     }
54751485f06SBram Moolenaar 
54851485f06SBram Moolenaar     /*
54951485f06SBram Moolenaar      * Verify that one of the possible endings is valid.  Try the longest
55051485f06SBram Moolenaar      * first.
55151485f06SBram Moolenaar      */
55251485f06SBram Moolenaar     while (endidxcnt > 0)
55351485f06SBram Moolenaar     {
55451485f06SBram Moolenaar 	--endidxcnt;
55551485f06SBram Moolenaar 	arridx = endidx[endidxcnt];
55651485f06SBram Moolenaar 	wlen = endlen[endidxcnt];
55751485f06SBram Moolenaar 
55851485f06SBram Moolenaar 	if ((*mb_head_off)(ptr, ptr + wlen) > 0)
5590d6f5d97SBram Moolenaar 	    continue;	    // not at first byte of character
560860cae1cSBram Moolenaar 	if (spell_iswordp(ptr + wlen, mip->mi_win))
561ae5bce1cSBram Moolenaar 	{
5627862282fSBram Moolenaar 	    if (slang->sl_compprog == NULL && !slang->sl_nobreak)
5630d6f5d97SBram Moolenaar 		continue;	    // next char is a word character
564ae5bce1cSBram Moolenaar 	    word_ends = FALSE;
565ae5bce1cSBram Moolenaar 	}
566ae5bce1cSBram Moolenaar 	else
567ae5bce1cSBram Moolenaar 	    word_ends = TRUE;
5680d6f5d97SBram Moolenaar 	// The prefix flag is before compound flags.  Once a valid prefix flag
5690d6f5d97SBram Moolenaar 	// has been found we try compound flags.
570d12a1326SBram Moolenaar 	prefix_found = FALSE;
57151485f06SBram Moolenaar 
5721d73c885SBram Moolenaar 	if (mode != FIND_KEEPWORD && has_mbyte)
57351485f06SBram Moolenaar 	{
5740d6f5d97SBram Moolenaar 	    // Compute byte length in original word, length may change
5750d6f5d97SBram Moolenaar 	    // when folding case.  This can be slow, take a shortcut when the
5760d6f5d97SBram Moolenaar 	    // case-folded word is equal to the keep-case word.
57751485f06SBram Moolenaar 	    p = mip->mi_word;
5781d73c885SBram Moolenaar 	    if (STRNCMP(ptr, p, wlen) != 0)
5791d73c885SBram Moolenaar 	    {
58091acfffcSBram Moolenaar 		for (s = ptr; s < ptr + wlen; MB_PTR_ADV(s))
58191acfffcSBram Moolenaar 		    MB_PTR_ADV(p);
582a93fa7eeSBram Moolenaar 		wlen = (int)(p - mip->mi_word);
58351485f06SBram Moolenaar 	    }
5841d73c885SBram Moolenaar 	}
58551485f06SBram Moolenaar 
5860d6f5d97SBram Moolenaar 	// Check flags and region.  For FIND_PREFIX check the condition and
5870d6f5d97SBram Moolenaar 	// prefix ID.
5880d6f5d97SBram Moolenaar 	// Repeat this if there are more flags/region alternatives until there
5890d6f5d97SBram Moolenaar 	// is a match.
5901d73c885SBram Moolenaar 	res = SP_BAD;
5911d73c885SBram Moolenaar 	for (len = byts[arridx - 1]; len > 0 && byts[arridx] == 0;
5921d73c885SBram Moolenaar 							      --len, ++arridx)
59351485f06SBram Moolenaar 	{
59451485f06SBram Moolenaar 	    flags = idxs[arridx];
5959f30f504SBram Moolenaar 
5960d6f5d97SBram Moolenaar 	    // For the fold-case tree check that the case of the checked word
5970d6f5d97SBram Moolenaar 	    // matches with what the word in the tree requires.
5980d6f5d97SBram Moolenaar 	    // For keep-case tree the case is always right.  For prefixes we
5990d6f5d97SBram Moolenaar 	    // don't bother to check.
6001d73c885SBram Moolenaar 	    if (mode == FIND_FOLDWORD)
60151485f06SBram Moolenaar 	    {
60251485f06SBram Moolenaar 		if (mip->mi_cend != mip->mi_word + wlen)
603402d2feaSBram Moolenaar 		{
6040d6f5d97SBram Moolenaar 		    // mi_capflags was set for a different word length, need
6050d6f5d97SBram Moolenaar 		    // to do it again.
60651485f06SBram Moolenaar 		    mip->mi_cend = mip->mi_word + wlen;
6079ba0eb85SBram Moolenaar 		    mip->mi_capflags = captype(mip->mi_word, mip->mi_cend);
60851485f06SBram Moolenaar 		}
60951485f06SBram Moolenaar 
6100c40586aSBram Moolenaar 		if (mip->mi_capflags == WF_KEEPCAP
6110c40586aSBram Moolenaar 				|| !spell_valid_case(mip->mi_capflags, flags))
6121d73c885SBram Moolenaar 		    continue;
61351485f06SBram Moolenaar 	    }
61451485f06SBram Moolenaar 
6150d6f5d97SBram Moolenaar 	    // When mode is FIND_PREFIX the word must support the prefix:
6160d6f5d97SBram Moolenaar 	    // check the prefix ID and the condition.  Do that for the list at
6170d6f5d97SBram Moolenaar 	    // mip->mi_prefarridx that find_prefix() filled.
618d12a1326SBram Moolenaar 	    else if (mode == FIND_PREFIX && !prefix_found)
619402d2feaSBram Moolenaar 	    {
620cf6bf39fSBram Moolenaar 		c = valid_word_prefix(mip->mi_prefcnt, mip->mi_prefarridx,
621dfb9ac00SBram Moolenaar 				    flags,
62253805d1eSBram Moolenaar 				    mip->mi_word + mip->mi_cprefixlen, slang,
62353805d1eSBram Moolenaar 				    FALSE);
624cf6bf39fSBram Moolenaar 		if (c == 0)
6251d73c885SBram Moolenaar 		    continue;
626cf6bf39fSBram Moolenaar 
6270d6f5d97SBram Moolenaar 		// Use the WF_RARE flag for a rare prefix.
628cf6bf39fSBram Moolenaar 		if (c & WF_RAREPFX)
629cf6bf39fSBram Moolenaar 		    flags |= WF_RARE;
630d12a1326SBram Moolenaar 		prefix_found = TRUE;
6311d73c885SBram Moolenaar 	    }
6321d73c885SBram Moolenaar 
6337862282fSBram Moolenaar 	    if (slang->sl_nobreak)
6347862282fSBram Moolenaar 	    {
6357862282fSBram Moolenaar 		if ((mode == FIND_COMPOUND || mode == FIND_KEEPCOMPOUND)
6367862282fSBram Moolenaar 			&& (flags & WF_BANNED) == 0)
6377862282fSBram Moolenaar 		{
6380d6f5d97SBram Moolenaar 		    // NOBREAK: found a valid following word.  That's all we
6390d6f5d97SBram Moolenaar 		    // need to know, so return.
6407862282fSBram Moolenaar 		    mip->mi_result = SP_OK;
6417862282fSBram Moolenaar 		    break;
6427862282fSBram Moolenaar 		}
6437862282fSBram Moolenaar 	    }
6447862282fSBram Moolenaar 
6457862282fSBram Moolenaar 	    else if ((mode == FIND_COMPOUND || mode == FIND_KEEPCOMPOUND
6467862282fSBram Moolenaar 								|| !word_ends))
647ae5bce1cSBram Moolenaar 	    {
6480d6f5d97SBram Moolenaar 		// If there is no compound flag or the word is shorter than
6490d6f5d97SBram Moolenaar 		// COMPOUNDMIN reject it quickly.
6500d6f5d97SBram Moolenaar 		// Makes you wonder why someone puts a compound flag on a word
6510d6f5d97SBram Moolenaar 		// that's too short...  Myspell compatibility requires this
6520d6f5d97SBram Moolenaar 		// anyway.
653e52325c2SBram Moolenaar 		if (((unsigned)flags >> 24) == 0
654e52325c2SBram Moolenaar 			     || wlen - mip->mi_compoff < slang->sl_compminlen)
655ae5bce1cSBram Moolenaar 		    continue;
6560d6f5d97SBram Moolenaar 		// For multi-byte chars check character length against
6570d6f5d97SBram Moolenaar 		// COMPOUNDMIN.
658ac6e65f8SBram Moolenaar 		if (has_mbyte
659da2303d9SBram Moolenaar 			&& slang->sl_compminlen > 0
660ac6e65f8SBram Moolenaar 			&& mb_charlen_len(mip->mi_word + mip->mi_compoff,
661ac6e65f8SBram Moolenaar 				wlen - mip->mi_compoff) < slang->sl_compminlen)
662ac6e65f8SBram Moolenaar 			continue;
663ae5bce1cSBram Moolenaar 
6640d6f5d97SBram Moolenaar 		// Limit the number of compound words to COMPOUNDWORDMAX if no
6650d6f5d97SBram Moolenaar 		// maximum for syllables is specified.
666899dddf8SBram Moolenaar 		if (!word_ends && mip->mi_complen + mip->mi_compextra + 2
667899dddf8SBram Moolenaar 							   > slang->sl_compmax
668e52325c2SBram Moolenaar 					   && slang->sl_compsylmax == MAXWLEN)
669ae5bce1cSBram Moolenaar 		    continue;
6705195e456SBram Moolenaar 
6710d6f5d97SBram Moolenaar 		// Don't allow compounding on a side where an affix was added,
6720d6f5d97SBram Moolenaar 		// unless COMPOUNDPERMITFLAG was used.
673910f66f9SBram Moolenaar 		if (mip->mi_complen > 0 && (flags & WF_NOCOMPBEF))
674910f66f9SBram Moolenaar 		    continue;
675910f66f9SBram Moolenaar 		if (!word_ends && (flags & WF_NOCOMPAFT))
676910f66f9SBram Moolenaar 		    continue;
677910f66f9SBram Moolenaar 
6780d6f5d97SBram Moolenaar 		// Quickly check if compounding is possible with this flag.
6796de6853cSBram Moolenaar 		if (!byte_in_str(mip->mi_complen == 0
680d12a1326SBram Moolenaar 					? slang->sl_compstartflags
681d12a1326SBram Moolenaar 					: slang->sl_compallflags,
6826de6853cSBram Moolenaar 					    ((unsigned)flags >> 24)))
6835195e456SBram Moolenaar 		    continue;
6845195e456SBram Moolenaar 
6850d6f5d97SBram Moolenaar 		// If there is a match with a CHECKCOMPOUNDPATTERN rule
6860d6f5d97SBram Moolenaar 		// discard the compound word.
6879f94b05bSBram Moolenaar 		if (match_checkcompoundpattern(ptr, wlen, &slang->sl_comppat))
6889f94b05bSBram Moolenaar 		    continue;
6899f94b05bSBram Moolenaar 
690e52325c2SBram Moolenaar 		if (mode == FIND_COMPOUND)
691e52325c2SBram Moolenaar 		{
692e52325c2SBram Moolenaar 		    int	    capflags;
693e52325c2SBram Moolenaar 
6940d6f5d97SBram Moolenaar 		    // Need to check the caps type of the appended compound
6950d6f5d97SBram Moolenaar 		    // word.
696e52325c2SBram Moolenaar 		    if (has_mbyte && STRNCMP(ptr, mip->mi_word,
697e52325c2SBram Moolenaar 							mip->mi_compoff) != 0)
698e52325c2SBram Moolenaar 		    {
6990d6f5d97SBram Moolenaar 			// case folding may have changed the length
700e52325c2SBram Moolenaar 			p = mip->mi_word;
70191acfffcSBram Moolenaar 			for (s = ptr; s < ptr + mip->mi_compoff; MB_PTR_ADV(s))
70291acfffcSBram Moolenaar 			    MB_PTR_ADV(p);
703e52325c2SBram Moolenaar 		    }
704e52325c2SBram Moolenaar 		    else
705e52325c2SBram Moolenaar 			p = mip->mi_word + mip->mi_compoff;
706e52325c2SBram Moolenaar 		    capflags = captype(p, mip->mi_word + wlen);
707e52325c2SBram Moolenaar 		    if (capflags == WF_KEEPCAP || (capflags == WF_ALLCAP
708e52325c2SBram Moolenaar 						 && (flags & WF_FIXCAP) != 0))
709e52325c2SBram Moolenaar 			continue;
710e52325c2SBram Moolenaar 
711e52325c2SBram Moolenaar 		    if (capflags != WF_ALLCAP)
712e52325c2SBram Moolenaar 		    {
7130d6f5d97SBram Moolenaar 			// When the character before the word is a word
7140d6f5d97SBram Moolenaar 			// character we do not accept a Onecap word.  We do
7150d6f5d97SBram Moolenaar 			// accept a no-caps word, even when the dictionary
7160d6f5d97SBram Moolenaar 			// word specifies ONECAP.
71791acfffcSBram Moolenaar 			MB_PTR_BACK(mip->mi_word, p);
718cc63c647SBram Moolenaar 			if (spell_iswordp_nmw(p, mip->mi_win)
719e52325c2SBram Moolenaar 				? capflags == WF_ONECAP
720e52325c2SBram Moolenaar 				: (flags & WF_ONECAP) != 0
721e52325c2SBram Moolenaar 						     && capflags != WF_ONECAP)
722e52325c2SBram Moolenaar 			    continue;
723e52325c2SBram Moolenaar 		    }
724e52325c2SBram Moolenaar 		}
725e52325c2SBram Moolenaar 
7260d6f5d97SBram Moolenaar 		// If the word ends the sequence of compound flags of the
7270d6f5d97SBram Moolenaar 		// words must match with one of the COMPOUNDRULE items and
7280d6f5d97SBram Moolenaar 		// the number of syllables must not be too large.
7295195e456SBram Moolenaar 		mip->mi_compflags[mip->mi_complen] = ((unsigned)flags >> 24);
7305195e456SBram Moolenaar 		mip->mi_compflags[mip->mi_complen + 1] = NUL;
7315195e456SBram Moolenaar 		if (word_ends)
7325195e456SBram Moolenaar 		{
7335195e456SBram Moolenaar 		    char_u	fword[MAXWLEN];
7345195e456SBram Moolenaar 
7355195e456SBram Moolenaar 		    if (slang->sl_compsylmax < MAXWLEN)
7365195e456SBram Moolenaar 		    {
7370d6f5d97SBram Moolenaar 			// "fword" is only needed for checking syllables.
7385195e456SBram Moolenaar 			if (ptr == mip->mi_word)
7394f135275SBram Moolenaar 			    (void)spell_casefold(mip->mi_win,
7404f135275SBram Moolenaar 						    ptr, wlen, fword, MAXWLEN);
7415195e456SBram Moolenaar 			else
7425195e456SBram Moolenaar 			    vim_strncpy(fword, ptr, endlen[endidxcnt]);
7435195e456SBram Moolenaar 		    }
7445195e456SBram Moolenaar 		    if (!can_compound(slang, fword, mip->mi_compflags))
7455195e456SBram Moolenaar 			continue;
7465195e456SBram Moolenaar 		}
7479f94b05bSBram Moolenaar 		else if (slang->sl_comprules != NULL
7489f94b05bSBram Moolenaar 			     && !match_compoundrule(slang, mip->mi_compflags))
7490d6f5d97SBram Moolenaar 		    // The compound flags collected so far do not match any
7500d6f5d97SBram Moolenaar 		    // COMPOUNDRULE, discard the compounded word.
7519f94b05bSBram Moolenaar 		    continue;
752ae5bce1cSBram Moolenaar 	    }
753ae5bce1cSBram Moolenaar 
7540d6f5d97SBram Moolenaar 	    // Check NEEDCOMPOUND: can't use word without compounding.
755ac6e65f8SBram Moolenaar 	    else if (flags & WF_NEEDCOMP)
756ac6e65f8SBram Moolenaar 		continue;
757ac6e65f8SBram Moolenaar 
7587862282fSBram Moolenaar 	    nobreak_result = SP_OK;
7597862282fSBram Moolenaar 
760ae5bce1cSBram Moolenaar 	    if (!word_ends)
761ae5bce1cSBram Moolenaar 	    {
7627862282fSBram Moolenaar 		int	save_result = mip->mi_result;
7637862282fSBram Moolenaar 		char_u	*save_end = mip->mi_end;
764da2303d9SBram Moolenaar 		langp_T	*save_lp = mip->mi_lp;
765da2303d9SBram Moolenaar 		int	lpi;
7667862282fSBram Moolenaar 
7670d6f5d97SBram Moolenaar 		// Check that a valid word follows.  If there is one and we
7680d6f5d97SBram Moolenaar 		// are compounding, it will set "mi_result", thus we are
7690d6f5d97SBram Moolenaar 		// always finished here.  For NOBREAK we only check that a
7700d6f5d97SBram Moolenaar 		// valid word follows.
7710d6f5d97SBram Moolenaar 		// Recursive!
7727862282fSBram Moolenaar 		if (slang->sl_nobreak)
7737862282fSBram Moolenaar 		    mip->mi_result = SP_BAD;
774ae5bce1cSBram Moolenaar 
7750d6f5d97SBram Moolenaar 		// Find following word in case-folded tree.
776ae5bce1cSBram Moolenaar 		mip->mi_compoff = endlen[endidxcnt];
777ae5bce1cSBram Moolenaar 		if (has_mbyte && mode == FIND_KEEPWORD)
778ae5bce1cSBram Moolenaar 		{
7790d6f5d97SBram Moolenaar 		    // Compute byte length in case-folded word from "wlen":
7800d6f5d97SBram Moolenaar 		    // byte length in keep-case word.  Length may change when
7810d6f5d97SBram Moolenaar 		    // folding case.  This can be slow, take a shortcut when
7820d6f5d97SBram Moolenaar 		    // the case-folded word is equal to the keep-case word.
783ae5bce1cSBram Moolenaar 		    p = mip->mi_fword;
784ae5bce1cSBram Moolenaar 		    if (STRNCMP(ptr, p, wlen) != 0)
785ae5bce1cSBram Moolenaar 		    {
78691acfffcSBram Moolenaar 			for (s = ptr; s < ptr + wlen; MB_PTR_ADV(s))
78791acfffcSBram Moolenaar 			    MB_PTR_ADV(p);
788a93fa7eeSBram Moolenaar 			mip->mi_compoff = (int)(p - mip->mi_fword);
789ae5bce1cSBram Moolenaar 		    }
790ae5bce1cSBram Moolenaar 		}
7910d6f5d97SBram Moolenaar #if 0 // Disabled, see below
792d12a1326SBram Moolenaar 		c = mip->mi_compoff;
793ba534351SBram Moolenaar #endif
7945195e456SBram Moolenaar 		++mip->mi_complen;
795899dddf8SBram Moolenaar 		if (flags & WF_COMPROOT)
796899dddf8SBram Moolenaar 		    ++mip->mi_compextra;
797da2303d9SBram Moolenaar 
7980d6f5d97SBram Moolenaar 		// For NOBREAK we need to try all NOBREAK languages, at least
7990d6f5d97SBram Moolenaar 		// to find the ".add" file(s).
800860cae1cSBram Moolenaar 		for (lpi = 0; lpi < mip->mi_win->w_s->b_langp.ga_len; ++lpi)
801da2303d9SBram Moolenaar 		{
802da2303d9SBram Moolenaar 		    if (slang->sl_nobreak)
803da2303d9SBram Moolenaar 		    {
804860cae1cSBram Moolenaar 			mip->mi_lp = LANGP_ENTRY(mip->mi_win->w_s->b_langp, lpi);
805da2303d9SBram Moolenaar 			if (mip->mi_lp->lp_slang->sl_fidxs == NULL
806da2303d9SBram Moolenaar 					 || !mip->mi_lp->lp_slang->sl_nobreak)
807da2303d9SBram Moolenaar 			    continue;
808da2303d9SBram Moolenaar 		    }
809da2303d9SBram Moolenaar 
810ae5bce1cSBram Moolenaar 		    find_word(mip, FIND_COMPOUND);
811ae5bce1cSBram Moolenaar 
8120d6f5d97SBram Moolenaar 		    // When NOBREAK any word that matches is OK.  Otherwise we
8130d6f5d97SBram Moolenaar 		    // need to find the longest match, thus try with keep-case
8140d6f5d97SBram Moolenaar 		    // and prefix too.
8157862282fSBram Moolenaar 		    if (!slang->sl_nobreak || mip->mi_result == SP_BAD)
8167862282fSBram Moolenaar 		    {
8170d6f5d97SBram Moolenaar 			// Find following word in keep-case tree.
818ae5bce1cSBram Moolenaar 			mip->mi_compoff = wlen;
819ae5bce1cSBram Moolenaar 			find_word(mip, FIND_KEEPCOMPOUND);
820d12a1326SBram Moolenaar 
8210d6f5d97SBram Moolenaar #if 0	    // Disabled, a prefix must not appear halfway a compound word,
8220d6f5d97SBram Moolenaar 	    // unless the COMPOUNDPERMITFLAG is used and then it can't be a
8230d6f5d97SBram Moolenaar 	    // postponed prefix.
8247862282fSBram Moolenaar 			if (!slang->sl_nobreak || mip->mi_result == SP_BAD)
8257862282fSBram Moolenaar 			{
8260d6f5d97SBram Moolenaar 			    // Check for following word with prefix.
827d12a1326SBram Moolenaar 			    mip->mi_compoff = c;
828d12a1326SBram Moolenaar 			    find_prefix(mip, FIND_COMPOUND);
8297862282fSBram Moolenaar 			}
830910f66f9SBram Moolenaar #endif
8317862282fSBram Moolenaar 		    }
832da2303d9SBram Moolenaar 
833da2303d9SBram Moolenaar 		    if (!slang->sl_nobreak)
834da2303d9SBram Moolenaar 			break;
835da2303d9SBram Moolenaar 		}
8365195e456SBram Moolenaar 		--mip->mi_complen;
837899dddf8SBram Moolenaar 		if (flags & WF_COMPROOT)
838899dddf8SBram Moolenaar 		    --mip->mi_compextra;
839da2303d9SBram Moolenaar 		mip->mi_lp = save_lp;
840d12a1326SBram Moolenaar 
8417862282fSBram Moolenaar 		if (slang->sl_nobreak)
8427862282fSBram Moolenaar 		{
8437862282fSBram Moolenaar 		    nobreak_result = mip->mi_result;
8447862282fSBram Moolenaar 		    mip->mi_result = save_result;
8457862282fSBram Moolenaar 		    mip->mi_end = save_end;
8467862282fSBram Moolenaar 		}
8477862282fSBram Moolenaar 		else
8487862282fSBram Moolenaar 		{
849ae5bce1cSBram Moolenaar 		    if (mip->mi_result == SP_OK)
850ae5bce1cSBram Moolenaar 			break;
851ae5bce1cSBram Moolenaar 		    continue;
852ae5bce1cSBram Moolenaar 		}
8537862282fSBram Moolenaar 	    }
854ae5bce1cSBram Moolenaar 
855cfc6c43cSBram Moolenaar 	    if (flags & WF_BANNED)
856cfc6c43cSBram Moolenaar 		res = SP_BANNED;
857cfc6c43cSBram Moolenaar 	    else if (flags & WF_REGION)
85851485f06SBram Moolenaar 	    {
8590d6f5d97SBram Moolenaar 		// Check region.
860dfb9ac00SBram Moolenaar 		if ((mip->mi_lp->lp_region & (flags >> 16)) != 0)
86151485f06SBram Moolenaar 		    res = SP_OK;
86251485f06SBram Moolenaar 		else
86351485f06SBram Moolenaar 		    res = SP_LOCAL;
86451485f06SBram Moolenaar 	    }
86551485f06SBram Moolenaar 	    else if (flags & WF_RARE)
86651485f06SBram Moolenaar 		res = SP_RARE;
86751485f06SBram Moolenaar 	    else
86851485f06SBram Moolenaar 		res = SP_OK;
869cfc6c43cSBram Moolenaar 
8700d6f5d97SBram Moolenaar 	    // Always use the longest match and the best result.  For NOBREAK
8710d6f5d97SBram Moolenaar 	    // we separately keep the longest match without a following good
8720d6f5d97SBram Moolenaar 	    // word as a fall-back.
8737862282fSBram Moolenaar 	    if (nobreak_result == SP_BAD)
8747862282fSBram Moolenaar 	    {
8757862282fSBram Moolenaar 		if (mip->mi_result2 > res)
8767862282fSBram Moolenaar 		{
8777862282fSBram Moolenaar 		    mip->mi_result2 = res;
8787862282fSBram Moolenaar 		    mip->mi_end2 = mip->mi_word + wlen;
8797862282fSBram Moolenaar 		}
8807862282fSBram Moolenaar 		else if (mip->mi_result2 == res
8817862282fSBram Moolenaar 					&& mip->mi_end2 < mip->mi_word + wlen)
8827862282fSBram Moolenaar 		    mip->mi_end2 = mip->mi_word + wlen;
8837862282fSBram Moolenaar 	    }
8847862282fSBram Moolenaar 	    else if (mip->mi_result > res)
885cfc6c43cSBram Moolenaar 	    {
886cfc6c43cSBram Moolenaar 		mip->mi_result = res;
887cfc6c43cSBram Moolenaar 		mip->mi_end = mip->mi_word + wlen;
888cfc6c43cSBram Moolenaar 	    }
889f417f2b6SBram Moolenaar 	    else if (mip->mi_result == res && mip->mi_end < mip->mi_word + wlen)
890cfc6c43cSBram Moolenaar 		mip->mi_end = mip->mi_word + wlen;
891cfc6c43cSBram Moolenaar 
8927862282fSBram Moolenaar 	    if (mip->mi_result == SP_OK)
893cfc6c43cSBram Moolenaar 		break;
894cfc6c43cSBram Moolenaar 	}
89551485f06SBram Moolenaar 
8967862282fSBram Moolenaar 	if (mip->mi_result == SP_OK)
89751485f06SBram Moolenaar 	    break;
898402d2feaSBram Moolenaar     }
899402d2feaSBram Moolenaar }
900402d2feaSBram Moolenaar 
9019ba0eb85SBram Moolenaar /*
9029f94b05bSBram Moolenaar  * Return TRUE if there is a match between the word ptr[wlen] and
9039f94b05bSBram Moolenaar  * CHECKCOMPOUNDPATTERN rules, assuming that we will concatenate with another
9049f94b05bSBram Moolenaar  * word.
9059f94b05bSBram Moolenaar  * A match means that the first part of CHECKCOMPOUNDPATTERN matches at the
9069f94b05bSBram Moolenaar  * end of ptr[wlen] and the second part matches after it.
9079f94b05bSBram Moolenaar  */
90846a426c9SBram Moolenaar     int
match_checkcompoundpattern(char_u * ptr,int wlen,garray_T * gap)909764b23c8SBram Moolenaar match_checkcompoundpattern(
910764b23c8SBram Moolenaar     char_u	*ptr,
911764b23c8SBram Moolenaar     int		wlen,
9120d6f5d97SBram Moolenaar     garray_T	*gap)  // &sl_comppat
9139f94b05bSBram Moolenaar {
9149f94b05bSBram Moolenaar     int		i;
9159f94b05bSBram Moolenaar     char_u	*p;
9169f94b05bSBram Moolenaar     int		len;
9179f94b05bSBram Moolenaar 
9189f94b05bSBram Moolenaar     for (i = 0; i + 1 < gap->ga_len; i += 2)
9199f94b05bSBram Moolenaar     {
9209f94b05bSBram Moolenaar 	p = ((char_u **)gap->ga_data)[i + 1];
9219f94b05bSBram Moolenaar 	if (STRNCMP(ptr + wlen, p, STRLEN(p)) == 0)
9229f94b05bSBram Moolenaar 	{
9230d6f5d97SBram Moolenaar 	    // Second part matches at start of following compound word, now
9240d6f5d97SBram Moolenaar 	    // check if first part matches at end of previous word.
9259f94b05bSBram Moolenaar 	    p = ((char_u **)gap->ga_data)[i];
92619c9c76cSBram Moolenaar 	    len = (int)STRLEN(p);
9279f94b05bSBram Moolenaar 	    if (len <= wlen && STRNCMP(ptr + wlen - len, p, len) == 0)
9289f94b05bSBram Moolenaar 		return TRUE;
9299f94b05bSBram Moolenaar 	}
9309f94b05bSBram Moolenaar     }
9319f94b05bSBram Moolenaar     return FALSE;
9329f94b05bSBram Moolenaar }
9339f94b05bSBram Moolenaar 
9349f94b05bSBram Moolenaar /*
935a40ceaf8SBram Moolenaar  * Return TRUE if "flags" is a valid sequence of compound flags and "word"
936a40ceaf8SBram Moolenaar  * does not have too many syllables.
9375b8d8fdbSBram Moolenaar  */
93846a426c9SBram Moolenaar     int
can_compound(slang_T * slang,char_u * word,char_u * flags)939764b23c8SBram Moolenaar can_compound(slang_T *slang, char_u *word, char_u *flags)
9405b8d8fdbSBram Moolenaar {
9416de6853cSBram Moolenaar     char_u	uflags[MAXWLEN * 2];
9426de6853cSBram Moolenaar     int		i;
9436de6853cSBram Moolenaar     char_u	*p;
9445195e456SBram Moolenaar 
9455195e456SBram Moolenaar     if (slang->sl_compprog == NULL)
9465195e456SBram Moolenaar 	return FALSE;
9476de6853cSBram Moolenaar     if (enc_utf8)
9486de6853cSBram Moolenaar     {
9490d6f5d97SBram Moolenaar 	// Need to convert the single byte flags to utf8 characters.
9506de6853cSBram Moolenaar 	p = uflags;
9516de6853cSBram Moolenaar 	for (i = 0; flags[i] != NUL; ++i)
952ace95989SBram Moolenaar 	    p += utf_char2bytes(flags[i], p);
9536de6853cSBram Moolenaar 	*p = NUL;
9546de6853cSBram Moolenaar 	p = uflags;
9556de6853cSBram Moolenaar     }
9566de6853cSBram Moolenaar     else
9576de6853cSBram Moolenaar 	p = flags;
958dffa5b8eSBram Moolenaar     if (!vim_regexec_prog(&slang->sl_compprog, FALSE, p, 0))
9595195e456SBram Moolenaar 	return FALSE;
9605195e456SBram Moolenaar 
9610d6f5d97SBram Moolenaar     // Count the number of syllables.  This may be slow, do it last.  If there
9620d6f5d97SBram Moolenaar     // are too many syllables AND the number of compound words is above
9630d6f5d97SBram Moolenaar     // COMPOUNDWORDMAX then compounding is not allowed.
9645195e456SBram Moolenaar     if (slang->sl_compsylmax < MAXWLEN
9655195e456SBram Moolenaar 		       && count_syllables(slang, word) > slang->sl_compsylmax)
9666de6853cSBram Moolenaar 	return (int)STRLEN(flags) < slang->sl_compmax;
9675195e456SBram Moolenaar     return TRUE;
9685b8d8fdbSBram Moolenaar }
9695b8d8fdbSBram Moolenaar 
9705b8d8fdbSBram Moolenaar /*
9719f94b05bSBram Moolenaar  * Return TRUE if the compound flags in compflags[] match the start of any
9729f94b05bSBram Moolenaar  * compound rule.  This is used to stop trying a compound if the flags
9739f94b05bSBram Moolenaar  * collected so far can't possibly match any compound rule.
9749f94b05bSBram Moolenaar  * Caller must check that slang->sl_comprules is not NULL.
9759f94b05bSBram Moolenaar  */
97646a426c9SBram Moolenaar     int
match_compoundrule(slang_T * slang,char_u * compflags)977764b23c8SBram Moolenaar match_compoundrule(slang_T *slang, char_u *compflags)
9789f94b05bSBram Moolenaar {
9799f94b05bSBram Moolenaar     char_u	*p;
9809f94b05bSBram Moolenaar     int		i;
9819f94b05bSBram Moolenaar     int		c;
9829f94b05bSBram Moolenaar 
9830d6f5d97SBram Moolenaar     // loop over all the COMPOUNDRULE entries
9849f94b05bSBram Moolenaar     for (p = slang->sl_comprules; *p != NUL; ++p)
9859f94b05bSBram Moolenaar     {
9860d6f5d97SBram Moolenaar 	// loop over the flags in the compound word we have made, match
9870d6f5d97SBram Moolenaar 	// them against the current rule entry
9889f94b05bSBram Moolenaar 	for (i = 0; ; ++i)
9899f94b05bSBram Moolenaar 	{
9909f94b05bSBram Moolenaar 	    c = compflags[i];
9919f94b05bSBram Moolenaar 	    if (c == NUL)
9920d6f5d97SBram Moolenaar 		// found a rule that matches for the flags we have so far
9939f94b05bSBram Moolenaar 		return TRUE;
9949f94b05bSBram Moolenaar 	    if (*p == '/' || *p == NUL)
9950d6f5d97SBram Moolenaar 		break;  // end of rule, it's too short
9969f94b05bSBram Moolenaar 	    if (*p == '[')
9979f94b05bSBram Moolenaar 	    {
9989f94b05bSBram Moolenaar 		int match = FALSE;
9999f94b05bSBram Moolenaar 
10000d6f5d97SBram Moolenaar 		// compare against all the flags in []
10019f94b05bSBram Moolenaar 		++p;
10029f94b05bSBram Moolenaar 		while (*p != ']' && *p != NUL)
10039f94b05bSBram Moolenaar 		    if (*p++ == c)
10049f94b05bSBram Moolenaar 			match = TRUE;
10059f94b05bSBram Moolenaar 		if (!match)
10060d6f5d97SBram Moolenaar 		    break;  // none matches
10079f94b05bSBram Moolenaar 	    }
10089f94b05bSBram Moolenaar 	    else if (*p != c)
10090d6f5d97SBram Moolenaar 		break;  // flag of word doesn't match flag in pattern
10109f94b05bSBram Moolenaar 	    ++p;
10119f94b05bSBram Moolenaar 	}
10129f94b05bSBram Moolenaar 
10130d6f5d97SBram Moolenaar 	// Skip to the next "/", where the next pattern starts.
10149f94b05bSBram Moolenaar 	p = vim_strchr(p, '/');
10159f94b05bSBram Moolenaar 	if (p == NULL)
10169f94b05bSBram Moolenaar 	    break;
10179f94b05bSBram Moolenaar     }
10189f94b05bSBram Moolenaar 
10190d6f5d97SBram Moolenaar     // Checked all the rules and none of them match the flags, so there
10200d6f5d97SBram Moolenaar     // can't possibly be a compound starting with these flags.
10219f94b05bSBram Moolenaar     return FALSE;
10229f94b05bSBram Moolenaar }
10239f94b05bSBram Moolenaar 
10249f94b05bSBram Moolenaar /*
1025dfb9ac00SBram Moolenaar  * Return non-zero if the prefix indicated by "arridx" matches with the prefix
1026dfb9ac00SBram Moolenaar  * ID in "flags" for the word "word".
1027cf6bf39fSBram Moolenaar  * The WF_RAREPFX flag is included in the return value for a rare prefix.
1028f417f2b6SBram Moolenaar  */
102946a426c9SBram Moolenaar     int
valid_word_prefix(int totprefcnt,int arridx,int flags,char_u * word,slang_T * slang,int cond_req)1030764b23c8SBram Moolenaar valid_word_prefix(
10310d6f5d97SBram Moolenaar     int		totprefcnt,	// nr of prefix IDs
10320d6f5d97SBram Moolenaar     int		arridx,		// idx in sl_pidxs[]
1033764b23c8SBram Moolenaar     int		flags,
1034764b23c8SBram Moolenaar     char_u	*word,
1035764b23c8SBram Moolenaar     slang_T	*slang,
10360d6f5d97SBram Moolenaar     int		cond_req)	// only use prefixes with a condition
1037f417f2b6SBram Moolenaar {
1038f417f2b6SBram Moolenaar     int		prefcnt;
1039f417f2b6SBram Moolenaar     int		pidx;
1040dffa5b8eSBram Moolenaar     regprog_T	**rp;
1041dfb9ac00SBram Moolenaar     int		prefid;
1042f417f2b6SBram Moolenaar 
1043dfb9ac00SBram Moolenaar     prefid = (unsigned)flags >> 24;
1044f417f2b6SBram Moolenaar     for (prefcnt = totprefcnt - 1; prefcnt >= 0; --prefcnt)
1045f417f2b6SBram Moolenaar     {
1046f417f2b6SBram Moolenaar 	pidx = slang->sl_pidxs[arridx + prefcnt];
1047f417f2b6SBram Moolenaar 
10480d6f5d97SBram Moolenaar 	// Check the prefix ID.
1049f417f2b6SBram Moolenaar 	if (prefid != (pidx & 0xff))
1050f417f2b6SBram Moolenaar 	    continue;
1051f417f2b6SBram Moolenaar 
10520d6f5d97SBram Moolenaar 	// Check if the prefix doesn't combine and the word already has a
10530d6f5d97SBram Moolenaar 	// suffix.
1054dfb9ac00SBram Moolenaar 	if ((flags & WF_HAS_AFF) && (pidx & WF_PFX_NC))
1055dfb9ac00SBram Moolenaar 	    continue;
1056dfb9ac00SBram Moolenaar 
10570d6f5d97SBram Moolenaar 	// Check the condition, if there is one.  The condition index is
10580d6f5d97SBram Moolenaar 	// stored in the two bytes above the prefix ID byte.
1059dffa5b8eSBram Moolenaar 	rp = &slang->sl_prefprog[((unsigned)pidx >> 8) & 0xffff];
1060dffa5b8eSBram Moolenaar 	if (*rp != NULL)
1061f417f2b6SBram Moolenaar 	{
1062dffa5b8eSBram Moolenaar 	    if (!vim_regexec_prog(rp, FALSE, word, 0))
1063f417f2b6SBram Moolenaar 		continue;
1064f417f2b6SBram Moolenaar 	}
106553805d1eSBram Moolenaar 	else if (cond_req)
106653805d1eSBram Moolenaar 	    continue;
1067f417f2b6SBram Moolenaar 
10680d6f5d97SBram Moolenaar 	// It's a match!  Return the WF_ flags.
1069cf6bf39fSBram Moolenaar 	return pidx;
1070f417f2b6SBram Moolenaar     }
1071cf6bf39fSBram Moolenaar     return 0;
1072f417f2b6SBram Moolenaar }
1073f417f2b6SBram Moolenaar 
1074f417f2b6SBram Moolenaar /*
10751d73c885SBram Moolenaar  * Check if the word at "mip->mi_word" has a matching prefix.
10761d73c885SBram Moolenaar  * If it does, then check the following word.
10771d73c885SBram Moolenaar  *
1078d12a1326SBram Moolenaar  * If "mode" is "FIND_COMPOUND" then do the same after another word, find a
1079d12a1326SBram Moolenaar  * prefix in a compound word.
1080d12a1326SBram Moolenaar  *
10811d73c885SBram Moolenaar  * For a match mip->mi_result is updated.
10821d73c885SBram Moolenaar  */
10831d73c885SBram Moolenaar     static void
find_prefix(matchinf_T * mip,int mode)1084764b23c8SBram Moolenaar find_prefix(matchinf_T *mip, int mode)
10851d73c885SBram Moolenaar {
10861d73c885SBram Moolenaar     idx_T	arridx = 0;
10871d73c885SBram Moolenaar     int		len;
10881d73c885SBram Moolenaar     int		wlen = 0;
10891d73c885SBram Moolenaar     int		flen;
10901d73c885SBram Moolenaar     int		c;
10911d73c885SBram Moolenaar     char_u	*ptr;
10921d73c885SBram Moolenaar     idx_T	lo, hi, m;
10931d73c885SBram Moolenaar     slang_T	*slang = mip->mi_lp->lp_slang;
10941d73c885SBram Moolenaar     char_u	*byts;
10951d73c885SBram Moolenaar     idx_T	*idxs;
10961d73c885SBram Moolenaar 
109742eeac35SBram Moolenaar     byts = slang->sl_pbyts;
109842eeac35SBram Moolenaar     if (byts == NULL)
10990d6f5d97SBram Moolenaar 	return;			// array is empty
110042eeac35SBram Moolenaar 
11010d6f5d97SBram Moolenaar     // We use the case-folded word here, since prefixes are always
11020d6f5d97SBram Moolenaar     // case-folded.
11031d73c885SBram Moolenaar     ptr = mip->mi_fword;
11040d6f5d97SBram Moolenaar     flen = mip->mi_fwordlen;    // available case-folded bytes
1105d12a1326SBram Moolenaar     if (mode == FIND_COMPOUND)
1106d12a1326SBram Moolenaar     {
11070d6f5d97SBram Moolenaar 	// Skip over the previously found word(s).
1108d12a1326SBram Moolenaar 	ptr += mip->mi_compoff;
1109d12a1326SBram Moolenaar 	flen -= mip->mi_compoff;
1110d12a1326SBram Moolenaar     }
11111d73c885SBram Moolenaar     idxs = slang->sl_pidxs;
11121d73c885SBram Moolenaar 
11131d73c885SBram Moolenaar     /*
11141d73c885SBram Moolenaar      * Repeat advancing in the tree until:
11151d73c885SBram Moolenaar      * - there is a byte that doesn't match,
11161d73c885SBram Moolenaar      * - we reach the end of the tree,
11171d73c885SBram Moolenaar      * - or we reach the end of the line.
11181d73c885SBram Moolenaar      */
11191d73c885SBram Moolenaar     for (;;)
11201d73c885SBram Moolenaar     {
11211d73c885SBram Moolenaar 	if (flen == 0 && *mip->mi_fend != NUL)
11221d73c885SBram Moolenaar 	    flen = fold_more(mip);
11231d73c885SBram Moolenaar 
11241d73c885SBram Moolenaar 	len = byts[arridx++];
11251d73c885SBram Moolenaar 
11260d6f5d97SBram Moolenaar 	// If the first possible byte is a zero the prefix could end here.
11270d6f5d97SBram Moolenaar 	// Check if the following word matches and supports the prefix.
11281d73c885SBram Moolenaar 	if (byts[arridx] == 0)
11291d73c885SBram Moolenaar 	{
11300d6f5d97SBram Moolenaar 	    // There can be several prefixes with different conditions.  We
11310d6f5d97SBram Moolenaar 	    // try them all, since we don't know which one will give the
11320d6f5d97SBram Moolenaar 	    // longest match.  The word is the same each time, pass the list
11330d6f5d97SBram Moolenaar 	    // of possible prefixes to find_word().
11341d73c885SBram Moolenaar 	    mip->mi_prefarridx = arridx;
11351d73c885SBram Moolenaar 	    mip->mi_prefcnt = len;
11361d73c885SBram Moolenaar 	    while (len > 0 && byts[arridx] == 0)
11371d73c885SBram Moolenaar 	    {
11381d73c885SBram Moolenaar 		++arridx;
11391d73c885SBram Moolenaar 		--len;
11401d73c885SBram Moolenaar 	    }
11411d73c885SBram Moolenaar 	    mip->mi_prefcnt -= len;
11421d73c885SBram Moolenaar 
11430d6f5d97SBram Moolenaar 	    // Find the word that comes after the prefix.
11441d73c885SBram Moolenaar 	    mip->mi_prefixlen = wlen;
1145d12a1326SBram Moolenaar 	    if (mode == FIND_COMPOUND)
11460d6f5d97SBram Moolenaar 		// Skip over the previously found word(s).
1147d12a1326SBram Moolenaar 		mip->mi_prefixlen += mip->mi_compoff;
1148d12a1326SBram Moolenaar 
114953805d1eSBram Moolenaar 	    if (has_mbyte)
115053805d1eSBram Moolenaar 	    {
11510d6f5d97SBram Moolenaar 		// Case-folded length may differ from original length.
1152d12a1326SBram Moolenaar 		mip->mi_cprefixlen = nofold_len(mip->mi_fword,
1153d12a1326SBram Moolenaar 					     mip->mi_prefixlen, mip->mi_word);
115453805d1eSBram Moolenaar 	    }
115553805d1eSBram Moolenaar 	    else
1156d12a1326SBram Moolenaar 		mip->mi_cprefixlen = mip->mi_prefixlen;
11571d73c885SBram Moolenaar 	    find_word(mip, FIND_PREFIX);
11581d73c885SBram Moolenaar 
11591d73c885SBram Moolenaar 
11601d73c885SBram Moolenaar 	    if (len == 0)
11610d6f5d97SBram Moolenaar 		break;	    // no children, word must end here
11621d73c885SBram Moolenaar 	}
11631d73c885SBram Moolenaar 
11640d6f5d97SBram Moolenaar 	// Stop looking at end of the line.
11651d73c885SBram Moolenaar 	if (ptr[wlen] == NUL)
11661d73c885SBram Moolenaar 	    break;
11671d73c885SBram Moolenaar 
11680d6f5d97SBram Moolenaar 	// Perform a binary search in the list of accepted bytes.
11691d73c885SBram Moolenaar 	c = ptr[wlen];
11701d73c885SBram Moolenaar 	lo = arridx;
11711d73c885SBram Moolenaar 	hi = arridx + len - 1;
11721d73c885SBram Moolenaar 	while (lo < hi)
11731d73c885SBram Moolenaar 	{
11741d73c885SBram Moolenaar 	    m = (lo + hi) / 2;
11751d73c885SBram Moolenaar 	    if (byts[m] > c)
11761d73c885SBram Moolenaar 		hi = m - 1;
11771d73c885SBram Moolenaar 	    else if (byts[m] < c)
11781d73c885SBram Moolenaar 		lo = m + 1;
11791d73c885SBram Moolenaar 	    else
11801d73c885SBram Moolenaar 	    {
11811d73c885SBram Moolenaar 		lo = hi = m;
11821d73c885SBram Moolenaar 		break;
11831d73c885SBram Moolenaar 	    }
11841d73c885SBram Moolenaar 	}
11851d73c885SBram Moolenaar 
11860d6f5d97SBram Moolenaar 	// Stop if there is no matching byte.
11871d73c885SBram Moolenaar 	if (hi < lo || byts[lo] != c)
11881d73c885SBram Moolenaar 	    break;
11891d73c885SBram Moolenaar 
11900d6f5d97SBram Moolenaar 	// Continue at the child (if there is one).
11911d73c885SBram Moolenaar 	arridx = idxs[lo];
11921d73c885SBram Moolenaar 	++wlen;
11931d73c885SBram Moolenaar 	--flen;
11941d73c885SBram Moolenaar     }
11951d73c885SBram Moolenaar }
11961d73c885SBram Moolenaar 
11971d73c885SBram Moolenaar /*
11981d73c885SBram Moolenaar  * Need to fold at least one more character.  Do until next non-word character
1199a40ceaf8SBram Moolenaar  * for efficiency.  Include the non-word character too.
12001d73c885SBram Moolenaar  * Return the length of the folded chars in bytes.
12011d73c885SBram Moolenaar  */
12021d73c885SBram Moolenaar     static int
fold_more(matchinf_T * mip)1203764b23c8SBram Moolenaar fold_more(matchinf_T *mip)
12041d73c885SBram Moolenaar {
12051d73c885SBram Moolenaar     int		flen;
12061d73c885SBram Moolenaar     char_u	*p;
12071d73c885SBram Moolenaar 
12081d73c885SBram Moolenaar     p = mip->mi_fend;
12091d73c885SBram Moolenaar     do
121091acfffcSBram Moolenaar 	MB_PTR_ADV(mip->mi_fend);
1211abab0b0fSBram Moolenaar     while (*mip->mi_fend != NUL && spell_iswordp(mip->mi_fend, mip->mi_win));
12121d73c885SBram Moolenaar 
12130d6f5d97SBram Moolenaar     // Include the non-word character so that we can check for the word end.
12141d73c885SBram Moolenaar     if (*mip->mi_fend != NUL)
121591acfffcSBram Moolenaar 	MB_PTR_ADV(mip->mi_fend);
12161d73c885SBram Moolenaar 
12174f135275SBram Moolenaar     (void)spell_casefold(mip->mi_win, p, (int)(mip->mi_fend - p),
12181d73c885SBram Moolenaar 			     mip->mi_fword + mip->mi_fwordlen,
12191d73c885SBram Moolenaar 			     MAXWLEN - mip->mi_fwordlen);
1220a93fa7eeSBram Moolenaar     flen = (int)STRLEN(mip->mi_fword + mip->mi_fwordlen);
12211d73c885SBram Moolenaar     mip->mi_fwordlen += flen;
12221d73c885SBram Moolenaar     return flen;
12231d73c885SBram Moolenaar }
12241d73c885SBram Moolenaar 
12251d73c885SBram Moolenaar /*
12269ba0eb85SBram Moolenaar  * Check case flags for a word.  Return TRUE if the word has the requested
12279ba0eb85SBram Moolenaar  * case.
12289ba0eb85SBram Moolenaar  */
122946a426c9SBram Moolenaar     int
spell_valid_case(int wordflags,int treeflags)1230764b23c8SBram Moolenaar spell_valid_case(
12310d6f5d97SBram Moolenaar     int	    wordflags,	    // flags for the checked word.
12320d6f5d97SBram Moolenaar     int	    treeflags)	    // flags for the word in the spell tree
12339ba0eb85SBram Moolenaar {
12340dc065eeSBram Moolenaar     return ((wordflags == WF_ALLCAP && (treeflags & WF_FIXCAP) == 0)
12359ba0eb85SBram Moolenaar 	    || ((treeflags & (WF_ALLCAP | WF_KEEPCAP)) == 0
12360fa313a7SBram Moolenaar 		&& ((treeflags & WF_ONECAP) == 0
12370fa313a7SBram Moolenaar 					   || (wordflags & WF_ONECAP) != 0)));
12389ba0eb85SBram Moolenaar }
12399ba0eb85SBram Moolenaar 
1240f417f2b6SBram Moolenaar /*
1241f417f2b6SBram Moolenaar  * Return TRUE if spell checking is not enabled.
1242f417f2b6SBram Moolenaar  */
1243*8ee52affSYegappan Lakshmanan     static int
no_spell_checking(win_T * wp)1244764b23c8SBram Moolenaar no_spell_checking(win_T *wp)
1245f417f2b6SBram Moolenaar {
1246860cae1cSBram Moolenaar     if (!wp->w_p_spell || *wp->w_s->b_p_spl == NUL
1247860cae1cSBram Moolenaar 					 || wp->w_s->b_langp.ga_len == 0)
1248f417f2b6SBram Moolenaar     {
1249152e79e9SBram Moolenaar 	emsg(_(e_no_spell));
1250f417f2b6SBram Moolenaar 	return TRUE;
1251f417f2b6SBram Moolenaar     }
1252f417f2b6SBram Moolenaar     return FALSE;
1253f417f2b6SBram Moolenaar }
1254402d2feaSBram Moolenaar 
1255402d2feaSBram Moolenaar /*
1256402d2feaSBram Moolenaar  * Move to next spell error.
1257ac6e65f8SBram Moolenaar  * "curline" is FALSE for "[s", "]s", "[S" and "]S".
1258ac6e65f8SBram Moolenaar  * "curline" is TRUE to find word under/after cursor in the same line.
12595195e456SBram Moolenaar  * For Insert mode completion "dir" is BACKWARD and "curline" is TRUE: move
12605195e456SBram Moolenaar  * to after badly spelled word before the cursor.
12616de6853cSBram Moolenaar  * Return 0 if not found, length of the badly spelled word otherwise.
1262402d2feaSBram Moolenaar  */
1263402d2feaSBram Moolenaar     int
spell_move_to(win_T * wp,int dir,int allwords,int curline,hlf_T * attrp)1264764b23c8SBram Moolenaar spell_move_to(
1265764b23c8SBram Moolenaar     win_T	*wp,
12660d6f5d97SBram Moolenaar     int		dir,		// FORWARD or BACKWARD
12670d6f5d97SBram Moolenaar     int		allwords,	// TRUE for "[s"/"]s", FALSE for "[S"/"]S"
1268764b23c8SBram Moolenaar     int		curline,
12690d6f5d97SBram Moolenaar     hlf_T	*attrp)		// return: attributes of bad word or NULL
12700d6f5d97SBram Moolenaar 				// (only when "dir" is FORWARD)
1271402d2feaSBram Moolenaar {
12722cf8b301SBram Moolenaar     linenr_T	lnum;
12732cf8b301SBram Moolenaar     pos_T	found_pos;
12746de6853cSBram Moolenaar     int		found_len = 0;
1275402d2feaSBram Moolenaar     char_u	*line;
1276402d2feaSBram Moolenaar     char_u	*p;
12770c40586aSBram Moolenaar     char_u	*endp;
1278482aaeb0SBram Moolenaar     hlf_T	attr;
1279402d2feaSBram Moolenaar     int		len;
1280f71a3db4SBram Moolenaar #ifdef FEAT_SYN_HL
1281860cae1cSBram Moolenaar     int		has_syntax = syntax_present(wp);
1282f71a3db4SBram Moolenaar #endif
128389d4032cSBram Moolenaar     int		col;
12842cf8b301SBram Moolenaar     int		can_spell;
12850c40586aSBram Moolenaar     char_u	*buf = NULL;
12860c40586aSBram Moolenaar     int		buflen = 0;
12870c40586aSBram Moolenaar     int		skip = 0;
1288f9184a1dSBram Moolenaar     int		capcol = -1;
1289ac6e65f8SBram Moolenaar     int		found_one = FALSE;
1290ac6e65f8SBram Moolenaar     int		wrapped = FALSE;
1291402d2feaSBram Moolenaar 
129295529568SBram Moolenaar     if (no_spell_checking(wp))
12936de6853cSBram Moolenaar 	return 0;
1294402d2feaSBram Moolenaar 
12952cf8b301SBram Moolenaar     /*
12962cf8b301SBram Moolenaar      * Start looking for bad word at the start of the line, because we can't
129786ca6e3bSBram Moolenaar      * start halfway a word, we don't know where it starts or ends.
12982cf8b301SBram Moolenaar      *
12992cf8b301SBram Moolenaar      * When searching backwards, we continue in the line to find the last
13002cf8b301SBram Moolenaar      * bad word (in the cursor line: before the cursor).
13010c40586aSBram Moolenaar      *
13020c40586aSBram Moolenaar      * We concatenate the start of the next line, so that wrapped words work
13030c40586aSBram Moolenaar      * (e.g. "et<line-break>cetera").  Doesn't work when searching backwards
13040c40586aSBram Moolenaar      * though...
13052cf8b301SBram Moolenaar      */
130695529568SBram Moolenaar     lnum = wp->w_cursor.lnum;
1307b5aedf3eSBram Moolenaar     CLEAR_POS(&found_pos);
1308402d2feaSBram Moolenaar 
1309402d2feaSBram Moolenaar     while (!got_int)
1310402d2feaSBram Moolenaar     {
131195529568SBram Moolenaar 	line = ml_get_buf(wp->w_buffer, lnum, FALSE);
13122cf8b301SBram Moolenaar 
1313a93fa7eeSBram Moolenaar 	len = (int)STRLEN(line);
13140c40586aSBram Moolenaar 	if (buflen < len + MAXWLEN + 2)
13150c40586aSBram Moolenaar 	{
13160c40586aSBram Moolenaar 	    vim_free(buf);
13170c40586aSBram Moolenaar 	    buflen = len + MAXWLEN + 2;
13180c40586aSBram Moolenaar 	    buf = alloc(buflen);
13190c40586aSBram Moolenaar 	    if (buf == NULL)
13200c40586aSBram Moolenaar 		break;
13210c40586aSBram Moolenaar 	}
13220c40586aSBram Moolenaar 
13230d6f5d97SBram Moolenaar 	// In first line check first word for Capital.
1324f9184a1dSBram Moolenaar 	if (lnum == 1)
1325f9184a1dSBram Moolenaar 	    capcol = 0;
1326f9184a1dSBram Moolenaar 
13270d6f5d97SBram Moolenaar 	// For checking first word with a capital skip white space.
1328f9184a1dSBram Moolenaar 	if (capcol == 0)
1329e2e69e48SBram Moolenaar 	    capcol = getwhitecols(line);
1330a93fa7eeSBram Moolenaar 	else if (curline && wp == curwin)
1331a93fa7eeSBram Moolenaar 	{
13320d6f5d97SBram Moolenaar 	    // For spellbadword(): check if first word needs a capital.
1333e2e69e48SBram Moolenaar 	    col = getwhitecols(line);
1334a93fa7eeSBram Moolenaar 	    if (check_need_cap(lnum, col))
1335a93fa7eeSBram Moolenaar 		capcol = col;
1336a93fa7eeSBram Moolenaar 
13370d6f5d97SBram Moolenaar 	    // Need to get the line again, may have looked at the previous
13380d6f5d97SBram Moolenaar 	    // one.
1339a93fa7eeSBram Moolenaar 	    line = ml_get_buf(wp->w_buffer, lnum, FALSE);
1340a93fa7eeSBram Moolenaar 	}
1341f9184a1dSBram Moolenaar 
13420d6f5d97SBram Moolenaar 	// Copy the line into "buf" and append the start of the next line if
13430d6f5d97SBram Moolenaar 	// possible.
13440c40586aSBram Moolenaar 	STRCPY(buf, line);
134595529568SBram Moolenaar 	if (lnum < wp->w_buffer->b_ml.ml_line_count)
13465dd95a10SBram Moolenaar 	    spell_cat_line(buf + STRLEN(buf),
13475dd95a10SBram Moolenaar 			  ml_get_buf(wp->w_buffer, lnum + 1, FALSE), MAXWLEN);
13480c40586aSBram Moolenaar 
13490c40586aSBram Moolenaar 	p = buf + skip;
13500c40586aSBram Moolenaar 	endp = buf + len;
13510c40586aSBram Moolenaar 	while (p < endp)
1352402d2feaSBram Moolenaar 	{
13530d6f5d97SBram Moolenaar 	    // When searching backward don't search after the cursor.  Unless
13540d6f5d97SBram Moolenaar 	    // we wrapped around the end of the buffer.
13552cf8b301SBram Moolenaar 	    if (dir == BACKWARD
135695529568SBram Moolenaar 		    && lnum == wp->w_cursor.lnum
1357ac6e65f8SBram Moolenaar 		    && !wrapped
135895529568SBram Moolenaar 		    && (colnr_T)(p - buf) >= wp->w_cursor.col)
13592cf8b301SBram Moolenaar 		break;
13602cf8b301SBram Moolenaar 
13610d6f5d97SBram Moolenaar 	    // start of word
1362482aaeb0SBram Moolenaar 	    attr = HLF_COUNT;
13634770d09aSBram Moolenaar 	    len = spell_check(wp, p, &attr, &capcol, FALSE);
13642cf8b301SBram Moolenaar 
1365482aaeb0SBram Moolenaar 	    if (attr != HLF_COUNT)
1366402d2feaSBram Moolenaar 	    {
13670d6f5d97SBram Moolenaar 		// We found a bad word.  Check the attribute.
1368482aaeb0SBram Moolenaar 		if (allwords || attr == HLF_SPB)
1369402d2feaSBram Moolenaar 		{
13700d6f5d97SBram Moolenaar 		    // When searching forward only accept a bad word after
13710d6f5d97SBram Moolenaar 		    // the cursor.
13722cf8b301SBram Moolenaar 		    if (dir == BACKWARD
1373ac6e65f8SBram Moolenaar 			    || lnum != wp->w_cursor.lnum
137495529568SBram Moolenaar 			    || (lnum == wp->w_cursor.lnum
1375ac6e65f8SBram Moolenaar 				&& (wrapped
1376ac6e65f8SBram Moolenaar 				    || (colnr_T)(curline ? p - buf + len
13770c40586aSBram Moolenaar 						     : p - buf)
1378ac6e65f8SBram Moolenaar 						  > wp->w_cursor.col)))
13792cf8b301SBram Moolenaar 		    {
1380f71a3db4SBram Moolenaar #ifdef FEAT_SYN_HL
13812cf8b301SBram Moolenaar 			if (has_syntax)
13822cf8b301SBram Moolenaar 			{
1383a93fa7eeSBram Moolenaar 			    col = (int)(p - buf);
138495529568SBram Moolenaar 			    (void)syn_get_id(wp, lnum, (colnr_T)col,
138556cefaf1SBram Moolenaar 						    FALSE, &can_spell, FALSE);
1386d68071d8SBram Moolenaar 			    if (!can_spell)
1387d68071d8SBram Moolenaar 				attr = HLF_COUNT;
13882cf8b301SBram Moolenaar 			}
13892cf8b301SBram Moolenaar 			else
1390f71a3db4SBram Moolenaar #endif
13912cf8b301SBram Moolenaar 			    can_spell = TRUE;
13922cf8b301SBram Moolenaar 
13932cf8b301SBram Moolenaar 			if (can_spell)
13942cf8b301SBram Moolenaar 			{
1395d68071d8SBram Moolenaar 			    found_one = TRUE;
13962cf8b301SBram Moolenaar 			    found_pos.lnum = lnum;
1397a93fa7eeSBram Moolenaar 			    found_pos.col = (int)(p - buf);
13982cf8b301SBram Moolenaar 			    found_pos.coladd = 0;
13992cf8b301SBram Moolenaar 			    if (dir == FORWARD)
14002cf8b301SBram Moolenaar 			    {
14010d6f5d97SBram Moolenaar 				// No need to search further.
140295529568SBram Moolenaar 				wp->w_cursor = found_pos;
14030c40586aSBram Moolenaar 				vim_free(buf);
140495529568SBram Moolenaar 				if (attrp != NULL)
140595529568SBram Moolenaar 				    *attrp = attr;
14066de6853cSBram Moolenaar 				return len;
1407402d2feaSBram Moolenaar 			    }
14085195e456SBram Moolenaar 			    else if (curline)
14090d6f5d97SBram Moolenaar 				// Insert mode completion: put cursor after
14100d6f5d97SBram Moolenaar 				// the bad word.
14115195e456SBram Moolenaar 				found_pos.col += len;
14126de6853cSBram Moolenaar 			    found_len = len;
14132cf8b301SBram Moolenaar 			}
14142cf8b301SBram Moolenaar 		    }
1415d68071d8SBram Moolenaar 		    else
1416d68071d8SBram Moolenaar 			found_one = TRUE;
14172cf8b301SBram Moolenaar 		}
1418402d2feaSBram Moolenaar 	    }
141951485f06SBram Moolenaar 
14200d6f5d97SBram Moolenaar 	    // advance to character after the word
1421402d2feaSBram Moolenaar 	    p += len;
1422f9184a1dSBram Moolenaar 	    capcol -= len;
1423402d2feaSBram Moolenaar 	}
1424402d2feaSBram Moolenaar 
14255195e456SBram Moolenaar 	if (dir == BACKWARD && found_pos.lnum != 0)
14262cf8b301SBram Moolenaar 	{
14270d6f5d97SBram Moolenaar 	    // Use the last match in the line (before the cursor).
142895529568SBram Moolenaar 	    wp->w_cursor = found_pos;
14290c40586aSBram Moolenaar 	    vim_free(buf);
14306de6853cSBram Moolenaar 	    return found_len;
14312cf8b301SBram Moolenaar 	}
14325195e456SBram Moolenaar 
14335195e456SBram Moolenaar 	if (curline)
14340d6f5d97SBram Moolenaar 	    break;	// only check cursor line
14355195e456SBram Moolenaar 
14360d6f5d97SBram Moolenaar 	// If we are back at the starting line and searched it again there
14370d6f5d97SBram Moolenaar 	// is no match, give up.
1438ac6e65f8SBram Moolenaar 	if (lnum == wp->w_cursor.lnum && wrapped)
14390c40586aSBram Moolenaar 	    break;
1440ac6e65f8SBram Moolenaar 
14410d6f5d97SBram Moolenaar 	// Advance to next line.
1442d3f78dc9SBram Moolenaar 	if (dir == BACKWARD)
1443d3f78dc9SBram Moolenaar 	{
1444ac6e65f8SBram Moolenaar 	    if (lnum > 1)
14452cf8b301SBram Moolenaar 		--lnum;
1446ac6e65f8SBram Moolenaar 	    else if (!p_ws)
14470d6f5d97SBram Moolenaar 		break;	    // at first line and 'nowrapscan'
1448ac6e65f8SBram Moolenaar 	    else
1449ac6e65f8SBram Moolenaar 	    {
14500d6f5d97SBram Moolenaar 		// Wrap around to the end of the buffer.  May search the
14510d6f5d97SBram Moolenaar 		// starting line again and accept the last match.
1452ac6e65f8SBram Moolenaar 		lnum = wp->w_buffer->b_ml.ml_line_count;
1453ac6e65f8SBram Moolenaar 		wrapped = TRUE;
14548b96d64cSBram Moolenaar 		if (!shortmess(SHM_SEARCH))
14558b96d64cSBram Moolenaar 		    give_warning((char_u *)_(top_bot_msg), TRUE);
1456ac6e65f8SBram Moolenaar 	    }
1457f9184a1dSBram Moolenaar 	    capcol = -1;
14582cf8b301SBram Moolenaar 	}
14592cf8b301SBram Moolenaar 	else
14602cf8b301SBram Moolenaar 	{
1461ac6e65f8SBram Moolenaar 	    if (lnum < wp->w_buffer->b_ml.ml_line_count)
14622cf8b301SBram Moolenaar 		++lnum;
1463ac6e65f8SBram Moolenaar 	    else if (!p_ws)
14640d6f5d97SBram Moolenaar 		break;	    // at first line and 'nowrapscan'
1465ac6e65f8SBram Moolenaar 	    else
1466ac6e65f8SBram Moolenaar 	    {
14670d6f5d97SBram Moolenaar 		// Wrap around to the start of the buffer.  May search the
14680d6f5d97SBram Moolenaar 		// starting line again and accept the first match.
1469ac6e65f8SBram Moolenaar 		lnum = 1;
1470ac6e65f8SBram Moolenaar 		wrapped = TRUE;
14718b96d64cSBram Moolenaar 		if (!shortmess(SHM_SEARCH))
14728b96d64cSBram Moolenaar 		    give_warning((char_u *)_(bot_top_msg), TRUE);
1473ac6e65f8SBram Moolenaar 	    }
1474ac6e65f8SBram Moolenaar 
14750d6f5d97SBram Moolenaar 	    // If we are back at the starting line and there is no match then
14760d6f5d97SBram Moolenaar 	    // give up.
1477d3f78dc9SBram Moolenaar 	    if (lnum == wp->w_cursor.lnum && !found_one)
1478ac6e65f8SBram Moolenaar 		break;
14790c40586aSBram Moolenaar 
14800d6f5d97SBram Moolenaar 	    // Skip the characters at the start of the next line that were
14810d6f5d97SBram Moolenaar 	    // included in a match crossing line boundaries.
1482482aaeb0SBram Moolenaar 	    if (attr == HLF_COUNT)
1483a93fa7eeSBram Moolenaar 		skip = (int)(p - endp);
14840c40586aSBram Moolenaar 	    else
14850c40586aSBram Moolenaar 		skip = 0;
1486f9184a1dSBram Moolenaar 
14870d6f5d97SBram Moolenaar 	    // Capcol skips over the inserted space.
1488f9184a1dSBram Moolenaar 	    --capcol;
1489f9184a1dSBram Moolenaar 
14900d6f5d97SBram Moolenaar 	    // But after empty line check first word in next line
1491f9184a1dSBram Moolenaar 	    if (*skipwhite(line) == NUL)
1492f9184a1dSBram Moolenaar 		capcol = 0;
14932cf8b301SBram Moolenaar 	}
1494402d2feaSBram Moolenaar 
1495402d2feaSBram Moolenaar 	line_breakcheck();
1496402d2feaSBram Moolenaar     }
1497402d2feaSBram Moolenaar 
14980c40586aSBram Moolenaar     vim_free(buf);
14996de6853cSBram Moolenaar     return 0;
15000c40586aSBram Moolenaar }
15010c40586aSBram Moolenaar 
15020c40586aSBram Moolenaar /*
15030c40586aSBram Moolenaar  * For spell checking: concatenate the start of the following line "line" into
15040c40586aSBram Moolenaar  * "buf", blanking-out special characters.  Copy less then "maxlen" bytes.
15056a5d2ac1SBram Moolenaar  * Keep the blanks at the start of the next line, this is used in win_line()
15066a5d2ac1SBram Moolenaar  * to skip those bytes if the word was OK.
15070c40586aSBram Moolenaar  */
15080c40586aSBram Moolenaar     void
spell_cat_line(char_u * buf,char_u * line,int maxlen)1509764b23c8SBram Moolenaar spell_cat_line(char_u *buf, char_u *line, int maxlen)
15100c40586aSBram Moolenaar {
15110c40586aSBram Moolenaar     char_u	*p;
15120c40586aSBram Moolenaar     int		n;
15130c40586aSBram Moolenaar 
15140c40586aSBram Moolenaar     p = skipwhite(line);
15150c40586aSBram Moolenaar     while (vim_strchr((char_u *)"*#/\"\t", *p) != NULL)
15160c40586aSBram Moolenaar 	p = skipwhite(p + 1);
15170c40586aSBram Moolenaar 
15180c40586aSBram Moolenaar     if (*p != NUL)
15190c40586aSBram Moolenaar     {
15200d6f5d97SBram Moolenaar 	// Only worth concatenating if there is something else than spaces to
15210d6f5d97SBram Moolenaar 	// concatenate.
15226a5d2ac1SBram Moolenaar 	n = (int)(p - line) + 1;
15236a5d2ac1SBram Moolenaar 	if (n < maxlen - 1)
15246a5d2ac1SBram Moolenaar 	{
15256a5d2ac1SBram Moolenaar 	    vim_memset(buf, ' ', n);
15266a5d2ac1SBram Moolenaar 	    vim_strncpy(buf +  n, p, maxlen - 1 - n);
15276a5d2ac1SBram Moolenaar 	}
15280c40586aSBram Moolenaar     }
1529402d2feaSBram Moolenaar }
1530402d2feaSBram Moolenaar 
1531a40ceaf8SBram Moolenaar /*
1532a40ceaf8SBram Moolenaar  * Structure used for the cookie argument of do_in_runtimepath().
1533a40ceaf8SBram Moolenaar  */
1534da2303d9SBram Moolenaar typedef struct spelload_S
1535da2303d9SBram Moolenaar {
15360d6f5d97SBram Moolenaar     char_u  sl_lang[MAXWLEN + 1];	// language name
15370d6f5d97SBram Moolenaar     slang_T *sl_slang;			// resulting slang_T struct
15380d6f5d97SBram Moolenaar     int	    sl_nobreak;			// NOBREAK language found
1539da2303d9SBram Moolenaar } spelload_T;
1540da2303d9SBram Moolenaar 
1541402d2feaSBram Moolenaar /*
1542cfc6c43cSBram Moolenaar  * Load word list(s) for "lang" from Vim spell file(s).
1543b765d634SBram Moolenaar  * "lang" must be the language without the region: e.g., "en".
1544402d2feaSBram Moolenaar  */
1545cfc6c43cSBram Moolenaar     static void
spell_load_lang(char_u * lang)1546764b23c8SBram Moolenaar spell_load_lang(char_u *lang)
1547402d2feaSBram Moolenaar {
1548b765d634SBram Moolenaar     char_u	fname_enc[85];
1549402d2feaSBram Moolenaar     int		r;
1550da2303d9SBram Moolenaar     spelload_T	sl;
1551b8a7b560SBram Moolenaar     int		round;
1552402d2feaSBram Moolenaar 
15530d6f5d97SBram Moolenaar     // Copy the language name to pass it to spell_load_cb() as a cookie.
15540d6f5d97SBram Moolenaar     // It's truncated when an error is detected.
1555da2303d9SBram Moolenaar     STRCPY(sl.sl_lang, lang);
1556da2303d9SBram Moolenaar     sl.sl_slang = NULL;
1557da2303d9SBram Moolenaar     sl.sl_nobreak = FALSE;
1558cfc6c43cSBram Moolenaar 
15590d6f5d97SBram Moolenaar     // We may retry when no spell file is found for the language, an
15600d6f5d97SBram Moolenaar     // autocommand may load it then.
1561b8a7b560SBram Moolenaar     for (round = 1; round <= 2; ++round)
1562b8a7b560SBram Moolenaar     {
1563b765d634SBram Moolenaar 	/*
1564b765d634SBram Moolenaar 	 * Find the first spell file for "lang" in 'runtimepath' and load it.
1565b765d634SBram Moolenaar 	 */
1566b765d634SBram Moolenaar 	vim_snprintf((char *)fname_enc, sizeof(fname_enc) - 5,
156756f78040SBram Moolenaar #ifdef VMS
156856f78040SBram Moolenaar 					"spell/%s_%s.spl",
156956f78040SBram Moolenaar #else
157056f78040SBram Moolenaar 					"spell/%s.%s.spl",
157156f78040SBram Moolenaar #endif
157256f78040SBram Moolenaar 							   lang, spell_enc());
15737f8989ddSBram Moolenaar 	r = do_in_runtimepath(fname_enc, 0, spell_load_cb, &sl);
1574402d2feaSBram Moolenaar 
1575da2303d9SBram Moolenaar 	if (r == FAIL && *sl.sl_lang != NUL)
15765482f33fSBram Moolenaar 	{
15770d6f5d97SBram Moolenaar 	    // Try loading the ASCII version.
1578b765d634SBram Moolenaar 	    vim_snprintf((char *)fname_enc, sizeof(fname_enc) - 5,
157956f78040SBram Moolenaar #ifdef VMS
158056f78040SBram Moolenaar 						  "spell/%s_ascii.spl",
158156f78040SBram Moolenaar #else
158256f78040SBram Moolenaar 						  "spell/%s.ascii.spl",
158356f78040SBram Moolenaar #endif
158456f78040SBram Moolenaar 									lang);
15857f8989ddSBram Moolenaar 	    r = do_in_runtimepath(fname_enc, 0, spell_load_cb, &sl);
1586b8a7b560SBram Moolenaar 
1587b8a7b560SBram Moolenaar 	    if (r == FAIL && *sl.sl_lang != NUL && round == 1
1588b8a7b560SBram Moolenaar 		    && apply_autocmds(EVENT_SPELLFILEMISSING, lang,
1589b8a7b560SBram Moolenaar 					      curbuf->b_fname, FALSE, curbuf))
1590b8a7b560SBram Moolenaar 		continue;
1591b8a7b560SBram Moolenaar 	    break;
1592b8a7b560SBram Moolenaar 	}
1593362e1a30SBram Moolenaar 	break;
15945482f33fSBram Moolenaar     }
1595cfc6c43cSBram Moolenaar 
1596402d2feaSBram Moolenaar     if (r == FAIL)
1597b8a7b560SBram Moolenaar     {
1598f9e3e09fSBram Moolenaar 	smsg(
159956f78040SBram Moolenaar #ifdef VMS
160056f78040SBram Moolenaar 	_("Warning: Cannot find word list \"%s_%s.spl\" or \"%s_ascii.spl\""),
160156f78040SBram Moolenaar #else
160256f78040SBram Moolenaar 	_("Warning: Cannot find word list \"%s.%s.spl\" or \"%s.ascii.spl\""),
160356f78040SBram Moolenaar #endif
16045195e456SBram Moolenaar 						     lang, spell_enc(), lang);
1605b8a7b560SBram Moolenaar     }
1606da2303d9SBram Moolenaar     else if (sl.sl_slang != NULL)
1607b765d634SBram Moolenaar     {
16080d6f5d97SBram Moolenaar 	// At least one file was loaded, now load ALL the additions.
1609b765d634SBram Moolenaar 	STRCPY(fname_enc + STRLEN(fname_enc) - 3, "add.spl");
16107f8989ddSBram Moolenaar 	do_in_runtimepath(fname_enc, DIP_ALL, spell_load_cb, &sl);
1611b765d634SBram Moolenaar     }
1612b765d634SBram Moolenaar }
1613b765d634SBram Moolenaar 
1614b765d634SBram Moolenaar /*
1615b765d634SBram Moolenaar  * Return the encoding used for spell checking: Use 'encoding', except that we
1616b765d634SBram Moolenaar  * use "latin1" for "latin9".  And limit to 60 characters (just in case).
1617b765d634SBram Moolenaar  */
16189ccfebddSBram Moolenaar     char_u *
spell_enc(void)1619764b23c8SBram Moolenaar spell_enc(void)
1620b765d634SBram Moolenaar {
1621b765d634SBram Moolenaar 
1622b765d634SBram Moolenaar     if (STRLEN(p_enc) < 60 && STRCMP(p_enc, "iso-8859-15") != 0)
1623b765d634SBram Moolenaar 	return p_enc;
1624b765d634SBram Moolenaar     return (char_u *)"latin1";
1625402d2feaSBram Moolenaar }
1626402d2feaSBram Moolenaar 
1627402d2feaSBram Moolenaar /*
1628f9184a1dSBram Moolenaar  * Get the name of the .spl file for the internal wordlist into
1629f9184a1dSBram Moolenaar  * "fname[MAXPATHL]".
1630f9184a1dSBram Moolenaar  */
1631f9184a1dSBram Moolenaar     static void
int_wordlist_spl(char_u * fname)1632764b23c8SBram Moolenaar int_wordlist_spl(char_u *fname)
1633f9184a1dSBram Moolenaar {
163456f78040SBram Moolenaar     vim_snprintf((char *)fname, MAXPATHL, SPL_FNAME_TMPL,
1635f9184a1dSBram Moolenaar 						  int_wordlist, spell_enc());
1636f9184a1dSBram Moolenaar }
1637f9184a1dSBram Moolenaar 
1638f9184a1dSBram Moolenaar /*
16394770d09aSBram Moolenaar  * Allocate a new slang_T for language "lang".  "lang" can be NULL.
1640402d2feaSBram Moolenaar  * Caller must fill "sl_next".
1641402d2feaSBram Moolenaar  */
16429ccfebddSBram Moolenaar     slang_T *
slang_alloc(char_u * lang)1643764b23c8SBram Moolenaar slang_alloc(char_u *lang)
1644402d2feaSBram Moolenaar {
1645402d2feaSBram Moolenaar     slang_T *lp;
1646402d2feaSBram Moolenaar 
1647c799fe20SBram Moolenaar     lp = ALLOC_CLEAR_ONE(slang_T);
1648402d2feaSBram Moolenaar     if (lp != NULL)
1649402d2feaSBram Moolenaar     {
16504770d09aSBram Moolenaar 	if (lang != NULL)
1651402d2feaSBram Moolenaar 	    lp->sl_name = vim_strsave(lang);
16529ba0eb85SBram Moolenaar 	ga_init2(&lp->sl_rep, sizeof(fromto_T), 10);
16534770d09aSBram Moolenaar 	ga_init2(&lp->sl_repsal, sizeof(fromto_T), 10);
16545195e456SBram Moolenaar 	lp->sl_compmax = MAXWLEN;
16555195e456SBram Moolenaar 	lp->sl_compsylmax = MAXWLEN;
16564770d09aSBram Moolenaar 	hash_init(&lp->sl_wordcount);
1657402d2feaSBram Moolenaar     }
16584770d09aSBram Moolenaar 
1659402d2feaSBram Moolenaar     return lp;
1660402d2feaSBram Moolenaar }
1661402d2feaSBram Moolenaar 
1662402d2feaSBram Moolenaar /*
1663402d2feaSBram Moolenaar  * Free the contents of an slang_T and the structure itself.
1664402d2feaSBram Moolenaar  */
16659ccfebddSBram Moolenaar     void
slang_free(slang_T * lp)1666764b23c8SBram Moolenaar slang_free(slang_T *lp)
1667402d2feaSBram Moolenaar {
1668402d2feaSBram Moolenaar     vim_free(lp->sl_name);
1669b765d634SBram Moolenaar     vim_free(lp->sl_fname);
1670b765d634SBram Moolenaar     slang_clear(lp);
1671b765d634SBram Moolenaar     vim_free(lp);
1672b765d634SBram Moolenaar }
1673b765d634SBram Moolenaar 
1674b765d634SBram Moolenaar /*
1675b765d634SBram Moolenaar  * Clear an slang_T so that the file can be reloaded.
1676b765d634SBram Moolenaar  */
16779ccfebddSBram Moolenaar     void
slang_clear(slang_T * lp)1678764b23c8SBram Moolenaar slang_clear(slang_T *lp)
1679b765d634SBram Moolenaar {
16809ba0eb85SBram Moolenaar     garray_T	*gap;
16819ba0eb85SBram Moolenaar     fromto_T	*ftp;
1682d857f0e0SBram Moolenaar     salitem_T	*smp;
16831d73c885SBram Moolenaar     int		i;
16844770d09aSBram Moolenaar     int		round;
16859ba0eb85SBram Moolenaar 
1686d23a8236SBram Moolenaar     VIM_CLEAR(lp->sl_fbyts);
1687d23a8236SBram Moolenaar     VIM_CLEAR(lp->sl_kbyts);
1688d23a8236SBram Moolenaar     VIM_CLEAR(lp->sl_pbyts);
16891d73c885SBram Moolenaar 
1690d23a8236SBram Moolenaar     VIM_CLEAR(lp->sl_fidxs);
1691d23a8236SBram Moolenaar     VIM_CLEAR(lp->sl_kidxs);
1692d23a8236SBram Moolenaar     VIM_CLEAR(lp->sl_pidxs);
16939ba0eb85SBram Moolenaar 
16944770d09aSBram Moolenaar     for (round = 1; round <= 2; ++round)
16954770d09aSBram Moolenaar     {
16964770d09aSBram Moolenaar 	gap = round == 1 ? &lp->sl_rep : &lp->sl_repsal;
16979ba0eb85SBram Moolenaar 	while (gap->ga_len > 0)
16989ba0eb85SBram Moolenaar 	{
16999ba0eb85SBram Moolenaar 	    ftp = &((fromto_T *)gap->ga_data)[--gap->ga_len];
17009ba0eb85SBram Moolenaar 	    vim_free(ftp->ft_from);
17019ba0eb85SBram Moolenaar 	    vim_free(ftp->ft_to);
17029ba0eb85SBram Moolenaar 	}
17039ba0eb85SBram Moolenaar 	ga_clear(gap);
17044770d09aSBram Moolenaar     }
1705d857f0e0SBram Moolenaar 
1706d857f0e0SBram Moolenaar     gap = &lp->sl_sal;
170742eeac35SBram Moolenaar     if (lp->sl_sofo)
17089c96f592SBram Moolenaar     {
17090d6f5d97SBram Moolenaar 	// "ga_len" is set to 1 without adding an item for latin1
17109c96f592SBram Moolenaar 	if (gap->ga_data != NULL)
17110d6f5d97SBram Moolenaar 	    // SOFOFROM and SOFOTO items: free lists of wide characters.
171242eeac35SBram Moolenaar 	    for (i = 0; i < gap->ga_len; ++i)
171342eeac35SBram Moolenaar 		vim_free(((int **)gap->ga_data)[i]);
17149c96f592SBram Moolenaar     }
171542eeac35SBram Moolenaar     else
17160d6f5d97SBram Moolenaar 	// SAL items: free salitem_T items
1717d857f0e0SBram Moolenaar 	while (gap->ga_len > 0)
1718d857f0e0SBram Moolenaar 	{
1719d857f0e0SBram Moolenaar 	    smp = &((salitem_T *)gap->ga_data)[--gap->ga_len];
1720d857f0e0SBram Moolenaar 	    vim_free(smp->sm_lead);
17210d6f5d97SBram Moolenaar 	    // Don't free sm_oneof and sm_rules, they point into sm_lead.
1722d857f0e0SBram Moolenaar 	    vim_free(smp->sm_to);
172342eeac35SBram Moolenaar 	    vim_free(smp->sm_lead_w);
172442eeac35SBram Moolenaar 	    vim_free(smp->sm_oneof_w);
172542eeac35SBram Moolenaar 	    vim_free(smp->sm_to_w);
17269ba0eb85SBram Moolenaar 	}
1727d857f0e0SBram Moolenaar     ga_clear(gap);
17289ba0eb85SBram Moolenaar 
17291d73c885SBram Moolenaar     for (i = 0; i < lp->sl_prefixcnt; ++i)
1730473de61bSBram Moolenaar 	vim_regfree(lp->sl_prefprog[i]);
17319c96f592SBram Moolenaar     lp->sl_prefixcnt = 0;
1732d23a8236SBram Moolenaar     VIM_CLEAR(lp->sl_prefprog);
17339c96f592SBram Moolenaar 
1734d23a8236SBram Moolenaar     VIM_CLEAR(lp->sl_info);
1735362e1a30SBram Moolenaar 
1736d23a8236SBram Moolenaar     VIM_CLEAR(lp->sl_midword);
17371d73c885SBram Moolenaar 
1738473de61bSBram Moolenaar     vim_regfree(lp->sl_compprog);
17395195e456SBram Moolenaar     lp->sl_compprog = NULL;
1740d23a8236SBram Moolenaar     VIM_CLEAR(lp->sl_comprules);
1741d23a8236SBram Moolenaar     VIM_CLEAR(lp->sl_compstartflags);
1742d23a8236SBram Moolenaar     VIM_CLEAR(lp->sl_compallflags);
17435195e456SBram Moolenaar 
1744d23a8236SBram Moolenaar     VIM_CLEAR(lp->sl_syllable);
17455195e456SBram Moolenaar     ga_clear(&lp->sl_syl_items);
1746ae5bce1cSBram Moolenaar 
1747899dddf8SBram Moolenaar     ga_clear_strings(&lp->sl_comppat);
1748899dddf8SBram Moolenaar 
17494770d09aSBram Moolenaar     hash_clear_all(&lp->sl_wordcount, WC_KEY_OFF);
17504770d09aSBram Moolenaar     hash_init(&lp->sl_wordcount);
1751ea424166SBram Moolenaar 
17524770d09aSBram Moolenaar     hash_clear_all(&lp->sl_map_hash, 0);
17535195e456SBram Moolenaar 
17540d6f5d97SBram Moolenaar     // Clear info from .sug file.
17554770d09aSBram Moolenaar     slang_clear_sug(lp);
17564770d09aSBram Moolenaar 
17575195e456SBram Moolenaar     lp->sl_compmax = MAXWLEN;
1758da2303d9SBram Moolenaar     lp->sl_compminlen = 0;
17595195e456SBram Moolenaar     lp->sl_compsylmax = MAXWLEN;
17605195e456SBram Moolenaar     lp->sl_regions[0] = NUL;
1761402d2feaSBram Moolenaar }
1762402d2feaSBram Moolenaar 
1763402d2feaSBram Moolenaar /*
17644770d09aSBram Moolenaar  * Clear the info from the .sug file in "lp".
17654770d09aSBram Moolenaar  */
17669ccfebddSBram Moolenaar     void
slang_clear_sug(slang_T * lp)1767764b23c8SBram Moolenaar slang_clear_sug(slang_T *lp)
17684770d09aSBram Moolenaar {
1769d23a8236SBram Moolenaar     VIM_CLEAR(lp->sl_sbyts);
1770d23a8236SBram Moolenaar     VIM_CLEAR(lp->sl_sidxs);
17714770d09aSBram Moolenaar     close_spellbuf(lp->sl_sugbuf);
17724770d09aSBram Moolenaar     lp->sl_sugbuf = NULL;
17734770d09aSBram Moolenaar     lp->sl_sugloaded = FALSE;
17744770d09aSBram Moolenaar     lp->sl_sugtime = 0;
17754770d09aSBram Moolenaar }
17764770d09aSBram Moolenaar 
17774770d09aSBram Moolenaar /*
1778cfc6c43cSBram Moolenaar  * Load one spell file and store the info into a slang_T.
1779402d2feaSBram Moolenaar  * Invoked through do_in_runtimepath().
1780402d2feaSBram Moolenaar  */
1781402d2feaSBram Moolenaar     static void
spell_load_cb(char_u * fname,void * cookie)1782764b23c8SBram Moolenaar spell_load_cb(char_u *fname, void *cookie)
1783402d2feaSBram Moolenaar {
1784da2303d9SBram Moolenaar     spelload_T	*slp = (spelload_T *)cookie;
1785da2303d9SBram Moolenaar     slang_T	*slang;
1786da2303d9SBram Moolenaar 
1787da2303d9SBram Moolenaar     slang = spell_load_file(fname, slp->sl_lang, NULL, FALSE);
1788da2303d9SBram Moolenaar     if (slang != NULL)
1789da2303d9SBram Moolenaar     {
17900d6f5d97SBram Moolenaar 	// When a previously loaded file has NOBREAK also use it for the
17910d6f5d97SBram Moolenaar 	// ".add" files.
1792da2303d9SBram Moolenaar 	if (slp->sl_nobreak && slang->sl_add)
1793da2303d9SBram Moolenaar 	    slang->sl_nobreak = TRUE;
1794da2303d9SBram Moolenaar 	else if (slang->sl_nobreak)
1795da2303d9SBram Moolenaar 	    slp->sl_nobreak = TRUE;
1796da2303d9SBram Moolenaar 
1797da2303d9SBram Moolenaar 	slp->sl_slang = slang;
1798da2303d9SBram Moolenaar     }
1799b765d634SBram Moolenaar }
1800b765d634SBram Moolenaar 
18014770d09aSBram Moolenaar 
18024770d09aSBram Moolenaar /*
18034770d09aSBram Moolenaar  * Add a word to the hashtable of common words.
18044770d09aSBram Moolenaar  * If it's already there then the counter is increased.
18054770d09aSBram Moolenaar  */
18069ccfebddSBram Moolenaar     void
count_common_word(slang_T * lp,char_u * word,int len,int count)1807764b23c8SBram Moolenaar count_common_word(
1808764b23c8SBram Moolenaar     slang_T	*lp,
1809764b23c8SBram Moolenaar     char_u	*word,
18100d6f5d97SBram Moolenaar     int		len,	    // word length, -1 for up to NUL
18110d6f5d97SBram Moolenaar     int		count)	    // 1 to count once, 10 to init
18124770d09aSBram Moolenaar {
18134770d09aSBram Moolenaar     hash_T	hash;
18144770d09aSBram Moolenaar     hashitem_T	*hi;
18154770d09aSBram Moolenaar     wordcount_T	*wc;
18164770d09aSBram Moolenaar     char_u	buf[MAXWLEN];
18174770d09aSBram Moolenaar     char_u	*p;
18184770d09aSBram Moolenaar 
18194770d09aSBram Moolenaar     if (len == -1)
18204770d09aSBram Moolenaar 	p = word;
18215bcc5a1fSBram Moolenaar     else if (len >= MAXWLEN)
18225bcc5a1fSBram Moolenaar 	return;
18234770d09aSBram Moolenaar     else
18244770d09aSBram Moolenaar     {
18254770d09aSBram Moolenaar 	vim_strncpy(buf, word, len);
18264770d09aSBram Moolenaar 	p = buf;
18274770d09aSBram Moolenaar     }
18284770d09aSBram Moolenaar 
18294770d09aSBram Moolenaar     hash = hash_hash(p);
18304770d09aSBram Moolenaar     hi = hash_lookup(&lp->sl_wordcount, p, hash);
18314770d09aSBram Moolenaar     if (HASHITEM_EMPTY(hi))
18324770d09aSBram Moolenaar     {
1833c799fe20SBram Moolenaar 	wc = alloc(sizeof(wordcount_T) + STRLEN(p));
18344770d09aSBram Moolenaar 	if (wc == NULL)
18354770d09aSBram Moolenaar 	    return;
18364770d09aSBram Moolenaar 	STRCPY(wc->wc_word, p);
18374770d09aSBram Moolenaar 	wc->wc_count = count;
18384770d09aSBram Moolenaar 	hash_add_item(&lp->sl_wordcount, hi, wc->wc_word, hash);
18394770d09aSBram Moolenaar     }
18404770d09aSBram Moolenaar     else
18414770d09aSBram Moolenaar     {
18424770d09aSBram Moolenaar 	wc = HI2WC(hi);
18430d6f5d97SBram Moolenaar 	if ((wc->wc_count += count) < (unsigned)count)	// check for overflow
18444770d09aSBram Moolenaar 	    wc->wc_count = MAXWORDCOUNT;
18454770d09aSBram Moolenaar     }
18464770d09aSBram Moolenaar }
18474770d09aSBram Moolenaar 
18484770d09aSBram Moolenaar /*
184995529568SBram Moolenaar  * Return TRUE if byte "n" appears in "str".
18506de6853cSBram Moolenaar  * Like strchr() but independent of locale.
18516de6853cSBram Moolenaar  */
18529ccfebddSBram Moolenaar     int
byte_in_str(char_u * str,int n)1853764b23c8SBram Moolenaar byte_in_str(char_u *str, int n)
18546de6853cSBram Moolenaar {
18556de6853cSBram Moolenaar     char_u	*p;
18566de6853cSBram Moolenaar 
18576de6853cSBram Moolenaar     for (p = str; *p != NUL; ++p)
185895529568SBram Moolenaar 	if (*p == n)
18596de6853cSBram Moolenaar 	    return TRUE;
18606de6853cSBram Moolenaar     return FALSE;
18616de6853cSBram Moolenaar }
18626de6853cSBram Moolenaar 
18635195e456SBram Moolenaar #define SY_MAXLEN   30
18645195e456SBram Moolenaar typedef struct syl_item_S
18655195e456SBram Moolenaar {
18660d6f5d97SBram Moolenaar     char_u	sy_chars[SY_MAXLEN];	    // the sequence of chars
18675195e456SBram Moolenaar     int		sy_len;
18685195e456SBram Moolenaar } syl_item_T;
18695195e456SBram Moolenaar 
18705195e456SBram Moolenaar /*
18715195e456SBram Moolenaar  * Truncate "slang->sl_syllable" at the first slash and put the following items
18725195e456SBram Moolenaar  * in "slang->sl_syl_items".
18735195e456SBram Moolenaar  */
18749ccfebddSBram Moolenaar     int
init_syl_tab(slang_T * slang)1875764b23c8SBram Moolenaar init_syl_tab(slang_T *slang)
18765195e456SBram Moolenaar {
18775195e456SBram Moolenaar     char_u	*p;
18785195e456SBram Moolenaar     char_u	*s;
18795195e456SBram Moolenaar     int		l;
18805195e456SBram Moolenaar     syl_item_T	*syl;
18815195e456SBram Moolenaar 
18825195e456SBram Moolenaar     ga_init2(&slang->sl_syl_items, sizeof(syl_item_T), 4);
18835195e456SBram Moolenaar     p = vim_strchr(slang->sl_syllable, '/');
18845195e456SBram Moolenaar     while (p != NULL)
18855195e456SBram Moolenaar     {
18865195e456SBram Moolenaar 	*p++ = NUL;
18870d6f5d97SBram Moolenaar 	if (*p == NUL)	    // trailing slash
18885195e456SBram Moolenaar 	    break;
18895195e456SBram Moolenaar 	s = p;
18905195e456SBram Moolenaar 	p = vim_strchr(p, '/');
18915195e456SBram Moolenaar 	if (p == NULL)
1892a93fa7eeSBram Moolenaar 	    l = (int)STRLEN(s);
18935195e456SBram Moolenaar 	else
1894a93fa7eeSBram Moolenaar 	    l = (int)(p - s);
18955195e456SBram Moolenaar 	if (l >= SY_MAXLEN)
18965195e456SBram Moolenaar 	    return SP_FORMERROR;
18975195e456SBram Moolenaar 	if (ga_grow(&slang->sl_syl_items, 1) == FAIL)
18986de6853cSBram Moolenaar 	    return SP_OTHERERROR;
18995195e456SBram Moolenaar 	syl = ((syl_item_T *)slang->sl_syl_items.ga_data)
19005195e456SBram Moolenaar 					       + slang->sl_syl_items.ga_len++;
19015195e456SBram Moolenaar 	vim_strncpy(syl->sy_chars, s, l);
19025195e456SBram Moolenaar 	syl->sy_len = l;
19035195e456SBram Moolenaar     }
19045195e456SBram Moolenaar     return OK;
19055195e456SBram Moolenaar }
19065195e456SBram Moolenaar 
19075195e456SBram Moolenaar /*
19085195e456SBram Moolenaar  * Count the number of syllables in "word".
19095195e456SBram Moolenaar  * When "word" contains spaces the syllables after the last space are counted.
19105195e456SBram Moolenaar  * Returns zero if syllables are not defines.
19115195e456SBram Moolenaar  */
19125195e456SBram Moolenaar     static int
count_syllables(slang_T * slang,char_u * word)1913764b23c8SBram Moolenaar count_syllables(slang_T *slang, char_u *word)
19145195e456SBram Moolenaar {
19155195e456SBram Moolenaar     int		cnt = 0;
19165195e456SBram Moolenaar     int		skip = FALSE;
19175195e456SBram Moolenaar     char_u	*p;
19185195e456SBram Moolenaar     int		len;
19195195e456SBram Moolenaar     int		i;
19205195e456SBram Moolenaar     syl_item_T	*syl;
19215195e456SBram Moolenaar     int		c;
19225195e456SBram Moolenaar 
19235195e456SBram Moolenaar     if (slang->sl_syllable == NULL)
19245195e456SBram Moolenaar 	return 0;
19255195e456SBram Moolenaar 
19265195e456SBram Moolenaar     for (p = word; *p != NUL; p += len)
19275195e456SBram Moolenaar     {
19280d6f5d97SBram Moolenaar 	// When running into a space reset counter.
19295195e456SBram Moolenaar 	if (*p == ' ')
19305195e456SBram Moolenaar 	{
19315195e456SBram Moolenaar 	    len = 1;
19325195e456SBram Moolenaar 	    cnt = 0;
19335195e456SBram Moolenaar 	    continue;
19345195e456SBram Moolenaar 	}
19355195e456SBram Moolenaar 
19360d6f5d97SBram Moolenaar 	// Find longest match of syllable items.
19375195e456SBram Moolenaar 	len = 0;
19385195e456SBram Moolenaar 	for (i = 0; i < slang->sl_syl_items.ga_len; ++i)
19395195e456SBram Moolenaar 	{
19405195e456SBram Moolenaar 	    syl = ((syl_item_T *)slang->sl_syl_items.ga_data) + i;
19415195e456SBram Moolenaar 	    if (syl->sy_len > len
19425195e456SBram Moolenaar 			       && STRNCMP(p, syl->sy_chars, syl->sy_len) == 0)
19435195e456SBram Moolenaar 		len = syl->sy_len;
19445195e456SBram Moolenaar 	}
19450d6f5d97SBram Moolenaar 	if (len != 0)	// found a match, count syllable
19465195e456SBram Moolenaar 	{
19475195e456SBram Moolenaar 	    ++cnt;
19485195e456SBram Moolenaar 	    skip = FALSE;
19495195e456SBram Moolenaar 	}
19505195e456SBram Moolenaar 	else
19515195e456SBram Moolenaar 	{
19520d6f5d97SBram Moolenaar 	    // No recognized syllable item, at least a syllable char then?
19535195e456SBram Moolenaar 	    c = mb_ptr2char(p);
19545195e456SBram Moolenaar 	    len = (*mb_ptr2len)(p);
19555195e456SBram Moolenaar 	    if (vim_strchr(slang->sl_syllable, c) == NULL)
19560d6f5d97SBram Moolenaar 		skip = FALSE;	    // No, search for next syllable
19575195e456SBram Moolenaar 	    else if (!skip)
19585195e456SBram Moolenaar 	    {
19590d6f5d97SBram Moolenaar 		++cnt;		    // Yes, count it
19600d6f5d97SBram Moolenaar 		skip = TRUE;	    // don't count following syllable chars
19615195e456SBram Moolenaar 	    }
19625195e456SBram Moolenaar 	}
19635195e456SBram Moolenaar     }
19645195e456SBram Moolenaar     return cnt;
19655195e456SBram Moolenaar }
19665195e456SBram Moolenaar 
19675195e456SBram Moolenaar /*
1968860cae1cSBram Moolenaar  * Parse 'spelllang' and set w_s->b_langp accordingly.
1969f417f2b6SBram Moolenaar  * Returns NULL if it's OK, an error message otherwise.
1970402d2feaSBram Moolenaar  */
1971f9e3e09fSBram Moolenaar     char *
did_set_spelllang(win_T * wp)1972764b23c8SBram Moolenaar did_set_spelllang(win_T *wp)
1973402d2feaSBram Moolenaar {
1974402d2feaSBram Moolenaar     garray_T	ga;
1975f417f2b6SBram Moolenaar     char_u	*splp;
1976402d2feaSBram Moolenaar     char_u	*region;
1977b6356339SBram Moolenaar     char_u	region_cp[3];
19780a5fe214SBram Moolenaar     int		filename;
1979402d2feaSBram Moolenaar     int		region_mask;
19808b96d64cSBram Moolenaar     slang_T	*slang;
1981402d2feaSBram Moolenaar     int		c;
1982f417f2b6SBram Moolenaar     char_u	lang[MAXWLEN + 1];
19839ba0eb85SBram Moolenaar     char_u	spf_name[MAXPATHL];
1984f417f2b6SBram Moolenaar     int		len;
1985f417f2b6SBram Moolenaar     char_u	*p;
19867887d88aSBram Moolenaar     int		round;
1987f9184a1dSBram Moolenaar     char_u	*spf;
19880dc065eeSBram Moolenaar     char_u	*use_region = NULL;
19890dc065eeSBram Moolenaar     int		dont_use_region = FALSE;
1990da2303d9SBram Moolenaar     int		nobreak = FALSE;
19918b96d64cSBram Moolenaar     int		i, j;
19928b96d64cSBram Moolenaar     langp_T	*lp, *lp2;
1993706cdebcSBram Moolenaar     static int	recursive = FALSE;
1994f9e3e09fSBram Moolenaar     char	*ret_msg = NULL;
1995706cdebcSBram Moolenaar     char_u	*spl_copy;
19967c0a2f36SBram Moolenaar     bufref_T	bufref;
19977c0a2f36SBram Moolenaar 
19987c0a2f36SBram Moolenaar     set_bufref(&bufref, wp->w_buffer);
1999706cdebcSBram Moolenaar 
20000d6f5d97SBram Moolenaar     // We don't want to do this recursively.  May happen when a language is
20010d6f5d97SBram Moolenaar     // not available and the SpellFileMissing autocommand opens a new buffer
20020d6f5d97SBram Moolenaar     // in which 'spell' is set.
2003706cdebcSBram Moolenaar     if (recursive)
2004706cdebcSBram Moolenaar 	return NULL;
2005706cdebcSBram Moolenaar     recursive = TRUE;
2006402d2feaSBram Moolenaar 
2007402d2feaSBram Moolenaar     ga_init2(&ga, sizeof(langp_T), 2);
2008860cae1cSBram Moolenaar     clear_midword(wp);
2009402d2feaSBram Moolenaar 
20100d6f5d97SBram Moolenaar     // Make a copy of 'spelllang', the SpellFileMissing autocommands may change
20110d6f5d97SBram Moolenaar     // it under our fingers.
2012860cae1cSBram Moolenaar     spl_copy = vim_strsave(wp->w_s->b_p_spl);
2013706cdebcSBram Moolenaar     if (spl_copy == NULL)
2014706cdebcSBram Moolenaar 	goto theend;
2015706cdebcSBram Moolenaar 
2016cc63c647SBram Moolenaar     wp->w_s->b_cjk = 0;
2017cc63c647SBram Moolenaar 
20180d6f5d97SBram Moolenaar     // Loop over comma separated language names.
2019706cdebcSBram Moolenaar     for (splp = spl_copy; *splp != NUL; )
2020f417f2b6SBram Moolenaar     {
20218f130edaSBram Moolenaar 	// Get one language name.
2022f417f2b6SBram Moolenaar 	copy_option_part(&splp, lang, MAXWLEN, ",");
2023f417f2b6SBram Moolenaar 	region = NULL;
2024a93fa7eeSBram Moolenaar 	len = (int)STRLEN(lang);
20250a5fe214SBram Moolenaar 
2026f154f3abSBram Moolenaar 	if (!valid_spelllang(lang))
20278f130edaSBram Moolenaar 	    continue;
20288f130edaSBram Moolenaar 
2029cc63c647SBram Moolenaar 	if (STRCMP(lang, "cjk") == 0)
2030cc63c647SBram Moolenaar 	{
2031cc63c647SBram Moolenaar 	    wp->w_s->b_cjk = 1;
2032cc63c647SBram Moolenaar 	    continue;
2033cc63c647SBram Moolenaar 	}
2034cc63c647SBram Moolenaar 
20350d6f5d97SBram Moolenaar 	// If the name ends in ".spl" use it as the name of the spell file.
20360d6f5d97SBram Moolenaar 	// If there is a region name let "region" point to it and remove it
20370d6f5d97SBram Moolenaar 	// from the name.
20380a5fe214SBram Moolenaar 	if (len > 4 && fnamecmp(lang + len - 4, ".spl") == 0)
20390a5fe214SBram Moolenaar 	{
20400a5fe214SBram Moolenaar 	    filename = TRUE;
20410a5fe214SBram Moolenaar 
20420d6f5d97SBram Moolenaar 	    // Locate a region and remove it from the file name.
2043b6356339SBram Moolenaar 	    p = vim_strchr(gettail(lang), '_');
2044b6356339SBram Moolenaar 	    if (p != NULL && ASCII_ISALPHA(p[1]) && ASCII_ISALPHA(p[2])
2045b6356339SBram Moolenaar 						      && !ASCII_ISALPHA(p[3]))
2046b6356339SBram Moolenaar 	    {
2047b6356339SBram Moolenaar 		vim_strncpy(region_cp, p + 1, 2);
2048b6356339SBram Moolenaar 		mch_memmove(p, p + 3, len - (p - lang) - 2);
2049b6356339SBram Moolenaar 		region = region_cp;
2050b6356339SBram Moolenaar 	    }
2051b6356339SBram Moolenaar 	    else
2052b6356339SBram Moolenaar 		dont_use_region = TRUE;
2053b6356339SBram Moolenaar 
20540d6f5d97SBram Moolenaar 	    // Check if we loaded this language before.
2055aeea7215SBram Moolenaar 	    FOR_ALL_SPELL_LANGS(slang)
205699499b1cSBram Moolenaar 		if (fullpathcmp(lang, slang->sl_fname, FALSE, TRUE) == FPC_SAME)
20570a5fe214SBram Moolenaar 		    break;
20580a5fe214SBram Moolenaar 	}
20590a5fe214SBram Moolenaar 	else
20600a5fe214SBram Moolenaar 	{
20610a5fe214SBram Moolenaar 	    filename = FALSE;
2062f417f2b6SBram Moolenaar 	    if (len > 3 && lang[len - 3] == '_')
2063f417f2b6SBram Moolenaar 	    {
2064f417f2b6SBram Moolenaar 		region = lang + len - 2;
2065f417f2b6SBram Moolenaar 		len -= 3;
2066f417f2b6SBram Moolenaar 		lang[len] = NUL;
2067402d2feaSBram Moolenaar 	    }
20680dc065eeSBram Moolenaar 	    else
20690dc065eeSBram Moolenaar 		dont_use_region = TRUE;
2070402d2feaSBram Moolenaar 
20710d6f5d97SBram Moolenaar 	    // Check if we loaded this language before.
2072aeea7215SBram Moolenaar 	    FOR_ALL_SPELL_LANGS(slang)
20738b96d64cSBram Moolenaar 		if (STRICMP(lang, slang->sl_name) == 0)
2074402d2feaSBram Moolenaar 		    break;
20750a5fe214SBram Moolenaar 	}
2076402d2feaSBram Moolenaar 
2077b6356339SBram Moolenaar 	if (region != NULL)
2078b6356339SBram Moolenaar 	{
20790d6f5d97SBram Moolenaar 	    // If the region differs from what was used before then don't
20800d6f5d97SBram Moolenaar 	    // use it for 'spellfile'.
2081b6356339SBram Moolenaar 	    if (use_region != NULL && STRCMP(region, use_region) != 0)
2082b6356339SBram Moolenaar 		dont_use_region = TRUE;
2083b6356339SBram Moolenaar 	    use_region = region;
2084b6356339SBram Moolenaar 	}
2085b6356339SBram Moolenaar 
20860d6f5d97SBram Moolenaar 	// If not found try loading the language now.
20878b96d64cSBram Moolenaar 	if (slang == NULL)
20880a5fe214SBram Moolenaar 	{
20890a5fe214SBram Moolenaar 	    if (filename)
20900a5fe214SBram Moolenaar 		(void)spell_load_file(lang, lang, NULL, FALSE);
20910a5fe214SBram Moolenaar 	    else
2092706cdebcSBram Moolenaar 	    {
2093f417f2b6SBram Moolenaar 		spell_load_lang(lang);
20940d6f5d97SBram Moolenaar 		// SpellFileMissing autocommands may do anything, including
20950d6f5d97SBram Moolenaar 		// destroying the buffer we are using...
20967c0a2f36SBram Moolenaar 		if (!bufref_valid(&bufref))
2097706cdebcSBram Moolenaar 		{
2098f9e3e09fSBram Moolenaar 		    ret_msg = N_("E797: SpellFileMissing autocommand deleted buffer");
2099706cdebcSBram Moolenaar 		    goto theend;
2100706cdebcSBram Moolenaar 		}
2101706cdebcSBram Moolenaar 	    }
21020a5fe214SBram Moolenaar 	}
2103402d2feaSBram Moolenaar 
2104cfc6c43cSBram Moolenaar 	/*
2105f417f2b6SBram Moolenaar 	 * Loop over the languages, there can be several files for "lang".
2106cfc6c43cSBram Moolenaar 	 */
2107aeea7215SBram Moolenaar 	FOR_ALL_SPELL_LANGS(slang)
210899499b1cSBram Moolenaar 	    if (filename ? fullpathcmp(lang, slang->sl_fname, FALSE, TRUE)
210999499b1cSBram Moolenaar 								    == FPC_SAME
21108b96d64cSBram Moolenaar 			 : STRICMP(lang, slang->sl_name) == 0)
2111402d2feaSBram Moolenaar 	    {
2112402d2feaSBram Moolenaar 		region_mask = REGION_ALL;
21130a5fe214SBram Moolenaar 		if (!filename && region != NULL)
2114402d2feaSBram Moolenaar 		{
21150d6f5d97SBram Moolenaar 		    // find region in sl_regions
21168b96d64cSBram Moolenaar 		    c = find_region(slang->sl_regions, region);
2117402d2feaSBram Moolenaar 		    if (c == REGION_ALL)
2118402d2feaSBram Moolenaar 		    {
21198b96d64cSBram Moolenaar 			if (slang->sl_add)
21200dc065eeSBram Moolenaar 			{
21218b96d64cSBram Moolenaar 			    if (*slang->sl_regions != NUL)
21220d6f5d97SBram Moolenaar 				// This addition file is for other regions.
21230dc065eeSBram Moolenaar 				region_mask = 0;
21240dc065eeSBram Moolenaar 			}
21250dc065eeSBram Moolenaar 			else
21260d6f5d97SBram Moolenaar 			    // This is probably an error.  Give a warning and
21270d6f5d97SBram Moolenaar 			    // accept the words anyway.
2128f9e3e09fSBram Moolenaar 			    smsg(_("Warning: region %s not supported"),
2129f417f2b6SBram Moolenaar 								      region);
2130402d2feaSBram Moolenaar 		    }
2131402d2feaSBram Moolenaar 		    else
2132402d2feaSBram Moolenaar 			region_mask = 1 << c;
2133402d2feaSBram Moolenaar 		}
2134402d2feaSBram Moolenaar 
21350dc065eeSBram Moolenaar 		if (region_mask != 0)
21360dc065eeSBram Moolenaar 		{
2137402d2feaSBram Moolenaar 		    if (ga_grow(&ga, 1) == FAIL)
2138402d2feaSBram Moolenaar 		    {
2139402d2feaSBram Moolenaar 			ga_clear(&ga);
2140e29a27f6SBram Moolenaar 			ret_msg = e_out_of_memory;
2141706cdebcSBram Moolenaar 			goto theend;
2142402d2feaSBram Moolenaar 		    }
21438b96d64cSBram Moolenaar 		    LANGP_ENTRY(ga, ga.ga_len)->lp_slang = slang;
2144402d2feaSBram Moolenaar 		    LANGP_ENTRY(ga, ga.ga_len)->lp_region = region_mask;
2145402d2feaSBram Moolenaar 		    ++ga.ga_len;
2146860cae1cSBram Moolenaar 		    use_midword(slang, wp);
21478b96d64cSBram Moolenaar 		    if (slang->sl_nobreak)
2148da2303d9SBram Moolenaar 			nobreak = TRUE;
2149402d2feaSBram Moolenaar 		}
2150402d2feaSBram Moolenaar 	    }
21510dc065eeSBram Moolenaar     }
2152402d2feaSBram Moolenaar 
21530d6f5d97SBram Moolenaar     // round 0: load int_wordlist, if possible.
21540d6f5d97SBram Moolenaar     // round 1: load first name in 'spellfile'.
21550d6f5d97SBram Moolenaar     // round 2: load second name in 'spellfile.
21560d6f5d97SBram Moolenaar     // etc.
2157860cae1cSBram Moolenaar     spf = curwin->w_s->b_p_spf;
2158f9184a1dSBram Moolenaar     for (round = 0; round == 0 || *spf != NUL; ++round)
21599ba0eb85SBram Moolenaar     {
2160f9184a1dSBram Moolenaar 	if (round == 0)
21617887d88aSBram Moolenaar 	{
21620d6f5d97SBram Moolenaar 	    // Internal wordlist, if there is one.
2163f9184a1dSBram Moolenaar 	    if (int_wordlist == NULL)
21647887d88aSBram Moolenaar 		continue;
2165f9184a1dSBram Moolenaar 	    int_wordlist_spl(spf_name);
21667887d88aSBram Moolenaar 	}
21677887d88aSBram Moolenaar 	else
21687887d88aSBram Moolenaar 	{
21690d6f5d97SBram Moolenaar 	    // One entry in 'spellfile'.
2170f9184a1dSBram Moolenaar 	    copy_option_part(&spf, spf_name, MAXPATHL - 5, ",");
2171f9184a1dSBram Moolenaar 	    STRCAT(spf_name, ".spl");
2172f9184a1dSBram Moolenaar 
21730d6f5d97SBram Moolenaar 	    // If it was already found above then skip it.
2174f9184a1dSBram Moolenaar 	    for (c = 0; c < ga.ga_len; ++c)
2175ac6e65f8SBram Moolenaar 	    {
2176ac6e65f8SBram Moolenaar 		p = LANGP_ENTRY(ga, c)->lp_slang->sl_fname;
217799499b1cSBram Moolenaar 		if (p != NULL && fullpathcmp(spf_name, p, FALSE, TRUE)
217899499b1cSBram Moolenaar 								== FPC_SAME)
2179f9184a1dSBram Moolenaar 		    break;
2180ac6e65f8SBram Moolenaar 	    }
2181f9184a1dSBram Moolenaar 	    if (c < ga.ga_len)
21827887d88aSBram Moolenaar 		continue;
21837887d88aSBram Moolenaar 	}
21847887d88aSBram Moolenaar 
21850d6f5d97SBram Moolenaar 	// Check if it was loaded already.
2186aeea7215SBram Moolenaar 	FOR_ALL_SPELL_LANGS(slang)
218799499b1cSBram Moolenaar 	    if (fullpathcmp(spf_name, slang->sl_fname, FALSE, TRUE)
218899499b1cSBram Moolenaar 								== FPC_SAME)
21899ba0eb85SBram Moolenaar 		break;
21908b96d64cSBram Moolenaar 	if (slang == NULL)
21919ba0eb85SBram Moolenaar 	{
21920d6f5d97SBram Moolenaar 	    // Not loaded, try loading it now.  The language name includes the
21930d6f5d97SBram Moolenaar 	    // region name, the region is ignored otherwise.  for int_wordlist
21940d6f5d97SBram Moolenaar 	    // use an arbitrary name.
2195f9184a1dSBram Moolenaar 	    if (round == 0)
2196f9184a1dSBram Moolenaar 		STRCPY(lang, "internal wordlist");
2197f9184a1dSBram Moolenaar 	    else
21987887d88aSBram Moolenaar 	    {
2199f9184a1dSBram Moolenaar 		vim_strncpy(lang, gettail(spf_name), MAXWLEN);
2200f417f2b6SBram Moolenaar 		p = vim_strchr(lang, '.');
2201f417f2b6SBram Moolenaar 		if (p != NULL)
22020d6f5d97SBram Moolenaar 		    *p = NUL;	// truncate at ".encoding.add"
22037887d88aSBram Moolenaar 	    }
22048b96d64cSBram Moolenaar 	    slang = spell_load_file(spf_name, lang, NULL, TRUE);
2205da2303d9SBram Moolenaar 
22060d6f5d97SBram Moolenaar 	    // If one of the languages has NOBREAK we assume the addition
22070d6f5d97SBram Moolenaar 	    // files also have this.
22088b96d64cSBram Moolenaar 	    if (slang != NULL && nobreak)
22098b96d64cSBram Moolenaar 		slang->sl_nobreak = TRUE;
22109ba0eb85SBram Moolenaar 	}
22118b96d64cSBram Moolenaar 	if (slang != NULL && ga_grow(&ga, 1) == OK)
22129ba0eb85SBram Moolenaar 	{
22130dc065eeSBram Moolenaar 	    region_mask = REGION_ALL;
22140dc065eeSBram Moolenaar 	    if (use_region != NULL && !dont_use_region)
22150dc065eeSBram Moolenaar 	    {
22160d6f5d97SBram Moolenaar 		// find region in sl_regions
22178b96d64cSBram Moolenaar 		c = find_region(slang->sl_regions, use_region);
22180dc065eeSBram Moolenaar 		if (c != REGION_ALL)
22190dc065eeSBram Moolenaar 		    region_mask = 1 << c;
22208b96d64cSBram Moolenaar 		else if (*slang->sl_regions != NUL)
22210d6f5d97SBram Moolenaar 		    // This spell file is for other regions.
22220dc065eeSBram Moolenaar 		    region_mask = 0;
22230dc065eeSBram Moolenaar 	    }
22240dc065eeSBram Moolenaar 
22250dc065eeSBram Moolenaar 	    if (region_mask != 0)
22260dc065eeSBram Moolenaar 	    {
22278b96d64cSBram Moolenaar 		LANGP_ENTRY(ga, ga.ga_len)->lp_slang = slang;
22288b96d64cSBram Moolenaar 		LANGP_ENTRY(ga, ga.ga_len)->lp_sallang = NULL;
22298b96d64cSBram Moolenaar 		LANGP_ENTRY(ga, ga.ga_len)->lp_replang = NULL;
22300dc065eeSBram Moolenaar 		LANGP_ENTRY(ga, ga.ga_len)->lp_region = region_mask;
22319ba0eb85SBram Moolenaar 		++ga.ga_len;
2232860cae1cSBram Moolenaar 		use_midword(slang, wp);
22339ba0eb85SBram Moolenaar 	    }
22349ba0eb85SBram Moolenaar 	}
22350dc065eeSBram Moolenaar     }
22369ba0eb85SBram Moolenaar 
22370d6f5d97SBram Moolenaar     // Everything is fine, store the new b_langp value.
2238860cae1cSBram Moolenaar     ga_clear(&wp->w_s->b_langp);
2239860cae1cSBram Moolenaar     wp->w_s->b_langp = ga;
2240402d2feaSBram Moolenaar 
22410d6f5d97SBram Moolenaar     // For each language figure out what language to use for sound folding and
22420d6f5d97SBram Moolenaar     // REP items.  If the language doesn't support it itself use another one
22430d6f5d97SBram Moolenaar     // with the same name.  E.g. for "en-math" use "en".
22448b96d64cSBram Moolenaar     for (i = 0; i < ga.ga_len; ++i)
22458b96d64cSBram Moolenaar     {
22468b96d64cSBram Moolenaar 	lp = LANGP_ENTRY(ga, i);
22478b96d64cSBram Moolenaar 
22480d6f5d97SBram Moolenaar 	// sound folding
22498b96d64cSBram Moolenaar 	if (lp->lp_slang->sl_sal.ga_len > 0)
22500d6f5d97SBram Moolenaar 	    // language does sound folding itself
22518b96d64cSBram Moolenaar 	    lp->lp_sallang = lp->lp_slang;
22528b96d64cSBram Moolenaar 	else
22530d6f5d97SBram Moolenaar 	    // find first similar language that does sound folding
22548b96d64cSBram Moolenaar 	    for (j = 0; j < ga.ga_len; ++j)
22558b96d64cSBram Moolenaar 	    {
22568b96d64cSBram Moolenaar 		lp2 = LANGP_ENTRY(ga, j);
22578b96d64cSBram Moolenaar 		if (lp2->lp_slang->sl_sal.ga_len > 0
22588b96d64cSBram Moolenaar 			&& STRNCMP(lp->lp_slang->sl_name,
22598b96d64cSBram Moolenaar 					      lp2->lp_slang->sl_name, 2) == 0)
22608b96d64cSBram Moolenaar 		{
22618b96d64cSBram Moolenaar 		    lp->lp_sallang = lp2->lp_slang;
22628b96d64cSBram Moolenaar 		    break;
22638b96d64cSBram Moolenaar 		}
22648b96d64cSBram Moolenaar 	    }
22658b96d64cSBram Moolenaar 
22660d6f5d97SBram Moolenaar 	// REP items
22678b96d64cSBram Moolenaar 	if (lp->lp_slang->sl_rep.ga_len > 0)
22680d6f5d97SBram Moolenaar 	    // language has REP items itself
22698b96d64cSBram Moolenaar 	    lp->lp_replang = lp->lp_slang;
22708b96d64cSBram Moolenaar 	else
22710d6f5d97SBram Moolenaar 	    // find first similar language that has REP items
22728b96d64cSBram Moolenaar 	    for (j = 0; j < ga.ga_len; ++j)
22738b96d64cSBram Moolenaar 	    {
22748b96d64cSBram Moolenaar 		lp2 = LANGP_ENTRY(ga, j);
22758b96d64cSBram Moolenaar 		if (lp2->lp_slang->sl_rep.ga_len > 0
22768b96d64cSBram Moolenaar 			&& STRNCMP(lp->lp_slang->sl_name,
22778b96d64cSBram Moolenaar 					      lp2->lp_slang->sl_name, 2) == 0)
22788b96d64cSBram Moolenaar 		{
22798b96d64cSBram Moolenaar 		    lp->lp_replang = lp2->lp_slang;
22808b96d64cSBram Moolenaar 		    break;
22818b96d64cSBram Moolenaar 		}
22828b96d64cSBram Moolenaar 	    }
22838b96d64cSBram Moolenaar     }
2284d569a9e7SBram Moolenaar     redraw_win_later(wp, NOT_VALID);
22858b96d64cSBram Moolenaar 
2286706cdebcSBram Moolenaar theend:
2287706cdebcSBram Moolenaar     vim_free(spl_copy);
2288706cdebcSBram Moolenaar     recursive = FALSE;
2289706cdebcSBram Moolenaar     return ret_msg;
2290402d2feaSBram Moolenaar }
2291402d2feaSBram Moolenaar 
2292402d2feaSBram Moolenaar /*
22939c96f592SBram Moolenaar  * Clear the midword characters for buffer "buf".
22949c96f592SBram Moolenaar  */
22959c96f592SBram Moolenaar     static void
clear_midword(win_T * wp)2296764b23c8SBram Moolenaar clear_midword(win_T *wp)
22979c96f592SBram Moolenaar {
2298a80faa89SBram Moolenaar     CLEAR_FIELD(wp->w_s->b_spell_ismw);
2299d23a8236SBram Moolenaar     VIM_CLEAR(wp->w_s->b_spell_ismw_mb);
23009c96f592SBram Moolenaar }
23019c96f592SBram Moolenaar 
23029c96f592SBram Moolenaar /*
23039c96f592SBram Moolenaar  * Use the "sl_midword" field of language "lp" for buffer "buf".
23049c96f592SBram Moolenaar  * They add up to any currently used midword characters.
23059c96f592SBram Moolenaar  */
23069c96f592SBram Moolenaar     static void
use_midword(slang_T * lp,win_T * wp)2307764b23c8SBram Moolenaar use_midword(slang_T *lp, win_T *wp)
23089c96f592SBram Moolenaar {
23099c96f592SBram Moolenaar     char_u	*p;
23109c96f592SBram Moolenaar 
23110d6f5d97SBram Moolenaar     if (lp->sl_midword == NULL)	    // there aren't any
23120dc065eeSBram Moolenaar 	return;
23130dc065eeSBram Moolenaar 
23149c96f592SBram Moolenaar     for (p = lp->sl_midword; *p != NUL; )
23159c96f592SBram Moolenaar 	if (has_mbyte)
23169c96f592SBram Moolenaar 	{
23179c96f592SBram Moolenaar 	    int	    c, l, n;
23189c96f592SBram Moolenaar 	    char_u  *bp;
23199c96f592SBram Moolenaar 
23209c96f592SBram Moolenaar 	    c = mb_ptr2char(p);
23210fa313a7SBram Moolenaar 	    l = (*mb_ptr2len)(p);
23220fa313a7SBram Moolenaar 	    if (c < 256 && l <= 2)
2323860cae1cSBram Moolenaar 		wp->w_s->b_spell_ismw[c] = TRUE;
2324860cae1cSBram Moolenaar 	    else if (wp->w_s->b_spell_ismw_mb == NULL)
23250d6f5d97SBram Moolenaar 		// First multi-byte char in "b_spell_ismw_mb".
2326860cae1cSBram Moolenaar 		wp->w_s->b_spell_ismw_mb = vim_strnsave(p, l);
23279c96f592SBram Moolenaar 	    else
23289c96f592SBram Moolenaar 	    {
23290d6f5d97SBram Moolenaar 		// Append multi-byte chars to "b_spell_ismw_mb".
2330860cae1cSBram Moolenaar 		n = (int)STRLEN(wp->w_s->b_spell_ismw_mb);
2331860cae1cSBram Moolenaar 		bp = vim_strnsave(wp->w_s->b_spell_ismw_mb, n + l);
23329c96f592SBram Moolenaar 		if (bp != NULL)
23339c96f592SBram Moolenaar 		{
2334860cae1cSBram Moolenaar 		    vim_free(wp->w_s->b_spell_ismw_mb);
2335860cae1cSBram Moolenaar 		    wp->w_s->b_spell_ismw_mb = bp;
23369c96f592SBram Moolenaar 		    vim_strncpy(bp + n, p, l);
23379c96f592SBram Moolenaar 		}
23389c96f592SBram Moolenaar 	    }
23399c96f592SBram Moolenaar 	    p += l;
23409c96f592SBram Moolenaar 	}
23419c96f592SBram Moolenaar 	else
2342860cae1cSBram Moolenaar 	    wp->w_s->b_spell_ismw[*p++] = TRUE;
23439c96f592SBram Moolenaar }
23449c96f592SBram Moolenaar 
23459c96f592SBram Moolenaar /*
2346402d2feaSBram Moolenaar  * Find the region "region[2]" in "rp" (points to "sl_regions").
2347c4568ab3SBram Moolenaar  * Each region is simply stored as the two characters of its name.
23487887d88aSBram Moolenaar  * Returns the index if found (first is 0), REGION_ALL if not found.
2349402d2feaSBram Moolenaar  */
2350402d2feaSBram Moolenaar     static int
find_region(char_u * rp,char_u * region)2351764b23c8SBram Moolenaar find_region(char_u *rp, char_u *region)
2352402d2feaSBram Moolenaar {
2353402d2feaSBram Moolenaar     int		i;
2354402d2feaSBram Moolenaar 
2355402d2feaSBram Moolenaar     for (i = 0; ; i += 2)
2356402d2feaSBram Moolenaar     {
2357402d2feaSBram Moolenaar 	if (rp[i] == NUL)
2358402d2feaSBram Moolenaar 	    return REGION_ALL;
2359402d2feaSBram Moolenaar 	if (rp[i] == region[0] && rp[i + 1] == region[1])
2360402d2feaSBram Moolenaar 	    break;
2361402d2feaSBram Moolenaar     }
2362402d2feaSBram Moolenaar     return i / 2;
2363402d2feaSBram Moolenaar }
2364402d2feaSBram Moolenaar 
2365402d2feaSBram Moolenaar /*
23669ba0eb85SBram Moolenaar  * Return case type of word:
2367402d2feaSBram Moolenaar  * w word	0
236851485f06SBram Moolenaar  * Word		WF_ONECAP
236951485f06SBram Moolenaar  * W WORD	WF_ALLCAP
237051485f06SBram Moolenaar  * WoRd	wOrd	WF_KEEPCAP
2371402d2feaSBram Moolenaar  */
23729ccfebddSBram Moolenaar     int
captype(char_u * word,char_u * end)2373764b23c8SBram Moolenaar captype(
2374764b23c8SBram Moolenaar     char_u	*word,
23750d6f5d97SBram Moolenaar     char_u	*end)	    // When NULL use up to NUL byte.
2376402d2feaSBram Moolenaar {
2377402d2feaSBram Moolenaar     char_u	*p;
2378402d2feaSBram Moolenaar     int		c;
2379402d2feaSBram Moolenaar     int		firstcap;
2380402d2feaSBram Moolenaar     int		allcap;
23810d6f5d97SBram Moolenaar     int		past_second = FALSE;	// past second word char
2382402d2feaSBram Moolenaar 
23830d6f5d97SBram Moolenaar     // find first letter
238491acfffcSBram Moolenaar     for (p = word; !spell_iswordp_nmw(p, curwin); MB_PTR_ADV(p))
23859ba0eb85SBram Moolenaar 	if (end == NULL ? *p == NUL : p >= end)
23860d6f5d97SBram Moolenaar 	    return 0;	    // only non-word characters, illegal word
2387b765d634SBram Moolenaar     if (has_mbyte)
2388402d2feaSBram Moolenaar 	c = mb_ptr2char_adv(&p);
2389b765d634SBram Moolenaar     else
2390b765d634SBram Moolenaar 	c = *p++;
23919f30f504SBram Moolenaar     firstcap = allcap = SPELL_ISUPPER(c);
2392402d2feaSBram Moolenaar 
2393402d2feaSBram Moolenaar     /*
2394402d2feaSBram Moolenaar      * Need to check all letters to find a word with mixed upper/lower.
2395402d2feaSBram Moolenaar      * But a word with an upper char only at start is a ONECAP.
2396402d2feaSBram Moolenaar      */
239791acfffcSBram Moolenaar     for ( ; end == NULL ? *p != NUL : p < end; MB_PTR_ADV(p))
2398cc63c647SBram Moolenaar 	if (spell_iswordp_nmw(p, curwin))
2399402d2feaSBram Moolenaar 	{
240053805d1eSBram Moolenaar 	    c = PTR2CHAR(p);
24019f30f504SBram Moolenaar 	    if (!SPELL_ISUPPER(c))
2402402d2feaSBram Moolenaar 	    {
24030d6f5d97SBram Moolenaar 		// UUl -> KEEPCAP
2404402d2feaSBram Moolenaar 		if (past_second && allcap)
240551485f06SBram Moolenaar 		    return WF_KEEPCAP;
2406402d2feaSBram Moolenaar 		allcap = FALSE;
2407402d2feaSBram Moolenaar 	    }
2408402d2feaSBram Moolenaar 	    else if (!allcap)
24090d6f5d97SBram Moolenaar 		// UlU -> KEEPCAP
241051485f06SBram Moolenaar 		return WF_KEEPCAP;
2411402d2feaSBram Moolenaar 	    past_second = TRUE;
2412402d2feaSBram Moolenaar 	}
2413402d2feaSBram Moolenaar 
2414402d2feaSBram Moolenaar     if (allcap)
241551485f06SBram Moolenaar 	return WF_ALLCAP;
2416402d2feaSBram Moolenaar     if (firstcap)
241751485f06SBram Moolenaar 	return WF_ONECAP;
2418402d2feaSBram Moolenaar     return 0;
2419402d2feaSBram Moolenaar }
2420402d2feaSBram Moolenaar 
24210fa313a7SBram Moolenaar /*
242234b466edSBram Moolenaar  * Delete the internal wordlist and its .spl file.
242334b466edSBram Moolenaar  */
242434b466edSBram Moolenaar     void
spell_delete_wordlist(void)2425764b23c8SBram Moolenaar spell_delete_wordlist(void)
242634b466edSBram Moolenaar {
242734b466edSBram Moolenaar     char_u	fname[MAXPATHL];
242834b466edSBram Moolenaar 
242934b466edSBram Moolenaar     if (int_wordlist != NULL)
243034b466edSBram Moolenaar     {
243134b466edSBram Moolenaar 	mch_remove(int_wordlist);
243234b466edSBram Moolenaar 	int_wordlist_spl(fname);
243334b466edSBram Moolenaar 	mch_remove(fname);
2434d23a8236SBram Moolenaar 	VIM_CLEAR(int_wordlist);
243534b466edSBram Moolenaar     }
243634b466edSBram Moolenaar }
243734b466edSBram Moolenaar 
24380a5fe214SBram Moolenaar /*
24390a5fe214SBram Moolenaar  * Free all languages.
24400a5fe214SBram Moolenaar  */
24410a5fe214SBram Moolenaar     void
spell_free_all(void)2442764b23c8SBram Moolenaar spell_free_all(void)
24430a5fe214SBram Moolenaar {
24448b96d64cSBram Moolenaar     slang_T	*slang;
24450a5fe214SBram Moolenaar     buf_T	*buf;
24460a5fe214SBram Moolenaar 
24470d6f5d97SBram Moolenaar     // Go through all buffers and handle 'spelllang'. <VN>
244829323590SBram Moolenaar     FOR_ALL_BUFFERS(buf)
2449860cae1cSBram Moolenaar 	ga_clear(&buf->b_s.b_langp);
24500a5fe214SBram Moolenaar 
24510a5fe214SBram Moolenaar     while (first_lang != NULL)
24520a5fe214SBram Moolenaar     {
24538b96d64cSBram Moolenaar 	slang = first_lang;
24548b96d64cSBram Moolenaar 	first_lang = slang->sl_next;
24558b96d64cSBram Moolenaar 	slang_free(slang);
24560a5fe214SBram Moolenaar     }
2457cf6bf39fSBram Moolenaar 
245834b466edSBram Moolenaar     spell_delete_wordlist();
24597887d88aSBram Moolenaar 
2460d23a8236SBram Moolenaar     VIM_CLEAR(repl_to);
2461d23a8236SBram Moolenaar     VIM_CLEAR(repl_from);
24620a5fe214SBram Moolenaar }
24630a5fe214SBram Moolenaar 
2464402d2feaSBram Moolenaar /*
2465402d2feaSBram Moolenaar  * Clear all spelling tables and reload them.
2466cfc6c43cSBram Moolenaar  * Used after 'encoding' is set and when ":mkspell" was used.
2467402d2feaSBram Moolenaar  */
2468402d2feaSBram Moolenaar     void
spell_reload(void)2469764b23c8SBram Moolenaar spell_reload(void)
2470402d2feaSBram Moolenaar {
24713982c541SBram Moolenaar     win_T	*wp;
2472402d2feaSBram Moolenaar 
24730d6f5d97SBram Moolenaar     // Initialize the table for spell_iswordp().
2474402d2feaSBram Moolenaar     init_spell_chartab();
2475402d2feaSBram Moolenaar 
24760d6f5d97SBram Moolenaar     // Unload all allocated memory.
24770a5fe214SBram Moolenaar     spell_free_all();
2478402d2feaSBram Moolenaar 
24790d6f5d97SBram Moolenaar     // Go through all buffers and handle 'spelllang'.
248029323590SBram Moolenaar     FOR_ALL_WINDOWS(wp)
2481402d2feaSBram Moolenaar     {
24820d6f5d97SBram Moolenaar 	// Only load the wordlists when 'spelllang' is set and there is a
24830d6f5d97SBram Moolenaar 	// window for this buffer in which 'spell' is set.
2484860cae1cSBram Moolenaar 	if (*wp->w_s->b_p_spl != NUL)
24853982c541SBram Moolenaar 	{
2486860cae1cSBram Moolenaar 		if (wp->w_p_spell)
24873982c541SBram Moolenaar 		{
2488860cae1cSBram Moolenaar 		    (void)did_set_spelllang(wp);
24893982c541SBram Moolenaar 		    break;
24903982c541SBram Moolenaar 		}
24913982c541SBram Moolenaar 	}
2492402d2feaSBram Moolenaar     }
2493402d2feaSBram Moolenaar }
2494402d2feaSBram Moolenaar 
2495b765d634SBram Moolenaar /*
24964770d09aSBram Moolenaar  * Open a spell buffer.  This is a nameless buffer that is not in the buffer
24974770d09aSBram Moolenaar  * list and only contains text lines.  Can use a swapfile to reduce memory
24984770d09aSBram Moolenaar  * use.
24994770d09aSBram Moolenaar  * Most other fields are invalid!  Esp. watch out for string options being
25004770d09aSBram Moolenaar  * NULL and there is no undo info.
25014770d09aSBram Moolenaar  * Returns NULL when out of memory.
25024770d09aSBram Moolenaar  */
25039ccfebddSBram Moolenaar     buf_T *
open_spellbuf(void)2504764b23c8SBram Moolenaar open_spellbuf(void)
25054770d09aSBram Moolenaar {
25064770d09aSBram Moolenaar     buf_T	*buf;
25074770d09aSBram Moolenaar 
2508c799fe20SBram Moolenaar     buf = ALLOC_CLEAR_ONE(buf_T);
25094770d09aSBram Moolenaar     if (buf != NULL)
25104770d09aSBram Moolenaar     {
25114770d09aSBram Moolenaar 	buf->b_spell = TRUE;
25120d6f5d97SBram Moolenaar 	buf->b_p_swf = TRUE;	// may create a swap file
2513706d2de9SBram Moolenaar #ifdef FEAT_CRYPT
2514706d2de9SBram Moolenaar 	buf->b_p_key = empty_option;
2515706d2de9SBram Moolenaar #endif
25164770d09aSBram Moolenaar 	ml_open(buf);
25170d6f5d97SBram Moolenaar 	ml_open_file(buf);	// create swap file now
25184770d09aSBram Moolenaar     }
25194770d09aSBram Moolenaar     return buf;
25204770d09aSBram Moolenaar }
25214770d09aSBram Moolenaar 
25224770d09aSBram Moolenaar /*
25234770d09aSBram Moolenaar  * Close the buffer used for spell info.
25244770d09aSBram Moolenaar  */
25259ccfebddSBram Moolenaar     void
close_spellbuf(buf_T * buf)2526764b23c8SBram Moolenaar close_spellbuf(buf_T *buf)
25274770d09aSBram Moolenaar {
25284770d09aSBram Moolenaar     if (buf != NULL)
25294770d09aSBram Moolenaar     {
25304770d09aSBram Moolenaar 	ml_close(buf, TRUE);
25314770d09aSBram Moolenaar 	vim_free(buf);
25324770d09aSBram Moolenaar     }
25334770d09aSBram Moolenaar }
25344770d09aSBram Moolenaar 
2535cfc6c43cSBram Moolenaar /*
2536cfc6c43cSBram Moolenaar  * Init the chartab used for spelling for ASCII.
2537cfc6c43cSBram Moolenaar  * EBCDIC is not supported!
2538cfc6c43cSBram Moolenaar  */
25399ccfebddSBram Moolenaar     void
clear_spell_chartab(spelltab_T * sp)2540764b23c8SBram Moolenaar clear_spell_chartab(spelltab_T *sp)
2541cfc6c43cSBram Moolenaar {
2542cfc6c43cSBram Moolenaar     int		i;
2543cfc6c43cSBram Moolenaar 
2544a80faa89SBram Moolenaar     // Init everything to FALSE (zero).
2545a80faa89SBram Moolenaar     CLEAR_FIELD(sp->st_isw);
2546a80faa89SBram Moolenaar     CLEAR_FIELD(sp->st_isu);
2547cfc6c43cSBram Moolenaar     for (i = 0; i < 256; ++i)
25489f30f504SBram Moolenaar     {
2549cfc6c43cSBram Moolenaar 	sp->st_fold[i] = i;
25509f30f504SBram Moolenaar 	sp->st_upper[i] = i;
25519f30f504SBram Moolenaar     }
2552cfc6c43cSBram Moolenaar 
25530d6f5d97SBram Moolenaar     // We include digits.  A word shouldn't start with a digit, but handling
25540d6f5d97SBram Moolenaar     // that is done separately.
2555cfc6c43cSBram Moolenaar     for (i = '0'; i <= '9'; ++i)
2556cfc6c43cSBram Moolenaar 	sp->st_isw[i] = TRUE;
2557cfc6c43cSBram Moolenaar     for (i = 'A'; i <= 'Z'; ++i)
2558cfc6c43cSBram Moolenaar     {
2559cfc6c43cSBram Moolenaar 	sp->st_isw[i] = TRUE;
2560cfc6c43cSBram Moolenaar 	sp->st_isu[i] = TRUE;
2561cfc6c43cSBram Moolenaar 	sp->st_fold[i] = i + 0x20;
2562cfc6c43cSBram Moolenaar     }
2563cfc6c43cSBram Moolenaar     for (i = 'a'; i <= 'z'; ++i)
25649f30f504SBram Moolenaar     {
2565cfc6c43cSBram Moolenaar 	sp->st_isw[i] = TRUE;
25669f30f504SBram Moolenaar 	sp->st_upper[i] = i - 0x20;
25679f30f504SBram Moolenaar     }
2568cfc6c43cSBram Moolenaar }
2569cfc6c43cSBram Moolenaar 
2570cfc6c43cSBram Moolenaar /*
2571cfc6c43cSBram Moolenaar  * Init the chartab used for spelling.  Only depends on 'encoding'.
2572cfc6c43cSBram Moolenaar  * Called once while starting up and when 'encoding' changes.
2573cfc6c43cSBram Moolenaar  * The default is to use isalpha(), but the spell file should define the word
2574cfc6c43cSBram Moolenaar  * characters to make it possible that 'encoding' differs from the current
2575dfb9ac00SBram Moolenaar  * locale.  For utf-8 we don't use isalpha() but our own functions.
2576cfc6c43cSBram Moolenaar  */
2577cfc6c43cSBram Moolenaar     void
init_spell_chartab(void)2578764b23c8SBram Moolenaar init_spell_chartab(void)
2579cfc6c43cSBram Moolenaar {
2580cfc6c43cSBram Moolenaar     int	    i;
2581cfc6c43cSBram Moolenaar 
2582cfc6c43cSBram Moolenaar     did_set_spelltab = FALSE;
2583cfc6c43cSBram Moolenaar     clear_spell_chartab(&spelltab);
2584cfc6c43cSBram Moolenaar     if (enc_dbcs)
2585cfc6c43cSBram Moolenaar     {
25860d6f5d97SBram Moolenaar 	// DBCS: assume double-wide characters are word characters.
2587cfc6c43cSBram Moolenaar 	for (i = 128; i <= 255; ++i)
2588cfc6c43cSBram Moolenaar 	    if (MB_BYTE2LEN(i) == 2)
2589cfc6c43cSBram Moolenaar 		spelltab.st_isw[i] = TRUE;
2590cfc6c43cSBram Moolenaar     }
25919f30f504SBram Moolenaar     else if (enc_utf8)
25929f30f504SBram Moolenaar     {
25939f30f504SBram Moolenaar 	for (i = 128; i < 256; ++i)
25949f30f504SBram Moolenaar 	{
259554ab0f1eSBram Moolenaar 	    int f = utf_fold(i);
259654ab0f1eSBram Moolenaar 	    int u = utf_toupper(i);
259754ab0f1eSBram Moolenaar 
25989f30f504SBram Moolenaar 	    spelltab.st_isu[i] = utf_isupper(i);
25999f30f504SBram Moolenaar 	    spelltab.st_isw[i] = spelltab.st_isu[i] || utf_islower(i);
26000d6f5d97SBram Moolenaar 	    // The folded/upper-cased value is different between latin1 and
26010d6f5d97SBram Moolenaar 	    // utf8 for 0xb5, causing E763 for no good reason.  Use the latin1
26020d6f5d97SBram Moolenaar 	    // value for utf-8 to avoid this.
260354ab0f1eSBram Moolenaar 	    spelltab.st_fold[i] = (f < 256) ? f : i;
260454ab0f1eSBram Moolenaar 	    spelltab.st_upper[i] = (u < 256) ? u : i;
26059f30f504SBram Moolenaar 	}
26069f30f504SBram Moolenaar     }
2607cfc6c43cSBram Moolenaar     else
2608cfc6c43cSBram Moolenaar     {
26090d6f5d97SBram Moolenaar 	// Rough guess: use locale-dependent library functions.
2610cfc6c43cSBram Moolenaar 	for (i = 128; i < 256; ++i)
2611cfc6c43cSBram Moolenaar 	{
2612cfc6c43cSBram Moolenaar 	    if (MB_ISUPPER(i))
2613cfc6c43cSBram Moolenaar 	    {
26149f30f504SBram Moolenaar 		spelltab.st_isw[i] = TRUE;
2615cfc6c43cSBram Moolenaar 		spelltab.st_isu[i] = TRUE;
2616cfc6c43cSBram Moolenaar 		spelltab.st_fold[i] = MB_TOLOWER(i);
2617cfc6c43cSBram Moolenaar 	    }
26189f30f504SBram Moolenaar 	    else if (MB_ISLOWER(i))
26199f30f504SBram Moolenaar 	    {
26209f30f504SBram Moolenaar 		spelltab.st_isw[i] = TRUE;
26219f30f504SBram Moolenaar 		spelltab.st_upper[i] = MB_TOUPPER(i);
26229f30f504SBram Moolenaar 	    }
2623cfc6c43cSBram Moolenaar 	}
2624cfc6c43cSBram Moolenaar     }
2625cfc6c43cSBram Moolenaar }
2626cfc6c43cSBram Moolenaar 
2627cfc6c43cSBram Moolenaar 
2628cfc6c43cSBram Moolenaar /*
2629ea408854SBram Moolenaar  * Return TRUE if "p" points to a word character.
2630cf6bf39fSBram Moolenaar  * As a special case we see "midword" characters as word character when it is
2631ea408854SBram Moolenaar  * followed by a word character.  This finds they'there but not 'they there'.
2632cf6bf39fSBram Moolenaar  * Thus this only works properly when past the first character of the word.
2633ea408854SBram Moolenaar  */
263446a426c9SBram Moolenaar     int
spell_iswordp(char_u * p,win_T * wp)2635764b23c8SBram Moolenaar spell_iswordp(
2636764b23c8SBram Moolenaar     char_u	*p,
26370d6f5d97SBram Moolenaar     win_T	*wp)	    // buffer used
2638ea408854SBram Moolenaar {
2639cf6bf39fSBram Moolenaar     char_u	*s;
2640cf6bf39fSBram Moolenaar     int		l;
2641cf6bf39fSBram Moolenaar     int		c;
2642cf6bf39fSBram Moolenaar 
2643cf6bf39fSBram Moolenaar     if (has_mbyte)
2644cf6bf39fSBram Moolenaar     {
26451614a149SBram Moolenaar 	l = mb_ptr2len(p);
2646cf6bf39fSBram Moolenaar 	s = p;
2647cf6bf39fSBram Moolenaar 	if (l == 1)
2648cf6bf39fSBram Moolenaar 	{
26490d6f5d97SBram Moolenaar 	    // be quick for ASCII
2650860cae1cSBram Moolenaar 	    if (wp->w_s->b_spell_ismw[*p])
26510d6f5d97SBram Moolenaar 		s = p + 1;		// skip a mid-word character
2652cf6bf39fSBram Moolenaar 	}
2653cf6bf39fSBram Moolenaar 	else
2654cf6bf39fSBram Moolenaar 	{
2655cf6bf39fSBram Moolenaar 	    c = mb_ptr2char(p);
2656860cae1cSBram Moolenaar 	    if (c < 256 ? wp->w_s->b_spell_ismw[c]
2657860cae1cSBram Moolenaar 		    : (wp->w_s->b_spell_ismw_mb != NULL
2658860cae1cSBram Moolenaar 			   && vim_strchr(wp->w_s->b_spell_ismw_mb, c) != NULL))
2659cf6bf39fSBram Moolenaar 		s = p + l;
2660cf6bf39fSBram Moolenaar 	}
2661cf6bf39fSBram Moolenaar 
2662dfb9ac00SBram Moolenaar 	c = mb_ptr2char(s);
2663dfb9ac00SBram Moolenaar 	if (c > 255)
2664cc63c647SBram Moolenaar 	    return spell_mb_isword_class(mb_get_class(s), wp);
2665dfb9ac00SBram Moolenaar 	return spelltab.st_isw[c];
2666ea408854SBram Moolenaar     }
2667cf6bf39fSBram Moolenaar 
2668860cae1cSBram Moolenaar     return spelltab.st_isw[wp->w_s->b_spell_ismw[*p] ? p[1] : p[0]];
26699c96f592SBram Moolenaar }
26709c96f592SBram Moolenaar 
26719c96f592SBram Moolenaar /*
26729c96f592SBram Moolenaar  * Return TRUE if "p" points to a word character.
26739c96f592SBram Moolenaar  * Unlike spell_iswordp() this doesn't check for "midword" characters.
26749c96f592SBram Moolenaar  */
267546a426c9SBram Moolenaar     int
spell_iswordp_nmw(char_u * p,win_T * wp)2676764b23c8SBram Moolenaar spell_iswordp_nmw(char_u *p, win_T *wp)
26779c96f592SBram Moolenaar {
2678dfb9ac00SBram Moolenaar     int		c;
26799c96f592SBram Moolenaar 
2680dfb9ac00SBram Moolenaar     if (has_mbyte)
2681dfb9ac00SBram Moolenaar     {
2682dfb9ac00SBram Moolenaar 	c = mb_ptr2char(p);
2683dfb9ac00SBram Moolenaar 	if (c > 255)
2684cc63c647SBram Moolenaar 	    return spell_mb_isword_class(mb_get_class(p), wp);
2685dfb9ac00SBram Moolenaar 	return spelltab.st_isw[c];
2686dfb9ac00SBram Moolenaar     }
26879c96f592SBram Moolenaar     return spelltab.st_isw[*p];
2688cf6bf39fSBram Moolenaar }
2689ea408854SBram Moolenaar 
2690a1ba811aSBram Moolenaar /*
26917a91a4a1SBram Moolenaar  * Return TRUE if word class indicates a word character.
26927a91a4a1SBram Moolenaar  * Only for characters above 255.
26937a91a4a1SBram Moolenaar  * Unicode subscript and superscript are not considered word characters.
2694cc63c647SBram Moolenaar  * See also dbcs_class() and utf_class() in mbyte.c.
26957a91a4a1SBram Moolenaar  */
26967a91a4a1SBram Moolenaar     static int
spell_mb_isword_class(int cl,win_T * wp)2697764b23c8SBram Moolenaar spell_mb_isword_class(int cl, win_T *wp)
26987a91a4a1SBram Moolenaar {
2699cc63c647SBram Moolenaar     if (wp->w_s->b_cjk)
27000d6f5d97SBram Moolenaar 	// East Asian characters are not considered word characters.
2701cc63c647SBram Moolenaar 	return cl == 2 || cl == 0x2800;
270206e63770SBram Moolenaar     return cl >= 2 && cl != 0x2070 && cl != 0x2080 && cl != 3;
27037a91a4a1SBram Moolenaar }
27047a91a4a1SBram Moolenaar 
27057a91a4a1SBram Moolenaar /*
2706a1ba811aSBram Moolenaar  * Return TRUE if "p" points to a word character.
2707a1ba811aSBram Moolenaar  * Wide version of spell_iswordp().
2708a1ba811aSBram Moolenaar  */
2709a1ba811aSBram Moolenaar     static int
spell_iswordp_w(int * p,win_T * wp)2710764b23c8SBram Moolenaar spell_iswordp_w(int *p, win_T *wp)
2711a1ba811aSBram Moolenaar {
2712a1ba811aSBram Moolenaar     int		*s;
2713a1ba811aSBram Moolenaar 
2714860cae1cSBram Moolenaar     if (*p < 256 ? wp->w_s->b_spell_ismw[*p]
2715860cae1cSBram Moolenaar 		 : (wp->w_s->b_spell_ismw_mb != NULL
2716860cae1cSBram Moolenaar 			     && vim_strchr(wp->w_s->b_spell_ismw_mb, *p) != NULL))
2717a1ba811aSBram Moolenaar 	s = p + 1;
2718a1ba811aSBram Moolenaar     else
2719a1ba811aSBram Moolenaar 	s = p;
2720a1ba811aSBram Moolenaar 
2721dfb9ac00SBram Moolenaar     if (*s > 255)
2722a1ba811aSBram Moolenaar     {
2723a1ba811aSBram Moolenaar 	if (enc_utf8)
2724cc63c647SBram Moolenaar 	    return spell_mb_isword_class(utf_class(*s), wp);
2725a1ba811aSBram Moolenaar 	if (enc_dbcs)
2726cc63c647SBram Moolenaar 	    return spell_mb_isword_class(
2727cc63c647SBram Moolenaar 				dbcs_class((unsigned)*s >> 8, *s & 0xff), wp);
2728a1ba811aSBram Moolenaar 	return 0;
2729a1ba811aSBram Moolenaar     }
2730a1ba811aSBram Moolenaar     return spelltab.st_isw[*s];
2731a1ba811aSBram Moolenaar }
2732a1ba811aSBram Moolenaar 
2733ea408854SBram Moolenaar /*
27349f30f504SBram Moolenaar  * Case-fold "str[len]" into "buf[buflen]".  The result is NUL terminated.
27359f30f504SBram Moolenaar  * Uses the character definitions from the .spl file.
2736cfc6c43cSBram Moolenaar  * When using a multi-byte 'encoding' the length may change!
2737cfc6c43cSBram Moolenaar  * Returns FAIL when something wrong.
2738cfc6c43cSBram Moolenaar  */
27399ccfebddSBram Moolenaar     int
spell_casefold(win_T * wp,char_u * str,int len,char_u * buf,int buflen)2740764b23c8SBram Moolenaar spell_casefold(
27414f135275SBram Moolenaar     win_T	*wp,
2742764b23c8SBram Moolenaar     char_u	*str,
2743764b23c8SBram Moolenaar     int		len,
2744764b23c8SBram Moolenaar     char_u	*buf,
2745764b23c8SBram Moolenaar     int		buflen)
2746cfc6c43cSBram Moolenaar {
2747cfc6c43cSBram Moolenaar     int		i;
2748cfc6c43cSBram Moolenaar 
2749cfc6c43cSBram Moolenaar     if (len >= buflen)
2750cfc6c43cSBram Moolenaar     {
2751cfc6c43cSBram Moolenaar 	buf[0] = NUL;
27520d6f5d97SBram Moolenaar 	return FAIL;		// result will not fit
2753cfc6c43cSBram Moolenaar     }
2754cfc6c43cSBram Moolenaar 
2755cfc6c43cSBram Moolenaar     if (has_mbyte)
2756cfc6c43cSBram Moolenaar     {
2757cfc6c43cSBram Moolenaar 	int	outi = 0;
27589f30f504SBram Moolenaar 	char_u	*p;
27599f30f504SBram Moolenaar 	int	c;
2760cfc6c43cSBram Moolenaar 
27610d6f5d97SBram Moolenaar 	// Fold one character at a time.
27629f30f504SBram Moolenaar 	for (p = str; p < str + len; )
2763cfc6c43cSBram Moolenaar 	{
2764cfc6c43cSBram Moolenaar 	    if (outi + MB_MAXBYTES > buflen)
2765cfc6c43cSBram Moolenaar 	    {
2766cfc6c43cSBram Moolenaar 		buf[outi] = NUL;
2767cfc6c43cSBram Moolenaar 		return FAIL;
2768cfc6c43cSBram Moolenaar 	    }
27690fa313a7SBram Moolenaar 	    c = mb_cptr2char_adv(&p);
27704f135275SBram Moolenaar 
27714f135275SBram Moolenaar 	    // Exception: greek capital sigma 0x03A3 folds to 0x03C3, except
27724f135275SBram Moolenaar 	    // when it is the last character in a word, then it folds to
27734f135275SBram Moolenaar 	    // 0x03C2.
27744f135275SBram Moolenaar 	    if (c == 0x03a3 || c == 0x03c2)
27754f135275SBram Moolenaar 	    {
27764f135275SBram Moolenaar 		if (p == str + len || !spell_iswordp(p, wp))
27774f135275SBram Moolenaar 		    c = 0x03c2;
27784f135275SBram Moolenaar 		else
27794f135275SBram Moolenaar 		    c = 0x03c3;
27804f135275SBram Moolenaar 	    }
27814f135275SBram Moolenaar 	    else
27824f135275SBram Moolenaar 		c = SPELL_TOFOLD(c);
27834f135275SBram Moolenaar 
27844f135275SBram Moolenaar 	    outi += mb_char2bytes(c, buf + outi);
2785cfc6c43cSBram Moolenaar 	}
2786cfc6c43cSBram Moolenaar 	buf[outi] = NUL;
2787cfc6c43cSBram Moolenaar     }
2788cfc6c43cSBram Moolenaar     else
2789cfc6c43cSBram Moolenaar     {
27900d6f5d97SBram Moolenaar 	// Be quick for non-multibyte encodings.
2791cfc6c43cSBram Moolenaar 	for (i = 0; i < len; ++i)
27929f30f504SBram Moolenaar 	    buf[i] = spelltab.st_fold[str[i]];
2793cfc6c43cSBram Moolenaar 	buf[i] = NUL;
2794cfc6c43cSBram Moolenaar     }
2795cfc6c43cSBram Moolenaar 
2796cfc6c43cSBram Moolenaar     return OK;
2797cfc6c43cSBram Moolenaar }
2798cfc6c43cSBram Moolenaar 
2799d857f0e0SBram Moolenaar /*
28008b59de9fSBram Moolenaar  * Check if the word at line "lnum" column "col" is required to start with a
28018b59de9fSBram Moolenaar  * capital.  This uses 'spellcapcheck' of the current buffer.
28028b59de9fSBram Moolenaar  */
280346a426c9SBram Moolenaar     int
check_need_cap(linenr_T lnum,colnr_T col)2804764b23c8SBram Moolenaar check_need_cap(linenr_T lnum, colnr_T col)
28058b59de9fSBram Moolenaar {
28068b59de9fSBram Moolenaar     int		need_cap = FALSE;
28078b59de9fSBram Moolenaar     char_u	*line;
28088b59de9fSBram Moolenaar     char_u	*line_copy = NULL;
28098b59de9fSBram Moolenaar     char_u	*p;
28108b59de9fSBram Moolenaar     colnr_T	endcol;
28118b59de9fSBram Moolenaar     regmatch_T	regmatch;
28128b59de9fSBram Moolenaar 
2813860cae1cSBram Moolenaar     if (curwin->w_s->b_cap_prog == NULL)
28148b59de9fSBram Moolenaar 	return FALSE;
28158b59de9fSBram Moolenaar 
28168b59de9fSBram Moolenaar     line = ml_get_curline();
28178b59de9fSBram Moolenaar     endcol = 0;
2818e2e69e48SBram Moolenaar     if (getwhitecols(line) >= (int)col)
28198b59de9fSBram Moolenaar     {
28200d6f5d97SBram Moolenaar 	// At start of line, check if previous line is empty or sentence
28210d6f5d97SBram Moolenaar 	// ends there.
28228b59de9fSBram Moolenaar 	if (lnum == 1)
28238b59de9fSBram Moolenaar 	    need_cap = TRUE;
28248b59de9fSBram Moolenaar 	else
28258b59de9fSBram Moolenaar 	{
28268b59de9fSBram Moolenaar 	    line = ml_get(lnum - 1);
28278b59de9fSBram Moolenaar 	    if (*skipwhite(line) == NUL)
28288b59de9fSBram Moolenaar 		need_cap = TRUE;
28298b59de9fSBram Moolenaar 	    else
28308b59de9fSBram Moolenaar 	    {
28310d6f5d97SBram Moolenaar 		// Append a space in place of the line break.
28328b59de9fSBram Moolenaar 		line_copy = concat_str(line, (char_u *)" ");
28338b59de9fSBram Moolenaar 		line = line_copy;
2834a93fa7eeSBram Moolenaar 		endcol = (colnr_T)STRLEN(line);
28358b59de9fSBram Moolenaar 	    }
28368b59de9fSBram Moolenaar 	}
28378b59de9fSBram Moolenaar     }
28388b59de9fSBram Moolenaar     else
28398b59de9fSBram Moolenaar 	endcol = col;
28408b59de9fSBram Moolenaar 
28418b59de9fSBram Moolenaar     if (endcol > 0)
28428b59de9fSBram Moolenaar     {
28430d6f5d97SBram Moolenaar 	// Check if sentence ends before the bad word.
2844860cae1cSBram Moolenaar 	regmatch.regprog = curwin->w_s->b_cap_prog;
28458b59de9fSBram Moolenaar 	regmatch.rm_ic = FALSE;
28468b59de9fSBram Moolenaar 	p = line + endcol;
28478b59de9fSBram Moolenaar 	for (;;)
28488b59de9fSBram Moolenaar 	{
284991acfffcSBram Moolenaar 	    MB_PTR_BACK(line, p);
2850cc63c647SBram Moolenaar 	    if (p == line || spell_iswordp_nmw(p, curwin))
28518b59de9fSBram Moolenaar 		break;
28528b59de9fSBram Moolenaar 	    if (vim_regexec(&regmatch, p, 0)
28538b59de9fSBram Moolenaar 					 && regmatch.endp[0] == line + endcol)
28548b59de9fSBram Moolenaar 	    {
28558b59de9fSBram Moolenaar 		need_cap = TRUE;
28568b59de9fSBram Moolenaar 		break;
28578b59de9fSBram Moolenaar 	    }
28588b59de9fSBram Moolenaar 	}
2859dffa5b8eSBram Moolenaar 	curwin->w_s->b_cap_prog = regmatch.regprog;
28608b59de9fSBram Moolenaar     }
28618b59de9fSBram Moolenaar 
28628b59de9fSBram Moolenaar     vim_free(line_copy);
28638b59de9fSBram Moolenaar 
28648b59de9fSBram Moolenaar     return need_cap;
28658b59de9fSBram Moolenaar }
28668b59de9fSBram Moolenaar 
28678b59de9fSBram Moolenaar 
28688b59de9fSBram Moolenaar /*
2869a1ba811aSBram Moolenaar  * ":spellrepall"
2870a1ba811aSBram Moolenaar  */
2871a1ba811aSBram Moolenaar     void
ex_spellrepall(exarg_T * eap UNUSED)2872764b23c8SBram Moolenaar ex_spellrepall(exarg_T *eap UNUSED)
2873a1ba811aSBram Moolenaar {
2874a1ba811aSBram Moolenaar     pos_T	pos = curwin->w_cursor;
2875a1ba811aSBram Moolenaar     char_u	*frompat;
2876a1ba811aSBram Moolenaar     int		addlen;
2877a1ba811aSBram Moolenaar     char_u	*line;
2878a1ba811aSBram Moolenaar     char_u	*p;
2879a1ba811aSBram Moolenaar     int		save_ws = p_ws;
28805195e456SBram Moolenaar     linenr_T	prev_lnum = 0;
2881a1ba811aSBram Moolenaar 
2882a1ba811aSBram Moolenaar     if (repl_from == NULL || repl_to == NULL)
2883a1ba811aSBram Moolenaar     {
2884f9e3e09fSBram Moolenaar 	emsg(_("E752: No previous spell replacement"));
2885a1ba811aSBram Moolenaar 	return;
2886a1ba811aSBram Moolenaar     }
2887a93fa7eeSBram Moolenaar     addlen = (int)(STRLEN(repl_to) - STRLEN(repl_from));
2888a1ba811aSBram Moolenaar 
2889964b3746SBram Moolenaar     frompat = alloc(STRLEN(repl_from) + 7);
2890a1ba811aSBram Moolenaar     if (frompat == NULL)
2891a1ba811aSBram Moolenaar 	return;
2892a1ba811aSBram Moolenaar     sprintf((char *)frompat, "\\V\\<%s\\>", repl_from);
2893a1ba811aSBram Moolenaar     p_ws = FALSE;
2894a1ba811aSBram Moolenaar 
28955195e456SBram Moolenaar     sub_nsubs = 0;
28965195e456SBram Moolenaar     sub_nlines = 0;
2897a1ba811aSBram Moolenaar     curwin->w_cursor.lnum = 0;
2898a1ba811aSBram Moolenaar     while (!got_int)
2899a1ba811aSBram Moolenaar     {
2900c036e87bSBram Moolenaar 	if (do_search(NULL, '/', '/', frompat, 1L, SEARCH_KEEP, NULL) == 0
2901a1ba811aSBram Moolenaar 						   || u_save_cursor() == FAIL)
2902a1ba811aSBram Moolenaar 	    break;
2903a1ba811aSBram Moolenaar 
29040d6f5d97SBram Moolenaar 	// Only replace when the right word isn't there yet.  This happens
29050d6f5d97SBram Moolenaar 	// when changing "etc" to "etc.".
2906a1ba811aSBram Moolenaar 	line = ml_get_curline();
2907a1ba811aSBram Moolenaar 	if (addlen <= 0 || STRNCMP(line + curwin->w_cursor.col,
2908a1ba811aSBram Moolenaar 					       repl_to, STRLEN(repl_to)) != 0)
2909a1ba811aSBram Moolenaar 	{
2910964b3746SBram Moolenaar 	    p = alloc(STRLEN(line) + addlen + 1);
2911a1ba811aSBram Moolenaar 	    if (p == NULL)
2912a1ba811aSBram Moolenaar 		break;
2913a1ba811aSBram Moolenaar 	    mch_memmove(p, line, curwin->w_cursor.col);
2914a1ba811aSBram Moolenaar 	    STRCPY(p + curwin->w_cursor.col, repl_to);
2915a1ba811aSBram Moolenaar 	    STRCAT(p, line + curwin->w_cursor.col + STRLEN(repl_from));
2916a1ba811aSBram Moolenaar 	    ml_replace(curwin->w_cursor.lnum, p, FALSE);
2917a1ba811aSBram Moolenaar 	    changed_bytes(curwin->w_cursor.lnum, curwin->w_cursor.col);
29185195e456SBram Moolenaar 
29195195e456SBram Moolenaar 	    if (curwin->w_cursor.lnum != prev_lnum)
29205195e456SBram Moolenaar 	    {
29215195e456SBram Moolenaar 		++sub_nlines;
29225195e456SBram Moolenaar 		prev_lnum = curwin->w_cursor.lnum;
29235195e456SBram Moolenaar 	    }
29245195e456SBram Moolenaar 	    ++sub_nsubs;
2925a1ba811aSBram Moolenaar 	}
2926a93fa7eeSBram Moolenaar 	curwin->w_cursor.col += (colnr_T)STRLEN(repl_to);
2927a1ba811aSBram Moolenaar     }
2928a1ba811aSBram Moolenaar 
2929a1ba811aSBram Moolenaar     p_ws = save_ws;
2930a1ba811aSBram Moolenaar     curwin->w_cursor = pos;
2931a1ba811aSBram Moolenaar     vim_free(frompat);
2932a1ba811aSBram Moolenaar 
29335195e456SBram Moolenaar     if (sub_nsubs == 0)
2934f9e3e09fSBram Moolenaar 	semsg(_("E753: Not found: %s"), repl_from);
29355195e456SBram Moolenaar     else
29365195e456SBram Moolenaar 	do_sub_msg(FALSE);
2937a1ba811aSBram Moolenaar }
2938a1ba811aSBram Moolenaar 
2939a1ba811aSBram Moolenaar /*
29409f30f504SBram Moolenaar  * Make a copy of "word", with the first letter upper or lower cased, to
29419f30f504SBram Moolenaar  * "wcopy[MAXWLEN]".  "word" must not be empty.
29429f30f504SBram Moolenaar  * The result is NUL terminated.
29439ba0eb85SBram Moolenaar  */
29449ccfebddSBram Moolenaar     void
onecap_copy(char_u * word,char_u * wcopy,int upper)2945764b23c8SBram Moolenaar onecap_copy(
2946764b23c8SBram Moolenaar     char_u	*word,
2947764b23c8SBram Moolenaar     char_u	*wcopy,
29480d6f5d97SBram Moolenaar     int		upper)	    // TRUE: first letter made upper case
29499ba0eb85SBram Moolenaar {
29509ba0eb85SBram Moolenaar     char_u	*p;
29519ba0eb85SBram Moolenaar     int		c;
29529ba0eb85SBram Moolenaar     int		l;
29539ba0eb85SBram Moolenaar 
29549ba0eb85SBram Moolenaar     p = word;
29559ba0eb85SBram Moolenaar     if (has_mbyte)
29560fa313a7SBram Moolenaar 	c = mb_cptr2char_adv(&p);
29579ba0eb85SBram Moolenaar     else
29589ba0eb85SBram Moolenaar 	c = *p++;
29599ba0eb85SBram Moolenaar     if (upper)
29609f30f504SBram Moolenaar 	c = SPELL_TOUPPER(c);
29619ba0eb85SBram Moolenaar     else
29629f30f504SBram Moolenaar 	c = SPELL_TOFOLD(c);
29639ba0eb85SBram Moolenaar     if (has_mbyte)
29649ba0eb85SBram Moolenaar 	l = mb_char2bytes(c, wcopy);
29659ba0eb85SBram Moolenaar     else
29669ba0eb85SBram Moolenaar     {
29679ba0eb85SBram Moolenaar 	l = 1;
29689ba0eb85SBram Moolenaar 	wcopy[0] = c;
29699ba0eb85SBram Moolenaar     }
29709c96f592SBram Moolenaar     vim_strncpy(wcopy + l, p, MAXWLEN - l - 1);
29719ba0eb85SBram Moolenaar }
29729ba0eb85SBram Moolenaar 
29739ba0eb85SBram Moolenaar /*
29749f30f504SBram Moolenaar  * Make a copy of "word" with all the letters upper cased into
29759f30f504SBram Moolenaar  * "wcopy[MAXWLEN]".  The result is NUL terminated.
29769ba0eb85SBram Moolenaar  */
297746a426c9SBram Moolenaar     void
allcap_copy(char_u * word,char_u * wcopy)2978764b23c8SBram Moolenaar allcap_copy(char_u *word, char_u *wcopy)
29799ba0eb85SBram Moolenaar {
29809ba0eb85SBram Moolenaar     char_u	*s;
29819ba0eb85SBram Moolenaar     char_u	*d;
29829ba0eb85SBram Moolenaar     int		c;
29839ba0eb85SBram Moolenaar 
29849ba0eb85SBram Moolenaar     d = wcopy;
29859ba0eb85SBram Moolenaar     for (s = word; *s != NUL; )
29869ba0eb85SBram Moolenaar     {
29879ba0eb85SBram Moolenaar 	if (has_mbyte)
29880fa313a7SBram Moolenaar 	    c = mb_cptr2char_adv(&s);
29899ba0eb85SBram Moolenaar 	else
29909ba0eb85SBram Moolenaar 	    c = *s++;
29917862282fSBram Moolenaar 
29920d6f5d97SBram Moolenaar 	// We only change 0xdf to SS when we are certain latin1 is used.  It
29930d6f5d97SBram Moolenaar 	// would cause weird errors in other 8-bit encodings.
29947862282fSBram Moolenaar 	if (enc_latin1like && c == 0xdf)
29957862282fSBram Moolenaar 	{
29967862282fSBram Moolenaar 	    c = 'S';
29977862282fSBram Moolenaar 	    if (d - wcopy >= MAXWLEN - 1)
29987862282fSBram Moolenaar 		break;
29997862282fSBram Moolenaar 	    *d++ = c;
30007862282fSBram Moolenaar 	}
30017862282fSBram Moolenaar 	else
30029f30f504SBram Moolenaar 	    c = SPELL_TOUPPER(c);
30039ba0eb85SBram Moolenaar 
30049ba0eb85SBram Moolenaar 	if (has_mbyte)
30059ba0eb85SBram Moolenaar 	{
30069ba0eb85SBram Moolenaar 	    if (d - wcopy >= MAXWLEN - MB_MAXBYTES)
30079ba0eb85SBram Moolenaar 		break;
30089ba0eb85SBram Moolenaar 	    d += mb_char2bytes(c, d);
30099ba0eb85SBram Moolenaar 	}
30109ba0eb85SBram Moolenaar 	else
30119ba0eb85SBram Moolenaar 	{
30129ba0eb85SBram Moolenaar 	    if (d - wcopy >= MAXWLEN - 1)
30139ba0eb85SBram Moolenaar 		break;
30149ba0eb85SBram Moolenaar 	    *d++ = c;
30159ba0eb85SBram Moolenaar 	}
30169ba0eb85SBram Moolenaar     }
30179ba0eb85SBram Moolenaar     *d = NUL;
30189ba0eb85SBram Moolenaar }
30199ba0eb85SBram Moolenaar 
30209ba0eb85SBram Moolenaar /*
302153805d1eSBram Moolenaar  * Case-folding may change the number of bytes: Count nr of chars in
302253805d1eSBram Moolenaar  * fword[flen] and return the byte length of that many chars in "word".
302353805d1eSBram Moolenaar  */
302446a426c9SBram Moolenaar     int
nofold_len(char_u * fword,int flen,char_u * word)3025764b23c8SBram Moolenaar nofold_len(char_u *fword, int flen, char_u *word)
302653805d1eSBram Moolenaar {
302753805d1eSBram Moolenaar     char_u	*p;
302853805d1eSBram Moolenaar     int		i = 0;
302953805d1eSBram Moolenaar 
303091acfffcSBram Moolenaar     for (p = fword; p < fword + flen; MB_PTR_ADV(p))
303153805d1eSBram Moolenaar 	++i;
303291acfffcSBram Moolenaar     for (p = word; i > 0; MB_PTR_ADV(p))
303353805d1eSBram Moolenaar 	--i;
303453805d1eSBram Moolenaar     return (int)(p - word);
303553805d1eSBram Moolenaar }
303653805d1eSBram Moolenaar 
30379ba0eb85SBram Moolenaar /*
30389f30f504SBram Moolenaar  * Copy "fword" to "cword", fixing case according to "flags".
30399ba0eb85SBram Moolenaar  */
304046a426c9SBram Moolenaar     void
make_case_word(char_u * fword,char_u * cword,int flags)3041764b23c8SBram Moolenaar make_case_word(char_u *fword, char_u *cword, int flags)
30429ba0eb85SBram Moolenaar {
30439ba0eb85SBram Moolenaar     if (flags & WF_ALLCAP)
30440d6f5d97SBram Moolenaar 	// Make it all upper-case
30459ba0eb85SBram Moolenaar 	allcap_copy(fword, cword);
30469ba0eb85SBram Moolenaar     else if (flags & WF_ONECAP)
30470d6f5d97SBram Moolenaar 	// Make the first letter upper-case
30489f30f504SBram Moolenaar 	onecap_copy(fword, cword, TRUE);
30499ba0eb85SBram Moolenaar     else
30500d6f5d97SBram Moolenaar 	// Use goodword as-is.
30519ba0eb85SBram Moolenaar 	STRCPY(cword, fword);
30529ba0eb85SBram Moolenaar }
30539ba0eb85SBram Moolenaar 
#if defined(FEAT_EVAL) || defined(PROTO)
/*
 * Soundfold a string, for soundfold().
 * The first language of the current window that has SAL items is used.
 * When spell checking is off, 'spelllang' is empty or no language has SAL
 * items, a copy of "word" itself is returned.
 * Result is in allocated memory, NULL for an error.
 */
    char_u *
eval_soundfold(char_u *word)
{
    char_u	sound[MAXWLEN];
    langp_T	*lp;
    int		idx;

    // Without spell checking there is nothing to fold with.
    if (!curwin->w_p_spell || *curwin->w_s->b_p_spl == NUL)
	return vim_strsave(word);

    // Use the sound-folding of the first language that supports it.
    for (idx = 0; idx < curwin->w_s->b_langp.ga_len; ++idx)
    {
	lp = LANGP_ENTRY(curwin->w_s->b_langp, idx);
	if (lp->lp_slang->sl_sal.ga_len <= 0)
	    continue;

	// soundfold the word
	spell_soundfold(lp->lp_slang, word, FALSE, sound);
	return vim_strsave(sound);
    }

    // No language with sound folding, return word as-is.
    return vim_strsave(word);
}
#endif
3083a1ba811aSBram Moolenaar 
30849ba0eb85SBram Moolenaar /*
30859ba0eb85SBram Moolenaar  * Turn "inword" into its sound-a-like equivalent in "res[MAXWLEN]".
3086d12a1326SBram Moolenaar  *
3087d12a1326SBram Moolenaar  * There are many ways to turn a word into a sound-a-like representation.  The
3088d12a1326SBram Moolenaar  * oldest is Soundex (1918!).   A nice overview can be found in "Approximate
3089d12a1326SBram Moolenaar  * swedish name matching - survey and test of different algorithms" by Klas
3090d12a1326SBram Moolenaar  * Erikson.
3091d12a1326SBram Moolenaar  *
3092d12a1326SBram Moolenaar  * We support two methods:
3093d12a1326SBram Moolenaar  * 1. SOFOFROM/SOFOTO do a simple character mapping.
3094d12a1326SBram Moolenaar  * 2. SAL items define a more advanced sound-folding (and much slower).
30959ba0eb85SBram Moolenaar  */
30969ccfebddSBram Moolenaar     void
spell_soundfold(slang_T * slang,char_u * inword,int folded,char_u * res)3097764b23c8SBram Moolenaar spell_soundfold(
3098764b23c8SBram Moolenaar     slang_T	*slang,
3099764b23c8SBram Moolenaar     char_u	*inword,
31000d6f5d97SBram Moolenaar     int		folded,	    // "inword" is already case-folded
3101764b23c8SBram Moolenaar     char_u	*res)
310242eeac35SBram Moolenaar {
310342eeac35SBram Moolenaar     char_u	fword[MAXWLEN];
310442eeac35SBram Moolenaar     char_u	*word;
310542eeac35SBram Moolenaar 
310642eeac35SBram Moolenaar     if (slang->sl_sofo)
31070d6f5d97SBram Moolenaar 	// SOFOFROM and SOFOTO used
310842eeac35SBram Moolenaar 	spell_soundfold_sofo(slang, inword, res);
310942eeac35SBram Moolenaar     else
311042eeac35SBram Moolenaar     {
31110d6f5d97SBram Moolenaar 	// SAL items used.  Requires the word to be case-folded.
311242eeac35SBram Moolenaar 	if (folded)
311342eeac35SBram Moolenaar 	    word = inword;
311442eeac35SBram Moolenaar 	else
311542eeac35SBram Moolenaar 	{
31164f135275SBram Moolenaar 	    (void)spell_casefold(curwin,
31174f135275SBram Moolenaar 				  inword, (int)STRLEN(inword), fword, MAXWLEN);
311842eeac35SBram Moolenaar 	    word = fword;
311942eeac35SBram Moolenaar 	}
312042eeac35SBram Moolenaar 
312142eeac35SBram Moolenaar 	if (has_mbyte)
312242eeac35SBram Moolenaar 	    spell_soundfold_wsal(slang, word, res);
312342eeac35SBram Moolenaar 	else
312442eeac35SBram Moolenaar 	    spell_soundfold_sal(slang, word, res);
312542eeac35SBram Moolenaar     }
312642eeac35SBram Moolenaar }
312742eeac35SBram Moolenaar 
312842eeac35SBram Moolenaar /*
312942eeac35SBram Moolenaar  * Perform sound folding of "inword" into "res" according to SOFOFROM and
313042eeac35SBram Moolenaar  * SOFOTO lines.
313142eeac35SBram Moolenaar  */
313242eeac35SBram Moolenaar     static void
spell_soundfold_sofo(slang_T * slang,char_u * inword,char_u * res)3133764b23c8SBram Moolenaar spell_soundfold_sofo(slang_T *slang, char_u *inword, char_u *res)
313442eeac35SBram Moolenaar {
313542eeac35SBram Moolenaar     char_u	*s;
313642eeac35SBram Moolenaar     int		ri = 0;
313742eeac35SBram Moolenaar     int		c;
313842eeac35SBram Moolenaar 
313942eeac35SBram Moolenaar     if (has_mbyte)
314042eeac35SBram Moolenaar     {
314142eeac35SBram Moolenaar 	int	prevc = 0;
314242eeac35SBram Moolenaar 	int	*ip;
314342eeac35SBram Moolenaar 
31440d6f5d97SBram Moolenaar 	// The sl_sal_first[] table contains the translation for chars up to
31450d6f5d97SBram Moolenaar 	// 255, sl_sal the rest.
314642eeac35SBram Moolenaar 	for (s = inword; *s != NUL; )
314742eeac35SBram Moolenaar 	{
31480fa313a7SBram Moolenaar 	    c = mb_cptr2char_adv(&s);
31491c465444SBram Moolenaar 	    if (enc_utf8 ? utf_class(c) == 0 : VIM_ISWHITE(c))
315042eeac35SBram Moolenaar 		c = ' ';
315142eeac35SBram Moolenaar 	    else if (c < 256)
315242eeac35SBram Moolenaar 		c = slang->sl_sal_first[c];
315342eeac35SBram Moolenaar 	    else
315442eeac35SBram Moolenaar 	    {
315542eeac35SBram Moolenaar 		ip = ((int **)slang->sl_sal.ga_data)[c & 0xff];
31560d6f5d97SBram Moolenaar 		if (ip == NULL)		// empty list, can't match
315742eeac35SBram Moolenaar 		    c = NUL;
315842eeac35SBram Moolenaar 		else
31590d6f5d97SBram Moolenaar 		    for (;;)		// find "c" in the list
316042eeac35SBram Moolenaar 		    {
31610d6f5d97SBram Moolenaar 			if (*ip == 0)	// not found
316242eeac35SBram Moolenaar 			{
316342eeac35SBram Moolenaar 			    c = NUL;
316442eeac35SBram Moolenaar 			    break;
316542eeac35SBram Moolenaar 			}
31660d6f5d97SBram Moolenaar 			if (*ip == c)	// match!
316742eeac35SBram Moolenaar 			{
316842eeac35SBram Moolenaar 			    c = ip[1];
316942eeac35SBram Moolenaar 			    break;
317042eeac35SBram Moolenaar 			}
317142eeac35SBram Moolenaar 			ip += 2;
317242eeac35SBram Moolenaar 		    }
317342eeac35SBram Moolenaar 	    }
317442eeac35SBram Moolenaar 
317542eeac35SBram Moolenaar 	    if (c != NUL && c != prevc)
317642eeac35SBram Moolenaar 	    {
317742eeac35SBram Moolenaar 		ri += mb_char2bytes(c, res + ri);
317842eeac35SBram Moolenaar 		if (ri + MB_MAXBYTES > MAXWLEN)
317942eeac35SBram Moolenaar 		    break;
318042eeac35SBram Moolenaar 		prevc = c;
318142eeac35SBram Moolenaar 	    }
318242eeac35SBram Moolenaar 	}
318342eeac35SBram Moolenaar     }
318442eeac35SBram Moolenaar     else
318542eeac35SBram Moolenaar     {
31860d6f5d97SBram Moolenaar 	// The sl_sal_first[] table contains the translation.
318742eeac35SBram Moolenaar 	for (s = inword; (c = *s) != NUL; ++s)
318842eeac35SBram Moolenaar 	{
31891c465444SBram Moolenaar 	    if (VIM_ISWHITE(c))
319042eeac35SBram Moolenaar 		c = ' ';
319142eeac35SBram Moolenaar 	    else
319242eeac35SBram Moolenaar 		c = slang->sl_sal_first[c];
319342eeac35SBram Moolenaar 	    if (c != NUL && (ri == 0 || res[ri - 1] != c))
319442eeac35SBram Moolenaar 		res[ri++] = c;
319542eeac35SBram Moolenaar 	}
319642eeac35SBram Moolenaar     }
319742eeac35SBram Moolenaar 
319842eeac35SBram Moolenaar     res[ri] = NUL;
319942eeac35SBram Moolenaar }
320042eeac35SBram Moolenaar 
320142eeac35SBram Moolenaar     static void
spell_soundfold_sal(slang_T * slang,char_u * inword,char_u * res)3202764b23c8SBram Moolenaar spell_soundfold_sal(slang_T *slang, char_u *inword, char_u *res)
32039ba0eb85SBram Moolenaar {
3204d857f0e0SBram Moolenaar     salitem_T	*smp;
32059ba0eb85SBram Moolenaar     char_u	word[MAXWLEN];
320642eeac35SBram Moolenaar     char_u	*s = inword;
32079ba0eb85SBram Moolenaar     char_u	*t;
3208d857f0e0SBram Moolenaar     char_u	*pf;
32099ba0eb85SBram Moolenaar     int		i, j, z;
3210d857f0e0SBram Moolenaar     int		reslen;
32119ba0eb85SBram Moolenaar     int		n, k = 0;
32129ba0eb85SBram Moolenaar     int		z0;
32139ba0eb85SBram Moolenaar     int		k0;
32149ba0eb85SBram Moolenaar     int		n0;
32159ba0eb85SBram Moolenaar     int		c;
32169ba0eb85SBram Moolenaar     int		pri;
32179ba0eb85SBram Moolenaar     int		p0 = -333;
32189ba0eb85SBram Moolenaar     int		c0;
32199ba0eb85SBram Moolenaar 
32200d6f5d97SBram Moolenaar     // Remove accents, if wanted.  We actually remove all non-word characters.
32210d6f5d97SBram Moolenaar     // But keep white space.  We need a copy, the word may be changed here.
32229ba0eb85SBram Moolenaar     if (slang->sl_rem_accents)
32239ba0eb85SBram Moolenaar     {
32249ba0eb85SBram Moolenaar 	t = word;
322542eeac35SBram Moolenaar 	while (*s != NUL)
32269ba0eb85SBram Moolenaar 	{
32271c465444SBram Moolenaar 	    if (VIM_ISWHITE(*s))
3228d857f0e0SBram Moolenaar 	    {
3229d857f0e0SBram Moolenaar 		*t++ = ' ';
3230d857f0e0SBram Moolenaar 		s = skipwhite(s);
3231d857f0e0SBram Moolenaar 	    }
32329f30f504SBram Moolenaar 	    else
32339ba0eb85SBram Moolenaar 	    {
3234cc63c647SBram Moolenaar 		if (spell_iswordp_nmw(s, curwin))
32359ba0eb85SBram Moolenaar 		    *t++ = *s;
32369ba0eb85SBram Moolenaar 		++s;
32379ba0eb85SBram Moolenaar 	    }
32389ba0eb85SBram Moolenaar 	}
32399ba0eb85SBram Moolenaar 	*t = NUL;
32409ba0eb85SBram Moolenaar     }
32419ba0eb85SBram Moolenaar     else
3242ef9d6aa7SBram Moolenaar 	vim_strncpy(word, s, MAXWLEN - 1);
32439ba0eb85SBram Moolenaar 
3244d857f0e0SBram Moolenaar     smp = (salitem_T *)slang->sl_sal.ga_data;
32459ba0eb85SBram Moolenaar 
32469ba0eb85SBram Moolenaar     /*
32479ba0eb85SBram Moolenaar      * This comes from Aspell phonet.cpp.  Converted from C++ to C.
32489f30f504SBram Moolenaar      * Changed to keep spaces.
32499ba0eb85SBram Moolenaar      */
3250d857f0e0SBram Moolenaar     i = reslen = z = 0;
32519ba0eb85SBram Moolenaar     while ((c = word[i]) != NUL)
32529ba0eb85SBram Moolenaar     {
32530d6f5d97SBram Moolenaar 	// Start with the first rule that has the character in the word.
32549ba0eb85SBram Moolenaar 	n = slang->sl_sal_first[c];
32559ba0eb85SBram Moolenaar 	z0 = 0;
32569ba0eb85SBram Moolenaar 
32579ba0eb85SBram Moolenaar 	if (n >= 0)
32589ba0eb85SBram Moolenaar 	{
32590d6f5d97SBram Moolenaar 	    // check all rules for the same letter
3260d857f0e0SBram Moolenaar 	    for (; (s = smp[n].sm_lead)[0] == c; ++n)
32619ba0eb85SBram Moolenaar 	    {
32620d6f5d97SBram Moolenaar 		// Quickly skip entries that don't match the word.  Most
32630d6f5d97SBram Moolenaar 		// entries are less then three chars, optimize for that.
3264d857f0e0SBram Moolenaar 		k = smp[n].sm_leadlen;
3265d857f0e0SBram Moolenaar 		if (k > 1)
32669ba0eb85SBram Moolenaar 		{
3267d857f0e0SBram Moolenaar 		    if (word[i + 1] != s[1])
3268d857f0e0SBram Moolenaar 			continue;
3269d857f0e0SBram Moolenaar 		    if (k > 2)
3270d857f0e0SBram Moolenaar 		    {
3271d857f0e0SBram Moolenaar 			for (j = 2; j < k; ++j)
3272d857f0e0SBram Moolenaar 			    if (word[i + j] != s[j])
3273d857f0e0SBram Moolenaar 				break;
3274d857f0e0SBram Moolenaar 			if (j < k)
3275d857f0e0SBram Moolenaar 			    continue;
3276d857f0e0SBram Moolenaar 		    }
32779ba0eb85SBram Moolenaar 		}
32789ba0eb85SBram Moolenaar 
327942eeac35SBram Moolenaar 		if ((pf = smp[n].sm_oneof) != NULL)
32809ba0eb85SBram Moolenaar 		{
32810d6f5d97SBram Moolenaar 		    // Check for match with one of the chars in "sm_oneof".
3282d857f0e0SBram Moolenaar 		    while (*pf != NUL && *pf != word[i + k])
3283d857f0e0SBram Moolenaar 			++pf;
3284d857f0e0SBram Moolenaar 		    if (*pf == NUL)
3285d857f0e0SBram Moolenaar 			continue;
32869ba0eb85SBram Moolenaar 		    ++k;
32879ba0eb85SBram Moolenaar 		}
3288d857f0e0SBram Moolenaar 		s = smp[n].sm_rules;
32890d6f5d97SBram Moolenaar 		pri = 5;    // default priority
32909ba0eb85SBram Moolenaar 
32919ba0eb85SBram Moolenaar 		p0 = *s;
32929ba0eb85SBram Moolenaar 		k0 = k;
32939ba0eb85SBram Moolenaar 		while (*s == '-' && k > 1)
32949ba0eb85SBram Moolenaar 		{
32959ba0eb85SBram Moolenaar 		    k--;
32969ba0eb85SBram Moolenaar 		    s++;
32979ba0eb85SBram Moolenaar 		}
32989ba0eb85SBram Moolenaar 		if (*s == '<')
32999ba0eb85SBram Moolenaar 		    s++;
3300d857f0e0SBram Moolenaar 		if (VIM_ISDIGIT(*s))
33019ba0eb85SBram Moolenaar 		{
33020d6f5d97SBram Moolenaar 		    // determine priority
33039ba0eb85SBram Moolenaar 		    pri = *s - '0';
33049ba0eb85SBram Moolenaar 		    s++;
33059ba0eb85SBram Moolenaar 		}
33069ba0eb85SBram Moolenaar 		if (*s == '^' && *(s + 1) == '^')
33079ba0eb85SBram Moolenaar 		    s++;
33089ba0eb85SBram Moolenaar 
33099ba0eb85SBram Moolenaar 		if (*s == NUL
33109ba0eb85SBram Moolenaar 			|| (*s == '^'
33119f30f504SBram Moolenaar 			    && (i == 0 || !(word[i - 1] == ' '
3312860cae1cSBram Moolenaar 				      || spell_iswordp(word + i - 1, curwin)))
33139ba0eb85SBram Moolenaar 			    && (*(s + 1) != '$'
3314860cae1cSBram Moolenaar 				|| (!spell_iswordp(word + i + k0, curwin))))
33159ba0eb85SBram Moolenaar 			|| (*s == '$' && i > 0
3316860cae1cSBram Moolenaar 			    && spell_iswordp(word + i - 1, curwin)
3317860cae1cSBram Moolenaar 			    && (!spell_iswordp(word + i + k0, curwin))))
33189ba0eb85SBram Moolenaar 		{
33190d6f5d97SBram Moolenaar 		    // search for followup rules, if:
33200d6f5d97SBram Moolenaar 		    // followup and k > 1  and  NO '-' in searchstring
33219ba0eb85SBram Moolenaar 		    c0 = word[i + k - 1];
33229ba0eb85SBram Moolenaar 		    n0 = slang->sl_sal_first[c0];
33239ba0eb85SBram Moolenaar 
33249ba0eb85SBram Moolenaar 		    if (slang->sl_followup && k > 1 && n0 >= 0
33259ba0eb85SBram Moolenaar 					   && p0 != '-' && word[i + k] != NUL)
33269ba0eb85SBram Moolenaar 		    {
33270d6f5d97SBram Moolenaar 			// test follow-up rule for "word[i + k]"
3328d857f0e0SBram Moolenaar 			for ( ; (s = smp[n0].sm_lead)[0] == c0; ++n0)
33299ba0eb85SBram Moolenaar 			{
33300d6f5d97SBram Moolenaar 			    // Quickly skip entries that don't match the word.
33310d6f5d97SBram Moolenaar 			    //
3332d857f0e0SBram Moolenaar 			    k0 = smp[n0].sm_leadlen;
3333d857f0e0SBram Moolenaar 			    if (k0 > 1)
3334d857f0e0SBram Moolenaar 			    {
3335d857f0e0SBram Moolenaar 				if (word[i + k] != s[1])
3336d857f0e0SBram Moolenaar 				    continue;
3337d857f0e0SBram Moolenaar 				if (k0 > 2)
3338d857f0e0SBram Moolenaar 				{
3339d857f0e0SBram Moolenaar 				    pf = word + i + k + 1;
3340d857f0e0SBram Moolenaar 				    for (j = 2; j < k0; ++j)
3341d857f0e0SBram Moolenaar 					if (*pf++ != s[j])
3342d857f0e0SBram Moolenaar 					    break;
3343d857f0e0SBram Moolenaar 				    if (j < k0)
3344d857f0e0SBram Moolenaar 					continue;
3345d857f0e0SBram Moolenaar 				}
3346d857f0e0SBram Moolenaar 			    }
3347d857f0e0SBram Moolenaar 			    k0 += k - 1;
33489ba0eb85SBram Moolenaar 
334942eeac35SBram Moolenaar 			    if ((pf = smp[n0].sm_oneof) != NULL)
33509ba0eb85SBram Moolenaar 			    {
33510d6f5d97SBram Moolenaar 				// Check for match with one of the chars in
33520d6f5d97SBram Moolenaar 				// "sm_oneof".
3353d857f0e0SBram Moolenaar 				while (*pf != NUL && *pf != word[i + k0])
3354d857f0e0SBram Moolenaar 				    ++pf;
3355d857f0e0SBram Moolenaar 				if (*pf == NUL)
3356d857f0e0SBram Moolenaar 				    continue;
33579ba0eb85SBram Moolenaar 				++k0;
33589ba0eb85SBram Moolenaar 			    }
3359d857f0e0SBram Moolenaar 
3360d857f0e0SBram Moolenaar 			    p0 = 5;
3361d857f0e0SBram Moolenaar 			    s = smp[n0].sm_rules;
33629ba0eb85SBram Moolenaar 			    while (*s == '-')
33639ba0eb85SBram Moolenaar 			    {
33640d6f5d97SBram Moolenaar 				// "k0" gets NOT reduced because
33650d6f5d97SBram Moolenaar 				// "if (k0 == k)"
33669ba0eb85SBram Moolenaar 				s++;
33679ba0eb85SBram Moolenaar 			    }
33689ba0eb85SBram Moolenaar 			    if (*s == '<')
33699ba0eb85SBram Moolenaar 				s++;
3370d857f0e0SBram Moolenaar 			    if (VIM_ISDIGIT(*s))
33719ba0eb85SBram Moolenaar 			    {
33729ba0eb85SBram Moolenaar 				p0 = *s - '0';
33739ba0eb85SBram Moolenaar 				s++;
33749ba0eb85SBram Moolenaar 			    }
33759ba0eb85SBram Moolenaar 
33769ba0eb85SBram Moolenaar 			    if (*s == NUL
33770d6f5d97SBram Moolenaar 				    // *s == '^' cuts
33789ba0eb85SBram Moolenaar 				    || (*s == '$'
33799c96f592SBram Moolenaar 					    && !spell_iswordp(word + i + k0,
3380860cae1cSBram Moolenaar 								     curwin)))
33819ba0eb85SBram Moolenaar 			    {
33829ba0eb85SBram Moolenaar 				if (k0 == k)
33830d6f5d97SBram Moolenaar 				    // this is just a piece of the string
33849ba0eb85SBram Moolenaar 				    continue;
33859ba0eb85SBram Moolenaar 
33869ba0eb85SBram Moolenaar 				if (p0 < pri)
33870d6f5d97SBram Moolenaar 				    // priority too low
33889ba0eb85SBram Moolenaar 				    continue;
33890d6f5d97SBram Moolenaar 				// rule fits; stop search
33909ba0eb85SBram Moolenaar 				break;
33919ba0eb85SBram Moolenaar 			    }
33929ba0eb85SBram Moolenaar 			}
33939ba0eb85SBram Moolenaar 
3394d857f0e0SBram Moolenaar 			if (p0 >= pri && smp[n0].sm_lead[0] == c0)
33959ba0eb85SBram Moolenaar 			    continue;
33969ba0eb85SBram Moolenaar 		    }
33979ba0eb85SBram Moolenaar 
33980d6f5d97SBram Moolenaar 		    // replace string
3399d857f0e0SBram Moolenaar 		    s = smp[n].sm_to;
34000dc065eeSBram Moolenaar 		    if (s == NULL)
34010dc065eeSBram Moolenaar 			s = (char_u *)"";
3402d857f0e0SBram Moolenaar 		    pf = smp[n].sm_rules;
3403d857f0e0SBram Moolenaar 		    p0 = (vim_strchr(pf, '<') != NULL) ? 1 : 0;
34049ba0eb85SBram Moolenaar 		    if (p0 == 1 && z == 0)
34059ba0eb85SBram Moolenaar 		    {
34060d6f5d97SBram Moolenaar 			// rule with '<' is used
3407d857f0e0SBram Moolenaar 			if (reslen > 0 && *s != NUL && (res[reslen - 1] == c
3408d857f0e0SBram Moolenaar 						    || res[reslen - 1] == *s))
3409d857f0e0SBram Moolenaar 			    reslen--;
34109ba0eb85SBram Moolenaar 			z0 = 1;
34119ba0eb85SBram Moolenaar 			z = 1;
34129ba0eb85SBram Moolenaar 			k0 = 0;
34139ba0eb85SBram Moolenaar 			while (*s != NUL && word[i + k0] != NUL)
34149ba0eb85SBram Moolenaar 			{
34159ba0eb85SBram Moolenaar 			    word[i + k0] = *s;
34169ba0eb85SBram Moolenaar 			    k0++;
34179ba0eb85SBram Moolenaar 			    s++;
34189ba0eb85SBram Moolenaar 			}
34199ba0eb85SBram Moolenaar 			if (k > k0)
3420a7241f5fSBram Moolenaar 			    STRMOVE(word + i + k0, word + i + k);
34219ba0eb85SBram Moolenaar 
34220d6f5d97SBram Moolenaar 			// new "actual letter"
34239ba0eb85SBram Moolenaar 			c = word[i];
34249ba0eb85SBram Moolenaar 		    }
34259ba0eb85SBram Moolenaar 		    else
34269ba0eb85SBram Moolenaar 		    {
34270d6f5d97SBram Moolenaar 			// no '<' rule used
34289ba0eb85SBram Moolenaar 			i += k - 1;
34299ba0eb85SBram Moolenaar 			z = 0;
3430d857f0e0SBram Moolenaar 			while (*s != NUL && s[1] != NUL && reslen < MAXWLEN)
34319ba0eb85SBram Moolenaar 			{
3432d857f0e0SBram Moolenaar 			    if (reslen == 0 || res[reslen - 1] != *s)
3433a1ba811aSBram Moolenaar 				res[reslen++] = *s;
34349ba0eb85SBram Moolenaar 			    s++;
34359ba0eb85SBram Moolenaar 			}
34360d6f5d97SBram Moolenaar 			// new "actual letter"
34379ba0eb85SBram Moolenaar 			c = *s;
3438d857f0e0SBram Moolenaar 			if (strstr((char *)pf, "^^") != NULL)
34399ba0eb85SBram Moolenaar 			{
34409ba0eb85SBram Moolenaar 			    if (c != NUL)
3441a1ba811aSBram Moolenaar 				res[reslen++] = c;
3442a7241f5fSBram Moolenaar 			    STRMOVE(word, word + i + 1);
34439ba0eb85SBram Moolenaar 			    i = 0;
34449ba0eb85SBram Moolenaar 			    z0 = 1;
34459ba0eb85SBram Moolenaar 			}
34469ba0eb85SBram Moolenaar 		    }
34479ba0eb85SBram Moolenaar 		    break;
34489ba0eb85SBram Moolenaar 		}
34499ba0eb85SBram Moolenaar 	    }
34509ba0eb85SBram Moolenaar 	}
34511c465444SBram Moolenaar 	else if (VIM_ISWHITE(c))
34529f30f504SBram Moolenaar 	{
34539f30f504SBram Moolenaar 	    c = ' ';
34549f30f504SBram Moolenaar 	    k = 1;
34559f30f504SBram Moolenaar 	}
34569ba0eb85SBram Moolenaar 
34579ba0eb85SBram Moolenaar 	if (z0 == 0)
34589ba0eb85SBram Moolenaar 	{
3459d857f0e0SBram Moolenaar 	    if (k && !p0 && reslen < MAXWLEN && c != NUL
3460d857f0e0SBram Moolenaar 		    && (!slang->sl_collapse || reslen == 0
3461d857f0e0SBram Moolenaar 						     || res[reslen - 1] != c))
34620d6f5d97SBram Moolenaar 		// condense only double letters
3463a1ba811aSBram Moolenaar 		res[reslen++] = c;
34649ba0eb85SBram Moolenaar 
34659ba0eb85SBram Moolenaar 	    i++;
34669ba0eb85SBram Moolenaar 	    z = 0;
34679ba0eb85SBram Moolenaar 	    k = 0;
34689ba0eb85SBram Moolenaar 	}
34699ba0eb85SBram Moolenaar     }
34709ba0eb85SBram Moolenaar 
3471d857f0e0SBram Moolenaar     res[reslen] = NUL;
34729ba0eb85SBram Moolenaar }
34739ba0eb85SBram Moolenaar 
3474a1ba811aSBram Moolenaar /*
3475a1ba811aSBram Moolenaar  * Turn "inword" into its sound-a-like equivalent in "res[MAXWLEN]".
3476a1ba811aSBram Moolenaar  * Multi-byte version of spell_soundfold().
3477a1ba811aSBram Moolenaar  */
3478a1ba811aSBram Moolenaar     static void
spell_soundfold_wsal(slang_T * slang,char_u * inword,char_u * res)3479764b23c8SBram Moolenaar spell_soundfold_wsal(slang_T *slang, char_u *inword, char_u *res)
3480a1ba811aSBram Moolenaar {
348142eeac35SBram Moolenaar     salitem_T	*smp = (salitem_T *)slang->sl_sal.ga_data;
3482a1ba811aSBram Moolenaar     int		word[MAXWLEN];
3483a1ba811aSBram Moolenaar     int		wres[MAXWLEN];
3484a1ba811aSBram Moolenaar     int		l;
3485a1ba811aSBram Moolenaar     char_u	*s;
3486a1ba811aSBram Moolenaar     int		*ws;
3487a1ba811aSBram Moolenaar     char_u	*t;
3488a1ba811aSBram Moolenaar     int		*pf;
3489a1ba811aSBram Moolenaar     int		i, j, z;
3490a1ba811aSBram Moolenaar     int		reslen;
3491a1ba811aSBram Moolenaar     int		n, k = 0;
3492a1ba811aSBram Moolenaar     int		z0;
3493a1ba811aSBram Moolenaar     int		k0;
3494a1ba811aSBram Moolenaar     int		n0;
3495a1ba811aSBram Moolenaar     int		c;
3496a1ba811aSBram Moolenaar     int		pri;
3497a1ba811aSBram Moolenaar     int		p0 = -333;
3498a1ba811aSBram Moolenaar     int		c0;
3499a1ba811aSBram Moolenaar     int		did_white = FALSE;
3500f9de140eSBram Moolenaar     int		wordlen;
3501f9de140eSBram Moolenaar 
3502a1ba811aSBram Moolenaar 
3503a1ba811aSBram Moolenaar     /*
3504a1ba811aSBram Moolenaar      * Convert the multi-byte string to a wide-character string.
3505a1ba811aSBram Moolenaar      * Remove accents, if wanted.  We actually remove all non-word characters.
3506a1ba811aSBram Moolenaar      * But keep white space.
3507a1ba811aSBram Moolenaar      */
3508f9de140eSBram Moolenaar     wordlen = 0;
3509a1ba811aSBram Moolenaar     for (s = inword; *s != NUL; )
3510a1ba811aSBram Moolenaar     {
3511a1ba811aSBram Moolenaar 	t = s;
35120fa313a7SBram Moolenaar 	c = mb_cptr2char_adv(&s);
3513a1ba811aSBram Moolenaar 	if (slang->sl_rem_accents)
3514a1ba811aSBram Moolenaar 	{
35151c465444SBram Moolenaar 	    if (enc_utf8 ? utf_class(c) == 0 : VIM_ISWHITE(c))
3516a1ba811aSBram Moolenaar 	    {
3517a1ba811aSBram Moolenaar 		if (did_white)
3518a1ba811aSBram Moolenaar 		    continue;
3519a1ba811aSBram Moolenaar 		c = ' ';
3520a1ba811aSBram Moolenaar 		did_white = TRUE;
3521a1ba811aSBram Moolenaar 	    }
3522a1ba811aSBram Moolenaar 	    else
3523a1ba811aSBram Moolenaar 	    {
3524a1ba811aSBram Moolenaar 		did_white = FALSE;
3525cc63c647SBram Moolenaar 		if (!spell_iswordp_nmw(t, curwin))
3526a1ba811aSBram Moolenaar 		    continue;
3527a1ba811aSBram Moolenaar 	    }
3528a1ba811aSBram Moolenaar 	}
3529f9de140eSBram Moolenaar 	word[wordlen++] = c;
3530a1ba811aSBram Moolenaar     }
3531f9de140eSBram Moolenaar     word[wordlen] = NUL;
3532a1ba811aSBram Moolenaar 
3533a1ba811aSBram Moolenaar     /*
3534f9de140eSBram Moolenaar      * This algorithm comes from Aspell phonet.cpp.
3535a1ba811aSBram Moolenaar      * Converted from C++ to C.  Added support for multi-byte chars.
3536a1ba811aSBram Moolenaar      * Changed to keep spaces.
3537a1ba811aSBram Moolenaar      */
3538a1ba811aSBram Moolenaar     i = reslen = z = 0;
3539a1ba811aSBram Moolenaar     while ((c = word[i]) != NUL)
3540a1ba811aSBram Moolenaar     {
35410d6f5d97SBram Moolenaar 	// Start with the first rule that has the character in the word.
3542a1ba811aSBram Moolenaar 	n = slang->sl_sal_first[c & 0xff];
3543a1ba811aSBram Moolenaar 	z0 = 0;
3544a1ba811aSBram Moolenaar 
3545a1ba811aSBram Moolenaar 	if (n >= 0)
3546a1ba811aSBram Moolenaar 	{
35470d6f5d97SBram Moolenaar 	    // Check all rules for the same index byte.
35480d6f5d97SBram Moolenaar 	    // If c is 0x300 need extra check for the end of the array, as
35490d6f5d97SBram Moolenaar 	    // (c & 0xff) is NUL.
355095e8579eSBram Moolenaar 	    for (; ((ws = smp[n].sm_lead_w)[0] & 0xff) == (c & 0xff)
355195e8579eSBram Moolenaar 							 && ws[0] != NUL; ++n)
3552a1ba811aSBram Moolenaar 	    {
35530d6f5d97SBram Moolenaar 		// Quickly skip entries that don't match the word.  Most
35540d6f5d97SBram Moolenaar 		// entries are less then three chars, optimize for that.
355542eeac35SBram Moolenaar 		if (c != ws[0])
355642eeac35SBram Moolenaar 		    continue;
3557a1ba811aSBram Moolenaar 		k = smp[n].sm_leadlen;
3558a1ba811aSBram Moolenaar 		if (k > 1)
3559a1ba811aSBram Moolenaar 		{
3560a1ba811aSBram Moolenaar 		    if (word[i + 1] != ws[1])
3561a1ba811aSBram Moolenaar 			continue;
3562a1ba811aSBram Moolenaar 		    if (k > 2)
3563a1ba811aSBram Moolenaar 		    {
3564a1ba811aSBram Moolenaar 			for (j = 2; j < k; ++j)
3565a1ba811aSBram Moolenaar 			    if (word[i + j] != ws[j])
3566a1ba811aSBram Moolenaar 				break;
3567a1ba811aSBram Moolenaar 			if (j < k)
3568a1ba811aSBram Moolenaar 			    continue;
3569a1ba811aSBram Moolenaar 		    }
3570a1ba811aSBram Moolenaar 		}
3571a1ba811aSBram Moolenaar 
357242eeac35SBram Moolenaar 		if ((pf = smp[n].sm_oneof_w) != NULL)
3573a1ba811aSBram Moolenaar 		{
35740d6f5d97SBram Moolenaar 		    // Check for match with one of the chars in "sm_oneof".
3575a1ba811aSBram Moolenaar 		    while (*pf != NUL && *pf != word[i + k])
3576a1ba811aSBram Moolenaar 			++pf;
3577a1ba811aSBram Moolenaar 		    if (*pf == NUL)
3578a1ba811aSBram Moolenaar 			continue;
3579a1ba811aSBram Moolenaar 		    ++k;
3580a1ba811aSBram Moolenaar 		}
3581a1ba811aSBram Moolenaar 		s = smp[n].sm_rules;
35820d6f5d97SBram Moolenaar 		pri = 5;    // default priority
3583a1ba811aSBram Moolenaar 
3584a1ba811aSBram Moolenaar 		p0 = *s;
3585a1ba811aSBram Moolenaar 		k0 = k;
3586a1ba811aSBram Moolenaar 		while (*s == '-' && k > 1)
3587a1ba811aSBram Moolenaar 		{
3588a1ba811aSBram Moolenaar 		    k--;
3589a1ba811aSBram Moolenaar 		    s++;
3590a1ba811aSBram Moolenaar 		}
3591a1ba811aSBram Moolenaar 		if (*s == '<')
3592a1ba811aSBram Moolenaar 		    s++;
3593a1ba811aSBram Moolenaar 		if (VIM_ISDIGIT(*s))
3594a1ba811aSBram Moolenaar 		{
35950d6f5d97SBram Moolenaar 		    // determine priority
3596a1ba811aSBram Moolenaar 		    pri = *s - '0';
3597a1ba811aSBram Moolenaar 		    s++;
3598a1ba811aSBram Moolenaar 		}
3599a1ba811aSBram Moolenaar 		if (*s == '^' && *(s + 1) == '^')
3600a1ba811aSBram Moolenaar 		    s++;
3601a1ba811aSBram Moolenaar 
3602a1ba811aSBram Moolenaar 		if (*s == NUL
3603a1ba811aSBram Moolenaar 			|| (*s == '^'
3604a1ba811aSBram Moolenaar 			    && (i == 0 || !(word[i - 1] == ' '
3605860cae1cSBram Moolenaar 				    || spell_iswordp_w(word + i - 1, curwin)))
3606a1ba811aSBram Moolenaar 			    && (*(s + 1) != '$'
3607860cae1cSBram Moolenaar 				|| (!spell_iswordp_w(word + i + k0, curwin))))
3608a1ba811aSBram Moolenaar 			|| (*s == '$' && i > 0
3609860cae1cSBram Moolenaar 			    && spell_iswordp_w(word + i - 1, curwin)
3610860cae1cSBram Moolenaar 			    && (!spell_iswordp_w(word + i + k0, curwin))))
3611a1ba811aSBram Moolenaar 		{
36120d6f5d97SBram Moolenaar 		    // search for followup rules, if:
36130d6f5d97SBram Moolenaar 		    // followup and k > 1  and  NO '-' in searchstring
3614a1ba811aSBram Moolenaar 		    c0 = word[i + k - 1];
3615a1ba811aSBram Moolenaar 		    n0 = slang->sl_sal_first[c0 & 0xff];
3616a1ba811aSBram Moolenaar 
3617a1ba811aSBram Moolenaar 		    if (slang->sl_followup && k > 1 && n0 >= 0
3618a1ba811aSBram Moolenaar 					   && p0 != '-' && word[i + k] != NUL)
3619a1ba811aSBram Moolenaar 		    {
36200d6f5d97SBram Moolenaar 			// Test follow-up rule for "word[i + k]"; loop over
36210d6f5d97SBram Moolenaar 			// all entries with the same index byte.
3622a1ba811aSBram Moolenaar 			for ( ; ((ws = smp[n0].sm_lead_w)[0] & 0xff)
3623a1ba811aSBram Moolenaar 							 == (c0 & 0xff); ++n0)
3624a1ba811aSBram Moolenaar 			{
36250d6f5d97SBram Moolenaar 			    // Quickly skip entries that don't match the word.
362642eeac35SBram Moolenaar 			    if (c0 != ws[0])
362742eeac35SBram Moolenaar 				continue;
3628a1ba811aSBram Moolenaar 			    k0 = smp[n0].sm_leadlen;
3629a1ba811aSBram Moolenaar 			    if (k0 > 1)
3630a1ba811aSBram Moolenaar 			    {
3631a1ba811aSBram Moolenaar 				if (word[i + k] != ws[1])
3632a1ba811aSBram Moolenaar 				    continue;
3633a1ba811aSBram Moolenaar 				if (k0 > 2)
3634a1ba811aSBram Moolenaar 				{
3635a1ba811aSBram Moolenaar 				    pf = word + i + k + 1;
3636a1ba811aSBram Moolenaar 				    for (j = 2; j < k0; ++j)
3637a1ba811aSBram Moolenaar 					if (*pf++ != ws[j])
3638a1ba811aSBram Moolenaar 					    break;
3639a1ba811aSBram Moolenaar 				    if (j < k0)
3640a1ba811aSBram Moolenaar 					continue;
3641a1ba811aSBram Moolenaar 				}
3642a1ba811aSBram Moolenaar 			    }
3643a1ba811aSBram Moolenaar 			    k0 += k - 1;
3644a1ba811aSBram Moolenaar 
364542eeac35SBram Moolenaar 			    if ((pf = smp[n0].sm_oneof_w) != NULL)
3646a1ba811aSBram Moolenaar 			    {
36470d6f5d97SBram Moolenaar 				// Check for match with one of the chars in
36480d6f5d97SBram Moolenaar 				// "sm_oneof".
3649a1ba811aSBram Moolenaar 				while (*pf != NUL && *pf != word[i + k0])
3650a1ba811aSBram Moolenaar 				    ++pf;
3651a1ba811aSBram Moolenaar 				if (*pf == NUL)
3652a1ba811aSBram Moolenaar 				    continue;
3653a1ba811aSBram Moolenaar 				++k0;
3654a1ba811aSBram Moolenaar 			    }
3655a1ba811aSBram Moolenaar 
3656a1ba811aSBram Moolenaar 			    p0 = 5;
3657a1ba811aSBram Moolenaar 			    s = smp[n0].sm_rules;
3658a1ba811aSBram Moolenaar 			    while (*s == '-')
3659a1ba811aSBram Moolenaar 			    {
36600d6f5d97SBram Moolenaar 				// "k0" gets NOT reduced because
36610d6f5d97SBram Moolenaar 				// "if (k0 == k)"
3662a1ba811aSBram Moolenaar 				s++;
3663a1ba811aSBram Moolenaar 			    }
3664a1ba811aSBram Moolenaar 			    if (*s == '<')
3665a1ba811aSBram Moolenaar 				s++;
3666a1ba811aSBram Moolenaar 			    if (VIM_ISDIGIT(*s))
3667a1ba811aSBram Moolenaar 			    {
3668a1ba811aSBram Moolenaar 				p0 = *s - '0';
3669a1ba811aSBram Moolenaar 				s++;
3670a1ba811aSBram Moolenaar 			    }
3671a1ba811aSBram Moolenaar 
3672a1ba811aSBram Moolenaar 			    if (*s == NUL
36730d6f5d97SBram Moolenaar 				    // *s == '^' cuts
3674a1ba811aSBram Moolenaar 				    || (*s == '$'
36759c96f592SBram Moolenaar 					 && !spell_iswordp_w(word + i + k0,
3676860cae1cSBram Moolenaar 								     curwin)))
3677a1ba811aSBram Moolenaar 			    {
3678a1ba811aSBram Moolenaar 				if (k0 == k)
36790d6f5d97SBram Moolenaar 				    // this is just a piece of the string
3680a1ba811aSBram Moolenaar 				    continue;
3681a1ba811aSBram Moolenaar 
3682a1ba811aSBram Moolenaar 				if (p0 < pri)
36830d6f5d97SBram Moolenaar 				    // priority too low
3684a1ba811aSBram Moolenaar 				    continue;
36850d6f5d97SBram Moolenaar 				// rule fits; stop search
3686a1ba811aSBram Moolenaar 				break;
3687a1ba811aSBram Moolenaar 			    }
3688a1ba811aSBram Moolenaar 			}
3689a1ba811aSBram Moolenaar 
3690a1ba811aSBram Moolenaar 			if (p0 >= pri && (smp[n0].sm_lead_w[0] & 0xff)
3691a1ba811aSBram Moolenaar 							       == (c0 & 0xff))
3692a1ba811aSBram Moolenaar 			    continue;
3693a1ba811aSBram Moolenaar 		    }
3694a1ba811aSBram Moolenaar 
36950d6f5d97SBram Moolenaar 		    // replace string
3696a1ba811aSBram Moolenaar 		    ws = smp[n].sm_to_w;
3697a1ba811aSBram Moolenaar 		    s = smp[n].sm_rules;
3698a1ba811aSBram Moolenaar 		    p0 = (vim_strchr(s, '<') != NULL) ? 1 : 0;
3699a1ba811aSBram Moolenaar 		    if (p0 == 1 && z == 0)
3700a1ba811aSBram Moolenaar 		    {
37010d6f5d97SBram Moolenaar 			// rule with '<' is used
37020dc065eeSBram Moolenaar 			if (reslen > 0 && ws != NULL && *ws != NUL
37030dc065eeSBram Moolenaar 				&& (wres[reslen - 1] == c
3704a1ba811aSBram Moolenaar 						    || wres[reslen - 1] == *ws))
3705a1ba811aSBram Moolenaar 			    reslen--;
3706a1ba811aSBram Moolenaar 			z0 = 1;
3707a1ba811aSBram Moolenaar 			z = 1;
3708a1ba811aSBram Moolenaar 			k0 = 0;
37090dc065eeSBram Moolenaar 			if (ws != NULL)
3710a1ba811aSBram Moolenaar 			    while (*ws != NUL && word[i + k0] != NUL)
3711a1ba811aSBram Moolenaar 			    {
3712a1ba811aSBram Moolenaar 				word[i + k0] = *ws;
3713a1ba811aSBram Moolenaar 				k0++;
3714a1ba811aSBram Moolenaar 				ws++;
3715a1ba811aSBram Moolenaar 			    }
3716a1ba811aSBram Moolenaar 			if (k > k0)
3717a1ba811aSBram Moolenaar 			    mch_memmove(word + i + k0, word + i + k,
3718f9de140eSBram Moolenaar 				    sizeof(int) * (wordlen - (i + k) + 1));
3719a1ba811aSBram Moolenaar 
37200d6f5d97SBram Moolenaar 			// new "actual letter"
3721a1ba811aSBram Moolenaar 			c = word[i];
3722a1ba811aSBram Moolenaar 		    }
3723a1ba811aSBram Moolenaar 		    else
3724a1ba811aSBram Moolenaar 		    {
37250d6f5d97SBram Moolenaar 			// no '<' rule used
3726a1ba811aSBram Moolenaar 			i += k - 1;
3727a1ba811aSBram Moolenaar 			z = 0;
37280dc065eeSBram Moolenaar 			if (ws != NULL)
37290dc065eeSBram Moolenaar 			    while (*ws != NUL && ws[1] != NUL
37300dc065eeSBram Moolenaar 							  && reslen < MAXWLEN)
3731a1ba811aSBram Moolenaar 			    {
3732a1ba811aSBram Moolenaar 				if (reslen == 0 || wres[reslen - 1] != *ws)
3733a1ba811aSBram Moolenaar 				    wres[reslen++] = *ws;
3734a1ba811aSBram Moolenaar 				ws++;
3735a1ba811aSBram Moolenaar 			    }
37360d6f5d97SBram Moolenaar 			// new "actual letter"
37370dc065eeSBram Moolenaar 			if (ws == NULL)
37380dc065eeSBram Moolenaar 			    c = NUL;
37390dc065eeSBram Moolenaar 			else
3740a1ba811aSBram Moolenaar 			    c = *ws;
3741a1ba811aSBram Moolenaar 			if (strstr((char *)s, "^^") != NULL)
3742a1ba811aSBram Moolenaar 			{
3743a1ba811aSBram Moolenaar 			    if (c != NUL)
3744a1ba811aSBram Moolenaar 				wres[reslen++] = c;
3745a1ba811aSBram Moolenaar 			    mch_memmove(word, word + i + 1,
3746f9de140eSBram Moolenaar 				       sizeof(int) * (wordlen - (i + 1) + 1));
3747a1ba811aSBram Moolenaar 			    i = 0;
3748a1ba811aSBram Moolenaar 			    z0 = 1;
3749a1ba811aSBram Moolenaar 			}
3750a1ba811aSBram Moolenaar 		    }
3751a1ba811aSBram Moolenaar 		    break;
3752a1ba811aSBram Moolenaar 		}
3753a1ba811aSBram Moolenaar 	    }
3754a1ba811aSBram Moolenaar 	}
37551c465444SBram Moolenaar 	else if (VIM_ISWHITE(c))
3756a1ba811aSBram Moolenaar 	{
3757a1ba811aSBram Moolenaar 	    c = ' ';
3758a1ba811aSBram Moolenaar 	    k = 1;
3759a1ba811aSBram Moolenaar 	}
3760a1ba811aSBram Moolenaar 
3761a1ba811aSBram Moolenaar 	if (z0 == 0)
3762a1ba811aSBram Moolenaar 	{
3763a1ba811aSBram Moolenaar 	    if (k && !p0 && reslen < MAXWLEN && c != NUL
3764a1ba811aSBram Moolenaar 		    && (!slang->sl_collapse || reslen == 0
3765a1ba811aSBram Moolenaar 						     || wres[reslen - 1] != c))
37660d6f5d97SBram Moolenaar 		// condense only double letters
3767a1ba811aSBram Moolenaar 		wres[reslen++] = c;
3768a1ba811aSBram Moolenaar 
3769a1ba811aSBram Moolenaar 	    i++;
3770a1ba811aSBram Moolenaar 	    z = 0;
3771a1ba811aSBram Moolenaar 	    k = 0;
3772a1ba811aSBram Moolenaar 	}
3773a1ba811aSBram Moolenaar     }
3774a1ba811aSBram Moolenaar 
37750d6f5d97SBram Moolenaar     // Convert wide characters in "wres" to a multi-byte string in "res".
3776a1ba811aSBram Moolenaar     l = 0;
3777a1ba811aSBram Moolenaar     for (n = 0; n < reslen; ++n)
3778a1ba811aSBram Moolenaar     {
3779a1ba811aSBram Moolenaar 	l += mb_char2bytes(wres[n], res + l);
3780a1ba811aSBram Moolenaar 	if (l + MB_MAXBYTES > MAXWLEN)
3781a1ba811aSBram Moolenaar 	    break;
3782a1ba811aSBram Moolenaar     }
3783a1ba811aSBram Moolenaar     res[l] = NUL;
3784a1ba811aSBram Moolenaar }
3785a1ba811aSBram Moolenaar 
37869f30f504SBram Moolenaar /*
3787362e1a30SBram Moolenaar  * ":spellinfo"
3788362e1a30SBram Moolenaar  */
3789362e1a30SBram Moolenaar     void
ex_spellinfo(exarg_T * eap UNUSED)3790764b23c8SBram Moolenaar ex_spellinfo(exarg_T *eap UNUSED)
3791362e1a30SBram Moolenaar {
3792362e1a30SBram Moolenaar     int		lpi;
3793362e1a30SBram Moolenaar     langp_T	*lp;
3794362e1a30SBram Moolenaar     char_u	*p;
3795362e1a30SBram Moolenaar 
3796362e1a30SBram Moolenaar     if (no_spell_checking(curwin))
3797362e1a30SBram Moolenaar 	return;
3798362e1a30SBram Moolenaar 
3799362e1a30SBram Moolenaar     msg_start();
3800860cae1cSBram Moolenaar     for (lpi = 0; lpi < curwin->w_s->b_langp.ga_len && !got_int; ++lpi)
3801362e1a30SBram Moolenaar     {
3802860cae1cSBram Moolenaar 	lp = LANGP_ENTRY(curwin->w_s->b_langp, lpi);
380332526b3cSBram Moolenaar 	msg_puts("file: ");
380432526b3cSBram Moolenaar 	msg_puts((char *)lp->lp_slang->sl_fname);
3805362e1a30SBram Moolenaar 	msg_putchar('\n');
3806362e1a30SBram Moolenaar 	p = lp->lp_slang->sl_info;
3807362e1a30SBram Moolenaar 	if (p != NULL)
3808362e1a30SBram Moolenaar 	{
380932526b3cSBram Moolenaar 	    msg_puts((char *)p);
3810362e1a30SBram Moolenaar 	    msg_putchar('\n');
3811362e1a30SBram Moolenaar 	}
3812362e1a30SBram Moolenaar     }
3813362e1a30SBram Moolenaar     msg_end();
3814362e1a30SBram Moolenaar }
3815362e1a30SBram Moolenaar 
// Bit flags passed to spell_dump_compl(), dump_word() and dump_prefixes().
#define DUMPFLAG_KEEPCASE   0x01    // round 2: keep-case tree
#define DUMPFLAG_COUNT	    0x02    // include word count
#define DUMPFLAG_ICASE	    0x04    // ignore case when finding matches
#define DUMPFLAG_ONECAP	    0x08    // pattern starts with capital
#define DUMPFLAG_ALLCAP	    0x10    // pattern is all capitals
38214770d09aSBram Moolenaar 
3822f417f2b6SBram Moolenaar /*
3823f417f2b6SBram Moolenaar  * ":spelldump"
3824f417f2b6SBram Moolenaar  */
3825f417f2b6SBram Moolenaar     void
ex_spelldump(exarg_T * eap)3826764b23c8SBram Moolenaar ex_spelldump(exarg_T *eap)
3827f417f2b6SBram Moolenaar {
38287a18fdc8SBram Moolenaar     char_u  *spl;
38297a18fdc8SBram Moolenaar     long    dummy;
38307a18fdc8SBram Moolenaar 
3831b475fb91SBram Moolenaar     if (no_spell_checking(curwin))
3832b475fb91SBram Moolenaar 	return;
3833dd1f426bSBram Moolenaar     (void)get_option_value((char_u*)"spl", &dummy, &spl, OPT_LOCAL);
3834b475fb91SBram Moolenaar 
38350d6f5d97SBram Moolenaar     // Create a new empty buffer in a new window.
3836b475fb91SBram Moolenaar     do_cmdline_cmd((char_u *)"new");
38377a18fdc8SBram Moolenaar 
38380d6f5d97SBram Moolenaar     // enable spelling locally in the new window
38397a18fdc8SBram Moolenaar     set_option_value((char_u*)"spell", TRUE, (char_u*)"", OPT_LOCAL);
38407a18fdc8SBram Moolenaar     set_option_value((char_u*)"spl",  dummy, spl, OPT_LOCAL);
38417a18fdc8SBram Moolenaar     vim_free(spl);
38427a18fdc8SBram Moolenaar 
3843b5aedf3eSBram Moolenaar     if (!BUFEMPTY())
3844b475fb91SBram Moolenaar 	return;
3845b475fb91SBram Moolenaar 
3846860cae1cSBram Moolenaar     spell_dump_compl(NULL, 0, NULL, eap->forceit ? DUMPFLAG_COUNT : 0);
3847b475fb91SBram Moolenaar 
38480d6f5d97SBram Moolenaar     // Delete the empty line that we started with.
3849b475fb91SBram Moolenaar     if (curbuf->b_ml.ml_line_count > 1)
3850ca70c07bSBram Moolenaar 	ml_delete(curbuf->b_ml.ml_line_count);
3851b475fb91SBram Moolenaar 
3852b475fb91SBram Moolenaar     redraw_later(NOT_VALID);
3853b475fb91SBram Moolenaar }
3854b475fb91SBram Moolenaar 
3855b475fb91SBram Moolenaar /*
3856b475fb91SBram Moolenaar  * Go through all possible words and:
3857b475fb91SBram Moolenaar  * 1. When "pat" is NULL: dump a list of all words in the current buffer.
3858b475fb91SBram Moolenaar  *	"ic" and "dir" are not used.
3859b475fb91SBram Moolenaar  * 2. When "pat" is not NULL: add matching words to insert mode completion.
3860b475fb91SBram Moolenaar  */
3861b475fb91SBram Moolenaar     void
spell_dump_compl(char_u * pat,int ic,int * dir,int dumpflags_arg)3862764b23c8SBram Moolenaar spell_dump_compl(
38630d6f5d97SBram Moolenaar     char_u	*pat,	    // leading part of the word
38640d6f5d97SBram Moolenaar     int		ic,	    // ignore case
38650d6f5d97SBram Moolenaar     int		*dir,	    // direction for adding matches
38660d6f5d97SBram Moolenaar     int		dumpflags_arg)	// DUMPFLAG_*
3867b475fb91SBram Moolenaar {
3868f417f2b6SBram Moolenaar     langp_T	*lp;
3869f417f2b6SBram Moolenaar     slang_T	*slang;
3870f417f2b6SBram Moolenaar     idx_T	arridx[MAXWLEN];
3871f417f2b6SBram Moolenaar     int		curi[MAXWLEN];
3872f417f2b6SBram Moolenaar     char_u	word[MAXWLEN];
3873f417f2b6SBram Moolenaar     int		c;
3874f417f2b6SBram Moolenaar     char_u	*byts;
3875f417f2b6SBram Moolenaar     idx_T	*idxs;
3876f417f2b6SBram Moolenaar     linenr_T	lnum = 0;
3877f417f2b6SBram Moolenaar     int		round;
3878f417f2b6SBram Moolenaar     int		depth;
3879f417f2b6SBram Moolenaar     int		n;
3880f417f2b6SBram Moolenaar     int		flags;
38810d6f5d97SBram Moolenaar     char_u	*region_names = NULL;	    // region names being used
38820d6f5d97SBram Moolenaar     int		do_region = TRUE;	    // dump region names and numbers
38837887d88aSBram Moolenaar     char_u	*p;
3884ac6e65f8SBram Moolenaar     int		lpi;
3885b475fb91SBram Moolenaar     int		dumpflags = dumpflags_arg;
3886b475fb91SBram Moolenaar     int		patlen;
3887f417f2b6SBram Moolenaar 
38880d6f5d97SBram Moolenaar     // When ignoring case or when the pattern starts with capital pass this on
38890d6f5d97SBram Moolenaar     // to dump_word().
3890d0131a8bSBram Moolenaar     if (pat != NULL)
3891d0131a8bSBram Moolenaar     {
3892b475fb91SBram Moolenaar 	if (ic)
3893b475fb91SBram Moolenaar 	    dumpflags |= DUMPFLAG_ICASE;
3894d0131a8bSBram Moolenaar 	else
3895d0131a8bSBram Moolenaar 	{
3896d0131a8bSBram Moolenaar 	    n = captype(pat, NULL);
3897d0131a8bSBram Moolenaar 	    if (n == WF_ONECAP)
3898d0131a8bSBram Moolenaar 		dumpflags |= DUMPFLAG_ONECAP;
3899264b74faSBram Moolenaar 	    else if (n == WF_ALLCAP && (int)STRLEN(pat) > mb_ptr2len(pat))
3900d0131a8bSBram Moolenaar 		dumpflags |= DUMPFLAG_ALLCAP;
3901d0131a8bSBram Moolenaar 	}
3902d0131a8bSBram Moolenaar     }
3903f417f2b6SBram Moolenaar 
39040d6f5d97SBram Moolenaar     // Find out if we can support regions: All languages must support the same
39050d6f5d97SBram Moolenaar     // regions or none at all.
3906860cae1cSBram Moolenaar     for (lpi = 0; lpi < curwin->w_s->b_langp.ga_len; ++lpi)
39077887d88aSBram Moolenaar     {
3908860cae1cSBram Moolenaar 	lp = LANGP_ENTRY(curwin->w_s->b_langp, lpi);
39097887d88aSBram Moolenaar 	p = lp->lp_slang->sl_regions;
39107887d88aSBram Moolenaar 	if (p[0] != 0)
39117887d88aSBram Moolenaar 	{
39120d6f5d97SBram Moolenaar 	    if (region_names == NULL)	    // first language with regions
39137887d88aSBram Moolenaar 		region_names = p;
39147887d88aSBram Moolenaar 	    else if (STRCMP(region_names, p) != 0)
39157887d88aSBram Moolenaar 	    {
39160d6f5d97SBram Moolenaar 		do_region = FALSE;	    // region names are different
39177887d88aSBram Moolenaar 		break;
39187887d88aSBram Moolenaar 	    }
39197887d88aSBram Moolenaar 	}
39207887d88aSBram Moolenaar     }
39217887d88aSBram Moolenaar 
39227887d88aSBram Moolenaar     if (do_region && region_names != NULL)
39237887d88aSBram Moolenaar     {
3924b475fb91SBram Moolenaar 	if (pat == NULL)
3925b475fb91SBram Moolenaar 	{
39267887d88aSBram Moolenaar 	    vim_snprintf((char *)IObuff, IOSIZE, "/regions=%s", region_names);
39277887d88aSBram Moolenaar 	    ml_append(lnum++, IObuff, (colnr_T)0, FALSE);
39287887d88aSBram Moolenaar 	}
3929b475fb91SBram Moolenaar     }
39307887d88aSBram Moolenaar     else
39317887d88aSBram Moolenaar 	do_region = FALSE;
39327887d88aSBram Moolenaar 
39337887d88aSBram Moolenaar     /*
39347887d88aSBram Moolenaar      * Loop over all files loaded for the entries in 'spelllang'.
39357887d88aSBram Moolenaar      */
3936860cae1cSBram Moolenaar     for (lpi = 0; lpi < curwin->w_s->b_langp.ga_len; ++lpi)
3937f417f2b6SBram Moolenaar     {
3938860cae1cSBram Moolenaar 	lp = LANGP_ENTRY(curwin->w_s->b_langp, lpi);
3939f417f2b6SBram Moolenaar 	slang = lp->lp_slang;
39400d6f5d97SBram Moolenaar 	if (slang->sl_fbyts == NULL)	    // reloading failed
3941ac6e65f8SBram Moolenaar 	    continue;
3942f417f2b6SBram Moolenaar 
3943b475fb91SBram Moolenaar 	if (pat == NULL)
3944b475fb91SBram Moolenaar 	{
3945f417f2b6SBram Moolenaar 	    vim_snprintf((char *)IObuff, IOSIZE, "# file: %s", slang->sl_fname);
3946f417f2b6SBram Moolenaar 	    ml_append(lnum++, IObuff, (colnr_T)0, FALSE);
3947b475fb91SBram Moolenaar 	}
3948b475fb91SBram Moolenaar 
39490d6f5d97SBram Moolenaar 	// When matching with a pattern and there are no prefixes only use
39500d6f5d97SBram Moolenaar 	// parts of the tree that match "pat".
3951b475fb91SBram Moolenaar 	if (pat != NULL && slang->sl_pbyts == NULL)
3952a93fa7eeSBram Moolenaar 	    patlen = (int)STRLEN(pat);
3953b475fb91SBram Moolenaar 	else
3954eb3593b3SBram Moolenaar 	    patlen = -1;
3955f417f2b6SBram Moolenaar 
39560d6f5d97SBram Moolenaar 	// round 1: case-folded tree
39570d6f5d97SBram Moolenaar 	// round 2: keep-case tree
3958f417f2b6SBram Moolenaar 	for (round = 1; round <= 2; ++round)
3959f417f2b6SBram Moolenaar 	{
3960f417f2b6SBram Moolenaar 	    if (round == 1)
3961f417f2b6SBram Moolenaar 	    {
3962b475fb91SBram Moolenaar 		dumpflags &= ~DUMPFLAG_KEEPCASE;
3963f417f2b6SBram Moolenaar 		byts = slang->sl_fbyts;
3964f417f2b6SBram Moolenaar 		idxs = slang->sl_fidxs;
3965f417f2b6SBram Moolenaar 	    }
3966f417f2b6SBram Moolenaar 	    else
3967f417f2b6SBram Moolenaar 	    {
3968b475fb91SBram Moolenaar 		dumpflags |= DUMPFLAG_KEEPCASE;
3969f417f2b6SBram Moolenaar 		byts = slang->sl_kbyts;
3970f417f2b6SBram Moolenaar 		idxs = slang->sl_kidxs;
3971f417f2b6SBram Moolenaar 	    }
3972f417f2b6SBram Moolenaar 	    if (byts == NULL)
39730d6f5d97SBram Moolenaar 		continue;		// array is empty
3974f417f2b6SBram Moolenaar 
3975f417f2b6SBram Moolenaar 	    depth = 0;
3976f417f2b6SBram Moolenaar 	    arridx[0] = 0;
3977f417f2b6SBram Moolenaar 	    curi[0] = 1;
3978b475fb91SBram Moolenaar 	    while (depth >= 0 && !got_int
39797591bb39SBram Moolenaar 				  && (pat == NULL || !ins_compl_interrupted()))
3980f417f2b6SBram Moolenaar 	    {
3981f417f2b6SBram Moolenaar 		if (curi[depth] > byts[arridx[depth]])
3982f417f2b6SBram Moolenaar 		{
39830d6f5d97SBram Moolenaar 		    // Done all bytes at this node, go up one level.
3984f417f2b6SBram Moolenaar 		    --depth;
3985f417f2b6SBram Moolenaar 		    line_breakcheck();
3986472e8597SBram Moolenaar 		    ins_compl_check_keys(50, FALSE);
3987f417f2b6SBram Moolenaar 		}
3988f417f2b6SBram Moolenaar 		else
3989f417f2b6SBram Moolenaar 		{
39900d6f5d97SBram Moolenaar 		    // Do one more byte at this node.
3991f417f2b6SBram Moolenaar 		    n = arridx[depth] + curi[depth];
3992f417f2b6SBram Moolenaar 		    ++curi[depth];
3993f417f2b6SBram Moolenaar 		    c = byts[n];
3994f417f2b6SBram Moolenaar 		    if (c == 0)
3995f417f2b6SBram Moolenaar 		    {
39960d6f5d97SBram Moolenaar 			// End of word, deal with the word.
39970d6f5d97SBram Moolenaar 			// Don't use keep-case words in the fold-case tree,
39980d6f5d97SBram Moolenaar 			// they will appear in the keep-case tree.
39990d6f5d97SBram Moolenaar 			// Only use the word when the region matches.
4000f417f2b6SBram Moolenaar 			flags = (int)idxs[n];
4001f417f2b6SBram Moolenaar 			if ((round == 2 || (flags & WF_KEEPCAP) == 0)
4002ac6e65f8SBram Moolenaar 				&& (flags & WF_NEEDCOMP) == 0
40037887d88aSBram Moolenaar 				&& (do_region
40047887d88aSBram Moolenaar 				    || (flags & WF_REGION) == 0
4005dfb9ac00SBram Moolenaar 				    || (((unsigned)flags >> 16)
4006f417f2b6SBram Moolenaar 						       & lp->lp_region) != 0))
4007f417f2b6SBram Moolenaar 			{
4008f417f2b6SBram Moolenaar 			    word[depth] = NUL;
40097887d88aSBram Moolenaar 			    if (!do_region)
40107887d88aSBram Moolenaar 				flags &= ~WF_REGION;
40110a5fe214SBram Moolenaar 
40120d6f5d97SBram Moolenaar 			    // Dump the basic word if there is no prefix or
40130d6f5d97SBram Moolenaar 			    // when it's the first one.
4014dfb9ac00SBram Moolenaar 			    c = (unsigned)flags >> 24;
40150a5fe214SBram Moolenaar 			    if (c == 0 || curi[depth] == 2)
4016b475fb91SBram Moolenaar 			    {
4017b475fb91SBram Moolenaar 				dump_word(slang, word, pat, dir,
4018b475fb91SBram Moolenaar 						      dumpflags, flags, lnum);
4019b475fb91SBram Moolenaar 				if (pat == NULL)
4020b475fb91SBram Moolenaar 				    ++lnum;
4021b475fb91SBram Moolenaar 			    }
4022f417f2b6SBram Moolenaar 
40230d6f5d97SBram Moolenaar 			    // Apply the prefix, if there is one.
40240a5fe214SBram Moolenaar 			    if (c != 0)
4025b475fb91SBram Moolenaar 				lnum = dump_prefixes(slang, word, pat, dir,
4026b475fb91SBram Moolenaar 						      dumpflags, flags, lnum);
4027f417f2b6SBram Moolenaar 			}
4028f417f2b6SBram Moolenaar 		    }
4029f417f2b6SBram Moolenaar 		    else
4030f417f2b6SBram Moolenaar 		    {
40310d6f5d97SBram Moolenaar 			// Normal char, go one level deeper.
4032f417f2b6SBram Moolenaar 			word[depth++] = c;
4033f417f2b6SBram Moolenaar 			arridx[depth] = idxs[n];
4034f417f2b6SBram Moolenaar 			curi[depth] = 1;
4035f417f2b6SBram Moolenaar 
40360d6f5d97SBram Moolenaar 			// Check if this characters matches with the pattern.
40370d6f5d97SBram Moolenaar 			// If not skip the whole tree below it.
40380d6f5d97SBram Moolenaar 			// Always ignore case here, dump_word() will check
40390d6f5d97SBram Moolenaar 			// proper case later.  This isn't exactly right when
40400d6f5d97SBram Moolenaar 			// length changes for multi-byte characters with
40410d6f5d97SBram Moolenaar 			// ignore case...
4042d0131a8bSBram Moolenaar 			if (depth <= patlen
4043d0131a8bSBram Moolenaar 					&& MB_STRNICMP(word, pat, depth) != 0)
4044b475fb91SBram Moolenaar 			    --depth;
4045b475fb91SBram Moolenaar 		    }
4046b475fb91SBram Moolenaar 		}
4047b475fb91SBram Moolenaar 	    }
4048b475fb91SBram Moolenaar 	}
4049b475fb91SBram Moolenaar     }
4050f417f2b6SBram Moolenaar }
4051f417f2b6SBram Moolenaar 
4052f417f2b6SBram Moolenaar /*
4053f417f2b6SBram Moolenaar  * Dump one word: apply case modifications and append a line to the buffer.
4054b475fb91SBram Moolenaar  * When "lnum" is zero add insert mode completion.
4055f417f2b6SBram Moolenaar  */
4056f417f2b6SBram Moolenaar     static void
dump_word(slang_T * slang,char_u * word,char_u * pat,int * dir,int dumpflags,int wordflags,linenr_T lnum)4057764b23c8SBram Moolenaar dump_word(
4058764b23c8SBram Moolenaar     slang_T	*slang,
4059764b23c8SBram Moolenaar     char_u	*word,
4060764b23c8SBram Moolenaar     char_u	*pat,
4061764b23c8SBram Moolenaar     int		*dir,
4062764b23c8SBram Moolenaar     int		dumpflags,
4063764b23c8SBram Moolenaar     int		wordflags,
4064764b23c8SBram Moolenaar     linenr_T	lnum)
4065f417f2b6SBram Moolenaar {
4066f417f2b6SBram Moolenaar     int		keepcap = FALSE;
4067f417f2b6SBram Moolenaar     char_u	*p;
40684770d09aSBram Moolenaar     char_u	*tw;
4069f417f2b6SBram Moolenaar     char_u	cword[MAXWLEN];
40707887d88aSBram Moolenaar     char_u	badword[MAXWLEN + 10];
40717887d88aSBram Moolenaar     int		i;
4072d0131a8bSBram Moolenaar     int		flags = wordflags;
4073d0131a8bSBram Moolenaar 
4074d0131a8bSBram Moolenaar     if (dumpflags & DUMPFLAG_ONECAP)
4075d0131a8bSBram Moolenaar 	flags |= WF_ONECAP;
4076d0131a8bSBram Moolenaar     if (dumpflags & DUMPFLAG_ALLCAP)
4077d0131a8bSBram Moolenaar 	flags |= WF_ALLCAP;
4078f417f2b6SBram Moolenaar 
40794770d09aSBram Moolenaar     if ((dumpflags & DUMPFLAG_KEEPCASE) == 0 && (flags & WF_CAPMASK) != 0)
4080f417f2b6SBram Moolenaar     {
40810d6f5d97SBram Moolenaar 	// Need to fix case according to "flags".
4082f417f2b6SBram Moolenaar 	make_case_word(word, cword, flags);
4083f417f2b6SBram Moolenaar 	p = cword;
4084f417f2b6SBram Moolenaar     }
4085f417f2b6SBram Moolenaar     else
4086f417f2b6SBram Moolenaar     {
4087f417f2b6SBram Moolenaar 	p = word;
40884770d09aSBram Moolenaar 	if ((dumpflags & DUMPFLAG_KEEPCASE)
40894770d09aSBram Moolenaar 		&& ((captype(word, NULL) & WF_KEEPCAP) == 0
40900dc065eeSBram Moolenaar 						 || (flags & WF_FIXCAP) != 0))
4091f417f2b6SBram Moolenaar 	    keepcap = TRUE;
4092f417f2b6SBram Moolenaar     }
40934770d09aSBram Moolenaar     tw = p;
4094f417f2b6SBram Moolenaar 
4095b475fb91SBram Moolenaar     if (pat == NULL)
4096b475fb91SBram Moolenaar     {
40970d6f5d97SBram Moolenaar 	// Add flags and regions after a slash.
40987887d88aSBram Moolenaar 	if ((flags & (WF_BANNED | WF_RARE | WF_REGION)) || keepcap)
4099f417f2b6SBram Moolenaar 	{
41007887d88aSBram Moolenaar 	    STRCPY(badword, p);
41017887d88aSBram Moolenaar 	    STRCAT(badword, "/");
4102f417f2b6SBram Moolenaar 	    if (keepcap)
4103f417f2b6SBram Moolenaar 		STRCAT(badword, "=");
4104f417f2b6SBram Moolenaar 	    if (flags & WF_BANNED)
4105f417f2b6SBram Moolenaar 		STRCAT(badword, "!");
4106f417f2b6SBram Moolenaar 	    else if (flags & WF_RARE)
4107f417f2b6SBram Moolenaar 		STRCAT(badword, "?");
41087887d88aSBram Moolenaar 	    if (flags & WF_REGION)
41097887d88aSBram Moolenaar 		for (i = 0; i < 7; ++i)
4110dfb9ac00SBram Moolenaar 		    if (flags & (0x10000 << i))
41117887d88aSBram Moolenaar 			sprintf((char *)badword + STRLEN(badword), "%d", i + 1);
4112f417f2b6SBram Moolenaar 	    p = badword;
4113f417f2b6SBram Moolenaar 	}
4114f417f2b6SBram Moolenaar 
41154770d09aSBram Moolenaar 	if (dumpflags & DUMPFLAG_COUNT)
41164770d09aSBram Moolenaar 	{
41174770d09aSBram Moolenaar 	    hashitem_T  *hi;
41184770d09aSBram Moolenaar 
41190d6f5d97SBram Moolenaar 	    // Include the word count for ":spelldump!".
41204770d09aSBram Moolenaar 	    hi = hash_find(&slang->sl_wordcount, tw);
41214770d09aSBram Moolenaar 	    if (!HASHITEM_EMPTY(hi))
41224770d09aSBram Moolenaar 	    {
41234770d09aSBram Moolenaar 		vim_snprintf((char *)IObuff, IOSIZE, "%s\t%d",
41244770d09aSBram Moolenaar 						     tw, HI2WC(hi)->wc_count);
41254770d09aSBram Moolenaar 		p = IObuff;
41264770d09aSBram Moolenaar 	    }
41274770d09aSBram Moolenaar 	}
41284770d09aSBram Moolenaar 
4129f417f2b6SBram Moolenaar 	ml_append(lnum, p, (colnr_T)0, FALSE);
4130f417f2b6SBram Moolenaar     }
4131d0131a8bSBram Moolenaar     else if (((dumpflags & DUMPFLAG_ICASE)
4132d0131a8bSBram Moolenaar 		    ? MB_STRNICMP(p, pat, STRLEN(pat)) == 0
4133d0131a8bSBram Moolenaar 		    : STRNCMP(p, pat, STRLEN(pat)) == 0)
4134b475fb91SBram Moolenaar 		&& ins_compl_add_infercase(p, (int)STRLEN(p),
4135d9eefe31SBram Moolenaar 					  p_ic, NULL, *dir, FALSE) == OK)
41360d6f5d97SBram Moolenaar 	// if dir was BACKWARD then honor it just once
4137b475fb91SBram Moolenaar 	*dir = FORWARD;
4138b475fb91SBram Moolenaar }
4139f417f2b6SBram Moolenaar 
4140f417f2b6SBram Moolenaar /*
4141a1ba811aSBram Moolenaar  * For ":spelldump": Find matching prefixes for "word".  Prepend each to
4142a1ba811aSBram Moolenaar  * "word" and append a line to the buffer.
4143b475fb91SBram Moolenaar  * When "lnum" is zero add insert mode completion.
4144f417f2b6SBram Moolenaar  * Return the updated line number.
4145f417f2b6SBram Moolenaar  */
4146f417f2b6SBram Moolenaar     static linenr_T
dump_prefixes(slang_T * slang,char_u * word,char_u * pat,int * dir,int dumpflags,int flags,linenr_T startlnum)4147764b23c8SBram Moolenaar dump_prefixes(
4148764b23c8SBram Moolenaar     slang_T	*slang,
41490d6f5d97SBram Moolenaar     char_u	*word,	    // case-folded word
4150764b23c8SBram Moolenaar     char_u	*pat,
4151764b23c8SBram Moolenaar     int		*dir,
4152764b23c8SBram Moolenaar     int		dumpflags,
41530d6f5d97SBram Moolenaar     int		flags,	    // flags with prefix ID
4154764b23c8SBram Moolenaar     linenr_T	startlnum)
4155f417f2b6SBram Moolenaar {
4156f417f2b6SBram Moolenaar     idx_T	arridx[MAXWLEN];
4157f417f2b6SBram Moolenaar     int		curi[MAXWLEN];
4158f417f2b6SBram Moolenaar     char_u	prefix[MAXWLEN];
415953805d1eSBram Moolenaar     char_u	word_up[MAXWLEN];
416053805d1eSBram Moolenaar     int		has_word_up = FALSE;
4161f417f2b6SBram Moolenaar     int		c;
4162f417f2b6SBram Moolenaar     char_u	*byts;
4163f417f2b6SBram Moolenaar     idx_T	*idxs;
4164f417f2b6SBram Moolenaar     linenr_T	lnum = startlnum;
4165f417f2b6SBram Moolenaar     int		depth;
4166f417f2b6SBram Moolenaar     int		n;
4167f417f2b6SBram Moolenaar     int		len;
4168f417f2b6SBram Moolenaar     int		i;
4169f417f2b6SBram Moolenaar 
41700d6f5d97SBram Moolenaar     // If the word starts with a lower-case letter make the word with an
41710d6f5d97SBram Moolenaar     // upper-case letter in word_up[].
417253805d1eSBram Moolenaar     c = PTR2CHAR(word);
417353805d1eSBram Moolenaar     if (SPELL_TOUPPER(c) != c)
417453805d1eSBram Moolenaar     {
417553805d1eSBram Moolenaar 	onecap_copy(word, word_up, TRUE);
417653805d1eSBram Moolenaar 	has_word_up = TRUE;
417753805d1eSBram Moolenaar     }
417853805d1eSBram Moolenaar 
4179f417f2b6SBram Moolenaar     byts = slang->sl_pbyts;
4180f417f2b6SBram Moolenaar     idxs = slang->sl_pidxs;
41810d6f5d97SBram Moolenaar     if (byts != NULL)		// array not is empty
4182f417f2b6SBram Moolenaar     {
4183f417f2b6SBram Moolenaar 	/*
4184f417f2b6SBram Moolenaar 	 * Loop over all prefixes, building them byte-by-byte in prefix[].
4185dfb9ac00SBram Moolenaar 	 * When at the end of a prefix check that it supports "flags".
4186f417f2b6SBram Moolenaar 	 */
4187f417f2b6SBram Moolenaar 	depth = 0;
4188f417f2b6SBram Moolenaar 	arridx[0] = 0;
4189f417f2b6SBram Moolenaar 	curi[0] = 1;
4190f417f2b6SBram Moolenaar 	while (depth >= 0 && !got_int)
4191f417f2b6SBram Moolenaar 	{
4192dfb9ac00SBram Moolenaar 	    n = arridx[depth];
4193dfb9ac00SBram Moolenaar 	    len = byts[n];
4194dfb9ac00SBram Moolenaar 	    if (curi[depth] > len)
4195f417f2b6SBram Moolenaar 	    {
41960d6f5d97SBram Moolenaar 		// Done all bytes at this node, go up one level.
4197f417f2b6SBram Moolenaar 		--depth;
4198f417f2b6SBram Moolenaar 		line_breakcheck();
4199f417f2b6SBram Moolenaar 	    }
4200f417f2b6SBram Moolenaar 	    else
4201f417f2b6SBram Moolenaar 	    {
42020d6f5d97SBram Moolenaar 		// Do one more byte at this node.
4203dfb9ac00SBram Moolenaar 		n += curi[depth];
4204f417f2b6SBram Moolenaar 		++curi[depth];
4205f417f2b6SBram Moolenaar 		c = byts[n];
4206f417f2b6SBram Moolenaar 		if (c == 0)
4207f417f2b6SBram Moolenaar 		{
42080d6f5d97SBram Moolenaar 		    // End of prefix, find out how many IDs there are.
4209f417f2b6SBram Moolenaar 		    for (i = 1; i < len; ++i)
4210f417f2b6SBram Moolenaar 			if (byts[n + i] != 0)
4211f417f2b6SBram Moolenaar 			    break;
4212f417f2b6SBram Moolenaar 		    curi[depth] += i - 1;
4213f417f2b6SBram Moolenaar 
421453805d1eSBram Moolenaar 		    c = valid_word_prefix(i, n, flags, word, slang, FALSE);
421553805d1eSBram Moolenaar 		    if (c != 0)
4216f417f2b6SBram Moolenaar 		    {
42179c96f592SBram Moolenaar 			vim_strncpy(prefix + depth, word, MAXWLEN - depth - 1);
4218b475fb91SBram Moolenaar 			dump_word(slang, prefix, pat, dir, dumpflags,
421953805d1eSBram Moolenaar 				(c & WF_RAREPFX) ? (flags | WF_RARE)
4220b475fb91SBram Moolenaar 							       : flags, lnum);
4221b475fb91SBram Moolenaar 			if (lnum != 0)
4222b475fb91SBram Moolenaar 			    ++lnum;
4223f417f2b6SBram Moolenaar 		    }
422453805d1eSBram Moolenaar 
42250d6f5d97SBram Moolenaar 		    // Check for prefix that matches the word when the
42260d6f5d97SBram Moolenaar 		    // first letter is upper-case, but only if the prefix has
42270d6f5d97SBram Moolenaar 		    // a condition.
422853805d1eSBram Moolenaar 		    if (has_word_up)
422953805d1eSBram Moolenaar 		    {
423053805d1eSBram Moolenaar 			c = valid_word_prefix(i, n, flags, word_up, slang,
423153805d1eSBram Moolenaar 									TRUE);
423253805d1eSBram Moolenaar 			if (c != 0)
423353805d1eSBram Moolenaar 			{
423453805d1eSBram Moolenaar 			    vim_strncpy(prefix + depth, word_up,
423553805d1eSBram Moolenaar 							 MAXWLEN - depth - 1);
4236b475fb91SBram Moolenaar 			    dump_word(slang, prefix, pat, dir, dumpflags,
423753805d1eSBram Moolenaar 				    (c & WF_RAREPFX) ? (flags | WF_RARE)
4238b475fb91SBram Moolenaar 							       : flags, lnum);
4239b475fb91SBram Moolenaar 			    if (lnum != 0)
4240b475fb91SBram Moolenaar 				++lnum;
424153805d1eSBram Moolenaar 			}
424253805d1eSBram Moolenaar 		    }
4243f417f2b6SBram Moolenaar 		}
4244f417f2b6SBram Moolenaar 		else
4245f417f2b6SBram Moolenaar 		{
42460d6f5d97SBram Moolenaar 		    // Normal char, go one level deeper.
4247f417f2b6SBram Moolenaar 		    prefix[depth++] = c;
4248f417f2b6SBram Moolenaar 		    arridx[depth] = idxs[n];
4249f417f2b6SBram Moolenaar 		    curi[depth] = 1;
4250f417f2b6SBram Moolenaar 		}
4251f417f2b6SBram Moolenaar 	    }
4252f417f2b6SBram Moolenaar 	}
4253f417f2b6SBram Moolenaar     }
4254f417f2b6SBram Moolenaar 
4255f417f2b6SBram Moolenaar     return lnum;
4256f417f2b6SBram Moolenaar }
4257f417f2b6SBram Moolenaar 
425895529568SBram Moolenaar /*
4259a40ceaf8SBram Moolenaar  * Move "p" to the end of word "start".
4260a40ceaf8SBram Moolenaar  * Uses the spell-checking word characters.
426195529568SBram Moolenaar  */
426295529568SBram Moolenaar     char_u *
spell_to_word_end(char_u * start,win_T * win)4263764b23c8SBram Moolenaar spell_to_word_end(char_u *start, win_T *win)
426495529568SBram Moolenaar {
426595529568SBram Moolenaar     char_u  *p = start;
426695529568SBram Moolenaar 
4267860cae1cSBram Moolenaar     while (*p != NUL && spell_iswordp(p, win))
426891acfffcSBram Moolenaar 	MB_PTR_ADV(p);
426995529568SBram Moolenaar     return p;
427095529568SBram Moolenaar }
427195529568SBram Moolenaar 
42728b59de9fSBram Moolenaar /*
4273a40ceaf8SBram Moolenaar  * For Insert mode completion CTRL-X s:
4274a40ceaf8SBram Moolenaar  * Find start of the word in front of column "startcol".
4275a40ceaf8SBram Moolenaar  * We don't check if it is badly spelled, with completion we can only change
4276a40ceaf8SBram Moolenaar  * the word in front of the cursor.
42778b59de9fSBram Moolenaar  * Returns the column number of the word.
42788b59de9fSBram Moolenaar  */
42798b59de9fSBram Moolenaar     int
spell_word_start(int startcol)4280764b23c8SBram Moolenaar spell_word_start(int startcol)
42818b59de9fSBram Moolenaar {
42828b59de9fSBram Moolenaar     char_u	*line;
42838b59de9fSBram Moolenaar     char_u	*p;
42848b59de9fSBram Moolenaar     int		col = 0;
42858b59de9fSBram Moolenaar 
428695529568SBram Moolenaar     if (no_spell_checking(curwin))
42878b59de9fSBram Moolenaar 	return startcol;
42888b59de9fSBram Moolenaar 
42890d6f5d97SBram Moolenaar     // Find a word character before "startcol".
42908b59de9fSBram Moolenaar     line = ml_get_curline();
42918b59de9fSBram Moolenaar     for (p = line + startcol; p > line; )
42928b59de9fSBram Moolenaar     {
429391acfffcSBram Moolenaar 	MB_PTR_BACK(line, p);
4294cc63c647SBram Moolenaar 	if (spell_iswordp_nmw(p, curwin))
42958b59de9fSBram Moolenaar 	    break;
42968b59de9fSBram Moolenaar     }
42978b59de9fSBram Moolenaar 
42980d6f5d97SBram Moolenaar     // Go back to start of the word.
42998b59de9fSBram Moolenaar     while (p > line)
43008b59de9fSBram Moolenaar     {
4301a93fa7eeSBram Moolenaar 	col = (int)(p - line);
430291acfffcSBram Moolenaar 	MB_PTR_BACK(line, p);
4303860cae1cSBram Moolenaar 	if (!spell_iswordp(p, curwin))
43048b59de9fSBram Moolenaar 	    break;
43058b59de9fSBram Moolenaar 	col = 0;
43068b59de9fSBram Moolenaar     }
43078b59de9fSBram Moolenaar 
43088b59de9fSBram Moolenaar     return col;
43098b59de9fSBram Moolenaar }
43108b59de9fSBram Moolenaar 
43118b59de9fSBram Moolenaar /*
43124effc80aSBram Moolenaar  * Need to check for 'spellcapcheck' now, the word is removed before
43134effc80aSBram Moolenaar  * expand_spelling() is called.  Therefore the ugly global variable.
43144effc80aSBram Moolenaar  */
43154effc80aSBram Moolenaar static int spell_expand_need_cap;
43164effc80aSBram Moolenaar 
43174effc80aSBram Moolenaar     void
spell_expand_check_cap(colnr_T col)4318764b23c8SBram Moolenaar spell_expand_check_cap(colnr_T col)
43194effc80aSBram Moolenaar {
43204effc80aSBram Moolenaar     spell_expand_need_cap = check_need_cap(curwin->w_cursor.lnum, col);
43214effc80aSBram Moolenaar }
43224effc80aSBram Moolenaar 
43234effc80aSBram Moolenaar /*
43248b59de9fSBram Moolenaar  * Get list of spelling suggestions.
43258b59de9fSBram Moolenaar  * Used for Insert mode completion CTRL-X ?.
43268b59de9fSBram Moolenaar  * Returns the number of matches.  The matches are in "matchp[]", array of
43278b59de9fSBram Moolenaar  * allocated strings.
43288b59de9fSBram Moolenaar  */
43298b59de9fSBram Moolenaar     int
expand_spelling(linenr_T lnum UNUSED,char_u * pat,char_u *** matchp)4330764b23c8SBram Moolenaar expand_spelling(
4331764b23c8SBram Moolenaar     linenr_T	lnum UNUSED,
4332764b23c8SBram Moolenaar     char_u	*pat,
4333764b23c8SBram Moolenaar     char_u	***matchp)
43348b59de9fSBram Moolenaar {
43358b59de9fSBram Moolenaar     garray_T	ga;
43368b59de9fSBram Moolenaar 
43374770d09aSBram Moolenaar     spell_suggest_list(&ga, pat, 100, spell_expand_need_cap, TRUE);
43388b59de9fSBram Moolenaar     *matchp = ga.ga_data;
43398b59de9fSBram Moolenaar     return ga.ga_len;
43408b59de9fSBram Moolenaar }
43418b59de9fSBram Moolenaar 
4342e677df8dSBram Moolenaar /*
4343f154f3abSBram Moolenaar  * Return TRUE if "val" is a valid 'spelllang' value.
4344e677df8dSBram Moolenaar  */
4345e677df8dSBram Moolenaar     int
valid_spelllang(char_u * val)4346f154f3abSBram Moolenaar valid_spelllang(char_u *val)
4347e677df8dSBram Moolenaar {
4348e677df8dSBram Moolenaar     return valid_name(val, ".-_,@");
4349e677df8dSBram Moolenaar }
4350e677df8dSBram Moolenaar 
4351e677df8dSBram Moolenaar /*
4352e677df8dSBram Moolenaar  * Return TRUE if "val" is a valid 'spellfile' value.
4353e677df8dSBram Moolenaar  */
4354e677df8dSBram Moolenaar     int
valid_spellfile(char_u * val)4355e677df8dSBram Moolenaar valid_spellfile(char_u *val)
4356e677df8dSBram Moolenaar {
4357e677df8dSBram Moolenaar     char_u *s;
4358e677df8dSBram Moolenaar 
4359e677df8dSBram Moolenaar     for (s = val; *s != NUL; ++s)
4360b2620202SBram Moolenaar 	if (!vim_isfilec(*s) && *s != ',' && *s != ' ')
4361e677df8dSBram Moolenaar 	    return FALSE;
4362e677df8dSBram Moolenaar     return TRUE;
4363e677df8dSBram Moolenaar }
4364e677df8dSBram Moolenaar 
4365e677df8dSBram Moolenaar /*
4366e677df8dSBram Moolenaar  * Handle side effects of setting 'spell'.
4367e677df8dSBram Moolenaar  * Return an error message or NULL for success.
4368e677df8dSBram Moolenaar  */
4369e677df8dSBram Moolenaar     char *
did_set_spell_option(int is_spellfile)4370e677df8dSBram Moolenaar did_set_spell_option(int is_spellfile)
4371e677df8dSBram Moolenaar {
4372e677df8dSBram Moolenaar     char    *errmsg = NULL;
4373e677df8dSBram Moolenaar     win_T   *wp;
4374e677df8dSBram Moolenaar     int	    l;
4375e677df8dSBram Moolenaar 
4376e677df8dSBram Moolenaar     if (is_spellfile)
4377e677df8dSBram Moolenaar     {
4378e677df8dSBram Moolenaar 	l = (int)STRLEN(curwin->w_s->b_p_spf);
4379e677df8dSBram Moolenaar 	if (l > 0 && (l < 4
4380e677df8dSBram Moolenaar 			|| STRCMP(curwin->w_s->b_p_spf + l - 4, ".add") != 0))
4381e677df8dSBram Moolenaar 	    errmsg = e_invarg;
4382e677df8dSBram Moolenaar     }
4383e677df8dSBram Moolenaar 
4384e677df8dSBram Moolenaar     if (errmsg == NULL)
4385e677df8dSBram Moolenaar     {
4386e677df8dSBram Moolenaar 	FOR_ALL_WINDOWS(wp)
4387e677df8dSBram Moolenaar 	    if (wp->w_buffer == curbuf && wp->w_p_spell)
4388e677df8dSBram Moolenaar 	    {
4389e677df8dSBram Moolenaar 		errmsg = did_set_spelllang(wp);
4390e677df8dSBram Moolenaar 		break;
4391e677df8dSBram Moolenaar 	    }
4392e677df8dSBram Moolenaar     }
4393e677df8dSBram Moolenaar     return errmsg;
4394e677df8dSBram Moolenaar }
4395e677df8dSBram Moolenaar 
4396e677df8dSBram Moolenaar /*
4397e677df8dSBram Moolenaar  * Set curbuf->b_cap_prog to the regexp program for 'spellcapcheck'.
4398e677df8dSBram Moolenaar  * Return error message when failed, NULL when OK.
4399e677df8dSBram Moolenaar  */
4400e677df8dSBram Moolenaar     char *
compile_cap_prog(synblock_T * synblock)4401e677df8dSBram Moolenaar compile_cap_prog(synblock_T *synblock)
4402e677df8dSBram Moolenaar {
4403e677df8dSBram Moolenaar     regprog_T   *rp = synblock->b_cap_prog;
4404e677df8dSBram Moolenaar     char_u	*re;
4405e677df8dSBram Moolenaar 
440653efb185SBram Moolenaar     if (synblock->b_p_spc == NULL || *synblock->b_p_spc == NUL)
4407e677df8dSBram Moolenaar 	synblock->b_cap_prog = NULL;
4408e677df8dSBram Moolenaar     else
4409e677df8dSBram Moolenaar     {
4410e677df8dSBram Moolenaar 	// Prepend a ^ so that we only match at one column
4411e677df8dSBram Moolenaar 	re = concat_str((char_u *)"^", synblock->b_p_spc);
4412e677df8dSBram Moolenaar 	if (re != NULL)
4413e677df8dSBram Moolenaar 	{
4414e677df8dSBram Moolenaar 	    synblock->b_cap_prog = vim_regcomp(re, RE_MAGIC);
4415e677df8dSBram Moolenaar 	    vim_free(re);
4416e677df8dSBram Moolenaar 	    if (synblock->b_cap_prog == NULL)
4417e677df8dSBram Moolenaar 	    {
4418e677df8dSBram Moolenaar 		synblock->b_cap_prog = rp; // restore the previous program
4419e677df8dSBram Moolenaar 		return e_invarg;
4420e677df8dSBram Moolenaar 	    }
4421e677df8dSBram Moolenaar 	}
4422e677df8dSBram Moolenaar     }
4423e677df8dSBram Moolenaar 
4424e677df8dSBram Moolenaar     vim_regfree(rp);
4425e677df8dSBram Moolenaar     return NULL;
4426e677df8dSBram Moolenaar }
4427e677df8dSBram Moolenaar 
4428e677df8dSBram Moolenaar #endif  // FEAT_SPELL
4429