/* vi:set ts=8 sts=4 sw=4 noet:
 *
 * VIM - Vi IMproved	by Bram Moolenaar
 *
 * Do ":help uganda"  in Vim to read copying and usage conditions.
 * Do ":help credits" in Vim to see a list of people who contributed.
 * See README.txt for an overview of the Vim source code.
 */
9e19defe1SBram Moolenaar
/*
 * spell.c: code for spell checking
 *
 * See spellfile.c for the Vim spell file format.
 *
 * The spell checking mechanism uses a tree (aka trie).  Each node in the tree
 * has a list of bytes that can appear (siblings).  For each byte there is a
 * pointer to the node with the byte that follows in the word (child).
 *
 * A NUL byte is used where the word may end.  The bytes are sorted, so that
 * binary searching can be used and the NUL bytes are at the start.  The
 * number of possible bytes is stored before the list of bytes.
 *
 * The tree uses two arrays: "byts" stores the characters, "idxs" stores
 * either the next index or flags.  The tree starts at index 0.  For example,
 * to lookup "vi" this sequence is followed:
 *	i = 0
 *	len = byts[i]
 *	n = where "v" appears in byts[i + 1] to byts[i + len]
 *	i = idxs[n]
 *	len = byts[i]
 *	n = where "i" appears in byts[i + 1] to byts[i + len]
 *	i = idxs[n]
 *	len = byts[i]
 *	find that byts[i + 1] is 0, idxs[i + 1] has flags for "vi".
 *
 * There are two word trees: one with case-folded words and one with words in
 * original case.  The second one is only used for keep-case words and is
 * usually small.
 *
 * There is one additional tree for when not all prefixes are applied when
 * generating the .spl file.  This tree stores all the possible prefixes, as
 * if they were words.  At each word (prefix) end the prefix nr is stored, the
 * following word must support this prefix nr.  And the condition nr is
 * stored, used to lookup the condition that the word must match with.
 *
 * Thanks to Olaf Seibert for providing an example implementation of this tree
 * and the compression mechanism.
 * LZ trie ideas:
 *	http://www.irb.hr/hr/home/ristov/papers/RistovLZtrieRevision1.pdf
 * More papers: http://www-igm.univ-mlv.fr/~laporte/publi_en.html
 *
 * Matching involves checking the caps type: Onecap ALLCAP KeepCap.
 *
 * Why doesn't Vim use aspell/ispell/myspell/etc.?
 * See ":help develop-spell".
 */
57402d2feaSBram Moolenaar
589ccfebddSBram Moolenaar #define IN_SPELL_C
59e19defe1SBram Moolenaar #include "vim.h"
60e19defe1SBram Moolenaar
61f71a3db4SBram Moolenaar #if defined(FEAT_SPELL) || defined(PROTO)
62e19defe1SBram Moolenaar
630d6f5d97SBram Moolenaar #ifndef UNIX // it's in os_unix.h for Unix
640d6f5d97SBram Moolenaar # include <time.h> // for time_t
654770d09aSBram Moolenaar #endif
664770d09aSBram Moolenaar
#define REGION_ALL 0xff	// word valid in all regions

// Result values.  Lower number is accepted over higher one.
// SP_BANNED is parenthesized so the negative literal always expands as a
// single operand.
#define SP_BANNED	(-1)
#define SP_OK		0
#define SP_RARE		1
#define SP_LOCAL	2
#define SP_BAD		3
769ba0eb85SBram Moolenaar /*
77402d2feaSBram Moolenaar * Structure to store info for word matching.
78402d2feaSBram Moolenaar */
79402d2feaSBram Moolenaar typedef struct matchinf_S
80402d2feaSBram Moolenaar {
810d6f5d97SBram Moolenaar langp_T *mi_lp; // info for language and region
8263d5a1e5SBram Moolenaar
830d6f5d97SBram Moolenaar // pointers to original text to be checked
840d6f5d97SBram Moolenaar char_u *mi_word; // start of word being checked
850d6f5d97SBram Moolenaar char_u *mi_end; // end of matching word so far
860d6f5d97SBram Moolenaar char_u *mi_fend; // next char to be added to mi_fword
870d6f5d97SBram Moolenaar char_u *mi_cend; // char after what was used for
880d6f5d97SBram Moolenaar // mi_capflags
8963d5a1e5SBram Moolenaar
900d6f5d97SBram Moolenaar // case-folded text
910d6f5d97SBram Moolenaar char_u mi_fword[MAXWLEN + 1]; // mi_word case-folded
920d6f5d97SBram Moolenaar int mi_fwordlen; // nr of valid bytes in mi_fword
9363d5a1e5SBram Moolenaar
940d6f5d97SBram Moolenaar // for when checking word after a prefix
950d6f5d97SBram Moolenaar int mi_prefarridx; // index in sl_pidxs with list of
960d6f5d97SBram Moolenaar // affixID/condition
970d6f5d97SBram Moolenaar int mi_prefcnt; // number of entries at mi_prefarridx
980d6f5d97SBram Moolenaar int mi_prefixlen; // byte length of prefix
990d6f5d97SBram Moolenaar int mi_cprefixlen; // byte length of prefix in original
1000d6f5d97SBram Moolenaar // case
1011d73c885SBram Moolenaar
1020d6f5d97SBram Moolenaar // for when checking a compound word
1030d6f5d97SBram Moolenaar int mi_compoff; // start of following word offset
1040d6f5d97SBram Moolenaar char_u mi_compflags[MAXWLEN]; // flags for compound words used
1050d6f5d97SBram Moolenaar int mi_complen; // nr of compound words used
1060d6f5d97SBram Moolenaar int mi_compextra; // nr of COMPOUNDROOT words
107ae5bce1cSBram Moolenaar
1080d6f5d97SBram Moolenaar // others
1090d6f5d97SBram Moolenaar int mi_result; // result so far: SP_BAD, SP_OK, etc.
1100d6f5d97SBram Moolenaar int mi_capflags; // WF_ONECAP WF_ALLCAP WF_KEEPCAP
1110d6f5d97SBram Moolenaar win_T *mi_win; // buffer being checked
1127862282fSBram Moolenaar
1130d6f5d97SBram Moolenaar // for NOBREAK
1140d6f5d97SBram Moolenaar int mi_result2; // "mi_resul" without following word
1150d6f5d97SBram Moolenaar char_u *mi_end2; // "mi_end" without following word
116402d2feaSBram Moolenaar } matchinf_T;
117402d2feaSBram Moolenaar
118cfc6c43cSBram Moolenaar
119baaa7e9eSBram Moolenaar static int spell_mb_isword_class(int cl, win_T *wp);
120cfc6c43cSBram Moolenaar
1210d6f5d97SBram Moolenaar // mode values for find_word
1220d6f5d97SBram Moolenaar #define FIND_FOLDWORD 0 // find word case-folded
1230d6f5d97SBram Moolenaar #define FIND_KEEPWORD 1 // find keep-case word
1240d6f5d97SBram Moolenaar #define FIND_PREFIX 2 // find word after prefix
1250d6f5d97SBram Moolenaar #define FIND_COMPOUND 3 // find case-folded compound word
1260d6f5d97SBram Moolenaar #define FIND_KEEPCOMPOUND 4 // find keep-case compound word
1271d73c885SBram Moolenaar
128baaa7e9eSBram Moolenaar static void find_word(matchinf_T *mip, int mode);
129baaa7e9eSBram Moolenaar static void find_prefix(matchinf_T *mip, int mode);
130baaa7e9eSBram Moolenaar static int fold_more(matchinf_T *mip);
131baaa7e9eSBram Moolenaar static void spell_load_cb(char_u *fname, void *cookie);
132baaa7e9eSBram Moolenaar static int count_syllables(slang_T *slang, char_u *word);
133baaa7e9eSBram Moolenaar static void clear_midword(win_T *buf);
134baaa7e9eSBram Moolenaar static void use_midword(slang_T *lp, win_T *buf);
135baaa7e9eSBram Moolenaar static int find_region(char_u *rp, char_u *region);
136baaa7e9eSBram Moolenaar static void spell_soundfold_sofo(slang_T *slang, char_u *inword, char_u *res);
137baaa7e9eSBram Moolenaar static void spell_soundfold_sal(slang_T *slang, char_u *inword, char_u *res);
138baaa7e9eSBram Moolenaar static void spell_soundfold_wsal(slang_T *slang, char_u *inword, char_u *res);
139baaa7e9eSBram Moolenaar static void dump_word(slang_T *slang, char_u *word, char_u *pat, int *dir, int round, int flags, linenr_T lnum);
140baaa7e9eSBram Moolenaar static linenr_T dump_prefixes(slang_T *slang, char_u *word, char_u *pat, int *dir, int round, int flags, linenr_T startlnum);
1419ba0eb85SBram Moolenaar
142402d2feaSBram Moolenaar /*
143402d2feaSBram Moolenaar * Main spell-checking function.
14451485f06SBram Moolenaar * "ptr" points to a character that could be the start of a word.
145482aaeb0SBram Moolenaar * "*attrp" is set to the highlight index for a badly spelled word. For a
146482aaeb0SBram Moolenaar * non-word or when it's OK it remains unchanged.
147402d2feaSBram Moolenaar * This must only be called when 'spelllang' is not empty.
1489ba0eb85SBram Moolenaar *
149f9184a1dSBram Moolenaar * "capcol" is used to check for a Capitalised word after the end of a
150f9184a1dSBram Moolenaar * sentence. If it's zero then perform the check. Return the column where to
151f9184a1dSBram Moolenaar * check next, or -1 when no sentence end was found. If it's NULL then don't
152f9184a1dSBram Moolenaar * worry.
1539ba0eb85SBram Moolenaar *
154402d2feaSBram Moolenaar * Returns the length of the word in bytes, also when it's OK, so that the
155402d2feaSBram Moolenaar * caller can skip over the word.
156402d2feaSBram Moolenaar */
157402d2feaSBram Moolenaar int
spell_check(win_T * wp,char_u * ptr,hlf_T * attrp,int * capcol,int docount)158764b23c8SBram Moolenaar spell_check(
1590d6f5d97SBram Moolenaar win_T *wp, // current window
160764b23c8SBram Moolenaar char_u *ptr,
161764b23c8SBram Moolenaar hlf_T *attrp,
1620d6f5d97SBram Moolenaar int *capcol, // column to check for Capital
1630d6f5d97SBram Moolenaar int docount) // count good words
164402d2feaSBram Moolenaar {
1650d6f5d97SBram Moolenaar matchinf_T mi; // Most things are put in "mi" so that it can
1660d6f5d97SBram Moolenaar // be passed to functions quickly.
1670d6f5d97SBram Moolenaar int nrlen = 0; // found a number first
168f9184a1dSBram Moolenaar int c;
1695195e456SBram Moolenaar int wrongcaplen = 0;
170ac6e65f8SBram Moolenaar int lpi;
1714770d09aSBram Moolenaar int count_word = docount;
172e0ebeda4SBram Moolenaar int use_camel_case = *wp->w_s->b_p_spo != NUL;
173e0ebeda4SBram Moolenaar int camel_case = 0;
174402d2feaSBram Moolenaar
1750d6f5d97SBram Moolenaar // A word never starts at a space or a control character. Return quickly
1760d6f5d97SBram Moolenaar // then, skipping over the character.
177cfc6c43cSBram Moolenaar if (*ptr <= ' ')
178cfc6c43cSBram Moolenaar return 1;
179a226a6ddSBram Moolenaar
1800d6f5d97SBram Moolenaar // Return here when loading language files failed.
181860cae1cSBram Moolenaar if (wp->w_s->b_langp.ga_len == 0)
182a226a6ddSBram Moolenaar return 1;
183a226a6ddSBram Moolenaar
184a80faa89SBram Moolenaar CLEAR_FIELD(mi);
18551485f06SBram Moolenaar
1860d6f5d97SBram Moolenaar // A number is always OK. Also skip hexadecimal numbers 0xFF99 and
1870d6f5d97SBram Moolenaar // 0X99FF. But always do check spelling to find "3GPP" and "11
1880d6f5d97SBram Moolenaar // julifeest".
18951485f06SBram Moolenaar if (*ptr >= '0' && *ptr <= '9')
19051485f06SBram Moolenaar {
191887c1feaSBram Moolenaar if (*ptr == '0' && (ptr[1] == 'b' || ptr[1] == 'B'))
192887c1feaSBram Moolenaar mi.mi_end = skipbin(ptr + 2);
193887c1feaSBram Moolenaar else if (*ptr == '0' && (ptr[1] == 'x' || ptr[1] == 'X'))
1943982c541SBram Moolenaar mi.mi_end = skiphex(ptr + 2);
19551485f06SBram Moolenaar else
196d857f0e0SBram Moolenaar mi.mi_end = skipdigits(ptr);
197a93fa7eeSBram Moolenaar nrlen = (int)(mi.mi_end - ptr);
198d857f0e0SBram Moolenaar }
1999ba0eb85SBram Moolenaar
2000d6f5d97SBram Moolenaar // Find the normal end of the word (until the next non-word character).
2010c40586aSBram Moolenaar mi.mi_word = ptr;
20243abc521SBram Moolenaar mi.mi_fend = ptr;
203860cae1cSBram Moolenaar if (spell_iswordp(mi.mi_fend, wp))
20451485f06SBram Moolenaar {
205e0ebeda4SBram Moolenaar int prev_upper;
2062d4070d3SBram Moolenaar int this_upper = FALSE; // init for gcc
207e0ebeda4SBram Moolenaar
208e0ebeda4SBram Moolenaar if (use_camel_case)
209e0ebeda4SBram Moolenaar {
210e0ebeda4SBram Moolenaar c = PTR2CHAR(mi.mi_fend);
211e0ebeda4SBram Moolenaar this_upper = SPELL_ISUPPER(c);
212e0ebeda4SBram Moolenaar }
213e0ebeda4SBram Moolenaar
214402d2feaSBram Moolenaar do
215e0ebeda4SBram Moolenaar {
21691acfffcSBram Moolenaar MB_PTR_ADV(mi.mi_fend);
217e0ebeda4SBram Moolenaar if (use_camel_case)
218e0ebeda4SBram Moolenaar {
219e0ebeda4SBram Moolenaar prev_upper = this_upper;
220e0ebeda4SBram Moolenaar c = PTR2CHAR(mi.mi_fend);
221e0ebeda4SBram Moolenaar this_upper = SPELL_ISUPPER(c);
222e0ebeda4SBram Moolenaar camel_case = !prev_upper && this_upper;
223e0ebeda4SBram Moolenaar }
224e0ebeda4SBram Moolenaar } while (*mi.mi_fend != NUL && spell_iswordp(mi.mi_fend, wp)
225e0ebeda4SBram Moolenaar && !camel_case);
226f9184a1dSBram Moolenaar
227860cae1cSBram Moolenaar if (capcol != NULL && *capcol == 0 && wp->w_s->b_cap_prog != NULL)
228f9184a1dSBram Moolenaar {
2290d6f5d97SBram Moolenaar // Check word starting with capital letter.
23053805d1eSBram Moolenaar c = PTR2CHAR(ptr);
231f9184a1dSBram Moolenaar if (!SPELL_ISUPPER(c))
2325195e456SBram Moolenaar wrongcaplen = (int)(mi.mi_fend - ptr);
233f9184a1dSBram Moolenaar }
234f9184a1dSBram Moolenaar }
235f9184a1dSBram Moolenaar if (capcol != NULL)
236f9184a1dSBram Moolenaar *capcol = -1;
23751485f06SBram Moolenaar
2380d6f5d97SBram Moolenaar // We always use the characters up to the next non-word character,
2390d6f5d97SBram Moolenaar // also for bad words.
24051485f06SBram Moolenaar mi.mi_end = mi.mi_fend;
2419ba0eb85SBram Moolenaar
2420d6f5d97SBram Moolenaar // Check caps type later.
243860cae1cSBram Moolenaar mi.mi_capflags = 0;
244860cae1cSBram Moolenaar mi.mi_cend = NULL;
245860cae1cSBram Moolenaar mi.mi_win = wp;
24663d5a1e5SBram Moolenaar
2470d6f5d97SBram Moolenaar // case-fold the word with one non-word character, so that we can check
2480d6f5d97SBram Moolenaar // for the word end.
249cfc6c43cSBram Moolenaar if (*mi.mi_fend != NUL)
25091acfffcSBram Moolenaar MB_PTR_ADV(mi.mi_fend);
251cfc6c43cSBram Moolenaar
2524f135275SBram Moolenaar (void)spell_casefold(wp, ptr, (int)(mi.mi_fend - ptr), mi.mi_fword,
253cfc6c43cSBram Moolenaar MAXWLEN + 1);
254a93fa7eeSBram Moolenaar mi.mi_fwordlen = (int)STRLEN(mi.mi_fword);
255cfc6c43cSBram Moolenaar
256e0ebeda4SBram Moolenaar if (camel_case)
257e0ebeda4SBram Moolenaar // Introduce a fake word end space into the folded word.
258e0ebeda4SBram Moolenaar mi.mi_fword[mi.mi_fwordlen - 1] = ' ';
259e0ebeda4SBram Moolenaar
2600d6f5d97SBram Moolenaar // The word is bad unless we recognize it.
261402d2feaSBram Moolenaar mi.mi_result = SP_BAD;
2627862282fSBram Moolenaar mi.mi_result2 = SP_BAD;
263402d2feaSBram Moolenaar
264402d2feaSBram Moolenaar /*
265402d2feaSBram Moolenaar * Loop over the languages specified in 'spelllang'.
2664770d09aSBram Moolenaar * We check them all, because a word may be matched longer in another
2674770d09aSBram Moolenaar * language.
268402d2feaSBram Moolenaar */
269860cae1cSBram Moolenaar for (lpi = 0; lpi < wp->w_s->b_langp.ga_len; ++lpi)
270402d2feaSBram Moolenaar {
271860cae1cSBram Moolenaar mi.mi_lp = LANGP_ENTRY(wp->w_s->b_langp, lpi);
272ac6e65f8SBram Moolenaar
2730d6f5d97SBram Moolenaar // If reloading fails the language is still in the list but everything
2740d6f5d97SBram Moolenaar // has been cleared.
275ac6e65f8SBram Moolenaar if (mi.mi_lp->lp_slang->sl_fidxs == NULL)
276ac6e65f8SBram Moolenaar continue;
277ac6e65f8SBram Moolenaar
2780d6f5d97SBram Moolenaar // Check for a matching word in case-folded words.
2791d73c885SBram Moolenaar find_word(&mi, FIND_FOLDWORD);
28051485f06SBram Moolenaar
2810d6f5d97SBram Moolenaar // Check for a matching word in keep-case words.
2821d73c885SBram Moolenaar find_word(&mi, FIND_KEEPWORD);
2831d73c885SBram Moolenaar
2840d6f5d97SBram Moolenaar // Check for matching prefixes.
285d12a1326SBram Moolenaar find_prefix(&mi, FIND_FOLDWORD);
2867862282fSBram Moolenaar
2870d6f5d97SBram Moolenaar // For a NOBREAK language, may want to use a word without a following
2880d6f5d97SBram Moolenaar // word as a backup.
2897862282fSBram Moolenaar if (mi.mi_lp->lp_slang->sl_nobreak && mi.mi_result == SP_BAD
2907862282fSBram Moolenaar && mi.mi_result2 != SP_BAD)
2917862282fSBram Moolenaar {
2927862282fSBram Moolenaar mi.mi_result = mi.mi_result2;
2937862282fSBram Moolenaar mi.mi_end = mi.mi_end2;
2947862282fSBram Moolenaar }
2954770d09aSBram Moolenaar
2960d6f5d97SBram Moolenaar // Count the word in the first language where it's found to be OK.
2974770d09aSBram Moolenaar if (count_word && mi.mi_result == SP_OK)
2984770d09aSBram Moolenaar {
2994770d09aSBram Moolenaar count_common_word(mi.mi_lp->lp_slang, ptr,
3004770d09aSBram Moolenaar (int)(mi.mi_end - ptr), 1);
3014770d09aSBram Moolenaar count_word = FALSE;
3024770d09aSBram Moolenaar }
303402d2feaSBram Moolenaar }
304402d2feaSBram Moolenaar
305402d2feaSBram Moolenaar if (mi.mi_result != SP_OK)
306402d2feaSBram Moolenaar {
3070d6f5d97SBram Moolenaar // If we found a number skip over it. Allows for "42nd". Do flag
3080d6f5d97SBram Moolenaar // rare and local words, e.g., "3GPP".
309d857f0e0SBram Moolenaar if (nrlen > 0)
3100c40586aSBram Moolenaar {
3110c40586aSBram Moolenaar if (mi.mi_result == SP_BAD || mi.mi_result == SP_BANNED)
312d857f0e0SBram Moolenaar return nrlen;
3130c40586aSBram Moolenaar }
314d857f0e0SBram Moolenaar
3150d6f5d97SBram Moolenaar // When we are at a non-word character there is no error, just
3160d6f5d97SBram Moolenaar // skip over the character (try looking for a word after it).
317cc63c647SBram Moolenaar else if (!spell_iswordp_nmw(ptr, wp))
31851485f06SBram Moolenaar {
319860cae1cSBram Moolenaar if (capcol != NULL && wp->w_s->b_cap_prog != NULL)
320f9184a1dSBram Moolenaar {
321f9184a1dSBram Moolenaar regmatch_T regmatch;
322dffa5b8eSBram Moolenaar int r;
323f9184a1dSBram Moolenaar
3240d6f5d97SBram Moolenaar // Check for end of sentence.
325860cae1cSBram Moolenaar regmatch.regprog = wp->w_s->b_cap_prog;
326f9184a1dSBram Moolenaar regmatch.rm_ic = FALSE;
327dffa5b8eSBram Moolenaar r = vim_regexec(®match, ptr, 0);
328dffa5b8eSBram Moolenaar wp->w_s->b_cap_prog = regmatch.regprog;
329dffa5b8eSBram Moolenaar if (r)
330f9184a1dSBram Moolenaar *capcol = (int)(regmatch.endp[0] - ptr);
331f9184a1dSBram Moolenaar }
332f9184a1dSBram Moolenaar
33351485f06SBram Moolenaar if (has_mbyte)
3340fa313a7SBram Moolenaar return (*mb_ptr2len)(ptr);
33551485f06SBram Moolenaar return 1;
33651485f06SBram Moolenaar }
3375195e456SBram Moolenaar else if (mi.mi_end == ptr)
3380d6f5d97SBram Moolenaar // Always include at least one character. Required for when there
3390d6f5d97SBram Moolenaar // is a mixup in "midword".
34091acfffcSBram Moolenaar MB_PTR_ADV(mi.mi_end);
3417862282fSBram Moolenaar else if (mi.mi_result == SP_BAD
342860cae1cSBram Moolenaar && LANGP_ENTRY(wp->w_s->b_langp, 0)->lp_slang->sl_nobreak)
3437862282fSBram Moolenaar {
3447862282fSBram Moolenaar char_u *p, *fp;
3457862282fSBram Moolenaar int save_result = mi.mi_result;
3467862282fSBram Moolenaar
3470d6f5d97SBram Moolenaar // First language in 'spelllang' is NOBREAK. Find first position
3480d6f5d97SBram Moolenaar // at which any word would be valid.
349860cae1cSBram Moolenaar mi.mi_lp = LANGP_ENTRY(wp->w_s->b_langp, 0);
350ac6e65f8SBram Moolenaar if (mi.mi_lp->lp_slang->sl_fidxs != NULL)
351ac6e65f8SBram Moolenaar {
3527862282fSBram Moolenaar p = mi.mi_word;
3537862282fSBram Moolenaar fp = mi.mi_fword;
3547862282fSBram Moolenaar for (;;)
3557862282fSBram Moolenaar {
35691acfffcSBram Moolenaar MB_PTR_ADV(p);
35791acfffcSBram Moolenaar MB_PTR_ADV(fp);
3587862282fSBram Moolenaar if (p >= mi.mi_end)
3597862282fSBram Moolenaar break;
360a93fa7eeSBram Moolenaar mi.mi_compoff = (int)(fp - mi.mi_fword);
3617862282fSBram Moolenaar find_word(&mi, FIND_COMPOUND);
3627862282fSBram Moolenaar if (mi.mi_result != SP_BAD)
3637862282fSBram Moolenaar {
3647862282fSBram Moolenaar mi.mi_end = p;
3657862282fSBram Moolenaar break;
3667862282fSBram Moolenaar }
3677862282fSBram Moolenaar }
3687862282fSBram Moolenaar mi.mi_result = save_result;
3697862282fSBram Moolenaar }
370ac6e65f8SBram Moolenaar }
37151485f06SBram Moolenaar
372cfc6c43cSBram Moolenaar if (mi.mi_result == SP_BAD || mi.mi_result == SP_BANNED)
373482aaeb0SBram Moolenaar *attrp = HLF_SPB;
374402d2feaSBram Moolenaar else if (mi.mi_result == SP_RARE)
375482aaeb0SBram Moolenaar *attrp = HLF_SPR;
376402d2feaSBram Moolenaar else
377482aaeb0SBram Moolenaar *attrp = HLF_SPL;
378402d2feaSBram Moolenaar }
379402d2feaSBram Moolenaar
3805195e456SBram Moolenaar if (wrongcaplen > 0 && (mi.mi_result == SP_OK || mi.mi_result == SP_RARE))
3815195e456SBram Moolenaar {
3820d6f5d97SBram Moolenaar // Report SpellCap only when the word isn't badly spelled.
383482aaeb0SBram Moolenaar *attrp = HLF_SPC;
3845195e456SBram Moolenaar return wrongcaplen;
3855195e456SBram Moolenaar }
3865195e456SBram Moolenaar
38751485f06SBram Moolenaar return (int)(mi.mi_end - ptr);
388402d2feaSBram Moolenaar }
389402d2feaSBram Moolenaar
390402d2feaSBram Moolenaar /*
39151485f06SBram Moolenaar * Check if the word at "mip->mi_word" is in the tree.
3921d73c885SBram Moolenaar * When "mode" is FIND_FOLDWORD check in fold-case word tree.
3931d73c885SBram Moolenaar * When "mode" is FIND_KEEPWORD check in keep-case word tree.
3941d73c885SBram Moolenaar * When "mode" is FIND_PREFIX check for word after prefix in fold-case word
3951d73c885SBram Moolenaar * tree.
39663d5a1e5SBram Moolenaar *
39751485f06SBram Moolenaar * For a match mip->mi_result is updated.
39863d5a1e5SBram Moolenaar */
39963d5a1e5SBram Moolenaar static void
find_word(matchinf_T * mip,int mode)400764b23c8SBram Moolenaar find_word(matchinf_T *mip, int mode)
40163d5a1e5SBram Moolenaar {
4029f30f504SBram Moolenaar idx_T arridx = 0;
4030d6f5d97SBram Moolenaar int endlen[MAXWLEN]; // length at possible word endings
4040d6f5d97SBram Moolenaar idx_T endidx[MAXWLEN]; // possible word endings
40551485f06SBram Moolenaar int endidxcnt = 0;
406402d2feaSBram Moolenaar int len;
40751485f06SBram Moolenaar int wlen = 0;
40851485f06SBram Moolenaar int flen;
40951485f06SBram Moolenaar int c;
41051485f06SBram Moolenaar char_u *ptr;
4119f30f504SBram Moolenaar idx_T lo, hi, m;
41251485f06SBram Moolenaar char_u *s;
413e52325c2SBram Moolenaar char_u *p;
414cfc6c43cSBram Moolenaar int res = SP_BAD;
41551485f06SBram Moolenaar slang_T *slang = mip->mi_lp->lp_slang;
41651485f06SBram Moolenaar unsigned flags;
41751485f06SBram Moolenaar char_u *byts;
4189f30f504SBram Moolenaar idx_T *idxs;
419ae5bce1cSBram Moolenaar int word_ends;
420d12a1326SBram Moolenaar int prefix_found;
4217862282fSBram Moolenaar int nobreak_result;
42251485f06SBram Moolenaar
423ae5bce1cSBram Moolenaar if (mode == FIND_KEEPWORD || mode == FIND_KEEPCOMPOUND)
424402d2feaSBram Moolenaar {
4250d6f5d97SBram Moolenaar // Check for word with matching case in keep-case tree.
42651485f06SBram Moolenaar ptr = mip->mi_word;
4270d6f5d97SBram Moolenaar flen = 9999; // no case folding, always enough bytes
42851485f06SBram Moolenaar byts = slang->sl_kbyts;
42951485f06SBram Moolenaar idxs = slang->sl_kidxs;
430ae5bce1cSBram Moolenaar
431ae5bce1cSBram Moolenaar if (mode == FIND_KEEPCOMPOUND)
4320d6f5d97SBram Moolenaar // Skip over the previously found word(s).
433ae5bce1cSBram Moolenaar wlen += mip->mi_compoff;
43451485f06SBram Moolenaar }
435402d2feaSBram Moolenaar else
436402d2feaSBram Moolenaar {
4370d6f5d97SBram Moolenaar // Check for case-folded in case-folded tree.
43851485f06SBram Moolenaar ptr = mip->mi_fword;
4390d6f5d97SBram Moolenaar flen = mip->mi_fwordlen; // available case-folded bytes
44051485f06SBram Moolenaar byts = slang->sl_fbyts;
44151485f06SBram Moolenaar idxs = slang->sl_fidxs;
4421d73c885SBram Moolenaar
4431d73c885SBram Moolenaar if (mode == FIND_PREFIX)
4441d73c885SBram Moolenaar {
4450d6f5d97SBram Moolenaar // Skip over the prefix.
4461d73c885SBram Moolenaar wlen = mip->mi_prefixlen;
4471d73c885SBram Moolenaar flen -= mip->mi_prefixlen;
4481d73c885SBram Moolenaar }
449ae5bce1cSBram Moolenaar else if (mode == FIND_COMPOUND)
450ae5bce1cSBram Moolenaar {
4510d6f5d97SBram Moolenaar // Skip over the previously found word(s).
452ae5bce1cSBram Moolenaar wlen = mip->mi_compoff;
453ae5bce1cSBram Moolenaar flen -= mip->mi_compoff;
454ae5bce1cSBram Moolenaar }
455ae5bce1cSBram Moolenaar
45651485f06SBram Moolenaar }
45751485f06SBram Moolenaar
45851485f06SBram Moolenaar if (byts == NULL)
4590d6f5d97SBram Moolenaar return; // array is empty
46051485f06SBram Moolenaar
46151485f06SBram Moolenaar /*
462cfc6c43cSBram Moolenaar * Repeat advancing in the tree until:
463cfc6c43cSBram Moolenaar * - there is a byte that doesn't match,
464cfc6c43cSBram Moolenaar * - we reach the end of the tree,
465cfc6c43cSBram Moolenaar * - or we reach the end of the line.
46651485f06SBram Moolenaar */
46751485f06SBram Moolenaar for (;;)
46851485f06SBram Moolenaar {
4690c40586aSBram Moolenaar if (flen <= 0 && *mip->mi_fend != NUL)
4701d73c885SBram Moolenaar flen = fold_more(mip);
47151485f06SBram Moolenaar
47251485f06SBram Moolenaar len = byts[arridx++];
47351485f06SBram Moolenaar
4740d6f5d97SBram Moolenaar // If the first possible byte is a zero the word could end here.
4750d6f5d97SBram Moolenaar // Remember this index, we first check for the longest word.
47651485f06SBram Moolenaar if (byts[arridx] == 0)
477402d2feaSBram Moolenaar {
478cfc6c43cSBram Moolenaar if (endidxcnt == MAXWLEN)
479cfc6c43cSBram Moolenaar {
4800d6f5d97SBram Moolenaar // Must be a corrupted spell file.
481f9e3e09fSBram Moolenaar emsg(_(e_format));
482cfc6c43cSBram Moolenaar return;
483cfc6c43cSBram Moolenaar }
48451485f06SBram Moolenaar endlen[endidxcnt] = wlen;
48551485f06SBram Moolenaar endidx[endidxcnt++] = arridx++;
48651485f06SBram Moolenaar --len;
48751485f06SBram Moolenaar
4880d6f5d97SBram Moolenaar // Skip over the zeros, there can be several flag/region
4890d6f5d97SBram Moolenaar // combinations.
49051485f06SBram Moolenaar while (len > 0 && byts[arridx] == 0)
491402d2feaSBram Moolenaar {
49251485f06SBram Moolenaar ++arridx;
49351485f06SBram Moolenaar --len;
49451485f06SBram Moolenaar }
49551485f06SBram Moolenaar if (len == 0)
4960d6f5d97SBram Moolenaar break; // no children, word must end here
49751485f06SBram Moolenaar }
49851485f06SBram Moolenaar
4990d6f5d97SBram Moolenaar // Stop looking at end of the line.
50051485f06SBram Moolenaar if (ptr[wlen] == NUL)
50151485f06SBram Moolenaar break;
50251485f06SBram Moolenaar
5030d6f5d97SBram Moolenaar // Perform a binary search in the list of accepted bytes.
50451485f06SBram Moolenaar c = ptr[wlen];
5050d6f5d97SBram Moolenaar if (c == TAB) // <Tab> is handled like <Space>
5060c40586aSBram Moolenaar c = ' ';
50751485f06SBram Moolenaar lo = arridx;
50851485f06SBram Moolenaar hi = arridx + len - 1;
50951485f06SBram Moolenaar while (lo < hi)
51051485f06SBram Moolenaar {
51151485f06SBram Moolenaar m = (lo + hi) / 2;
51251485f06SBram Moolenaar if (byts[m] > c)
51351485f06SBram Moolenaar hi = m - 1;
51451485f06SBram Moolenaar else if (byts[m] < c)
51551485f06SBram Moolenaar lo = m + 1;
51651485f06SBram Moolenaar else
51751485f06SBram Moolenaar {
51851485f06SBram Moolenaar lo = hi = m;
519402d2feaSBram Moolenaar break;
520402d2feaSBram Moolenaar }
521402d2feaSBram Moolenaar }
52251485f06SBram Moolenaar
5230d6f5d97SBram Moolenaar // Stop if there is no matching byte.
52451485f06SBram Moolenaar if (hi < lo || byts[lo] != c)
52551485f06SBram Moolenaar break;
52651485f06SBram Moolenaar
5270d6f5d97SBram Moolenaar // Continue at the child (if there is one).
52851485f06SBram Moolenaar arridx = idxs[lo];
52951485f06SBram Moolenaar ++wlen;
53051485f06SBram Moolenaar --flen;
5310c40586aSBram Moolenaar
5320d6f5d97SBram Moolenaar // One space in the good word may stand for several spaces in the
5330d6f5d97SBram Moolenaar // checked word.
5340c40586aSBram Moolenaar if (c == ' ')
5350c40586aSBram Moolenaar {
5360c40586aSBram Moolenaar for (;;)
5370c40586aSBram Moolenaar {
5380c40586aSBram Moolenaar if (flen <= 0 && *mip->mi_fend != NUL)
5390c40586aSBram Moolenaar flen = fold_more(mip);
5400c40586aSBram Moolenaar if (ptr[wlen] != ' ' && ptr[wlen] != TAB)
5410c40586aSBram Moolenaar break;
5420c40586aSBram Moolenaar ++wlen;
5430c40586aSBram Moolenaar --flen;
5440c40586aSBram Moolenaar }
5450c40586aSBram Moolenaar }
54651485f06SBram Moolenaar }
54751485f06SBram Moolenaar
54851485f06SBram Moolenaar /*
54951485f06SBram Moolenaar * Verify that one of the possible endings is valid. Try the longest
55051485f06SBram Moolenaar * first.
55151485f06SBram Moolenaar */
55251485f06SBram Moolenaar while (endidxcnt > 0)
55351485f06SBram Moolenaar {
55451485f06SBram Moolenaar --endidxcnt;
55551485f06SBram Moolenaar arridx = endidx[endidxcnt];
55651485f06SBram Moolenaar wlen = endlen[endidxcnt];
55751485f06SBram Moolenaar
55851485f06SBram Moolenaar if ((*mb_head_off)(ptr, ptr + wlen) > 0)
5590d6f5d97SBram Moolenaar continue; // not at first byte of character
560860cae1cSBram Moolenaar if (spell_iswordp(ptr + wlen, mip->mi_win))
561ae5bce1cSBram Moolenaar {
5627862282fSBram Moolenaar if (slang->sl_compprog == NULL && !slang->sl_nobreak)
5630d6f5d97SBram Moolenaar continue; // next char is a word character
564ae5bce1cSBram Moolenaar word_ends = FALSE;
565ae5bce1cSBram Moolenaar }
566ae5bce1cSBram Moolenaar else
567ae5bce1cSBram Moolenaar word_ends = TRUE;
5680d6f5d97SBram Moolenaar // The prefix flag is before compound flags. Once a valid prefix flag
5690d6f5d97SBram Moolenaar // has been found we try compound flags.
570d12a1326SBram Moolenaar prefix_found = FALSE;
57151485f06SBram Moolenaar
5721d73c885SBram Moolenaar if (mode != FIND_KEEPWORD && has_mbyte)
57351485f06SBram Moolenaar {
5740d6f5d97SBram Moolenaar // Compute byte length in original word, length may change
5750d6f5d97SBram Moolenaar // when folding case. This can be slow, take a shortcut when the
5760d6f5d97SBram Moolenaar // case-folded word is equal to the keep-case word.
57751485f06SBram Moolenaar p = mip->mi_word;
5781d73c885SBram Moolenaar if (STRNCMP(ptr, p, wlen) != 0)
5791d73c885SBram Moolenaar {
58091acfffcSBram Moolenaar for (s = ptr; s < ptr + wlen; MB_PTR_ADV(s))
58191acfffcSBram Moolenaar MB_PTR_ADV(p);
582a93fa7eeSBram Moolenaar wlen = (int)(p - mip->mi_word);
58351485f06SBram Moolenaar }
5841d73c885SBram Moolenaar }
58551485f06SBram Moolenaar
5860d6f5d97SBram Moolenaar // Check flags and region. For FIND_PREFIX check the condition and
5870d6f5d97SBram Moolenaar // prefix ID.
5880d6f5d97SBram Moolenaar // Repeat this if there are more flags/region alternatives until there
5890d6f5d97SBram Moolenaar // is a match.
5901d73c885SBram Moolenaar res = SP_BAD;
5911d73c885SBram Moolenaar for (len = byts[arridx - 1]; len > 0 && byts[arridx] == 0;
5921d73c885SBram Moolenaar --len, ++arridx)
59351485f06SBram Moolenaar {
59451485f06SBram Moolenaar flags = idxs[arridx];
5959f30f504SBram Moolenaar
5960d6f5d97SBram Moolenaar // For the fold-case tree check that the case of the checked word
5970d6f5d97SBram Moolenaar // matches with what the word in the tree requires.
5980d6f5d97SBram Moolenaar // For keep-case tree the case is always right. For prefixes we
5990d6f5d97SBram Moolenaar // don't bother to check.
6001d73c885SBram Moolenaar if (mode == FIND_FOLDWORD)
60151485f06SBram Moolenaar {
60251485f06SBram Moolenaar if (mip->mi_cend != mip->mi_word + wlen)
603402d2feaSBram Moolenaar {
6040d6f5d97SBram Moolenaar // mi_capflags was set for a different word length, need
6050d6f5d97SBram Moolenaar // to do it again.
60651485f06SBram Moolenaar mip->mi_cend = mip->mi_word + wlen;
6079ba0eb85SBram Moolenaar mip->mi_capflags = captype(mip->mi_word, mip->mi_cend);
60851485f06SBram Moolenaar }
60951485f06SBram Moolenaar
6100c40586aSBram Moolenaar if (mip->mi_capflags == WF_KEEPCAP
6110c40586aSBram Moolenaar || !spell_valid_case(mip->mi_capflags, flags))
6121d73c885SBram Moolenaar continue;
61351485f06SBram Moolenaar }
61451485f06SBram Moolenaar
6150d6f5d97SBram Moolenaar // When mode is FIND_PREFIX the word must support the prefix:
6160d6f5d97SBram Moolenaar // check the prefix ID and the condition. Do that for the list at
6170d6f5d97SBram Moolenaar // mip->mi_prefarridx that find_prefix() filled.
618d12a1326SBram Moolenaar else if (mode == FIND_PREFIX && !prefix_found)
619402d2feaSBram Moolenaar {
620cf6bf39fSBram Moolenaar c = valid_word_prefix(mip->mi_prefcnt, mip->mi_prefarridx,
621dfb9ac00SBram Moolenaar flags,
62253805d1eSBram Moolenaar mip->mi_word + mip->mi_cprefixlen, slang,
62353805d1eSBram Moolenaar FALSE);
624cf6bf39fSBram Moolenaar if (c == 0)
6251d73c885SBram Moolenaar continue;
626cf6bf39fSBram Moolenaar
6270d6f5d97SBram Moolenaar // Use the WF_RARE flag for a rare prefix.
628cf6bf39fSBram Moolenaar if (c & WF_RAREPFX)
629cf6bf39fSBram Moolenaar flags |= WF_RARE;
630d12a1326SBram Moolenaar prefix_found = TRUE;
6311d73c885SBram Moolenaar }
6321d73c885SBram Moolenaar
6337862282fSBram Moolenaar if (slang->sl_nobreak)
6347862282fSBram Moolenaar {
6357862282fSBram Moolenaar if ((mode == FIND_COMPOUND || mode == FIND_KEEPCOMPOUND)
6367862282fSBram Moolenaar && (flags & WF_BANNED) == 0)
6377862282fSBram Moolenaar {
6380d6f5d97SBram Moolenaar // NOBREAK: found a valid following word. That's all we
6390d6f5d97SBram Moolenaar // need to know, so return.
6407862282fSBram Moolenaar mip->mi_result = SP_OK;
6417862282fSBram Moolenaar break;
6427862282fSBram Moolenaar }
6437862282fSBram Moolenaar }
6447862282fSBram Moolenaar
6457862282fSBram Moolenaar else if ((mode == FIND_COMPOUND || mode == FIND_KEEPCOMPOUND
6467862282fSBram Moolenaar || !word_ends))
647ae5bce1cSBram Moolenaar {
6480d6f5d97SBram Moolenaar // If there is no compound flag or the word is shorter than
6490d6f5d97SBram Moolenaar // COMPOUNDMIN reject it quickly.
6500d6f5d97SBram Moolenaar // Makes you wonder why someone puts a compound flag on a word
6510d6f5d97SBram Moolenaar // that's too short... Myspell compatibility requires this
6520d6f5d97SBram Moolenaar // anyway.
653e52325c2SBram Moolenaar if (((unsigned)flags >> 24) == 0
654e52325c2SBram Moolenaar || wlen - mip->mi_compoff < slang->sl_compminlen)
655ae5bce1cSBram Moolenaar continue;
6560d6f5d97SBram Moolenaar // For multi-byte chars check character length against
6570d6f5d97SBram Moolenaar // COMPOUNDMIN.
658ac6e65f8SBram Moolenaar if (has_mbyte
659da2303d9SBram Moolenaar && slang->sl_compminlen > 0
660ac6e65f8SBram Moolenaar && mb_charlen_len(mip->mi_word + mip->mi_compoff,
661ac6e65f8SBram Moolenaar wlen - mip->mi_compoff) < slang->sl_compminlen)
662ac6e65f8SBram Moolenaar continue;
663ae5bce1cSBram Moolenaar
6640d6f5d97SBram Moolenaar // Limit the number of compound words to COMPOUNDWORDMAX if no
6650d6f5d97SBram Moolenaar // maximum for syllables is specified.
666899dddf8SBram Moolenaar if (!word_ends && mip->mi_complen + mip->mi_compextra + 2
667899dddf8SBram Moolenaar > slang->sl_compmax
668e52325c2SBram Moolenaar && slang->sl_compsylmax == MAXWLEN)
669ae5bce1cSBram Moolenaar continue;
6705195e456SBram Moolenaar
6710d6f5d97SBram Moolenaar // Don't allow compounding on a side where an affix was added,
6720d6f5d97SBram Moolenaar // unless COMPOUNDPERMITFLAG was used.
673910f66f9SBram Moolenaar if (mip->mi_complen > 0 && (flags & WF_NOCOMPBEF))
674910f66f9SBram Moolenaar continue;
675910f66f9SBram Moolenaar if (!word_ends && (flags & WF_NOCOMPAFT))
676910f66f9SBram Moolenaar continue;
677910f66f9SBram Moolenaar
6780d6f5d97SBram Moolenaar // Quickly check if compounding is possible with this flag.
6796de6853cSBram Moolenaar if (!byte_in_str(mip->mi_complen == 0
680d12a1326SBram Moolenaar ? slang->sl_compstartflags
681d12a1326SBram Moolenaar : slang->sl_compallflags,
6826de6853cSBram Moolenaar ((unsigned)flags >> 24)))
6835195e456SBram Moolenaar continue;
6845195e456SBram Moolenaar
6850d6f5d97SBram Moolenaar // If there is a match with a CHECKCOMPOUNDPATTERN rule
6860d6f5d97SBram Moolenaar // discard the compound word.
6879f94b05bSBram Moolenaar if (match_checkcompoundpattern(ptr, wlen, &slang->sl_comppat))
6889f94b05bSBram Moolenaar continue;
6899f94b05bSBram Moolenaar
690e52325c2SBram Moolenaar if (mode == FIND_COMPOUND)
691e52325c2SBram Moolenaar {
692e52325c2SBram Moolenaar int capflags;
693e52325c2SBram Moolenaar
6940d6f5d97SBram Moolenaar // Need to check the caps type of the appended compound
6950d6f5d97SBram Moolenaar // word.
696e52325c2SBram Moolenaar if (has_mbyte && STRNCMP(ptr, mip->mi_word,
697e52325c2SBram Moolenaar mip->mi_compoff) != 0)
698e52325c2SBram Moolenaar {
6990d6f5d97SBram Moolenaar // case folding may have changed the length
700e52325c2SBram Moolenaar p = mip->mi_word;
70191acfffcSBram Moolenaar for (s = ptr; s < ptr + mip->mi_compoff; MB_PTR_ADV(s))
70291acfffcSBram Moolenaar MB_PTR_ADV(p);
703e52325c2SBram Moolenaar }
704e52325c2SBram Moolenaar else
705e52325c2SBram Moolenaar p = mip->mi_word + mip->mi_compoff;
706e52325c2SBram Moolenaar capflags = captype(p, mip->mi_word + wlen);
707e52325c2SBram Moolenaar if (capflags == WF_KEEPCAP || (capflags == WF_ALLCAP
708e52325c2SBram Moolenaar && (flags & WF_FIXCAP) != 0))
709e52325c2SBram Moolenaar continue;
710e52325c2SBram Moolenaar
711e52325c2SBram Moolenaar if (capflags != WF_ALLCAP)
712e52325c2SBram Moolenaar {
7130d6f5d97SBram Moolenaar // When the character before the word is a word
7140d6f5d97SBram Moolenaar // character we do not accept a Onecap word. We do
7150d6f5d97SBram Moolenaar // accept a no-caps word, even when the dictionary
7160d6f5d97SBram Moolenaar // word specifies ONECAP.
71791acfffcSBram Moolenaar MB_PTR_BACK(mip->mi_word, p);
718cc63c647SBram Moolenaar if (spell_iswordp_nmw(p, mip->mi_win)
719e52325c2SBram Moolenaar ? capflags == WF_ONECAP
720e52325c2SBram Moolenaar : (flags & WF_ONECAP) != 0
721e52325c2SBram Moolenaar && capflags != WF_ONECAP)
722e52325c2SBram Moolenaar continue;
723e52325c2SBram Moolenaar }
724e52325c2SBram Moolenaar }
725e52325c2SBram Moolenaar
7260d6f5d97SBram Moolenaar // If the word ends the sequence of compound flags of the
7270d6f5d97SBram Moolenaar // words must match with one of the COMPOUNDRULE items and
7280d6f5d97SBram Moolenaar // the number of syllables must not be too large.
7295195e456SBram Moolenaar mip->mi_compflags[mip->mi_complen] = ((unsigned)flags >> 24);
7305195e456SBram Moolenaar mip->mi_compflags[mip->mi_complen + 1] = NUL;
7315195e456SBram Moolenaar if (word_ends)
7325195e456SBram Moolenaar {
7335195e456SBram Moolenaar char_u fword[MAXWLEN];
7345195e456SBram Moolenaar
7355195e456SBram Moolenaar if (slang->sl_compsylmax < MAXWLEN)
7365195e456SBram Moolenaar {
7370d6f5d97SBram Moolenaar // "fword" is only needed for checking syllables.
7385195e456SBram Moolenaar if (ptr == mip->mi_word)
7394f135275SBram Moolenaar (void)spell_casefold(mip->mi_win,
7404f135275SBram Moolenaar ptr, wlen, fword, MAXWLEN);
7415195e456SBram Moolenaar else
7425195e456SBram Moolenaar vim_strncpy(fword, ptr, endlen[endidxcnt]);
7435195e456SBram Moolenaar }
7445195e456SBram Moolenaar if (!can_compound(slang, fword, mip->mi_compflags))
7455195e456SBram Moolenaar continue;
7465195e456SBram Moolenaar }
7479f94b05bSBram Moolenaar else if (slang->sl_comprules != NULL
7489f94b05bSBram Moolenaar && !match_compoundrule(slang, mip->mi_compflags))
7490d6f5d97SBram Moolenaar // The compound flags collected so far do not match any
7500d6f5d97SBram Moolenaar // COMPOUNDRULE, discard the compounded word.
7519f94b05bSBram Moolenaar continue;
752ae5bce1cSBram Moolenaar }
753ae5bce1cSBram Moolenaar
7540d6f5d97SBram Moolenaar // Check NEEDCOMPOUND: can't use word without compounding.
755ac6e65f8SBram Moolenaar else if (flags & WF_NEEDCOMP)
756ac6e65f8SBram Moolenaar continue;
757ac6e65f8SBram Moolenaar
7587862282fSBram Moolenaar nobreak_result = SP_OK;
7597862282fSBram Moolenaar
760ae5bce1cSBram Moolenaar if (!word_ends)
761ae5bce1cSBram Moolenaar {
7627862282fSBram Moolenaar int save_result = mip->mi_result;
7637862282fSBram Moolenaar char_u *save_end = mip->mi_end;
764da2303d9SBram Moolenaar langp_T *save_lp = mip->mi_lp;
765da2303d9SBram Moolenaar int lpi;
7667862282fSBram Moolenaar
7670d6f5d97SBram Moolenaar // Check that a valid word follows. If there is one and we
7680d6f5d97SBram Moolenaar // are compounding, it will set "mi_result", thus we are
7690d6f5d97SBram Moolenaar // always finished here. For NOBREAK we only check that a
7700d6f5d97SBram Moolenaar // valid word follows.
7710d6f5d97SBram Moolenaar // Recursive!
7727862282fSBram Moolenaar if (slang->sl_nobreak)
7737862282fSBram Moolenaar mip->mi_result = SP_BAD;
774ae5bce1cSBram Moolenaar
7750d6f5d97SBram Moolenaar // Find following word in case-folded tree.
776ae5bce1cSBram Moolenaar mip->mi_compoff = endlen[endidxcnt];
777ae5bce1cSBram Moolenaar if (has_mbyte && mode == FIND_KEEPWORD)
778ae5bce1cSBram Moolenaar {
7790d6f5d97SBram Moolenaar // Compute byte length in case-folded word from "wlen":
7800d6f5d97SBram Moolenaar // byte length in keep-case word. Length may change when
7810d6f5d97SBram Moolenaar // folding case. This can be slow, take a shortcut when
7820d6f5d97SBram Moolenaar // the case-folded word is equal to the keep-case word.
783ae5bce1cSBram Moolenaar p = mip->mi_fword;
784ae5bce1cSBram Moolenaar if (STRNCMP(ptr, p, wlen) != 0)
785ae5bce1cSBram Moolenaar {
78691acfffcSBram Moolenaar for (s = ptr; s < ptr + wlen; MB_PTR_ADV(s))
78791acfffcSBram Moolenaar MB_PTR_ADV(p);
788a93fa7eeSBram Moolenaar mip->mi_compoff = (int)(p - mip->mi_fword);
789ae5bce1cSBram Moolenaar }
790ae5bce1cSBram Moolenaar }
7910d6f5d97SBram Moolenaar #if 0 // Disabled, see below
792d12a1326SBram Moolenaar c = mip->mi_compoff;
793ba534351SBram Moolenaar #endif
7945195e456SBram Moolenaar ++mip->mi_complen;
795899dddf8SBram Moolenaar if (flags & WF_COMPROOT)
796899dddf8SBram Moolenaar ++mip->mi_compextra;
797da2303d9SBram Moolenaar
7980d6f5d97SBram Moolenaar // For NOBREAK we need to try all NOBREAK languages, at least
7990d6f5d97SBram Moolenaar // to find the ".add" file(s).
800860cae1cSBram Moolenaar for (lpi = 0; lpi < mip->mi_win->w_s->b_langp.ga_len; ++lpi)
801da2303d9SBram Moolenaar {
802da2303d9SBram Moolenaar if (slang->sl_nobreak)
803da2303d9SBram Moolenaar {
804860cae1cSBram Moolenaar mip->mi_lp = LANGP_ENTRY(mip->mi_win->w_s->b_langp, lpi);
805da2303d9SBram Moolenaar if (mip->mi_lp->lp_slang->sl_fidxs == NULL
806da2303d9SBram Moolenaar || !mip->mi_lp->lp_slang->sl_nobreak)
807da2303d9SBram Moolenaar continue;
808da2303d9SBram Moolenaar }
809da2303d9SBram Moolenaar
810ae5bce1cSBram Moolenaar find_word(mip, FIND_COMPOUND);
811ae5bce1cSBram Moolenaar
8120d6f5d97SBram Moolenaar // When NOBREAK any word that matches is OK. Otherwise we
8130d6f5d97SBram Moolenaar // need to find the longest match, thus try with keep-case
8140d6f5d97SBram Moolenaar // and prefix too.
8157862282fSBram Moolenaar if (!slang->sl_nobreak || mip->mi_result == SP_BAD)
8167862282fSBram Moolenaar {
8170d6f5d97SBram Moolenaar // Find following word in keep-case tree.
818ae5bce1cSBram Moolenaar mip->mi_compoff = wlen;
819ae5bce1cSBram Moolenaar find_word(mip, FIND_KEEPCOMPOUND);
820d12a1326SBram Moolenaar
8210d6f5d97SBram Moolenaar #if 0 // Disabled, a prefix must not appear halfway a compound word,
8220d6f5d97SBram Moolenaar // unless the COMPOUNDPERMITFLAG is used and then it can't be a
8230d6f5d97SBram Moolenaar // postponed prefix.
8247862282fSBram Moolenaar if (!slang->sl_nobreak || mip->mi_result == SP_BAD)
8257862282fSBram Moolenaar {
8260d6f5d97SBram Moolenaar // Check for following word with prefix.
827d12a1326SBram Moolenaar mip->mi_compoff = c;
828d12a1326SBram Moolenaar find_prefix(mip, FIND_COMPOUND);
8297862282fSBram Moolenaar }
830910f66f9SBram Moolenaar #endif
8317862282fSBram Moolenaar }
832da2303d9SBram Moolenaar
833da2303d9SBram Moolenaar if (!slang->sl_nobreak)
834da2303d9SBram Moolenaar break;
835da2303d9SBram Moolenaar }
8365195e456SBram Moolenaar --mip->mi_complen;
837899dddf8SBram Moolenaar if (flags & WF_COMPROOT)
838899dddf8SBram Moolenaar --mip->mi_compextra;
839da2303d9SBram Moolenaar mip->mi_lp = save_lp;
840d12a1326SBram Moolenaar
8417862282fSBram Moolenaar if (slang->sl_nobreak)
8427862282fSBram Moolenaar {
8437862282fSBram Moolenaar nobreak_result = mip->mi_result;
8447862282fSBram Moolenaar mip->mi_result = save_result;
8457862282fSBram Moolenaar mip->mi_end = save_end;
8467862282fSBram Moolenaar }
8477862282fSBram Moolenaar else
8487862282fSBram Moolenaar {
849ae5bce1cSBram Moolenaar if (mip->mi_result == SP_OK)
850ae5bce1cSBram Moolenaar break;
851ae5bce1cSBram Moolenaar continue;
852ae5bce1cSBram Moolenaar }
8537862282fSBram Moolenaar }
854ae5bce1cSBram Moolenaar
855cfc6c43cSBram Moolenaar if (flags & WF_BANNED)
856cfc6c43cSBram Moolenaar res = SP_BANNED;
857cfc6c43cSBram Moolenaar else if (flags & WF_REGION)
85851485f06SBram Moolenaar {
8590d6f5d97SBram Moolenaar // Check region.
860dfb9ac00SBram Moolenaar if ((mip->mi_lp->lp_region & (flags >> 16)) != 0)
86151485f06SBram Moolenaar res = SP_OK;
86251485f06SBram Moolenaar else
86351485f06SBram Moolenaar res = SP_LOCAL;
86451485f06SBram Moolenaar }
86551485f06SBram Moolenaar else if (flags & WF_RARE)
86651485f06SBram Moolenaar res = SP_RARE;
86751485f06SBram Moolenaar else
86851485f06SBram Moolenaar res = SP_OK;
869cfc6c43cSBram Moolenaar
8700d6f5d97SBram Moolenaar // Always use the longest match and the best result. For NOBREAK
8710d6f5d97SBram Moolenaar // we separately keep the longest match without a following good
8720d6f5d97SBram Moolenaar // word as a fall-back.
8737862282fSBram Moolenaar if (nobreak_result == SP_BAD)
8747862282fSBram Moolenaar {
8757862282fSBram Moolenaar if (mip->mi_result2 > res)
8767862282fSBram Moolenaar {
8777862282fSBram Moolenaar mip->mi_result2 = res;
8787862282fSBram Moolenaar mip->mi_end2 = mip->mi_word + wlen;
8797862282fSBram Moolenaar }
8807862282fSBram Moolenaar else if (mip->mi_result2 == res
8817862282fSBram Moolenaar && mip->mi_end2 < mip->mi_word + wlen)
8827862282fSBram Moolenaar mip->mi_end2 = mip->mi_word + wlen;
8837862282fSBram Moolenaar }
8847862282fSBram Moolenaar else if (mip->mi_result > res)
885cfc6c43cSBram Moolenaar {
886cfc6c43cSBram Moolenaar mip->mi_result = res;
887cfc6c43cSBram Moolenaar mip->mi_end = mip->mi_word + wlen;
888cfc6c43cSBram Moolenaar }
889f417f2b6SBram Moolenaar else if (mip->mi_result == res && mip->mi_end < mip->mi_word + wlen)
890cfc6c43cSBram Moolenaar mip->mi_end = mip->mi_word + wlen;
891cfc6c43cSBram Moolenaar
8927862282fSBram Moolenaar if (mip->mi_result == SP_OK)
893cfc6c43cSBram Moolenaar break;
894cfc6c43cSBram Moolenaar }
89551485f06SBram Moolenaar
8967862282fSBram Moolenaar if (mip->mi_result == SP_OK)
89751485f06SBram Moolenaar break;
898402d2feaSBram Moolenaar }
899402d2feaSBram Moolenaar }
900402d2feaSBram Moolenaar
9019ba0eb85SBram Moolenaar /*
9029f94b05bSBram Moolenaar * Return TRUE if there is a match between the word ptr[wlen] and
9039f94b05bSBram Moolenaar * CHECKCOMPOUNDPATTERN rules, assuming that we will concatenate with another
9049f94b05bSBram Moolenaar * word.
9059f94b05bSBram Moolenaar * A match means that the first part of CHECKCOMPOUNDPATTERN matches at the
9069f94b05bSBram Moolenaar * end of ptr[wlen] and the second part matches after it.
9079f94b05bSBram Moolenaar */
90846a426c9SBram Moolenaar int
match_checkcompoundpattern(char_u * ptr,int wlen,garray_T * gap)909764b23c8SBram Moolenaar match_checkcompoundpattern(
910764b23c8SBram Moolenaar char_u *ptr,
911764b23c8SBram Moolenaar int wlen,
9120d6f5d97SBram Moolenaar garray_T *gap) // &sl_comppat
9139f94b05bSBram Moolenaar {
9149f94b05bSBram Moolenaar int i;
9159f94b05bSBram Moolenaar char_u *p;
9169f94b05bSBram Moolenaar int len;
9179f94b05bSBram Moolenaar
9189f94b05bSBram Moolenaar for (i = 0; i + 1 < gap->ga_len; i += 2)
9199f94b05bSBram Moolenaar {
9209f94b05bSBram Moolenaar p = ((char_u **)gap->ga_data)[i + 1];
9219f94b05bSBram Moolenaar if (STRNCMP(ptr + wlen, p, STRLEN(p)) == 0)
9229f94b05bSBram Moolenaar {
9230d6f5d97SBram Moolenaar // Second part matches at start of following compound word, now
9240d6f5d97SBram Moolenaar // check if first part matches at end of previous word.
9259f94b05bSBram Moolenaar p = ((char_u **)gap->ga_data)[i];
92619c9c76cSBram Moolenaar len = (int)STRLEN(p);
9279f94b05bSBram Moolenaar if (len <= wlen && STRNCMP(ptr + wlen - len, p, len) == 0)
9289f94b05bSBram Moolenaar return TRUE;
9299f94b05bSBram Moolenaar }
9309f94b05bSBram Moolenaar }
9319f94b05bSBram Moolenaar return FALSE;
9329f94b05bSBram Moolenaar }
9339f94b05bSBram Moolenaar
9349f94b05bSBram Moolenaar /*
935a40ceaf8SBram Moolenaar * Return TRUE if "flags" is a valid sequence of compound flags and "word"
936a40ceaf8SBram Moolenaar * does not have too many syllables.
9375b8d8fdbSBram Moolenaar */
93846a426c9SBram Moolenaar int
can_compound(slang_T * slang,char_u * word,char_u * flags)939764b23c8SBram Moolenaar can_compound(slang_T *slang, char_u *word, char_u *flags)
9405b8d8fdbSBram Moolenaar {
9416de6853cSBram Moolenaar char_u uflags[MAXWLEN * 2];
9426de6853cSBram Moolenaar int i;
9436de6853cSBram Moolenaar char_u *p;
9445195e456SBram Moolenaar
9455195e456SBram Moolenaar if (slang->sl_compprog == NULL)
9465195e456SBram Moolenaar return FALSE;
9476de6853cSBram Moolenaar if (enc_utf8)
9486de6853cSBram Moolenaar {
9490d6f5d97SBram Moolenaar // Need to convert the single byte flags to utf8 characters.
9506de6853cSBram Moolenaar p = uflags;
9516de6853cSBram Moolenaar for (i = 0; flags[i] != NUL; ++i)
952ace95989SBram Moolenaar p += utf_char2bytes(flags[i], p);
9536de6853cSBram Moolenaar *p = NUL;
9546de6853cSBram Moolenaar p = uflags;
9556de6853cSBram Moolenaar }
9566de6853cSBram Moolenaar else
9576de6853cSBram Moolenaar p = flags;
958dffa5b8eSBram Moolenaar if (!vim_regexec_prog(&slang->sl_compprog, FALSE, p, 0))
9595195e456SBram Moolenaar return FALSE;
9605195e456SBram Moolenaar
9610d6f5d97SBram Moolenaar // Count the number of syllables. This may be slow, do it last. If there
9620d6f5d97SBram Moolenaar // are too many syllables AND the number of compound words is above
9630d6f5d97SBram Moolenaar // COMPOUNDWORDMAX then compounding is not allowed.
9645195e456SBram Moolenaar if (slang->sl_compsylmax < MAXWLEN
9655195e456SBram Moolenaar && count_syllables(slang, word) > slang->sl_compsylmax)
9666de6853cSBram Moolenaar return (int)STRLEN(flags) < slang->sl_compmax;
9675195e456SBram Moolenaar return TRUE;
9685b8d8fdbSBram Moolenaar }
9695b8d8fdbSBram Moolenaar
9705b8d8fdbSBram Moolenaar /*
9719f94b05bSBram Moolenaar * Return TRUE if the compound flags in compflags[] match the start of any
9729f94b05bSBram Moolenaar * compound rule. This is used to stop trying a compound if the flags
9739f94b05bSBram Moolenaar * collected so far can't possibly match any compound rule.
9749f94b05bSBram Moolenaar * Caller must check that slang->sl_comprules is not NULL.
9759f94b05bSBram Moolenaar */
97646a426c9SBram Moolenaar int
match_compoundrule(slang_T * slang,char_u * compflags)977764b23c8SBram Moolenaar match_compoundrule(slang_T *slang, char_u *compflags)
9789f94b05bSBram Moolenaar {
9799f94b05bSBram Moolenaar char_u *p;
9809f94b05bSBram Moolenaar int i;
9819f94b05bSBram Moolenaar int c;
9829f94b05bSBram Moolenaar
9830d6f5d97SBram Moolenaar // loop over all the COMPOUNDRULE entries
9849f94b05bSBram Moolenaar for (p = slang->sl_comprules; *p != NUL; ++p)
9859f94b05bSBram Moolenaar {
9860d6f5d97SBram Moolenaar // loop over the flags in the compound word we have made, match
9870d6f5d97SBram Moolenaar // them against the current rule entry
9889f94b05bSBram Moolenaar for (i = 0; ; ++i)
9899f94b05bSBram Moolenaar {
9909f94b05bSBram Moolenaar c = compflags[i];
9919f94b05bSBram Moolenaar if (c == NUL)
9920d6f5d97SBram Moolenaar // found a rule that matches for the flags we have so far
9939f94b05bSBram Moolenaar return TRUE;
9949f94b05bSBram Moolenaar if (*p == '/' || *p == NUL)
9950d6f5d97SBram Moolenaar break; // end of rule, it's too short
9969f94b05bSBram Moolenaar if (*p == '[')
9979f94b05bSBram Moolenaar {
9989f94b05bSBram Moolenaar int match = FALSE;
9999f94b05bSBram Moolenaar
10000d6f5d97SBram Moolenaar // compare against all the flags in []
10019f94b05bSBram Moolenaar ++p;
10029f94b05bSBram Moolenaar while (*p != ']' && *p != NUL)
10039f94b05bSBram Moolenaar if (*p++ == c)
10049f94b05bSBram Moolenaar match = TRUE;
10059f94b05bSBram Moolenaar if (!match)
10060d6f5d97SBram Moolenaar break; // none matches
10079f94b05bSBram Moolenaar }
10089f94b05bSBram Moolenaar else if (*p != c)
10090d6f5d97SBram Moolenaar break; // flag of word doesn't match flag in pattern
10109f94b05bSBram Moolenaar ++p;
10119f94b05bSBram Moolenaar }
10129f94b05bSBram Moolenaar
10130d6f5d97SBram Moolenaar // Skip to the next "/", where the next pattern starts.
10149f94b05bSBram Moolenaar p = vim_strchr(p, '/');
10159f94b05bSBram Moolenaar if (p == NULL)
10169f94b05bSBram Moolenaar break;
10179f94b05bSBram Moolenaar }
10189f94b05bSBram Moolenaar
10190d6f5d97SBram Moolenaar // Checked all the rules and none of them match the flags, so there
10200d6f5d97SBram Moolenaar // can't possibly be a compound starting with these flags.
10219f94b05bSBram Moolenaar return FALSE;
10229f94b05bSBram Moolenaar }
10239f94b05bSBram Moolenaar
10249f94b05bSBram Moolenaar /*
1025dfb9ac00SBram Moolenaar * Return non-zero if the prefix indicated by "arridx" matches with the prefix
1026dfb9ac00SBram Moolenaar * ID in "flags" for the word "word".
1027cf6bf39fSBram Moolenaar * The WF_RAREPFX flag is included in the return value for a rare prefix.
1028f417f2b6SBram Moolenaar */
102946a426c9SBram Moolenaar int
valid_word_prefix(int totprefcnt,int arridx,int flags,char_u * word,slang_T * slang,int cond_req)1030764b23c8SBram Moolenaar valid_word_prefix(
10310d6f5d97SBram Moolenaar int totprefcnt, // nr of prefix IDs
10320d6f5d97SBram Moolenaar int arridx, // idx in sl_pidxs[]
1033764b23c8SBram Moolenaar int flags,
1034764b23c8SBram Moolenaar char_u *word,
1035764b23c8SBram Moolenaar slang_T *slang,
10360d6f5d97SBram Moolenaar int cond_req) // only use prefixes with a condition
1037f417f2b6SBram Moolenaar {
1038f417f2b6SBram Moolenaar int prefcnt;
1039f417f2b6SBram Moolenaar int pidx;
1040dffa5b8eSBram Moolenaar regprog_T **rp;
1041dfb9ac00SBram Moolenaar int prefid;
1042f417f2b6SBram Moolenaar
1043dfb9ac00SBram Moolenaar prefid = (unsigned)flags >> 24;
1044f417f2b6SBram Moolenaar for (prefcnt = totprefcnt - 1; prefcnt >= 0; --prefcnt)
1045f417f2b6SBram Moolenaar {
1046f417f2b6SBram Moolenaar pidx = slang->sl_pidxs[arridx + prefcnt];
1047f417f2b6SBram Moolenaar
10480d6f5d97SBram Moolenaar // Check the prefix ID.
1049f417f2b6SBram Moolenaar if (prefid != (pidx & 0xff))
1050f417f2b6SBram Moolenaar continue;
1051f417f2b6SBram Moolenaar
10520d6f5d97SBram Moolenaar // Check if the prefix doesn't combine and the word already has a
10530d6f5d97SBram Moolenaar // suffix.
1054dfb9ac00SBram Moolenaar if ((flags & WF_HAS_AFF) && (pidx & WF_PFX_NC))
1055dfb9ac00SBram Moolenaar continue;
1056dfb9ac00SBram Moolenaar
10570d6f5d97SBram Moolenaar // Check the condition, if there is one. The condition index is
10580d6f5d97SBram Moolenaar // stored in the two bytes above the prefix ID byte.
1059dffa5b8eSBram Moolenaar rp = &slang->sl_prefprog[((unsigned)pidx >> 8) & 0xffff];
1060dffa5b8eSBram Moolenaar if (*rp != NULL)
1061f417f2b6SBram Moolenaar {
1062dffa5b8eSBram Moolenaar if (!vim_regexec_prog(rp, FALSE, word, 0))
1063f417f2b6SBram Moolenaar continue;
1064f417f2b6SBram Moolenaar }
106553805d1eSBram Moolenaar else if (cond_req)
106653805d1eSBram Moolenaar continue;
1067f417f2b6SBram Moolenaar
10680d6f5d97SBram Moolenaar // It's a match! Return the WF_ flags.
1069cf6bf39fSBram Moolenaar return pidx;
1070f417f2b6SBram Moolenaar }
1071cf6bf39fSBram Moolenaar return 0;
1072f417f2b6SBram Moolenaar }
1073f417f2b6SBram Moolenaar
1074f417f2b6SBram Moolenaar /*
10751d73c885SBram Moolenaar * Check if the word at "mip->mi_word" has a matching prefix.
10761d73c885SBram Moolenaar * If it does, then check the following word.
10771d73c885SBram Moolenaar *
1078d12a1326SBram Moolenaar * If "mode" is "FIND_COMPOUND" then do the same after another word, find a
1079d12a1326SBram Moolenaar * prefix in a compound word.
1080d12a1326SBram Moolenaar *
10811d73c885SBram Moolenaar * For a match mip->mi_result is updated.
10821d73c885SBram Moolenaar */
10831d73c885SBram Moolenaar static void
find_prefix(matchinf_T * mip,int mode)1084764b23c8SBram Moolenaar find_prefix(matchinf_T *mip, int mode)
10851d73c885SBram Moolenaar {
10861d73c885SBram Moolenaar idx_T arridx = 0;
10871d73c885SBram Moolenaar int len;
10881d73c885SBram Moolenaar int wlen = 0;
10891d73c885SBram Moolenaar int flen;
10901d73c885SBram Moolenaar int c;
10911d73c885SBram Moolenaar char_u *ptr;
10921d73c885SBram Moolenaar idx_T lo, hi, m;
10931d73c885SBram Moolenaar slang_T *slang = mip->mi_lp->lp_slang;
10941d73c885SBram Moolenaar char_u *byts;
10951d73c885SBram Moolenaar idx_T *idxs;
10961d73c885SBram Moolenaar
109742eeac35SBram Moolenaar byts = slang->sl_pbyts;
109842eeac35SBram Moolenaar if (byts == NULL)
10990d6f5d97SBram Moolenaar return; // array is empty
110042eeac35SBram Moolenaar
11010d6f5d97SBram Moolenaar // We use the case-folded word here, since prefixes are always
11020d6f5d97SBram Moolenaar // case-folded.
11031d73c885SBram Moolenaar ptr = mip->mi_fword;
11040d6f5d97SBram Moolenaar flen = mip->mi_fwordlen; // available case-folded bytes
1105d12a1326SBram Moolenaar if (mode == FIND_COMPOUND)
1106d12a1326SBram Moolenaar {
11070d6f5d97SBram Moolenaar // Skip over the previously found word(s).
1108d12a1326SBram Moolenaar ptr += mip->mi_compoff;
1109d12a1326SBram Moolenaar flen -= mip->mi_compoff;
1110d12a1326SBram Moolenaar }
11111d73c885SBram Moolenaar idxs = slang->sl_pidxs;
11121d73c885SBram Moolenaar
11131d73c885SBram Moolenaar /*
11141d73c885SBram Moolenaar * Repeat advancing in the tree until:
11151d73c885SBram Moolenaar * - there is a byte that doesn't match,
11161d73c885SBram Moolenaar * - we reach the end of the tree,
11171d73c885SBram Moolenaar * - or we reach the end of the line.
11181d73c885SBram Moolenaar */
11191d73c885SBram Moolenaar for (;;)
11201d73c885SBram Moolenaar {
11211d73c885SBram Moolenaar if (flen == 0 && *mip->mi_fend != NUL)
11221d73c885SBram Moolenaar flen = fold_more(mip);
11231d73c885SBram Moolenaar
11241d73c885SBram Moolenaar len = byts[arridx++];
11251d73c885SBram Moolenaar
11260d6f5d97SBram Moolenaar // If the first possible byte is a zero the prefix could end here.
11270d6f5d97SBram Moolenaar // Check if the following word matches and supports the prefix.
11281d73c885SBram Moolenaar if (byts[arridx] == 0)
11291d73c885SBram Moolenaar {
11300d6f5d97SBram Moolenaar // There can be several prefixes with different conditions. We
11310d6f5d97SBram Moolenaar // try them all, since we don't know which one will give the
11320d6f5d97SBram Moolenaar // longest match. The word is the same each time, pass the list
11330d6f5d97SBram Moolenaar // of possible prefixes to find_word().
11341d73c885SBram Moolenaar mip->mi_prefarridx = arridx;
11351d73c885SBram Moolenaar mip->mi_prefcnt = len;
11361d73c885SBram Moolenaar while (len > 0 && byts[arridx] == 0)
11371d73c885SBram Moolenaar {
11381d73c885SBram Moolenaar ++arridx;
11391d73c885SBram Moolenaar --len;
11401d73c885SBram Moolenaar }
11411d73c885SBram Moolenaar mip->mi_prefcnt -= len;
11421d73c885SBram Moolenaar
11430d6f5d97SBram Moolenaar // Find the word that comes after the prefix.
11441d73c885SBram Moolenaar mip->mi_prefixlen = wlen;
1145d12a1326SBram Moolenaar if (mode == FIND_COMPOUND)
11460d6f5d97SBram Moolenaar // Skip over the previously found word(s).
1147d12a1326SBram Moolenaar mip->mi_prefixlen += mip->mi_compoff;
1148d12a1326SBram Moolenaar
114953805d1eSBram Moolenaar if (has_mbyte)
115053805d1eSBram Moolenaar {
11510d6f5d97SBram Moolenaar // Case-folded length may differ from original length.
1152d12a1326SBram Moolenaar mip->mi_cprefixlen = nofold_len(mip->mi_fword,
1153d12a1326SBram Moolenaar mip->mi_prefixlen, mip->mi_word);
115453805d1eSBram Moolenaar }
115553805d1eSBram Moolenaar else
1156d12a1326SBram Moolenaar mip->mi_cprefixlen = mip->mi_prefixlen;
11571d73c885SBram Moolenaar find_word(mip, FIND_PREFIX);
11581d73c885SBram Moolenaar
11591d73c885SBram Moolenaar
11601d73c885SBram Moolenaar if (len == 0)
11610d6f5d97SBram Moolenaar break; // no children, word must end here
11621d73c885SBram Moolenaar }
11631d73c885SBram Moolenaar
11640d6f5d97SBram Moolenaar // Stop looking at end of the line.
11651d73c885SBram Moolenaar if (ptr[wlen] == NUL)
11661d73c885SBram Moolenaar break;
11671d73c885SBram Moolenaar
11680d6f5d97SBram Moolenaar // Perform a binary search in the list of accepted bytes.
11691d73c885SBram Moolenaar c = ptr[wlen];
11701d73c885SBram Moolenaar lo = arridx;
11711d73c885SBram Moolenaar hi = arridx + len - 1;
11721d73c885SBram Moolenaar while (lo < hi)
11731d73c885SBram Moolenaar {
11741d73c885SBram Moolenaar m = (lo + hi) / 2;
11751d73c885SBram Moolenaar if (byts[m] > c)
11761d73c885SBram Moolenaar hi = m - 1;
11771d73c885SBram Moolenaar else if (byts[m] < c)
11781d73c885SBram Moolenaar lo = m + 1;
11791d73c885SBram Moolenaar else
11801d73c885SBram Moolenaar {
11811d73c885SBram Moolenaar lo = hi = m;
11821d73c885SBram Moolenaar break;
11831d73c885SBram Moolenaar }
11841d73c885SBram Moolenaar }
11851d73c885SBram Moolenaar
11860d6f5d97SBram Moolenaar // Stop if there is no matching byte.
11871d73c885SBram Moolenaar if (hi < lo || byts[lo] != c)
11881d73c885SBram Moolenaar break;
11891d73c885SBram Moolenaar
11900d6f5d97SBram Moolenaar // Continue at the child (if there is one).
11911d73c885SBram Moolenaar arridx = idxs[lo];
11921d73c885SBram Moolenaar ++wlen;
11931d73c885SBram Moolenaar --flen;
11941d73c885SBram Moolenaar }
11951d73c885SBram Moolenaar }
11961d73c885SBram Moolenaar
11971d73c885SBram Moolenaar /*
11981d73c885SBram Moolenaar * Need to fold at least one more character. Do until next non-word character
1199a40ceaf8SBram Moolenaar * for efficiency. Include the non-word character too.
12001d73c885SBram Moolenaar * Return the length of the folded chars in bytes.
12011d73c885SBram Moolenaar */
12021d73c885SBram Moolenaar static int
fold_more(matchinf_T * mip)1203764b23c8SBram Moolenaar fold_more(matchinf_T *mip)
12041d73c885SBram Moolenaar {
12051d73c885SBram Moolenaar int flen;
12061d73c885SBram Moolenaar char_u *p;
12071d73c885SBram Moolenaar
12081d73c885SBram Moolenaar p = mip->mi_fend;
12091d73c885SBram Moolenaar do
121091acfffcSBram Moolenaar MB_PTR_ADV(mip->mi_fend);
1211abab0b0fSBram Moolenaar while (*mip->mi_fend != NUL && spell_iswordp(mip->mi_fend, mip->mi_win));
12121d73c885SBram Moolenaar
12130d6f5d97SBram Moolenaar // Include the non-word character so that we can check for the word end.
12141d73c885SBram Moolenaar if (*mip->mi_fend != NUL)
121591acfffcSBram Moolenaar MB_PTR_ADV(mip->mi_fend);
12161d73c885SBram Moolenaar
12174f135275SBram Moolenaar (void)spell_casefold(mip->mi_win, p, (int)(mip->mi_fend - p),
12181d73c885SBram Moolenaar mip->mi_fword + mip->mi_fwordlen,
12191d73c885SBram Moolenaar MAXWLEN - mip->mi_fwordlen);
1220a93fa7eeSBram Moolenaar flen = (int)STRLEN(mip->mi_fword + mip->mi_fwordlen);
12211d73c885SBram Moolenaar mip->mi_fwordlen += flen;
12221d73c885SBram Moolenaar return flen;
12231d73c885SBram Moolenaar }
12241d73c885SBram Moolenaar
12251d73c885SBram Moolenaar /*
12269ba0eb85SBram Moolenaar * Check case flags for a word. Return TRUE if the word has the requested
12279ba0eb85SBram Moolenaar * case.
12289ba0eb85SBram Moolenaar */
122946a426c9SBram Moolenaar int
spell_valid_case(int wordflags,int treeflags)1230764b23c8SBram Moolenaar spell_valid_case(
12310d6f5d97SBram Moolenaar int wordflags, // flags for the checked word.
12320d6f5d97SBram Moolenaar int treeflags) // flags for the word in the spell tree
12339ba0eb85SBram Moolenaar {
12340dc065eeSBram Moolenaar return ((wordflags == WF_ALLCAP && (treeflags & WF_FIXCAP) == 0)
12359ba0eb85SBram Moolenaar || ((treeflags & (WF_ALLCAP | WF_KEEPCAP)) == 0
12360fa313a7SBram Moolenaar && ((treeflags & WF_ONECAP) == 0
12370fa313a7SBram Moolenaar || (wordflags & WF_ONECAP) != 0)));
12389ba0eb85SBram Moolenaar }
12399ba0eb85SBram Moolenaar
1240f417f2b6SBram Moolenaar /*
1241f417f2b6SBram Moolenaar * Return TRUE if spell checking is not enabled.
1242f417f2b6SBram Moolenaar */
1243*8ee52affSYegappan Lakshmanan static int
no_spell_checking(win_T * wp)1244764b23c8SBram Moolenaar no_spell_checking(win_T *wp)
1245f417f2b6SBram Moolenaar {
1246860cae1cSBram Moolenaar if (!wp->w_p_spell || *wp->w_s->b_p_spl == NUL
1247860cae1cSBram Moolenaar || wp->w_s->b_langp.ga_len == 0)
1248f417f2b6SBram Moolenaar {
1249152e79e9SBram Moolenaar emsg(_(e_no_spell));
1250f417f2b6SBram Moolenaar return TRUE;
1251f417f2b6SBram Moolenaar }
1252f417f2b6SBram Moolenaar return FALSE;
1253f417f2b6SBram Moolenaar }
1254402d2feaSBram Moolenaar
1255402d2feaSBram Moolenaar /*
1256402d2feaSBram Moolenaar * Move to next spell error.
1257ac6e65f8SBram Moolenaar * "curline" is FALSE for "[s", "]s", "[S" and "]S".
1258ac6e65f8SBram Moolenaar * "curline" is TRUE to find word under/after cursor in the same line.
12595195e456SBram Moolenaar * For Insert mode completion "dir" is BACKWARD and "curline" is TRUE: move
12605195e456SBram Moolenaar * to after badly spelled word before the cursor.
12616de6853cSBram Moolenaar * Return 0 if not found, length of the badly spelled word otherwise.
1262402d2feaSBram Moolenaar */
1263402d2feaSBram Moolenaar int
spell_move_to(win_T * wp,int dir,int allwords,int curline,hlf_T * attrp)1264764b23c8SBram Moolenaar spell_move_to(
1265764b23c8SBram Moolenaar win_T *wp,
12660d6f5d97SBram Moolenaar int dir, // FORWARD or BACKWARD
12670d6f5d97SBram Moolenaar int allwords, // TRUE for "[s"/"]s", FALSE for "[S"/"]S"
1268764b23c8SBram Moolenaar int curline,
12690d6f5d97SBram Moolenaar hlf_T *attrp) // return: attributes of bad word or NULL
12700d6f5d97SBram Moolenaar // (only when "dir" is FORWARD)
1271402d2feaSBram Moolenaar {
12722cf8b301SBram Moolenaar linenr_T lnum;
12732cf8b301SBram Moolenaar pos_T found_pos;
12746de6853cSBram Moolenaar int found_len = 0;
1275402d2feaSBram Moolenaar char_u *line;
1276402d2feaSBram Moolenaar char_u *p;
12770c40586aSBram Moolenaar char_u *endp;
1278482aaeb0SBram Moolenaar hlf_T attr;
1279402d2feaSBram Moolenaar int len;
1280f71a3db4SBram Moolenaar #ifdef FEAT_SYN_HL
1281860cae1cSBram Moolenaar int has_syntax = syntax_present(wp);
1282f71a3db4SBram Moolenaar #endif
128389d4032cSBram Moolenaar int col;
12842cf8b301SBram Moolenaar int can_spell;
12850c40586aSBram Moolenaar char_u *buf = NULL;
12860c40586aSBram Moolenaar int buflen = 0;
12870c40586aSBram Moolenaar int skip = 0;
1288f9184a1dSBram Moolenaar int capcol = -1;
1289ac6e65f8SBram Moolenaar int found_one = FALSE;
1290ac6e65f8SBram Moolenaar int wrapped = FALSE;
1291402d2feaSBram Moolenaar
129295529568SBram Moolenaar if (no_spell_checking(wp))
12936de6853cSBram Moolenaar return 0;
1294402d2feaSBram Moolenaar
12952cf8b301SBram Moolenaar /*
12962cf8b301SBram Moolenaar * Start looking for bad word at the start of the line, because we can't
129786ca6e3bSBram Moolenaar * start halfway a word, we don't know where it starts or ends.
12982cf8b301SBram Moolenaar *
12992cf8b301SBram Moolenaar * When searching backwards, we continue in the line to find the last
13002cf8b301SBram Moolenaar * bad word (in the cursor line: before the cursor).
13010c40586aSBram Moolenaar *
13020c40586aSBram Moolenaar * We concatenate the start of the next line, so that wrapped words work
13030c40586aSBram Moolenaar * (e.g. "et<line-break>cetera"). Doesn't work when searching backwards
13040c40586aSBram Moolenaar * though...
13052cf8b301SBram Moolenaar */
130695529568SBram Moolenaar lnum = wp->w_cursor.lnum;
1307b5aedf3eSBram Moolenaar CLEAR_POS(&found_pos);
1308402d2feaSBram Moolenaar
1309402d2feaSBram Moolenaar while (!got_int)
1310402d2feaSBram Moolenaar {
131195529568SBram Moolenaar line = ml_get_buf(wp->w_buffer, lnum, FALSE);
13122cf8b301SBram Moolenaar
1313a93fa7eeSBram Moolenaar len = (int)STRLEN(line);
13140c40586aSBram Moolenaar if (buflen < len + MAXWLEN + 2)
13150c40586aSBram Moolenaar {
13160c40586aSBram Moolenaar vim_free(buf);
13170c40586aSBram Moolenaar buflen = len + MAXWLEN + 2;
13180c40586aSBram Moolenaar buf = alloc(buflen);
13190c40586aSBram Moolenaar if (buf == NULL)
13200c40586aSBram Moolenaar break;
13210c40586aSBram Moolenaar }
13220c40586aSBram Moolenaar
13230d6f5d97SBram Moolenaar // In first line check first word for Capital.
1324f9184a1dSBram Moolenaar if (lnum == 1)
1325f9184a1dSBram Moolenaar capcol = 0;
1326f9184a1dSBram Moolenaar
13270d6f5d97SBram Moolenaar // For checking first word with a capital skip white space.
1328f9184a1dSBram Moolenaar if (capcol == 0)
1329e2e69e48SBram Moolenaar capcol = getwhitecols(line);
1330a93fa7eeSBram Moolenaar else if (curline && wp == curwin)
1331a93fa7eeSBram Moolenaar {
13320d6f5d97SBram Moolenaar // For spellbadword(): check if first word needs a capital.
1333e2e69e48SBram Moolenaar col = getwhitecols(line);
1334a93fa7eeSBram Moolenaar if (check_need_cap(lnum, col))
1335a93fa7eeSBram Moolenaar capcol = col;
1336a93fa7eeSBram Moolenaar
13370d6f5d97SBram Moolenaar // Need to get the line again, may have looked at the previous
13380d6f5d97SBram Moolenaar // one.
1339a93fa7eeSBram Moolenaar line = ml_get_buf(wp->w_buffer, lnum, FALSE);
1340a93fa7eeSBram Moolenaar }
1341f9184a1dSBram Moolenaar
13420d6f5d97SBram Moolenaar // Copy the line into "buf" and append the start of the next line if
13430d6f5d97SBram Moolenaar // possible.
13440c40586aSBram Moolenaar STRCPY(buf, line);
134595529568SBram Moolenaar if (lnum < wp->w_buffer->b_ml.ml_line_count)
13465dd95a10SBram Moolenaar spell_cat_line(buf + STRLEN(buf),
13475dd95a10SBram Moolenaar ml_get_buf(wp->w_buffer, lnum + 1, FALSE), MAXWLEN);
13480c40586aSBram Moolenaar
13490c40586aSBram Moolenaar p = buf + skip;
13500c40586aSBram Moolenaar endp = buf + len;
13510c40586aSBram Moolenaar while (p < endp)
1352402d2feaSBram Moolenaar {
13530d6f5d97SBram Moolenaar // When searching backward don't search after the cursor. Unless
13540d6f5d97SBram Moolenaar // we wrapped around the end of the buffer.
13552cf8b301SBram Moolenaar if (dir == BACKWARD
135695529568SBram Moolenaar && lnum == wp->w_cursor.lnum
1357ac6e65f8SBram Moolenaar && !wrapped
135895529568SBram Moolenaar && (colnr_T)(p - buf) >= wp->w_cursor.col)
13592cf8b301SBram Moolenaar break;
13602cf8b301SBram Moolenaar
13610d6f5d97SBram Moolenaar // start of word
1362482aaeb0SBram Moolenaar attr = HLF_COUNT;
13634770d09aSBram Moolenaar len = spell_check(wp, p, &attr, &capcol, FALSE);
13642cf8b301SBram Moolenaar
1365482aaeb0SBram Moolenaar if (attr != HLF_COUNT)
1366402d2feaSBram Moolenaar {
13670d6f5d97SBram Moolenaar // We found a bad word. Check the attribute.
1368482aaeb0SBram Moolenaar if (allwords || attr == HLF_SPB)
1369402d2feaSBram Moolenaar {
13700d6f5d97SBram Moolenaar // When searching forward only accept a bad word after
13710d6f5d97SBram Moolenaar // the cursor.
13722cf8b301SBram Moolenaar if (dir == BACKWARD
1373ac6e65f8SBram Moolenaar || lnum != wp->w_cursor.lnum
137495529568SBram Moolenaar || (lnum == wp->w_cursor.lnum
1375ac6e65f8SBram Moolenaar && (wrapped
1376ac6e65f8SBram Moolenaar || (colnr_T)(curline ? p - buf + len
13770c40586aSBram Moolenaar : p - buf)
1378ac6e65f8SBram Moolenaar > wp->w_cursor.col)))
13792cf8b301SBram Moolenaar {
1380f71a3db4SBram Moolenaar #ifdef FEAT_SYN_HL
13812cf8b301SBram Moolenaar if (has_syntax)
13822cf8b301SBram Moolenaar {
1383a93fa7eeSBram Moolenaar col = (int)(p - buf);
138495529568SBram Moolenaar (void)syn_get_id(wp, lnum, (colnr_T)col,
138556cefaf1SBram Moolenaar FALSE, &can_spell, FALSE);
1386d68071d8SBram Moolenaar if (!can_spell)
1387d68071d8SBram Moolenaar attr = HLF_COUNT;
13882cf8b301SBram Moolenaar }
13892cf8b301SBram Moolenaar else
1390f71a3db4SBram Moolenaar #endif
13912cf8b301SBram Moolenaar can_spell = TRUE;
13922cf8b301SBram Moolenaar
13932cf8b301SBram Moolenaar if (can_spell)
13942cf8b301SBram Moolenaar {
1395d68071d8SBram Moolenaar found_one = TRUE;
13962cf8b301SBram Moolenaar found_pos.lnum = lnum;
1397a93fa7eeSBram Moolenaar found_pos.col = (int)(p - buf);
13982cf8b301SBram Moolenaar found_pos.coladd = 0;
13992cf8b301SBram Moolenaar if (dir == FORWARD)
14002cf8b301SBram Moolenaar {
14010d6f5d97SBram Moolenaar // No need to search further.
140295529568SBram Moolenaar wp->w_cursor = found_pos;
14030c40586aSBram Moolenaar vim_free(buf);
140495529568SBram Moolenaar if (attrp != NULL)
140595529568SBram Moolenaar *attrp = attr;
14066de6853cSBram Moolenaar return len;
1407402d2feaSBram Moolenaar }
14085195e456SBram Moolenaar else if (curline)
14090d6f5d97SBram Moolenaar // Insert mode completion: put cursor after
14100d6f5d97SBram Moolenaar // the bad word.
14115195e456SBram Moolenaar found_pos.col += len;
14126de6853cSBram Moolenaar found_len = len;
14132cf8b301SBram Moolenaar }
14142cf8b301SBram Moolenaar }
1415d68071d8SBram Moolenaar else
1416d68071d8SBram Moolenaar found_one = TRUE;
14172cf8b301SBram Moolenaar }
1418402d2feaSBram Moolenaar }
141951485f06SBram Moolenaar
14200d6f5d97SBram Moolenaar // advance to character after the word
1421402d2feaSBram Moolenaar p += len;
1422f9184a1dSBram Moolenaar capcol -= len;
1423402d2feaSBram Moolenaar }
1424402d2feaSBram Moolenaar
14255195e456SBram Moolenaar if (dir == BACKWARD && found_pos.lnum != 0)
14262cf8b301SBram Moolenaar {
14270d6f5d97SBram Moolenaar // Use the last match in the line (before the cursor).
142895529568SBram Moolenaar wp->w_cursor = found_pos;
14290c40586aSBram Moolenaar vim_free(buf);
14306de6853cSBram Moolenaar return found_len;
14312cf8b301SBram Moolenaar }
14325195e456SBram Moolenaar
14335195e456SBram Moolenaar if (curline)
14340d6f5d97SBram Moolenaar break; // only check cursor line
14355195e456SBram Moolenaar
14360d6f5d97SBram Moolenaar // If we are back at the starting line and searched it again there
14370d6f5d97SBram Moolenaar // is no match, give up.
1438ac6e65f8SBram Moolenaar if (lnum == wp->w_cursor.lnum && wrapped)
14390c40586aSBram Moolenaar break;
1440ac6e65f8SBram Moolenaar
14410d6f5d97SBram Moolenaar // Advance to next line.
1442d3f78dc9SBram Moolenaar if (dir == BACKWARD)
1443d3f78dc9SBram Moolenaar {
1444ac6e65f8SBram Moolenaar if (lnum > 1)
14452cf8b301SBram Moolenaar --lnum;
1446ac6e65f8SBram Moolenaar else if (!p_ws)
14470d6f5d97SBram Moolenaar break; // at first line and 'nowrapscan'
1448ac6e65f8SBram Moolenaar else
1449ac6e65f8SBram Moolenaar {
14500d6f5d97SBram Moolenaar // Wrap around to the end of the buffer. May search the
14510d6f5d97SBram Moolenaar // starting line again and accept the last match.
1452ac6e65f8SBram Moolenaar lnum = wp->w_buffer->b_ml.ml_line_count;
1453ac6e65f8SBram Moolenaar wrapped = TRUE;
14548b96d64cSBram Moolenaar if (!shortmess(SHM_SEARCH))
14558b96d64cSBram Moolenaar give_warning((char_u *)_(top_bot_msg), TRUE);
1456ac6e65f8SBram Moolenaar }
1457f9184a1dSBram Moolenaar capcol = -1;
14582cf8b301SBram Moolenaar }
14592cf8b301SBram Moolenaar else
14602cf8b301SBram Moolenaar {
1461ac6e65f8SBram Moolenaar if (lnum < wp->w_buffer->b_ml.ml_line_count)
14622cf8b301SBram Moolenaar ++lnum;
1463ac6e65f8SBram Moolenaar else if (!p_ws)
14640d6f5d97SBram Moolenaar break; // at first line and 'nowrapscan'
1465ac6e65f8SBram Moolenaar else
1466ac6e65f8SBram Moolenaar {
14670d6f5d97SBram Moolenaar // Wrap around to the start of the buffer. May search the
14680d6f5d97SBram Moolenaar // starting line again and accept the first match.
1469ac6e65f8SBram Moolenaar lnum = 1;
1470ac6e65f8SBram Moolenaar wrapped = TRUE;
14718b96d64cSBram Moolenaar if (!shortmess(SHM_SEARCH))
14728b96d64cSBram Moolenaar give_warning((char_u *)_(bot_top_msg), TRUE);
1473ac6e65f8SBram Moolenaar }
1474ac6e65f8SBram Moolenaar
14750d6f5d97SBram Moolenaar // If we are back at the starting line and there is no match then
14760d6f5d97SBram Moolenaar // give up.
1477d3f78dc9SBram Moolenaar if (lnum == wp->w_cursor.lnum && !found_one)
1478ac6e65f8SBram Moolenaar break;
14790c40586aSBram Moolenaar
14800d6f5d97SBram Moolenaar // Skip the characters at the start of the next line that were
14810d6f5d97SBram Moolenaar // included in a match crossing line boundaries.
1482482aaeb0SBram Moolenaar if (attr == HLF_COUNT)
1483a93fa7eeSBram Moolenaar skip = (int)(p - endp);
14840c40586aSBram Moolenaar else
14850c40586aSBram Moolenaar skip = 0;
1486f9184a1dSBram Moolenaar
14870d6f5d97SBram Moolenaar // Capcol skips over the inserted space.
1488f9184a1dSBram Moolenaar --capcol;
1489f9184a1dSBram Moolenaar
14900d6f5d97SBram Moolenaar // But after empty line check first word in next line
1491f9184a1dSBram Moolenaar if (*skipwhite(line) == NUL)
1492f9184a1dSBram Moolenaar capcol = 0;
14932cf8b301SBram Moolenaar }
1494402d2feaSBram Moolenaar
1495402d2feaSBram Moolenaar line_breakcheck();
1496402d2feaSBram Moolenaar }
1497402d2feaSBram Moolenaar
14980c40586aSBram Moolenaar vim_free(buf);
14996de6853cSBram Moolenaar return 0;
15000c40586aSBram Moolenaar }
15010c40586aSBram Moolenaar
15020c40586aSBram Moolenaar /*
15030c40586aSBram Moolenaar * For spell checking: concatenate the start of the following line "line" into
15040c40586aSBram Moolenaar * "buf", blanking-out special characters. Copy less then "maxlen" bytes.
15056a5d2ac1SBram Moolenaar * Keep the blanks at the start of the next line, this is used in win_line()
15066a5d2ac1SBram Moolenaar * to skip those bytes if the word was OK.
15070c40586aSBram Moolenaar */
15080c40586aSBram Moolenaar void
spell_cat_line(char_u * buf,char_u * line,int maxlen)1509764b23c8SBram Moolenaar spell_cat_line(char_u *buf, char_u *line, int maxlen)
15100c40586aSBram Moolenaar {
15110c40586aSBram Moolenaar char_u *p;
15120c40586aSBram Moolenaar int n;
15130c40586aSBram Moolenaar
15140c40586aSBram Moolenaar p = skipwhite(line);
15150c40586aSBram Moolenaar while (vim_strchr((char_u *)"*#/\"\t", *p) != NULL)
15160c40586aSBram Moolenaar p = skipwhite(p + 1);
15170c40586aSBram Moolenaar
15180c40586aSBram Moolenaar if (*p != NUL)
15190c40586aSBram Moolenaar {
15200d6f5d97SBram Moolenaar // Only worth concatenating if there is something else than spaces to
15210d6f5d97SBram Moolenaar // concatenate.
15226a5d2ac1SBram Moolenaar n = (int)(p - line) + 1;
15236a5d2ac1SBram Moolenaar if (n < maxlen - 1)
15246a5d2ac1SBram Moolenaar {
15256a5d2ac1SBram Moolenaar vim_memset(buf, ' ', n);
15266a5d2ac1SBram Moolenaar vim_strncpy(buf + n, p, maxlen - 1 - n);
15276a5d2ac1SBram Moolenaar }
15280c40586aSBram Moolenaar }
1529402d2feaSBram Moolenaar }
1530402d2feaSBram Moolenaar
1531a40ceaf8SBram Moolenaar /*
1532a40ceaf8SBram Moolenaar * Structure used for the cookie argument of do_in_runtimepath().
1533a40ceaf8SBram Moolenaar */
1534da2303d9SBram Moolenaar typedef struct spelload_S
1535da2303d9SBram Moolenaar {
15360d6f5d97SBram Moolenaar char_u sl_lang[MAXWLEN + 1]; // language name
15370d6f5d97SBram Moolenaar slang_T *sl_slang; // resulting slang_T struct
15380d6f5d97SBram Moolenaar int sl_nobreak; // NOBREAK language found
1539da2303d9SBram Moolenaar } spelload_T;
1540da2303d9SBram Moolenaar
1541402d2feaSBram Moolenaar /*
1542cfc6c43cSBram Moolenaar * Load word list(s) for "lang" from Vim spell file(s).
1543b765d634SBram Moolenaar * "lang" must be the language without the region: e.g., "en".
1544402d2feaSBram Moolenaar */
1545cfc6c43cSBram Moolenaar static void
spell_load_lang(char_u * lang)1546764b23c8SBram Moolenaar spell_load_lang(char_u *lang)
1547402d2feaSBram Moolenaar {
1548b765d634SBram Moolenaar char_u fname_enc[85];
1549402d2feaSBram Moolenaar int r;
1550da2303d9SBram Moolenaar spelload_T sl;
1551b8a7b560SBram Moolenaar int round;
1552402d2feaSBram Moolenaar
15530d6f5d97SBram Moolenaar // Copy the language name to pass it to spell_load_cb() as a cookie.
15540d6f5d97SBram Moolenaar // It's truncated when an error is detected.
1555da2303d9SBram Moolenaar STRCPY(sl.sl_lang, lang);
1556da2303d9SBram Moolenaar sl.sl_slang = NULL;
1557da2303d9SBram Moolenaar sl.sl_nobreak = FALSE;
1558cfc6c43cSBram Moolenaar
15590d6f5d97SBram Moolenaar // We may retry when no spell file is found for the language, an
15600d6f5d97SBram Moolenaar // autocommand may load it then.
1561b8a7b560SBram Moolenaar for (round = 1; round <= 2; ++round)
1562b8a7b560SBram Moolenaar {
1563b765d634SBram Moolenaar /*
1564b765d634SBram Moolenaar * Find the first spell file for "lang" in 'runtimepath' and load it.
1565b765d634SBram Moolenaar */
1566b765d634SBram Moolenaar vim_snprintf((char *)fname_enc, sizeof(fname_enc) - 5,
156756f78040SBram Moolenaar #ifdef VMS
156856f78040SBram Moolenaar "spell/%s_%s.spl",
156956f78040SBram Moolenaar #else
157056f78040SBram Moolenaar "spell/%s.%s.spl",
157156f78040SBram Moolenaar #endif
157256f78040SBram Moolenaar lang, spell_enc());
15737f8989ddSBram Moolenaar r = do_in_runtimepath(fname_enc, 0, spell_load_cb, &sl);
1574402d2feaSBram Moolenaar
1575da2303d9SBram Moolenaar if (r == FAIL && *sl.sl_lang != NUL)
15765482f33fSBram Moolenaar {
15770d6f5d97SBram Moolenaar // Try loading the ASCII version.
1578b765d634SBram Moolenaar vim_snprintf((char *)fname_enc, sizeof(fname_enc) - 5,
157956f78040SBram Moolenaar #ifdef VMS
158056f78040SBram Moolenaar "spell/%s_ascii.spl",
158156f78040SBram Moolenaar #else
158256f78040SBram Moolenaar "spell/%s.ascii.spl",
158356f78040SBram Moolenaar #endif
158456f78040SBram Moolenaar lang);
15857f8989ddSBram Moolenaar r = do_in_runtimepath(fname_enc, 0, spell_load_cb, &sl);
1586b8a7b560SBram Moolenaar
1587b8a7b560SBram Moolenaar if (r == FAIL && *sl.sl_lang != NUL && round == 1
1588b8a7b560SBram Moolenaar && apply_autocmds(EVENT_SPELLFILEMISSING, lang,
1589b8a7b560SBram Moolenaar curbuf->b_fname, FALSE, curbuf))
1590b8a7b560SBram Moolenaar continue;
1591b8a7b560SBram Moolenaar break;
1592b8a7b560SBram Moolenaar }
1593362e1a30SBram Moolenaar break;
15945482f33fSBram Moolenaar }
1595cfc6c43cSBram Moolenaar
1596402d2feaSBram Moolenaar if (r == FAIL)
1597b8a7b560SBram Moolenaar {
1598f9e3e09fSBram Moolenaar smsg(
159956f78040SBram Moolenaar #ifdef VMS
160056f78040SBram Moolenaar _("Warning: Cannot find word list \"%s_%s.spl\" or \"%s_ascii.spl\""),
160156f78040SBram Moolenaar #else
160256f78040SBram Moolenaar _("Warning: Cannot find word list \"%s.%s.spl\" or \"%s.ascii.spl\""),
160356f78040SBram Moolenaar #endif
16045195e456SBram Moolenaar lang, spell_enc(), lang);
1605b8a7b560SBram Moolenaar }
1606da2303d9SBram Moolenaar else if (sl.sl_slang != NULL)
1607b765d634SBram Moolenaar {
16080d6f5d97SBram Moolenaar // At least one file was loaded, now load ALL the additions.
1609b765d634SBram Moolenaar STRCPY(fname_enc + STRLEN(fname_enc) - 3, "add.spl");
16107f8989ddSBram Moolenaar do_in_runtimepath(fname_enc, DIP_ALL, spell_load_cb, &sl);
1611b765d634SBram Moolenaar }
1612b765d634SBram Moolenaar }
1613b765d634SBram Moolenaar
1614b765d634SBram Moolenaar /*
1615b765d634SBram Moolenaar * Return the encoding used for spell checking: Use 'encoding', except that we
1616b765d634SBram Moolenaar * use "latin1" for "latin9". And limit to 60 characters (just in case).
1617b765d634SBram Moolenaar */
16189ccfebddSBram Moolenaar char_u *
spell_enc(void)1619764b23c8SBram Moolenaar spell_enc(void)
1620b765d634SBram Moolenaar {
1621b765d634SBram Moolenaar
1622b765d634SBram Moolenaar if (STRLEN(p_enc) < 60 && STRCMP(p_enc, "iso-8859-15") != 0)
1623b765d634SBram Moolenaar return p_enc;
1624b765d634SBram Moolenaar return (char_u *)"latin1";
1625402d2feaSBram Moolenaar }
1626402d2feaSBram Moolenaar
1627402d2feaSBram Moolenaar /*
1628f9184a1dSBram Moolenaar * Get the name of the .spl file for the internal wordlist into
1629f9184a1dSBram Moolenaar * "fname[MAXPATHL]".
1630f9184a1dSBram Moolenaar */
1631f9184a1dSBram Moolenaar static void
int_wordlist_spl(char_u * fname)1632764b23c8SBram Moolenaar int_wordlist_spl(char_u *fname)
1633f9184a1dSBram Moolenaar {
163456f78040SBram Moolenaar vim_snprintf((char *)fname, MAXPATHL, SPL_FNAME_TMPL,
1635f9184a1dSBram Moolenaar int_wordlist, spell_enc());
1636f9184a1dSBram Moolenaar }
1637f9184a1dSBram Moolenaar
1638f9184a1dSBram Moolenaar /*
16394770d09aSBram Moolenaar * Allocate a new slang_T for language "lang". "lang" can be NULL.
1640402d2feaSBram Moolenaar * Caller must fill "sl_next".
1641402d2feaSBram Moolenaar */
16429ccfebddSBram Moolenaar slang_T *
slang_alloc(char_u * lang)1643764b23c8SBram Moolenaar slang_alloc(char_u *lang)
1644402d2feaSBram Moolenaar {
1645402d2feaSBram Moolenaar slang_T *lp;
1646402d2feaSBram Moolenaar
1647c799fe20SBram Moolenaar lp = ALLOC_CLEAR_ONE(slang_T);
1648402d2feaSBram Moolenaar if (lp != NULL)
1649402d2feaSBram Moolenaar {
16504770d09aSBram Moolenaar if (lang != NULL)
1651402d2feaSBram Moolenaar lp->sl_name = vim_strsave(lang);
16529ba0eb85SBram Moolenaar ga_init2(&lp->sl_rep, sizeof(fromto_T), 10);
16534770d09aSBram Moolenaar ga_init2(&lp->sl_repsal, sizeof(fromto_T), 10);
16545195e456SBram Moolenaar lp->sl_compmax = MAXWLEN;
16555195e456SBram Moolenaar lp->sl_compsylmax = MAXWLEN;
16564770d09aSBram Moolenaar hash_init(&lp->sl_wordcount);
1657402d2feaSBram Moolenaar }
16584770d09aSBram Moolenaar
1659402d2feaSBram Moolenaar return lp;
1660402d2feaSBram Moolenaar }
1661402d2feaSBram Moolenaar
1662402d2feaSBram Moolenaar /*
1663402d2feaSBram Moolenaar * Free the contents of an slang_T and the structure itself.
1664402d2feaSBram Moolenaar */
16659ccfebddSBram Moolenaar void
slang_free(slang_T * lp)1666764b23c8SBram Moolenaar slang_free(slang_T *lp)
1667402d2feaSBram Moolenaar {
1668402d2feaSBram Moolenaar vim_free(lp->sl_name);
1669b765d634SBram Moolenaar vim_free(lp->sl_fname);
1670b765d634SBram Moolenaar slang_clear(lp);
1671b765d634SBram Moolenaar vim_free(lp);
1672b765d634SBram Moolenaar }
1673b765d634SBram Moolenaar
1674b765d634SBram Moolenaar /*
1675b765d634SBram Moolenaar * Clear an slang_T so that the file can be reloaded.
1676b765d634SBram Moolenaar */
16779ccfebddSBram Moolenaar void
slang_clear(slang_T * lp)1678764b23c8SBram Moolenaar slang_clear(slang_T *lp)
1679b765d634SBram Moolenaar {
16809ba0eb85SBram Moolenaar garray_T *gap;
16819ba0eb85SBram Moolenaar fromto_T *ftp;
1682d857f0e0SBram Moolenaar salitem_T *smp;
16831d73c885SBram Moolenaar int i;
16844770d09aSBram Moolenaar int round;
16859ba0eb85SBram Moolenaar
1686d23a8236SBram Moolenaar VIM_CLEAR(lp->sl_fbyts);
1687d23a8236SBram Moolenaar VIM_CLEAR(lp->sl_kbyts);
1688d23a8236SBram Moolenaar VIM_CLEAR(lp->sl_pbyts);
16891d73c885SBram Moolenaar
1690d23a8236SBram Moolenaar VIM_CLEAR(lp->sl_fidxs);
1691d23a8236SBram Moolenaar VIM_CLEAR(lp->sl_kidxs);
1692d23a8236SBram Moolenaar VIM_CLEAR(lp->sl_pidxs);
16939ba0eb85SBram Moolenaar
16944770d09aSBram Moolenaar for (round = 1; round <= 2; ++round)
16954770d09aSBram Moolenaar {
16964770d09aSBram Moolenaar gap = round == 1 ? &lp->sl_rep : &lp->sl_repsal;
16979ba0eb85SBram Moolenaar while (gap->ga_len > 0)
16989ba0eb85SBram Moolenaar {
16999ba0eb85SBram Moolenaar ftp = &((fromto_T *)gap->ga_data)[--gap->ga_len];
17009ba0eb85SBram Moolenaar vim_free(ftp->ft_from);
17019ba0eb85SBram Moolenaar vim_free(ftp->ft_to);
17029ba0eb85SBram Moolenaar }
17039ba0eb85SBram Moolenaar ga_clear(gap);
17044770d09aSBram Moolenaar }
1705d857f0e0SBram Moolenaar
1706d857f0e0SBram Moolenaar gap = &lp->sl_sal;
170742eeac35SBram Moolenaar if (lp->sl_sofo)
17089c96f592SBram Moolenaar {
17090d6f5d97SBram Moolenaar // "ga_len" is set to 1 without adding an item for latin1
17109c96f592SBram Moolenaar if (gap->ga_data != NULL)
17110d6f5d97SBram Moolenaar // SOFOFROM and SOFOTO items: free lists of wide characters.
171242eeac35SBram Moolenaar for (i = 0; i < gap->ga_len; ++i)
171342eeac35SBram Moolenaar vim_free(((int **)gap->ga_data)[i]);
17149c96f592SBram Moolenaar }
171542eeac35SBram Moolenaar else
17160d6f5d97SBram Moolenaar // SAL items: free salitem_T items
1717d857f0e0SBram Moolenaar while (gap->ga_len > 0)
1718d857f0e0SBram Moolenaar {
1719d857f0e0SBram Moolenaar smp = &((salitem_T *)gap->ga_data)[--gap->ga_len];
1720d857f0e0SBram Moolenaar vim_free(smp->sm_lead);
17210d6f5d97SBram Moolenaar // Don't free sm_oneof and sm_rules, they point into sm_lead.
1722d857f0e0SBram Moolenaar vim_free(smp->sm_to);
172342eeac35SBram Moolenaar vim_free(smp->sm_lead_w);
172442eeac35SBram Moolenaar vim_free(smp->sm_oneof_w);
172542eeac35SBram Moolenaar vim_free(smp->sm_to_w);
17269ba0eb85SBram Moolenaar }
1727d857f0e0SBram Moolenaar ga_clear(gap);
17289ba0eb85SBram Moolenaar
17291d73c885SBram Moolenaar for (i = 0; i < lp->sl_prefixcnt; ++i)
1730473de61bSBram Moolenaar vim_regfree(lp->sl_prefprog[i]);
17319c96f592SBram Moolenaar lp->sl_prefixcnt = 0;
1732d23a8236SBram Moolenaar VIM_CLEAR(lp->sl_prefprog);
17339c96f592SBram Moolenaar
1734d23a8236SBram Moolenaar VIM_CLEAR(lp->sl_info);
1735362e1a30SBram Moolenaar
1736d23a8236SBram Moolenaar VIM_CLEAR(lp->sl_midword);
17371d73c885SBram Moolenaar
1738473de61bSBram Moolenaar vim_regfree(lp->sl_compprog);
17395195e456SBram Moolenaar lp->sl_compprog = NULL;
1740d23a8236SBram Moolenaar VIM_CLEAR(lp->sl_comprules);
1741d23a8236SBram Moolenaar VIM_CLEAR(lp->sl_compstartflags);
1742d23a8236SBram Moolenaar VIM_CLEAR(lp->sl_compallflags);
17435195e456SBram Moolenaar
1744d23a8236SBram Moolenaar VIM_CLEAR(lp->sl_syllable);
17455195e456SBram Moolenaar ga_clear(&lp->sl_syl_items);
1746ae5bce1cSBram Moolenaar
1747899dddf8SBram Moolenaar ga_clear_strings(&lp->sl_comppat);
1748899dddf8SBram Moolenaar
17494770d09aSBram Moolenaar hash_clear_all(&lp->sl_wordcount, WC_KEY_OFF);
17504770d09aSBram Moolenaar hash_init(&lp->sl_wordcount);
1751ea424166SBram Moolenaar
17524770d09aSBram Moolenaar hash_clear_all(&lp->sl_map_hash, 0);
17535195e456SBram Moolenaar
17540d6f5d97SBram Moolenaar // Clear info from .sug file.
17554770d09aSBram Moolenaar slang_clear_sug(lp);
17564770d09aSBram Moolenaar
17575195e456SBram Moolenaar lp->sl_compmax = MAXWLEN;
1758da2303d9SBram Moolenaar lp->sl_compminlen = 0;
17595195e456SBram Moolenaar lp->sl_compsylmax = MAXWLEN;
17605195e456SBram Moolenaar lp->sl_regions[0] = NUL;
1761402d2feaSBram Moolenaar }
1762402d2feaSBram Moolenaar
1763402d2feaSBram Moolenaar /*
17644770d09aSBram Moolenaar * Clear the info from the .sug file in "lp".
17654770d09aSBram Moolenaar */
17669ccfebddSBram Moolenaar void
slang_clear_sug(slang_T * lp)1767764b23c8SBram Moolenaar slang_clear_sug(slang_T *lp)
17684770d09aSBram Moolenaar {
1769d23a8236SBram Moolenaar VIM_CLEAR(lp->sl_sbyts);
1770d23a8236SBram Moolenaar VIM_CLEAR(lp->sl_sidxs);
17714770d09aSBram Moolenaar close_spellbuf(lp->sl_sugbuf);
17724770d09aSBram Moolenaar lp->sl_sugbuf = NULL;
17734770d09aSBram Moolenaar lp->sl_sugloaded = FALSE;
17744770d09aSBram Moolenaar lp->sl_sugtime = 0;
17754770d09aSBram Moolenaar }
17764770d09aSBram Moolenaar
17774770d09aSBram Moolenaar /*
1778cfc6c43cSBram Moolenaar * Load one spell file and store the info into a slang_T.
1779402d2feaSBram Moolenaar * Invoked through do_in_runtimepath().
1780402d2feaSBram Moolenaar */
1781402d2feaSBram Moolenaar static void
spell_load_cb(char_u * fname,void * cookie)1782764b23c8SBram Moolenaar spell_load_cb(char_u *fname, void *cookie)
1783402d2feaSBram Moolenaar {
1784da2303d9SBram Moolenaar spelload_T *slp = (spelload_T *)cookie;
1785da2303d9SBram Moolenaar slang_T *slang;
1786da2303d9SBram Moolenaar
1787da2303d9SBram Moolenaar slang = spell_load_file(fname, slp->sl_lang, NULL, FALSE);
1788da2303d9SBram Moolenaar if (slang != NULL)
1789da2303d9SBram Moolenaar {
17900d6f5d97SBram Moolenaar // When a previously loaded file has NOBREAK also use it for the
17910d6f5d97SBram Moolenaar // ".add" files.
1792da2303d9SBram Moolenaar if (slp->sl_nobreak && slang->sl_add)
1793da2303d9SBram Moolenaar slang->sl_nobreak = TRUE;
1794da2303d9SBram Moolenaar else if (slang->sl_nobreak)
1795da2303d9SBram Moolenaar slp->sl_nobreak = TRUE;
1796da2303d9SBram Moolenaar
1797da2303d9SBram Moolenaar slp->sl_slang = slang;
1798da2303d9SBram Moolenaar }
1799b765d634SBram Moolenaar }
1800b765d634SBram Moolenaar
18014770d09aSBram Moolenaar
18024770d09aSBram Moolenaar /*
18034770d09aSBram Moolenaar * Add a word to the hashtable of common words.
18044770d09aSBram Moolenaar * If it's already there then the counter is increased.
18054770d09aSBram Moolenaar */
18069ccfebddSBram Moolenaar void
count_common_word(slang_T * lp,char_u * word,int len,int count)1807764b23c8SBram Moolenaar count_common_word(
1808764b23c8SBram Moolenaar slang_T *lp,
1809764b23c8SBram Moolenaar char_u *word,
18100d6f5d97SBram Moolenaar int len, // word length, -1 for up to NUL
18110d6f5d97SBram Moolenaar int count) // 1 to count once, 10 to init
18124770d09aSBram Moolenaar {
18134770d09aSBram Moolenaar hash_T hash;
18144770d09aSBram Moolenaar hashitem_T *hi;
18154770d09aSBram Moolenaar wordcount_T *wc;
18164770d09aSBram Moolenaar char_u buf[MAXWLEN];
18174770d09aSBram Moolenaar char_u *p;
18184770d09aSBram Moolenaar
18194770d09aSBram Moolenaar if (len == -1)
18204770d09aSBram Moolenaar p = word;
18215bcc5a1fSBram Moolenaar else if (len >= MAXWLEN)
18225bcc5a1fSBram Moolenaar return;
18234770d09aSBram Moolenaar else
18244770d09aSBram Moolenaar {
18254770d09aSBram Moolenaar vim_strncpy(buf, word, len);
18264770d09aSBram Moolenaar p = buf;
18274770d09aSBram Moolenaar }
18284770d09aSBram Moolenaar
18294770d09aSBram Moolenaar hash = hash_hash(p);
18304770d09aSBram Moolenaar hi = hash_lookup(&lp->sl_wordcount, p, hash);
18314770d09aSBram Moolenaar if (HASHITEM_EMPTY(hi))
18324770d09aSBram Moolenaar {
1833c799fe20SBram Moolenaar wc = alloc(sizeof(wordcount_T) + STRLEN(p));
18344770d09aSBram Moolenaar if (wc == NULL)
18354770d09aSBram Moolenaar return;
18364770d09aSBram Moolenaar STRCPY(wc->wc_word, p);
18374770d09aSBram Moolenaar wc->wc_count = count;
18384770d09aSBram Moolenaar hash_add_item(&lp->sl_wordcount, hi, wc->wc_word, hash);
18394770d09aSBram Moolenaar }
18404770d09aSBram Moolenaar else
18414770d09aSBram Moolenaar {
18424770d09aSBram Moolenaar wc = HI2WC(hi);
18430d6f5d97SBram Moolenaar if ((wc->wc_count += count) < (unsigned)count) // check for overflow
18444770d09aSBram Moolenaar wc->wc_count = MAXWORDCOUNT;
18454770d09aSBram Moolenaar }
18464770d09aSBram Moolenaar }
18474770d09aSBram Moolenaar
18484770d09aSBram Moolenaar /*
184995529568SBram Moolenaar * Return TRUE if byte "n" appears in "str".
18506de6853cSBram Moolenaar * Like strchr() but independent of locale.
18516de6853cSBram Moolenaar */
18529ccfebddSBram Moolenaar int
byte_in_str(char_u * str,int n)1853764b23c8SBram Moolenaar byte_in_str(char_u *str, int n)
18546de6853cSBram Moolenaar {
18556de6853cSBram Moolenaar char_u *p;
18566de6853cSBram Moolenaar
18576de6853cSBram Moolenaar for (p = str; *p != NUL; ++p)
185895529568SBram Moolenaar if (*p == n)
18596de6853cSBram Moolenaar return TRUE;
18606de6853cSBram Moolenaar return FALSE;
18616de6853cSBram Moolenaar }
18626de6853cSBram Moolenaar
18635195e456SBram Moolenaar #define SY_MAXLEN 30
18645195e456SBram Moolenaar typedef struct syl_item_S
18655195e456SBram Moolenaar {
18660d6f5d97SBram Moolenaar char_u sy_chars[SY_MAXLEN]; // the sequence of chars
18675195e456SBram Moolenaar int sy_len;
18685195e456SBram Moolenaar } syl_item_T;
18695195e456SBram Moolenaar
18705195e456SBram Moolenaar /*
18715195e456SBram Moolenaar * Truncate "slang->sl_syllable" at the first slash and put the following items
18725195e456SBram Moolenaar * in "slang->sl_syl_items".
18735195e456SBram Moolenaar */
18749ccfebddSBram Moolenaar int
init_syl_tab(slang_T * slang)1875764b23c8SBram Moolenaar init_syl_tab(slang_T *slang)
18765195e456SBram Moolenaar {
18775195e456SBram Moolenaar char_u *p;
18785195e456SBram Moolenaar char_u *s;
18795195e456SBram Moolenaar int l;
18805195e456SBram Moolenaar syl_item_T *syl;
18815195e456SBram Moolenaar
18825195e456SBram Moolenaar ga_init2(&slang->sl_syl_items, sizeof(syl_item_T), 4);
18835195e456SBram Moolenaar p = vim_strchr(slang->sl_syllable, '/');
18845195e456SBram Moolenaar while (p != NULL)
18855195e456SBram Moolenaar {
18865195e456SBram Moolenaar *p++ = NUL;
18870d6f5d97SBram Moolenaar if (*p == NUL) // trailing slash
18885195e456SBram Moolenaar break;
18895195e456SBram Moolenaar s = p;
18905195e456SBram Moolenaar p = vim_strchr(p, '/');
18915195e456SBram Moolenaar if (p == NULL)
1892a93fa7eeSBram Moolenaar l = (int)STRLEN(s);
18935195e456SBram Moolenaar else
1894a93fa7eeSBram Moolenaar l = (int)(p - s);
18955195e456SBram Moolenaar if (l >= SY_MAXLEN)
18965195e456SBram Moolenaar return SP_FORMERROR;
18975195e456SBram Moolenaar if (ga_grow(&slang->sl_syl_items, 1) == FAIL)
18986de6853cSBram Moolenaar return SP_OTHERERROR;
18995195e456SBram Moolenaar syl = ((syl_item_T *)slang->sl_syl_items.ga_data)
19005195e456SBram Moolenaar + slang->sl_syl_items.ga_len++;
19015195e456SBram Moolenaar vim_strncpy(syl->sy_chars, s, l);
19025195e456SBram Moolenaar syl->sy_len = l;
19035195e456SBram Moolenaar }
19045195e456SBram Moolenaar return OK;
19055195e456SBram Moolenaar }
19065195e456SBram Moolenaar
19075195e456SBram Moolenaar /*
19085195e456SBram Moolenaar * Count the number of syllables in "word".
19095195e456SBram Moolenaar * When "word" contains spaces the syllables after the last space are counted.
19105195e456SBram Moolenaar * Returns zero if syllables are not defines.
19115195e456SBram Moolenaar */
19125195e456SBram Moolenaar static int
count_syllables(slang_T * slang,char_u * word)1913764b23c8SBram Moolenaar count_syllables(slang_T *slang, char_u *word)
19145195e456SBram Moolenaar {
19155195e456SBram Moolenaar int cnt = 0;
19165195e456SBram Moolenaar int skip = FALSE;
19175195e456SBram Moolenaar char_u *p;
19185195e456SBram Moolenaar int len;
19195195e456SBram Moolenaar int i;
19205195e456SBram Moolenaar syl_item_T *syl;
19215195e456SBram Moolenaar int c;
19225195e456SBram Moolenaar
19235195e456SBram Moolenaar if (slang->sl_syllable == NULL)
19245195e456SBram Moolenaar return 0;
19255195e456SBram Moolenaar
19265195e456SBram Moolenaar for (p = word; *p != NUL; p += len)
19275195e456SBram Moolenaar {
19280d6f5d97SBram Moolenaar // When running into a space reset counter.
19295195e456SBram Moolenaar if (*p == ' ')
19305195e456SBram Moolenaar {
19315195e456SBram Moolenaar len = 1;
19325195e456SBram Moolenaar cnt = 0;
19335195e456SBram Moolenaar continue;
19345195e456SBram Moolenaar }
19355195e456SBram Moolenaar
19360d6f5d97SBram Moolenaar // Find longest match of syllable items.
19375195e456SBram Moolenaar len = 0;
19385195e456SBram Moolenaar for (i = 0; i < slang->sl_syl_items.ga_len; ++i)
19395195e456SBram Moolenaar {
19405195e456SBram Moolenaar syl = ((syl_item_T *)slang->sl_syl_items.ga_data) + i;
19415195e456SBram Moolenaar if (syl->sy_len > len
19425195e456SBram Moolenaar && STRNCMP(p, syl->sy_chars, syl->sy_len) == 0)
19435195e456SBram Moolenaar len = syl->sy_len;
19445195e456SBram Moolenaar }
19450d6f5d97SBram Moolenaar if (len != 0) // found a match, count syllable
19465195e456SBram Moolenaar {
19475195e456SBram Moolenaar ++cnt;
19485195e456SBram Moolenaar skip = FALSE;
19495195e456SBram Moolenaar }
19505195e456SBram Moolenaar else
19515195e456SBram Moolenaar {
19520d6f5d97SBram Moolenaar // No recognized syllable item, at least a syllable char then?
19535195e456SBram Moolenaar c = mb_ptr2char(p);
19545195e456SBram Moolenaar len = (*mb_ptr2len)(p);
19555195e456SBram Moolenaar if (vim_strchr(slang->sl_syllable, c) == NULL)
19560d6f5d97SBram Moolenaar skip = FALSE; // No, search for next syllable
19575195e456SBram Moolenaar else if (!skip)
19585195e456SBram Moolenaar {
19590d6f5d97SBram Moolenaar ++cnt; // Yes, count it
19600d6f5d97SBram Moolenaar skip = TRUE; // don't count following syllable chars
19615195e456SBram Moolenaar }
19625195e456SBram Moolenaar }
19635195e456SBram Moolenaar }
19645195e456SBram Moolenaar return cnt;
19655195e456SBram Moolenaar }
19665195e456SBram Moolenaar
19675195e456SBram Moolenaar /*
1968860cae1cSBram Moolenaar * Parse 'spelllang' and set w_s->b_langp accordingly.
1969f417f2b6SBram Moolenaar * Returns NULL if it's OK, an error message otherwise.
1970402d2feaSBram Moolenaar */
1971f9e3e09fSBram Moolenaar char *
did_set_spelllang(win_T * wp)1972764b23c8SBram Moolenaar did_set_spelllang(win_T *wp)
1973402d2feaSBram Moolenaar {
1974402d2feaSBram Moolenaar garray_T ga;
1975f417f2b6SBram Moolenaar char_u *splp;
1976402d2feaSBram Moolenaar char_u *region;
1977b6356339SBram Moolenaar char_u region_cp[3];
19780a5fe214SBram Moolenaar int filename;
1979402d2feaSBram Moolenaar int region_mask;
19808b96d64cSBram Moolenaar slang_T *slang;
1981402d2feaSBram Moolenaar int c;
1982f417f2b6SBram Moolenaar char_u lang[MAXWLEN + 1];
19839ba0eb85SBram Moolenaar char_u spf_name[MAXPATHL];
1984f417f2b6SBram Moolenaar int len;
1985f417f2b6SBram Moolenaar char_u *p;
19867887d88aSBram Moolenaar int round;
1987f9184a1dSBram Moolenaar char_u *spf;
19880dc065eeSBram Moolenaar char_u *use_region = NULL;
19890dc065eeSBram Moolenaar int dont_use_region = FALSE;
1990da2303d9SBram Moolenaar int nobreak = FALSE;
19918b96d64cSBram Moolenaar int i, j;
19928b96d64cSBram Moolenaar langp_T *lp, *lp2;
1993706cdebcSBram Moolenaar static int recursive = FALSE;
1994f9e3e09fSBram Moolenaar char *ret_msg = NULL;
1995706cdebcSBram Moolenaar char_u *spl_copy;
19967c0a2f36SBram Moolenaar bufref_T bufref;
19977c0a2f36SBram Moolenaar
19987c0a2f36SBram Moolenaar set_bufref(&bufref, wp->w_buffer);
1999706cdebcSBram Moolenaar
20000d6f5d97SBram Moolenaar // We don't want to do this recursively. May happen when a language is
20010d6f5d97SBram Moolenaar // not available and the SpellFileMissing autocommand opens a new buffer
20020d6f5d97SBram Moolenaar // in which 'spell' is set.
2003706cdebcSBram Moolenaar if (recursive)
2004706cdebcSBram Moolenaar return NULL;
2005706cdebcSBram Moolenaar recursive = TRUE;
2006402d2feaSBram Moolenaar
2007402d2feaSBram Moolenaar ga_init2(&ga, sizeof(langp_T), 2);
2008860cae1cSBram Moolenaar clear_midword(wp);
2009402d2feaSBram Moolenaar
20100d6f5d97SBram Moolenaar // Make a copy of 'spelllang', the SpellFileMissing autocommands may change
20110d6f5d97SBram Moolenaar // it under our fingers.
2012860cae1cSBram Moolenaar spl_copy = vim_strsave(wp->w_s->b_p_spl);
2013706cdebcSBram Moolenaar if (spl_copy == NULL)
2014706cdebcSBram Moolenaar goto theend;
2015706cdebcSBram Moolenaar
2016cc63c647SBram Moolenaar wp->w_s->b_cjk = 0;
2017cc63c647SBram Moolenaar
20180d6f5d97SBram Moolenaar // Loop over comma separated language names.
2019706cdebcSBram Moolenaar for (splp = spl_copy; *splp != NUL; )
2020f417f2b6SBram Moolenaar {
20218f130edaSBram Moolenaar // Get one language name.
2022f417f2b6SBram Moolenaar copy_option_part(&splp, lang, MAXWLEN, ",");
2023f417f2b6SBram Moolenaar region = NULL;
2024a93fa7eeSBram Moolenaar len = (int)STRLEN(lang);
20250a5fe214SBram Moolenaar
2026f154f3abSBram Moolenaar if (!valid_spelllang(lang))
20278f130edaSBram Moolenaar continue;
20288f130edaSBram Moolenaar
2029cc63c647SBram Moolenaar if (STRCMP(lang, "cjk") == 0)
2030cc63c647SBram Moolenaar {
2031cc63c647SBram Moolenaar wp->w_s->b_cjk = 1;
2032cc63c647SBram Moolenaar continue;
2033cc63c647SBram Moolenaar }
2034cc63c647SBram Moolenaar
20350d6f5d97SBram Moolenaar // If the name ends in ".spl" use it as the name of the spell file.
20360d6f5d97SBram Moolenaar // If there is a region name let "region" point to it and remove it
20370d6f5d97SBram Moolenaar // from the name.
20380a5fe214SBram Moolenaar if (len > 4 && fnamecmp(lang + len - 4, ".spl") == 0)
20390a5fe214SBram Moolenaar {
20400a5fe214SBram Moolenaar filename = TRUE;
20410a5fe214SBram Moolenaar
20420d6f5d97SBram Moolenaar // Locate a region and remove it from the file name.
2043b6356339SBram Moolenaar p = vim_strchr(gettail(lang), '_');
2044b6356339SBram Moolenaar if (p != NULL && ASCII_ISALPHA(p[1]) && ASCII_ISALPHA(p[2])
2045b6356339SBram Moolenaar && !ASCII_ISALPHA(p[3]))
2046b6356339SBram Moolenaar {
2047b6356339SBram Moolenaar vim_strncpy(region_cp, p + 1, 2);
2048b6356339SBram Moolenaar mch_memmove(p, p + 3, len - (p - lang) - 2);
2049b6356339SBram Moolenaar region = region_cp;
2050b6356339SBram Moolenaar }
2051b6356339SBram Moolenaar else
2052b6356339SBram Moolenaar dont_use_region = TRUE;
2053b6356339SBram Moolenaar
20540d6f5d97SBram Moolenaar // Check if we loaded this language before.
2055aeea7215SBram Moolenaar FOR_ALL_SPELL_LANGS(slang)
205699499b1cSBram Moolenaar if (fullpathcmp(lang, slang->sl_fname, FALSE, TRUE) == FPC_SAME)
20570a5fe214SBram Moolenaar break;
20580a5fe214SBram Moolenaar }
20590a5fe214SBram Moolenaar else
20600a5fe214SBram Moolenaar {
20610a5fe214SBram Moolenaar filename = FALSE;
2062f417f2b6SBram Moolenaar if (len > 3 && lang[len - 3] == '_')
2063f417f2b6SBram Moolenaar {
2064f417f2b6SBram Moolenaar region = lang + len - 2;
2065f417f2b6SBram Moolenaar len -= 3;
2066f417f2b6SBram Moolenaar lang[len] = NUL;
2067402d2feaSBram Moolenaar }
20680dc065eeSBram Moolenaar else
20690dc065eeSBram Moolenaar dont_use_region = TRUE;
2070402d2feaSBram Moolenaar
20710d6f5d97SBram Moolenaar // Check if we loaded this language before.
2072aeea7215SBram Moolenaar FOR_ALL_SPELL_LANGS(slang)
20738b96d64cSBram Moolenaar if (STRICMP(lang, slang->sl_name) == 0)
2074402d2feaSBram Moolenaar break;
20750a5fe214SBram Moolenaar }
2076402d2feaSBram Moolenaar
2077b6356339SBram Moolenaar if (region != NULL)
2078b6356339SBram Moolenaar {
20790d6f5d97SBram Moolenaar // If the region differs from what was used before then don't
20800d6f5d97SBram Moolenaar // use it for 'spellfile'.
2081b6356339SBram Moolenaar if (use_region != NULL && STRCMP(region, use_region) != 0)
2082b6356339SBram Moolenaar dont_use_region = TRUE;
2083b6356339SBram Moolenaar use_region = region;
2084b6356339SBram Moolenaar }
2085b6356339SBram Moolenaar
20860d6f5d97SBram Moolenaar // If not found try loading the language now.
20878b96d64cSBram Moolenaar if (slang == NULL)
20880a5fe214SBram Moolenaar {
20890a5fe214SBram Moolenaar if (filename)
20900a5fe214SBram Moolenaar (void)spell_load_file(lang, lang, NULL, FALSE);
20910a5fe214SBram Moolenaar else
2092706cdebcSBram Moolenaar {
2093f417f2b6SBram Moolenaar spell_load_lang(lang);
20940d6f5d97SBram Moolenaar // SpellFileMissing autocommands may do anything, including
20950d6f5d97SBram Moolenaar // destroying the buffer we are using...
20967c0a2f36SBram Moolenaar if (!bufref_valid(&bufref))
2097706cdebcSBram Moolenaar {
2098f9e3e09fSBram Moolenaar ret_msg = N_("E797: SpellFileMissing autocommand deleted buffer");
2099706cdebcSBram Moolenaar goto theend;
2100706cdebcSBram Moolenaar }
2101706cdebcSBram Moolenaar }
21020a5fe214SBram Moolenaar }
2103402d2feaSBram Moolenaar
2104cfc6c43cSBram Moolenaar /*
2105f417f2b6SBram Moolenaar * Loop over the languages, there can be several files for "lang".
2106cfc6c43cSBram Moolenaar */
2107aeea7215SBram Moolenaar FOR_ALL_SPELL_LANGS(slang)
210899499b1cSBram Moolenaar if (filename ? fullpathcmp(lang, slang->sl_fname, FALSE, TRUE)
210999499b1cSBram Moolenaar == FPC_SAME
21108b96d64cSBram Moolenaar : STRICMP(lang, slang->sl_name) == 0)
2111402d2feaSBram Moolenaar {
2112402d2feaSBram Moolenaar region_mask = REGION_ALL;
21130a5fe214SBram Moolenaar if (!filename && region != NULL)
2114402d2feaSBram Moolenaar {
21150d6f5d97SBram Moolenaar // find region in sl_regions
21168b96d64cSBram Moolenaar c = find_region(slang->sl_regions, region);
2117402d2feaSBram Moolenaar if (c == REGION_ALL)
2118402d2feaSBram Moolenaar {
21198b96d64cSBram Moolenaar if (slang->sl_add)
21200dc065eeSBram Moolenaar {
21218b96d64cSBram Moolenaar if (*slang->sl_regions != NUL)
21220d6f5d97SBram Moolenaar // This addition file is for other regions.
21230dc065eeSBram Moolenaar region_mask = 0;
21240dc065eeSBram Moolenaar }
21250dc065eeSBram Moolenaar else
21260d6f5d97SBram Moolenaar // This is probably an error. Give a warning and
21270d6f5d97SBram Moolenaar // accept the words anyway.
2128f9e3e09fSBram Moolenaar smsg(_("Warning: region %s not supported"),
2129f417f2b6SBram Moolenaar region);
2130402d2feaSBram Moolenaar }
2131402d2feaSBram Moolenaar else
2132402d2feaSBram Moolenaar region_mask = 1 << c;
2133402d2feaSBram Moolenaar }
2134402d2feaSBram Moolenaar
21350dc065eeSBram Moolenaar if (region_mask != 0)
21360dc065eeSBram Moolenaar {
2137402d2feaSBram Moolenaar if (ga_grow(&ga, 1) == FAIL)
2138402d2feaSBram Moolenaar {
2139402d2feaSBram Moolenaar ga_clear(&ga);
2140e29a27f6SBram Moolenaar ret_msg = e_out_of_memory;
2141706cdebcSBram Moolenaar goto theend;
2142402d2feaSBram Moolenaar }
21438b96d64cSBram Moolenaar LANGP_ENTRY(ga, ga.ga_len)->lp_slang = slang;
2144402d2feaSBram Moolenaar LANGP_ENTRY(ga, ga.ga_len)->lp_region = region_mask;
2145402d2feaSBram Moolenaar ++ga.ga_len;
2146860cae1cSBram Moolenaar use_midword(slang, wp);
21478b96d64cSBram Moolenaar if (slang->sl_nobreak)
2148da2303d9SBram Moolenaar nobreak = TRUE;
2149402d2feaSBram Moolenaar }
2150402d2feaSBram Moolenaar }
21510dc065eeSBram Moolenaar }
2152402d2feaSBram Moolenaar
21530d6f5d97SBram Moolenaar // round 0: load int_wordlist, if possible.
21540d6f5d97SBram Moolenaar // round 1: load first name in 'spellfile'.
21550d6f5d97SBram Moolenaar // round 2: load second name in 'spellfile.
21560d6f5d97SBram Moolenaar // etc.
2157860cae1cSBram Moolenaar spf = curwin->w_s->b_p_spf;
2158f9184a1dSBram Moolenaar for (round = 0; round == 0 || *spf != NUL; ++round)
21599ba0eb85SBram Moolenaar {
2160f9184a1dSBram Moolenaar if (round == 0)
21617887d88aSBram Moolenaar {
21620d6f5d97SBram Moolenaar // Internal wordlist, if there is one.
2163f9184a1dSBram Moolenaar if (int_wordlist == NULL)
21647887d88aSBram Moolenaar continue;
2165f9184a1dSBram Moolenaar int_wordlist_spl(spf_name);
21667887d88aSBram Moolenaar }
21677887d88aSBram Moolenaar else
21687887d88aSBram Moolenaar {
21690d6f5d97SBram Moolenaar // One entry in 'spellfile'.
2170f9184a1dSBram Moolenaar copy_option_part(&spf, spf_name, MAXPATHL - 5, ",");
2171f9184a1dSBram Moolenaar STRCAT(spf_name, ".spl");
2172f9184a1dSBram Moolenaar
21730d6f5d97SBram Moolenaar // If it was already found above then skip it.
2174f9184a1dSBram Moolenaar for (c = 0; c < ga.ga_len; ++c)
2175ac6e65f8SBram Moolenaar {
2176ac6e65f8SBram Moolenaar p = LANGP_ENTRY(ga, c)->lp_slang->sl_fname;
217799499b1cSBram Moolenaar if (p != NULL && fullpathcmp(spf_name, p, FALSE, TRUE)
217899499b1cSBram Moolenaar == FPC_SAME)
2179f9184a1dSBram Moolenaar break;
2180ac6e65f8SBram Moolenaar }
2181f9184a1dSBram Moolenaar if (c < ga.ga_len)
21827887d88aSBram Moolenaar continue;
21837887d88aSBram Moolenaar }
21847887d88aSBram Moolenaar
21850d6f5d97SBram Moolenaar // Check if it was loaded already.
2186aeea7215SBram Moolenaar FOR_ALL_SPELL_LANGS(slang)
218799499b1cSBram Moolenaar if (fullpathcmp(spf_name, slang->sl_fname, FALSE, TRUE)
218899499b1cSBram Moolenaar == FPC_SAME)
21899ba0eb85SBram Moolenaar break;
21908b96d64cSBram Moolenaar if (slang == NULL)
21919ba0eb85SBram Moolenaar {
21920d6f5d97SBram Moolenaar // Not loaded, try loading it now. The language name includes the
21930d6f5d97SBram Moolenaar // region name, the region is ignored otherwise. for int_wordlist
21940d6f5d97SBram Moolenaar // use an arbitrary name.
2195f9184a1dSBram Moolenaar if (round == 0)
2196f9184a1dSBram Moolenaar STRCPY(lang, "internal wordlist");
2197f9184a1dSBram Moolenaar else
21987887d88aSBram Moolenaar {
2199f9184a1dSBram Moolenaar vim_strncpy(lang, gettail(spf_name), MAXWLEN);
2200f417f2b6SBram Moolenaar p = vim_strchr(lang, '.');
2201f417f2b6SBram Moolenaar if (p != NULL)
22020d6f5d97SBram Moolenaar *p = NUL; // truncate at ".encoding.add"
22037887d88aSBram Moolenaar }
22048b96d64cSBram Moolenaar slang = spell_load_file(spf_name, lang, NULL, TRUE);
2205da2303d9SBram Moolenaar
22060d6f5d97SBram Moolenaar // If one of the languages has NOBREAK we assume the addition
22070d6f5d97SBram Moolenaar // files also have this.
22088b96d64cSBram Moolenaar if (slang != NULL && nobreak)
22098b96d64cSBram Moolenaar slang->sl_nobreak = TRUE;
22109ba0eb85SBram Moolenaar }
22118b96d64cSBram Moolenaar if (slang != NULL && ga_grow(&ga, 1) == OK)
22129ba0eb85SBram Moolenaar {
22130dc065eeSBram Moolenaar region_mask = REGION_ALL;
22140dc065eeSBram Moolenaar if (use_region != NULL && !dont_use_region)
22150dc065eeSBram Moolenaar {
22160d6f5d97SBram Moolenaar // find region in sl_regions
22178b96d64cSBram Moolenaar c = find_region(slang->sl_regions, use_region);
22180dc065eeSBram Moolenaar if (c != REGION_ALL)
22190dc065eeSBram Moolenaar region_mask = 1 << c;
22208b96d64cSBram Moolenaar else if (*slang->sl_regions != NUL)
22210d6f5d97SBram Moolenaar // This spell file is for other regions.
22220dc065eeSBram Moolenaar region_mask = 0;
22230dc065eeSBram Moolenaar }
22240dc065eeSBram Moolenaar
22250dc065eeSBram Moolenaar if (region_mask != 0)
22260dc065eeSBram Moolenaar {
22278b96d64cSBram Moolenaar LANGP_ENTRY(ga, ga.ga_len)->lp_slang = slang;
22288b96d64cSBram Moolenaar LANGP_ENTRY(ga, ga.ga_len)->lp_sallang = NULL;
22298b96d64cSBram Moolenaar LANGP_ENTRY(ga, ga.ga_len)->lp_replang = NULL;
22300dc065eeSBram Moolenaar LANGP_ENTRY(ga, ga.ga_len)->lp_region = region_mask;
22319ba0eb85SBram Moolenaar ++ga.ga_len;
2232860cae1cSBram Moolenaar use_midword(slang, wp);
22339ba0eb85SBram Moolenaar }
22349ba0eb85SBram Moolenaar }
22350dc065eeSBram Moolenaar }
22369ba0eb85SBram Moolenaar
22370d6f5d97SBram Moolenaar // Everything is fine, store the new b_langp value.
2238860cae1cSBram Moolenaar ga_clear(&wp->w_s->b_langp);
2239860cae1cSBram Moolenaar wp->w_s->b_langp = ga;
2240402d2feaSBram Moolenaar
22410d6f5d97SBram Moolenaar // For each language figure out what language to use for sound folding and
22420d6f5d97SBram Moolenaar // REP items. If the language doesn't support it itself use another one
22430d6f5d97SBram Moolenaar // with the same name. E.g. for "en-math" use "en".
22448b96d64cSBram Moolenaar for (i = 0; i < ga.ga_len; ++i)
22458b96d64cSBram Moolenaar {
22468b96d64cSBram Moolenaar lp = LANGP_ENTRY(ga, i);
22478b96d64cSBram Moolenaar
22480d6f5d97SBram Moolenaar // sound folding
22498b96d64cSBram Moolenaar if (lp->lp_slang->sl_sal.ga_len > 0)
22500d6f5d97SBram Moolenaar // language does sound folding itself
22518b96d64cSBram Moolenaar lp->lp_sallang = lp->lp_slang;
22528b96d64cSBram Moolenaar else
22530d6f5d97SBram Moolenaar // find first similar language that does sound folding
22548b96d64cSBram Moolenaar for (j = 0; j < ga.ga_len; ++j)
22558b96d64cSBram Moolenaar {
22568b96d64cSBram Moolenaar lp2 = LANGP_ENTRY(ga, j);
22578b96d64cSBram Moolenaar if (lp2->lp_slang->sl_sal.ga_len > 0
22588b96d64cSBram Moolenaar && STRNCMP(lp->lp_slang->sl_name,
22598b96d64cSBram Moolenaar lp2->lp_slang->sl_name, 2) == 0)
22608b96d64cSBram Moolenaar {
22618b96d64cSBram Moolenaar lp->lp_sallang = lp2->lp_slang;
22628b96d64cSBram Moolenaar break;
22638b96d64cSBram Moolenaar }
22648b96d64cSBram Moolenaar }
22658b96d64cSBram Moolenaar
22660d6f5d97SBram Moolenaar // REP items
22678b96d64cSBram Moolenaar if (lp->lp_slang->sl_rep.ga_len > 0)
22680d6f5d97SBram Moolenaar // language has REP items itself
22698b96d64cSBram Moolenaar lp->lp_replang = lp->lp_slang;
22708b96d64cSBram Moolenaar else
22710d6f5d97SBram Moolenaar // find first similar language that has REP items
22728b96d64cSBram Moolenaar for (j = 0; j < ga.ga_len; ++j)
22738b96d64cSBram Moolenaar {
22748b96d64cSBram Moolenaar lp2 = LANGP_ENTRY(ga, j);
22758b96d64cSBram Moolenaar if (lp2->lp_slang->sl_rep.ga_len > 0
22768b96d64cSBram Moolenaar && STRNCMP(lp->lp_slang->sl_name,
22778b96d64cSBram Moolenaar lp2->lp_slang->sl_name, 2) == 0)
22788b96d64cSBram Moolenaar {
22798b96d64cSBram Moolenaar lp->lp_replang = lp2->lp_slang;
22808b96d64cSBram Moolenaar break;
22818b96d64cSBram Moolenaar }
22828b96d64cSBram Moolenaar }
22838b96d64cSBram Moolenaar }
2284d569a9e7SBram Moolenaar redraw_win_later(wp, NOT_VALID);
22858b96d64cSBram Moolenaar
2286706cdebcSBram Moolenaar theend:
2287706cdebcSBram Moolenaar vim_free(spl_copy);
2288706cdebcSBram Moolenaar recursive = FALSE;
2289706cdebcSBram Moolenaar return ret_msg;
2290402d2feaSBram Moolenaar }
2291402d2feaSBram Moolenaar
2292402d2feaSBram Moolenaar /*
22939c96f592SBram Moolenaar * Clear the midword characters for buffer "buf".
22949c96f592SBram Moolenaar */
22959c96f592SBram Moolenaar static void
clear_midword(win_T * wp)2296764b23c8SBram Moolenaar clear_midword(win_T *wp)
22979c96f592SBram Moolenaar {
2298a80faa89SBram Moolenaar CLEAR_FIELD(wp->w_s->b_spell_ismw);
2299d23a8236SBram Moolenaar VIM_CLEAR(wp->w_s->b_spell_ismw_mb);
23009c96f592SBram Moolenaar }
23019c96f592SBram Moolenaar
23029c96f592SBram Moolenaar /*
23039c96f592SBram Moolenaar * Use the "sl_midword" field of language "lp" for buffer "buf".
23049c96f592SBram Moolenaar * They add up to any currently used midword characters.
23059c96f592SBram Moolenaar */
23069c96f592SBram Moolenaar static void
use_midword(slang_T * lp,win_T * wp)2307764b23c8SBram Moolenaar use_midword(slang_T *lp, win_T *wp)
23089c96f592SBram Moolenaar {
23099c96f592SBram Moolenaar char_u *p;
23109c96f592SBram Moolenaar
23110d6f5d97SBram Moolenaar if (lp->sl_midword == NULL) // there aren't any
23120dc065eeSBram Moolenaar return;
23130dc065eeSBram Moolenaar
23149c96f592SBram Moolenaar for (p = lp->sl_midword; *p != NUL; )
23159c96f592SBram Moolenaar if (has_mbyte)
23169c96f592SBram Moolenaar {
23179c96f592SBram Moolenaar int c, l, n;
23189c96f592SBram Moolenaar char_u *bp;
23199c96f592SBram Moolenaar
23209c96f592SBram Moolenaar c = mb_ptr2char(p);
23210fa313a7SBram Moolenaar l = (*mb_ptr2len)(p);
23220fa313a7SBram Moolenaar if (c < 256 && l <= 2)
2323860cae1cSBram Moolenaar wp->w_s->b_spell_ismw[c] = TRUE;
2324860cae1cSBram Moolenaar else if (wp->w_s->b_spell_ismw_mb == NULL)
23250d6f5d97SBram Moolenaar // First multi-byte char in "b_spell_ismw_mb".
2326860cae1cSBram Moolenaar wp->w_s->b_spell_ismw_mb = vim_strnsave(p, l);
23279c96f592SBram Moolenaar else
23289c96f592SBram Moolenaar {
23290d6f5d97SBram Moolenaar // Append multi-byte chars to "b_spell_ismw_mb".
2330860cae1cSBram Moolenaar n = (int)STRLEN(wp->w_s->b_spell_ismw_mb);
2331860cae1cSBram Moolenaar bp = vim_strnsave(wp->w_s->b_spell_ismw_mb, n + l);
23329c96f592SBram Moolenaar if (bp != NULL)
23339c96f592SBram Moolenaar {
2334860cae1cSBram Moolenaar vim_free(wp->w_s->b_spell_ismw_mb);
2335860cae1cSBram Moolenaar wp->w_s->b_spell_ismw_mb = bp;
23369c96f592SBram Moolenaar vim_strncpy(bp + n, p, l);
23379c96f592SBram Moolenaar }
23389c96f592SBram Moolenaar }
23399c96f592SBram Moolenaar p += l;
23409c96f592SBram Moolenaar }
23419c96f592SBram Moolenaar else
2342860cae1cSBram Moolenaar wp->w_s->b_spell_ismw[*p++] = TRUE;
23439c96f592SBram Moolenaar }
23449c96f592SBram Moolenaar
23459c96f592SBram Moolenaar /*
2346402d2feaSBram Moolenaar * Find the region "region[2]" in "rp" (points to "sl_regions").
2347c4568ab3SBram Moolenaar * Each region is simply stored as the two characters of its name.
23487887d88aSBram Moolenaar * Returns the index if found (first is 0), REGION_ALL if not found.
2349402d2feaSBram Moolenaar */
2350402d2feaSBram Moolenaar static int
find_region(char_u * rp,char_u * region)2351764b23c8SBram Moolenaar find_region(char_u *rp, char_u *region)
2352402d2feaSBram Moolenaar {
2353402d2feaSBram Moolenaar int i;
2354402d2feaSBram Moolenaar
2355402d2feaSBram Moolenaar for (i = 0; ; i += 2)
2356402d2feaSBram Moolenaar {
2357402d2feaSBram Moolenaar if (rp[i] == NUL)
2358402d2feaSBram Moolenaar return REGION_ALL;
2359402d2feaSBram Moolenaar if (rp[i] == region[0] && rp[i + 1] == region[1])
2360402d2feaSBram Moolenaar break;
2361402d2feaSBram Moolenaar }
2362402d2feaSBram Moolenaar return i / 2;
2363402d2feaSBram Moolenaar }
2364402d2feaSBram Moolenaar
2365402d2feaSBram Moolenaar /*
23669ba0eb85SBram Moolenaar * Return case type of word:
2367402d2feaSBram Moolenaar * w word 0
236851485f06SBram Moolenaar * Word WF_ONECAP
236951485f06SBram Moolenaar * W WORD WF_ALLCAP
237051485f06SBram Moolenaar * WoRd wOrd WF_KEEPCAP
2371402d2feaSBram Moolenaar */
23729ccfebddSBram Moolenaar int
captype(char_u * word,char_u * end)2373764b23c8SBram Moolenaar captype(
2374764b23c8SBram Moolenaar char_u *word,
23750d6f5d97SBram Moolenaar char_u *end) // When NULL use up to NUL byte.
2376402d2feaSBram Moolenaar {
2377402d2feaSBram Moolenaar char_u *p;
2378402d2feaSBram Moolenaar int c;
2379402d2feaSBram Moolenaar int firstcap;
2380402d2feaSBram Moolenaar int allcap;
23810d6f5d97SBram Moolenaar int past_second = FALSE; // past second word char
2382402d2feaSBram Moolenaar
23830d6f5d97SBram Moolenaar // find first letter
238491acfffcSBram Moolenaar for (p = word; !spell_iswordp_nmw(p, curwin); MB_PTR_ADV(p))
23859ba0eb85SBram Moolenaar if (end == NULL ? *p == NUL : p >= end)
23860d6f5d97SBram Moolenaar return 0; // only non-word characters, illegal word
2387b765d634SBram Moolenaar if (has_mbyte)
2388402d2feaSBram Moolenaar c = mb_ptr2char_adv(&p);
2389b765d634SBram Moolenaar else
2390b765d634SBram Moolenaar c = *p++;
23919f30f504SBram Moolenaar firstcap = allcap = SPELL_ISUPPER(c);
2392402d2feaSBram Moolenaar
2393402d2feaSBram Moolenaar /*
2394402d2feaSBram Moolenaar * Need to check all letters to find a word with mixed upper/lower.
2395402d2feaSBram Moolenaar * But a word with an upper char only at start is a ONECAP.
2396402d2feaSBram Moolenaar */
239791acfffcSBram Moolenaar for ( ; end == NULL ? *p != NUL : p < end; MB_PTR_ADV(p))
2398cc63c647SBram Moolenaar if (spell_iswordp_nmw(p, curwin))
2399402d2feaSBram Moolenaar {
240053805d1eSBram Moolenaar c = PTR2CHAR(p);
24019f30f504SBram Moolenaar if (!SPELL_ISUPPER(c))
2402402d2feaSBram Moolenaar {
24030d6f5d97SBram Moolenaar // UUl -> KEEPCAP
2404402d2feaSBram Moolenaar if (past_second && allcap)
240551485f06SBram Moolenaar return WF_KEEPCAP;
2406402d2feaSBram Moolenaar allcap = FALSE;
2407402d2feaSBram Moolenaar }
2408402d2feaSBram Moolenaar else if (!allcap)
24090d6f5d97SBram Moolenaar // UlU -> KEEPCAP
241051485f06SBram Moolenaar return WF_KEEPCAP;
2411402d2feaSBram Moolenaar past_second = TRUE;
2412402d2feaSBram Moolenaar }
2413402d2feaSBram Moolenaar
2414402d2feaSBram Moolenaar if (allcap)
241551485f06SBram Moolenaar return WF_ALLCAP;
2416402d2feaSBram Moolenaar if (firstcap)
241751485f06SBram Moolenaar return WF_ONECAP;
2418402d2feaSBram Moolenaar return 0;
2419402d2feaSBram Moolenaar }
2420402d2feaSBram Moolenaar
24210fa313a7SBram Moolenaar /*
242234b466edSBram Moolenaar * Delete the internal wordlist and its .spl file.
242334b466edSBram Moolenaar */
242434b466edSBram Moolenaar void
spell_delete_wordlist(void)2425764b23c8SBram Moolenaar spell_delete_wordlist(void)
242634b466edSBram Moolenaar {
242734b466edSBram Moolenaar char_u fname[MAXPATHL];
242834b466edSBram Moolenaar
242934b466edSBram Moolenaar if (int_wordlist != NULL)
243034b466edSBram Moolenaar {
243134b466edSBram Moolenaar mch_remove(int_wordlist);
243234b466edSBram Moolenaar int_wordlist_spl(fname);
243334b466edSBram Moolenaar mch_remove(fname);
2434d23a8236SBram Moolenaar VIM_CLEAR(int_wordlist);
243534b466edSBram Moolenaar }
243634b466edSBram Moolenaar }
243734b466edSBram Moolenaar
24380a5fe214SBram Moolenaar /*
24390a5fe214SBram Moolenaar * Free all languages.
24400a5fe214SBram Moolenaar */
24410a5fe214SBram Moolenaar void
spell_free_all(void)2442764b23c8SBram Moolenaar spell_free_all(void)
24430a5fe214SBram Moolenaar {
24448b96d64cSBram Moolenaar slang_T *slang;
24450a5fe214SBram Moolenaar buf_T *buf;
24460a5fe214SBram Moolenaar
24470d6f5d97SBram Moolenaar // Go through all buffers and handle 'spelllang'. <VN>
244829323590SBram Moolenaar FOR_ALL_BUFFERS(buf)
2449860cae1cSBram Moolenaar ga_clear(&buf->b_s.b_langp);
24500a5fe214SBram Moolenaar
24510a5fe214SBram Moolenaar while (first_lang != NULL)
24520a5fe214SBram Moolenaar {
24538b96d64cSBram Moolenaar slang = first_lang;
24548b96d64cSBram Moolenaar first_lang = slang->sl_next;
24558b96d64cSBram Moolenaar slang_free(slang);
24560a5fe214SBram Moolenaar }
2457cf6bf39fSBram Moolenaar
245834b466edSBram Moolenaar spell_delete_wordlist();
24597887d88aSBram Moolenaar
2460d23a8236SBram Moolenaar VIM_CLEAR(repl_to);
2461d23a8236SBram Moolenaar VIM_CLEAR(repl_from);
24620a5fe214SBram Moolenaar }
24630a5fe214SBram Moolenaar
2464402d2feaSBram Moolenaar /*
2465402d2feaSBram Moolenaar * Clear all spelling tables and reload them.
2466cfc6c43cSBram Moolenaar * Used after 'encoding' is set and when ":mkspell" was used.
2467402d2feaSBram Moolenaar */
2468402d2feaSBram Moolenaar void
spell_reload(void)2469764b23c8SBram Moolenaar spell_reload(void)
2470402d2feaSBram Moolenaar {
24713982c541SBram Moolenaar win_T *wp;
2472402d2feaSBram Moolenaar
24730d6f5d97SBram Moolenaar // Initialize the table for spell_iswordp().
2474402d2feaSBram Moolenaar init_spell_chartab();
2475402d2feaSBram Moolenaar
24760d6f5d97SBram Moolenaar // Unload all allocated memory.
24770a5fe214SBram Moolenaar spell_free_all();
2478402d2feaSBram Moolenaar
24790d6f5d97SBram Moolenaar // Go through all buffers and handle 'spelllang'.
248029323590SBram Moolenaar FOR_ALL_WINDOWS(wp)
2481402d2feaSBram Moolenaar {
24820d6f5d97SBram Moolenaar // Only load the wordlists when 'spelllang' is set and there is a
24830d6f5d97SBram Moolenaar // window for this buffer in which 'spell' is set.
2484860cae1cSBram Moolenaar if (*wp->w_s->b_p_spl != NUL)
24853982c541SBram Moolenaar {
2486860cae1cSBram Moolenaar if (wp->w_p_spell)
24873982c541SBram Moolenaar {
2488860cae1cSBram Moolenaar (void)did_set_spelllang(wp);
24893982c541SBram Moolenaar break;
24903982c541SBram Moolenaar }
24913982c541SBram Moolenaar }
2492402d2feaSBram Moolenaar }
2493402d2feaSBram Moolenaar }
2494402d2feaSBram Moolenaar
2495b765d634SBram Moolenaar /*
24964770d09aSBram Moolenaar * Open a spell buffer. This is a nameless buffer that is not in the buffer
24974770d09aSBram Moolenaar * list and only contains text lines. Can use a swapfile to reduce memory
24984770d09aSBram Moolenaar * use.
24994770d09aSBram Moolenaar * Most other fields are invalid! Esp. watch out for string options being
25004770d09aSBram Moolenaar * NULL and there is no undo info.
25014770d09aSBram Moolenaar * Returns NULL when out of memory.
25024770d09aSBram Moolenaar */
25039ccfebddSBram Moolenaar buf_T *
open_spellbuf(void)2504764b23c8SBram Moolenaar open_spellbuf(void)
25054770d09aSBram Moolenaar {
25064770d09aSBram Moolenaar buf_T *buf;
25074770d09aSBram Moolenaar
2508c799fe20SBram Moolenaar buf = ALLOC_CLEAR_ONE(buf_T);
25094770d09aSBram Moolenaar if (buf != NULL)
25104770d09aSBram Moolenaar {
25114770d09aSBram Moolenaar buf->b_spell = TRUE;
25120d6f5d97SBram Moolenaar buf->b_p_swf = TRUE; // may create a swap file
2513706d2de9SBram Moolenaar #ifdef FEAT_CRYPT
2514706d2de9SBram Moolenaar buf->b_p_key = empty_option;
2515706d2de9SBram Moolenaar #endif
25164770d09aSBram Moolenaar ml_open(buf);
25170d6f5d97SBram Moolenaar ml_open_file(buf); // create swap file now
25184770d09aSBram Moolenaar }
25194770d09aSBram Moolenaar return buf;
25204770d09aSBram Moolenaar }
25214770d09aSBram Moolenaar
25224770d09aSBram Moolenaar /*
25234770d09aSBram Moolenaar * Close the buffer used for spell info.
25244770d09aSBram Moolenaar */
25259ccfebddSBram Moolenaar void
close_spellbuf(buf_T * buf)2526764b23c8SBram Moolenaar close_spellbuf(buf_T *buf)
25274770d09aSBram Moolenaar {
25284770d09aSBram Moolenaar if (buf != NULL)
25294770d09aSBram Moolenaar {
25304770d09aSBram Moolenaar ml_close(buf, TRUE);
25314770d09aSBram Moolenaar vim_free(buf);
25324770d09aSBram Moolenaar }
25334770d09aSBram Moolenaar }
25344770d09aSBram Moolenaar
2535cfc6c43cSBram Moolenaar /*
2536cfc6c43cSBram Moolenaar * Init the chartab used for spelling for ASCII.
2537cfc6c43cSBram Moolenaar * EBCDIC is not supported!
2538cfc6c43cSBram Moolenaar */
25399ccfebddSBram Moolenaar void
clear_spell_chartab(spelltab_T * sp)2540764b23c8SBram Moolenaar clear_spell_chartab(spelltab_T *sp)
2541cfc6c43cSBram Moolenaar {
2542cfc6c43cSBram Moolenaar int i;
2543cfc6c43cSBram Moolenaar
2544a80faa89SBram Moolenaar // Init everything to FALSE (zero).
2545a80faa89SBram Moolenaar CLEAR_FIELD(sp->st_isw);
2546a80faa89SBram Moolenaar CLEAR_FIELD(sp->st_isu);
2547cfc6c43cSBram Moolenaar for (i = 0; i < 256; ++i)
25489f30f504SBram Moolenaar {
2549cfc6c43cSBram Moolenaar sp->st_fold[i] = i;
25509f30f504SBram Moolenaar sp->st_upper[i] = i;
25519f30f504SBram Moolenaar }
2552cfc6c43cSBram Moolenaar
25530d6f5d97SBram Moolenaar // We include digits. A word shouldn't start with a digit, but handling
25540d6f5d97SBram Moolenaar // that is done separately.
2555cfc6c43cSBram Moolenaar for (i = '0'; i <= '9'; ++i)
2556cfc6c43cSBram Moolenaar sp->st_isw[i] = TRUE;
2557cfc6c43cSBram Moolenaar for (i = 'A'; i <= 'Z'; ++i)
2558cfc6c43cSBram Moolenaar {
2559cfc6c43cSBram Moolenaar sp->st_isw[i] = TRUE;
2560cfc6c43cSBram Moolenaar sp->st_isu[i] = TRUE;
2561cfc6c43cSBram Moolenaar sp->st_fold[i] = i + 0x20;
2562cfc6c43cSBram Moolenaar }
2563cfc6c43cSBram Moolenaar for (i = 'a'; i <= 'z'; ++i)
25649f30f504SBram Moolenaar {
2565cfc6c43cSBram Moolenaar sp->st_isw[i] = TRUE;
25669f30f504SBram Moolenaar sp->st_upper[i] = i - 0x20;
25679f30f504SBram Moolenaar }
2568cfc6c43cSBram Moolenaar }
2569cfc6c43cSBram Moolenaar
2570cfc6c43cSBram Moolenaar /*
2571cfc6c43cSBram Moolenaar * Init the chartab used for spelling. Only depends on 'encoding'.
2572cfc6c43cSBram Moolenaar * Called once while starting up and when 'encoding' changes.
2573cfc6c43cSBram Moolenaar * The default is to use isalpha(), but the spell file should define the word
2574cfc6c43cSBram Moolenaar * characters to make it possible that 'encoding' differs from the current
2575dfb9ac00SBram Moolenaar * locale. For utf-8 we don't use isalpha() but our own functions.
2576cfc6c43cSBram Moolenaar */
2577cfc6c43cSBram Moolenaar void
init_spell_chartab(void)2578764b23c8SBram Moolenaar init_spell_chartab(void)
2579cfc6c43cSBram Moolenaar {
2580cfc6c43cSBram Moolenaar int i;
2581cfc6c43cSBram Moolenaar
2582cfc6c43cSBram Moolenaar did_set_spelltab = FALSE;
2583cfc6c43cSBram Moolenaar clear_spell_chartab(&spelltab);
2584cfc6c43cSBram Moolenaar if (enc_dbcs)
2585cfc6c43cSBram Moolenaar {
25860d6f5d97SBram Moolenaar // DBCS: assume double-wide characters are word characters.
2587cfc6c43cSBram Moolenaar for (i = 128; i <= 255; ++i)
2588cfc6c43cSBram Moolenaar if (MB_BYTE2LEN(i) == 2)
2589cfc6c43cSBram Moolenaar spelltab.st_isw[i] = TRUE;
2590cfc6c43cSBram Moolenaar }
25919f30f504SBram Moolenaar else if (enc_utf8)
25929f30f504SBram Moolenaar {
25939f30f504SBram Moolenaar for (i = 128; i < 256; ++i)
25949f30f504SBram Moolenaar {
259554ab0f1eSBram Moolenaar int f = utf_fold(i);
259654ab0f1eSBram Moolenaar int u = utf_toupper(i);
259754ab0f1eSBram Moolenaar
25989f30f504SBram Moolenaar spelltab.st_isu[i] = utf_isupper(i);
25999f30f504SBram Moolenaar spelltab.st_isw[i] = spelltab.st_isu[i] || utf_islower(i);
26000d6f5d97SBram Moolenaar // The folded/upper-cased value is different between latin1 and
26010d6f5d97SBram Moolenaar // utf8 for 0xb5, causing E763 for no good reason. Use the latin1
26020d6f5d97SBram Moolenaar // value for utf-8 to avoid this.
260354ab0f1eSBram Moolenaar spelltab.st_fold[i] = (f < 256) ? f : i;
260454ab0f1eSBram Moolenaar spelltab.st_upper[i] = (u < 256) ? u : i;
26059f30f504SBram Moolenaar }
26069f30f504SBram Moolenaar }
2607cfc6c43cSBram Moolenaar else
2608cfc6c43cSBram Moolenaar {
26090d6f5d97SBram Moolenaar // Rough guess: use locale-dependent library functions.
2610cfc6c43cSBram Moolenaar for (i = 128; i < 256; ++i)
2611cfc6c43cSBram Moolenaar {
2612cfc6c43cSBram Moolenaar if (MB_ISUPPER(i))
2613cfc6c43cSBram Moolenaar {
26149f30f504SBram Moolenaar spelltab.st_isw[i] = TRUE;
2615cfc6c43cSBram Moolenaar spelltab.st_isu[i] = TRUE;
2616cfc6c43cSBram Moolenaar spelltab.st_fold[i] = MB_TOLOWER(i);
2617cfc6c43cSBram Moolenaar }
26189f30f504SBram Moolenaar else if (MB_ISLOWER(i))
26199f30f504SBram Moolenaar {
26209f30f504SBram Moolenaar spelltab.st_isw[i] = TRUE;
26219f30f504SBram Moolenaar spelltab.st_upper[i] = MB_TOUPPER(i);
26229f30f504SBram Moolenaar }
2623cfc6c43cSBram Moolenaar }
2624cfc6c43cSBram Moolenaar }
2625cfc6c43cSBram Moolenaar }
2626cfc6c43cSBram Moolenaar
2627cfc6c43cSBram Moolenaar
2628cfc6c43cSBram Moolenaar /*
2629ea408854SBram Moolenaar * Return TRUE if "p" points to a word character.
2630cf6bf39fSBram Moolenaar * As a special case we see "midword" characters as word character when it is
2631ea408854SBram Moolenaar * followed by a word character. This finds they'there but not 'they there'.
2632cf6bf39fSBram Moolenaar * Thus this only works properly when past the first character of the word.
2633ea408854SBram Moolenaar */
263446a426c9SBram Moolenaar int
spell_iswordp(char_u * p,win_T * wp)2635764b23c8SBram Moolenaar spell_iswordp(
2636764b23c8SBram Moolenaar char_u *p,
26370d6f5d97SBram Moolenaar win_T *wp) // buffer used
2638ea408854SBram Moolenaar {
2639cf6bf39fSBram Moolenaar char_u *s;
2640cf6bf39fSBram Moolenaar int l;
2641cf6bf39fSBram Moolenaar int c;
2642cf6bf39fSBram Moolenaar
2643cf6bf39fSBram Moolenaar if (has_mbyte)
2644cf6bf39fSBram Moolenaar {
26451614a149SBram Moolenaar l = mb_ptr2len(p);
2646cf6bf39fSBram Moolenaar s = p;
2647cf6bf39fSBram Moolenaar if (l == 1)
2648cf6bf39fSBram Moolenaar {
26490d6f5d97SBram Moolenaar // be quick for ASCII
2650860cae1cSBram Moolenaar if (wp->w_s->b_spell_ismw[*p])
26510d6f5d97SBram Moolenaar s = p + 1; // skip a mid-word character
2652cf6bf39fSBram Moolenaar }
2653cf6bf39fSBram Moolenaar else
2654cf6bf39fSBram Moolenaar {
2655cf6bf39fSBram Moolenaar c = mb_ptr2char(p);
2656860cae1cSBram Moolenaar if (c < 256 ? wp->w_s->b_spell_ismw[c]
2657860cae1cSBram Moolenaar : (wp->w_s->b_spell_ismw_mb != NULL
2658860cae1cSBram Moolenaar && vim_strchr(wp->w_s->b_spell_ismw_mb, c) != NULL))
2659cf6bf39fSBram Moolenaar s = p + l;
2660cf6bf39fSBram Moolenaar }
2661cf6bf39fSBram Moolenaar
2662dfb9ac00SBram Moolenaar c = mb_ptr2char(s);
2663dfb9ac00SBram Moolenaar if (c > 255)
2664cc63c647SBram Moolenaar return spell_mb_isword_class(mb_get_class(s), wp);
2665dfb9ac00SBram Moolenaar return spelltab.st_isw[c];
2666ea408854SBram Moolenaar }
2667cf6bf39fSBram Moolenaar
2668860cae1cSBram Moolenaar return spelltab.st_isw[wp->w_s->b_spell_ismw[*p] ? p[1] : p[0]];
26699c96f592SBram Moolenaar }
26709c96f592SBram Moolenaar
26719c96f592SBram Moolenaar /*
26729c96f592SBram Moolenaar * Return TRUE if "p" points to a word character.
26739c96f592SBram Moolenaar * Unlike spell_iswordp() this doesn't check for "midword" characters.
26749c96f592SBram Moolenaar */
267546a426c9SBram Moolenaar int
spell_iswordp_nmw(char_u * p,win_T * wp)2676764b23c8SBram Moolenaar spell_iswordp_nmw(char_u *p, win_T *wp)
26779c96f592SBram Moolenaar {
2678dfb9ac00SBram Moolenaar int c;
26799c96f592SBram Moolenaar
2680dfb9ac00SBram Moolenaar if (has_mbyte)
2681dfb9ac00SBram Moolenaar {
2682dfb9ac00SBram Moolenaar c = mb_ptr2char(p);
2683dfb9ac00SBram Moolenaar if (c > 255)
2684cc63c647SBram Moolenaar return spell_mb_isword_class(mb_get_class(p), wp);
2685dfb9ac00SBram Moolenaar return spelltab.st_isw[c];
2686dfb9ac00SBram Moolenaar }
26879c96f592SBram Moolenaar return spelltab.st_isw[*p];
2688cf6bf39fSBram Moolenaar }
2689ea408854SBram Moolenaar
2690a1ba811aSBram Moolenaar /*
26917a91a4a1SBram Moolenaar * Return TRUE if word class indicates a word character.
26927a91a4a1SBram Moolenaar * Only for characters above 255.
26937a91a4a1SBram Moolenaar * Unicode subscript and superscript are not considered word characters.
2694cc63c647SBram Moolenaar * See also dbcs_class() and utf_class() in mbyte.c.
26957a91a4a1SBram Moolenaar */
26967a91a4a1SBram Moolenaar static int
spell_mb_isword_class(int cl,win_T * wp)2697764b23c8SBram Moolenaar spell_mb_isword_class(int cl, win_T *wp)
26987a91a4a1SBram Moolenaar {
2699cc63c647SBram Moolenaar if (wp->w_s->b_cjk)
27000d6f5d97SBram Moolenaar // East Asian characters are not considered word characters.
2701cc63c647SBram Moolenaar return cl == 2 || cl == 0x2800;
270206e63770SBram Moolenaar return cl >= 2 && cl != 0x2070 && cl != 0x2080 && cl != 3;
27037a91a4a1SBram Moolenaar }
27047a91a4a1SBram Moolenaar
27057a91a4a1SBram Moolenaar /*
2706a1ba811aSBram Moolenaar * Return TRUE if "p" points to a word character.
2707a1ba811aSBram Moolenaar * Wide version of spell_iswordp().
2708a1ba811aSBram Moolenaar */
2709a1ba811aSBram Moolenaar static int
spell_iswordp_w(int * p,win_T * wp)2710764b23c8SBram Moolenaar spell_iswordp_w(int *p, win_T *wp)
2711a1ba811aSBram Moolenaar {
2712a1ba811aSBram Moolenaar int *s;
2713a1ba811aSBram Moolenaar
2714860cae1cSBram Moolenaar if (*p < 256 ? wp->w_s->b_spell_ismw[*p]
2715860cae1cSBram Moolenaar : (wp->w_s->b_spell_ismw_mb != NULL
2716860cae1cSBram Moolenaar && vim_strchr(wp->w_s->b_spell_ismw_mb, *p) != NULL))
2717a1ba811aSBram Moolenaar s = p + 1;
2718a1ba811aSBram Moolenaar else
2719a1ba811aSBram Moolenaar s = p;
2720a1ba811aSBram Moolenaar
2721dfb9ac00SBram Moolenaar if (*s > 255)
2722a1ba811aSBram Moolenaar {
2723a1ba811aSBram Moolenaar if (enc_utf8)
2724cc63c647SBram Moolenaar return spell_mb_isword_class(utf_class(*s), wp);
2725a1ba811aSBram Moolenaar if (enc_dbcs)
2726cc63c647SBram Moolenaar return spell_mb_isword_class(
2727cc63c647SBram Moolenaar dbcs_class((unsigned)*s >> 8, *s & 0xff), wp);
2728a1ba811aSBram Moolenaar return 0;
2729a1ba811aSBram Moolenaar }
2730a1ba811aSBram Moolenaar return spelltab.st_isw[*s];
2731a1ba811aSBram Moolenaar }
2732a1ba811aSBram Moolenaar
2733ea408854SBram Moolenaar /*
27349f30f504SBram Moolenaar * Case-fold "str[len]" into "buf[buflen]". The result is NUL terminated.
27359f30f504SBram Moolenaar * Uses the character definitions from the .spl file.
2736cfc6c43cSBram Moolenaar * When using a multi-byte 'encoding' the length may change!
2737cfc6c43cSBram Moolenaar * Returns FAIL when something wrong.
2738cfc6c43cSBram Moolenaar */
27399ccfebddSBram Moolenaar int
spell_casefold(win_T * wp,char_u * str,int len,char_u * buf,int buflen)2740764b23c8SBram Moolenaar spell_casefold(
27414f135275SBram Moolenaar win_T *wp,
2742764b23c8SBram Moolenaar char_u *str,
2743764b23c8SBram Moolenaar int len,
2744764b23c8SBram Moolenaar char_u *buf,
2745764b23c8SBram Moolenaar int buflen)
2746cfc6c43cSBram Moolenaar {
2747cfc6c43cSBram Moolenaar int i;
2748cfc6c43cSBram Moolenaar
2749cfc6c43cSBram Moolenaar if (len >= buflen)
2750cfc6c43cSBram Moolenaar {
2751cfc6c43cSBram Moolenaar buf[0] = NUL;
27520d6f5d97SBram Moolenaar return FAIL; // result will not fit
2753cfc6c43cSBram Moolenaar }
2754cfc6c43cSBram Moolenaar
2755cfc6c43cSBram Moolenaar if (has_mbyte)
2756cfc6c43cSBram Moolenaar {
2757cfc6c43cSBram Moolenaar int outi = 0;
27589f30f504SBram Moolenaar char_u *p;
27599f30f504SBram Moolenaar int c;
2760cfc6c43cSBram Moolenaar
27610d6f5d97SBram Moolenaar // Fold one character at a time.
27629f30f504SBram Moolenaar for (p = str; p < str + len; )
2763cfc6c43cSBram Moolenaar {
2764cfc6c43cSBram Moolenaar if (outi + MB_MAXBYTES > buflen)
2765cfc6c43cSBram Moolenaar {
2766cfc6c43cSBram Moolenaar buf[outi] = NUL;
2767cfc6c43cSBram Moolenaar return FAIL;
2768cfc6c43cSBram Moolenaar }
27690fa313a7SBram Moolenaar c = mb_cptr2char_adv(&p);
27704f135275SBram Moolenaar
27714f135275SBram Moolenaar // Exception: greek capital sigma 0x03A3 folds to 0x03C3, except
27724f135275SBram Moolenaar // when it is the last character in a word, then it folds to
27734f135275SBram Moolenaar // 0x03C2.
27744f135275SBram Moolenaar if (c == 0x03a3 || c == 0x03c2)
27754f135275SBram Moolenaar {
27764f135275SBram Moolenaar if (p == str + len || !spell_iswordp(p, wp))
27774f135275SBram Moolenaar c = 0x03c2;
27784f135275SBram Moolenaar else
27794f135275SBram Moolenaar c = 0x03c3;
27804f135275SBram Moolenaar }
27814f135275SBram Moolenaar else
27824f135275SBram Moolenaar c = SPELL_TOFOLD(c);
27834f135275SBram Moolenaar
27844f135275SBram Moolenaar outi += mb_char2bytes(c, buf + outi);
2785cfc6c43cSBram Moolenaar }
2786cfc6c43cSBram Moolenaar buf[outi] = NUL;
2787cfc6c43cSBram Moolenaar }
2788cfc6c43cSBram Moolenaar else
2789cfc6c43cSBram Moolenaar {
27900d6f5d97SBram Moolenaar // Be quick for non-multibyte encodings.
2791cfc6c43cSBram Moolenaar for (i = 0; i < len; ++i)
27929f30f504SBram Moolenaar buf[i] = spelltab.st_fold[str[i]];
2793cfc6c43cSBram Moolenaar buf[i] = NUL;
2794cfc6c43cSBram Moolenaar }
2795cfc6c43cSBram Moolenaar
2796cfc6c43cSBram Moolenaar return OK;
2797cfc6c43cSBram Moolenaar }
2798cfc6c43cSBram Moolenaar
2799d857f0e0SBram Moolenaar /*
28008b59de9fSBram Moolenaar * Check if the word at line "lnum" column "col" is required to start with a
28018b59de9fSBram Moolenaar * capital. This uses 'spellcapcheck' of the current buffer.
28028b59de9fSBram Moolenaar */
280346a426c9SBram Moolenaar int
check_need_cap(linenr_T lnum,colnr_T col)2804764b23c8SBram Moolenaar check_need_cap(linenr_T lnum, colnr_T col)
28058b59de9fSBram Moolenaar {
28068b59de9fSBram Moolenaar int need_cap = FALSE;
28078b59de9fSBram Moolenaar char_u *line;
28088b59de9fSBram Moolenaar char_u *line_copy = NULL;
28098b59de9fSBram Moolenaar char_u *p;
28108b59de9fSBram Moolenaar colnr_T endcol;
28118b59de9fSBram Moolenaar regmatch_T regmatch;
28128b59de9fSBram Moolenaar
2813860cae1cSBram Moolenaar if (curwin->w_s->b_cap_prog == NULL)
28148b59de9fSBram Moolenaar return FALSE;
28158b59de9fSBram Moolenaar
28168b59de9fSBram Moolenaar line = ml_get_curline();
28178b59de9fSBram Moolenaar endcol = 0;
2818e2e69e48SBram Moolenaar if (getwhitecols(line) >= (int)col)
28198b59de9fSBram Moolenaar {
28200d6f5d97SBram Moolenaar // At start of line, check if previous line is empty or sentence
28210d6f5d97SBram Moolenaar // ends there.
28228b59de9fSBram Moolenaar if (lnum == 1)
28238b59de9fSBram Moolenaar need_cap = TRUE;
28248b59de9fSBram Moolenaar else
28258b59de9fSBram Moolenaar {
28268b59de9fSBram Moolenaar line = ml_get(lnum - 1);
28278b59de9fSBram Moolenaar if (*skipwhite(line) == NUL)
28288b59de9fSBram Moolenaar need_cap = TRUE;
28298b59de9fSBram Moolenaar else
28308b59de9fSBram Moolenaar {
28310d6f5d97SBram Moolenaar // Append a space in place of the line break.
28328b59de9fSBram Moolenaar line_copy = concat_str(line, (char_u *)" ");
28338b59de9fSBram Moolenaar line = line_copy;
2834a93fa7eeSBram Moolenaar endcol = (colnr_T)STRLEN(line);
28358b59de9fSBram Moolenaar }
28368b59de9fSBram Moolenaar }
28378b59de9fSBram Moolenaar }
28388b59de9fSBram Moolenaar else
28398b59de9fSBram Moolenaar endcol = col;
28408b59de9fSBram Moolenaar
28418b59de9fSBram Moolenaar if (endcol > 0)
28428b59de9fSBram Moolenaar {
28430d6f5d97SBram Moolenaar // Check if sentence ends before the bad word.
2844860cae1cSBram Moolenaar regmatch.regprog = curwin->w_s->b_cap_prog;
28458b59de9fSBram Moolenaar regmatch.rm_ic = FALSE;
28468b59de9fSBram Moolenaar p = line + endcol;
28478b59de9fSBram Moolenaar for (;;)
28488b59de9fSBram Moolenaar {
284991acfffcSBram Moolenaar MB_PTR_BACK(line, p);
2850cc63c647SBram Moolenaar if (p == line || spell_iswordp_nmw(p, curwin))
28518b59de9fSBram Moolenaar break;
28528b59de9fSBram Moolenaar if (vim_regexec(®match, p, 0)
28538b59de9fSBram Moolenaar && regmatch.endp[0] == line + endcol)
28548b59de9fSBram Moolenaar {
28558b59de9fSBram Moolenaar need_cap = TRUE;
28568b59de9fSBram Moolenaar break;
28578b59de9fSBram Moolenaar }
28588b59de9fSBram Moolenaar }
2859dffa5b8eSBram Moolenaar curwin->w_s->b_cap_prog = regmatch.regprog;
28608b59de9fSBram Moolenaar }
28618b59de9fSBram Moolenaar
28628b59de9fSBram Moolenaar vim_free(line_copy);
28638b59de9fSBram Moolenaar
28648b59de9fSBram Moolenaar return need_cap;
28658b59de9fSBram Moolenaar }
28668b59de9fSBram Moolenaar
28678b59de9fSBram Moolenaar
28688b59de9fSBram Moolenaar /*
2869a1ba811aSBram Moolenaar * ":spellrepall"
2870a1ba811aSBram Moolenaar */
2871a1ba811aSBram Moolenaar void
ex_spellrepall(exarg_T * eap UNUSED)2872764b23c8SBram Moolenaar ex_spellrepall(exarg_T *eap UNUSED)
2873a1ba811aSBram Moolenaar {
2874a1ba811aSBram Moolenaar pos_T pos = curwin->w_cursor;
2875a1ba811aSBram Moolenaar char_u *frompat;
2876a1ba811aSBram Moolenaar int addlen;
2877a1ba811aSBram Moolenaar char_u *line;
2878a1ba811aSBram Moolenaar char_u *p;
2879a1ba811aSBram Moolenaar int save_ws = p_ws;
28805195e456SBram Moolenaar linenr_T prev_lnum = 0;
2881a1ba811aSBram Moolenaar
2882a1ba811aSBram Moolenaar if (repl_from == NULL || repl_to == NULL)
2883a1ba811aSBram Moolenaar {
2884f9e3e09fSBram Moolenaar emsg(_("E752: No previous spell replacement"));
2885a1ba811aSBram Moolenaar return;
2886a1ba811aSBram Moolenaar }
2887a93fa7eeSBram Moolenaar addlen = (int)(STRLEN(repl_to) - STRLEN(repl_from));
2888a1ba811aSBram Moolenaar
2889964b3746SBram Moolenaar frompat = alloc(STRLEN(repl_from) + 7);
2890a1ba811aSBram Moolenaar if (frompat == NULL)
2891a1ba811aSBram Moolenaar return;
2892a1ba811aSBram Moolenaar sprintf((char *)frompat, "\\V\\<%s\\>", repl_from);
2893a1ba811aSBram Moolenaar p_ws = FALSE;
2894a1ba811aSBram Moolenaar
28955195e456SBram Moolenaar sub_nsubs = 0;
28965195e456SBram Moolenaar sub_nlines = 0;
2897a1ba811aSBram Moolenaar curwin->w_cursor.lnum = 0;
2898a1ba811aSBram Moolenaar while (!got_int)
2899a1ba811aSBram Moolenaar {
2900c036e87bSBram Moolenaar if (do_search(NULL, '/', '/', frompat, 1L, SEARCH_KEEP, NULL) == 0
2901a1ba811aSBram Moolenaar || u_save_cursor() == FAIL)
2902a1ba811aSBram Moolenaar break;
2903a1ba811aSBram Moolenaar
29040d6f5d97SBram Moolenaar // Only replace when the right word isn't there yet. This happens
29050d6f5d97SBram Moolenaar // when changing "etc" to "etc.".
2906a1ba811aSBram Moolenaar line = ml_get_curline();
2907a1ba811aSBram Moolenaar if (addlen <= 0 || STRNCMP(line + curwin->w_cursor.col,
2908a1ba811aSBram Moolenaar repl_to, STRLEN(repl_to)) != 0)
2909a1ba811aSBram Moolenaar {
2910964b3746SBram Moolenaar p = alloc(STRLEN(line) + addlen + 1);
2911a1ba811aSBram Moolenaar if (p == NULL)
2912a1ba811aSBram Moolenaar break;
2913a1ba811aSBram Moolenaar mch_memmove(p, line, curwin->w_cursor.col);
2914a1ba811aSBram Moolenaar STRCPY(p + curwin->w_cursor.col, repl_to);
2915a1ba811aSBram Moolenaar STRCAT(p, line + curwin->w_cursor.col + STRLEN(repl_from));
2916a1ba811aSBram Moolenaar ml_replace(curwin->w_cursor.lnum, p, FALSE);
2917a1ba811aSBram Moolenaar changed_bytes(curwin->w_cursor.lnum, curwin->w_cursor.col);
29185195e456SBram Moolenaar
29195195e456SBram Moolenaar if (curwin->w_cursor.lnum != prev_lnum)
29205195e456SBram Moolenaar {
29215195e456SBram Moolenaar ++sub_nlines;
29225195e456SBram Moolenaar prev_lnum = curwin->w_cursor.lnum;
29235195e456SBram Moolenaar }
29245195e456SBram Moolenaar ++sub_nsubs;
2925a1ba811aSBram Moolenaar }
2926a93fa7eeSBram Moolenaar curwin->w_cursor.col += (colnr_T)STRLEN(repl_to);
2927a1ba811aSBram Moolenaar }
2928a1ba811aSBram Moolenaar
2929a1ba811aSBram Moolenaar p_ws = save_ws;
2930a1ba811aSBram Moolenaar curwin->w_cursor = pos;
2931a1ba811aSBram Moolenaar vim_free(frompat);
2932a1ba811aSBram Moolenaar
29335195e456SBram Moolenaar if (sub_nsubs == 0)
2934f9e3e09fSBram Moolenaar semsg(_("E753: Not found: %s"), repl_from);
29355195e456SBram Moolenaar else
29365195e456SBram Moolenaar do_sub_msg(FALSE);
2937a1ba811aSBram Moolenaar }
2938a1ba811aSBram Moolenaar
2939a1ba811aSBram Moolenaar /*
29409f30f504SBram Moolenaar * Make a copy of "word", with the first letter upper or lower cased, to
29419f30f504SBram Moolenaar * "wcopy[MAXWLEN]". "word" must not be empty.
29429f30f504SBram Moolenaar * The result is NUL terminated.
29439ba0eb85SBram Moolenaar */
29449ccfebddSBram Moolenaar void
onecap_copy(char_u * word,char_u * wcopy,int upper)2945764b23c8SBram Moolenaar onecap_copy(
2946764b23c8SBram Moolenaar char_u *word,
2947764b23c8SBram Moolenaar char_u *wcopy,
29480d6f5d97SBram Moolenaar int upper) // TRUE: first letter made upper case
29499ba0eb85SBram Moolenaar {
29509ba0eb85SBram Moolenaar char_u *p;
29519ba0eb85SBram Moolenaar int c;
29529ba0eb85SBram Moolenaar int l;
29539ba0eb85SBram Moolenaar
29549ba0eb85SBram Moolenaar p = word;
29559ba0eb85SBram Moolenaar if (has_mbyte)
29560fa313a7SBram Moolenaar c = mb_cptr2char_adv(&p);
29579ba0eb85SBram Moolenaar else
29589ba0eb85SBram Moolenaar c = *p++;
29599ba0eb85SBram Moolenaar if (upper)
29609f30f504SBram Moolenaar c = SPELL_TOUPPER(c);
29619ba0eb85SBram Moolenaar else
29629f30f504SBram Moolenaar c = SPELL_TOFOLD(c);
29639ba0eb85SBram Moolenaar if (has_mbyte)
29649ba0eb85SBram Moolenaar l = mb_char2bytes(c, wcopy);
29659ba0eb85SBram Moolenaar else
29669ba0eb85SBram Moolenaar {
29679ba0eb85SBram Moolenaar l = 1;
29689ba0eb85SBram Moolenaar wcopy[0] = c;
29699ba0eb85SBram Moolenaar }
29709c96f592SBram Moolenaar vim_strncpy(wcopy + l, p, MAXWLEN - l - 1);
29719ba0eb85SBram Moolenaar }
29729ba0eb85SBram Moolenaar
29739ba0eb85SBram Moolenaar /*
29749f30f504SBram Moolenaar * Make a copy of "word" with all the letters upper cased into
29759f30f504SBram Moolenaar * "wcopy[MAXWLEN]". The result is NUL terminated.
29769ba0eb85SBram Moolenaar */
297746a426c9SBram Moolenaar void
allcap_copy(char_u * word,char_u * wcopy)2978764b23c8SBram Moolenaar allcap_copy(char_u *word, char_u *wcopy)
29799ba0eb85SBram Moolenaar {
29809ba0eb85SBram Moolenaar char_u *s;
29819ba0eb85SBram Moolenaar char_u *d;
29829ba0eb85SBram Moolenaar int c;
29839ba0eb85SBram Moolenaar
29849ba0eb85SBram Moolenaar d = wcopy;
29859ba0eb85SBram Moolenaar for (s = word; *s != NUL; )
29869ba0eb85SBram Moolenaar {
29879ba0eb85SBram Moolenaar if (has_mbyte)
29880fa313a7SBram Moolenaar c = mb_cptr2char_adv(&s);
29899ba0eb85SBram Moolenaar else
29909ba0eb85SBram Moolenaar c = *s++;
29917862282fSBram Moolenaar
29920d6f5d97SBram Moolenaar // We only change 0xdf to SS when we are certain latin1 is used. It
29930d6f5d97SBram Moolenaar // would cause weird errors in other 8-bit encodings.
29947862282fSBram Moolenaar if (enc_latin1like && c == 0xdf)
29957862282fSBram Moolenaar {
29967862282fSBram Moolenaar c = 'S';
29977862282fSBram Moolenaar if (d - wcopy >= MAXWLEN - 1)
29987862282fSBram Moolenaar break;
29997862282fSBram Moolenaar *d++ = c;
30007862282fSBram Moolenaar }
30017862282fSBram Moolenaar else
30029f30f504SBram Moolenaar c = SPELL_TOUPPER(c);
30039ba0eb85SBram Moolenaar
30049ba0eb85SBram Moolenaar if (has_mbyte)
30059ba0eb85SBram Moolenaar {
30069ba0eb85SBram Moolenaar if (d - wcopy >= MAXWLEN - MB_MAXBYTES)
30079ba0eb85SBram Moolenaar break;
30089ba0eb85SBram Moolenaar d += mb_char2bytes(c, d);
30099ba0eb85SBram Moolenaar }
30109ba0eb85SBram Moolenaar else
30119ba0eb85SBram Moolenaar {
30129ba0eb85SBram Moolenaar if (d - wcopy >= MAXWLEN - 1)
30139ba0eb85SBram Moolenaar break;
30149ba0eb85SBram Moolenaar *d++ = c;
30159ba0eb85SBram Moolenaar }
30169ba0eb85SBram Moolenaar }
30179ba0eb85SBram Moolenaar *d = NUL;
30189ba0eb85SBram Moolenaar }
30199ba0eb85SBram Moolenaar
30209ba0eb85SBram Moolenaar /*
302153805d1eSBram Moolenaar * Case-folding may change the number of bytes: Count nr of chars in
302253805d1eSBram Moolenaar * fword[flen] and return the byte length of that many chars in "word".
302353805d1eSBram Moolenaar */
302446a426c9SBram Moolenaar int
nofold_len(char_u * fword,int flen,char_u * word)3025764b23c8SBram Moolenaar nofold_len(char_u *fword, int flen, char_u *word)
302653805d1eSBram Moolenaar {
302753805d1eSBram Moolenaar char_u *p;
302853805d1eSBram Moolenaar int i = 0;
302953805d1eSBram Moolenaar
303091acfffcSBram Moolenaar for (p = fword; p < fword + flen; MB_PTR_ADV(p))
303153805d1eSBram Moolenaar ++i;
303291acfffcSBram Moolenaar for (p = word; i > 0; MB_PTR_ADV(p))
303353805d1eSBram Moolenaar --i;
303453805d1eSBram Moolenaar return (int)(p - word);
303553805d1eSBram Moolenaar }
303653805d1eSBram Moolenaar
30379ba0eb85SBram Moolenaar /*
30389f30f504SBram Moolenaar * Copy "fword" to "cword", fixing case according to "flags".
30399ba0eb85SBram Moolenaar */
304046a426c9SBram Moolenaar void
make_case_word(char_u * fword,char_u * cword,int flags)3041764b23c8SBram Moolenaar make_case_word(char_u *fword, char_u *cword, int flags)
30429ba0eb85SBram Moolenaar {
30439ba0eb85SBram Moolenaar if (flags & WF_ALLCAP)
30440d6f5d97SBram Moolenaar // Make it all upper-case
30459ba0eb85SBram Moolenaar allcap_copy(fword, cword);
30469ba0eb85SBram Moolenaar else if (flags & WF_ONECAP)
30470d6f5d97SBram Moolenaar // Make the first letter upper-case
30489f30f504SBram Moolenaar onecap_copy(fword, cword, TRUE);
30499ba0eb85SBram Moolenaar else
30500d6f5d97SBram Moolenaar // Use goodword as-is.
30519ba0eb85SBram Moolenaar STRCPY(cword, fword);
30529ba0eb85SBram Moolenaar }
30539ba0eb85SBram Moolenaar
#if defined(FEAT_EVAL) || defined(PROTO)
/*
 * Soundfold a string, for soundfold().
 * The sound-folding of the first language of the current window that has SAL
 * items is used.  Without such a language "word" is returned unmodified.
 * Result is in allocated memory, NULL for an error.
 */
    char_u *
eval_soundfold(char_u *word)
{
    char_u  sound[MAXWLEN];
    int     idx;

    // Spell checking is off or there are no languages: return word as-is.
    if (!curwin->w_p_spell || *curwin->w_s->b_p_spl == NUL)
        return vim_strsave(word);

    for (idx = 0; idx < curwin->w_s->b_langp.ga_len; ++idx)
    {
        langp_T *lp = LANGP_ENTRY(curwin->w_s->b_langp, idx);

        if (lp->lp_slang->sl_sal.ga_len <= 0)
            continue;

        // This language supports sound folding: soundfold the word.
        spell_soundfold(lp->lp_slang, word, FALSE, sound);
        return vim_strsave(sound);
    }

    // No language with sound folding, return word as-is.
    return vim_strsave(word);
}
#endif
3083a1ba811aSBram Moolenaar
30849ba0eb85SBram Moolenaar /*
30859ba0eb85SBram Moolenaar * Turn "inword" into its sound-a-like equivalent in "res[MAXWLEN]".
3086d12a1326SBram Moolenaar *
3087d12a1326SBram Moolenaar * There are many ways to turn a word into a sound-a-like representation. The
3088d12a1326SBram Moolenaar * oldest is Soundex (1918!). A nice overview can be found in "Approximate
3089d12a1326SBram Moolenaar * swedish name matching - survey and test of different algorithms" by Klas
3090d12a1326SBram Moolenaar * Erikson.
3091d12a1326SBram Moolenaar *
3092d12a1326SBram Moolenaar * We support two methods:
3093d12a1326SBram Moolenaar * 1. SOFOFROM/SOFOTO do a simple character mapping.
3094d12a1326SBram Moolenaar * 2. SAL items define a more advanced sound-folding (and much slower).
30959ba0eb85SBram Moolenaar */
30969ccfebddSBram Moolenaar void
spell_soundfold(slang_T * slang,char_u * inword,int folded,char_u * res)3097764b23c8SBram Moolenaar spell_soundfold(
3098764b23c8SBram Moolenaar slang_T *slang,
3099764b23c8SBram Moolenaar char_u *inword,
31000d6f5d97SBram Moolenaar int folded, // "inword" is already case-folded
3101764b23c8SBram Moolenaar char_u *res)
310242eeac35SBram Moolenaar {
310342eeac35SBram Moolenaar char_u fword[MAXWLEN];
310442eeac35SBram Moolenaar char_u *word;
310542eeac35SBram Moolenaar
310642eeac35SBram Moolenaar if (slang->sl_sofo)
31070d6f5d97SBram Moolenaar // SOFOFROM and SOFOTO used
310842eeac35SBram Moolenaar spell_soundfold_sofo(slang, inword, res);
310942eeac35SBram Moolenaar else
311042eeac35SBram Moolenaar {
31110d6f5d97SBram Moolenaar // SAL items used. Requires the word to be case-folded.
311242eeac35SBram Moolenaar if (folded)
311342eeac35SBram Moolenaar word = inword;
311442eeac35SBram Moolenaar else
311542eeac35SBram Moolenaar {
31164f135275SBram Moolenaar (void)spell_casefold(curwin,
31174f135275SBram Moolenaar inword, (int)STRLEN(inword), fword, MAXWLEN);
311842eeac35SBram Moolenaar word = fword;
311942eeac35SBram Moolenaar }
312042eeac35SBram Moolenaar
312142eeac35SBram Moolenaar if (has_mbyte)
312242eeac35SBram Moolenaar spell_soundfold_wsal(slang, word, res);
312342eeac35SBram Moolenaar else
312442eeac35SBram Moolenaar spell_soundfold_sal(slang, word, res);
312542eeac35SBram Moolenaar }
312642eeac35SBram Moolenaar }
312742eeac35SBram Moolenaar
312842eeac35SBram Moolenaar /*
312942eeac35SBram Moolenaar * Perform sound folding of "inword" into "res" according to SOFOFROM and
313042eeac35SBram Moolenaar * SOFOTO lines.
313142eeac35SBram Moolenaar */
313242eeac35SBram Moolenaar static void
spell_soundfold_sofo(slang_T * slang,char_u * inword,char_u * res)3133764b23c8SBram Moolenaar spell_soundfold_sofo(slang_T *slang, char_u *inword, char_u *res)
313442eeac35SBram Moolenaar {
313542eeac35SBram Moolenaar char_u *s;
313642eeac35SBram Moolenaar int ri = 0;
313742eeac35SBram Moolenaar int c;
313842eeac35SBram Moolenaar
313942eeac35SBram Moolenaar if (has_mbyte)
314042eeac35SBram Moolenaar {
314142eeac35SBram Moolenaar int prevc = 0;
314242eeac35SBram Moolenaar int *ip;
314342eeac35SBram Moolenaar
31440d6f5d97SBram Moolenaar // The sl_sal_first[] table contains the translation for chars up to
31450d6f5d97SBram Moolenaar // 255, sl_sal the rest.
314642eeac35SBram Moolenaar for (s = inword; *s != NUL; )
314742eeac35SBram Moolenaar {
31480fa313a7SBram Moolenaar c = mb_cptr2char_adv(&s);
31491c465444SBram Moolenaar if (enc_utf8 ? utf_class(c) == 0 : VIM_ISWHITE(c))
315042eeac35SBram Moolenaar c = ' ';
315142eeac35SBram Moolenaar else if (c < 256)
315242eeac35SBram Moolenaar c = slang->sl_sal_first[c];
315342eeac35SBram Moolenaar else
315442eeac35SBram Moolenaar {
315542eeac35SBram Moolenaar ip = ((int **)slang->sl_sal.ga_data)[c & 0xff];
31560d6f5d97SBram Moolenaar if (ip == NULL) // empty list, can't match
315742eeac35SBram Moolenaar c = NUL;
315842eeac35SBram Moolenaar else
31590d6f5d97SBram Moolenaar for (;;) // find "c" in the list
316042eeac35SBram Moolenaar {
31610d6f5d97SBram Moolenaar if (*ip == 0) // not found
316242eeac35SBram Moolenaar {
316342eeac35SBram Moolenaar c = NUL;
316442eeac35SBram Moolenaar break;
316542eeac35SBram Moolenaar }
31660d6f5d97SBram Moolenaar if (*ip == c) // match!
316742eeac35SBram Moolenaar {
316842eeac35SBram Moolenaar c = ip[1];
316942eeac35SBram Moolenaar break;
317042eeac35SBram Moolenaar }
317142eeac35SBram Moolenaar ip += 2;
317242eeac35SBram Moolenaar }
317342eeac35SBram Moolenaar }
317442eeac35SBram Moolenaar
317542eeac35SBram Moolenaar if (c != NUL && c != prevc)
317642eeac35SBram Moolenaar {
317742eeac35SBram Moolenaar ri += mb_char2bytes(c, res + ri);
317842eeac35SBram Moolenaar if (ri + MB_MAXBYTES > MAXWLEN)
317942eeac35SBram Moolenaar break;
318042eeac35SBram Moolenaar prevc = c;
318142eeac35SBram Moolenaar }
318242eeac35SBram Moolenaar }
318342eeac35SBram Moolenaar }
318442eeac35SBram Moolenaar else
318542eeac35SBram Moolenaar {
31860d6f5d97SBram Moolenaar // The sl_sal_first[] table contains the translation.
318742eeac35SBram Moolenaar for (s = inword; (c = *s) != NUL; ++s)
318842eeac35SBram Moolenaar {
31891c465444SBram Moolenaar if (VIM_ISWHITE(c))
319042eeac35SBram Moolenaar c = ' ';
319142eeac35SBram Moolenaar else
319242eeac35SBram Moolenaar c = slang->sl_sal_first[c];
319342eeac35SBram Moolenaar if (c != NUL && (ri == 0 || res[ri - 1] != c))
319442eeac35SBram Moolenaar res[ri++] = c;
319542eeac35SBram Moolenaar }
319642eeac35SBram Moolenaar }
319742eeac35SBram Moolenaar
319842eeac35SBram Moolenaar res[ri] = NUL;
319942eeac35SBram Moolenaar }
320042eeac35SBram Moolenaar
320142eeac35SBram Moolenaar static void
spell_soundfold_sal(slang_T * slang,char_u * inword,char_u * res)3202764b23c8SBram Moolenaar spell_soundfold_sal(slang_T *slang, char_u *inword, char_u *res)
32039ba0eb85SBram Moolenaar {
3204d857f0e0SBram Moolenaar salitem_T *smp;
32059ba0eb85SBram Moolenaar char_u word[MAXWLEN];
320642eeac35SBram Moolenaar char_u *s = inword;
32079ba0eb85SBram Moolenaar char_u *t;
3208d857f0e0SBram Moolenaar char_u *pf;
32099ba0eb85SBram Moolenaar int i, j, z;
3210d857f0e0SBram Moolenaar int reslen;
32119ba0eb85SBram Moolenaar int n, k = 0;
32129ba0eb85SBram Moolenaar int z0;
32139ba0eb85SBram Moolenaar int k0;
32149ba0eb85SBram Moolenaar int n0;
32159ba0eb85SBram Moolenaar int c;
32169ba0eb85SBram Moolenaar int pri;
32179ba0eb85SBram Moolenaar int p0 = -333;
32189ba0eb85SBram Moolenaar int c0;
32199ba0eb85SBram Moolenaar
32200d6f5d97SBram Moolenaar // Remove accents, if wanted. We actually remove all non-word characters.
32210d6f5d97SBram Moolenaar // But keep white space. We need a copy, the word may be changed here.
32229ba0eb85SBram Moolenaar if (slang->sl_rem_accents)
32239ba0eb85SBram Moolenaar {
32249ba0eb85SBram Moolenaar t = word;
322542eeac35SBram Moolenaar while (*s != NUL)
32269ba0eb85SBram Moolenaar {
32271c465444SBram Moolenaar if (VIM_ISWHITE(*s))
3228d857f0e0SBram Moolenaar {
3229d857f0e0SBram Moolenaar *t++ = ' ';
3230d857f0e0SBram Moolenaar s = skipwhite(s);
3231d857f0e0SBram Moolenaar }
32329f30f504SBram Moolenaar else
32339ba0eb85SBram Moolenaar {
3234cc63c647SBram Moolenaar if (spell_iswordp_nmw(s, curwin))
32359ba0eb85SBram Moolenaar *t++ = *s;
32369ba0eb85SBram Moolenaar ++s;
32379ba0eb85SBram Moolenaar }
32389ba0eb85SBram Moolenaar }
32399ba0eb85SBram Moolenaar *t = NUL;
32409ba0eb85SBram Moolenaar }
32419ba0eb85SBram Moolenaar else
3242ef9d6aa7SBram Moolenaar vim_strncpy(word, s, MAXWLEN - 1);
32439ba0eb85SBram Moolenaar
3244d857f0e0SBram Moolenaar smp = (salitem_T *)slang->sl_sal.ga_data;
32459ba0eb85SBram Moolenaar
32469ba0eb85SBram Moolenaar /*
32479ba0eb85SBram Moolenaar * This comes from Aspell phonet.cpp. Converted from C++ to C.
32489f30f504SBram Moolenaar * Changed to keep spaces.
32499ba0eb85SBram Moolenaar */
3250d857f0e0SBram Moolenaar i = reslen = z = 0;
32519ba0eb85SBram Moolenaar while ((c = word[i]) != NUL)
32529ba0eb85SBram Moolenaar {
32530d6f5d97SBram Moolenaar // Start with the first rule that has the character in the word.
32549ba0eb85SBram Moolenaar n = slang->sl_sal_first[c];
32559ba0eb85SBram Moolenaar z0 = 0;
32569ba0eb85SBram Moolenaar
32579ba0eb85SBram Moolenaar if (n >= 0)
32589ba0eb85SBram Moolenaar {
32590d6f5d97SBram Moolenaar // check all rules for the same letter
3260d857f0e0SBram Moolenaar for (; (s = smp[n].sm_lead)[0] == c; ++n)
32619ba0eb85SBram Moolenaar {
32620d6f5d97SBram Moolenaar // Quickly skip entries that don't match the word. Most
32630d6f5d97SBram Moolenaar // entries are less then three chars, optimize for that.
3264d857f0e0SBram Moolenaar k = smp[n].sm_leadlen;
3265d857f0e0SBram Moolenaar if (k > 1)
32669ba0eb85SBram Moolenaar {
3267d857f0e0SBram Moolenaar if (word[i + 1] != s[1])
3268d857f0e0SBram Moolenaar continue;
3269d857f0e0SBram Moolenaar if (k > 2)
3270d857f0e0SBram Moolenaar {
3271d857f0e0SBram Moolenaar for (j = 2; j < k; ++j)
3272d857f0e0SBram Moolenaar if (word[i + j] != s[j])
3273d857f0e0SBram Moolenaar break;
3274d857f0e0SBram Moolenaar if (j < k)
3275d857f0e0SBram Moolenaar continue;
3276d857f0e0SBram Moolenaar }
32779ba0eb85SBram Moolenaar }
32789ba0eb85SBram Moolenaar
327942eeac35SBram Moolenaar if ((pf = smp[n].sm_oneof) != NULL)
32809ba0eb85SBram Moolenaar {
32810d6f5d97SBram Moolenaar // Check for match with one of the chars in "sm_oneof".
3282d857f0e0SBram Moolenaar while (*pf != NUL && *pf != word[i + k])
3283d857f0e0SBram Moolenaar ++pf;
3284d857f0e0SBram Moolenaar if (*pf == NUL)
3285d857f0e0SBram Moolenaar continue;
32869ba0eb85SBram Moolenaar ++k;
32879ba0eb85SBram Moolenaar }
3288d857f0e0SBram Moolenaar s = smp[n].sm_rules;
32890d6f5d97SBram Moolenaar pri = 5; // default priority
32909ba0eb85SBram Moolenaar
32919ba0eb85SBram Moolenaar p0 = *s;
32929ba0eb85SBram Moolenaar k0 = k;
32939ba0eb85SBram Moolenaar while (*s == '-' && k > 1)
32949ba0eb85SBram Moolenaar {
32959ba0eb85SBram Moolenaar k--;
32969ba0eb85SBram Moolenaar s++;
32979ba0eb85SBram Moolenaar }
32989ba0eb85SBram Moolenaar if (*s == '<')
32999ba0eb85SBram Moolenaar s++;
3300d857f0e0SBram Moolenaar if (VIM_ISDIGIT(*s))
33019ba0eb85SBram Moolenaar {
33020d6f5d97SBram Moolenaar // determine priority
33039ba0eb85SBram Moolenaar pri = *s - '0';
33049ba0eb85SBram Moolenaar s++;
33059ba0eb85SBram Moolenaar }
33069ba0eb85SBram Moolenaar if (*s == '^' && *(s + 1) == '^')
33079ba0eb85SBram Moolenaar s++;
33089ba0eb85SBram Moolenaar
33099ba0eb85SBram Moolenaar if (*s == NUL
33109ba0eb85SBram Moolenaar || (*s == '^'
33119f30f504SBram Moolenaar && (i == 0 || !(word[i - 1] == ' '
3312860cae1cSBram Moolenaar || spell_iswordp(word + i - 1, curwin)))
33139ba0eb85SBram Moolenaar && (*(s + 1) != '$'
3314860cae1cSBram Moolenaar || (!spell_iswordp(word + i + k0, curwin))))
33159ba0eb85SBram Moolenaar || (*s == '$' && i > 0
3316860cae1cSBram Moolenaar && spell_iswordp(word + i - 1, curwin)
3317860cae1cSBram Moolenaar && (!spell_iswordp(word + i + k0, curwin))))
33189ba0eb85SBram Moolenaar {
33190d6f5d97SBram Moolenaar // search for followup rules, if:
33200d6f5d97SBram Moolenaar // followup and k > 1 and NO '-' in searchstring
33219ba0eb85SBram Moolenaar c0 = word[i + k - 1];
33229ba0eb85SBram Moolenaar n0 = slang->sl_sal_first[c0];
33239ba0eb85SBram Moolenaar
33249ba0eb85SBram Moolenaar if (slang->sl_followup && k > 1 && n0 >= 0
33259ba0eb85SBram Moolenaar && p0 != '-' && word[i + k] != NUL)
33269ba0eb85SBram Moolenaar {
33270d6f5d97SBram Moolenaar // test follow-up rule for "word[i + k]"
3328d857f0e0SBram Moolenaar for ( ; (s = smp[n0].sm_lead)[0] == c0; ++n0)
33299ba0eb85SBram Moolenaar {
33300d6f5d97SBram Moolenaar // Quickly skip entries that don't match the word.
33310d6f5d97SBram Moolenaar //
3332d857f0e0SBram Moolenaar k0 = smp[n0].sm_leadlen;
3333d857f0e0SBram Moolenaar if (k0 > 1)
3334d857f0e0SBram Moolenaar {
3335d857f0e0SBram Moolenaar if (word[i + k] != s[1])
3336d857f0e0SBram Moolenaar continue;
3337d857f0e0SBram Moolenaar if (k0 > 2)
3338d857f0e0SBram Moolenaar {
3339d857f0e0SBram Moolenaar pf = word + i + k + 1;
3340d857f0e0SBram Moolenaar for (j = 2; j < k0; ++j)
3341d857f0e0SBram Moolenaar if (*pf++ != s[j])
3342d857f0e0SBram Moolenaar break;
3343d857f0e0SBram Moolenaar if (j < k0)
3344d857f0e0SBram Moolenaar continue;
3345d857f0e0SBram Moolenaar }
3346d857f0e0SBram Moolenaar }
3347d857f0e0SBram Moolenaar k0 += k - 1;
33489ba0eb85SBram Moolenaar
334942eeac35SBram Moolenaar if ((pf = smp[n0].sm_oneof) != NULL)
33509ba0eb85SBram Moolenaar {
33510d6f5d97SBram Moolenaar // Check for match with one of the chars in
33520d6f5d97SBram Moolenaar // "sm_oneof".
3353d857f0e0SBram Moolenaar while (*pf != NUL && *pf != word[i + k0])
3354d857f0e0SBram Moolenaar ++pf;
3355d857f0e0SBram Moolenaar if (*pf == NUL)
3356d857f0e0SBram Moolenaar continue;
33579ba0eb85SBram Moolenaar ++k0;
33589ba0eb85SBram Moolenaar }
3359d857f0e0SBram Moolenaar
3360d857f0e0SBram Moolenaar p0 = 5;
3361d857f0e0SBram Moolenaar s = smp[n0].sm_rules;
33629ba0eb85SBram Moolenaar while (*s == '-')
33639ba0eb85SBram Moolenaar {
33640d6f5d97SBram Moolenaar // "k0" gets NOT reduced because
33650d6f5d97SBram Moolenaar // "if (k0 == k)"
33669ba0eb85SBram Moolenaar s++;
33679ba0eb85SBram Moolenaar }
33689ba0eb85SBram Moolenaar if (*s == '<')
33699ba0eb85SBram Moolenaar s++;
3370d857f0e0SBram Moolenaar if (VIM_ISDIGIT(*s))
33719ba0eb85SBram Moolenaar {
33729ba0eb85SBram Moolenaar p0 = *s - '0';
33739ba0eb85SBram Moolenaar s++;
33749ba0eb85SBram Moolenaar }
33759ba0eb85SBram Moolenaar
33769ba0eb85SBram Moolenaar if (*s == NUL
33770d6f5d97SBram Moolenaar // *s == '^' cuts
33789ba0eb85SBram Moolenaar || (*s == '$'
33799c96f592SBram Moolenaar && !spell_iswordp(word + i + k0,
3380860cae1cSBram Moolenaar curwin)))
33819ba0eb85SBram Moolenaar {
33829ba0eb85SBram Moolenaar if (k0 == k)
33830d6f5d97SBram Moolenaar // this is just a piece of the string
33849ba0eb85SBram Moolenaar continue;
33859ba0eb85SBram Moolenaar
33869ba0eb85SBram Moolenaar if (p0 < pri)
33870d6f5d97SBram Moolenaar // priority too low
33889ba0eb85SBram Moolenaar continue;
33890d6f5d97SBram Moolenaar // rule fits; stop search
33909ba0eb85SBram Moolenaar break;
33919ba0eb85SBram Moolenaar }
33929ba0eb85SBram Moolenaar }
33939ba0eb85SBram Moolenaar
3394d857f0e0SBram Moolenaar if (p0 >= pri && smp[n0].sm_lead[0] == c0)
33959ba0eb85SBram Moolenaar continue;
33969ba0eb85SBram Moolenaar }
33979ba0eb85SBram Moolenaar
33980d6f5d97SBram Moolenaar // replace string
3399d857f0e0SBram Moolenaar s = smp[n].sm_to;
34000dc065eeSBram Moolenaar if (s == NULL)
34010dc065eeSBram Moolenaar s = (char_u *)"";
3402d857f0e0SBram Moolenaar pf = smp[n].sm_rules;
3403d857f0e0SBram Moolenaar p0 = (vim_strchr(pf, '<') != NULL) ? 1 : 0;
34049ba0eb85SBram Moolenaar if (p0 == 1 && z == 0)
34059ba0eb85SBram Moolenaar {
34060d6f5d97SBram Moolenaar // rule with '<' is used
3407d857f0e0SBram Moolenaar if (reslen > 0 && *s != NUL && (res[reslen - 1] == c
3408d857f0e0SBram Moolenaar || res[reslen - 1] == *s))
3409d857f0e0SBram Moolenaar reslen--;
34109ba0eb85SBram Moolenaar z0 = 1;
34119ba0eb85SBram Moolenaar z = 1;
34129ba0eb85SBram Moolenaar k0 = 0;
34139ba0eb85SBram Moolenaar while (*s != NUL && word[i + k0] != NUL)
34149ba0eb85SBram Moolenaar {
34159ba0eb85SBram Moolenaar word[i + k0] = *s;
34169ba0eb85SBram Moolenaar k0++;
34179ba0eb85SBram Moolenaar s++;
34189ba0eb85SBram Moolenaar }
34199ba0eb85SBram Moolenaar if (k > k0)
3420a7241f5fSBram Moolenaar STRMOVE(word + i + k0, word + i + k);
34219ba0eb85SBram Moolenaar
34220d6f5d97SBram Moolenaar // new "actual letter"
34239ba0eb85SBram Moolenaar c = word[i];
34249ba0eb85SBram Moolenaar }
34259ba0eb85SBram Moolenaar else
34269ba0eb85SBram Moolenaar {
34270d6f5d97SBram Moolenaar // no '<' rule used
34289ba0eb85SBram Moolenaar i += k - 1;
34299ba0eb85SBram Moolenaar z = 0;
3430d857f0e0SBram Moolenaar while (*s != NUL && s[1] != NUL && reslen < MAXWLEN)
34319ba0eb85SBram Moolenaar {
3432d857f0e0SBram Moolenaar if (reslen == 0 || res[reslen - 1] != *s)
3433a1ba811aSBram Moolenaar res[reslen++] = *s;
34349ba0eb85SBram Moolenaar s++;
34359ba0eb85SBram Moolenaar }
34360d6f5d97SBram Moolenaar // new "actual letter"
34379ba0eb85SBram Moolenaar c = *s;
3438d857f0e0SBram Moolenaar if (strstr((char *)pf, "^^") != NULL)
34399ba0eb85SBram Moolenaar {
34409ba0eb85SBram Moolenaar if (c != NUL)
3441a1ba811aSBram Moolenaar res[reslen++] = c;
3442a7241f5fSBram Moolenaar STRMOVE(word, word + i + 1);
34439ba0eb85SBram Moolenaar i = 0;
34449ba0eb85SBram Moolenaar z0 = 1;
34459ba0eb85SBram Moolenaar }
34469ba0eb85SBram Moolenaar }
34479ba0eb85SBram Moolenaar break;
34489ba0eb85SBram Moolenaar }
34499ba0eb85SBram Moolenaar }
34509ba0eb85SBram Moolenaar }
34511c465444SBram Moolenaar else if (VIM_ISWHITE(c))
34529f30f504SBram Moolenaar {
34539f30f504SBram Moolenaar c = ' ';
34549f30f504SBram Moolenaar k = 1;
34559f30f504SBram Moolenaar }
34569ba0eb85SBram Moolenaar
34579ba0eb85SBram Moolenaar if (z0 == 0)
34589ba0eb85SBram Moolenaar {
3459d857f0e0SBram Moolenaar if (k && !p0 && reslen < MAXWLEN && c != NUL
3460d857f0e0SBram Moolenaar && (!slang->sl_collapse || reslen == 0
3461d857f0e0SBram Moolenaar || res[reslen - 1] != c))
34620d6f5d97SBram Moolenaar // condense only double letters
3463a1ba811aSBram Moolenaar res[reslen++] = c;
34649ba0eb85SBram Moolenaar
34659ba0eb85SBram Moolenaar i++;
34669ba0eb85SBram Moolenaar z = 0;
34679ba0eb85SBram Moolenaar k = 0;
34689ba0eb85SBram Moolenaar }
34699ba0eb85SBram Moolenaar }
34709ba0eb85SBram Moolenaar
3471d857f0e0SBram Moolenaar res[reslen] = NUL;
34729ba0eb85SBram Moolenaar }
34739ba0eb85SBram Moolenaar
3474a1ba811aSBram Moolenaar /*
3475a1ba811aSBram Moolenaar * Turn "inword" into its sound-a-like equivalent in "res[MAXWLEN]".
3476a1ba811aSBram Moolenaar * Multi-byte version of spell_soundfold().
3477a1ba811aSBram Moolenaar */
3478a1ba811aSBram Moolenaar static void
spell_soundfold_wsal(slang_T * slang,char_u * inword,char_u * res)3479764b23c8SBram Moolenaar spell_soundfold_wsal(slang_T *slang, char_u *inword, char_u *res)
3480a1ba811aSBram Moolenaar {
348142eeac35SBram Moolenaar salitem_T *smp = (salitem_T *)slang->sl_sal.ga_data;
3482a1ba811aSBram Moolenaar int word[MAXWLEN];
3483a1ba811aSBram Moolenaar int wres[MAXWLEN];
3484a1ba811aSBram Moolenaar int l;
3485a1ba811aSBram Moolenaar char_u *s;
3486a1ba811aSBram Moolenaar int *ws;
3487a1ba811aSBram Moolenaar char_u *t;
3488a1ba811aSBram Moolenaar int *pf;
3489a1ba811aSBram Moolenaar int i, j, z;
3490a1ba811aSBram Moolenaar int reslen;
3491a1ba811aSBram Moolenaar int n, k = 0;
3492a1ba811aSBram Moolenaar int z0;
3493a1ba811aSBram Moolenaar int k0;
3494a1ba811aSBram Moolenaar int n0;
3495a1ba811aSBram Moolenaar int c;
3496a1ba811aSBram Moolenaar int pri;
3497a1ba811aSBram Moolenaar int p0 = -333;
3498a1ba811aSBram Moolenaar int c0;
3499a1ba811aSBram Moolenaar int did_white = FALSE;
3500f9de140eSBram Moolenaar int wordlen;
3501f9de140eSBram Moolenaar
3502a1ba811aSBram Moolenaar
3503a1ba811aSBram Moolenaar /*
3504a1ba811aSBram Moolenaar * Convert the multi-byte string to a wide-character string.
3505a1ba811aSBram Moolenaar * Remove accents, if wanted. We actually remove all non-word characters.
3506a1ba811aSBram Moolenaar * But keep white space.
3507a1ba811aSBram Moolenaar */
3508f9de140eSBram Moolenaar wordlen = 0;
3509a1ba811aSBram Moolenaar for (s = inword; *s != NUL; )
3510a1ba811aSBram Moolenaar {
3511a1ba811aSBram Moolenaar t = s;
35120fa313a7SBram Moolenaar c = mb_cptr2char_adv(&s);
3513a1ba811aSBram Moolenaar if (slang->sl_rem_accents)
3514a1ba811aSBram Moolenaar {
35151c465444SBram Moolenaar if (enc_utf8 ? utf_class(c) == 0 : VIM_ISWHITE(c))
3516a1ba811aSBram Moolenaar {
3517a1ba811aSBram Moolenaar if (did_white)
3518a1ba811aSBram Moolenaar continue;
3519a1ba811aSBram Moolenaar c = ' ';
3520a1ba811aSBram Moolenaar did_white = TRUE;
3521a1ba811aSBram Moolenaar }
3522a1ba811aSBram Moolenaar else
3523a1ba811aSBram Moolenaar {
3524a1ba811aSBram Moolenaar did_white = FALSE;
3525cc63c647SBram Moolenaar if (!spell_iswordp_nmw(t, curwin))
3526a1ba811aSBram Moolenaar continue;
3527a1ba811aSBram Moolenaar }
3528a1ba811aSBram Moolenaar }
3529f9de140eSBram Moolenaar word[wordlen++] = c;
3530a1ba811aSBram Moolenaar }
3531f9de140eSBram Moolenaar word[wordlen] = NUL;
3532a1ba811aSBram Moolenaar
3533a1ba811aSBram Moolenaar /*
3534f9de140eSBram Moolenaar * This algorithm comes from Aspell phonet.cpp.
3535a1ba811aSBram Moolenaar * Converted from C++ to C. Added support for multi-byte chars.
3536a1ba811aSBram Moolenaar * Changed to keep spaces.
3537a1ba811aSBram Moolenaar */
3538a1ba811aSBram Moolenaar i = reslen = z = 0;
3539a1ba811aSBram Moolenaar while ((c = word[i]) != NUL)
3540a1ba811aSBram Moolenaar {
35410d6f5d97SBram Moolenaar // Start with the first rule that has the character in the word.
3542a1ba811aSBram Moolenaar n = slang->sl_sal_first[c & 0xff];
3543a1ba811aSBram Moolenaar z0 = 0;
3544a1ba811aSBram Moolenaar
3545a1ba811aSBram Moolenaar if (n >= 0)
3546a1ba811aSBram Moolenaar {
35470d6f5d97SBram Moolenaar // Check all rules for the same index byte.
35480d6f5d97SBram Moolenaar // If c is 0x300 need extra check for the end of the array, as
35490d6f5d97SBram Moolenaar // (c & 0xff) is NUL.
355095e8579eSBram Moolenaar for (; ((ws = smp[n].sm_lead_w)[0] & 0xff) == (c & 0xff)
355195e8579eSBram Moolenaar && ws[0] != NUL; ++n)
3552a1ba811aSBram Moolenaar {
35530d6f5d97SBram Moolenaar // Quickly skip entries that don't match the word. Most
35540d6f5d97SBram Moolenaar // entries are less then three chars, optimize for that.
355542eeac35SBram Moolenaar if (c != ws[0])
355642eeac35SBram Moolenaar continue;
3557a1ba811aSBram Moolenaar k = smp[n].sm_leadlen;
3558a1ba811aSBram Moolenaar if (k > 1)
3559a1ba811aSBram Moolenaar {
3560a1ba811aSBram Moolenaar if (word[i + 1] != ws[1])
3561a1ba811aSBram Moolenaar continue;
3562a1ba811aSBram Moolenaar if (k > 2)
3563a1ba811aSBram Moolenaar {
3564a1ba811aSBram Moolenaar for (j = 2; j < k; ++j)
3565a1ba811aSBram Moolenaar if (word[i + j] != ws[j])
3566a1ba811aSBram Moolenaar break;
3567a1ba811aSBram Moolenaar if (j < k)
3568a1ba811aSBram Moolenaar continue;
3569a1ba811aSBram Moolenaar }
3570a1ba811aSBram Moolenaar }
3571a1ba811aSBram Moolenaar
357242eeac35SBram Moolenaar if ((pf = smp[n].sm_oneof_w) != NULL)
3573a1ba811aSBram Moolenaar {
35740d6f5d97SBram Moolenaar // Check for match with one of the chars in "sm_oneof".
3575a1ba811aSBram Moolenaar while (*pf != NUL && *pf != word[i + k])
3576a1ba811aSBram Moolenaar ++pf;
3577a1ba811aSBram Moolenaar if (*pf == NUL)
3578a1ba811aSBram Moolenaar continue;
3579a1ba811aSBram Moolenaar ++k;
3580a1ba811aSBram Moolenaar }
3581a1ba811aSBram Moolenaar s = smp[n].sm_rules;
35820d6f5d97SBram Moolenaar pri = 5; // default priority
3583a1ba811aSBram Moolenaar
3584a1ba811aSBram Moolenaar p0 = *s;
3585a1ba811aSBram Moolenaar k0 = k;
3586a1ba811aSBram Moolenaar while (*s == '-' && k > 1)
3587a1ba811aSBram Moolenaar {
3588a1ba811aSBram Moolenaar k--;
3589a1ba811aSBram Moolenaar s++;
3590a1ba811aSBram Moolenaar }
3591a1ba811aSBram Moolenaar if (*s == '<')
3592a1ba811aSBram Moolenaar s++;
3593a1ba811aSBram Moolenaar if (VIM_ISDIGIT(*s))
3594a1ba811aSBram Moolenaar {
35950d6f5d97SBram Moolenaar // determine priority
3596a1ba811aSBram Moolenaar pri = *s - '0';
3597a1ba811aSBram Moolenaar s++;
3598a1ba811aSBram Moolenaar }
3599a1ba811aSBram Moolenaar if (*s == '^' && *(s + 1) == '^')
3600a1ba811aSBram Moolenaar s++;
3601a1ba811aSBram Moolenaar
3602a1ba811aSBram Moolenaar if (*s == NUL
3603a1ba811aSBram Moolenaar || (*s == '^'
3604a1ba811aSBram Moolenaar && (i == 0 || !(word[i - 1] == ' '
3605860cae1cSBram Moolenaar || spell_iswordp_w(word + i - 1, curwin)))
3606a1ba811aSBram Moolenaar && (*(s + 1) != '$'
3607860cae1cSBram Moolenaar || (!spell_iswordp_w(word + i + k0, curwin))))
3608a1ba811aSBram Moolenaar || (*s == '$' && i > 0
3609860cae1cSBram Moolenaar && spell_iswordp_w(word + i - 1, curwin)
3610860cae1cSBram Moolenaar && (!spell_iswordp_w(word + i + k0, curwin))))
3611a1ba811aSBram Moolenaar {
36120d6f5d97SBram Moolenaar // search for followup rules, if:
36130d6f5d97SBram Moolenaar // followup and k > 1 and NO '-' in searchstring
3614a1ba811aSBram Moolenaar c0 = word[i + k - 1];
3615a1ba811aSBram Moolenaar n0 = slang->sl_sal_first[c0 & 0xff];
3616a1ba811aSBram Moolenaar
3617a1ba811aSBram Moolenaar if (slang->sl_followup && k > 1 && n0 >= 0
3618a1ba811aSBram Moolenaar && p0 != '-' && word[i + k] != NUL)
3619a1ba811aSBram Moolenaar {
36200d6f5d97SBram Moolenaar // Test follow-up rule for "word[i + k]"; loop over
36210d6f5d97SBram Moolenaar // all entries with the same index byte.
3622a1ba811aSBram Moolenaar for ( ; ((ws = smp[n0].sm_lead_w)[0] & 0xff)
3623a1ba811aSBram Moolenaar == (c0 & 0xff); ++n0)
3624a1ba811aSBram Moolenaar {
36250d6f5d97SBram Moolenaar // Quickly skip entries that don't match the word.
362642eeac35SBram Moolenaar if (c0 != ws[0])
362742eeac35SBram Moolenaar continue;
3628a1ba811aSBram Moolenaar k0 = smp[n0].sm_leadlen;
3629a1ba811aSBram Moolenaar if (k0 > 1)
3630a1ba811aSBram Moolenaar {
3631a1ba811aSBram Moolenaar if (word[i + k] != ws[1])
3632a1ba811aSBram Moolenaar continue;
3633a1ba811aSBram Moolenaar if (k0 > 2)
3634a1ba811aSBram Moolenaar {
3635a1ba811aSBram Moolenaar pf = word + i + k + 1;
3636a1ba811aSBram Moolenaar for (j = 2; j < k0; ++j)
3637a1ba811aSBram Moolenaar if (*pf++ != ws[j])
3638a1ba811aSBram Moolenaar break;
3639a1ba811aSBram Moolenaar if (j < k0)
3640a1ba811aSBram Moolenaar continue;
3641a1ba811aSBram Moolenaar }
3642a1ba811aSBram Moolenaar }
3643a1ba811aSBram Moolenaar k0 += k - 1;
3644a1ba811aSBram Moolenaar
364542eeac35SBram Moolenaar if ((pf = smp[n0].sm_oneof_w) != NULL)
3646a1ba811aSBram Moolenaar {
36470d6f5d97SBram Moolenaar // Check for match with one of the chars in
36480d6f5d97SBram Moolenaar // "sm_oneof".
3649a1ba811aSBram Moolenaar while (*pf != NUL && *pf != word[i + k0])
3650a1ba811aSBram Moolenaar ++pf;
3651a1ba811aSBram Moolenaar if (*pf == NUL)
3652a1ba811aSBram Moolenaar continue;
3653a1ba811aSBram Moolenaar ++k0;
3654a1ba811aSBram Moolenaar }
3655a1ba811aSBram Moolenaar
3656a1ba811aSBram Moolenaar p0 = 5;
3657a1ba811aSBram Moolenaar s = smp[n0].sm_rules;
3658a1ba811aSBram Moolenaar while (*s == '-')
3659a1ba811aSBram Moolenaar {
36600d6f5d97SBram Moolenaar // "k0" gets NOT reduced because
36610d6f5d97SBram Moolenaar // "if (k0 == k)"
3662a1ba811aSBram Moolenaar s++;
3663a1ba811aSBram Moolenaar }
3664a1ba811aSBram Moolenaar if (*s == '<')
3665a1ba811aSBram Moolenaar s++;
3666a1ba811aSBram Moolenaar if (VIM_ISDIGIT(*s))
3667a1ba811aSBram Moolenaar {
3668a1ba811aSBram Moolenaar p0 = *s - '0';
3669a1ba811aSBram Moolenaar s++;
3670a1ba811aSBram Moolenaar }
3671a1ba811aSBram Moolenaar
3672a1ba811aSBram Moolenaar if (*s == NUL
36730d6f5d97SBram Moolenaar // *s == '^' cuts
3674a1ba811aSBram Moolenaar || (*s == '$'
36759c96f592SBram Moolenaar && !spell_iswordp_w(word + i + k0,
3676860cae1cSBram Moolenaar curwin)))
3677a1ba811aSBram Moolenaar {
3678a1ba811aSBram Moolenaar if (k0 == k)
36790d6f5d97SBram Moolenaar // this is just a piece of the string
3680a1ba811aSBram Moolenaar continue;
3681a1ba811aSBram Moolenaar
3682a1ba811aSBram Moolenaar if (p0 < pri)
36830d6f5d97SBram Moolenaar // priority too low
3684a1ba811aSBram Moolenaar continue;
36850d6f5d97SBram Moolenaar // rule fits; stop search
3686a1ba811aSBram Moolenaar break;
3687a1ba811aSBram Moolenaar }
3688a1ba811aSBram Moolenaar }
3689a1ba811aSBram Moolenaar
3690a1ba811aSBram Moolenaar if (p0 >= pri && (smp[n0].sm_lead_w[0] & 0xff)
3691a1ba811aSBram Moolenaar == (c0 & 0xff))
3692a1ba811aSBram Moolenaar continue;
3693a1ba811aSBram Moolenaar }
3694a1ba811aSBram Moolenaar
36950d6f5d97SBram Moolenaar // replace string
3696a1ba811aSBram Moolenaar ws = smp[n].sm_to_w;
3697a1ba811aSBram Moolenaar s = smp[n].sm_rules;
3698a1ba811aSBram Moolenaar p0 = (vim_strchr(s, '<') != NULL) ? 1 : 0;
3699a1ba811aSBram Moolenaar if (p0 == 1 && z == 0)
3700a1ba811aSBram Moolenaar {
37010d6f5d97SBram Moolenaar // rule with '<' is used
37020dc065eeSBram Moolenaar if (reslen > 0 && ws != NULL && *ws != NUL
37030dc065eeSBram Moolenaar && (wres[reslen - 1] == c
3704a1ba811aSBram Moolenaar || wres[reslen - 1] == *ws))
3705a1ba811aSBram Moolenaar reslen--;
3706a1ba811aSBram Moolenaar z0 = 1;
3707a1ba811aSBram Moolenaar z = 1;
3708a1ba811aSBram Moolenaar k0 = 0;
37090dc065eeSBram Moolenaar if (ws != NULL)
3710a1ba811aSBram Moolenaar while (*ws != NUL && word[i + k0] != NUL)
3711a1ba811aSBram Moolenaar {
3712a1ba811aSBram Moolenaar word[i + k0] = *ws;
3713a1ba811aSBram Moolenaar k0++;
3714a1ba811aSBram Moolenaar ws++;
3715a1ba811aSBram Moolenaar }
3716a1ba811aSBram Moolenaar if (k > k0)
3717a1ba811aSBram Moolenaar mch_memmove(word + i + k0, word + i + k,
3718f9de140eSBram Moolenaar sizeof(int) * (wordlen - (i + k) + 1));
3719a1ba811aSBram Moolenaar
37200d6f5d97SBram Moolenaar // new "actual letter"
3721a1ba811aSBram Moolenaar c = word[i];
3722a1ba811aSBram Moolenaar }
3723a1ba811aSBram Moolenaar else
3724a1ba811aSBram Moolenaar {
37250d6f5d97SBram Moolenaar // no '<' rule used
3726a1ba811aSBram Moolenaar i += k - 1;
3727a1ba811aSBram Moolenaar z = 0;
37280dc065eeSBram Moolenaar if (ws != NULL)
37290dc065eeSBram Moolenaar while (*ws != NUL && ws[1] != NUL
37300dc065eeSBram Moolenaar && reslen < MAXWLEN)
3731a1ba811aSBram Moolenaar {
3732a1ba811aSBram Moolenaar if (reslen == 0 || wres[reslen - 1] != *ws)
3733a1ba811aSBram Moolenaar wres[reslen++] = *ws;
3734a1ba811aSBram Moolenaar ws++;
3735a1ba811aSBram Moolenaar }
37360d6f5d97SBram Moolenaar // new "actual letter"
37370dc065eeSBram Moolenaar if (ws == NULL)
37380dc065eeSBram Moolenaar c = NUL;
37390dc065eeSBram Moolenaar else
3740a1ba811aSBram Moolenaar c = *ws;
3741a1ba811aSBram Moolenaar if (strstr((char *)s, "^^") != NULL)
3742a1ba811aSBram Moolenaar {
3743a1ba811aSBram Moolenaar if (c != NUL)
3744a1ba811aSBram Moolenaar wres[reslen++] = c;
3745a1ba811aSBram Moolenaar mch_memmove(word, word + i + 1,
3746f9de140eSBram Moolenaar sizeof(int) * (wordlen - (i + 1) + 1));
3747a1ba811aSBram Moolenaar i = 0;
3748a1ba811aSBram Moolenaar z0 = 1;
3749a1ba811aSBram Moolenaar }
3750a1ba811aSBram Moolenaar }
3751a1ba811aSBram Moolenaar break;
3752a1ba811aSBram Moolenaar }
3753a1ba811aSBram Moolenaar }
3754a1ba811aSBram Moolenaar }
37551c465444SBram Moolenaar else if (VIM_ISWHITE(c))
3756a1ba811aSBram Moolenaar {
3757a1ba811aSBram Moolenaar c = ' ';
3758a1ba811aSBram Moolenaar k = 1;
3759a1ba811aSBram Moolenaar }
3760a1ba811aSBram Moolenaar
3761a1ba811aSBram Moolenaar if (z0 == 0)
3762a1ba811aSBram Moolenaar {
3763a1ba811aSBram Moolenaar if (k && !p0 && reslen < MAXWLEN && c != NUL
3764a1ba811aSBram Moolenaar && (!slang->sl_collapse || reslen == 0
3765a1ba811aSBram Moolenaar || wres[reslen - 1] != c))
37660d6f5d97SBram Moolenaar // condense only double letters
3767a1ba811aSBram Moolenaar wres[reslen++] = c;
3768a1ba811aSBram Moolenaar
3769a1ba811aSBram Moolenaar i++;
3770a1ba811aSBram Moolenaar z = 0;
3771a1ba811aSBram Moolenaar k = 0;
3772a1ba811aSBram Moolenaar }
3773a1ba811aSBram Moolenaar }
3774a1ba811aSBram Moolenaar
37750d6f5d97SBram Moolenaar // Convert wide characters in "wres" to a multi-byte string in "res".
3776a1ba811aSBram Moolenaar l = 0;
3777a1ba811aSBram Moolenaar for (n = 0; n < reslen; ++n)
3778a1ba811aSBram Moolenaar {
3779a1ba811aSBram Moolenaar l += mb_char2bytes(wres[n], res + l);
3780a1ba811aSBram Moolenaar if (l + MB_MAXBYTES > MAXWLEN)
3781a1ba811aSBram Moolenaar break;
3782a1ba811aSBram Moolenaar }
3783a1ba811aSBram Moolenaar res[l] = NUL;
3784a1ba811aSBram Moolenaar }
3785a1ba811aSBram Moolenaar
37869f30f504SBram Moolenaar /*
3787362e1a30SBram Moolenaar * ":spellinfo"
3788362e1a30SBram Moolenaar */
3789362e1a30SBram Moolenaar void
ex_spellinfo(exarg_T * eap UNUSED)3790764b23c8SBram Moolenaar ex_spellinfo(exarg_T *eap UNUSED)
3791362e1a30SBram Moolenaar {
3792362e1a30SBram Moolenaar int lpi;
3793362e1a30SBram Moolenaar langp_T *lp;
3794362e1a30SBram Moolenaar char_u *p;
3795362e1a30SBram Moolenaar
3796362e1a30SBram Moolenaar if (no_spell_checking(curwin))
3797362e1a30SBram Moolenaar return;
3798362e1a30SBram Moolenaar
3799362e1a30SBram Moolenaar msg_start();
3800860cae1cSBram Moolenaar for (lpi = 0; lpi < curwin->w_s->b_langp.ga_len && !got_int; ++lpi)
3801362e1a30SBram Moolenaar {
3802860cae1cSBram Moolenaar lp = LANGP_ENTRY(curwin->w_s->b_langp, lpi);
380332526b3cSBram Moolenaar msg_puts("file: ");
380432526b3cSBram Moolenaar msg_puts((char *)lp->lp_slang->sl_fname);
3805362e1a30SBram Moolenaar msg_putchar('\n');
3806362e1a30SBram Moolenaar p = lp->lp_slang->sl_info;
3807362e1a30SBram Moolenaar if (p != NULL)
3808362e1a30SBram Moolenaar {
380932526b3cSBram Moolenaar msg_puts((char *)p);
3810362e1a30SBram Moolenaar msg_putchar('\n');
3811362e1a30SBram Moolenaar }
3812362e1a30SBram Moolenaar }
3813362e1a30SBram Moolenaar msg_end();
3814362e1a30SBram Moolenaar }
3815362e1a30SBram Moolenaar
// Flags for spell_dump_compl(); each one is a separate bit so that they can
// be combined with "|".
#define DUMPFLAG_KEEPCASE   1	// round 2: keep-case tree
#define DUMPFLAG_COUNT	    2	// include word count
#define DUMPFLAG_ICASE	    4	// ignore case when finding matches
#define DUMPFLAG_ONECAP	    8	// pattern starts with capital
#define DUMPFLAG_ALLCAP	    16	// pattern is all capitals
38214770d09aSBram Moolenaar
3822f417f2b6SBram Moolenaar /*
3823f417f2b6SBram Moolenaar * ":spelldump"
3824f417f2b6SBram Moolenaar */
3825f417f2b6SBram Moolenaar void
ex_spelldump(exarg_T * eap)3826764b23c8SBram Moolenaar ex_spelldump(exarg_T *eap)
3827f417f2b6SBram Moolenaar {
38287a18fdc8SBram Moolenaar char_u *spl;
38297a18fdc8SBram Moolenaar long dummy;
38307a18fdc8SBram Moolenaar
3831b475fb91SBram Moolenaar if (no_spell_checking(curwin))
3832b475fb91SBram Moolenaar return;
3833dd1f426bSBram Moolenaar (void)get_option_value((char_u*)"spl", &dummy, &spl, OPT_LOCAL);
3834b475fb91SBram Moolenaar
38350d6f5d97SBram Moolenaar // Create a new empty buffer in a new window.
3836b475fb91SBram Moolenaar do_cmdline_cmd((char_u *)"new");
38377a18fdc8SBram Moolenaar
38380d6f5d97SBram Moolenaar // enable spelling locally in the new window
38397a18fdc8SBram Moolenaar set_option_value((char_u*)"spell", TRUE, (char_u*)"", OPT_LOCAL);
38407a18fdc8SBram Moolenaar set_option_value((char_u*)"spl", dummy, spl, OPT_LOCAL);
38417a18fdc8SBram Moolenaar vim_free(spl);
38427a18fdc8SBram Moolenaar
3843b5aedf3eSBram Moolenaar if (!BUFEMPTY())
3844b475fb91SBram Moolenaar return;
3845b475fb91SBram Moolenaar
3846860cae1cSBram Moolenaar spell_dump_compl(NULL, 0, NULL, eap->forceit ? DUMPFLAG_COUNT : 0);
3847b475fb91SBram Moolenaar
38480d6f5d97SBram Moolenaar // Delete the empty line that we started with.
3849b475fb91SBram Moolenaar if (curbuf->b_ml.ml_line_count > 1)
3850ca70c07bSBram Moolenaar ml_delete(curbuf->b_ml.ml_line_count);
3851b475fb91SBram Moolenaar
3852b475fb91SBram Moolenaar redraw_later(NOT_VALID);
3853b475fb91SBram Moolenaar }
3854b475fb91SBram Moolenaar
3855b475fb91SBram Moolenaar /*
3856b475fb91SBram Moolenaar * Go through all possible words and:
3857b475fb91SBram Moolenaar * 1. When "pat" is NULL: dump a list of all words in the current buffer.
3858b475fb91SBram Moolenaar * "ic" and "dir" are not used.
3859b475fb91SBram Moolenaar * 2. When "pat" is not NULL: add matching words to insert mode completion.
3860b475fb91SBram Moolenaar */
3861b475fb91SBram Moolenaar void
spell_dump_compl(char_u * pat,int ic,int * dir,int dumpflags_arg)3862764b23c8SBram Moolenaar spell_dump_compl(
38630d6f5d97SBram Moolenaar char_u *pat, // leading part of the word
38640d6f5d97SBram Moolenaar int ic, // ignore case
38650d6f5d97SBram Moolenaar int *dir, // direction for adding matches
38660d6f5d97SBram Moolenaar int dumpflags_arg) // DUMPFLAG_*
3867b475fb91SBram Moolenaar {
3868f417f2b6SBram Moolenaar langp_T *lp;
3869f417f2b6SBram Moolenaar slang_T *slang;
3870f417f2b6SBram Moolenaar idx_T arridx[MAXWLEN];
3871f417f2b6SBram Moolenaar int curi[MAXWLEN];
3872f417f2b6SBram Moolenaar char_u word[MAXWLEN];
3873f417f2b6SBram Moolenaar int c;
3874f417f2b6SBram Moolenaar char_u *byts;
3875f417f2b6SBram Moolenaar idx_T *idxs;
3876f417f2b6SBram Moolenaar linenr_T lnum = 0;
3877f417f2b6SBram Moolenaar int round;
3878f417f2b6SBram Moolenaar int depth;
3879f417f2b6SBram Moolenaar int n;
3880f417f2b6SBram Moolenaar int flags;
38810d6f5d97SBram Moolenaar char_u *region_names = NULL; // region names being used
38820d6f5d97SBram Moolenaar int do_region = TRUE; // dump region names and numbers
38837887d88aSBram Moolenaar char_u *p;
3884ac6e65f8SBram Moolenaar int lpi;
3885b475fb91SBram Moolenaar int dumpflags = dumpflags_arg;
3886b475fb91SBram Moolenaar int patlen;
3887f417f2b6SBram Moolenaar
38880d6f5d97SBram Moolenaar // When ignoring case or when the pattern starts with capital pass this on
38890d6f5d97SBram Moolenaar // to dump_word().
3890d0131a8bSBram Moolenaar if (pat != NULL)
3891d0131a8bSBram Moolenaar {
3892b475fb91SBram Moolenaar if (ic)
3893b475fb91SBram Moolenaar dumpflags |= DUMPFLAG_ICASE;
3894d0131a8bSBram Moolenaar else
3895d0131a8bSBram Moolenaar {
3896d0131a8bSBram Moolenaar n = captype(pat, NULL);
3897d0131a8bSBram Moolenaar if (n == WF_ONECAP)
3898d0131a8bSBram Moolenaar dumpflags |= DUMPFLAG_ONECAP;
3899264b74faSBram Moolenaar else if (n == WF_ALLCAP && (int)STRLEN(pat) > mb_ptr2len(pat))
3900d0131a8bSBram Moolenaar dumpflags |= DUMPFLAG_ALLCAP;
3901d0131a8bSBram Moolenaar }
3902d0131a8bSBram Moolenaar }
3903f417f2b6SBram Moolenaar
39040d6f5d97SBram Moolenaar // Find out if we can support regions: All languages must support the same
39050d6f5d97SBram Moolenaar // regions or none at all.
3906860cae1cSBram Moolenaar for (lpi = 0; lpi < curwin->w_s->b_langp.ga_len; ++lpi)
39077887d88aSBram Moolenaar {
3908860cae1cSBram Moolenaar lp = LANGP_ENTRY(curwin->w_s->b_langp, lpi);
39097887d88aSBram Moolenaar p = lp->lp_slang->sl_regions;
39107887d88aSBram Moolenaar if (p[0] != 0)
39117887d88aSBram Moolenaar {
39120d6f5d97SBram Moolenaar if (region_names == NULL) // first language with regions
39137887d88aSBram Moolenaar region_names = p;
39147887d88aSBram Moolenaar else if (STRCMP(region_names, p) != 0)
39157887d88aSBram Moolenaar {
39160d6f5d97SBram Moolenaar do_region = FALSE; // region names are different
39177887d88aSBram Moolenaar break;
39187887d88aSBram Moolenaar }
39197887d88aSBram Moolenaar }
39207887d88aSBram Moolenaar }
39217887d88aSBram Moolenaar
39227887d88aSBram Moolenaar if (do_region && region_names != NULL)
39237887d88aSBram Moolenaar {
3924b475fb91SBram Moolenaar if (pat == NULL)
3925b475fb91SBram Moolenaar {
39267887d88aSBram Moolenaar vim_snprintf((char *)IObuff, IOSIZE, "/regions=%s", region_names);
39277887d88aSBram Moolenaar ml_append(lnum++, IObuff, (colnr_T)0, FALSE);
39287887d88aSBram Moolenaar }
3929b475fb91SBram Moolenaar }
39307887d88aSBram Moolenaar else
39317887d88aSBram Moolenaar do_region = FALSE;
39327887d88aSBram Moolenaar
39337887d88aSBram Moolenaar /*
39347887d88aSBram Moolenaar * Loop over all files loaded for the entries in 'spelllang'.
39357887d88aSBram Moolenaar */
3936860cae1cSBram Moolenaar for (lpi = 0; lpi < curwin->w_s->b_langp.ga_len; ++lpi)
3937f417f2b6SBram Moolenaar {
3938860cae1cSBram Moolenaar lp = LANGP_ENTRY(curwin->w_s->b_langp, lpi);
3939f417f2b6SBram Moolenaar slang = lp->lp_slang;
39400d6f5d97SBram Moolenaar if (slang->sl_fbyts == NULL) // reloading failed
3941ac6e65f8SBram Moolenaar continue;
3942f417f2b6SBram Moolenaar
3943b475fb91SBram Moolenaar if (pat == NULL)
3944b475fb91SBram Moolenaar {
3945f417f2b6SBram Moolenaar vim_snprintf((char *)IObuff, IOSIZE, "# file: %s", slang->sl_fname);
3946f417f2b6SBram Moolenaar ml_append(lnum++, IObuff, (colnr_T)0, FALSE);
3947b475fb91SBram Moolenaar }
3948b475fb91SBram Moolenaar
39490d6f5d97SBram Moolenaar // When matching with a pattern and there are no prefixes only use
39500d6f5d97SBram Moolenaar // parts of the tree that match "pat".
3951b475fb91SBram Moolenaar if (pat != NULL && slang->sl_pbyts == NULL)
3952a93fa7eeSBram Moolenaar patlen = (int)STRLEN(pat);
3953b475fb91SBram Moolenaar else
3954eb3593b3SBram Moolenaar patlen = -1;
3955f417f2b6SBram Moolenaar
39560d6f5d97SBram Moolenaar // round 1: case-folded tree
39570d6f5d97SBram Moolenaar // round 2: keep-case tree
3958f417f2b6SBram Moolenaar for (round = 1; round <= 2; ++round)
3959f417f2b6SBram Moolenaar {
3960f417f2b6SBram Moolenaar if (round == 1)
3961f417f2b6SBram Moolenaar {
3962b475fb91SBram Moolenaar dumpflags &= ~DUMPFLAG_KEEPCASE;
3963f417f2b6SBram Moolenaar byts = slang->sl_fbyts;
3964f417f2b6SBram Moolenaar idxs = slang->sl_fidxs;
3965f417f2b6SBram Moolenaar }
3966f417f2b6SBram Moolenaar else
3967f417f2b6SBram Moolenaar {
3968b475fb91SBram Moolenaar dumpflags |= DUMPFLAG_KEEPCASE;
3969f417f2b6SBram Moolenaar byts = slang->sl_kbyts;
3970f417f2b6SBram Moolenaar idxs = slang->sl_kidxs;
3971f417f2b6SBram Moolenaar }
3972f417f2b6SBram Moolenaar if (byts == NULL)
39730d6f5d97SBram Moolenaar continue; // array is empty
3974f417f2b6SBram Moolenaar
3975f417f2b6SBram Moolenaar depth = 0;
3976f417f2b6SBram Moolenaar arridx[0] = 0;
3977f417f2b6SBram Moolenaar curi[0] = 1;
3978b475fb91SBram Moolenaar while (depth >= 0 && !got_int
39797591bb39SBram Moolenaar && (pat == NULL || !ins_compl_interrupted()))
3980f417f2b6SBram Moolenaar {
3981f417f2b6SBram Moolenaar if (curi[depth] > byts[arridx[depth]])
3982f417f2b6SBram Moolenaar {
39830d6f5d97SBram Moolenaar // Done all bytes at this node, go up one level.
3984f417f2b6SBram Moolenaar --depth;
3985f417f2b6SBram Moolenaar line_breakcheck();
3986472e8597SBram Moolenaar ins_compl_check_keys(50, FALSE);
3987f417f2b6SBram Moolenaar }
3988f417f2b6SBram Moolenaar else
3989f417f2b6SBram Moolenaar {
39900d6f5d97SBram Moolenaar // Do one more byte at this node.
3991f417f2b6SBram Moolenaar n = arridx[depth] + curi[depth];
3992f417f2b6SBram Moolenaar ++curi[depth];
3993f417f2b6SBram Moolenaar c = byts[n];
3994f417f2b6SBram Moolenaar if (c == 0)
3995f417f2b6SBram Moolenaar {
39960d6f5d97SBram Moolenaar // End of word, deal with the word.
39970d6f5d97SBram Moolenaar // Don't use keep-case words in the fold-case tree,
39980d6f5d97SBram Moolenaar // they will appear in the keep-case tree.
39990d6f5d97SBram Moolenaar // Only use the word when the region matches.
4000f417f2b6SBram Moolenaar flags = (int)idxs[n];
4001f417f2b6SBram Moolenaar if ((round == 2 || (flags & WF_KEEPCAP) == 0)
4002ac6e65f8SBram Moolenaar && (flags & WF_NEEDCOMP) == 0
40037887d88aSBram Moolenaar && (do_region
40047887d88aSBram Moolenaar || (flags & WF_REGION) == 0
4005dfb9ac00SBram Moolenaar || (((unsigned)flags >> 16)
4006f417f2b6SBram Moolenaar & lp->lp_region) != 0))
4007f417f2b6SBram Moolenaar {
4008f417f2b6SBram Moolenaar word[depth] = NUL;
40097887d88aSBram Moolenaar if (!do_region)
40107887d88aSBram Moolenaar flags &= ~WF_REGION;
40110a5fe214SBram Moolenaar
40120d6f5d97SBram Moolenaar // Dump the basic word if there is no prefix or
40130d6f5d97SBram Moolenaar // when it's the first one.
4014dfb9ac00SBram Moolenaar c = (unsigned)flags >> 24;
40150a5fe214SBram Moolenaar if (c == 0 || curi[depth] == 2)
4016b475fb91SBram Moolenaar {
4017b475fb91SBram Moolenaar dump_word(slang, word, pat, dir,
4018b475fb91SBram Moolenaar dumpflags, flags, lnum);
4019b475fb91SBram Moolenaar if (pat == NULL)
4020b475fb91SBram Moolenaar ++lnum;
4021b475fb91SBram Moolenaar }
4022f417f2b6SBram Moolenaar
40230d6f5d97SBram Moolenaar // Apply the prefix, if there is one.
40240a5fe214SBram Moolenaar if (c != 0)
4025b475fb91SBram Moolenaar lnum = dump_prefixes(slang, word, pat, dir,
4026b475fb91SBram Moolenaar dumpflags, flags, lnum);
4027f417f2b6SBram Moolenaar }
4028f417f2b6SBram Moolenaar }
4029f417f2b6SBram Moolenaar else
4030f417f2b6SBram Moolenaar {
40310d6f5d97SBram Moolenaar // Normal char, go one level deeper.
4032f417f2b6SBram Moolenaar word[depth++] = c;
4033f417f2b6SBram Moolenaar arridx[depth] = idxs[n];
4034f417f2b6SBram Moolenaar curi[depth] = 1;
4035f417f2b6SBram Moolenaar
40360d6f5d97SBram Moolenaar // Check if this characters matches with the pattern.
40370d6f5d97SBram Moolenaar // If not skip the whole tree below it.
40380d6f5d97SBram Moolenaar // Always ignore case here, dump_word() will check
40390d6f5d97SBram Moolenaar // proper case later. This isn't exactly right when
40400d6f5d97SBram Moolenaar // length changes for multi-byte characters with
40410d6f5d97SBram Moolenaar // ignore case...
4042d0131a8bSBram Moolenaar if (depth <= patlen
4043d0131a8bSBram Moolenaar && MB_STRNICMP(word, pat, depth) != 0)
4044b475fb91SBram Moolenaar --depth;
4045b475fb91SBram Moolenaar }
4046b475fb91SBram Moolenaar }
4047b475fb91SBram Moolenaar }
4048b475fb91SBram Moolenaar }
4049b475fb91SBram Moolenaar }
4050f417f2b6SBram Moolenaar }
4051f417f2b6SBram Moolenaar
4052f417f2b6SBram Moolenaar /*
4053f417f2b6SBram Moolenaar * Dump one word: apply case modifications and append a line to the buffer.
4054b475fb91SBram Moolenaar * When "lnum" is zero add insert mode completion.
4055f417f2b6SBram Moolenaar */
4056f417f2b6SBram Moolenaar static void
dump_word(slang_T * slang,char_u * word,char_u * pat,int * dir,int dumpflags,int wordflags,linenr_T lnum)4057764b23c8SBram Moolenaar dump_word(
4058764b23c8SBram Moolenaar slang_T *slang,
4059764b23c8SBram Moolenaar char_u *word,
4060764b23c8SBram Moolenaar char_u *pat,
4061764b23c8SBram Moolenaar int *dir,
4062764b23c8SBram Moolenaar int dumpflags,
4063764b23c8SBram Moolenaar int wordflags,
4064764b23c8SBram Moolenaar linenr_T lnum)
4065f417f2b6SBram Moolenaar {
4066f417f2b6SBram Moolenaar int keepcap = FALSE;
4067f417f2b6SBram Moolenaar char_u *p;
40684770d09aSBram Moolenaar char_u *tw;
4069f417f2b6SBram Moolenaar char_u cword[MAXWLEN];
40707887d88aSBram Moolenaar char_u badword[MAXWLEN + 10];
40717887d88aSBram Moolenaar int i;
4072d0131a8bSBram Moolenaar int flags = wordflags;
4073d0131a8bSBram Moolenaar
4074d0131a8bSBram Moolenaar if (dumpflags & DUMPFLAG_ONECAP)
4075d0131a8bSBram Moolenaar flags |= WF_ONECAP;
4076d0131a8bSBram Moolenaar if (dumpflags & DUMPFLAG_ALLCAP)
4077d0131a8bSBram Moolenaar flags |= WF_ALLCAP;
4078f417f2b6SBram Moolenaar
40794770d09aSBram Moolenaar if ((dumpflags & DUMPFLAG_KEEPCASE) == 0 && (flags & WF_CAPMASK) != 0)
4080f417f2b6SBram Moolenaar {
40810d6f5d97SBram Moolenaar // Need to fix case according to "flags".
4082f417f2b6SBram Moolenaar make_case_word(word, cword, flags);
4083f417f2b6SBram Moolenaar p = cword;
4084f417f2b6SBram Moolenaar }
4085f417f2b6SBram Moolenaar else
4086f417f2b6SBram Moolenaar {
4087f417f2b6SBram Moolenaar p = word;
40884770d09aSBram Moolenaar if ((dumpflags & DUMPFLAG_KEEPCASE)
40894770d09aSBram Moolenaar && ((captype(word, NULL) & WF_KEEPCAP) == 0
40900dc065eeSBram Moolenaar || (flags & WF_FIXCAP) != 0))
4091f417f2b6SBram Moolenaar keepcap = TRUE;
4092f417f2b6SBram Moolenaar }
40934770d09aSBram Moolenaar tw = p;
4094f417f2b6SBram Moolenaar
4095b475fb91SBram Moolenaar if (pat == NULL)
4096b475fb91SBram Moolenaar {
40970d6f5d97SBram Moolenaar // Add flags and regions after a slash.
40987887d88aSBram Moolenaar if ((flags & (WF_BANNED | WF_RARE | WF_REGION)) || keepcap)
4099f417f2b6SBram Moolenaar {
41007887d88aSBram Moolenaar STRCPY(badword, p);
41017887d88aSBram Moolenaar STRCAT(badword, "/");
4102f417f2b6SBram Moolenaar if (keepcap)
4103f417f2b6SBram Moolenaar STRCAT(badword, "=");
4104f417f2b6SBram Moolenaar if (flags & WF_BANNED)
4105f417f2b6SBram Moolenaar STRCAT(badword, "!");
4106f417f2b6SBram Moolenaar else if (flags & WF_RARE)
4107f417f2b6SBram Moolenaar STRCAT(badword, "?");
41087887d88aSBram Moolenaar if (flags & WF_REGION)
41097887d88aSBram Moolenaar for (i = 0; i < 7; ++i)
4110dfb9ac00SBram Moolenaar if (flags & (0x10000 << i))
41117887d88aSBram Moolenaar sprintf((char *)badword + STRLEN(badword), "%d", i + 1);
4112f417f2b6SBram Moolenaar p = badword;
4113f417f2b6SBram Moolenaar }
4114f417f2b6SBram Moolenaar
41154770d09aSBram Moolenaar if (dumpflags & DUMPFLAG_COUNT)
41164770d09aSBram Moolenaar {
41174770d09aSBram Moolenaar hashitem_T *hi;
41184770d09aSBram Moolenaar
41190d6f5d97SBram Moolenaar // Include the word count for ":spelldump!".
41204770d09aSBram Moolenaar hi = hash_find(&slang->sl_wordcount, tw);
41214770d09aSBram Moolenaar if (!HASHITEM_EMPTY(hi))
41224770d09aSBram Moolenaar {
41234770d09aSBram Moolenaar vim_snprintf((char *)IObuff, IOSIZE, "%s\t%d",
41244770d09aSBram Moolenaar tw, HI2WC(hi)->wc_count);
41254770d09aSBram Moolenaar p = IObuff;
41264770d09aSBram Moolenaar }
41274770d09aSBram Moolenaar }
41284770d09aSBram Moolenaar
4129f417f2b6SBram Moolenaar ml_append(lnum, p, (colnr_T)0, FALSE);
4130f417f2b6SBram Moolenaar }
4131d0131a8bSBram Moolenaar else if (((dumpflags & DUMPFLAG_ICASE)
4132d0131a8bSBram Moolenaar ? MB_STRNICMP(p, pat, STRLEN(pat)) == 0
4133d0131a8bSBram Moolenaar : STRNCMP(p, pat, STRLEN(pat)) == 0)
4134b475fb91SBram Moolenaar && ins_compl_add_infercase(p, (int)STRLEN(p),
4135d9eefe31SBram Moolenaar p_ic, NULL, *dir, FALSE) == OK)
41360d6f5d97SBram Moolenaar // if dir was BACKWARD then honor it just once
4137b475fb91SBram Moolenaar *dir = FORWARD;
4138b475fb91SBram Moolenaar }
4139f417f2b6SBram Moolenaar
4140f417f2b6SBram Moolenaar /*
4141a1ba811aSBram Moolenaar * For ":spelldump": Find matching prefixes for "word". Prepend each to
4142a1ba811aSBram Moolenaar * "word" and append a line to the buffer.
4143b475fb91SBram Moolenaar * When "lnum" is zero add insert mode completion.
4144f417f2b6SBram Moolenaar * Return the updated line number.
4145f417f2b6SBram Moolenaar */
4146f417f2b6SBram Moolenaar static linenr_T
dump_prefixes(slang_T * slang,char_u * word,char_u * pat,int * dir,int dumpflags,int flags,linenr_T startlnum)4147764b23c8SBram Moolenaar dump_prefixes(
4148764b23c8SBram Moolenaar slang_T *slang,
41490d6f5d97SBram Moolenaar char_u *word, // case-folded word
4150764b23c8SBram Moolenaar char_u *pat,
4151764b23c8SBram Moolenaar int *dir,
4152764b23c8SBram Moolenaar int dumpflags,
41530d6f5d97SBram Moolenaar int flags, // flags with prefix ID
4154764b23c8SBram Moolenaar linenr_T startlnum)
4155f417f2b6SBram Moolenaar {
4156f417f2b6SBram Moolenaar idx_T arridx[MAXWLEN];
4157f417f2b6SBram Moolenaar int curi[MAXWLEN];
4158f417f2b6SBram Moolenaar char_u prefix[MAXWLEN];
415953805d1eSBram Moolenaar char_u word_up[MAXWLEN];
416053805d1eSBram Moolenaar int has_word_up = FALSE;
4161f417f2b6SBram Moolenaar int c;
4162f417f2b6SBram Moolenaar char_u *byts;
4163f417f2b6SBram Moolenaar idx_T *idxs;
4164f417f2b6SBram Moolenaar linenr_T lnum = startlnum;
4165f417f2b6SBram Moolenaar int depth;
4166f417f2b6SBram Moolenaar int n;
4167f417f2b6SBram Moolenaar int len;
4168f417f2b6SBram Moolenaar int i;
4169f417f2b6SBram Moolenaar
41700d6f5d97SBram Moolenaar // If the word starts with a lower-case letter make the word with an
41710d6f5d97SBram Moolenaar // upper-case letter in word_up[].
417253805d1eSBram Moolenaar c = PTR2CHAR(word);
417353805d1eSBram Moolenaar if (SPELL_TOUPPER(c) != c)
417453805d1eSBram Moolenaar {
417553805d1eSBram Moolenaar onecap_copy(word, word_up, TRUE);
417653805d1eSBram Moolenaar has_word_up = TRUE;
417753805d1eSBram Moolenaar }
417853805d1eSBram Moolenaar
4179f417f2b6SBram Moolenaar byts = slang->sl_pbyts;
4180f417f2b6SBram Moolenaar idxs = slang->sl_pidxs;
41810d6f5d97SBram Moolenaar if (byts != NULL) // array not is empty
4182f417f2b6SBram Moolenaar {
4183f417f2b6SBram Moolenaar /*
4184f417f2b6SBram Moolenaar * Loop over all prefixes, building them byte-by-byte in prefix[].
4185dfb9ac00SBram Moolenaar * When at the end of a prefix check that it supports "flags".
4186f417f2b6SBram Moolenaar */
4187f417f2b6SBram Moolenaar depth = 0;
4188f417f2b6SBram Moolenaar arridx[0] = 0;
4189f417f2b6SBram Moolenaar curi[0] = 1;
4190f417f2b6SBram Moolenaar while (depth >= 0 && !got_int)
4191f417f2b6SBram Moolenaar {
4192dfb9ac00SBram Moolenaar n = arridx[depth];
4193dfb9ac00SBram Moolenaar len = byts[n];
4194dfb9ac00SBram Moolenaar if (curi[depth] > len)
4195f417f2b6SBram Moolenaar {
41960d6f5d97SBram Moolenaar // Done all bytes at this node, go up one level.
4197f417f2b6SBram Moolenaar --depth;
4198f417f2b6SBram Moolenaar line_breakcheck();
4199f417f2b6SBram Moolenaar }
4200f417f2b6SBram Moolenaar else
4201f417f2b6SBram Moolenaar {
42020d6f5d97SBram Moolenaar // Do one more byte at this node.
4203dfb9ac00SBram Moolenaar n += curi[depth];
4204f417f2b6SBram Moolenaar ++curi[depth];
4205f417f2b6SBram Moolenaar c = byts[n];
4206f417f2b6SBram Moolenaar if (c == 0)
4207f417f2b6SBram Moolenaar {
42080d6f5d97SBram Moolenaar // End of prefix, find out how many IDs there are.
4209f417f2b6SBram Moolenaar for (i = 1; i < len; ++i)
4210f417f2b6SBram Moolenaar if (byts[n + i] != 0)
4211f417f2b6SBram Moolenaar break;
4212f417f2b6SBram Moolenaar curi[depth] += i - 1;
4213f417f2b6SBram Moolenaar
421453805d1eSBram Moolenaar c = valid_word_prefix(i, n, flags, word, slang, FALSE);
421553805d1eSBram Moolenaar if (c != 0)
4216f417f2b6SBram Moolenaar {
42179c96f592SBram Moolenaar vim_strncpy(prefix + depth, word, MAXWLEN - depth - 1);
4218b475fb91SBram Moolenaar dump_word(slang, prefix, pat, dir, dumpflags,
421953805d1eSBram Moolenaar (c & WF_RAREPFX) ? (flags | WF_RARE)
4220b475fb91SBram Moolenaar : flags, lnum);
4221b475fb91SBram Moolenaar if (lnum != 0)
4222b475fb91SBram Moolenaar ++lnum;
4223f417f2b6SBram Moolenaar }
422453805d1eSBram Moolenaar
42250d6f5d97SBram Moolenaar // Check for prefix that matches the word when the
42260d6f5d97SBram Moolenaar // first letter is upper-case, but only if the prefix has
42270d6f5d97SBram Moolenaar // a condition.
422853805d1eSBram Moolenaar if (has_word_up)
422953805d1eSBram Moolenaar {
423053805d1eSBram Moolenaar c = valid_word_prefix(i, n, flags, word_up, slang,
423153805d1eSBram Moolenaar TRUE);
423253805d1eSBram Moolenaar if (c != 0)
423353805d1eSBram Moolenaar {
423453805d1eSBram Moolenaar vim_strncpy(prefix + depth, word_up,
423553805d1eSBram Moolenaar MAXWLEN - depth - 1);
4236b475fb91SBram Moolenaar dump_word(slang, prefix, pat, dir, dumpflags,
423753805d1eSBram Moolenaar (c & WF_RAREPFX) ? (flags | WF_RARE)
4238b475fb91SBram Moolenaar : flags, lnum);
4239b475fb91SBram Moolenaar if (lnum != 0)
4240b475fb91SBram Moolenaar ++lnum;
424153805d1eSBram Moolenaar }
424253805d1eSBram Moolenaar }
4243f417f2b6SBram Moolenaar }
4244f417f2b6SBram Moolenaar else
4245f417f2b6SBram Moolenaar {
42460d6f5d97SBram Moolenaar // Normal char, go one level deeper.
4247f417f2b6SBram Moolenaar prefix[depth++] = c;
4248f417f2b6SBram Moolenaar arridx[depth] = idxs[n];
4249f417f2b6SBram Moolenaar curi[depth] = 1;
4250f417f2b6SBram Moolenaar }
4251f417f2b6SBram Moolenaar }
4252f417f2b6SBram Moolenaar }
4253f417f2b6SBram Moolenaar }
4254f417f2b6SBram Moolenaar
4255f417f2b6SBram Moolenaar return lnum;
4256f417f2b6SBram Moolenaar }
4257f417f2b6SBram Moolenaar
425895529568SBram Moolenaar /*
4259a40ceaf8SBram Moolenaar * Move "p" to the end of word "start".
4260a40ceaf8SBram Moolenaar * Uses the spell-checking word characters.
426195529568SBram Moolenaar */
426295529568SBram Moolenaar char_u *
spell_to_word_end(char_u * start,win_T * win)4263764b23c8SBram Moolenaar spell_to_word_end(char_u *start, win_T *win)
426495529568SBram Moolenaar {
426595529568SBram Moolenaar char_u *p = start;
426695529568SBram Moolenaar
4267860cae1cSBram Moolenaar while (*p != NUL && spell_iswordp(p, win))
426891acfffcSBram Moolenaar MB_PTR_ADV(p);
426995529568SBram Moolenaar return p;
427095529568SBram Moolenaar }
427195529568SBram Moolenaar
42728b59de9fSBram Moolenaar /*
4273a40ceaf8SBram Moolenaar * For Insert mode completion CTRL-X s:
4274a40ceaf8SBram Moolenaar * Find start of the word in front of column "startcol".
4275a40ceaf8SBram Moolenaar * We don't check if it is badly spelled, with completion we can only change
4276a40ceaf8SBram Moolenaar * the word in front of the cursor.
42778b59de9fSBram Moolenaar * Returns the column number of the word.
42788b59de9fSBram Moolenaar */
42798b59de9fSBram Moolenaar int
spell_word_start(int startcol)4280764b23c8SBram Moolenaar spell_word_start(int startcol)
42818b59de9fSBram Moolenaar {
42828b59de9fSBram Moolenaar char_u *line;
42838b59de9fSBram Moolenaar char_u *p;
42848b59de9fSBram Moolenaar int col = 0;
42858b59de9fSBram Moolenaar
428695529568SBram Moolenaar if (no_spell_checking(curwin))
42878b59de9fSBram Moolenaar return startcol;
42888b59de9fSBram Moolenaar
42890d6f5d97SBram Moolenaar // Find a word character before "startcol".
42908b59de9fSBram Moolenaar line = ml_get_curline();
42918b59de9fSBram Moolenaar for (p = line + startcol; p > line; )
42928b59de9fSBram Moolenaar {
429391acfffcSBram Moolenaar MB_PTR_BACK(line, p);
4294cc63c647SBram Moolenaar if (spell_iswordp_nmw(p, curwin))
42958b59de9fSBram Moolenaar break;
42968b59de9fSBram Moolenaar }
42978b59de9fSBram Moolenaar
42980d6f5d97SBram Moolenaar // Go back to start of the word.
42998b59de9fSBram Moolenaar while (p > line)
43008b59de9fSBram Moolenaar {
4301a93fa7eeSBram Moolenaar col = (int)(p - line);
430291acfffcSBram Moolenaar MB_PTR_BACK(line, p);
4303860cae1cSBram Moolenaar if (!spell_iswordp(p, curwin))
43048b59de9fSBram Moolenaar break;
43058b59de9fSBram Moolenaar col = 0;
43068b59de9fSBram Moolenaar }
43078b59de9fSBram Moolenaar
43088b59de9fSBram Moolenaar return col;
43098b59de9fSBram Moolenaar }
43108b59de9fSBram Moolenaar
43118b59de9fSBram Moolenaar /*
43124effc80aSBram Moolenaar * Need to check for 'spellcapcheck' now, the word is removed before
43134effc80aSBram Moolenaar * expand_spelling() is called. Therefore the ugly global variable.
43144effc80aSBram Moolenaar */
43154effc80aSBram Moolenaar static int spell_expand_need_cap;
43164effc80aSBram Moolenaar
43174effc80aSBram Moolenaar void
spell_expand_check_cap(colnr_T col)4318764b23c8SBram Moolenaar spell_expand_check_cap(colnr_T col)
43194effc80aSBram Moolenaar {
43204effc80aSBram Moolenaar spell_expand_need_cap = check_need_cap(curwin->w_cursor.lnum, col);
43214effc80aSBram Moolenaar }
43224effc80aSBram Moolenaar
43234effc80aSBram Moolenaar /*
43248b59de9fSBram Moolenaar * Get list of spelling suggestions.
43258b59de9fSBram Moolenaar * Used for Insert mode completion CTRL-X ?.
43268b59de9fSBram Moolenaar * Returns the number of matches. The matches are in "matchp[]", array of
43278b59de9fSBram Moolenaar * allocated strings.
43288b59de9fSBram Moolenaar */
43298b59de9fSBram Moolenaar int
expand_spelling(linenr_T lnum UNUSED,char_u * pat,char_u *** matchp)4330764b23c8SBram Moolenaar expand_spelling(
4331764b23c8SBram Moolenaar linenr_T lnum UNUSED,
4332764b23c8SBram Moolenaar char_u *pat,
4333764b23c8SBram Moolenaar char_u ***matchp)
43348b59de9fSBram Moolenaar {
43358b59de9fSBram Moolenaar garray_T ga;
43368b59de9fSBram Moolenaar
43374770d09aSBram Moolenaar spell_suggest_list(&ga, pat, 100, spell_expand_need_cap, TRUE);
43388b59de9fSBram Moolenaar *matchp = ga.ga_data;
43398b59de9fSBram Moolenaar return ga.ga_len;
43408b59de9fSBram Moolenaar }
43418b59de9fSBram Moolenaar
4342e677df8dSBram Moolenaar /*
4343f154f3abSBram Moolenaar * Return TRUE if "val" is a valid 'spelllang' value.
4344e677df8dSBram Moolenaar */
4345e677df8dSBram Moolenaar int
valid_spelllang(char_u * val)4346f154f3abSBram Moolenaar valid_spelllang(char_u *val)
4347e677df8dSBram Moolenaar {
4348e677df8dSBram Moolenaar return valid_name(val, ".-_,@");
4349e677df8dSBram Moolenaar }
4350e677df8dSBram Moolenaar
4351e677df8dSBram Moolenaar /*
4352e677df8dSBram Moolenaar * Return TRUE if "val" is a valid 'spellfile' value.
4353e677df8dSBram Moolenaar */
4354e677df8dSBram Moolenaar int
valid_spellfile(char_u * val)4355e677df8dSBram Moolenaar valid_spellfile(char_u *val)
4356e677df8dSBram Moolenaar {
4357e677df8dSBram Moolenaar char_u *s;
4358e677df8dSBram Moolenaar
4359e677df8dSBram Moolenaar for (s = val; *s != NUL; ++s)
4360b2620202SBram Moolenaar if (!vim_isfilec(*s) && *s != ',' && *s != ' ')
4361e677df8dSBram Moolenaar return FALSE;
4362e677df8dSBram Moolenaar return TRUE;
4363e677df8dSBram Moolenaar }
4364e677df8dSBram Moolenaar
4365e677df8dSBram Moolenaar /*
4366e677df8dSBram Moolenaar * Handle side effects of setting 'spell'.
4367e677df8dSBram Moolenaar * Return an error message or NULL for success.
4368e677df8dSBram Moolenaar */
4369e677df8dSBram Moolenaar char *
did_set_spell_option(int is_spellfile)4370e677df8dSBram Moolenaar did_set_spell_option(int is_spellfile)
4371e677df8dSBram Moolenaar {
4372e677df8dSBram Moolenaar char *errmsg = NULL;
4373e677df8dSBram Moolenaar win_T *wp;
4374e677df8dSBram Moolenaar int l;
4375e677df8dSBram Moolenaar
4376e677df8dSBram Moolenaar if (is_spellfile)
4377e677df8dSBram Moolenaar {
4378e677df8dSBram Moolenaar l = (int)STRLEN(curwin->w_s->b_p_spf);
4379e677df8dSBram Moolenaar if (l > 0 && (l < 4
4380e677df8dSBram Moolenaar || STRCMP(curwin->w_s->b_p_spf + l - 4, ".add") != 0))
4381e677df8dSBram Moolenaar errmsg = e_invarg;
4382e677df8dSBram Moolenaar }
4383e677df8dSBram Moolenaar
4384e677df8dSBram Moolenaar if (errmsg == NULL)
4385e677df8dSBram Moolenaar {
4386e677df8dSBram Moolenaar FOR_ALL_WINDOWS(wp)
4387e677df8dSBram Moolenaar if (wp->w_buffer == curbuf && wp->w_p_spell)
4388e677df8dSBram Moolenaar {
4389e677df8dSBram Moolenaar errmsg = did_set_spelllang(wp);
4390e677df8dSBram Moolenaar break;
4391e677df8dSBram Moolenaar }
4392e677df8dSBram Moolenaar }
4393e677df8dSBram Moolenaar return errmsg;
4394e677df8dSBram Moolenaar }
4395e677df8dSBram Moolenaar
4396e677df8dSBram Moolenaar /*
4397e677df8dSBram Moolenaar * Set curbuf->b_cap_prog to the regexp program for 'spellcapcheck'.
4398e677df8dSBram Moolenaar * Return error message when failed, NULL when OK.
4399e677df8dSBram Moolenaar */
4400e677df8dSBram Moolenaar char *
compile_cap_prog(synblock_T * synblock)4401e677df8dSBram Moolenaar compile_cap_prog(synblock_T *synblock)
4402e677df8dSBram Moolenaar {
4403e677df8dSBram Moolenaar regprog_T *rp = synblock->b_cap_prog;
4404e677df8dSBram Moolenaar char_u *re;
4405e677df8dSBram Moolenaar
440653efb185SBram Moolenaar if (synblock->b_p_spc == NULL || *synblock->b_p_spc == NUL)
4407e677df8dSBram Moolenaar synblock->b_cap_prog = NULL;
4408e677df8dSBram Moolenaar else
4409e677df8dSBram Moolenaar {
4410e677df8dSBram Moolenaar // Prepend a ^ so that we only match at one column
4411e677df8dSBram Moolenaar re = concat_str((char_u *)"^", synblock->b_p_spc);
4412e677df8dSBram Moolenaar if (re != NULL)
4413e677df8dSBram Moolenaar {
4414e677df8dSBram Moolenaar synblock->b_cap_prog = vim_regcomp(re, RE_MAGIC);
4415e677df8dSBram Moolenaar vim_free(re);
4416e677df8dSBram Moolenaar if (synblock->b_cap_prog == NULL)
4417e677df8dSBram Moolenaar {
4418e677df8dSBram Moolenaar synblock->b_cap_prog = rp; // restore the previous program
4419e677df8dSBram Moolenaar return e_invarg;
4420e677df8dSBram Moolenaar }
4421e677df8dSBram Moolenaar }
4422e677df8dSBram Moolenaar }
4423e677df8dSBram Moolenaar
4424e677df8dSBram Moolenaar vim_regfree(rp);
4425e677df8dSBram Moolenaar return NULL;
4426e677df8dSBram Moolenaar }
4427e677df8dSBram Moolenaar
4428e677df8dSBram Moolenaar #endif // FEAT_SPELL
4429