1 /* vi:set ts=8 sts=4 sw=4 noet: 2 * 3 * VIM - Vi IMproved by Bram Moolenaar 4 * 5 * Do ":help uganda" in Vim to read copying and usage conditions. 6 * Do ":help credits" in Vim to see a list of people who contributed. 7 * See README.txt for an overview of the Vim source code. 8 */ 9 10 /* 11 * spell.c: code for spell checking 12 * 13 * See spellfile.c for the Vim spell file format. 14 * 15 * The spell checking mechanism uses a tree (aka trie). Each node in the tree 16 * has a list of bytes that can appear (siblings). For each byte there is a 17 * pointer to the node with the byte that follows in the word (child). 18 * 19 * A NUL byte is used where the word may end. The bytes are sorted, so that 20 * binary searching can be used and the NUL bytes are at the start. The 21 * number of possible bytes is stored before the list of bytes. 22 * 23 * The tree uses two arrays: "byts" stores the characters, "idxs" stores 24 * either the next index or flags. The tree starts at index 0. For example, 25 * to lookup "vi" this sequence is followed: 26 * i = 0 27 * len = byts[i] 28 * n = where "v" appears in byts[i + 1] to byts[i + len] 29 * i = idxs[n] 30 * len = byts[i] 31 * n = where "i" appears in byts[i + 1] to byts[i + len] 32 * i = idxs[n] 33 * len = byts[i] 34 * find that byts[i + 1] is 0, idxs[i + 1] has flags for "vi". 35 * 36 * There are two word trees: one with case-folded words and one with words in 37 * original case. The second one is only used for keep-case words and is 38 * usually small. 39 * 40 * There is one additional tree for when not all prefixes are applied when 41 * generating the .spl file. This tree stores all the possible prefixes, as 42 * if they were words. At each word (prefix) end the prefix nr is stored, the 43 * following word must support this prefix nr. And the condition nr is 44 * stored, used to lookup the condition that the word must match with. 45 * 46 * Thanks to Olaf Seibert for providing an example implementation of this tree 47 * and the compression mechanism. 48 * LZ trie ideas: 49 * http://www.irb.hr/hr/home/ristov/papers/RistovLZtrieRevision1.pdf 50 * More papers: http://www-igm.univ-mlv.fr/~laporte/publi_en.html 51 * 52 * Matching involves checking the caps type: Onecap ALLCAP KeepCap. 53 * 54 * Why doesn't Vim use aspell/ispell/myspell/etc.? 55 * See ":help develop-spell". 56 */ 57 58 /* 59 * Use this to adjust the score after finding suggestions, based on the 60 * suggested word sounding like the bad word. This is much faster than doing 61 * it for every possible suggestion. 62 * Disadvantage: When "the" is typed as "hte" it sounds quite different ("@" 63 * vs "ht") and goes down in the list. 64 * Used when 'spellsuggest' is set to "best". 65 */ 66 #define RESCORE(word_score, sound_score) ((3 * word_score + sound_score) / 4) 67 68 /* 69 * Do the opposite: based on a maximum end score and a known sound score, 70 * compute the maximum word score that can be used. 71 */ 72 #define MAXSCORE(word_score, sound_score) ((4 * word_score - sound_score) / 3) 73 74 #define IN_SPELL_C 75 #include "vim.h" 76 77 #if defined(FEAT_SPELL) || defined(PROTO) 78 79 #ifndef UNIX /* it's in os_unix.h for Unix */ 80 # include <time.h> /* for time_t */ 81 #endif 82 83 /* only used for su_badflags */ 84 #define WF_MIXCAP 0x20 /* mix of upper and lower case: macaRONI */ 85 86 #define WF_CAPMASK (WF_ONECAP | WF_ALLCAP | WF_KEEPCAP | WF_FIXCAP) 87 88 #define REGION_ALL 0xff /* word valid in all regions */ 89 90 #define VIMSUGMAGIC "VIMsug" /* string at start of Vim .sug file */ 91 #define VIMSUGMAGICL 6 92 #define VIMSUGVERSION 1 93 94 /* Result values. Lower number is accepted over higher one. */ 95 #define SP_BANNED -1 96 #define SP_OK 0 97 #define SP_RARE 1 98 #define SP_LOCAL 2 99 #define SP_BAD 3 100 101 typedef struct wordcount_S 102 { 103 short_u wc_count; /* nr of times word was seen */ 104 char_u wc_word[1]; /* word, actually longer */ 105 } wordcount_T; 106 107 #define WC_KEY_OFF offsetof(wordcount_T, wc_word) 108 #define HI2WC(hi) ((wordcount_T *)((hi)->hi_key - WC_KEY_OFF)) 109 #define MAXWORDCOUNT 0xffff 110 111 /* 112 * Information used when looking for suggestions. 113 */ 114 typedef struct suginfo_S 115 { 116 garray_T su_ga; /* suggestions, contains "suggest_T" */ 117 int su_maxcount; /* max. number of suggestions displayed */ 118 int su_maxscore; /* maximum score for adding to su_ga */ 119 int su_sfmaxscore; /* idem, for when doing soundfold words */ 120 garray_T su_sga; /* like su_ga, sound-folded scoring */ 121 char_u *su_badptr; /* start of bad word in line */ 122 int su_badlen; /* length of detected bad word in line */ 123 int su_badflags; /* caps flags for bad word */ 124 char_u su_badword[MAXWLEN]; /* bad word truncated at su_badlen */ 125 char_u su_fbadword[MAXWLEN]; /* su_badword case-folded */ 126 char_u su_sal_badword[MAXWLEN]; /* su_badword soundfolded */ 127 hashtab_T su_banned; /* table with banned words */ 128 slang_T *su_sallang; /* default language for sound folding */ 129 } suginfo_T; 130 131 /* One word suggestion. Used in "si_ga". */ 132 typedef struct suggest_S 133 { 134 char_u *st_word; /* suggested word, allocated string */ 135 int st_wordlen; /* STRLEN(st_word) */ 136 int st_orglen; /* length of replaced text */ 137 int st_score; /* lower is better */ 138 int st_altscore; /* used when st_score compares equal */ 139 int st_salscore; /* st_score is for soundalike */ 140 int st_had_bonus; /* bonus already included in score */ 141 slang_T *st_slang; /* language used for sound folding */ 142 } suggest_T; 143 144 #define SUG(ga, i) (((suggest_T *)(ga).ga_data)[i]) 145 146 /* TRUE if a word appears in the list of banned words. */ 147 #define WAS_BANNED(su, word) (!HASHITEM_EMPTY(hash_find(&su->su_banned, word))) 148 149 /* Number of suggestions kept when cleaning up. We need to keep more than 150 * what is displayed, because when rescore_suggestions() is called the score 151 * may change and wrong suggestions may be removed later. */ 152 #define SUG_CLEAN_COUNT(su) ((su)->su_maxcount < 130 ? 150 : (su)->su_maxcount + 20) 153 154 /* Threshold for sorting and cleaning up suggestions. Don't want to keep lots 155 * of suggestions that are not going to be displayed. */ 156 #define SUG_MAX_COUNT(su) (SUG_CLEAN_COUNT(su) + 50) 157 158 /* score for various changes */ 159 #define SCORE_SPLIT 149 /* split bad word */ 160 #define SCORE_SPLIT_NO 249 /* split bad word with NOSPLITSUGS */ 161 #define SCORE_ICASE 52 /* slightly different case */ 162 #define SCORE_REGION 200 /* word is for different region */ 163 #define SCORE_RARE 180 /* rare word */ 164 #define SCORE_SWAP 75 /* swap two characters */ 165 #define SCORE_SWAP3 110 /* swap two characters in three */ 166 #define SCORE_REP 65 /* REP replacement */ 167 #define SCORE_SUBST 93 /* substitute a character */ 168 #define SCORE_SIMILAR 33 /* substitute a similar character */ 169 #define SCORE_SUBCOMP 33 /* substitute a composing character */ 170 #define SCORE_DEL 94 /* delete a character */ 171 #define SCORE_DELDUP 66 /* delete a duplicated character */ 172 #define SCORE_DELCOMP 28 /* delete a composing character */ 173 #define SCORE_INS 96 /* insert a character */ 174 #define SCORE_INSDUP 67 /* insert a duplicate character */ 175 #define SCORE_INSCOMP 30 /* insert a composing character */ 176 #define SCORE_NONWORD 103 /* change non-word to word char */ 177 178 #define SCORE_FILE 30 /* suggestion from a file */ 179 #define SCORE_MAXINIT 350 /* Initial maximum score: higher == slower. 180 * 350 allows for about three changes. */ 181 182 #define SCORE_COMMON1 30 /* subtracted for words seen before */ 183 #define SCORE_COMMON2 40 /* subtracted for words often seen */ 184 #define SCORE_COMMON3 50 /* subtracted for words very often seen */ 185 #define SCORE_THRES2 10 /* word count threshold for COMMON2 */ 186 #define SCORE_THRES3 100 /* word count threshold for COMMON3 */ 187 188 /* When trying changed soundfold words it becomes slow when trying more than 189 * two changes. With less then two changes it's slightly faster but we miss a 190 * few good suggestions. In rare cases we need to try three of four changes. 191 */ 192 #define SCORE_SFMAX1 200 /* maximum score for first try */ 193 #define SCORE_SFMAX2 300 /* maximum score for second try */ 194 #define SCORE_SFMAX3 400 /* maximum score for third try */ 195 196 #define SCORE_BIG SCORE_INS * 3 /* big difference */ 197 #define SCORE_MAXMAX 999999 /* accept any score */ 198 #define SCORE_LIMITMAX 350 /* for spell_edit_score_limit() */ 199 200 /* for spell_edit_score_limit() we need to know the minimum value of 201 * SCORE_ICASE, SCORE_SWAP, SCORE_DEL, SCORE_SIMILAR and SCORE_INS */ 202 #define SCORE_EDIT_MIN SCORE_SIMILAR 203 204 /* 205 * Structure to store info for word matching. 206 */ 207 typedef struct matchinf_S 208 { 209 langp_T *mi_lp; /* info for language and region */ 210 211 /* pointers to original text to be checked */ 212 char_u *mi_word; /* start of word being checked */ 213 char_u *mi_end; /* end of matching word so far */ 214 char_u *mi_fend; /* next char to be added to mi_fword */ 215 char_u *mi_cend; /* char after what was used for 216 mi_capflags */ 217 218 /* case-folded text */ 219 char_u mi_fword[MAXWLEN + 1]; /* mi_word case-folded */ 220 int mi_fwordlen; /* nr of valid bytes in mi_fword */ 221 222 /* for when checking word after a prefix */ 223 int mi_prefarridx; /* index in sl_pidxs with list of 224 affixID/condition */ 225 int mi_prefcnt; /* number of entries at mi_prefarridx */ 226 int mi_prefixlen; /* byte length of prefix */ 227 #ifdef FEAT_MBYTE 228 int mi_cprefixlen; /* byte length of prefix in original 229 case */ 230 #else 231 # define mi_cprefixlen mi_prefixlen /* it's the same value */ 232 #endif 233 234 /* for when checking a compound word */ 235 int mi_compoff; /* start of following word offset */ 236 char_u mi_compflags[MAXWLEN]; /* flags for compound words used */ 237 int mi_complen; /* nr of compound words used */ 238 int mi_compextra; /* nr of COMPOUNDROOT words */ 239 240 /* others */ 241 int mi_result; /* result so far: SP_BAD, SP_OK, etc. */ 242 int mi_capflags; /* WF_ONECAP WF_ALLCAP WF_KEEPCAP */ 243 win_T *mi_win; /* buffer being checked */ 244 245 /* for NOBREAK */ 246 int mi_result2; /* "mi_resul" without following word */ 247 char_u *mi_end2; /* "mi_end" without following word */ 248 } matchinf_T; 249 250 251 static int spell_iswordp(char_u *p, win_T *wp); 252 #ifdef FEAT_MBYTE 253 static int spell_mb_isword_class(int cl, win_T *wp); 254 static int spell_iswordp_w(int *p, win_T *wp); 255 #endif 256 257 /* 258 * For finding suggestions: At each node in the tree these states are tried: 259 */ 260 typedef enum 261 { 262 STATE_START = 0, /* At start of node check for NUL bytes (goodword 263 * ends); if badword ends there is a match, otherwise 264 * try splitting word. */ 265 STATE_NOPREFIX, /* try without prefix */ 266 STATE_SPLITUNDO, /* Undo splitting. */ 267 STATE_ENDNUL, /* Past NUL bytes at start of the node. */ 268 STATE_PLAIN, /* Use each byte of the node. */ 269 STATE_DEL, /* Delete a byte from the bad word. */ 270 STATE_INS_PREP, /* Prepare for inserting bytes. */ 271 STATE_INS, /* Insert a byte in the bad word. */ 272 STATE_SWAP, /* Swap two bytes. */ 273 STATE_UNSWAP, /* Undo swap two characters. */ 274 STATE_SWAP3, /* Swap two characters over three. */ 275 STATE_UNSWAP3, /* Undo Swap two characters over three. */ 276 STATE_UNROT3L, /* Undo rotate three characters left */ 277 STATE_UNROT3R, /* Undo rotate three characters right */ 278 STATE_REP_INI, /* Prepare for using REP items. */ 279 STATE_REP, /* Use matching REP items from the .aff file. */ 280 STATE_REP_UNDO, /* Undo a REP item replacement. */ 281 STATE_FINAL /* End of this node. */ 282 } state_T; 283 284 /* 285 * Struct to keep the state at each level in suggest_try_change(). 286 */ 287 typedef struct trystate_S 288 { 289 state_T ts_state; /* state at this level, STATE_ */ 290 int ts_score; /* score */ 291 idx_T ts_arridx; /* index in tree array, start of node */ 292 short ts_curi; /* index in list of child nodes */ 293 char_u ts_fidx; /* index in fword[], case-folded bad word */ 294 char_u ts_fidxtry; /* ts_fidx at which bytes may be changed */ 295 char_u ts_twordlen; /* valid length of tword[] */ 296 char_u ts_prefixdepth; /* stack depth for end of prefix or 297 * PFD_PREFIXTREE or PFD_NOPREFIX */ 298 char_u ts_flags; /* TSF_ flags */ 299 #ifdef FEAT_MBYTE 300 char_u ts_tcharlen; /* number of bytes in tword character */ 301 char_u ts_tcharidx; /* current byte index in tword character */ 302 char_u ts_isdiff; /* DIFF_ values */ 303 char_u ts_fcharstart; /* index in fword where badword char started */ 304 #endif 305 char_u ts_prewordlen; /* length of word in "preword[]" */ 306 char_u ts_splitoff; /* index in "tword" after last split */ 307 char_u ts_splitfidx; /* "ts_fidx" at word split */ 308 char_u ts_complen; /* nr of compound words used */ 309 char_u ts_compsplit; /* index for "compflags" where word was spit */ 310 char_u ts_save_badflags; /* su_badflags saved here */ 311 char_u ts_delidx; /* index in fword for char that was deleted, 312 valid when "ts_flags" has TSF_DIDDEL */ 313 } trystate_T; 314 315 /* values for ts_isdiff */ 316 #define DIFF_NONE 0 /* no different byte (yet) */ 317 #define DIFF_YES 1 /* different byte found */ 318 #define DIFF_INSERT 2 /* inserting character */ 319 320 /* values for ts_flags */ 321 #define TSF_PREFIXOK 1 /* already checked that prefix is OK */ 322 #define TSF_DIDSPLIT 2 /* tried split at this point */ 323 #define TSF_DIDDEL 4 /* did a delete, "ts_delidx" has index */ 324 325 /* special values ts_prefixdepth */ 326 #define PFD_NOPREFIX 0xff /* not using prefixes */ 327 #define PFD_PREFIXTREE 0xfe /* walking through the prefix tree */ 328 #define PFD_NOTSPECIAL 0xfd /* highest value that's not special */ 329 330 /* mode values for find_word */ 331 #define FIND_FOLDWORD 0 /* find word case-folded */ 332 #define FIND_KEEPWORD 1 /* find keep-case word */ 333 #define FIND_PREFIX 2 /* find word after prefix */ 334 #define FIND_COMPOUND 3 /* find case-folded compound word */ 335 #define FIND_KEEPCOMPOUND 4 /* find keep-case compound word */ 336 337 static void find_word(matchinf_T *mip, int mode); 338 static int match_checkcompoundpattern(char_u *ptr, int wlen, garray_T *gap); 339 static int can_compound(slang_T *slang, char_u *word, char_u *flags); 340 static int can_be_compound(trystate_T *sp, slang_T *slang, char_u *compflags, int flag); 341 static int match_compoundrule(slang_T *slang, char_u *compflags); 342 static int valid_word_prefix(int totprefcnt, int arridx, int flags, char_u *word, slang_T *slang, int cond_req); 343 static void find_prefix(matchinf_T *mip, int mode); 344 static int fold_more(matchinf_T *mip); 345 static int spell_valid_case(int wordflags, int treeflags); 346 static int no_spell_checking(win_T *wp); 347 static void spell_load_lang(char_u *lang); 348 static void int_wordlist_spl(char_u *fname); 349 static void spell_load_cb(char_u *fname, void *cookie); 350 static int score_wordcount_adj(slang_T *slang, int score, char_u *word, int split); 351 static int count_syllables(slang_T *slang, char_u *word); 352 static void clear_midword(win_T *buf); 353 static void use_midword(slang_T *lp, win_T *buf); 354 static int find_region(char_u *rp, char_u *region); 355 static int badword_captype(char_u *word, char_u *end); 356 static int check_need_cap(linenr_T lnum, colnr_T col); 357 static void spell_find_suggest(char_u *badptr, int badlen, suginfo_T *su, int maxcount, int banbadword, int need_cap, int interactive); 358 #ifdef FEAT_EVAL 359 static void spell_suggest_expr(suginfo_T *su, char_u *expr); 360 #endif 361 static void spell_suggest_file(suginfo_T *su, char_u *fname); 362 static void spell_suggest_intern(suginfo_T *su, int interactive); 363 static void spell_find_cleanup(suginfo_T *su); 364 static void allcap_copy(char_u *word, char_u *wcopy); 365 static void suggest_try_special(suginfo_T *su); 366 static void suggest_try_change(suginfo_T *su); 367 static void suggest_trie_walk(suginfo_T *su, langp_T *lp, char_u *fword, int soundfold); 368 static void go_deeper(trystate_T *stack, int depth, int score_add); 369 #ifdef FEAT_MBYTE 370 static int nofold_len(char_u *fword, int flen, char_u *word); 371 #endif 372 static void find_keepcap_word(slang_T *slang, char_u *fword, char_u *kword); 373 static void score_comp_sal(suginfo_T *su); 374 static void score_combine(suginfo_T *su); 375 static int stp_sal_score(suggest_T *stp, suginfo_T *su, slang_T *slang, char_u *badsound); 376 static void suggest_try_soundalike_prep(void); 377 static void suggest_try_soundalike(suginfo_T *su); 378 static void suggest_try_soundalike_finish(void); 379 static void add_sound_suggest(suginfo_T *su, char_u *goodword, int score, langp_T *lp); 380 static int soundfold_find(slang_T *slang, char_u *word); 381 static void make_case_word(char_u *fword, char_u *cword, int flags); 382 static int similar_chars(slang_T *slang, int c1, int c2); 383 static void add_suggestion(suginfo_T *su, garray_T *gap, char_u *goodword, int badlen, int score, int altscore, int had_bonus, slang_T *slang, int maxsf); 384 static void check_suggestions(suginfo_T *su, garray_T *gap); 385 static void add_banned(suginfo_T *su, char_u *word); 386 static void rescore_suggestions(suginfo_T *su); 387 static void rescore_one(suginfo_T *su, suggest_T *stp); 388 static int cleanup_suggestions(garray_T *gap, int maxscore, int keep); 389 static void spell_soundfold_sofo(slang_T *slang, char_u *inword, char_u *res); 390 static void spell_soundfold_sal(slang_T *slang, char_u *inword, char_u *res); 391 #ifdef FEAT_MBYTE 392 static void spell_soundfold_wsal(slang_T *slang, char_u *inword, char_u *res); 393 #endif 394 static int soundalike_score(char_u *goodsound, char_u *badsound); 395 static int spell_edit_score(slang_T *slang, char_u *badword, char_u *goodword); 396 static int spell_edit_score_limit(slang_T *slang, char_u *badword, char_u *goodword, int limit); 397 #ifdef FEAT_MBYTE 398 static int spell_edit_score_limit_w(slang_T *slang, char_u *badword, char_u *goodword, int limit); 399 #endif 400 static void dump_word(slang_T *slang, char_u *word, char_u *pat, int *dir, int round, int flags, linenr_T lnum); 401 static linenr_T dump_prefixes(slang_T *slang, char_u *word, char_u *pat, int *dir, int round, int flags, linenr_T startlnum); 402 403 404 /* Remember what "z?" replaced. */ 405 static char_u *repl_from = NULL; 406 static char_u *repl_to = NULL; 407 408 /* 409 * Main spell-checking function. 410 * "ptr" points to a character that could be the start of a word. 411 * "*attrp" is set to the highlight index for a badly spelled word. For a 412 * non-word or when it's OK it remains unchanged. 413 * This must only be called when 'spelllang' is not empty. 414 * 415 * "capcol" is used to check for a Capitalised word after the end of a 416 * sentence. If it's zero then perform the check. Return the column where to 417 * check next, or -1 when no sentence end was found. If it's NULL then don't 418 * worry. 419 * 420 * Returns the length of the word in bytes, also when it's OK, so that the 421 * caller can skip over the word. 422 */ 423 int 424 spell_check( 425 win_T *wp, /* current window */ 426 char_u *ptr, 427 hlf_T *attrp, 428 int *capcol, /* column to check for Capital */ 429 int docount) /* count good words */ 430 { 431 matchinf_T mi; /* Most things are put in "mi" so that it can 432 be passed to functions quickly. */ 433 int nrlen = 0; /* found a number first */ 434 int c; 435 int wrongcaplen = 0; 436 int lpi; 437 int count_word = docount; 438 439 /* A word never starts at a space or a control character. Return quickly 440 * then, skipping over the character. */ 441 if (*ptr <= ' ') 442 return 1; 443 444 /* Return here when loading language files failed. */ 445 if (wp->w_s->b_langp.ga_len == 0) 446 return 1; 447 448 vim_memset(&mi, 0, sizeof(matchinf_T)); 449 450 /* A number is always OK. Also skip hexadecimal numbers 0xFF99 and 451 * 0X99FF. But always do check spelling to find "3GPP" and "11 452 * julifeest". */ 453 if (*ptr >= '0' && *ptr <= '9') 454 { 455 if (*ptr == '0' && (ptr[1] == 'b' || ptr[1] == 'B')) 456 mi.mi_end = skipbin(ptr + 2); 457 else if (*ptr == '0' && (ptr[1] == 'x' || ptr[1] == 'X')) 458 mi.mi_end = skiphex(ptr + 2); 459 else 460 mi.mi_end = skipdigits(ptr); 461 nrlen = (int)(mi.mi_end - ptr); 462 } 463 464 /* Find the normal end of the word (until the next non-word character). */ 465 mi.mi_word = ptr; 466 mi.mi_fend = ptr; 467 if (spell_iswordp(mi.mi_fend, wp)) 468 { 469 do 470 { 471 MB_PTR_ADV(mi.mi_fend); 472 } while (*mi.mi_fend != NUL && spell_iswordp(mi.mi_fend, wp)); 473 474 if (capcol != NULL && *capcol == 0 && wp->w_s->b_cap_prog != NULL) 475 { 476 /* Check word starting with capital letter. */ 477 c = PTR2CHAR(ptr); 478 if (!SPELL_ISUPPER(c)) 479 wrongcaplen = (int)(mi.mi_fend - ptr); 480 } 481 } 482 if (capcol != NULL) 483 *capcol = -1; 484 485 /* We always use the characters up to the next non-word character, 486 * also for bad words. */ 487 mi.mi_end = mi.mi_fend; 488 489 /* Check caps type later. */ 490 mi.mi_capflags = 0; 491 mi.mi_cend = NULL; 492 mi.mi_win = wp; 493 494 /* case-fold the word with one non-word character, so that we can check 495 * for the word end. */ 496 if (*mi.mi_fend != NUL) 497 MB_PTR_ADV(mi.mi_fend); 498 499 (void)spell_casefold(ptr, (int)(mi.mi_fend - ptr), mi.mi_fword, 500 MAXWLEN + 1); 501 mi.mi_fwordlen = (int)STRLEN(mi.mi_fword); 502 503 /* The word is bad unless we recognize it. */ 504 mi.mi_result = SP_BAD; 505 mi.mi_result2 = SP_BAD; 506 507 /* 508 * Loop over the languages specified in 'spelllang'. 509 * We check them all, because a word may be matched longer in another 510 * language. 511 */ 512 for (lpi = 0; lpi < wp->w_s->b_langp.ga_len; ++lpi) 513 { 514 mi.mi_lp = LANGP_ENTRY(wp->w_s->b_langp, lpi); 515 516 /* If reloading fails the language is still in the list but everything 517 * has been cleared. */ 518 if (mi.mi_lp->lp_slang->sl_fidxs == NULL) 519 continue; 520 521 /* Check for a matching word in case-folded words. */ 522 find_word(&mi, FIND_FOLDWORD); 523 524 /* Check for a matching word in keep-case words. */ 525 find_word(&mi, FIND_KEEPWORD); 526 527 /* Check for matching prefixes. */ 528 find_prefix(&mi, FIND_FOLDWORD); 529 530 /* For a NOBREAK language, may want to use a word without a following 531 * word as a backup. */ 532 if (mi.mi_lp->lp_slang->sl_nobreak && mi.mi_result == SP_BAD 533 && mi.mi_result2 != SP_BAD) 534 { 535 mi.mi_result = mi.mi_result2; 536 mi.mi_end = mi.mi_end2; 537 } 538 539 /* Count the word in the first language where it's found to be OK. */ 540 if (count_word && mi.mi_result == SP_OK) 541 { 542 count_common_word(mi.mi_lp->lp_slang, ptr, 543 (int)(mi.mi_end - ptr), 1); 544 count_word = FALSE; 545 } 546 } 547 548 if (mi.mi_result != SP_OK) 549 { 550 /* If we found a number skip over it. Allows for "42nd". Do flag 551 * rare and local words, e.g., "3GPP". */ 552 if (nrlen > 0) 553 { 554 if (mi.mi_result == SP_BAD || mi.mi_result == SP_BANNED) 555 return nrlen; 556 } 557 558 /* When we are at a non-word character there is no error, just 559 * skip over the character (try looking for a word after it). */ 560 else if (!spell_iswordp_nmw(ptr, wp)) 561 { 562 if (capcol != NULL && wp->w_s->b_cap_prog != NULL) 563 { 564 regmatch_T regmatch; 565 int r; 566 567 /* Check for end of sentence. */ 568 regmatch.regprog = wp->w_s->b_cap_prog; 569 regmatch.rm_ic = FALSE; 570 r = vim_regexec(®match, ptr, 0); 571 wp->w_s->b_cap_prog = regmatch.regprog; 572 if (r) 573 *capcol = (int)(regmatch.endp[0] - ptr); 574 } 575 576 #ifdef FEAT_MBYTE 577 if (has_mbyte) 578 return (*mb_ptr2len)(ptr); 579 #endif 580 return 1; 581 } 582 else if (mi.mi_end == ptr) 583 /* Always include at least one character. Required for when there 584 * is a mixup in "midword". */ 585 MB_PTR_ADV(mi.mi_end); 586 else if (mi.mi_result == SP_BAD 587 && LANGP_ENTRY(wp->w_s->b_langp, 0)->lp_slang->sl_nobreak) 588 { 589 char_u *p, *fp; 590 int save_result = mi.mi_result; 591 592 /* First language in 'spelllang' is NOBREAK. Find first position 593 * at which any word would be valid. */ 594 mi.mi_lp = LANGP_ENTRY(wp->w_s->b_langp, 0); 595 if (mi.mi_lp->lp_slang->sl_fidxs != NULL) 596 { 597 p = mi.mi_word; 598 fp = mi.mi_fword; 599 for (;;) 600 { 601 MB_PTR_ADV(p); 602 MB_PTR_ADV(fp); 603 if (p >= mi.mi_end) 604 break; 605 mi.mi_compoff = (int)(fp - mi.mi_fword); 606 find_word(&mi, FIND_COMPOUND); 607 if (mi.mi_result != SP_BAD) 608 { 609 mi.mi_end = p; 610 break; 611 } 612 } 613 mi.mi_result = save_result; 614 } 615 } 616 617 if (mi.mi_result == SP_BAD || mi.mi_result == SP_BANNED) 618 *attrp = HLF_SPB; 619 else if (mi.mi_result == SP_RARE) 620 *attrp = HLF_SPR; 621 else 622 *attrp = HLF_SPL; 623 } 624 625 if (wrongcaplen > 0 && (mi.mi_result == SP_OK || mi.mi_result == SP_RARE)) 626 { 627 /* Report SpellCap only when the word isn't badly spelled. */ 628 *attrp = HLF_SPC; 629 return wrongcaplen; 630 } 631 632 return (int)(mi.mi_end - ptr); 633 } 634 635 /* 636 * Check if the word at "mip->mi_word" is in the tree. 637 * When "mode" is FIND_FOLDWORD check in fold-case word tree. 638 * When "mode" is FIND_KEEPWORD check in keep-case word tree. 639 * When "mode" is FIND_PREFIX check for word after prefix in fold-case word 640 * tree. 641 * 642 * For a match mip->mi_result is updated. 643 */ 644 static void 645 find_word(matchinf_T *mip, int mode) 646 { 647 idx_T arridx = 0; 648 int endlen[MAXWLEN]; /* length at possible word endings */ 649 idx_T endidx[MAXWLEN]; /* possible word endings */ 650 int endidxcnt = 0; 651 int len; 652 int wlen = 0; 653 int flen; 654 int c; 655 char_u *ptr; 656 idx_T lo, hi, m; 657 #ifdef FEAT_MBYTE 658 char_u *s; 659 #endif 660 char_u *p; 661 int res = SP_BAD; 662 slang_T *slang = mip->mi_lp->lp_slang; 663 unsigned flags; 664 char_u *byts; 665 idx_T *idxs; 666 int word_ends; 667 int prefix_found; 668 int nobreak_result; 669 670 if (mode == FIND_KEEPWORD || mode == FIND_KEEPCOMPOUND) 671 { 672 /* Check for word with matching case in keep-case tree. */ 673 ptr = mip->mi_word; 674 flen = 9999; /* no case folding, always enough bytes */ 675 byts = slang->sl_kbyts; 676 idxs = slang->sl_kidxs; 677 678 if (mode == FIND_KEEPCOMPOUND) 679 /* Skip over the previously found word(s). */ 680 wlen += mip->mi_compoff; 681 } 682 else 683 { 684 /* Check for case-folded in case-folded tree. */ 685 ptr = mip->mi_fword; 686 flen = mip->mi_fwordlen; /* available case-folded bytes */ 687 byts = slang->sl_fbyts; 688 idxs = slang->sl_fidxs; 689 690 if (mode == FIND_PREFIX) 691 { 692 /* Skip over the prefix. */ 693 wlen = mip->mi_prefixlen; 694 flen -= mip->mi_prefixlen; 695 } 696 else if (mode == FIND_COMPOUND) 697 { 698 /* Skip over the previously found word(s). */ 699 wlen = mip->mi_compoff; 700 flen -= mip->mi_compoff; 701 } 702 703 } 704 705 if (byts == NULL) 706 return; /* array is empty */ 707 708 /* 709 * Repeat advancing in the tree until: 710 * - there is a byte that doesn't match, 711 * - we reach the end of the tree, 712 * - or we reach the end of the line. 713 */ 714 for (;;) 715 { 716 if (flen <= 0 && *mip->mi_fend != NUL) 717 flen = fold_more(mip); 718 719 len = byts[arridx++]; 720 721 /* If the first possible byte is a zero the word could end here. 722 * Remember this index, we first check for the longest word. */ 723 if (byts[arridx] == 0) 724 { 725 if (endidxcnt == MAXWLEN) 726 { 727 /* Must be a corrupted spell file. */ 728 EMSG(_(e_format)); 729 return; 730 } 731 endlen[endidxcnt] = wlen; 732 endidx[endidxcnt++] = arridx++; 733 --len; 734 735 /* Skip over the zeros, there can be several flag/region 736 * combinations. */ 737 while (len > 0 && byts[arridx] == 0) 738 { 739 ++arridx; 740 --len; 741 } 742 if (len == 0) 743 break; /* no children, word must end here */ 744 } 745 746 /* Stop looking at end of the line. */ 747 if (ptr[wlen] == NUL) 748 break; 749 750 /* Perform a binary search in the list of accepted bytes. */ 751 c = ptr[wlen]; 752 if (c == TAB) /* <Tab> is handled like <Space> */ 753 c = ' '; 754 lo = arridx; 755 hi = arridx + len - 1; 756 while (lo < hi) 757 { 758 m = (lo + hi) / 2; 759 if (byts[m] > c) 760 hi = m - 1; 761 else if (byts[m] < c) 762 lo = m + 1; 763 else 764 { 765 lo = hi = m; 766 break; 767 } 768 } 769 770 /* Stop if there is no matching byte. */ 771 if (hi < lo || byts[lo] != c) 772 break; 773 774 /* Continue at the child (if there is one). */ 775 arridx = idxs[lo]; 776 ++wlen; 777 --flen; 778 779 /* One space in the good word may stand for several spaces in the 780 * checked word. */ 781 if (c == ' ') 782 { 783 for (;;) 784 { 785 if (flen <= 0 && *mip->mi_fend != NUL) 786 flen = fold_more(mip); 787 if (ptr[wlen] != ' ' && ptr[wlen] != TAB) 788 break; 789 ++wlen; 790 --flen; 791 } 792 } 793 } 794 795 /* 796 * Verify that one of the possible endings is valid. Try the longest 797 * first. 798 */ 799 while (endidxcnt > 0) 800 { 801 --endidxcnt; 802 arridx = endidx[endidxcnt]; 803 wlen = endlen[endidxcnt]; 804 805 #ifdef FEAT_MBYTE 806 if ((*mb_head_off)(ptr, ptr + wlen) > 0) 807 continue; /* not at first byte of character */ 808 #endif 809 if (spell_iswordp(ptr + wlen, mip->mi_win)) 810 { 811 if (slang->sl_compprog == NULL && !slang->sl_nobreak) 812 continue; /* next char is a word character */ 813 word_ends = FALSE; 814 } 815 else 816 word_ends = TRUE; 817 /* The prefix flag is before compound flags. Once a valid prefix flag 818 * has been found we try compound flags. */ 819 prefix_found = FALSE; 820 821 #ifdef FEAT_MBYTE 822 if (mode != FIND_KEEPWORD && has_mbyte) 823 { 824 /* Compute byte length in original word, length may change 825 * when folding case. This can be slow, take a shortcut when the 826 * case-folded word is equal to the keep-case word. */ 827 p = mip->mi_word; 828 if (STRNCMP(ptr, p, wlen) != 0) 829 { 830 for (s = ptr; s < ptr + wlen; MB_PTR_ADV(s)) 831 MB_PTR_ADV(p); 832 wlen = (int)(p - mip->mi_word); 833 } 834 } 835 #endif 836 837 /* Check flags and region. For FIND_PREFIX check the condition and 838 * prefix ID. 839 * Repeat this if there are more flags/region alternatives until there 840 * is a match. */ 841 res = SP_BAD; 842 for (len = byts[arridx - 1]; len > 0 && byts[arridx] == 0; 843 --len, ++arridx) 844 { 845 flags = idxs[arridx]; 846 847 /* For the fold-case tree check that the case of the checked word 848 * matches with what the word in the tree requires. 849 * For keep-case tree the case is always right. For prefixes we 850 * don't bother to check. */ 851 if (mode == FIND_FOLDWORD) 852 { 853 if (mip->mi_cend != mip->mi_word + wlen) 854 { 855 /* mi_capflags was set for a different word length, need 856 * to do it again. */ 857 mip->mi_cend = mip->mi_word + wlen; 858 mip->mi_capflags = captype(mip->mi_word, mip->mi_cend); 859 } 860 861 if (mip->mi_capflags == WF_KEEPCAP 862 || !spell_valid_case(mip->mi_capflags, flags)) 863 continue; 864 } 865 866 /* When mode is FIND_PREFIX the word must support the prefix: 867 * check the prefix ID and the condition. Do that for the list at 868 * mip->mi_prefarridx that find_prefix() filled. */ 869 else if (mode == FIND_PREFIX && !prefix_found) 870 { 871 c = valid_word_prefix(mip->mi_prefcnt, mip->mi_prefarridx, 872 flags, 873 mip->mi_word + mip->mi_cprefixlen, slang, 874 FALSE); 875 if (c == 0) 876 continue; 877 878 /* Use the WF_RARE flag for a rare prefix. */ 879 if (c & WF_RAREPFX) 880 flags |= WF_RARE; 881 prefix_found = TRUE; 882 } 883 884 if (slang->sl_nobreak) 885 { 886 if ((mode == FIND_COMPOUND || mode == FIND_KEEPCOMPOUND) 887 && (flags & WF_BANNED) == 0) 888 { 889 /* NOBREAK: found a valid following word. That's all we 890 * need to know, so return. */ 891 mip->mi_result = SP_OK; 892 break; 893 } 894 } 895 896 else if ((mode == FIND_COMPOUND || mode == FIND_KEEPCOMPOUND 897 || !word_ends)) 898 { 899 /* If there is no compound flag or the word is shorter than 900 * COMPOUNDMIN reject it quickly. 901 * Makes you wonder why someone puts a compound flag on a word 902 * that's too short... Myspell compatibility requires this 903 * anyway. */ 904 if (((unsigned)flags >> 24) == 0 905 || wlen - mip->mi_compoff < slang->sl_compminlen) 906 continue; 907 #ifdef FEAT_MBYTE 908 /* For multi-byte chars check character length against 909 * COMPOUNDMIN. */ 910 if (has_mbyte 911 && slang->sl_compminlen > 0 912 && mb_charlen_len(mip->mi_word + mip->mi_compoff, 913 wlen - mip->mi_compoff) < slang->sl_compminlen) 914 continue; 915 #endif 916 917 /* Limit the number of compound words to COMPOUNDWORDMAX if no 918 * maximum for syllables is specified. */ 919 if (!word_ends && mip->mi_complen + mip->mi_compextra + 2 920 > slang->sl_compmax 921 && slang->sl_compsylmax == MAXWLEN) 922 continue; 923 924 /* Don't allow compounding on a side where an affix was added, 925 * unless COMPOUNDPERMITFLAG was used. */ 926 if (mip->mi_complen > 0 && (flags & WF_NOCOMPBEF)) 927 continue; 928 if (!word_ends && (flags & WF_NOCOMPAFT)) 929 continue; 930 931 /* Quickly check if compounding is possible with this flag. */ 932 if (!byte_in_str(mip->mi_complen == 0 933 ? slang->sl_compstartflags 934 : slang->sl_compallflags, 935 ((unsigned)flags >> 24))) 936 continue; 937 938 /* If there is a match with a CHECKCOMPOUNDPATTERN rule 939 * discard the compound word. */ 940 if (match_checkcompoundpattern(ptr, wlen, &slang->sl_comppat)) 941 continue; 942 943 if (mode == FIND_COMPOUND) 944 { 945 int capflags; 946 947 /* Need to check the caps type of the appended compound 948 * word. */ 949 #ifdef FEAT_MBYTE 950 if (has_mbyte && STRNCMP(ptr, mip->mi_word, 951 mip->mi_compoff) != 0) 952 { 953 /* case folding may have changed the length */ 954 p = mip->mi_word; 955 for (s = ptr; s < ptr + mip->mi_compoff; MB_PTR_ADV(s)) 956 MB_PTR_ADV(p); 957 } 958 else 959 #endif 960 p = mip->mi_word + mip->mi_compoff; 961 capflags = captype(p, mip->mi_word + wlen); 962 if (capflags == WF_KEEPCAP || (capflags == WF_ALLCAP 963 && (flags & WF_FIXCAP) != 0)) 964 continue; 965 966 if (capflags != WF_ALLCAP) 967 { 968 /* When the character before the word is a word 969 * character we do not accept a Onecap word. We do 970 * accept a no-caps word, even when the dictionary 971 * word specifies ONECAP. */ 972 MB_PTR_BACK(mip->mi_word, p); 973 if (spell_iswordp_nmw(p, mip->mi_win) 974 ? capflags == WF_ONECAP 975 : (flags & WF_ONECAP) != 0 976 && capflags != WF_ONECAP) 977 continue; 978 } 979 } 980 981 /* If the word ends the sequence of compound flags of the 982 * words must match with one of the COMPOUNDRULE items and 983 * the number of syllables must not be too large. */ 984 mip->mi_compflags[mip->mi_complen] = ((unsigned)flags >> 24); 985 mip->mi_compflags[mip->mi_complen + 1] = NUL; 986 if (word_ends) 987 { 988 char_u fword[MAXWLEN]; 989 990 if (slang->sl_compsylmax < MAXWLEN) 991 { 992 /* "fword" is only needed for checking syllables. */ 993 if (ptr == mip->mi_word) 994 (void)spell_casefold(ptr, wlen, fword, MAXWLEN); 995 else 996 vim_strncpy(fword, ptr, endlen[endidxcnt]); 997 } 998 if (!can_compound(slang, fword, mip->mi_compflags)) 999 continue; 1000 } 1001 else if (slang->sl_comprules != NULL 1002 && !match_compoundrule(slang, mip->mi_compflags)) 1003 /* The compound flags collected so far do not match any 1004 * COMPOUNDRULE, discard the compounded word. */ 1005 continue; 1006 } 1007 1008 /* Check NEEDCOMPOUND: can't use word without compounding. */ 1009 else if (flags & WF_NEEDCOMP) 1010 continue; 1011 1012 nobreak_result = SP_OK; 1013 1014 if (!word_ends) 1015 { 1016 int save_result = mip->mi_result; 1017 char_u *save_end = mip->mi_end; 1018 langp_T *save_lp = mip->mi_lp; 1019 int lpi; 1020 1021 /* Check that a valid word follows. If there is one and we 1022 * are compounding, it will set "mi_result", thus we are 1023 * always finished here. For NOBREAK we only check that a 1024 * valid word follows. 1025 * Recursive! */ 1026 if (slang->sl_nobreak) 1027 mip->mi_result = SP_BAD; 1028 1029 /* Find following word in case-folded tree. */ 1030 mip->mi_compoff = endlen[endidxcnt]; 1031 #ifdef FEAT_MBYTE 1032 if (has_mbyte && mode == FIND_KEEPWORD) 1033 { 1034 /* Compute byte length in case-folded word from "wlen": 1035 * byte length in keep-case word. Length may change when 1036 * folding case. This can be slow, take a shortcut when 1037 * the case-folded word is equal to the keep-case word. */ 1038 p = mip->mi_fword; 1039 if (STRNCMP(ptr, p, wlen) != 0) 1040 { 1041 for (s = ptr; s < ptr + wlen; MB_PTR_ADV(s)) 1042 MB_PTR_ADV(p); 1043 mip->mi_compoff = (int)(p - mip->mi_fword); 1044 } 1045 } 1046 #endif 1047 #if 0 /* Disabled, see below */ 1048 c = mip->mi_compoff; 1049 #endif 1050 ++mip->mi_complen; 1051 if (flags & WF_COMPROOT) 1052 ++mip->mi_compextra; 1053 1054 /* For NOBREAK we need to try all NOBREAK languages, at least 1055 * to find the ".add" file(s). */ 1056 for (lpi = 0; lpi < mip->mi_win->w_s->b_langp.ga_len; ++lpi) 1057 { 1058 if (slang->sl_nobreak) 1059 { 1060 mip->mi_lp = LANGP_ENTRY(mip->mi_win->w_s->b_langp, lpi); 1061 if (mip->mi_lp->lp_slang->sl_fidxs == NULL 1062 || !mip->mi_lp->lp_slang->sl_nobreak) 1063 continue; 1064 } 1065 1066 find_word(mip, FIND_COMPOUND); 1067 1068 /* When NOBREAK any word that matches is OK. Otherwise we 1069 * need to find the longest match, thus try with keep-case 1070 * and prefix too. */ 1071 if (!slang->sl_nobreak || mip->mi_result == SP_BAD) 1072 { 1073 /* Find following word in keep-case tree. */ 1074 mip->mi_compoff = wlen; 1075 find_word(mip, FIND_KEEPCOMPOUND); 1076 1077 #if 0 /* Disabled, a prefix must not appear halfway a compound word, 1078 unless the COMPOUNDPERMITFLAG is used and then it can't be a 1079 postponed prefix. */ 1080 if (!slang->sl_nobreak || mip->mi_result == SP_BAD) 1081 { 1082 /* Check for following word with prefix. */ 1083 mip->mi_compoff = c; 1084 find_prefix(mip, FIND_COMPOUND); 1085 } 1086 #endif 1087 } 1088 1089 if (!slang->sl_nobreak) 1090 break; 1091 } 1092 --mip->mi_complen; 1093 if (flags & WF_COMPROOT) 1094 --mip->mi_compextra; 1095 mip->mi_lp = save_lp; 1096 1097 if (slang->sl_nobreak) 1098 { 1099 nobreak_result = mip->mi_result; 1100 mip->mi_result = save_result; 1101 mip->mi_end = save_end; 1102 } 1103 else 1104 { 1105 if (mip->mi_result == SP_OK) 1106 break; 1107 continue; 1108 } 1109 } 1110 1111 if (flags & WF_BANNED) 1112 res = SP_BANNED; 1113 else if (flags & WF_REGION) 1114 { 1115 /* Check region. */ 1116 if ((mip->mi_lp->lp_region & (flags >> 16)) != 0) 1117 res = SP_OK; 1118 else 1119 res = SP_LOCAL; 1120 } 1121 else if (flags & WF_RARE) 1122 res = SP_RARE; 1123 else 1124 res = SP_OK; 1125 1126 /* Always use the longest match and the best result. For NOBREAK 1127 * we separately keep the longest match without a following good 1128 * word as a fall-back. */ 1129 if (nobreak_result == SP_BAD) 1130 { 1131 if (mip->mi_result2 > res) 1132 { 1133 mip->mi_result2 = res; 1134 mip->mi_end2 = mip->mi_word + wlen; 1135 } 1136 else if (mip->mi_result2 == res 1137 && mip->mi_end2 < mip->mi_word + wlen) 1138 mip->mi_end2 = mip->mi_word + wlen; 1139 } 1140 else if (mip->mi_result > res) 1141 { 1142 mip->mi_result = res; 1143 mip->mi_end = mip->mi_word + wlen; 1144 } 1145 else if (mip->mi_result == res && mip->mi_end < mip->mi_word + wlen) 1146 mip->mi_end = mip->mi_word + wlen; 1147 1148 if (mip->mi_result == SP_OK) 1149 break; 1150 } 1151 1152 if (mip->mi_result == SP_OK) 1153 break; 1154 } 1155 } 1156 1157 /* 1158 * Return TRUE if there is a match between the word ptr[wlen] and 1159 * CHECKCOMPOUNDPATTERN rules, assuming that we will concatenate with another 1160 * word. 1161 * A match means that the first part of CHECKCOMPOUNDPATTERN matches at the 1162 * end of ptr[wlen] and the second part matches after it. 1163 */ 1164 static int 1165 match_checkcompoundpattern( 1166 char_u *ptr, 1167 int wlen, 1168 garray_T *gap) /* &sl_comppat */ 1169 { 1170 int i; 1171 char_u *p; 1172 int len; 1173 1174 for (i = 0; i + 1 < gap->ga_len; i += 2) 1175 { 1176 p = ((char_u **)gap->ga_data)[i + 1]; 1177 if (STRNCMP(ptr + wlen, p, STRLEN(p)) == 0) 1178 { 1179 /* Second part matches at start of following compound word, now 1180 * check if first part matches at end of previous word. */ 1181 p = ((char_u **)gap->ga_data)[i]; 1182 len = (int)STRLEN(p); 1183 if (len <= wlen && STRNCMP(ptr + wlen - len, p, len) == 0) 1184 return TRUE; 1185 } 1186 } 1187 return FALSE; 1188 } 1189 1190 /* 1191 * Return TRUE if "flags" is a valid sequence of compound flags and "word" 1192 * does not have too many syllables. 1193 */ 1194 static int 1195 can_compound(slang_T *slang, char_u *word, char_u *flags) 1196 { 1197 #ifdef FEAT_MBYTE 1198 char_u uflags[MAXWLEN * 2]; 1199 int i; 1200 #endif 1201 char_u *p; 1202 1203 if (slang->sl_compprog == NULL) 1204 return FALSE; 1205 #ifdef FEAT_MBYTE 1206 if (enc_utf8) 1207 { 1208 /* Need to convert the single byte flags to utf8 characters. */ 1209 p = uflags; 1210 for (i = 0; flags[i] != NUL; ++i) 1211 p += utf_char2bytes(flags[i], p); 1212 *p = NUL; 1213 p = uflags; 1214 } 1215 else 1216 #endif 1217 p = flags; 1218 if (!vim_regexec_prog(&slang->sl_compprog, FALSE, p, 0)) 1219 return FALSE; 1220 1221 /* Count the number of syllables. This may be slow, do it last. If there 1222 * are too many syllables AND the number of compound words is above 1223 * COMPOUNDWORDMAX then compounding is not allowed. */ 1224 if (slang->sl_compsylmax < MAXWLEN 1225 && count_syllables(slang, word) > slang->sl_compsylmax) 1226 return (int)STRLEN(flags) < slang->sl_compmax; 1227 return TRUE; 1228 } 1229 1230 /* 1231 * Return TRUE when the sequence of flags in "compflags" plus "flag" can 1232 * possibly form a valid compounded word. This also checks the COMPOUNDRULE 1233 * lines if they don't contain wildcards. 1234 */ 1235 static int 1236 can_be_compound( 1237 trystate_T *sp, 1238 slang_T *slang, 1239 char_u *compflags, 1240 int flag) 1241 { 1242 /* If the flag doesn't appear in sl_compstartflags or sl_compallflags 1243 * then it can't possibly compound. */ 1244 if (!byte_in_str(sp->ts_complen == sp->ts_compsplit 1245 ? slang->sl_compstartflags : slang->sl_compallflags, flag)) 1246 return FALSE; 1247 1248 /* If there are no wildcards, we can check if the flags collected so far 1249 * possibly can form a match with COMPOUNDRULE patterns. This only 1250 * makes sense when we have two or more words. */ 1251 if (slang->sl_comprules != NULL && sp->ts_complen > sp->ts_compsplit) 1252 { 1253 int v; 1254 1255 compflags[sp->ts_complen] = flag; 1256 compflags[sp->ts_complen + 1] = NUL; 1257 v = match_compoundrule(slang, compflags + sp->ts_compsplit); 1258 compflags[sp->ts_complen] = NUL; 1259 return v; 1260 } 1261 1262 return TRUE; 1263 } 1264 1265 1266 /* 1267 * Return TRUE if the compound flags in compflags[] match the start of any 1268 * compound rule. This is used to stop trying a compound if the flags 1269 * collected so far can't possibly match any compound rule. 1270 * Caller must check that slang->sl_comprules is not NULL. 1271 */ 1272 static int 1273 match_compoundrule(slang_T *slang, char_u *compflags) 1274 { 1275 char_u *p; 1276 int i; 1277 int c; 1278 1279 /* loop over all the COMPOUNDRULE entries */ 1280 for (p = slang->sl_comprules; *p != NUL; ++p) 1281 { 1282 /* loop over the flags in the compound word we have made, match 1283 * them against the current rule entry */ 1284 for (i = 0; ; ++i) 1285 { 1286 c = compflags[i]; 1287 if (c == NUL) 1288 /* found a rule that matches for the flags we have so far */ 1289 return TRUE; 1290 if (*p == '/' || *p == NUL) 1291 break; /* end of rule, it's too short */ 1292 if (*p == '[') 1293 { 1294 int match = FALSE; 1295 1296 /* compare against all the flags in [] */ 1297 ++p; 1298 while (*p != ']' && *p != NUL) 1299 if (*p++ == c) 1300 match = TRUE; 1301 if (!match) 1302 break; /* none matches */ 1303 } 1304 else if (*p != c) 1305 break; /* flag of word doesn't match flag in pattern */ 1306 ++p; 1307 } 1308 1309 /* Skip to the next "/", where the next pattern starts. */ 1310 p = vim_strchr(p, '/'); 1311 if (p == NULL) 1312 break; 1313 } 1314 1315 /* Checked all the rules and none of them match the flags, so there 1316 * can't possibly be a compound starting with these flags. */ 1317 return FALSE; 1318 } 1319 1320 /* 1321 * Return non-zero if the prefix indicated by "arridx" matches with the prefix 1322 * ID in "flags" for the word "word". 1323 * The WF_RAREPFX flag is included in the return value for a rare prefix. 1324 */ 1325 static int 1326 valid_word_prefix( 1327 int totprefcnt, /* nr of prefix IDs */ 1328 int arridx, /* idx in sl_pidxs[] */ 1329 int flags, 1330 char_u *word, 1331 slang_T *slang, 1332 int cond_req) /* only use prefixes with a condition */ 1333 { 1334 int prefcnt; 1335 int pidx; 1336 regprog_T **rp; 1337 int prefid; 1338 1339 prefid = (unsigned)flags >> 24; 1340 for (prefcnt = totprefcnt - 1; prefcnt >= 0; --prefcnt) 1341 { 1342 pidx = slang->sl_pidxs[arridx + prefcnt]; 1343 1344 /* Check the prefix ID. */ 1345 if (prefid != (pidx & 0xff)) 1346 continue; 1347 1348 /* Check if the prefix doesn't combine and the word already has a 1349 * suffix. */ 1350 if ((flags & WF_HAS_AFF) && (pidx & WF_PFX_NC)) 1351 continue; 1352 1353 /* Check the condition, if there is one. The condition index is 1354 * stored in the two bytes above the prefix ID byte. */ 1355 rp = &slang->sl_prefprog[((unsigned)pidx >> 8) & 0xffff]; 1356 if (*rp != NULL) 1357 { 1358 if (!vim_regexec_prog(rp, FALSE, word, 0)) 1359 continue; 1360 } 1361 else if (cond_req) 1362 continue; 1363 1364 /* It's a match! Return the WF_ flags. */ 1365 return pidx; 1366 } 1367 return 0; 1368 } 1369 1370 /* 1371 * Check if the word at "mip->mi_word" has a matching prefix. 1372 * If it does, then check the following word. 1373 * 1374 * If "mode" is "FIND_COMPOUND" then do the same after another word, find a 1375 * prefix in a compound word. 1376 * 1377 * For a match mip->mi_result is updated. 1378 */ 1379 static void 1380 find_prefix(matchinf_T *mip, int mode) 1381 { 1382 idx_T arridx = 0; 1383 int len; 1384 int wlen = 0; 1385 int flen; 1386 int c; 1387 char_u *ptr; 1388 idx_T lo, hi, m; 1389 slang_T *slang = mip->mi_lp->lp_slang; 1390 char_u *byts; 1391 idx_T *idxs; 1392 1393 byts = slang->sl_pbyts; 1394 if (byts == NULL) 1395 return; /* array is empty */ 1396 1397 /* We use the case-folded word here, since prefixes are always 1398 * case-folded. */ 1399 ptr = mip->mi_fword; 1400 flen = mip->mi_fwordlen; /* available case-folded bytes */ 1401 if (mode == FIND_COMPOUND) 1402 { 1403 /* Skip over the previously found word(s). */ 1404 ptr += mip->mi_compoff; 1405 flen -= mip->mi_compoff; 1406 } 1407 idxs = slang->sl_pidxs; 1408 1409 /* 1410 * Repeat advancing in the tree until: 1411 * - there is a byte that doesn't match, 1412 * - we reach the end of the tree, 1413 * - or we reach the end of the line. 1414 */ 1415 for (;;) 1416 { 1417 if (flen == 0 && *mip->mi_fend != NUL) 1418 flen = fold_more(mip); 1419 1420 len = byts[arridx++]; 1421 1422 /* If the first possible byte is a zero the prefix could end here. 1423 * Check if the following word matches and supports the prefix. */ 1424 if (byts[arridx] == 0) 1425 { 1426 /* There can be several prefixes with different conditions. We 1427 * try them all, since we don't know which one will give the 1428 * longest match. The word is the same each time, pass the list 1429 * of possible prefixes to find_word(). */ 1430 mip->mi_prefarridx = arridx; 1431 mip->mi_prefcnt = len; 1432 while (len > 0 && byts[arridx] == 0) 1433 { 1434 ++arridx; 1435 --len; 1436 } 1437 mip->mi_prefcnt -= len; 1438 1439 /* Find the word that comes after the prefix. */ 1440 mip->mi_prefixlen = wlen; 1441 if (mode == FIND_COMPOUND) 1442 /* Skip over the previously found word(s). */ 1443 mip->mi_prefixlen += mip->mi_compoff; 1444 1445 #ifdef FEAT_MBYTE 1446 if (has_mbyte) 1447 { 1448 /* Case-folded length may differ from original length. */ 1449 mip->mi_cprefixlen = nofold_len(mip->mi_fword, 1450 mip->mi_prefixlen, mip->mi_word); 1451 } 1452 else 1453 mip->mi_cprefixlen = mip->mi_prefixlen; 1454 #endif 1455 find_word(mip, FIND_PREFIX); 1456 1457 1458 if (len == 0) 1459 break; /* no children, word must end here */ 1460 } 1461 1462 /* Stop looking at end of the line. */ 1463 if (ptr[wlen] == NUL) 1464 break; 1465 1466 /* Perform a binary search in the list of accepted bytes. */ 1467 c = ptr[wlen]; 1468 lo = arridx; 1469 hi = arridx + len - 1; 1470 while (lo < hi) 1471 { 1472 m = (lo + hi) / 2; 1473 if (byts[m] > c) 1474 hi = m - 1; 1475 else if (byts[m] < c) 1476 lo = m + 1; 1477 else 1478 { 1479 lo = hi = m; 1480 break; 1481 } 1482 } 1483 1484 /* Stop if there is no matching byte. */ 1485 if (hi < lo || byts[lo] != c) 1486 break; 1487 1488 /* Continue at the child (if there is one). */ 1489 arridx = idxs[lo]; 1490 ++wlen; 1491 --flen; 1492 } 1493 } 1494 1495 /* 1496 * Need to fold at least one more character. Do until next non-word character 1497 * for efficiency. Include the non-word character too. 1498 * Return the length of the folded chars in bytes. 1499 */ 1500 static int 1501 fold_more(matchinf_T *mip) 1502 { 1503 int flen; 1504 char_u *p; 1505 1506 p = mip->mi_fend; 1507 do 1508 { 1509 MB_PTR_ADV(mip->mi_fend); 1510 } while (*mip->mi_fend != NUL && spell_iswordp(mip->mi_fend, mip->mi_win)); 1511 1512 /* Include the non-word character so that we can check for the word end. */ 1513 if (*mip->mi_fend != NUL) 1514 MB_PTR_ADV(mip->mi_fend); 1515 1516 (void)spell_casefold(p, (int)(mip->mi_fend - p), 1517 mip->mi_fword + mip->mi_fwordlen, 1518 MAXWLEN - mip->mi_fwordlen); 1519 flen = (int)STRLEN(mip->mi_fword + mip->mi_fwordlen); 1520 mip->mi_fwordlen += flen; 1521 return flen; 1522 } 1523 1524 /* 1525 * Check case flags for a word. Return TRUE if the word has the requested 1526 * case. 1527 */ 1528 static int 1529 spell_valid_case( 1530 int wordflags, /* flags for the checked word. */ 1531 int treeflags) /* flags for the word in the spell tree */ 1532 { 1533 return ((wordflags == WF_ALLCAP && (treeflags & WF_FIXCAP) == 0) 1534 || ((treeflags & (WF_ALLCAP | WF_KEEPCAP)) == 0 1535 && ((treeflags & WF_ONECAP) == 0 1536 || (wordflags & WF_ONECAP) != 0))); 1537 } 1538 1539 /* 1540 * Return TRUE if spell checking is not enabled. 1541 */ 1542 static int 1543 no_spell_checking(win_T *wp) 1544 { 1545 if (!wp->w_p_spell || *wp->w_s->b_p_spl == NUL 1546 || wp->w_s->b_langp.ga_len == 0) 1547 { 1548 EMSG(_("E756: Spell checking is not enabled")); 1549 return TRUE; 1550 } 1551 return FALSE; 1552 } 1553 1554 /* 1555 * Move to next spell error. 1556 * "curline" is FALSE for "[s", "]s", "[S" and "]S". 1557 * "curline" is TRUE to find word under/after cursor in the same line. 1558 * For Insert mode completion "dir" is BACKWARD and "curline" is TRUE: move 1559 * to after badly spelled word before the cursor. 1560 * Return 0 if not found, length of the badly spelled word otherwise. 1561 */ 1562 int 1563 spell_move_to( 1564 win_T *wp, 1565 int dir, /* FORWARD or BACKWARD */ 1566 int allwords, /* TRUE for "[s"/"]s", FALSE for "[S"/"]S" */ 1567 int curline, 1568 hlf_T *attrp) /* return: attributes of bad word or NULL 1569 (only when "dir" is FORWARD) */ 1570 { 1571 linenr_T lnum; 1572 pos_T found_pos; 1573 int found_len = 0; 1574 char_u *line; 1575 char_u *p; 1576 char_u *endp; 1577 hlf_T attr; 1578 int len; 1579 #ifdef FEAT_SYN_HL 1580 int has_syntax = syntax_present(wp); 1581 #endif 1582 int col; 1583 int can_spell; 1584 char_u *buf = NULL; 1585 int buflen = 0; 1586 int skip = 0; 1587 int capcol = -1; 1588 int found_one = FALSE; 1589 int wrapped = FALSE; 1590 1591 if (no_spell_checking(wp)) 1592 return 0; 1593 1594 /* 1595 * Start looking for bad word at the start of the line, because we can't 1596 * start halfway a word, we don't know where it starts or ends. 1597 * 1598 * When searching backwards, we continue in the line to find the last 1599 * bad word (in the cursor line: before the cursor). 1600 * 1601 * We concatenate the start of the next line, so that wrapped words work 1602 * (e.g. "et<line-break>cetera"). Doesn't work when searching backwards 1603 * though... 1604 */ 1605 lnum = wp->w_cursor.lnum; 1606 CLEAR_POS(&found_pos); 1607 1608 while (!got_int) 1609 { 1610 line = ml_get_buf(wp->w_buffer, lnum, FALSE); 1611 1612 len = (int)STRLEN(line); 1613 if (buflen < len + MAXWLEN + 2) 1614 { 1615 vim_free(buf); 1616 buflen = len + MAXWLEN + 2; 1617 buf = alloc(buflen); 1618 if (buf == NULL) 1619 break; 1620 } 1621 1622 /* In first line check first word for Capital. */ 1623 if (lnum == 1) 1624 capcol = 0; 1625 1626 /* For checking first word with a capital skip white space. */ 1627 if (capcol == 0) 1628 capcol = getwhitecols(line); 1629 else if (curline && wp == curwin) 1630 { 1631 /* For spellbadword(): check if first word needs a capital. */ 1632 col = getwhitecols(line); 1633 if (check_need_cap(lnum, col)) 1634 capcol = col; 1635 1636 /* Need to get the line again, may have looked at the previous 1637 * one. */ 1638 line = ml_get_buf(wp->w_buffer, lnum, FALSE); 1639 } 1640 1641 /* Copy the line into "buf" and append the start of the next line if 1642 * possible. */ 1643 STRCPY(buf, line); 1644 if (lnum < wp->w_buffer->b_ml.ml_line_count) 1645 spell_cat_line(buf + STRLEN(buf), 1646 ml_get_buf(wp->w_buffer, lnum + 1, FALSE), MAXWLEN); 1647 1648 p = buf + skip; 1649 endp = buf + len; 1650 while (p < endp) 1651 { 1652 /* When searching backward don't search after the cursor. Unless 1653 * we wrapped around the end of the buffer. */ 1654 if (dir == BACKWARD 1655 && lnum == wp->w_cursor.lnum 1656 && !wrapped 1657 && (colnr_T)(p - buf) >= wp->w_cursor.col) 1658 break; 1659 1660 /* start of word */ 1661 attr = HLF_COUNT; 1662 len = spell_check(wp, p, &attr, &capcol, FALSE); 1663 1664 if (attr != HLF_COUNT) 1665 { 1666 /* We found a bad word. Check the attribute. */ 1667 if (allwords || attr == HLF_SPB) 1668 { 1669 /* When searching forward only accept a bad word after 1670 * the cursor. */ 1671 if (dir == BACKWARD 1672 || lnum != wp->w_cursor.lnum 1673 || (lnum == wp->w_cursor.lnum 1674 && (wrapped 1675 || (colnr_T)(curline ? p - buf + len 1676 : p - buf) 1677 > wp->w_cursor.col))) 1678 { 1679 #ifdef FEAT_SYN_HL 1680 if (has_syntax) 1681 { 1682 col = (int)(p - buf); 1683 (void)syn_get_id(wp, lnum, (colnr_T)col, 1684 FALSE, &can_spell, FALSE); 1685 if (!can_spell) 1686 attr = HLF_COUNT; 1687 } 1688 else 1689 #endif 1690 can_spell = TRUE; 1691 1692 if (can_spell) 1693 { 1694 found_one = TRUE; 1695 found_pos.lnum = lnum; 1696 found_pos.col = (int)(p - buf); 1697 #ifdef FEAT_VIRTUALEDIT 1698 found_pos.coladd = 0; 1699 #endif 1700 if (dir == FORWARD) 1701 { 1702 /* No need to search further. */ 1703 wp->w_cursor = found_pos; 1704 vim_free(buf); 1705 if (attrp != NULL) 1706 *attrp = attr; 1707 return len; 1708 } 1709 else if (curline) 1710 /* Insert mode completion: put cursor after 1711 * the bad word. */ 1712 found_pos.col += len; 1713 found_len = len; 1714 } 1715 } 1716 else 1717 found_one = TRUE; 1718 } 1719 } 1720 1721 /* advance to character after the word */ 1722 p += len; 1723 capcol -= len; 1724 } 1725 1726 if (dir == BACKWARD && found_pos.lnum != 0) 1727 { 1728 /* Use the last match in the line (before the cursor). */ 1729 wp->w_cursor = found_pos; 1730 vim_free(buf); 1731 return found_len; 1732 } 1733 1734 if (curline) 1735 break; /* only check cursor line */ 1736 1737 /* If we are back at the starting line and searched it again there 1738 * is no match, give up. */ 1739 if (lnum == wp->w_cursor.lnum && wrapped) 1740 break; 1741 1742 /* Advance to next line. */ 1743 if (dir == BACKWARD) 1744 { 1745 if (lnum > 1) 1746 --lnum; 1747 else if (!p_ws) 1748 break; /* at first line and 'nowrapscan' */ 1749 else 1750 { 1751 /* Wrap around to the end of the buffer. May search the 1752 * starting line again and accept the last match. */ 1753 lnum = wp->w_buffer->b_ml.ml_line_count; 1754 wrapped = TRUE; 1755 if (!shortmess(SHM_SEARCH)) 1756 give_warning((char_u *)_(top_bot_msg), TRUE); 1757 } 1758 capcol = -1; 1759 } 1760 else 1761 { 1762 if (lnum < wp->w_buffer->b_ml.ml_line_count) 1763 ++lnum; 1764 else if (!p_ws) 1765 break; /* at first line and 'nowrapscan' */ 1766 else 1767 { 1768 /* Wrap around to the start of the buffer. May search the 1769 * starting line again and accept the first match. */ 1770 lnum = 1; 1771 wrapped = TRUE; 1772 if (!shortmess(SHM_SEARCH)) 1773 give_warning((char_u *)_(bot_top_msg), TRUE); 1774 } 1775 1776 /* If we are back at the starting line and there is no match then 1777 * give up. */ 1778 if (lnum == wp->w_cursor.lnum && !found_one) 1779 break; 1780 1781 /* Skip the characters at the start of the next line that were 1782 * included in a match crossing line boundaries. */ 1783 if (attr == HLF_COUNT) 1784 skip = (int)(p - endp); 1785 else 1786 skip = 0; 1787 1788 /* Capcol skips over the inserted space. */ 1789 --capcol; 1790 1791 /* But after empty line check first word in next line */ 1792 if (*skipwhite(line) == NUL) 1793 capcol = 0; 1794 } 1795 1796 line_breakcheck(); 1797 } 1798 1799 vim_free(buf); 1800 return 0; 1801 } 1802 1803 /* 1804 * For spell checking: concatenate the start of the following line "line" into 1805 * "buf", blanking-out special characters. Copy less then "maxlen" bytes. 1806 * Keep the blanks at the start of the next line, this is used in win_line() 1807 * to skip those bytes if the word was OK. 1808 */ 1809 void 1810 spell_cat_line(char_u *buf, char_u *line, int maxlen) 1811 { 1812 char_u *p; 1813 int n; 1814 1815 p = skipwhite(line); 1816 while (vim_strchr((char_u *)"*#/\"\t", *p) != NULL) 1817 p = skipwhite(p + 1); 1818 1819 if (*p != NUL) 1820 { 1821 /* Only worth concatenating if there is something else than spaces to 1822 * concatenate. */ 1823 n = (int)(p - line) + 1; 1824 if (n < maxlen - 1) 1825 { 1826 vim_memset(buf, ' ', n); 1827 vim_strncpy(buf + n, p, maxlen - 1 - n); 1828 } 1829 } 1830 } 1831 1832 /* 1833 * Structure used for the cookie argument of do_in_runtimepath(). 1834 */ 1835 typedef struct spelload_S 1836 { 1837 char_u sl_lang[MAXWLEN + 1]; /* language name */ 1838 slang_T *sl_slang; /* resulting slang_T struct */ 1839 int sl_nobreak; /* NOBREAK language found */ 1840 } spelload_T; 1841 1842 /* 1843 * Load word list(s) for "lang" from Vim spell file(s). 1844 * "lang" must be the language without the region: e.g., "en". 1845 */ 1846 static void 1847 spell_load_lang(char_u *lang) 1848 { 1849 char_u fname_enc[85]; 1850 int r; 1851 spelload_T sl; 1852 #ifdef FEAT_AUTOCMD 1853 int round; 1854 #endif 1855 1856 /* Copy the language name to pass it to spell_load_cb() as a cookie. 1857 * It's truncated when an error is detected. */ 1858 STRCPY(sl.sl_lang, lang); 1859 sl.sl_slang = NULL; 1860 sl.sl_nobreak = FALSE; 1861 1862 #ifdef FEAT_AUTOCMD 1863 /* We may retry when no spell file is found for the language, an 1864 * autocommand may load it then. */ 1865 for (round = 1; round <= 2; ++round) 1866 #endif 1867 { 1868 /* 1869 * Find the first spell file for "lang" in 'runtimepath' and load it. 1870 */ 1871 vim_snprintf((char *)fname_enc, sizeof(fname_enc) - 5, 1872 #ifdef VMS 1873 "spell/%s_%s.spl", 1874 #else 1875 "spell/%s.%s.spl", 1876 #endif 1877 lang, spell_enc()); 1878 r = do_in_runtimepath(fname_enc, 0, spell_load_cb, &sl); 1879 1880 if (r == FAIL && *sl.sl_lang != NUL) 1881 { 1882 /* Try loading the ASCII version. */ 1883 vim_snprintf((char *)fname_enc, sizeof(fname_enc) - 5, 1884 #ifdef VMS 1885 "spell/%s_ascii.spl", 1886 #else 1887 "spell/%s.ascii.spl", 1888 #endif 1889 lang); 1890 r = do_in_runtimepath(fname_enc, 0, spell_load_cb, &sl); 1891 1892 #ifdef FEAT_AUTOCMD 1893 if (r == FAIL && *sl.sl_lang != NUL && round == 1 1894 && apply_autocmds(EVENT_SPELLFILEMISSING, lang, 1895 curbuf->b_fname, FALSE, curbuf)) 1896 continue; 1897 break; 1898 #endif 1899 } 1900 #ifdef FEAT_AUTOCMD 1901 break; 1902 #endif 1903 } 1904 1905 if (r == FAIL) 1906 { 1907 smsg((char_u *) 1908 #ifdef VMS 1909 _("Warning: Cannot find word list \"%s_%s.spl\" or \"%s_ascii.spl\""), 1910 #else 1911 _("Warning: Cannot find word list \"%s.%s.spl\" or \"%s.ascii.spl\""), 1912 #endif 1913 lang, spell_enc(), lang); 1914 } 1915 else if (sl.sl_slang != NULL) 1916 { 1917 /* At least one file was loaded, now load ALL the additions. */ 1918 STRCPY(fname_enc + STRLEN(fname_enc) - 3, "add.spl"); 1919 do_in_runtimepath(fname_enc, DIP_ALL, spell_load_cb, &sl); 1920 } 1921 } 1922 1923 /* 1924 * Return the encoding used for spell checking: Use 'encoding', except that we 1925 * use "latin1" for "latin9". And limit to 60 characters (just in case). 1926 */ 1927 char_u * 1928 spell_enc(void) 1929 { 1930 1931 #ifdef FEAT_MBYTE 1932 if (STRLEN(p_enc) < 60 && STRCMP(p_enc, "iso-8859-15") != 0) 1933 return p_enc; 1934 #endif 1935 return (char_u *)"latin1"; 1936 } 1937 1938 /* 1939 * Get the name of the .spl file for the internal wordlist into 1940 * "fname[MAXPATHL]". 1941 */ 1942 static void 1943 int_wordlist_spl(char_u *fname) 1944 { 1945 vim_snprintf((char *)fname, MAXPATHL, SPL_FNAME_TMPL, 1946 int_wordlist, spell_enc()); 1947 } 1948 1949 /* 1950 * Allocate a new slang_T for language "lang". "lang" can be NULL. 1951 * Caller must fill "sl_next". 1952 */ 1953 slang_T * 1954 slang_alloc(char_u *lang) 1955 { 1956 slang_T *lp; 1957 1958 lp = (slang_T *)alloc_clear(sizeof(slang_T)); 1959 if (lp != NULL) 1960 { 1961 if (lang != NULL) 1962 lp->sl_name = vim_strsave(lang); 1963 ga_init2(&lp->sl_rep, sizeof(fromto_T), 10); 1964 ga_init2(&lp->sl_repsal, sizeof(fromto_T), 10); 1965 lp->sl_compmax = MAXWLEN; 1966 lp->sl_compsylmax = MAXWLEN; 1967 hash_init(&lp->sl_wordcount); 1968 } 1969 1970 return lp; 1971 } 1972 1973 /* 1974 * Free the contents of an slang_T and the structure itself. 1975 */ 1976 void 1977 slang_free(slang_T *lp) 1978 { 1979 vim_free(lp->sl_name); 1980 vim_free(lp->sl_fname); 1981 slang_clear(lp); 1982 vim_free(lp); 1983 } 1984 1985 /* 1986 * Clear an slang_T so that the file can be reloaded. 1987 */ 1988 void 1989 slang_clear(slang_T *lp) 1990 { 1991 garray_T *gap; 1992 fromto_T *ftp; 1993 salitem_T *smp; 1994 int i; 1995 int round; 1996 1997 vim_free(lp->sl_fbyts); 1998 lp->sl_fbyts = NULL; 1999 vim_free(lp->sl_kbyts); 2000 lp->sl_kbyts = NULL; 2001 vim_free(lp->sl_pbyts); 2002 lp->sl_pbyts = NULL; 2003 2004 vim_free(lp->sl_fidxs); 2005 lp->sl_fidxs = NULL; 2006 vim_free(lp->sl_kidxs); 2007 lp->sl_kidxs = NULL; 2008 vim_free(lp->sl_pidxs); 2009 lp->sl_pidxs = NULL; 2010 2011 for (round = 1; round <= 2; ++round) 2012 { 2013 gap = round == 1 ? &lp->sl_rep : &lp->sl_repsal; 2014 while (gap->ga_len > 0) 2015 { 2016 ftp = &((fromto_T *)gap->ga_data)[--gap->ga_len]; 2017 vim_free(ftp->ft_from); 2018 vim_free(ftp->ft_to); 2019 } 2020 ga_clear(gap); 2021 } 2022 2023 gap = &lp->sl_sal; 2024 if (lp->sl_sofo) 2025 { 2026 /* "ga_len" is set to 1 without adding an item for latin1 */ 2027 if (gap->ga_data != NULL) 2028 /* SOFOFROM and SOFOTO items: free lists of wide characters. */ 2029 for (i = 0; i < gap->ga_len; ++i) 2030 vim_free(((int **)gap->ga_data)[i]); 2031 } 2032 else 2033 /* SAL items: free salitem_T items */ 2034 while (gap->ga_len > 0) 2035 { 2036 smp = &((salitem_T *)gap->ga_data)[--gap->ga_len]; 2037 vim_free(smp->sm_lead); 2038 /* Don't free sm_oneof and sm_rules, they point into sm_lead. */ 2039 vim_free(smp->sm_to); 2040 #ifdef FEAT_MBYTE 2041 vim_free(smp->sm_lead_w); 2042 vim_free(smp->sm_oneof_w); 2043 vim_free(smp->sm_to_w); 2044 #endif 2045 } 2046 ga_clear(gap); 2047 2048 for (i = 0; i < lp->sl_prefixcnt; ++i) 2049 vim_regfree(lp->sl_prefprog[i]); 2050 lp->sl_prefixcnt = 0; 2051 vim_free(lp->sl_prefprog); 2052 lp->sl_prefprog = NULL; 2053 2054 vim_free(lp->sl_info); 2055 lp->sl_info = NULL; 2056 2057 vim_free(lp->sl_midword); 2058 lp->sl_midword = NULL; 2059 2060 vim_regfree(lp->sl_compprog); 2061 vim_free(lp->sl_comprules); 2062 vim_free(lp->sl_compstartflags); 2063 vim_free(lp->sl_compallflags); 2064 lp->sl_compprog = NULL; 2065 lp->sl_comprules = NULL; 2066 lp->sl_compstartflags = NULL; 2067 lp->sl_compallflags = NULL; 2068 2069 vim_free(lp->sl_syllable); 2070 lp->sl_syllable = NULL; 2071 ga_clear(&lp->sl_syl_items); 2072 2073 ga_clear_strings(&lp->sl_comppat); 2074 2075 hash_clear_all(&lp->sl_wordcount, WC_KEY_OFF); 2076 hash_init(&lp->sl_wordcount); 2077 2078 #ifdef FEAT_MBYTE 2079 hash_clear_all(&lp->sl_map_hash, 0); 2080 #endif 2081 2082 /* Clear info from .sug file. */ 2083 slang_clear_sug(lp); 2084 2085 lp->sl_compmax = MAXWLEN; 2086 lp->sl_compminlen = 0; 2087 lp->sl_compsylmax = MAXWLEN; 2088 lp->sl_regions[0] = NUL; 2089 } 2090 2091 /* 2092 * Clear the info from the .sug file in "lp". 2093 */ 2094 void 2095 slang_clear_sug(slang_T *lp) 2096 { 2097 vim_free(lp->sl_sbyts); 2098 lp->sl_sbyts = NULL; 2099 vim_free(lp->sl_sidxs); 2100 lp->sl_sidxs = NULL; 2101 close_spellbuf(lp->sl_sugbuf); 2102 lp->sl_sugbuf = NULL; 2103 lp->sl_sugloaded = FALSE; 2104 lp->sl_sugtime = 0; 2105 } 2106 2107 /* 2108 * Load one spell file and store the info into a slang_T. 2109 * Invoked through do_in_runtimepath(). 2110 */ 2111 static void 2112 spell_load_cb(char_u *fname, void *cookie) 2113 { 2114 spelload_T *slp = (spelload_T *)cookie; 2115 slang_T *slang; 2116 2117 slang = spell_load_file(fname, slp->sl_lang, NULL, FALSE); 2118 if (slang != NULL) 2119 { 2120 /* When a previously loaded file has NOBREAK also use it for the 2121 * ".add" files. */ 2122 if (slp->sl_nobreak && slang->sl_add) 2123 slang->sl_nobreak = TRUE; 2124 else if (slang->sl_nobreak) 2125 slp->sl_nobreak = TRUE; 2126 2127 slp->sl_slang = slang; 2128 } 2129 } 2130 2131 2132 /* 2133 * Add a word to the hashtable of common words. 2134 * If it's already there then the counter is increased. 2135 */ 2136 void 2137 count_common_word( 2138 slang_T *lp, 2139 char_u *word, 2140 int len, /* word length, -1 for upto NUL */ 2141 int count) /* 1 to count once, 10 to init */ 2142 { 2143 hash_T hash; 2144 hashitem_T *hi; 2145 wordcount_T *wc; 2146 char_u buf[MAXWLEN]; 2147 char_u *p; 2148 2149 if (len == -1) 2150 p = word; 2151 else 2152 { 2153 vim_strncpy(buf, word, len); 2154 p = buf; 2155 } 2156 2157 hash = hash_hash(p); 2158 hi = hash_lookup(&lp->sl_wordcount, p, hash); 2159 if (HASHITEM_EMPTY(hi)) 2160 { 2161 wc = (wordcount_T *)alloc((unsigned)(sizeof(wordcount_T) + STRLEN(p))); 2162 if (wc == NULL) 2163 return; 2164 STRCPY(wc->wc_word, p); 2165 wc->wc_count = count; 2166 hash_add_item(&lp->sl_wordcount, hi, wc->wc_word, hash); 2167 } 2168 else 2169 { 2170 wc = HI2WC(hi); 2171 if ((wc->wc_count += count) < (unsigned)count) /* check for overflow */ 2172 wc->wc_count = MAXWORDCOUNT; 2173 } 2174 } 2175 2176 /* 2177 * Adjust the score of common words. 2178 */ 2179 static int 2180 score_wordcount_adj( 2181 slang_T *slang, 2182 int score, 2183 char_u *word, 2184 int split) /* word was split, less bonus */ 2185 { 2186 hashitem_T *hi; 2187 wordcount_T *wc; 2188 int bonus; 2189 int newscore; 2190 2191 hi = hash_find(&slang->sl_wordcount, word); 2192 if (!HASHITEM_EMPTY(hi)) 2193 { 2194 wc = HI2WC(hi); 2195 if (wc->wc_count < SCORE_THRES2) 2196 bonus = SCORE_COMMON1; 2197 else if (wc->wc_count < SCORE_THRES3) 2198 bonus = SCORE_COMMON2; 2199 else 2200 bonus = SCORE_COMMON3; 2201 if (split) 2202 newscore = score - bonus / 2; 2203 else 2204 newscore = score - bonus; 2205 if (newscore < 0) 2206 return 0; 2207 return newscore; 2208 } 2209 return score; 2210 } 2211 2212 2213 /* 2214 * Return TRUE if byte "n" appears in "str". 2215 * Like strchr() but independent of locale. 2216 */ 2217 int 2218 byte_in_str(char_u *str, int n) 2219 { 2220 char_u *p; 2221 2222 for (p = str; *p != NUL; ++p) 2223 if (*p == n) 2224 return TRUE; 2225 return FALSE; 2226 } 2227 2228 #define SY_MAXLEN 30 2229 typedef struct syl_item_S 2230 { 2231 char_u sy_chars[SY_MAXLEN]; /* the sequence of chars */ 2232 int sy_len; 2233 } syl_item_T; 2234 2235 /* 2236 * Truncate "slang->sl_syllable" at the first slash and put the following items 2237 * in "slang->sl_syl_items". 2238 */ 2239 int 2240 init_syl_tab(slang_T *slang) 2241 { 2242 char_u *p; 2243 char_u *s; 2244 int l; 2245 syl_item_T *syl; 2246 2247 ga_init2(&slang->sl_syl_items, sizeof(syl_item_T), 4); 2248 p = vim_strchr(slang->sl_syllable, '/'); 2249 while (p != NULL) 2250 { 2251 *p++ = NUL; 2252 if (*p == NUL) /* trailing slash */ 2253 break; 2254 s = p; 2255 p = vim_strchr(p, '/'); 2256 if (p == NULL) 2257 l = (int)STRLEN(s); 2258 else 2259 l = (int)(p - s); 2260 if (l >= SY_MAXLEN) 2261 return SP_FORMERROR; 2262 if (ga_grow(&slang->sl_syl_items, 1) == FAIL) 2263 return SP_OTHERERROR; 2264 syl = ((syl_item_T *)slang->sl_syl_items.ga_data) 2265 + slang->sl_syl_items.ga_len++; 2266 vim_strncpy(syl->sy_chars, s, l); 2267 syl->sy_len = l; 2268 } 2269 return OK; 2270 } 2271 2272 /* 2273 * Count the number of syllables in "word". 2274 * When "word" contains spaces the syllables after the last space are counted. 2275 * Returns zero if syllables are not defines. 2276 */ 2277 static int 2278 count_syllables(slang_T *slang, char_u *word) 2279 { 2280 int cnt = 0; 2281 int skip = FALSE; 2282 char_u *p; 2283 int len; 2284 int i; 2285 syl_item_T *syl; 2286 int c; 2287 2288 if (slang->sl_syllable == NULL) 2289 return 0; 2290 2291 for (p = word; *p != NUL; p += len) 2292 { 2293 /* When running into a space reset counter. */ 2294 if (*p == ' ') 2295 { 2296 len = 1; 2297 cnt = 0; 2298 continue; 2299 } 2300 2301 /* Find longest match of syllable items. */ 2302 len = 0; 2303 for (i = 0; i < slang->sl_syl_items.ga_len; ++i) 2304 { 2305 syl = ((syl_item_T *)slang->sl_syl_items.ga_data) + i; 2306 if (syl->sy_len > len 2307 && STRNCMP(p, syl->sy_chars, syl->sy_len) == 0) 2308 len = syl->sy_len; 2309 } 2310 if (len != 0) /* found a match, count syllable */ 2311 { 2312 ++cnt; 2313 skip = FALSE; 2314 } 2315 else 2316 { 2317 /* No recognized syllable item, at least a syllable char then? */ 2318 #ifdef FEAT_MBYTE 2319 c = mb_ptr2char(p); 2320 len = (*mb_ptr2len)(p); 2321 #else 2322 c = *p; 2323 len = 1; 2324 #endif 2325 if (vim_strchr(slang->sl_syllable, c) == NULL) 2326 skip = FALSE; /* No, search for next syllable */ 2327 else if (!skip) 2328 { 2329 ++cnt; /* Yes, count it */ 2330 skip = TRUE; /* don't count following syllable chars */ 2331 } 2332 } 2333 } 2334 return cnt; 2335 } 2336 2337 /* 2338 * Parse 'spelllang' and set w_s->b_langp accordingly. 2339 * Returns NULL if it's OK, an error message otherwise. 2340 */ 2341 char_u * 2342 did_set_spelllang(win_T *wp) 2343 { 2344 garray_T ga; 2345 char_u *splp; 2346 char_u *region; 2347 char_u region_cp[3]; 2348 int filename; 2349 int region_mask; 2350 slang_T *slang; 2351 int c; 2352 char_u lang[MAXWLEN + 1]; 2353 char_u spf_name[MAXPATHL]; 2354 int len; 2355 char_u *p; 2356 int round; 2357 char_u *spf; 2358 char_u *use_region = NULL; 2359 int dont_use_region = FALSE; 2360 int nobreak = FALSE; 2361 int i, j; 2362 langp_T *lp, *lp2; 2363 static int recursive = FALSE; 2364 char_u *ret_msg = NULL; 2365 char_u *spl_copy; 2366 #ifdef FEAT_AUTOCMD 2367 bufref_T bufref; 2368 2369 set_bufref(&bufref, wp->w_buffer); 2370 #endif 2371 2372 /* We don't want to do this recursively. May happen when a language is 2373 * not available and the SpellFileMissing autocommand opens a new buffer 2374 * in which 'spell' is set. */ 2375 if (recursive) 2376 return NULL; 2377 recursive = TRUE; 2378 2379 ga_init2(&ga, sizeof(langp_T), 2); 2380 clear_midword(wp); 2381 2382 /* Make a copy of 'spelllang', the SpellFileMissing autocommands may change 2383 * it under our fingers. */ 2384 spl_copy = vim_strsave(wp->w_s->b_p_spl); 2385 if (spl_copy == NULL) 2386 goto theend; 2387 2388 #ifdef FEAT_MBYTE 2389 wp->w_s->b_cjk = 0; 2390 #endif 2391 2392 /* Loop over comma separated language names. */ 2393 for (splp = spl_copy; *splp != NUL; ) 2394 { 2395 /* Get one language name. */ 2396 copy_option_part(&splp, lang, MAXWLEN, ","); 2397 region = NULL; 2398 len = (int)STRLEN(lang); 2399 2400 if (STRCMP(lang, "cjk") == 0) 2401 { 2402 #ifdef FEAT_MBYTE 2403 wp->w_s->b_cjk = 1; 2404 #endif 2405 continue; 2406 } 2407 2408 /* If the name ends in ".spl" use it as the name of the spell file. 2409 * If there is a region name let "region" point to it and remove it 2410 * from the name. */ 2411 if (len > 4 && fnamecmp(lang + len - 4, ".spl") == 0) 2412 { 2413 filename = TRUE; 2414 2415 /* Locate a region and remove it from the file name. */ 2416 p = vim_strchr(gettail(lang), '_'); 2417 if (p != NULL && ASCII_ISALPHA(p[1]) && ASCII_ISALPHA(p[2]) 2418 && !ASCII_ISALPHA(p[3])) 2419 { 2420 vim_strncpy(region_cp, p + 1, 2); 2421 mch_memmove(p, p + 3, len - (p - lang) - 2); 2422 len -= 3; 2423 region = region_cp; 2424 } 2425 else 2426 dont_use_region = TRUE; 2427 2428 /* Check if we loaded this language before. */ 2429 for (slang = first_lang; slang != NULL; slang = slang->sl_next) 2430 if (fullpathcmp(lang, slang->sl_fname, FALSE) == FPC_SAME) 2431 break; 2432 } 2433 else 2434 { 2435 filename = FALSE; 2436 if (len > 3 && lang[len - 3] == '_') 2437 { 2438 region = lang + len - 2; 2439 len -= 3; 2440 lang[len] = NUL; 2441 } 2442 else 2443 dont_use_region = TRUE; 2444 2445 /* Check if we loaded this language before. */ 2446 for (slang = first_lang; slang != NULL; slang = slang->sl_next) 2447 if (STRICMP(lang, slang->sl_name) == 0) 2448 break; 2449 } 2450 2451 if (region != NULL) 2452 { 2453 /* If the region differs from what was used before then don't 2454 * use it for 'spellfile'. */ 2455 if (use_region != NULL && STRCMP(region, use_region) != 0) 2456 dont_use_region = TRUE; 2457 use_region = region; 2458 } 2459 2460 /* If not found try loading the language now. */ 2461 if (slang == NULL) 2462 { 2463 if (filename) 2464 (void)spell_load_file(lang, lang, NULL, FALSE); 2465 else 2466 { 2467 spell_load_lang(lang); 2468 #ifdef FEAT_AUTOCMD 2469 /* SpellFileMissing autocommands may do anything, including 2470 * destroying the buffer we are using... */ 2471 if (!bufref_valid(&bufref)) 2472 { 2473 ret_msg = (char_u *)N_("E797: SpellFileMissing autocommand deleted buffer"); 2474 goto theend; 2475 } 2476 #endif 2477 } 2478 } 2479 2480 /* 2481 * Loop over the languages, there can be several files for "lang". 2482 */ 2483 for (slang = first_lang; slang != NULL; slang = slang->sl_next) 2484 if (filename ? fullpathcmp(lang, slang->sl_fname, FALSE) == FPC_SAME 2485 : STRICMP(lang, slang->sl_name) == 0) 2486 { 2487 region_mask = REGION_ALL; 2488 if (!filename && region != NULL) 2489 { 2490 /* find region in sl_regions */ 2491 c = find_region(slang->sl_regions, region); 2492 if (c == REGION_ALL) 2493 { 2494 if (slang->sl_add) 2495 { 2496 if (*slang->sl_regions != NUL) 2497 /* This addition file is for other regions. */ 2498 region_mask = 0; 2499 } 2500 else 2501 /* This is probably an error. Give a warning and 2502 * accept the words anyway. */ 2503 smsg((char_u *) 2504 _("Warning: region %s not supported"), 2505 region); 2506 } 2507 else 2508 region_mask = 1 << c; 2509 } 2510 2511 if (region_mask != 0) 2512 { 2513 if (ga_grow(&ga, 1) == FAIL) 2514 { 2515 ga_clear(&ga); 2516 ret_msg = e_outofmem; 2517 goto theend; 2518 } 2519 LANGP_ENTRY(ga, ga.ga_len)->lp_slang = slang; 2520 LANGP_ENTRY(ga, ga.ga_len)->lp_region = region_mask; 2521 ++ga.ga_len; 2522 use_midword(slang, wp); 2523 if (slang->sl_nobreak) 2524 nobreak = TRUE; 2525 } 2526 } 2527 } 2528 2529 /* round 0: load int_wordlist, if possible. 2530 * round 1: load first name in 'spellfile'. 2531 * round 2: load second name in 'spellfile. 2532 * etc. */ 2533 spf = curwin->w_s->b_p_spf; 2534 for (round = 0; round == 0 || *spf != NUL; ++round) 2535 { 2536 if (round == 0) 2537 { 2538 /* Internal wordlist, if there is one. */ 2539 if (int_wordlist == NULL) 2540 continue; 2541 int_wordlist_spl(spf_name); 2542 } 2543 else 2544 { 2545 /* One entry in 'spellfile'. */ 2546 copy_option_part(&spf, spf_name, MAXPATHL - 5, ","); 2547 STRCAT(spf_name, ".spl"); 2548 2549 /* If it was already found above then skip it. */ 2550 for (c = 0; c < ga.ga_len; ++c) 2551 { 2552 p = LANGP_ENTRY(ga, c)->lp_slang->sl_fname; 2553 if (p != NULL && fullpathcmp(spf_name, p, FALSE) == FPC_SAME) 2554 break; 2555 } 2556 if (c < ga.ga_len) 2557 continue; 2558 } 2559 2560 /* Check if it was loaded already. */ 2561 for (slang = first_lang; slang != NULL; slang = slang->sl_next) 2562 if (fullpathcmp(spf_name, slang->sl_fname, FALSE) == FPC_SAME) 2563 break; 2564 if (slang == NULL) 2565 { 2566 /* Not loaded, try loading it now. The language name includes the 2567 * region name, the region is ignored otherwise. for int_wordlist 2568 * use an arbitrary name. */ 2569 if (round == 0) 2570 STRCPY(lang, "internal wordlist"); 2571 else 2572 { 2573 vim_strncpy(lang, gettail(spf_name), MAXWLEN); 2574 p = vim_strchr(lang, '.'); 2575 if (p != NULL) 2576 *p = NUL; /* truncate at ".encoding.add" */ 2577 } 2578 slang = spell_load_file(spf_name, lang, NULL, TRUE); 2579 2580 /* If one of the languages has NOBREAK we assume the addition 2581 * files also have this. */ 2582 if (slang != NULL && nobreak) 2583 slang->sl_nobreak = TRUE; 2584 } 2585 if (slang != NULL && ga_grow(&ga, 1) == OK) 2586 { 2587 region_mask = REGION_ALL; 2588 if (use_region != NULL && !dont_use_region) 2589 { 2590 /* find region in sl_regions */ 2591 c = find_region(slang->sl_regions, use_region); 2592 if (c != REGION_ALL) 2593 region_mask = 1 << c; 2594 else if (*slang->sl_regions != NUL) 2595 /* This spell file is for other regions. */ 2596 region_mask = 0; 2597 } 2598 2599 if (region_mask != 0) 2600 { 2601 LANGP_ENTRY(ga, ga.ga_len)->lp_slang = slang; 2602 LANGP_ENTRY(ga, ga.ga_len)->lp_sallang = NULL; 2603 LANGP_ENTRY(ga, ga.ga_len)->lp_replang = NULL; 2604 LANGP_ENTRY(ga, ga.ga_len)->lp_region = region_mask; 2605 ++ga.ga_len; 2606 use_midword(slang, wp); 2607 } 2608 } 2609 } 2610 2611 /* Everything is fine, store the new b_langp value. */ 2612 ga_clear(&wp->w_s->b_langp); 2613 wp->w_s->b_langp = ga; 2614 2615 /* For each language figure out what language to use for sound folding and 2616 * REP items. If the language doesn't support it itself use another one 2617 * with the same name. E.g. for "en-math" use "en". */ 2618 for (i = 0; i < ga.ga_len; ++i) 2619 { 2620 lp = LANGP_ENTRY(ga, i); 2621 2622 /* sound folding */ 2623 if (lp->lp_slang->sl_sal.ga_len > 0) 2624 /* language does sound folding itself */ 2625 lp->lp_sallang = lp->lp_slang; 2626 else 2627 /* find first similar language that does sound folding */ 2628 for (j = 0; j < ga.ga_len; ++j) 2629 { 2630 lp2 = LANGP_ENTRY(ga, j); 2631 if (lp2->lp_slang->sl_sal.ga_len > 0 2632 && STRNCMP(lp->lp_slang->sl_name, 2633 lp2->lp_slang->sl_name, 2) == 0) 2634 { 2635 lp->lp_sallang = lp2->lp_slang; 2636 break; 2637 } 2638 } 2639 2640 /* REP items */ 2641 if (lp->lp_slang->sl_rep.ga_len > 0) 2642 /* language has REP items itself */ 2643 lp->lp_replang = lp->lp_slang; 2644 else 2645 /* find first similar language that has REP items */ 2646 for (j = 0; j < ga.ga_len; ++j) 2647 { 2648 lp2 = LANGP_ENTRY(ga, j); 2649 if (lp2->lp_slang->sl_rep.ga_len > 0 2650 && STRNCMP(lp->lp_slang->sl_name, 2651 lp2->lp_slang->sl_name, 2) == 0) 2652 { 2653 lp->lp_replang = lp2->lp_slang; 2654 break; 2655 } 2656 } 2657 } 2658 2659 theend: 2660 vim_free(spl_copy); 2661 recursive = FALSE; 2662 redraw_win_later(wp, NOT_VALID); 2663 return ret_msg; 2664 } 2665 2666 /* 2667 * Clear the midword characters for buffer "buf". 2668 */ 2669 static void 2670 clear_midword(win_T *wp) 2671 { 2672 vim_memset(wp->w_s->b_spell_ismw, 0, 256); 2673 #ifdef FEAT_MBYTE 2674 vim_free(wp->w_s->b_spell_ismw_mb); 2675 wp->w_s->b_spell_ismw_mb = NULL; 2676 #endif 2677 } 2678 2679 /* 2680 * Use the "sl_midword" field of language "lp" for buffer "buf". 2681 * They add up to any currently used midword characters. 2682 */ 2683 static void 2684 use_midword(slang_T *lp, win_T *wp) 2685 { 2686 char_u *p; 2687 2688 if (lp->sl_midword == NULL) /* there aren't any */ 2689 return; 2690 2691 for (p = lp->sl_midword; *p != NUL; ) 2692 #ifdef FEAT_MBYTE 2693 if (has_mbyte) 2694 { 2695 int c, l, n; 2696 char_u *bp; 2697 2698 c = mb_ptr2char(p); 2699 l = (*mb_ptr2len)(p); 2700 if (c < 256 && l <= 2) 2701 wp->w_s->b_spell_ismw[c] = TRUE; 2702 else if (wp->w_s->b_spell_ismw_mb == NULL) 2703 /* First multi-byte char in "b_spell_ismw_mb". */ 2704 wp->w_s->b_spell_ismw_mb = vim_strnsave(p, l); 2705 else 2706 { 2707 /* Append multi-byte chars to "b_spell_ismw_mb". */ 2708 n = (int)STRLEN(wp->w_s->b_spell_ismw_mb); 2709 bp = vim_strnsave(wp->w_s->b_spell_ismw_mb, n + l); 2710 if (bp != NULL) 2711 { 2712 vim_free(wp->w_s->b_spell_ismw_mb); 2713 wp->w_s->b_spell_ismw_mb = bp; 2714 vim_strncpy(bp + n, p, l); 2715 } 2716 } 2717 p += l; 2718 } 2719 else 2720 #endif 2721 wp->w_s->b_spell_ismw[*p++] = TRUE; 2722 } 2723 2724 /* 2725 * Find the region "region[2]" in "rp" (points to "sl_regions"). 2726 * Each region is simply stored as the two characters of it's name. 2727 * Returns the index if found (first is 0), REGION_ALL if not found. 2728 */ 2729 static int 2730 find_region(char_u *rp, char_u *region) 2731 { 2732 int i; 2733 2734 for (i = 0; ; i += 2) 2735 { 2736 if (rp[i] == NUL) 2737 return REGION_ALL; 2738 if (rp[i] == region[0] && rp[i + 1] == region[1]) 2739 break; 2740 } 2741 return i / 2; 2742 } 2743 2744 /* 2745 * Return case type of word: 2746 * w word 0 2747 * Word WF_ONECAP 2748 * W WORD WF_ALLCAP 2749 * WoRd wOrd WF_KEEPCAP 2750 */ 2751 int 2752 captype( 2753 char_u *word, 2754 char_u *end) /* When NULL use up to NUL byte. */ 2755 { 2756 char_u *p; 2757 int c; 2758 int firstcap; 2759 int allcap; 2760 int past_second = FALSE; /* past second word char */ 2761 2762 /* find first letter */ 2763 for (p = word; !spell_iswordp_nmw(p, curwin); MB_PTR_ADV(p)) 2764 if (end == NULL ? *p == NUL : p >= end) 2765 return 0; /* only non-word characters, illegal word */ 2766 #ifdef FEAT_MBYTE 2767 if (has_mbyte) 2768 c = mb_ptr2char_adv(&p); 2769 else 2770 #endif 2771 c = *p++; 2772 firstcap = allcap = SPELL_ISUPPER(c); 2773 2774 /* 2775 * Need to check all letters to find a word with mixed upper/lower. 2776 * But a word with an upper char only at start is a ONECAP. 2777 */ 2778 for ( ; end == NULL ? *p != NUL : p < end; MB_PTR_ADV(p)) 2779 if (spell_iswordp_nmw(p, curwin)) 2780 { 2781 c = PTR2CHAR(p); 2782 if (!SPELL_ISUPPER(c)) 2783 { 2784 /* UUl -> KEEPCAP */ 2785 if (past_second && allcap) 2786 return WF_KEEPCAP; 2787 allcap = FALSE; 2788 } 2789 else if (!allcap) 2790 /* UlU -> KEEPCAP */ 2791 return WF_KEEPCAP; 2792 past_second = TRUE; 2793 } 2794 2795 if (allcap) 2796 return WF_ALLCAP; 2797 if (firstcap) 2798 return WF_ONECAP; 2799 return 0; 2800 } 2801 2802 /* 2803 * Like captype() but for a KEEPCAP word add ONECAP if the word starts with a 2804 * capital. So that make_case_word() can turn WOrd into Word. 2805 * Add ALLCAP for "WOrD". 2806 */ 2807 static int 2808 badword_captype(char_u *word, char_u *end) 2809 { 2810 int flags = captype(word, end); 2811 int c; 2812 int l, u; 2813 int first; 2814 char_u *p; 2815 2816 if (flags & WF_KEEPCAP) 2817 { 2818 /* Count the number of UPPER and lower case letters. */ 2819 l = u = 0; 2820 first = FALSE; 2821 for (p = word; p < end; MB_PTR_ADV(p)) 2822 { 2823 c = PTR2CHAR(p); 2824 if (SPELL_ISUPPER(c)) 2825 { 2826 ++u; 2827 if (p == word) 2828 first = TRUE; 2829 } 2830 else 2831 ++l; 2832 } 2833 2834 /* If there are more UPPER than lower case letters suggest an 2835 * ALLCAP word. Otherwise, if the first letter is UPPER then 2836 * suggest ONECAP. Exception: "ALl" most likely should be "All", 2837 * require three upper case letters. */ 2838 if (u > l && u > 2) 2839 flags |= WF_ALLCAP; 2840 else if (first) 2841 flags |= WF_ONECAP; 2842 2843 if (u >= 2 && l >= 2) /* maCARONI maCAroni */ 2844 flags |= WF_MIXCAP; 2845 } 2846 return flags; 2847 } 2848 2849 /* 2850 * Delete the internal wordlist and its .spl file. 2851 */ 2852 void 2853 spell_delete_wordlist(void) 2854 { 2855 char_u fname[MAXPATHL]; 2856 2857 if (int_wordlist != NULL) 2858 { 2859 mch_remove(int_wordlist); 2860 int_wordlist_spl(fname); 2861 mch_remove(fname); 2862 vim_free(int_wordlist); 2863 int_wordlist = NULL; 2864 } 2865 } 2866 2867 #if defined(FEAT_MBYTE) || defined(EXITFREE) || defined(PROTO) 2868 /* 2869 * Free all languages. 2870 */ 2871 void 2872 spell_free_all(void) 2873 { 2874 slang_T *slang; 2875 buf_T *buf; 2876 2877 /* Go through all buffers and handle 'spelllang'. <VN> */ 2878 FOR_ALL_BUFFERS(buf) 2879 ga_clear(&buf->b_s.b_langp); 2880 2881 while (first_lang != NULL) 2882 { 2883 slang = first_lang; 2884 first_lang = slang->sl_next; 2885 slang_free(slang); 2886 } 2887 2888 spell_delete_wordlist(); 2889 2890 vim_free(repl_to); 2891 repl_to = NULL; 2892 vim_free(repl_from); 2893 repl_from = NULL; 2894 } 2895 #endif 2896 2897 #if defined(FEAT_MBYTE) || defined(PROTO) 2898 /* 2899 * Clear all spelling tables and reload them. 2900 * Used after 'encoding' is set and when ":mkspell" was used. 2901 */ 2902 void 2903 spell_reload(void) 2904 { 2905 win_T *wp; 2906 2907 /* Initialize the table for spell_iswordp(). */ 2908 init_spell_chartab(); 2909 2910 /* Unload all allocated memory. */ 2911 spell_free_all(); 2912 2913 /* Go through all buffers and handle 'spelllang'. */ 2914 FOR_ALL_WINDOWS(wp) 2915 { 2916 /* Only load the wordlists when 'spelllang' is set and there is a 2917 * window for this buffer in which 'spell' is set. */ 2918 if (*wp->w_s->b_p_spl != NUL) 2919 { 2920 if (wp->w_p_spell) 2921 { 2922 (void)did_set_spelllang(wp); 2923 break; 2924 } 2925 } 2926 } 2927 } 2928 #endif 2929 2930 /* 2931 * Opposite of offset2bytes(). 2932 * "pp" points to the bytes and is advanced over it. 2933 * Returns the offset. 2934 */ 2935 static int 2936 bytes2offset(char_u **pp) 2937 { 2938 char_u *p = *pp; 2939 int nr; 2940 int c; 2941 2942 c = *p++; 2943 if ((c & 0x80) == 0x00) /* 1 byte */ 2944 { 2945 nr = c - 1; 2946 } 2947 else if ((c & 0xc0) == 0x80) /* 2 bytes */ 2948 { 2949 nr = (c & 0x3f) - 1; 2950 nr = nr * 255 + (*p++ - 1); 2951 } 2952 else if ((c & 0xe0) == 0xc0) /* 3 bytes */ 2953 { 2954 nr = (c & 0x1f) - 1; 2955 nr = nr * 255 + (*p++ - 1); 2956 nr = nr * 255 + (*p++ - 1); 2957 } 2958 else /* 4 bytes */ 2959 { 2960 nr = (c & 0x0f) - 1; 2961 nr = nr * 255 + (*p++ - 1); 2962 nr = nr * 255 + (*p++ - 1); 2963 nr = nr * 255 + (*p++ - 1); 2964 } 2965 2966 *pp = p; 2967 return nr; 2968 } 2969 2970 2971 /* 2972 * Open a spell buffer. This is a nameless buffer that is not in the buffer 2973 * list and only contains text lines. Can use a swapfile to reduce memory 2974 * use. 2975 * Most other fields are invalid! Esp. watch out for string options being 2976 * NULL and there is no undo info. 2977 * Returns NULL when out of memory. 2978 */ 2979 buf_T * 2980 open_spellbuf(void) 2981 { 2982 buf_T *buf; 2983 2984 buf = (buf_T *)alloc_clear(sizeof(buf_T)); 2985 if (buf != NULL) 2986 { 2987 buf->b_spell = TRUE; 2988 buf->b_p_swf = TRUE; /* may create a swap file */ 2989 #ifdef FEAT_CRYPT 2990 buf->b_p_key = empty_option; 2991 #endif 2992 ml_open(buf); 2993 ml_open_file(buf); /* create swap file now */ 2994 } 2995 return buf; 2996 } 2997 2998 /* 2999 * Close the buffer used for spell info. 3000 */ 3001 void 3002 close_spellbuf(buf_T *buf) 3003 { 3004 if (buf != NULL) 3005 { 3006 ml_close(buf, TRUE); 3007 vim_free(buf); 3008 } 3009 } 3010 3011 /* 3012 * Init the chartab used for spelling for ASCII. 3013 * EBCDIC is not supported! 3014 */ 3015 void 3016 clear_spell_chartab(spelltab_T *sp) 3017 { 3018 int i; 3019 3020 /* Init everything to FALSE. */ 3021 vim_memset(sp->st_isw, FALSE, sizeof(sp->st_isw)); 3022 vim_memset(sp->st_isu, FALSE, sizeof(sp->st_isu)); 3023 for (i = 0; i < 256; ++i) 3024 { 3025 sp->st_fold[i] = i; 3026 sp->st_upper[i] = i; 3027 } 3028 3029 /* We include digits. A word shouldn't start with a digit, but handling 3030 * that is done separately. */ 3031 for (i = '0'; i <= '9'; ++i) 3032 sp->st_isw[i] = TRUE; 3033 for (i = 'A'; i <= 'Z'; ++i) 3034 { 3035 sp->st_isw[i] = TRUE; 3036 sp->st_isu[i] = TRUE; 3037 sp->st_fold[i] = i + 0x20; 3038 } 3039 for (i = 'a'; i <= 'z'; ++i) 3040 { 3041 sp->st_isw[i] = TRUE; 3042 sp->st_upper[i] = i - 0x20; 3043 } 3044 } 3045 3046 /* 3047 * Init the chartab used for spelling. Only depends on 'encoding'. 3048 * Called once while starting up and when 'encoding' changes. 3049 * The default is to use isalpha(), but the spell file should define the word 3050 * characters to make it possible that 'encoding' differs from the current 3051 * locale. For utf-8 we don't use isalpha() but our own functions. 3052 */ 3053 void 3054 init_spell_chartab(void) 3055 { 3056 int i; 3057 3058 did_set_spelltab = FALSE; 3059 clear_spell_chartab(&spelltab); 3060 #ifdef FEAT_MBYTE 3061 if (enc_dbcs) 3062 { 3063 /* DBCS: assume double-wide characters are word characters. */ 3064 for (i = 128; i <= 255; ++i) 3065 if (MB_BYTE2LEN(i) == 2) 3066 spelltab.st_isw[i] = TRUE; 3067 } 3068 else if (enc_utf8) 3069 { 3070 for (i = 128; i < 256; ++i) 3071 { 3072 int f = utf_fold(i); 3073 int u = utf_toupper(i); 3074 3075 spelltab.st_isu[i] = utf_isupper(i); 3076 spelltab.st_isw[i] = spelltab.st_isu[i] || utf_islower(i); 3077 /* The folded/upper-cased value is different between latin1 and 3078 * utf8 for 0xb5, causing E763 for no good reason. Use the latin1 3079 * value for utf-8 to avoid this. */ 3080 spelltab.st_fold[i] = (f < 256) ? f : i; 3081 spelltab.st_upper[i] = (u < 256) ? u : i; 3082 } 3083 } 3084 else 3085 #endif 3086 { 3087 /* Rough guess: use locale-dependent library functions. */ 3088 for (i = 128; i < 256; ++i) 3089 { 3090 if (MB_ISUPPER(i)) 3091 { 3092 spelltab.st_isw[i] = TRUE; 3093 spelltab.st_isu[i] = TRUE; 3094 spelltab.st_fold[i] = MB_TOLOWER(i); 3095 } 3096 else if (MB_ISLOWER(i)) 3097 { 3098 spelltab.st_isw[i] = TRUE; 3099 spelltab.st_upper[i] = MB_TOUPPER(i); 3100 } 3101 } 3102 } 3103 } 3104 3105 3106 /* 3107 * Return TRUE if "p" points to a word character. 3108 * As a special case we see "midword" characters as word character when it is 3109 * followed by a word character. This finds they'there but not 'they there'. 3110 * Thus this only works properly when past the first character of the word. 3111 */ 3112 static int 3113 spell_iswordp( 3114 char_u *p, 3115 win_T *wp) /* buffer used */ 3116 { 3117 #ifdef FEAT_MBYTE 3118 char_u *s; 3119 int l; 3120 int c; 3121 3122 if (has_mbyte) 3123 { 3124 l = MB_PTR2LEN(p); 3125 s = p; 3126 if (l == 1) 3127 { 3128 /* be quick for ASCII */ 3129 if (wp->w_s->b_spell_ismw[*p]) 3130 s = p + 1; /* skip a mid-word character */ 3131 } 3132 else 3133 { 3134 c = mb_ptr2char(p); 3135 if (c < 256 ? wp->w_s->b_spell_ismw[c] 3136 : (wp->w_s->b_spell_ismw_mb != NULL 3137 && vim_strchr(wp->w_s->b_spell_ismw_mb, c) != NULL)) 3138 s = p + l; 3139 } 3140 3141 c = mb_ptr2char(s); 3142 if (c > 255) 3143 return spell_mb_isword_class(mb_get_class(s), wp); 3144 return spelltab.st_isw[c]; 3145 } 3146 #endif 3147 3148 return spelltab.st_isw[wp->w_s->b_spell_ismw[*p] ? p[1] : p[0]]; 3149 } 3150 3151 /* 3152 * Return TRUE if "p" points to a word character. 3153 * Unlike spell_iswordp() this doesn't check for "midword" characters. 3154 */ 3155 int 3156 spell_iswordp_nmw(char_u *p, win_T *wp) 3157 { 3158 #ifdef FEAT_MBYTE 3159 int c; 3160 3161 if (has_mbyte) 3162 { 3163 c = mb_ptr2char(p); 3164 if (c > 255) 3165 return spell_mb_isword_class(mb_get_class(p), wp); 3166 return spelltab.st_isw[c]; 3167 } 3168 #endif 3169 return spelltab.st_isw[*p]; 3170 } 3171 3172 #ifdef FEAT_MBYTE 3173 /* 3174 * Return TRUE if word class indicates a word character. 3175 * Only for characters above 255. 3176 * Unicode subscript and superscript are not considered word characters. 3177 * See also dbcs_class() and utf_class() in mbyte.c. 3178 */ 3179 static int 3180 spell_mb_isword_class(int cl, win_T *wp) 3181 { 3182 if (wp->w_s->b_cjk) 3183 /* East Asian characters are not considered word characters. */ 3184 return cl == 2 || cl == 0x2800; 3185 return cl >= 2 && cl != 0x2070 && cl != 0x2080; 3186 } 3187 3188 /* 3189 * Return TRUE if "p" points to a word character. 3190 * Wide version of spell_iswordp(). 3191 */ 3192 static int 3193 spell_iswordp_w(int *p, win_T *wp) 3194 { 3195 int *s; 3196 3197 if (*p < 256 ? wp->w_s->b_spell_ismw[*p] 3198 : (wp->w_s->b_spell_ismw_mb != NULL 3199 && vim_strchr(wp->w_s->b_spell_ismw_mb, *p) != NULL)) 3200 s = p + 1; 3201 else 3202 s = p; 3203 3204 if (*s > 255) 3205 { 3206 if (enc_utf8) 3207 return spell_mb_isword_class(utf_class(*s), wp); 3208 if (enc_dbcs) 3209 return spell_mb_isword_class( 3210 dbcs_class((unsigned)*s >> 8, *s & 0xff), wp); 3211 return 0; 3212 } 3213 return spelltab.st_isw[*s]; 3214 } 3215 #endif 3216 3217 /* 3218 * Case-fold "str[len]" into "buf[buflen]". The result is NUL terminated. 3219 * Uses the character definitions from the .spl file. 3220 * When using a multi-byte 'encoding' the length may change! 3221 * Returns FAIL when something wrong. 3222 */ 3223 int 3224 spell_casefold( 3225 char_u *str, 3226 int len, 3227 char_u *buf, 3228 int buflen) 3229 { 3230 int i; 3231 3232 if (len >= buflen) 3233 { 3234 buf[0] = NUL; 3235 return FAIL; /* result will not fit */ 3236 } 3237 3238 #ifdef FEAT_MBYTE 3239 if (has_mbyte) 3240 { 3241 int outi = 0; 3242 char_u *p; 3243 int c; 3244 3245 /* Fold one character at a time. */ 3246 for (p = str; p < str + len; ) 3247 { 3248 if (outi + MB_MAXBYTES > buflen) 3249 { 3250 buf[outi] = NUL; 3251 return FAIL; 3252 } 3253 c = mb_cptr2char_adv(&p); 3254 outi += mb_char2bytes(SPELL_TOFOLD(c), buf + outi); 3255 } 3256 buf[outi] = NUL; 3257 } 3258 else 3259 #endif 3260 { 3261 /* Be quick for non-multibyte encodings. */ 3262 for (i = 0; i < len; ++i) 3263 buf[i] = spelltab.st_fold[str[i]]; 3264 buf[i] = NUL; 3265 } 3266 3267 return OK; 3268 } 3269 3270 /* values for sps_flags */ 3271 #define SPS_BEST 1 3272 #define SPS_FAST 2 3273 #define SPS_DOUBLE 4 3274 3275 static int sps_flags = SPS_BEST; /* flags from 'spellsuggest' */ 3276 static int sps_limit = 9999; /* max nr of suggestions given */ 3277 3278 /* 3279 * Check the 'spellsuggest' option. Return FAIL if it's wrong. 3280 * Sets "sps_flags" and "sps_limit". 3281 */ 3282 int 3283 spell_check_sps(void) 3284 { 3285 char_u *p; 3286 char_u *s; 3287 char_u buf[MAXPATHL]; 3288 int f; 3289 3290 sps_flags = 0; 3291 sps_limit = 9999; 3292 3293 for (p = p_sps; *p != NUL; ) 3294 { 3295 copy_option_part(&p, buf, MAXPATHL, ","); 3296 3297 f = 0; 3298 if (VIM_ISDIGIT(*buf)) 3299 { 3300 s = buf; 3301 sps_limit = getdigits(&s); 3302 if (*s != NUL && !VIM_ISDIGIT(*s)) 3303 f = -1; 3304 } 3305 else if (STRCMP(buf, "best") == 0) 3306 f = SPS_BEST; 3307 else if (STRCMP(buf, "fast") == 0) 3308 f = SPS_FAST; 3309 else if (STRCMP(buf, "double") == 0) 3310 f = SPS_DOUBLE; 3311 else if (STRNCMP(buf, "expr:", 5) != 0 3312 && STRNCMP(buf, "file:", 5) != 0) 3313 f = -1; 3314 3315 if (f == -1 || (sps_flags != 0 && f != 0)) 3316 { 3317 sps_flags = SPS_BEST; 3318 sps_limit = 9999; 3319 return FAIL; 3320 } 3321 if (f != 0) 3322 sps_flags = f; 3323 } 3324 3325 if (sps_flags == 0) 3326 sps_flags = SPS_BEST; 3327 3328 return OK; 3329 } 3330 3331 /* 3332 * "z=": Find badly spelled word under or after the cursor. 3333 * Give suggestions for the properly spelled word. 3334 * In Visual mode use the highlighted word as the bad word. 3335 * When "count" is non-zero use that suggestion. 3336 */ 3337 void 3338 spell_suggest(int count) 3339 { 3340 char_u *line; 3341 pos_T prev_cursor = curwin->w_cursor; 3342 char_u wcopy[MAXWLEN + 2]; 3343 char_u *p; 3344 int i; 3345 int c; 3346 suginfo_T sug; 3347 suggest_T *stp; 3348 int mouse_used; 3349 int need_cap; 3350 int limit; 3351 int selected = count; 3352 int badlen = 0; 3353 int msg_scroll_save = msg_scroll; 3354 3355 if (no_spell_checking(curwin)) 3356 return; 3357 3358 if (VIsual_active) 3359 { 3360 /* Use the Visually selected text as the bad word. But reject 3361 * a multi-line selection. */ 3362 if (curwin->w_cursor.lnum != VIsual.lnum) 3363 { 3364 vim_beep(BO_SPELL); 3365 return; 3366 } 3367 badlen = (int)curwin->w_cursor.col - (int)VIsual.col; 3368 if (badlen < 0) 3369 badlen = -badlen; 3370 else 3371 curwin->w_cursor.col = VIsual.col; 3372 ++badlen; 3373 end_visual_mode(); 3374 } 3375 /* Find the start of the badly spelled word. */ 3376 else if (spell_move_to(curwin, FORWARD, TRUE, TRUE, NULL) == 0 3377 || curwin->w_cursor.col > prev_cursor.col) 3378 { 3379 /* No bad word or it starts after the cursor: use the word under the 3380 * cursor. */ 3381 curwin->w_cursor = prev_cursor; 3382 line = ml_get_curline(); 3383 p = line + curwin->w_cursor.col; 3384 /* Backup to before start of word. */ 3385 while (p > line && spell_iswordp_nmw(p, curwin)) 3386 MB_PTR_BACK(line, p); 3387 /* Forward to start of word. */ 3388 while (*p != NUL && !spell_iswordp_nmw(p, curwin)) 3389 MB_PTR_ADV(p); 3390 3391 if (!spell_iswordp_nmw(p, curwin)) /* No word found. */ 3392 { 3393 beep_flush(); 3394 return; 3395 } 3396 curwin->w_cursor.col = (colnr_T)(p - line); 3397 } 3398 3399 /* Get the word and its length. */ 3400 3401 /* Figure out if the word should be capitalised. */ 3402 need_cap = check_need_cap(curwin->w_cursor.lnum, curwin->w_cursor.col); 3403 3404 /* Make a copy of current line since autocommands may free the line. */ 3405 line = vim_strsave(ml_get_curline()); 3406 if (line == NULL) 3407 goto skip; 3408 3409 /* Get the list of suggestions. Limit to 'lines' - 2 or the number in 3410 * 'spellsuggest', whatever is smaller. */ 3411 if (sps_limit > (int)Rows - 2) 3412 limit = (int)Rows - 2; 3413 else 3414 limit = sps_limit; 3415 spell_find_suggest(line + curwin->w_cursor.col, badlen, &sug, limit, 3416 TRUE, need_cap, TRUE); 3417 3418 if (sug.su_ga.ga_len == 0) 3419 MSG(_("Sorry, no suggestions")); 3420 else if (count > 0) 3421 { 3422 if (count > sug.su_ga.ga_len) 3423 smsg((char_u *)_("Sorry, only %ld suggestions"), 3424 (long)sug.su_ga.ga_len); 3425 } 3426 else 3427 { 3428 vim_free(repl_from); 3429 repl_from = NULL; 3430 vim_free(repl_to); 3431 repl_to = NULL; 3432 3433 #ifdef FEAT_RIGHTLEFT 3434 /* When 'rightleft' is set the list is drawn right-left. */ 3435 cmdmsg_rl = curwin->w_p_rl; 3436 if (cmdmsg_rl) 3437 msg_col = Columns - 1; 3438 #endif 3439 3440 /* List the suggestions. */ 3441 msg_start(); 3442 msg_row = Rows - 1; /* for when 'cmdheight' > 1 */ 3443 lines_left = Rows; /* avoid more prompt */ 3444 vim_snprintf((char *)IObuff, IOSIZE, _("Change \"%.*s\" to:"), 3445 sug.su_badlen, sug.su_badptr); 3446 #ifdef FEAT_RIGHTLEFT 3447 if (cmdmsg_rl && STRNCMP(IObuff, "Change", 6) == 0) 3448 { 3449 /* And now the rabbit from the high hat: Avoid showing the 3450 * untranslated message rightleft. */ 3451 vim_snprintf((char *)IObuff, IOSIZE, ":ot \"%.*s\" egnahC", 3452 sug.su_badlen, sug.su_badptr); 3453 } 3454 #endif 3455 msg_puts(IObuff); 3456 msg_clr_eos(); 3457 msg_putchar('\n'); 3458 3459 msg_scroll = TRUE; 3460 for (i = 0; i < sug.su_ga.ga_len; ++i) 3461 { 3462 stp = &SUG(sug.su_ga, i); 3463 3464 /* The suggested word may replace only part of the bad word, add 3465 * the not replaced part. */ 3466 vim_strncpy(wcopy, stp->st_word, MAXWLEN); 3467 if (sug.su_badlen > stp->st_orglen) 3468 vim_strncpy(wcopy + stp->st_wordlen, 3469 sug.su_badptr + stp->st_orglen, 3470 sug.su_badlen - stp->st_orglen); 3471 vim_snprintf((char *)IObuff, IOSIZE, "%2d", i + 1); 3472 #ifdef FEAT_RIGHTLEFT 3473 if (cmdmsg_rl) 3474 rl_mirror(IObuff); 3475 #endif 3476 msg_puts(IObuff); 3477 3478 vim_snprintf((char *)IObuff, IOSIZE, " \"%s\"", wcopy); 3479 msg_puts(IObuff); 3480 3481 /* The word may replace more than "su_badlen". */ 3482 if (sug.su_badlen < stp->st_orglen) 3483 { 3484 vim_snprintf((char *)IObuff, IOSIZE, _(" < \"%.*s\""), 3485 stp->st_orglen, sug.su_badptr); 3486 msg_puts(IObuff); 3487 } 3488 3489 if (p_verbose > 0) 3490 { 3491 /* Add the score. */ 3492 if (sps_flags & (SPS_DOUBLE | SPS_BEST)) 3493 vim_snprintf((char *)IObuff, IOSIZE, " (%s%d - %d)", 3494 stp->st_salscore ? "s " : "", 3495 stp->st_score, stp->st_altscore); 3496 else 3497 vim_snprintf((char *)IObuff, IOSIZE, " (%d)", 3498 stp->st_score); 3499 #ifdef FEAT_RIGHTLEFT 3500 if (cmdmsg_rl) 3501 /* Mirror the numbers, but keep the leading space. */ 3502 rl_mirror(IObuff + 1); 3503 #endif 3504 msg_advance(30); 3505 msg_puts(IObuff); 3506 } 3507 msg_putchar('\n'); 3508 } 3509 3510 #ifdef FEAT_RIGHTLEFT 3511 cmdmsg_rl = FALSE; 3512 msg_col = 0; 3513 #endif 3514 /* Ask for choice. */ 3515 selected = prompt_for_number(&mouse_used); 3516 if (mouse_used) 3517 selected -= lines_left; 3518 lines_left = Rows; /* avoid more prompt */ 3519 /* don't delay for 'smd' in normal_cmd() */ 3520 msg_scroll = msg_scroll_save; 3521 } 3522 3523 if (selected > 0 && selected <= sug.su_ga.ga_len && u_save_cursor() == OK) 3524 { 3525 /* Save the from and to text for :spellrepall. */ 3526 stp = &SUG(sug.su_ga, selected - 1); 3527 if (sug.su_badlen > stp->st_orglen) 3528 { 3529 /* Replacing less than "su_badlen", append the remainder to 3530 * repl_to. */ 3531 repl_from = vim_strnsave(sug.su_badptr, sug.su_badlen); 3532 vim_snprintf((char *)IObuff, IOSIZE, "%s%.*s", stp->st_word, 3533 sug.su_badlen - stp->st_orglen, 3534 sug.su_badptr + stp->st_orglen); 3535 repl_to = vim_strsave(IObuff); 3536 } 3537 else 3538 { 3539 /* Replacing su_badlen or more, use the whole word. */ 3540 repl_from = vim_strnsave(sug.su_badptr, stp->st_orglen); 3541 repl_to = vim_strsave(stp->st_word); 3542 } 3543 3544 /* Replace the word. */ 3545 p = alloc((unsigned)STRLEN(line) - stp->st_orglen 3546 + stp->st_wordlen + 1); 3547 if (p != NULL) 3548 { 3549 c = (int)(sug.su_badptr - line); 3550 mch_memmove(p, line, c); 3551 STRCPY(p + c, stp->st_word); 3552 STRCAT(p, sug.su_badptr + stp->st_orglen); 3553 ml_replace(curwin->w_cursor.lnum, p, FALSE); 3554 curwin->w_cursor.col = c; 3555 3556 /* For redo we use a change-word command. */ 3557 ResetRedobuff(); 3558 AppendToRedobuff((char_u *)"ciw"); 3559 AppendToRedobuffLit(p + c, 3560 stp->st_wordlen + sug.su_badlen - stp->st_orglen); 3561 AppendCharToRedobuff(ESC); 3562 3563 /* After this "p" may be invalid. */ 3564 changed_bytes(curwin->w_cursor.lnum, c); 3565 } 3566 } 3567 else 3568 curwin->w_cursor = prev_cursor; 3569 3570 spell_find_cleanup(&sug); 3571 skip: 3572 vim_free(line); 3573 } 3574 3575 /* 3576 * Check if the word at line "lnum" column "col" is required to start with a 3577 * capital. This uses 'spellcapcheck' of the current buffer. 3578 */ 3579 static int 3580 check_need_cap(linenr_T lnum, colnr_T col) 3581 { 3582 int need_cap = FALSE; 3583 char_u *line; 3584 char_u *line_copy = NULL; 3585 char_u *p; 3586 colnr_T endcol; 3587 regmatch_T regmatch; 3588 3589 if (curwin->w_s->b_cap_prog == NULL) 3590 return FALSE; 3591 3592 line = ml_get_curline(); 3593 endcol = 0; 3594 if (getwhitecols(line) >= (int)col) 3595 { 3596 /* At start of line, check if previous line is empty or sentence 3597 * ends there. */ 3598 if (lnum == 1) 3599 need_cap = TRUE; 3600 else 3601 { 3602 line = ml_get(lnum - 1); 3603 if (*skipwhite(line) == NUL) 3604 need_cap = TRUE; 3605 else 3606 { 3607 /* Append a space in place of the line break. */ 3608 line_copy = concat_str(line, (char_u *)" "); 3609 line = line_copy; 3610 endcol = (colnr_T)STRLEN(line); 3611 } 3612 } 3613 } 3614 else 3615 endcol = col; 3616 3617 if (endcol > 0) 3618 { 3619 /* Check if sentence ends before the bad word. */ 3620 regmatch.regprog = curwin->w_s->b_cap_prog; 3621 regmatch.rm_ic = FALSE; 3622 p = line + endcol; 3623 for (;;) 3624 { 3625 MB_PTR_BACK(line, p); 3626 if (p == line || spell_iswordp_nmw(p, curwin)) 3627 break; 3628 if (vim_regexec(®match, p, 0) 3629 && regmatch.endp[0] == line + endcol) 3630 { 3631 need_cap = TRUE; 3632 break; 3633 } 3634 } 3635 curwin->w_s->b_cap_prog = regmatch.regprog; 3636 } 3637 3638 vim_free(line_copy); 3639 3640 return need_cap; 3641 } 3642 3643 3644 /* 3645 * ":spellrepall" 3646 */ 3647 void 3648 ex_spellrepall(exarg_T *eap UNUSED) 3649 { 3650 pos_T pos = curwin->w_cursor; 3651 char_u *frompat; 3652 int addlen; 3653 char_u *line; 3654 char_u *p; 3655 int save_ws = p_ws; 3656 linenr_T prev_lnum = 0; 3657 3658 if (repl_from == NULL || repl_to == NULL) 3659 { 3660 EMSG(_("E752: No previous spell replacement")); 3661 return; 3662 } 3663 addlen = (int)(STRLEN(repl_to) - STRLEN(repl_from)); 3664 3665 frompat = alloc((unsigned)STRLEN(repl_from) + 7); 3666 if (frompat == NULL) 3667 return; 3668 sprintf((char *)frompat, "\\V\\<%s\\>", repl_from); 3669 p_ws = FALSE; 3670 3671 sub_nsubs = 0; 3672 sub_nlines = 0; 3673 curwin->w_cursor.lnum = 0; 3674 while (!got_int) 3675 { 3676 if (do_search(NULL, '/', frompat, 1L, SEARCH_KEEP, NULL, NULL) == 0 3677 || u_save_cursor() == FAIL) 3678 break; 3679 3680 /* Only replace when the right word isn't there yet. This happens 3681 * when changing "etc" to "etc.". */ 3682 line = ml_get_curline(); 3683 if (addlen <= 0 || STRNCMP(line + curwin->w_cursor.col, 3684 repl_to, STRLEN(repl_to)) != 0) 3685 { 3686 p = alloc((unsigned)STRLEN(line) + addlen + 1); 3687 if (p == NULL) 3688 break; 3689 mch_memmove(p, line, curwin->w_cursor.col); 3690 STRCPY(p + curwin->w_cursor.col, repl_to); 3691 STRCAT(p, line + curwin->w_cursor.col + STRLEN(repl_from)); 3692 ml_replace(curwin->w_cursor.lnum, p, FALSE); 3693 changed_bytes(curwin->w_cursor.lnum, curwin->w_cursor.col); 3694 3695 if (curwin->w_cursor.lnum != prev_lnum) 3696 { 3697 ++sub_nlines; 3698 prev_lnum = curwin->w_cursor.lnum; 3699 } 3700 ++sub_nsubs; 3701 } 3702 curwin->w_cursor.col += (colnr_T)STRLEN(repl_to); 3703 } 3704 3705 p_ws = save_ws; 3706 curwin->w_cursor = pos; 3707 vim_free(frompat); 3708 3709 if (sub_nsubs == 0) 3710 EMSG2(_("E753: Not found: %s"), repl_from); 3711 else 3712 do_sub_msg(FALSE); 3713 } 3714 3715 /* 3716 * Find spell suggestions for "word". Return them in the growarray "*gap" as 3717 * a list of allocated strings. 3718 */ 3719 void 3720 spell_suggest_list( 3721 garray_T *gap, 3722 char_u *word, 3723 int maxcount, /* maximum nr of suggestions */ 3724 int need_cap, /* 'spellcapcheck' matched */ 3725 int interactive) 3726 { 3727 suginfo_T sug; 3728 int i; 3729 suggest_T *stp; 3730 char_u *wcopy; 3731 3732 spell_find_suggest(word, 0, &sug, maxcount, FALSE, need_cap, interactive); 3733 3734 /* Make room in "gap". */ 3735 ga_init2(gap, sizeof(char_u *), sug.su_ga.ga_len + 1); 3736 if (ga_grow(gap, sug.su_ga.ga_len) == OK) 3737 { 3738 for (i = 0; i < sug.su_ga.ga_len; ++i) 3739 { 3740 stp = &SUG(sug.su_ga, i); 3741 3742 /* The suggested word may replace only part of "word", add the not 3743 * replaced part. */ 3744 wcopy = alloc(stp->st_wordlen 3745 + (unsigned)STRLEN(sug.su_badptr + stp->st_orglen) + 1); 3746 if (wcopy == NULL) 3747 break; 3748 STRCPY(wcopy, stp->st_word); 3749 STRCPY(wcopy + stp->st_wordlen, sug.su_badptr + stp->st_orglen); 3750 ((char_u **)gap->ga_data)[gap->ga_len++] = wcopy; 3751 } 3752 } 3753 3754 spell_find_cleanup(&sug); 3755 } 3756 3757 /* 3758 * Find spell suggestions for the word at the start of "badptr". 3759 * Return the suggestions in "su->su_ga". 3760 * The maximum number of suggestions is "maxcount". 3761 * Note: does use info for the current window. 3762 * This is based on the mechanisms of Aspell, but completely reimplemented. 3763 */ 3764 static void 3765 spell_find_suggest( 3766 char_u *badptr, 3767 int badlen, /* length of bad word or 0 if unknown */ 3768 suginfo_T *su, 3769 int maxcount, 3770 int banbadword, /* don't include badword in suggestions */ 3771 int need_cap, /* word should start with capital */ 3772 int interactive) 3773 { 3774 hlf_T attr = HLF_COUNT; 3775 char_u buf[MAXPATHL]; 3776 char_u *p; 3777 int do_combine = FALSE; 3778 char_u *sps_copy; 3779 #ifdef FEAT_EVAL 3780 static int expr_busy = FALSE; 3781 #endif 3782 int c; 3783 int i; 3784 langp_T *lp; 3785 3786 /* 3787 * Set the info in "*su". 3788 */ 3789 vim_memset(su, 0, sizeof(suginfo_T)); 3790 ga_init2(&su->su_ga, (int)sizeof(suggest_T), 10); 3791 ga_init2(&su->su_sga, (int)sizeof(suggest_T), 10); 3792 if (*badptr == NUL) 3793 return; 3794 hash_init(&su->su_banned); 3795 3796 su->su_badptr = badptr; 3797 if (badlen != 0) 3798 su->su_badlen = badlen; 3799 else 3800 su->su_badlen = spell_check(curwin, su->su_badptr, &attr, NULL, FALSE); 3801 su->su_maxcount = maxcount; 3802 su->su_maxscore = SCORE_MAXINIT; 3803 3804 if (su->su_badlen >= MAXWLEN) 3805 su->su_badlen = MAXWLEN - 1; /* just in case */ 3806 vim_strncpy(su->su_badword, su->su_badptr, su->su_badlen); 3807 (void)spell_casefold(su->su_badptr, su->su_badlen, 3808 su->su_fbadword, MAXWLEN); 3809 /* TODO: make this work if the case-folded text is longer than the original 3810 * text. Currently an illegal byte causes wrong pointer computations. */ 3811 su->su_fbadword[su->su_badlen] = NUL; 3812 3813 /* get caps flags for bad word */ 3814 su->su_badflags = badword_captype(su->su_badptr, 3815 su->su_badptr + su->su_badlen); 3816 if (need_cap) 3817 su->su_badflags |= WF_ONECAP; 3818 3819 /* Find the default language for sound folding. We simply use the first 3820 * one in 'spelllang' that supports sound folding. That's good for when 3821 * using multiple files for one language, it's not that bad when mixing 3822 * languages (e.g., "pl,en"). */ 3823 for (i = 0; i < curbuf->b_s.b_langp.ga_len; ++i) 3824 { 3825 lp = LANGP_ENTRY(curbuf->b_s.b_langp, i); 3826 if (lp->lp_sallang != NULL) 3827 { 3828 su->su_sallang = lp->lp_sallang; 3829 break; 3830 } 3831 } 3832 3833 /* Soundfold the bad word with the default sound folding, so that we don't 3834 * have to do this many times. */ 3835 if (su->su_sallang != NULL) 3836 spell_soundfold(su->su_sallang, su->su_fbadword, TRUE, 3837 su->su_sal_badword); 3838 3839 /* If the word is not capitalised and spell_check() doesn't consider the 3840 * word to be bad then it might need to be capitalised. Add a suggestion 3841 * for that. */ 3842 c = PTR2CHAR(su->su_badptr); 3843 if (!SPELL_ISUPPER(c) && attr == HLF_COUNT) 3844 { 3845 make_case_word(su->su_badword, buf, WF_ONECAP); 3846 add_suggestion(su, &su->su_ga, buf, su->su_badlen, SCORE_ICASE, 3847 0, TRUE, su->su_sallang, FALSE); 3848 } 3849 3850 /* Ban the bad word itself. It may appear in another region. */ 3851 if (banbadword) 3852 add_banned(su, su->su_badword); 3853 3854 /* Make a copy of 'spellsuggest', because the expression may change it. */ 3855 sps_copy = vim_strsave(p_sps); 3856 if (sps_copy == NULL) 3857 return; 3858 3859 /* Loop over the items in 'spellsuggest'. */ 3860 for (p = sps_copy; *p != NUL; ) 3861 { 3862 copy_option_part(&p, buf, MAXPATHL, ","); 3863 3864 if (STRNCMP(buf, "expr:", 5) == 0) 3865 { 3866 #ifdef FEAT_EVAL 3867 /* Evaluate an expression. Skip this when called recursively, 3868 * when using spellsuggest() in the expression. */ 3869 if (!expr_busy) 3870 { 3871 expr_busy = TRUE; 3872 spell_suggest_expr(su, buf + 5); 3873 expr_busy = FALSE; 3874 } 3875 #endif 3876 } 3877 else if (STRNCMP(buf, "file:", 5) == 0) 3878 /* Use list of suggestions in a file. */ 3879 spell_suggest_file(su, buf + 5); 3880 else 3881 { 3882 /* Use internal method. */ 3883 spell_suggest_intern(su, interactive); 3884 if (sps_flags & SPS_DOUBLE) 3885 do_combine = TRUE; 3886 } 3887 } 3888 3889 vim_free(sps_copy); 3890 3891 if (do_combine) 3892 /* Combine the two list of suggestions. This must be done last, 3893 * because sorting changes the order again. */ 3894 score_combine(su); 3895 } 3896 3897 #ifdef FEAT_EVAL 3898 /* 3899 * Find suggestions by evaluating expression "expr". 3900 */ 3901 static void 3902 spell_suggest_expr(suginfo_T *su, char_u *expr) 3903 { 3904 list_T *list; 3905 listitem_T *li; 3906 int score; 3907 char_u *p; 3908 3909 /* The work is split up in a few parts to avoid having to export 3910 * suginfo_T. 3911 * First evaluate the expression and get the resulting list. */ 3912 list = eval_spell_expr(su->su_badword, expr); 3913 if (list != NULL) 3914 { 3915 /* Loop over the items in the list. */ 3916 for (li = list->lv_first; li != NULL; li = li->li_next) 3917 if (li->li_tv.v_type == VAR_LIST) 3918 { 3919 /* Get the word and the score from the items. */ 3920 score = get_spellword(li->li_tv.vval.v_list, &p); 3921 if (score >= 0 && score <= su->su_maxscore) 3922 add_suggestion(su, &su->su_ga, p, su->su_badlen, 3923 score, 0, TRUE, su->su_sallang, FALSE); 3924 } 3925 list_unref(list); 3926 } 3927 3928 /* Remove bogus suggestions, sort and truncate at "maxcount". */ 3929 check_suggestions(su, &su->su_ga); 3930 (void)cleanup_suggestions(&su->su_ga, su->su_maxscore, su->su_maxcount); 3931 } 3932 #endif 3933 3934 /* 3935 * Find suggestions in file "fname". Used for "file:" in 'spellsuggest'. 3936 */ 3937 static void 3938 spell_suggest_file(suginfo_T *su, char_u *fname) 3939 { 3940 FILE *fd; 3941 char_u line[MAXWLEN * 2]; 3942 char_u *p; 3943 int len; 3944 char_u cword[MAXWLEN]; 3945 3946 /* Open the file. */ 3947 fd = mch_fopen((char *)fname, "r"); 3948 if (fd == NULL) 3949 { 3950 EMSG2(_(e_notopen), fname); 3951 return; 3952 } 3953 3954 /* Read it line by line. */ 3955 while (!vim_fgets(line, MAXWLEN * 2, fd) && !got_int) 3956 { 3957 line_breakcheck(); 3958 3959 p = vim_strchr(line, '/'); 3960 if (p == NULL) 3961 continue; /* No Tab found, just skip the line. */ 3962 *p++ = NUL; 3963 if (STRICMP(su->su_badword, line) == 0) 3964 { 3965 /* Match! Isolate the good word, until CR or NL. */ 3966 for (len = 0; p[len] >= ' '; ++len) 3967 ; 3968 p[len] = NUL; 3969 3970 /* If the suggestion doesn't have specific case duplicate the case 3971 * of the bad word. */ 3972 if (captype(p, NULL) == 0) 3973 { 3974 make_case_word(p, cword, su->su_badflags); 3975 p = cword; 3976 } 3977 3978 add_suggestion(su, &su->su_ga, p, su->su_badlen, 3979 SCORE_FILE, 0, TRUE, su->su_sallang, FALSE); 3980 } 3981 } 3982 3983 fclose(fd); 3984 3985 /* Remove bogus suggestions, sort and truncate at "maxcount". */ 3986 check_suggestions(su, &su->su_ga); 3987 (void)cleanup_suggestions(&su->su_ga, su->su_maxscore, su->su_maxcount); 3988 } 3989 3990 /* 3991 * Find suggestions for the internal method indicated by "sps_flags". 3992 */ 3993 static void 3994 spell_suggest_intern(suginfo_T *su, int interactive) 3995 { 3996 /* 3997 * Load the .sug file(s) that are available and not done yet. 3998 */ 3999 suggest_load_files(); 4000 4001 /* 4002 * 1. Try special cases, such as repeating a word: "the the" -> "the". 4003 * 4004 * Set a maximum score to limit the combination of operations that is 4005 * tried. 4006 */ 4007 suggest_try_special(su); 4008 4009 /* 4010 * 2. Try inserting/deleting/swapping/changing a letter, use REP entries 4011 * from the .aff file and inserting a space (split the word). 4012 */ 4013 suggest_try_change(su); 4014 4015 /* For the resulting top-scorers compute the sound-a-like score. */ 4016 if (sps_flags & SPS_DOUBLE) 4017 score_comp_sal(su); 4018 4019 /* 4020 * 3. Try finding sound-a-like words. 4021 */ 4022 if ((sps_flags & SPS_FAST) == 0) 4023 { 4024 if (sps_flags & SPS_BEST) 4025 /* Adjust the word score for the suggestions found so far for how 4026 * they sounds like. */ 4027 rescore_suggestions(su); 4028 4029 /* 4030 * While going through the soundfold tree "su_maxscore" is the score 4031 * for the soundfold word, limits the changes that are being tried, 4032 * and "su_sfmaxscore" the rescored score, which is set by 4033 * cleanup_suggestions(). 4034 * First find words with a small edit distance, because this is much 4035 * faster and often already finds the top-N suggestions. If we didn't 4036 * find many suggestions try again with a higher edit distance. 4037 * "sl_sounddone" is used to avoid doing the same word twice. 4038 */ 4039 suggest_try_soundalike_prep(); 4040 su->su_maxscore = SCORE_SFMAX1; 4041 su->su_sfmaxscore = SCORE_MAXINIT * 3; 4042 suggest_try_soundalike(su); 4043 if (su->su_ga.ga_len < SUG_CLEAN_COUNT(su)) 4044 { 4045 /* We didn't find enough matches, try again, allowing more 4046 * changes to the soundfold word. */ 4047 su->su_maxscore = SCORE_SFMAX2; 4048 suggest_try_soundalike(su); 4049 if (su->su_ga.ga_len < SUG_CLEAN_COUNT(su)) 4050 { 4051 /* Still didn't find enough matches, try again, allowing even 4052 * more changes to the soundfold word. */ 4053 su->su_maxscore = SCORE_SFMAX3; 4054 suggest_try_soundalike(su); 4055 } 4056 } 4057 su->su_maxscore = su->su_sfmaxscore; 4058 suggest_try_soundalike_finish(); 4059 } 4060 4061 /* When CTRL-C was hit while searching do show the results. Only clear 4062 * got_int when using a command, not for spellsuggest(). */ 4063 ui_breakcheck(); 4064 if (interactive && got_int) 4065 { 4066 (void)vgetc(); 4067 got_int = FALSE; 4068 } 4069 4070 if ((sps_flags & SPS_DOUBLE) == 0 && su->su_ga.ga_len != 0) 4071 { 4072 if (sps_flags & SPS_BEST) 4073 /* Adjust the word score for how it sounds like. */ 4074 rescore_suggestions(su); 4075 4076 /* Remove bogus suggestions, sort and truncate at "maxcount". */ 4077 check_suggestions(su, &su->su_ga); 4078 (void)cleanup_suggestions(&su->su_ga, su->su_maxscore, su->su_maxcount); 4079 } 4080 } 4081 4082 /* 4083 * Free the info put in "*su" by spell_find_suggest(). 4084 */ 4085 static void 4086 spell_find_cleanup(suginfo_T *su) 4087 { 4088 int i; 4089 4090 /* Free the suggestions. */ 4091 for (i = 0; i < su->su_ga.ga_len; ++i) 4092 vim_free(SUG(su->su_ga, i).st_word); 4093 ga_clear(&su->su_ga); 4094 for (i = 0; i < su->su_sga.ga_len; ++i) 4095 vim_free(SUG(su->su_sga, i).st_word); 4096 ga_clear(&su->su_sga); 4097 4098 /* Free the banned words. */ 4099 hash_clear_all(&su->su_banned, 0); 4100 } 4101 4102 /* 4103 * Make a copy of "word", with the first letter upper or lower cased, to 4104 * "wcopy[MAXWLEN]". "word" must not be empty. 4105 * The result is NUL terminated. 4106 */ 4107 void 4108 onecap_copy( 4109 char_u *word, 4110 char_u *wcopy, 4111 int upper) /* TRUE: first letter made upper case */ 4112 { 4113 char_u *p; 4114 int c; 4115 int l; 4116 4117 p = word; 4118 #ifdef FEAT_MBYTE 4119 if (has_mbyte) 4120 c = mb_cptr2char_adv(&p); 4121 else 4122 #endif 4123 c = *p++; 4124 if (upper) 4125 c = SPELL_TOUPPER(c); 4126 else 4127 c = SPELL_TOFOLD(c); 4128 #ifdef FEAT_MBYTE 4129 if (has_mbyte) 4130 l = mb_char2bytes(c, wcopy); 4131 else 4132 #endif 4133 { 4134 l = 1; 4135 wcopy[0] = c; 4136 } 4137 vim_strncpy(wcopy + l, p, MAXWLEN - l - 1); 4138 } 4139 4140 /* 4141 * Make a copy of "word" with all the letters upper cased into 4142 * "wcopy[MAXWLEN]". The result is NUL terminated. 4143 */ 4144 static void 4145 allcap_copy(char_u *word, char_u *wcopy) 4146 { 4147 char_u *s; 4148 char_u *d; 4149 int c; 4150 4151 d = wcopy; 4152 for (s = word; *s != NUL; ) 4153 { 4154 #ifdef FEAT_MBYTE 4155 if (has_mbyte) 4156 c = mb_cptr2char_adv(&s); 4157 else 4158 #endif 4159 c = *s++; 4160 4161 #ifdef FEAT_MBYTE 4162 /* We only change 0xdf to SS when we are certain latin1 is used. It 4163 * would cause weird errors in other 8-bit encodings. */ 4164 if (enc_latin1like && c == 0xdf) 4165 { 4166 c = 'S'; 4167 if (d - wcopy >= MAXWLEN - 1) 4168 break; 4169 *d++ = c; 4170 } 4171 else 4172 #endif 4173 c = SPELL_TOUPPER(c); 4174 4175 #ifdef FEAT_MBYTE 4176 if (has_mbyte) 4177 { 4178 if (d - wcopy >= MAXWLEN - MB_MAXBYTES) 4179 break; 4180 d += mb_char2bytes(c, d); 4181 } 4182 else 4183 #endif 4184 { 4185 if (d - wcopy >= MAXWLEN - 1) 4186 break; 4187 *d++ = c; 4188 } 4189 } 4190 *d = NUL; 4191 } 4192 4193 /* 4194 * Try finding suggestions by recognizing specific situations. 4195 */ 4196 static void 4197 suggest_try_special(suginfo_T *su) 4198 { 4199 char_u *p; 4200 size_t len; 4201 int c; 4202 char_u word[MAXWLEN]; 4203 4204 /* 4205 * Recognize a word that is repeated: "the the". 4206 */ 4207 p = skiptowhite(su->su_fbadword); 4208 len = p - su->su_fbadword; 4209 p = skipwhite(p); 4210 if (STRLEN(p) == len && STRNCMP(su->su_fbadword, p, len) == 0) 4211 { 4212 /* Include badflags: if the badword is onecap or allcap 4213 * use that for the goodword too: "The the" -> "The". */ 4214 c = su->su_fbadword[len]; 4215 su->su_fbadword[len] = NUL; 4216 make_case_word(su->su_fbadword, word, su->su_badflags); 4217 su->su_fbadword[len] = c; 4218 4219 /* Give a soundalike score of 0, compute the score as if deleting one 4220 * character. */ 4221 add_suggestion(su, &su->su_ga, word, su->su_badlen, 4222 RESCORE(SCORE_REP, 0), 0, TRUE, su->su_sallang, FALSE); 4223 } 4224 } 4225 4226 /* 4227 * Change the 0 to 1 to measure how much time is spent in each state. 4228 * Output is dumped in "suggestprof". 4229 */ 4230 #if 0 4231 # define SUGGEST_PROFILE 4232 proftime_T current; 4233 proftime_T total; 4234 proftime_T times[STATE_FINAL + 1]; 4235 long counts[STATE_FINAL + 1]; 4236 4237 static void 4238 prof_init(void) 4239 { 4240 for (int i = 0; i <= STATE_FINAL; ++i) 4241 { 4242 profile_zero(×[i]); 4243 counts[i] = 0; 4244 } 4245 profile_start(¤t); 4246 profile_start(&total); 4247 } 4248 4249 /* call before changing state */ 4250 static void 4251 prof_store(state_T state) 4252 { 4253 profile_end(¤t); 4254 profile_add(×[state], ¤t); 4255 ++counts[state]; 4256 profile_start(¤t); 4257 } 4258 # define PROF_STORE(state) prof_store(state); 4259 4260 static void 4261 prof_report(char *name) 4262 { 4263 FILE *fd = fopen("suggestprof", "a"); 4264 4265 profile_end(&total); 4266 fprintf(fd, "-----------------------\n"); 4267 fprintf(fd, "%s: %s\n", name, profile_msg(&total)); 4268 for (int i = 0; i <= STATE_FINAL; ++i) 4269 fprintf(fd, "%d: %s (%ld)\n", i, profile_msg(×[i]), counts[i]); 4270 fclose(fd); 4271 } 4272 #else 4273 # define PROF_STORE(state) 4274 #endif 4275 4276 /* 4277 * Try finding suggestions by adding/removing/swapping letters. 4278 */ 4279 static void 4280 suggest_try_change(suginfo_T *su) 4281 { 4282 char_u fword[MAXWLEN]; /* copy of the bad word, case-folded */ 4283 int n; 4284 char_u *p; 4285 int lpi; 4286 langp_T *lp; 4287 4288 /* We make a copy of the case-folded bad word, so that we can modify it 4289 * to find matches (esp. REP items). Append some more text, changing 4290 * chars after the bad word may help. */ 4291 STRCPY(fword, su->su_fbadword); 4292 n = (int)STRLEN(fword); 4293 p = su->su_badptr + su->su_badlen; 4294 (void)spell_casefold(p, (int)STRLEN(p), fword + n, MAXWLEN - n); 4295 4296 for (lpi = 0; lpi < curwin->w_s->b_langp.ga_len; ++lpi) 4297 { 4298 lp = LANGP_ENTRY(curwin->w_s->b_langp, lpi); 4299 4300 /* If reloading a spell file fails it's still in the list but 4301 * everything has been cleared. */ 4302 if (lp->lp_slang->sl_fbyts == NULL) 4303 continue; 4304 4305 /* Try it for this language. Will add possible suggestions. */ 4306 #ifdef SUGGEST_PROFILE 4307 prof_init(); 4308 #endif 4309 suggest_trie_walk(su, lp, fword, FALSE); 4310 #ifdef SUGGEST_PROFILE 4311 prof_report("try_change"); 4312 #endif 4313 } 4314 } 4315 4316 /* Check the maximum score, if we go over it we won't try this change. */ 4317 #define TRY_DEEPER(su, stack, depth, add) \ 4318 (stack[depth].ts_score + (add) < su->su_maxscore) 4319 4320 /* 4321 * Try finding suggestions by adding/removing/swapping letters. 4322 * 4323 * This uses a state machine. At each node in the tree we try various 4324 * operations. When trying if an operation works "depth" is increased and the 4325 * stack[] is used to store info. This allows combinations, thus insert one 4326 * character, replace one and delete another. The number of changes is 4327 * limited by su->su_maxscore. 4328 * 4329 * After implementing this I noticed an article by Kemal Oflazer that 4330 * describes something similar: "Error-tolerant Finite State Recognition with 4331 * Applications to Morphological Analysis and Spelling Correction" (1996). 4332 * The implementation in the article is simplified and requires a stack of 4333 * unknown depth. The implementation here only needs a stack depth equal to 4334 * the length of the word. 4335 * 4336 * This is also used for the sound-folded word, "soundfold" is TRUE then. 4337 * The mechanism is the same, but we find a match with a sound-folded word 4338 * that comes from one or more original words. Each of these words may be 4339 * added, this is done by add_sound_suggest(). 4340 * Don't use: 4341 * the prefix tree or the keep-case tree 4342 * "su->su_badlen" 4343 * anything to do with upper and lower case 4344 * anything to do with word or non-word characters ("spell_iswordp()") 4345 * banned words 4346 * word flags (rare, region, compounding) 4347 * word splitting for now 4348 * "similar_chars()" 4349 * use "slang->sl_repsal" instead of "lp->lp_replang->sl_rep" 4350 */ 4351 static void 4352 suggest_trie_walk( 4353 suginfo_T *su, 4354 langp_T *lp, 4355 char_u *fword, 4356 int soundfold) 4357 { 4358 char_u tword[MAXWLEN]; /* good word collected so far */ 4359 trystate_T stack[MAXWLEN]; 4360 char_u preword[MAXWLEN * 3]; /* word found with proper case; 4361 * concatenation of prefix compound 4362 * words and split word. NUL terminated 4363 * when going deeper but not when coming 4364 * back. */ 4365 char_u compflags[MAXWLEN]; /* compound flags, one for each word */ 4366 trystate_T *sp; 4367 int newscore; 4368 int score; 4369 char_u *byts, *fbyts, *pbyts; 4370 idx_T *idxs, *fidxs, *pidxs; 4371 int depth; 4372 int c, c2, c3; 4373 int n = 0; 4374 int flags; 4375 garray_T *gap; 4376 idx_T arridx; 4377 int len; 4378 char_u *p; 4379 fromto_T *ftp; 4380 int fl = 0, tl; 4381 int repextra = 0; /* extra bytes in fword[] from REP item */ 4382 slang_T *slang = lp->lp_slang; 4383 int fword_ends; 4384 int goodword_ends; 4385 #ifdef DEBUG_TRIEWALK 4386 /* Stores the name of the change made at each level. */ 4387 char_u changename[MAXWLEN][80]; 4388 #endif 4389 int breakcheckcount = 1000; 4390 int compound_ok; 4391 4392 /* 4393 * Go through the whole case-fold tree, try changes at each node. 4394 * "tword[]" contains the word collected from nodes in the tree. 4395 * "fword[]" the word we are trying to match with (initially the bad 4396 * word). 4397 */ 4398 depth = 0; 4399 sp = &stack[0]; 4400 vim_memset(sp, 0, sizeof(trystate_T)); 4401 sp->ts_curi = 1; 4402 4403 if (soundfold) 4404 { 4405 /* Going through the soundfold tree. */ 4406 byts = fbyts = slang->sl_sbyts; 4407 idxs = fidxs = slang->sl_sidxs; 4408 pbyts = NULL; 4409 pidxs = NULL; 4410 sp->ts_prefixdepth = PFD_NOPREFIX; 4411 sp->ts_state = STATE_START; 4412 } 4413 else 4414 { 4415 /* 4416 * When there are postponed prefixes we need to use these first. At 4417 * the end of the prefix we continue in the case-fold tree. 4418 */ 4419 fbyts = slang->sl_fbyts; 4420 fidxs = slang->sl_fidxs; 4421 pbyts = slang->sl_pbyts; 4422 pidxs = slang->sl_pidxs; 4423 if (pbyts != NULL) 4424 { 4425 byts = pbyts; 4426 idxs = pidxs; 4427 sp->ts_prefixdepth = PFD_PREFIXTREE; 4428 sp->ts_state = STATE_NOPREFIX; /* try without prefix first */ 4429 } 4430 else 4431 { 4432 byts = fbyts; 4433 idxs = fidxs; 4434 sp->ts_prefixdepth = PFD_NOPREFIX; 4435 sp->ts_state = STATE_START; 4436 } 4437 } 4438 4439 /* 4440 * Loop to find all suggestions. At each round we either: 4441 * - For the current state try one operation, advance "ts_curi", 4442 * increase "depth". 4443 * - When a state is done go to the next, set "ts_state". 4444 * - When all states are tried decrease "depth". 4445 */ 4446 while (depth >= 0 && !got_int) 4447 { 4448 sp = &stack[depth]; 4449 switch (sp->ts_state) 4450 { 4451 case STATE_START: 4452 case STATE_NOPREFIX: 4453 /* 4454 * Start of node: Deal with NUL bytes, which means 4455 * tword[] may end here. 4456 */ 4457 arridx = sp->ts_arridx; /* current node in the tree */ 4458 len = byts[arridx]; /* bytes in this node */ 4459 arridx += sp->ts_curi; /* index of current byte */ 4460 4461 if (sp->ts_prefixdepth == PFD_PREFIXTREE) 4462 { 4463 /* Skip over the NUL bytes, we use them later. */ 4464 for (n = 0; n < len && byts[arridx + n] == 0; ++n) 4465 ; 4466 sp->ts_curi += n; 4467 4468 /* Always past NUL bytes now. */ 4469 n = (int)sp->ts_state; 4470 PROF_STORE(sp->ts_state) 4471 sp->ts_state = STATE_ENDNUL; 4472 sp->ts_save_badflags = su->su_badflags; 4473 4474 /* At end of a prefix or at start of prefixtree: check for 4475 * following word. */ 4476 if (byts[arridx] == 0 || n == (int)STATE_NOPREFIX) 4477 { 4478 /* Set su->su_badflags to the caps type at this position. 4479 * Use the caps type until here for the prefix itself. */ 4480 #ifdef FEAT_MBYTE 4481 if (has_mbyte) 4482 n = nofold_len(fword, sp->ts_fidx, su->su_badptr); 4483 else 4484 #endif 4485 n = sp->ts_fidx; 4486 flags = badword_captype(su->su_badptr, su->su_badptr + n); 4487 su->su_badflags = badword_captype(su->su_badptr + n, 4488 su->su_badptr + su->su_badlen); 4489 #ifdef DEBUG_TRIEWALK 4490 sprintf(changename[depth], "prefix"); 4491 #endif 4492 go_deeper(stack, depth, 0); 4493 ++depth; 4494 sp = &stack[depth]; 4495 sp->ts_prefixdepth = depth - 1; 4496 byts = fbyts; 4497 idxs = fidxs; 4498 sp->ts_arridx = 0; 4499 4500 /* Move the prefix to preword[] with the right case 4501 * and make find_keepcap_word() works. */ 4502 tword[sp->ts_twordlen] = NUL; 4503 make_case_word(tword + sp->ts_splitoff, 4504 preword + sp->ts_prewordlen, flags); 4505 sp->ts_prewordlen = (char_u)STRLEN(preword); 4506 sp->ts_splitoff = sp->ts_twordlen; 4507 } 4508 break; 4509 } 4510 4511 if (sp->ts_curi > len || byts[arridx] != 0) 4512 { 4513 /* Past bytes in node and/or past NUL bytes. */ 4514 PROF_STORE(sp->ts_state) 4515 sp->ts_state = STATE_ENDNUL; 4516 sp->ts_save_badflags = su->su_badflags; 4517 break; 4518 } 4519 4520 /* 4521 * End of word in tree. 4522 */ 4523 ++sp->ts_curi; /* eat one NUL byte */ 4524 4525 flags = (int)idxs[arridx]; 4526 4527 /* Skip words with the NOSUGGEST flag. */ 4528 if (flags & WF_NOSUGGEST) 4529 break; 4530 4531 fword_ends = (fword[sp->ts_fidx] == NUL 4532 || (soundfold 4533 ? VIM_ISWHITE(fword[sp->ts_fidx]) 4534 : !spell_iswordp(fword + sp->ts_fidx, curwin))); 4535 tword[sp->ts_twordlen] = NUL; 4536 4537 if (sp->ts_prefixdepth <= PFD_NOTSPECIAL 4538 && (sp->ts_flags & TSF_PREFIXOK) == 0) 4539 { 4540 /* There was a prefix before the word. Check that the prefix 4541 * can be used with this word. */ 4542 /* Count the length of the NULs in the prefix. If there are 4543 * none this must be the first try without a prefix. */ 4544 n = stack[sp->ts_prefixdepth].ts_arridx; 4545 len = pbyts[n++]; 4546 for (c = 0; c < len && pbyts[n + c] == 0; ++c) 4547 ; 4548 if (c > 0) 4549 { 4550 c = valid_word_prefix(c, n, flags, 4551 tword + sp->ts_splitoff, slang, FALSE); 4552 if (c == 0) 4553 break; 4554 4555 /* Use the WF_RARE flag for a rare prefix. */ 4556 if (c & WF_RAREPFX) 4557 flags |= WF_RARE; 4558 4559 /* Tricky: when checking for both prefix and compounding 4560 * we run into the prefix flag first. 4561 * Remember that it's OK, so that we accept the prefix 4562 * when arriving at a compound flag. */ 4563 sp->ts_flags |= TSF_PREFIXOK; 4564 } 4565 } 4566 4567 /* Check NEEDCOMPOUND: can't use word without compounding. Do try 4568 * appending another compound word below. */ 4569 if (sp->ts_complen == sp->ts_compsplit && fword_ends 4570 && (flags & WF_NEEDCOMP)) 4571 goodword_ends = FALSE; 4572 else 4573 goodword_ends = TRUE; 4574 4575 p = NULL; 4576 compound_ok = TRUE; 4577 if (sp->ts_complen > sp->ts_compsplit) 4578 { 4579 if (slang->sl_nobreak) 4580 { 4581 /* There was a word before this word. When there was no 4582 * change in this word (it was correct) add the first word 4583 * as a suggestion. If this word was corrected too, we 4584 * need to check if a correct word follows. */ 4585 if (sp->ts_fidx - sp->ts_splitfidx 4586 == sp->ts_twordlen - sp->ts_splitoff 4587 && STRNCMP(fword + sp->ts_splitfidx, 4588 tword + sp->ts_splitoff, 4589 sp->ts_fidx - sp->ts_splitfidx) == 0) 4590 { 4591 preword[sp->ts_prewordlen] = NUL; 4592 newscore = score_wordcount_adj(slang, sp->ts_score, 4593 preword + sp->ts_prewordlen, 4594 sp->ts_prewordlen > 0); 4595 /* Add the suggestion if the score isn't too bad. */ 4596 if (newscore <= su->su_maxscore) 4597 add_suggestion(su, &su->su_ga, preword, 4598 sp->ts_splitfidx - repextra, 4599 newscore, 0, FALSE, 4600 lp->lp_sallang, FALSE); 4601 break; 4602 } 4603 } 4604 else 4605 { 4606 /* There was a compound word before this word. If this 4607 * word does not support compounding then give up 4608 * (splitting is tried for the word without compound 4609 * flag). */ 4610 if (((unsigned)flags >> 24) == 0 4611 || sp->ts_twordlen - sp->ts_splitoff 4612 < slang->sl_compminlen) 4613 break; 4614 #ifdef FEAT_MBYTE 4615 /* For multi-byte chars check character length against 4616 * COMPOUNDMIN. */ 4617 if (has_mbyte 4618 && slang->sl_compminlen > 0 4619 && mb_charlen(tword + sp->ts_splitoff) 4620 < slang->sl_compminlen) 4621 break; 4622 #endif 4623 4624 compflags[sp->ts_complen] = ((unsigned)flags >> 24); 4625 compflags[sp->ts_complen + 1] = NUL; 4626 vim_strncpy(preword + sp->ts_prewordlen, 4627 tword + sp->ts_splitoff, 4628 sp->ts_twordlen - sp->ts_splitoff); 4629 4630 /* Verify CHECKCOMPOUNDPATTERN rules. */ 4631 if (match_checkcompoundpattern(preword, sp->ts_prewordlen, 4632 &slang->sl_comppat)) 4633 compound_ok = FALSE; 4634 4635 if (compound_ok) 4636 { 4637 p = preword; 4638 while (*skiptowhite(p) != NUL) 4639 p = skipwhite(skiptowhite(p)); 4640 if (fword_ends && !can_compound(slang, p, 4641 compflags + sp->ts_compsplit)) 4642 /* Compound is not allowed. But it may still be 4643 * possible if we add another (short) word. */ 4644 compound_ok = FALSE; 4645 } 4646 4647 /* Get pointer to last char of previous word. */ 4648 p = preword + sp->ts_prewordlen; 4649 MB_PTR_BACK(preword, p); 4650 } 4651 } 4652 4653 /* 4654 * Form the word with proper case in preword. 4655 * If there is a word from a previous split, append. 4656 * For the soundfold tree don't change the case, simply append. 4657 */ 4658 if (soundfold) 4659 STRCPY(preword + sp->ts_prewordlen, tword + sp->ts_splitoff); 4660 else if (flags & WF_KEEPCAP) 4661 /* Must find the word in the keep-case tree. */ 4662 find_keepcap_word(slang, tword + sp->ts_splitoff, 4663 preword + sp->ts_prewordlen); 4664 else 4665 { 4666 /* Include badflags: If the badword is onecap or allcap 4667 * use that for the goodword too. But if the badword is 4668 * allcap and it's only one char long use onecap. */ 4669 c = su->su_badflags; 4670 if ((c & WF_ALLCAP) 4671 #ifdef FEAT_MBYTE 4672 && su->su_badlen == (*mb_ptr2len)(su->su_badptr) 4673 #else 4674 && su->su_badlen == 1 4675 #endif 4676 ) 4677 c = WF_ONECAP; 4678 c |= flags; 4679 4680 /* When appending a compound word after a word character don't 4681 * use Onecap. */ 4682 if (p != NULL && spell_iswordp_nmw(p, curwin)) 4683 c &= ~WF_ONECAP; 4684 make_case_word(tword + sp->ts_splitoff, 4685 preword + sp->ts_prewordlen, c); 4686 } 4687 4688 if (!soundfold) 4689 { 4690 /* Don't use a banned word. It may appear again as a good 4691 * word, thus remember it. */ 4692 if (flags & WF_BANNED) 4693 { 4694 add_banned(su, preword + sp->ts_prewordlen); 4695 break; 4696 } 4697 if ((sp->ts_complen == sp->ts_compsplit 4698 && WAS_BANNED(su, preword + sp->ts_prewordlen)) 4699 || WAS_BANNED(su, preword)) 4700 { 4701 if (slang->sl_compprog == NULL) 4702 break; 4703 /* the word so far was banned but we may try compounding */ 4704 goodword_ends = FALSE; 4705 } 4706 } 4707 4708 newscore = 0; 4709 if (!soundfold) /* soundfold words don't have flags */ 4710 { 4711 if ((flags & WF_REGION) 4712 && (((unsigned)flags >> 16) & lp->lp_region) == 0) 4713 newscore += SCORE_REGION; 4714 if (flags & WF_RARE) 4715 newscore += SCORE_RARE; 4716 4717 if (!spell_valid_case(su->su_badflags, 4718 captype(preword + sp->ts_prewordlen, NULL))) 4719 newscore += SCORE_ICASE; 4720 } 4721 4722 /* TODO: how about splitting in the soundfold tree? */ 4723 if (fword_ends 4724 && goodword_ends 4725 && sp->ts_fidx >= sp->ts_fidxtry 4726 && compound_ok) 4727 { 4728 /* The badword also ends: add suggestions. */ 4729 #ifdef DEBUG_TRIEWALK 4730 if (soundfold && STRCMP(preword, "smwrd") == 0) 4731 { 4732 int j; 4733 4734 /* print the stack of changes that brought us here */ 4735 smsg("------ %s -------", fword); 4736 for (j = 0; j < depth; ++j) 4737 smsg("%s", changename[j]); 4738 } 4739 #endif 4740 if (soundfold) 4741 { 4742 /* For soundfolded words we need to find the original 4743 * words, the edit distance and then add them. */ 4744 add_sound_suggest(su, preword, sp->ts_score, lp); 4745 } 4746 else if (sp->ts_fidx > 0) 4747 { 4748 /* Give a penalty when changing non-word char to word 4749 * char, e.g., "thes," -> "these". */ 4750 p = fword + sp->ts_fidx; 4751 MB_PTR_BACK(fword, p); 4752 if (!spell_iswordp(p, curwin)) 4753 { 4754 p = preword + STRLEN(preword); 4755 MB_PTR_BACK(preword, p); 4756 if (spell_iswordp(p, curwin)) 4757 newscore += SCORE_NONWORD; 4758 } 4759 4760 /* Give a bonus to words seen before. */ 4761 score = score_wordcount_adj(slang, 4762 sp->ts_score + newscore, 4763 preword + sp->ts_prewordlen, 4764 sp->ts_prewordlen > 0); 4765 4766 /* Add the suggestion if the score isn't too bad. */ 4767 if (score <= su->su_maxscore) 4768 { 4769 add_suggestion(su, &su->su_ga, preword, 4770 sp->ts_fidx - repextra, 4771 score, 0, FALSE, lp->lp_sallang, FALSE); 4772 4773 if (su->su_badflags & WF_MIXCAP) 4774 { 4775 /* We really don't know if the word should be 4776 * upper or lower case, add both. */ 4777 c = captype(preword, NULL); 4778 if (c == 0 || c == WF_ALLCAP) 4779 { 4780 make_case_word(tword + sp->ts_splitoff, 4781 preword + sp->ts_prewordlen, 4782 c == 0 ? WF_ALLCAP : 0); 4783 4784 add_suggestion(su, &su->su_ga, preword, 4785 sp->ts_fidx - repextra, 4786 score + SCORE_ICASE, 0, FALSE, 4787 lp->lp_sallang, FALSE); 4788 } 4789 } 4790 } 4791 } 4792 } 4793 4794 /* 4795 * Try word split and/or compounding. 4796 */ 4797 if ((sp->ts_fidx >= sp->ts_fidxtry || fword_ends) 4798 #ifdef FEAT_MBYTE 4799 /* Don't split halfway a character. */ 4800 && (!has_mbyte || sp->ts_tcharlen == 0) 4801 #endif 4802 ) 4803 { 4804 int try_compound; 4805 int try_split; 4806 4807 /* If past the end of the bad word don't try a split. 4808 * Otherwise try changing the next word. E.g., find 4809 * suggestions for "the the" where the second "the" is 4810 * different. It's done like a split. 4811 * TODO: word split for soundfold words */ 4812 try_split = (sp->ts_fidx - repextra < su->su_badlen) 4813 && !soundfold; 4814 4815 /* Get here in several situations: 4816 * 1. The word in the tree ends: 4817 * If the word allows compounding try that. Otherwise try 4818 * a split by inserting a space. For both check that a 4819 * valid words starts at fword[sp->ts_fidx]. 4820 * For NOBREAK do like compounding to be able to check if 4821 * the next word is valid. 4822 * 2. The badword does end, but it was due to a change (e.g., 4823 * a swap). No need to split, but do check that the 4824 * following word is valid. 4825 * 3. The badword and the word in the tree end. It may still 4826 * be possible to compound another (short) word. 4827 */ 4828 try_compound = FALSE; 4829 if (!soundfold 4830 && !slang->sl_nocompoundsugs 4831 && slang->sl_compprog != NULL 4832 && ((unsigned)flags >> 24) != 0 4833 && sp->ts_twordlen - sp->ts_splitoff 4834 >= slang->sl_compminlen 4835 #ifdef FEAT_MBYTE 4836 && (!has_mbyte 4837 || slang->sl_compminlen == 0 4838 || mb_charlen(tword + sp->ts_splitoff) 4839 >= slang->sl_compminlen) 4840 #endif 4841 && (slang->sl_compsylmax < MAXWLEN 4842 || sp->ts_complen + 1 - sp->ts_compsplit 4843 < slang->sl_compmax) 4844 && (can_be_compound(sp, slang, 4845 compflags, ((unsigned)flags >> 24)))) 4846 4847 { 4848 try_compound = TRUE; 4849 compflags[sp->ts_complen] = ((unsigned)flags >> 24); 4850 compflags[sp->ts_complen + 1] = NUL; 4851 } 4852 4853 /* For NOBREAK we never try splitting, it won't make any word 4854 * valid. */ 4855 if (slang->sl_nobreak && !slang->sl_nocompoundsugs) 4856 try_compound = TRUE; 4857 4858 /* If we could add a compound word, and it's also possible to 4859 * split at this point, do the split first and set 4860 * TSF_DIDSPLIT to avoid doing it again. */ 4861 else if (!fword_ends 4862 && try_compound 4863 && (sp->ts_flags & TSF_DIDSPLIT) == 0) 4864 { 4865 try_compound = FALSE; 4866 sp->ts_flags |= TSF_DIDSPLIT; 4867 --sp->ts_curi; /* do the same NUL again */ 4868 compflags[sp->ts_complen] = NUL; 4869 } 4870 else 4871 sp->ts_flags &= ~TSF_DIDSPLIT; 4872 4873 if (try_split || try_compound) 4874 { 4875 if (!try_compound && (!fword_ends || !goodword_ends)) 4876 { 4877 /* If we're going to split need to check that the 4878 * words so far are valid for compounding. If there 4879 * is only one word it must not have the NEEDCOMPOUND 4880 * flag. */ 4881 if (sp->ts_complen == sp->ts_compsplit 4882 && (flags & WF_NEEDCOMP)) 4883 break; 4884 p = preword; 4885 while (*skiptowhite(p) != NUL) 4886 p = skipwhite(skiptowhite(p)); 4887 if (sp->ts_complen > sp->ts_compsplit 4888 && !can_compound(slang, p, 4889 compflags + sp->ts_compsplit)) 4890 break; 4891 4892 if (slang->sl_nosplitsugs) 4893 newscore += SCORE_SPLIT_NO; 4894 else 4895 newscore += SCORE_SPLIT; 4896 4897 /* Give a bonus to words seen before. */ 4898 newscore = score_wordcount_adj(slang, newscore, 4899 preword + sp->ts_prewordlen, TRUE); 4900 } 4901 4902 if (TRY_DEEPER(su, stack, depth, newscore)) 4903 { 4904 go_deeper(stack, depth, newscore); 4905 #ifdef DEBUG_TRIEWALK 4906 if (!try_compound && !fword_ends) 4907 sprintf(changename[depth], "%.*s-%s: split", 4908 sp->ts_twordlen, tword, fword + sp->ts_fidx); 4909 else 4910 sprintf(changename[depth], "%.*s-%s: compound", 4911 sp->ts_twordlen, tword, fword + sp->ts_fidx); 4912 #endif 4913 /* Save things to be restored at STATE_SPLITUNDO. */ 4914 sp->ts_save_badflags = su->su_badflags; 4915 PROF_STORE(sp->ts_state) 4916 sp->ts_state = STATE_SPLITUNDO; 4917 4918 ++depth; 4919 sp = &stack[depth]; 4920 4921 /* Append a space to preword when splitting. */ 4922 if (!try_compound && !fword_ends) 4923 STRCAT(preword, " "); 4924 sp->ts_prewordlen = (char_u)STRLEN(preword); 4925 sp->ts_splitoff = sp->ts_twordlen; 4926 sp->ts_splitfidx = sp->ts_fidx; 4927 4928 /* If the badword has a non-word character at this 4929 * position skip it. That means replacing the 4930 * non-word character with a space. Always skip a 4931 * character when the word ends. But only when the 4932 * good word can end. */ 4933 if (((!try_compound && !spell_iswordp_nmw(fword 4934 + sp->ts_fidx, 4935 curwin)) 4936 || fword_ends) 4937 && fword[sp->ts_fidx] != NUL 4938 && goodword_ends) 4939 { 4940 int l; 4941 4942 l = MB_PTR2LEN(fword + sp->ts_fidx); 4943 if (fword_ends) 4944 { 4945 /* Copy the skipped character to preword. */ 4946 mch_memmove(preword + sp->ts_prewordlen, 4947 fword + sp->ts_fidx, l); 4948 sp->ts_prewordlen += l; 4949 preword[sp->ts_prewordlen] = NUL; 4950 } 4951 else 4952 sp->ts_score -= SCORE_SPLIT - SCORE_SUBST; 4953 sp->ts_fidx += l; 4954 } 4955 4956 /* When compounding include compound flag in 4957 * compflags[] (already set above). When splitting we 4958 * may start compounding over again. */ 4959 if (try_compound) 4960 ++sp->ts_complen; 4961 else 4962 sp->ts_compsplit = sp->ts_complen; 4963 sp->ts_prefixdepth = PFD_NOPREFIX; 4964 4965 /* set su->su_badflags to the caps type at this 4966 * position */ 4967 #ifdef FEAT_MBYTE 4968 if (has_mbyte) 4969 n = nofold_len(fword, sp->ts_fidx, su->su_badptr); 4970 else 4971 #endif 4972 n = sp->ts_fidx; 4973 su->su_badflags = badword_captype(su->su_badptr + n, 4974 su->su_badptr + su->su_badlen); 4975 4976 /* Restart at top of the tree. */ 4977 sp->ts_arridx = 0; 4978 4979 /* If there are postponed prefixes, try these too. */ 4980 if (pbyts != NULL) 4981 { 4982 byts = pbyts; 4983 idxs = pidxs; 4984 sp->ts_prefixdepth = PFD_PREFIXTREE; 4985 PROF_STORE(sp->ts_state) 4986 sp->ts_state = STATE_NOPREFIX; 4987 } 4988 } 4989 } 4990 } 4991 break; 4992 4993 case STATE_SPLITUNDO: 4994 /* Undo the changes done for word split or compound word. */ 4995 su->su_badflags = sp->ts_save_badflags; 4996 4997 /* Continue looking for NUL bytes. */ 4998 PROF_STORE(sp->ts_state) 4999 sp->ts_state = STATE_START; 5000 5001 /* In case we went into the prefix tree. */ 5002 byts = fbyts; 5003 idxs = fidxs; 5004 break; 5005 5006 case STATE_ENDNUL: 5007 /* Past the NUL bytes in the node. */ 5008 su->su_badflags = sp->ts_save_badflags; 5009 if (fword[sp->ts_fidx] == NUL 5010 #ifdef FEAT_MBYTE 5011 && sp->ts_tcharlen == 0 5012 #endif 5013 ) 5014 { 5015 /* The badword ends, can't use STATE_PLAIN. */ 5016 PROF_STORE(sp->ts_state) 5017 sp->ts_state = STATE_DEL; 5018 break; 5019 } 5020 PROF_STORE(sp->ts_state) 5021 sp->ts_state = STATE_PLAIN; 5022 /* FALLTHROUGH */ 5023 5024 case STATE_PLAIN: 5025 /* 5026 * Go over all possible bytes at this node, add each to tword[] 5027 * and use child node. "ts_curi" is the index. 5028 */ 5029 arridx = sp->ts_arridx; 5030 if (sp->ts_curi > byts[arridx]) 5031 { 5032 /* Done all bytes at this node, do next state. When still at 5033 * already changed bytes skip the other tricks. */ 5034 PROF_STORE(sp->ts_state) 5035 if (sp->ts_fidx >= sp->ts_fidxtry) 5036 sp->ts_state = STATE_DEL; 5037 else 5038 sp->ts_state = STATE_FINAL; 5039 } 5040 else 5041 { 5042 arridx += sp->ts_curi++; 5043 c = byts[arridx]; 5044 5045 /* Normal byte, go one level deeper. If it's not equal to the 5046 * byte in the bad word adjust the score. But don't even try 5047 * when the byte was already changed. And don't try when we 5048 * just deleted this byte, accepting it is always cheaper than 5049 * delete + substitute. */ 5050 if (c == fword[sp->ts_fidx] 5051 #ifdef FEAT_MBYTE 5052 || (sp->ts_tcharlen > 0 && sp->ts_isdiff != DIFF_NONE) 5053 #endif 5054 ) 5055 newscore = 0; 5056 else 5057 newscore = SCORE_SUBST; 5058 if ((newscore == 0 5059 || (sp->ts_fidx >= sp->ts_fidxtry 5060 && ((sp->ts_flags & TSF_DIDDEL) == 0 5061 || c != fword[sp->ts_delidx]))) 5062 && TRY_DEEPER(su, stack, depth, newscore)) 5063 { 5064 go_deeper(stack, depth, newscore); 5065 #ifdef DEBUG_TRIEWALK 5066 if (newscore > 0) 5067 sprintf(changename[depth], "%.*s-%s: subst %c to %c", 5068 sp->ts_twordlen, tword, fword + sp->ts_fidx, 5069 fword[sp->ts_fidx], c); 5070 else 5071 sprintf(changename[depth], "%.*s-%s: accept %c", 5072 sp->ts_twordlen, tword, fword + sp->ts_fidx, 5073 fword[sp->ts_fidx]); 5074 #endif 5075 ++depth; 5076 sp = &stack[depth]; 5077 ++sp->ts_fidx; 5078 tword[sp->ts_twordlen++] = c; 5079 sp->ts_arridx = idxs[arridx]; 5080 #ifdef FEAT_MBYTE 5081 if (newscore == SCORE_SUBST) 5082 sp->ts_isdiff = DIFF_YES; 5083 if (has_mbyte) 5084 { 5085 /* Multi-byte characters are a bit complicated to 5086 * handle: They differ when any of the bytes differ 5087 * and then their length may also differ. */ 5088 if (sp->ts_tcharlen == 0) 5089 { 5090 /* First byte. */ 5091 sp->ts_tcharidx = 0; 5092 sp->ts_tcharlen = MB_BYTE2LEN(c); 5093 sp->ts_fcharstart = sp->ts_fidx - 1; 5094 sp->ts_isdiff = (newscore != 0) 5095 ? DIFF_YES : DIFF_NONE; 5096 } 5097 else if (sp->ts_isdiff == DIFF_INSERT) 5098 /* When inserting trail bytes don't advance in the 5099 * bad word. */ 5100 --sp->ts_fidx; 5101 if (++sp->ts_tcharidx == sp->ts_tcharlen) 5102 { 5103 /* Last byte of character. */ 5104 if (sp->ts_isdiff == DIFF_YES) 5105 { 5106 /* Correct ts_fidx for the byte length of the 5107 * character (we didn't check that before). */ 5108 sp->ts_fidx = sp->ts_fcharstart 5109 + MB_PTR2LEN( 5110 fword + sp->ts_fcharstart); 5111 /* For changing a composing character adjust 5112 * the score from SCORE_SUBST to 5113 * SCORE_SUBCOMP. */ 5114 if (enc_utf8 5115 && utf_iscomposing( 5116 utf_ptr2char(tword 5117 + sp->ts_twordlen 5118 - sp->ts_tcharlen)) 5119 && utf_iscomposing( 5120 utf_ptr2char(fword 5121 + sp->ts_fcharstart))) 5122 sp->ts_score -= 5123 SCORE_SUBST - SCORE_SUBCOMP; 5124 5125 /* For a similar character adjust score from 5126 * SCORE_SUBST to SCORE_SIMILAR. */ 5127 else if (!soundfold 5128 && slang->sl_has_map 5129 && similar_chars(slang, 5130 mb_ptr2char(tword 5131 + sp->ts_twordlen 5132 - sp->ts_tcharlen), 5133 mb_ptr2char(fword 5134 + sp->ts_fcharstart))) 5135 sp->ts_score -= 5136 SCORE_SUBST - SCORE_SIMILAR; 5137 } 5138 else if (sp->ts_isdiff == DIFF_INSERT 5139 && sp->ts_twordlen > sp->ts_tcharlen) 5140 { 5141 p = tword + sp->ts_twordlen - sp->ts_tcharlen; 5142 c = mb_ptr2char(p); 5143 if (enc_utf8 && utf_iscomposing(c)) 5144 { 5145 /* Inserting a composing char doesn't 5146 * count that much. */ 5147 sp->ts_score -= SCORE_INS - SCORE_INSCOMP; 5148 } 5149 else 5150 { 5151 /* If the previous character was the same, 5152 * thus doubling a character, give a bonus 5153 * to the score. Also for the soundfold 5154 * tree (might seem illogical but does 5155 * give better scores). */ 5156 MB_PTR_BACK(tword, p); 5157 if (c == mb_ptr2char(p)) 5158 sp->ts_score -= SCORE_INS 5159 - SCORE_INSDUP; 5160 } 5161 } 5162 5163 /* Starting a new char, reset the length. */ 5164 sp->ts_tcharlen = 0; 5165 } 5166 } 5167 else 5168 #endif 5169 { 5170 /* If we found a similar char adjust the score. 5171 * We do this after calling go_deeper() because 5172 * it's slow. */ 5173 if (newscore != 0 5174 && !soundfold 5175 && slang->sl_has_map 5176 && similar_chars(slang, 5177 c, fword[sp->ts_fidx - 1])) 5178 sp->ts_score -= SCORE_SUBST - SCORE_SIMILAR; 5179 } 5180 } 5181 } 5182 break; 5183 5184 case STATE_DEL: 5185 #ifdef FEAT_MBYTE 5186 /* When past the first byte of a multi-byte char don't try 5187 * delete/insert/swap a character. */ 5188 if (has_mbyte && sp->ts_tcharlen > 0) 5189 { 5190 PROF_STORE(sp->ts_state) 5191 sp->ts_state = STATE_FINAL; 5192 break; 5193 } 5194 #endif 5195 /* 5196 * Try skipping one character in the bad word (delete it). 5197 */ 5198 PROF_STORE(sp->ts_state) 5199 sp->ts_state = STATE_INS_PREP; 5200 sp->ts_curi = 1; 5201 if (soundfold && sp->ts_fidx == 0 && fword[sp->ts_fidx] == '*') 5202 /* Deleting a vowel at the start of a word counts less, see 5203 * soundalike_score(). */ 5204 newscore = 2 * SCORE_DEL / 3; 5205 else 5206 newscore = SCORE_DEL; 5207 if (fword[sp->ts_fidx] != NUL 5208 && TRY_DEEPER(su, stack, depth, newscore)) 5209 { 5210 go_deeper(stack, depth, newscore); 5211 #ifdef DEBUG_TRIEWALK 5212 sprintf(changename[depth], "%.*s-%s: delete %c", 5213 sp->ts_twordlen, tword, fword + sp->ts_fidx, 5214 fword[sp->ts_fidx]); 5215 #endif 5216 ++depth; 5217 5218 /* Remember what character we deleted, so that we can avoid 5219 * inserting it again. */ 5220 stack[depth].ts_flags |= TSF_DIDDEL; 5221 stack[depth].ts_delidx = sp->ts_fidx; 5222 5223 /* Advance over the character in fword[]. Give a bonus to the 5224 * score if the same character is following "nn" -> "n". It's 5225 * a bit illogical for soundfold tree but it does give better 5226 * results. */ 5227 #ifdef FEAT_MBYTE 5228 if (has_mbyte) 5229 { 5230 c = mb_ptr2char(fword + sp->ts_fidx); 5231 stack[depth].ts_fidx += MB_PTR2LEN(fword + sp->ts_fidx); 5232 if (enc_utf8 && utf_iscomposing(c)) 5233 stack[depth].ts_score -= SCORE_DEL - SCORE_DELCOMP; 5234 else if (c == mb_ptr2char(fword + stack[depth].ts_fidx)) 5235 stack[depth].ts_score -= SCORE_DEL - SCORE_DELDUP; 5236 } 5237 else 5238 #endif 5239 { 5240 ++stack[depth].ts_fidx; 5241 if (fword[sp->ts_fidx] == fword[sp->ts_fidx + 1]) 5242 stack[depth].ts_score -= SCORE_DEL - SCORE_DELDUP; 5243 } 5244 break; 5245 } 5246 /* FALLTHROUGH */ 5247 5248 case STATE_INS_PREP: 5249 if (sp->ts_flags & TSF_DIDDEL) 5250 { 5251 /* If we just deleted a byte then inserting won't make sense, 5252 * a substitute is always cheaper. */ 5253 PROF_STORE(sp->ts_state) 5254 sp->ts_state = STATE_SWAP; 5255 break; 5256 } 5257 5258 /* skip over NUL bytes */ 5259 n = sp->ts_arridx; 5260 for (;;) 5261 { 5262 if (sp->ts_curi > byts[n]) 5263 { 5264 /* Only NUL bytes at this node, go to next state. */ 5265 PROF_STORE(sp->ts_state) 5266 sp->ts_state = STATE_SWAP; 5267 break; 5268 } 5269 if (byts[n + sp->ts_curi] != NUL) 5270 { 5271 /* Found a byte to insert. */ 5272 PROF_STORE(sp->ts_state) 5273 sp->ts_state = STATE_INS; 5274 break; 5275 } 5276 ++sp->ts_curi; 5277 } 5278 break; 5279 5280 /* FALLTHROUGH */ 5281 5282 case STATE_INS: 5283 /* Insert one byte. Repeat this for each possible byte at this 5284 * node. */ 5285 n = sp->ts_arridx; 5286 if (sp->ts_curi > byts[n]) 5287 { 5288 /* Done all bytes at this node, go to next state. */ 5289 PROF_STORE(sp->ts_state) 5290 sp->ts_state = STATE_SWAP; 5291 break; 5292 } 5293 5294 /* Do one more byte at this node, but: 5295 * - Skip NUL bytes. 5296 * - Skip the byte if it's equal to the byte in the word, 5297 * accepting that byte is always better. 5298 */ 5299 n += sp->ts_curi++; 5300 c = byts[n]; 5301 if (soundfold && sp->ts_twordlen == 0 && c == '*') 5302 /* Inserting a vowel at the start of a word counts less, 5303 * see soundalike_score(). */ 5304 newscore = 2 * SCORE_INS / 3; 5305 else 5306 newscore = SCORE_INS; 5307 if (c != fword[sp->ts_fidx] 5308 && TRY_DEEPER(su, stack, depth, newscore)) 5309 { 5310 go_deeper(stack, depth, newscore); 5311 #ifdef DEBUG_TRIEWALK 5312 sprintf(changename[depth], "%.*s-%s: insert %c", 5313 sp->ts_twordlen, tword, fword + sp->ts_fidx, 5314 c); 5315 #endif 5316 ++depth; 5317 sp = &stack[depth]; 5318 tword[sp->ts_twordlen++] = c; 5319 sp->ts_arridx = idxs[n]; 5320 #ifdef FEAT_MBYTE 5321 if (has_mbyte) 5322 { 5323 fl = MB_BYTE2LEN(c); 5324 if (fl > 1) 5325 { 5326 /* There are following bytes for the same character. 5327 * We must find all bytes before trying 5328 * delete/insert/swap/etc. */ 5329 sp->ts_tcharlen = fl; 5330 sp->ts_tcharidx = 1; 5331 sp->ts_isdiff = DIFF_INSERT; 5332 } 5333 } 5334 else 5335 fl = 1; 5336 if (fl == 1) 5337 #endif 5338 { 5339 /* If the previous character was the same, thus doubling a 5340 * character, give a bonus to the score. Also for 5341 * soundfold words (illogical but does give a better 5342 * score). */ 5343 if (sp->ts_twordlen >= 2 5344 && tword[sp->ts_twordlen - 2] == c) 5345 sp->ts_score -= SCORE_INS - SCORE_INSDUP; 5346 } 5347 } 5348 break; 5349 5350 case STATE_SWAP: 5351 /* 5352 * Swap two bytes in the bad word: "12" -> "21". 5353 * We change "fword" here, it's changed back afterwards at 5354 * STATE_UNSWAP. 5355 */ 5356 p = fword + sp->ts_fidx; 5357 c = *p; 5358 if (c == NUL) 5359 { 5360 /* End of word, can't swap or replace. */ 5361 PROF_STORE(sp->ts_state) 5362 sp->ts_state = STATE_FINAL; 5363 break; 5364 } 5365 5366 /* Don't swap if the first character is not a word character. 5367 * SWAP3 etc. also don't make sense then. */ 5368 if (!soundfold && !spell_iswordp(p, curwin)) 5369 { 5370 PROF_STORE(sp->ts_state) 5371 sp->ts_state = STATE_REP_INI; 5372 break; 5373 } 5374 5375 #ifdef FEAT_MBYTE 5376 if (has_mbyte) 5377 { 5378 n = MB_CPTR2LEN(p); 5379 c = mb_ptr2char(p); 5380 if (p[n] == NUL) 5381 c2 = NUL; 5382 else if (!soundfold && !spell_iswordp(p + n, curwin)) 5383 c2 = c; /* don't swap non-word char */ 5384 else 5385 c2 = mb_ptr2char(p + n); 5386 } 5387 else 5388 #endif 5389 { 5390 if (p[1] == NUL) 5391 c2 = NUL; 5392 else if (!soundfold && !spell_iswordp(p + 1, curwin)) 5393 c2 = c; /* don't swap non-word char */ 5394 else 5395 c2 = p[1]; 5396 } 5397 5398 /* When the second character is NUL we can't swap. */ 5399 if (c2 == NUL) 5400 { 5401 PROF_STORE(sp->ts_state) 5402 sp->ts_state = STATE_REP_INI; 5403 break; 5404 } 5405 5406 /* When characters are identical, swap won't do anything. 5407 * Also get here if the second char is not a word character. */ 5408 if (c == c2) 5409 { 5410 PROF_STORE(sp->ts_state) 5411 sp->ts_state = STATE_SWAP3; 5412 break; 5413 } 5414 if (c2 != NUL && TRY_DEEPER(su, stack, depth, SCORE_SWAP)) 5415 { 5416 go_deeper(stack, depth, SCORE_SWAP); 5417 #ifdef DEBUG_TRIEWALK 5418 sprintf(changename[depth], "%.*s-%s: swap %c and %c", 5419 sp->ts_twordlen, tword, fword + sp->ts_fidx, 5420 c, c2); 5421 #endif 5422 PROF_STORE(sp->ts_state) 5423 sp->ts_state = STATE_UNSWAP; 5424 ++depth; 5425 #ifdef FEAT_MBYTE 5426 if (has_mbyte) 5427 { 5428 fl = mb_char2len(c2); 5429 mch_memmove(p, p + n, fl); 5430 mb_char2bytes(c, p + fl); 5431 stack[depth].ts_fidxtry = sp->ts_fidx + n + fl; 5432 } 5433 else 5434 #endif 5435 { 5436 p[0] = c2; 5437 p[1] = c; 5438 stack[depth].ts_fidxtry = sp->ts_fidx + 2; 5439 } 5440 } 5441 else 5442 { 5443 /* If this swap doesn't work then SWAP3 won't either. */ 5444 PROF_STORE(sp->ts_state) 5445 sp->ts_state = STATE_REP_INI; 5446 } 5447 break; 5448 5449 case STATE_UNSWAP: 5450 /* Undo the STATE_SWAP swap: "21" -> "12". */ 5451 p = fword + sp->ts_fidx; 5452 #ifdef FEAT_MBYTE 5453 if (has_mbyte) 5454 { 5455 n = MB_PTR2LEN(p); 5456 c = mb_ptr2char(p + n); 5457 mch_memmove(p + MB_PTR2LEN(p + n), p, n); 5458 mb_char2bytes(c, p); 5459 } 5460 else 5461 #endif 5462 { 5463 c = *p; 5464 *p = p[1]; 5465 p[1] = c; 5466 } 5467 /* FALLTHROUGH */ 5468 5469 case STATE_SWAP3: 5470 /* Swap two bytes, skipping one: "123" -> "321". We change 5471 * "fword" here, it's changed back afterwards at STATE_UNSWAP3. */ 5472 p = fword + sp->ts_fidx; 5473 #ifdef FEAT_MBYTE 5474 if (has_mbyte) 5475 { 5476 n = MB_CPTR2LEN(p); 5477 c = mb_ptr2char(p); 5478 fl = MB_CPTR2LEN(p + n); 5479 c2 = mb_ptr2char(p + n); 5480 if (!soundfold && !spell_iswordp(p + n + fl, curwin)) 5481 c3 = c; /* don't swap non-word char */ 5482 else 5483 c3 = mb_ptr2char(p + n + fl); 5484 } 5485 else 5486 #endif 5487 { 5488 c = *p; 5489 c2 = p[1]; 5490 if (!soundfold && !spell_iswordp(p + 2, curwin)) 5491 c3 = c; /* don't swap non-word char */ 5492 else 5493 c3 = p[2]; 5494 } 5495 5496 /* When characters are identical: "121" then SWAP3 result is 5497 * identical, ROT3L result is same as SWAP: "211", ROT3L result is 5498 * same as SWAP on next char: "112". Thus skip all swapping. 5499 * Also skip when c3 is NUL. 5500 * Also get here when the third character is not a word character. 5501 * Second character may any char: "a.b" -> "b.a" */ 5502 if (c == c3 || c3 == NUL) 5503 { 5504 PROF_STORE(sp->ts_state) 5505 sp->ts_state = STATE_REP_INI; 5506 break; 5507 } 5508 if (TRY_DEEPER(su, stack, depth, SCORE_SWAP3)) 5509 { 5510 go_deeper(stack, depth, SCORE_SWAP3); 5511 #ifdef DEBUG_TRIEWALK 5512 sprintf(changename[depth], "%.*s-%s: swap3 %c and %c", 5513 sp->ts_twordlen, tword, fword + sp->ts_fidx, 5514 c, c3); 5515 #endif 5516 PROF_STORE(sp->ts_state) 5517 sp->ts_state = STATE_UNSWAP3; 5518 ++depth; 5519 #ifdef FEAT_MBYTE 5520 if (has_mbyte) 5521 { 5522 tl = mb_char2len(c3); 5523 mch_memmove(p, p + n + fl, tl); 5524 mb_char2bytes(c2, p + tl); 5525 mb_char2bytes(c, p + fl + tl); 5526 stack[depth].ts_fidxtry = sp->ts_fidx + n + fl + tl; 5527 } 5528 else 5529 #endif 5530 { 5531 p[0] = p[2]; 5532 p[2] = c; 5533 stack[depth].ts_fidxtry = sp->ts_fidx + 3; 5534 } 5535 } 5536 else 5537 { 5538 PROF_STORE(sp->ts_state) 5539 sp->ts_state = STATE_REP_INI; 5540 } 5541 break; 5542 5543 case STATE_UNSWAP3: 5544 /* Undo STATE_SWAP3: "321" -> "123" */ 5545 p = fword + sp->ts_fidx; 5546 #ifdef FEAT_MBYTE 5547 if (has_mbyte) 5548 { 5549 n = MB_PTR2LEN(p); 5550 c2 = mb_ptr2char(p + n); 5551 fl = MB_PTR2LEN(p + n); 5552 c = mb_ptr2char(p + n + fl); 5553 tl = MB_PTR2LEN(p + n + fl); 5554 mch_memmove(p + fl + tl, p, n); 5555 mb_char2bytes(c, p); 5556 mb_char2bytes(c2, p + tl); 5557 p = p + tl; 5558 } 5559 else 5560 #endif 5561 { 5562 c = *p; 5563 *p = p[2]; 5564 p[2] = c; 5565 ++p; 5566 } 5567 5568 if (!soundfold && !spell_iswordp(p, curwin)) 5569 { 5570 /* Middle char is not a word char, skip the rotate. First and 5571 * third char were already checked at swap and swap3. */ 5572 PROF_STORE(sp->ts_state) 5573 sp->ts_state = STATE_REP_INI; 5574 break; 5575 } 5576 5577 /* Rotate three characters left: "123" -> "231". We change 5578 * "fword" here, it's changed back afterwards at STATE_UNROT3L. */ 5579 if (TRY_DEEPER(su, stack, depth, SCORE_SWAP3)) 5580 { 5581 go_deeper(stack, depth, SCORE_SWAP3); 5582 #ifdef DEBUG_TRIEWALK 5583 p = fword + sp->ts_fidx; 5584 sprintf(changename[depth], "%.*s-%s: rotate left %c%c%c", 5585 sp->ts_twordlen, tword, fword + sp->ts_fidx, 5586 p[0], p[1], p[2]); 5587 #endif 5588 PROF_STORE(sp->ts_state) 5589 sp->ts_state = STATE_UNROT3L; 5590 ++depth; 5591 p = fword + sp->ts_fidx; 5592 #ifdef FEAT_MBYTE 5593 if (has_mbyte) 5594 { 5595 n = MB_CPTR2LEN(p); 5596 c = mb_ptr2char(p); 5597 fl = MB_CPTR2LEN(p + n); 5598 fl += MB_CPTR2LEN(p + n + fl); 5599 mch_memmove(p, p + n, fl); 5600 mb_char2bytes(c, p + fl); 5601 stack[depth].ts_fidxtry = sp->ts_fidx + n + fl; 5602 } 5603 else 5604 #endif 5605 { 5606 c = *p; 5607 *p = p[1]; 5608 p[1] = p[2]; 5609 p[2] = c; 5610 stack[depth].ts_fidxtry = sp->ts_fidx + 3; 5611 } 5612 } 5613 else 5614 { 5615 PROF_STORE(sp->ts_state) 5616 sp->ts_state = STATE_REP_INI; 5617 } 5618 break; 5619 5620 case STATE_UNROT3L: 5621 /* Undo ROT3L: "231" -> "123" */ 5622 p = fword + sp->ts_fidx; 5623 #ifdef FEAT_MBYTE 5624 if (has_mbyte) 5625 { 5626 n = MB_PTR2LEN(p); 5627 n += MB_PTR2LEN(p + n); 5628 c = mb_ptr2char(p + n); 5629 tl = MB_PTR2LEN(p + n); 5630 mch_memmove(p + tl, p, n); 5631 mb_char2bytes(c, p); 5632 } 5633 else 5634 #endif 5635 { 5636 c = p[2]; 5637 p[2] = p[1]; 5638 p[1] = *p; 5639 *p = c; 5640 } 5641 5642 /* Rotate three bytes right: "123" -> "312". We change "fword" 5643 * here, it's changed back afterwards at STATE_UNROT3R. */ 5644 if (TRY_DEEPER(su, stack, depth, SCORE_SWAP3)) 5645 { 5646 go_deeper(stack, depth, SCORE_SWAP3); 5647 #ifdef DEBUG_TRIEWALK 5648 p = fword + sp->ts_fidx; 5649 sprintf(changename[depth], "%.*s-%s: rotate right %c%c%c", 5650 sp->ts_twordlen, tword, fword + sp->ts_fidx, 5651 p[0], p[1], p[2]); 5652 #endif 5653 PROF_STORE(sp->ts_state) 5654 sp->ts_state = STATE_UNROT3R; 5655 ++depth; 5656 p = fword + sp->ts_fidx; 5657 #ifdef FEAT_MBYTE 5658 if (has_mbyte) 5659 { 5660 n = MB_CPTR2LEN(p); 5661 n += MB_CPTR2LEN(p + n); 5662 c = mb_ptr2char(p + n); 5663 tl = MB_CPTR2LEN(p + n); 5664 mch_memmove(p + tl, p, n); 5665 mb_char2bytes(c, p); 5666 stack[depth].ts_fidxtry = sp->ts_fidx + n + tl; 5667 } 5668 else 5669 #endif 5670 { 5671 c = p[2]; 5672 p[2] = p[1]; 5673 p[1] = *p; 5674 *p = c; 5675 stack[depth].ts_fidxtry = sp->ts_fidx + 3; 5676 } 5677 } 5678 else 5679 { 5680 PROF_STORE(sp->ts_state) 5681 sp->ts_state = STATE_REP_INI; 5682 } 5683 break; 5684 5685 case STATE_UNROT3R: 5686 /* Undo ROT3R: "312" -> "123" */ 5687 p = fword + sp->ts_fidx; 5688 #ifdef FEAT_MBYTE 5689 if (has_mbyte) 5690 { 5691 c = mb_ptr2char(p); 5692 tl = MB_PTR2LEN(p); 5693 n = MB_PTR2LEN(p + tl); 5694 n += MB_PTR2LEN(p + tl + n); 5695 mch_memmove(p, p + tl, n); 5696 mb_char2bytes(c, p + n); 5697 } 5698 else 5699 #endif 5700 { 5701 c = *p; 5702 *p = p[1]; 5703 p[1] = p[2]; 5704 p[2] = c; 5705 } 5706 /* FALLTHROUGH */ 5707 5708 case STATE_REP_INI: 5709 /* Check if matching with REP items from the .aff file would work. 5710 * Quickly skip if: 5711 * - there are no REP items and we are not in the soundfold trie 5712 * - the score is going to be too high anyway 5713 * - already applied a REP item or swapped here */ 5714 if ((lp->lp_replang == NULL && !soundfold) 5715 || sp->ts_score + SCORE_REP >= su->su_maxscore 5716 || sp->ts_fidx < sp->ts_fidxtry) 5717 { 5718 PROF_STORE(sp->ts_state) 5719 sp->ts_state = STATE_FINAL; 5720 break; 5721 } 5722 5723 /* Use the first byte to quickly find the first entry that may 5724 * match. If the index is -1 there is none. */ 5725 if (soundfold) 5726 sp->ts_curi = slang->sl_repsal_first[fword[sp->ts_fidx]]; 5727 else 5728 sp->ts_curi = lp->lp_replang->sl_rep_first[fword[sp->ts_fidx]]; 5729 5730 if (sp->ts_curi < 0) 5731 { 5732 PROF_STORE(sp->ts_state) 5733 sp->ts_state = STATE_FINAL; 5734 break; 5735 } 5736 5737 PROF_STORE(sp->ts_state) 5738 sp->ts_state = STATE_REP; 5739 /* FALLTHROUGH */ 5740 5741 case STATE_REP: 5742 /* Try matching with REP items from the .aff file. For each match 5743 * replace the characters and check if the resulting word is 5744 * valid. */ 5745 p = fword + sp->ts_fidx; 5746 5747 if (soundfold) 5748 gap = &slang->sl_repsal; 5749 else 5750 gap = &lp->lp_replang->sl_rep; 5751 while (sp->ts_curi < gap->ga_len) 5752 { 5753 ftp = (fromto_T *)gap->ga_data + sp->ts_curi++; 5754 if (*ftp->ft_from != *p) 5755 { 5756 /* past possible matching entries */ 5757 sp->ts_curi = gap->ga_len; 5758 break; 5759 } 5760 if (STRNCMP(ftp->ft_from, p, STRLEN(ftp->ft_from)) == 0 5761 && TRY_DEEPER(su, stack, depth, SCORE_REP)) 5762 { 5763 go_deeper(stack, depth, SCORE_REP); 5764 #ifdef DEBUG_TRIEWALK 5765 sprintf(changename[depth], "%.*s-%s: replace %s with %s", 5766 sp->ts_twordlen, tword, fword + sp->ts_fidx, 5767 ftp->ft_from, ftp->ft_to); 5768 #endif 5769 /* Need to undo this afterwards. */ 5770 PROF_STORE(sp->ts_state) 5771 sp->ts_state = STATE_REP_UNDO; 5772 5773 /* Change the "from" to the "to" string. */ 5774 ++depth; 5775 fl = (int)STRLEN(ftp->ft_from); 5776 tl = (int)STRLEN(ftp->ft_to); 5777 if (fl != tl) 5778 { 5779 STRMOVE(p + tl, p + fl); 5780 repextra += tl - fl; 5781 } 5782 mch_memmove(p, ftp->ft_to, tl); 5783 stack[depth].ts_fidxtry = sp->ts_fidx + tl; 5784 #ifdef FEAT_MBYTE 5785 stack[depth].ts_tcharlen = 0; 5786 #endif 5787 break; 5788 } 5789 } 5790 5791 if (sp->ts_curi >= gap->ga_len && sp->ts_state == STATE_REP) 5792 { 5793 /* No (more) matches. */ 5794 PROF_STORE(sp->ts_state) 5795 sp->ts_state = STATE_FINAL; 5796 } 5797 5798 break; 5799 5800 case STATE_REP_UNDO: 5801 /* Undo a REP replacement and continue with the next one. */ 5802 if (soundfold) 5803 gap = &slang->sl_repsal; 5804 else 5805 gap = &lp->lp_replang->sl_rep; 5806 ftp = (fromto_T *)gap->ga_data + sp->ts_curi - 1; 5807 fl = (int)STRLEN(ftp->ft_from); 5808 tl = (int)STRLEN(ftp->ft_to); 5809 p = fword + sp->ts_fidx; 5810 if (fl != tl) 5811 { 5812 STRMOVE(p + fl, p + tl); 5813 repextra -= tl - fl; 5814 } 5815 mch_memmove(p, ftp->ft_from, fl); 5816 PROF_STORE(sp->ts_state) 5817 sp->ts_state = STATE_REP; 5818 break; 5819 5820 default: 5821 /* Did all possible states at this level, go up one level. */ 5822 --depth; 5823 5824 if (depth >= 0 && stack[depth].ts_prefixdepth == PFD_PREFIXTREE) 5825 { 5826 /* Continue in or go back to the prefix tree. */ 5827 byts = pbyts; 5828 idxs = pidxs; 5829 } 5830 5831 /* Don't check for CTRL-C too often, it takes time. */ 5832 if (--breakcheckcount == 0) 5833 { 5834 ui_breakcheck(); 5835 breakcheckcount = 1000; 5836 } 5837 } 5838 } 5839 } 5840 5841 5842 /* 5843 * Go one level deeper in the tree. 5844 */ 5845 static void 5846 go_deeper(trystate_T *stack, int depth, int score_add) 5847 { 5848 stack[depth + 1] = stack[depth]; 5849 stack[depth + 1].ts_state = STATE_START; 5850 stack[depth + 1].ts_score = stack[depth].ts_score + score_add; 5851 stack[depth + 1].ts_curi = 1; /* start just after length byte */ 5852 stack[depth + 1].ts_flags = 0; 5853 } 5854 5855 #ifdef FEAT_MBYTE 5856 /* 5857 * Case-folding may change the number of bytes: Count nr of chars in 5858 * fword[flen] and return the byte length of that many chars in "word". 5859 */ 5860 static int 5861 nofold_len(char_u *fword, int flen, char_u *word) 5862 { 5863 char_u *p; 5864 int i = 0; 5865 5866 for (p = fword; p < fword + flen; MB_PTR_ADV(p)) 5867 ++i; 5868 for (p = word; i > 0; MB_PTR_ADV(p)) 5869 --i; 5870 return (int)(p - word); 5871 } 5872 #endif 5873 5874 /* 5875 * "fword" is a good word with case folded. Find the matching keep-case 5876 * words and put it in "kword". 5877 * Theoretically there could be several keep-case words that result in the 5878 * same case-folded word, but we only find one... 5879 */ 5880 static void 5881 find_keepcap_word(slang_T *slang, char_u *fword, char_u *kword) 5882 { 5883 char_u uword[MAXWLEN]; /* "fword" in upper-case */ 5884 int depth; 5885 idx_T tryidx; 5886 5887 /* The following arrays are used at each depth in the tree. */ 5888 idx_T arridx[MAXWLEN]; 5889 int round[MAXWLEN]; 5890 int fwordidx[MAXWLEN]; 5891 int uwordidx[MAXWLEN]; 5892 int kwordlen[MAXWLEN]; 5893 5894 int flen, ulen; 5895 int l; 5896 int len; 5897 int c; 5898 idx_T lo, hi, m; 5899 char_u *p; 5900 char_u *byts = slang->sl_kbyts; /* array with bytes of the words */ 5901 idx_T *idxs = slang->sl_kidxs; /* array with indexes */ 5902 5903 if (byts == NULL) 5904 { 5905 /* array is empty: "cannot happen" */ 5906 *kword = NUL; 5907 return; 5908 } 5909 5910 /* Make an all-cap version of "fword". */ 5911 allcap_copy(fword, uword); 5912 5913 /* 5914 * Each character needs to be tried both case-folded and upper-case. 5915 * All this gets very complicated if we keep in mind that changing case 5916 * may change the byte length of a multi-byte character... 5917 */ 5918 depth = 0; 5919 arridx[0] = 0; 5920 round[0] = 0; 5921 fwordidx[0] = 0; 5922 uwordidx[0] = 0; 5923 kwordlen[0] = 0; 5924 while (depth >= 0) 5925 { 5926 if (fword[fwordidx[depth]] == NUL) 5927 { 5928 /* We are at the end of "fword". If the tree allows a word to end 5929 * here we have found a match. */ 5930 if (byts[arridx[depth] + 1] == 0) 5931 { 5932 kword[kwordlen[depth]] = NUL; 5933 return; 5934 } 5935 5936 /* kword is getting too long, continue one level up */ 5937 --depth; 5938 } 5939 else if (++round[depth] > 2) 5940 { 5941 /* tried both fold-case and upper-case character, continue one 5942 * level up */ 5943 --depth; 5944 } 5945 else 5946 { 5947 /* 5948 * round[depth] == 1: Try using the folded-case character. 5949 * round[depth] == 2: Try using the upper-case character. 5950 */ 5951 #ifdef FEAT_MBYTE 5952 if (has_mbyte) 5953 { 5954 flen = MB_CPTR2LEN(fword + fwordidx[depth]); 5955 ulen = MB_CPTR2LEN(uword + uwordidx[depth]); 5956 } 5957 else 5958 #endif 5959 ulen = flen = 1; 5960 if (round[depth] == 1) 5961 { 5962 p = fword + fwordidx[depth]; 5963 l = flen; 5964 } 5965 else 5966 { 5967 p = uword + uwordidx[depth]; 5968 l = ulen; 5969 } 5970 5971 for (tryidx = arridx[depth]; l > 0; --l) 5972 { 5973 /* Perform a binary search in the list of accepted bytes. */ 5974 len = byts[tryidx++]; 5975 c = *p++; 5976 lo = tryidx; 5977 hi = tryidx + len - 1; 5978 while (lo < hi) 5979 { 5980 m = (lo + hi) / 2; 5981 if (byts[m] > c) 5982 hi = m - 1; 5983 else if (byts[m] < c) 5984 lo = m + 1; 5985 else 5986 { 5987 lo = hi = m; 5988 break; 5989 } 5990 } 5991 5992 /* Stop if there is no matching byte. */ 5993 if (hi < lo || byts[lo] != c) 5994 break; 5995 5996 /* Continue at the child (if there is one). */ 5997 tryidx = idxs[lo]; 5998 } 5999 6000 if (l == 0) 6001 { 6002 /* 6003 * Found the matching char. Copy it to "kword" and go a 6004 * level deeper. 6005 */ 6006 if (round[depth] == 1) 6007 { 6008 STRNCPY(kword + kwordlen[depth], fword + fwordidx[depth], 6009 flen); 6010 kwordlen[depth + 1] = kwordlen[depth] + flen; 6011 } 6012 else 6013 { 6014 STRNCPY(kword + kwordlen[depth], uword + uwordidx[depth], 6015 ulen); 6016 kwordlen[depth + 1] = kwordlen[depth] + ulen; 6017 } 6018 fwordidx[depth + 1] = fwordidx[depth] + flen; 6019 uwordidx[depth + 1] = uwordidx[depth] + ulen; 6020 6021 ++depth; 6022 arridx[depth] = tryidx; 6023 round[depth] = 0; 6024 } 6025 } 6026 } 6027 6028 /* Didn't find it: "cannot happen". */ 6029 *kword = NUL; 6030 } 6031 6032 /* 6033 * Compute the sound-a-like score for suggestions in su->su_ga and add them to 6034 * su->su_sga. 6035 */ 6036 static void 6037 score_comp_sal(suginfo_T *su) 6038 { 6039 langp_T *lp; 6040 char_u badsound[MAXWLEN]; 6041 int i; 6042 suggest_T *stp; 6043 suggest_T *sstp; 6044 int score; 6045 int lpi; 6046 6047 if (ga_grow(&su->su_sga, su->su_ga.ga_len) == FAIL) 6048 return; 6049 6050 /* Use the sound-folding of the first language that supports it. */ 6051 for (lpi = 0; lpi < curwin->w_s->b_langp.ga_len; ++lpi) 6052 { 6053 lp = LANGP_ENTRY(curwin->w_s->b_langp, lpi); 6054 if (lp->lp_slang->sl_sal.ga_len > 0) 6055 { 6056 /* soundfold the bad word */ 6057 spell_soundfold(lp->lp_slang, su->su_fbadword, TRUE, badsound); 6058 6059 for (i = 0; i < su->su_ga.ga_len; ++i) 6060 { 6061 stp = &SUG(su->su_ga, i); 6062 6063 /* Case-fold the suggested word, sound-fold it and compute the 6064 * sound-a-like score. */ 6065 score = stp_sal_score(stp, su, lp->lp_slang, badsound); 6066 if (score < SCORE_MAXMAX) 6067 { 6068 /* Add the suggestion. */ 6069 sstp = &SUG(su->su_sga, su->su_sga.ga_len); 6070 sstp->st_word = vim_strsave(stp->st_word); 6071 if (sstp->st_word != NULL) 6072 { 6073 sstp->st_wordlen = stp->st_wordlen; 6074 sstp->st_score = score; 6075 sstp->st_altscore = 0; 6076 sstp->st_orglen = stp->st_orglen; 6077 ++su->su_sga.ga_len; 6078 } 6079 } 6080 } 6081 break; 6082 } 6083 } 6084 } 6085 6086 /* 6087 * Combine the list of suggestions in su->su_ga and su->su_sga. 6088 * They are entwined. 6089 */ 6090 static void 6091 score_combine(suginfo_T *su) 6092 { 6093 int i; 6094 int j; 6095 garray_T ga; 6096 garray_T *gap; 6097 langp_T *lp; 6098 suggest_T *stp; 6099 char_u *p; 6100 char_u badsound[MAXWLEN]; 6101 int round; 6102 int lpi; 6103 slang_T *slang = NULL; 6104 6105 /* Add the alternate score to su_ga. */ 6106 for (lpi = 0; lpi < curwin->w_s->b_langp.ga_len; ++lpi) 6107 { 6108 lp = LANGP_ENTRY(curwin->w_s->b_langp, lpi); 6109 if (lp->lp_slang->sl_sal.ga_len > 0) 6110 { 6111 /* soundfold the bad word */ 6112 slang = lp->lp_slang; 6113 spell_soundfold(slang, su->su_fbadword, TRUE, badsound); 6114 6115 for (i = 0; i < su->su_ga.ga_len; ++i) 6116 { 6117 stp = &SUG(su->su_ga, i); 6118 stp->st_altscore = stp_sal_score(stp, su, slang, badsound); 6119 if (stp->st_altscore == SCORE_MAXMAX) 6120 stp->st_score = (stp->st_score * 3 + SCORE_BIG) / 4; 6121 else 6122 stp->st_score = (stp->st_score * 3 6123 + stp->st_altscore) / 4; 6124 stp->st_salscore = FALSE; 6125 } 6126 break; 6127 } 6128 } 6129 6130 if (slang == NULL) /* Using "double" without sound folding. */ 6131 { 6132 (void)cleanup_suggestions(&su->su_ga, su->su_maxscore, 6133 su->su_maxcount); 6134 return; 6135 } 6136 6137 /* Add the alternate score to su_sga. */ 6138 for (i = 0; i < su->su_sga.ga_len; ++i) 6139 { 6140 stp = &SUG(su->su_sga, i); 6141 stp->st_altscore = spell_edit_score(slang, 6142 su->su_badword, stp->st_word); 6143 if (stp->st_score == SCORE_MAXMAX) 6144 stp->st_score = (SCORE_BIG * 7 + stp->st_altscore) / 8; 6145 else 6146 stp->st_score = (stp->st_score * 7 + stp->st_altscore) / 8; 6147 stp->st_salscore = TRUE; 6148 } 6149 6150 /* Remove bad suggestions, sort the suggestions and truncate at "maxcount" 6151 * for both lists. */ 6152 check_suggestions(su, &su->su_ga); 6153 (void)cleanup_suggestions(&su->su_ga, su->su_maxscore, su->su_maxcount); 6154 check_suggestions(su, &su->su_sga); 6155 (void)cleanup_suggestions(&su->su_sga, su->su_maxscore, su->su_maxcount); 6156 6157 ga_init2(&ga, (int)sizeof(suginfo_T), 1); 6158 if (ga_grow(&ga, su->su_ga.ga_len + su->su_sga.ga_len) == FAIL) 6159 return; 6160 6161 stp = &SUG(ga, 0); 6162 for (i = 0; i < su->su_ga.ga_len || i < su->su_sga.ga_len; ++i) 6163 { 6164 /* round 1: get a suggestion from su_ga 6165 * round 2: get a suggestion from su_sga */ 6166 for (round = 1; round <= 2; ++round) 6167 { 6168 gap = round == 1 ? &su->su_ga : &su->su_sga; 6169 if (i < gap->ga_len) 6170 { 6171 /* Don't add a word if it's already there. */ 6172 p = SUG(*gap, i).st_word; 6173 for (j = 0; j < ga.ga_len; ++j) 6174 if (STRCMP(stp[j].st_word, p) == 0) 6175 break; 6176 if (j == ga.ga_len) 6177 stp[ga.ga_len++] = SUG(*gap, i); 6178 else 6179 vim_free(p); 6180 } 6181 } 6182 } 6183 6184 ga_clear(&su->su_ga); 6185 ga_clear(&su->su_sga); 6186 6187 /* Truncate the list to the number of suggestions that will be displayed. */ 6188 if (ga.ga_len > su->su_maxcount) 6189 { 6190 for (i = su->su_maxcount; i < ga.ga_len; ++i) 6191 vim_free(stp[i].st_word); 6192 ga.ga_len = su->su_maxcount; 6193 } 6194 6195 su->su_ga = ga; 6196 } 6197 6198 /* 6199 * For the goodword in "stp" compute the soundalike score compared to the 6200 * badword. 6201 */ 6202 static int 6203 stp_sal_score( 6204 suggest_T *stp, 6205 suginfo_T *su, 6206 slang_T *slang, 6207 char_u *badsound) /* sound-folded badword */ 6208 { 6209 char_u *p; 6210 char_u *pbad; 6211 char_u *pgood; 6212 char_u badsound2[MAXWLEN]; 6213 char_u fword[MAXWLEN]; 6214 char_u goodsound[MAXWLEN]; 6215 char_u goodword[MAXWLEN]; 6216 int lendiff; 6217 6218 lendiff = (int)(su->su_badlen - stp->st_orglen); 6219 if (lendiff >= 0) 6220 pbad = badsound; 6221 else 6222 { 6223 /* soundfold the bad word with more characters following */ 6224 (void)spell_casefold(su->su_badptr, stp->st_orglen, fword, MAXWLEN); 6225 6226 /* When joining two words the sound often changes a lot. E.g., "t he" 6227 * sounds like "t h" while "the" sounds like "@". Avoid that by 6228 * removing the space. Don't do it when the good word also contains a 6229 * space. */ 6230 if (VIM_ISWHITE(su->su_badptr[su->su_badlen]) 6231 && *skiptowhite(stp->st_word) == NUL) 6232 for (p = fword; *(p = skiptowhite(p)) != NUL; ) 6233 STRMOVE(p, p + 1); 6234 6235 spell_soundfold(slang, fword, TRUE, badsound2); 6236 pbad = badsound2; 6237 } 6238 6239 if (lendiff > 0 && stp->st_wordlen + lendiff < MAXWLEN) 6240 { 6241 /* Add part of the bad word to the good word, so that we soundfold 6242 * what replaces the bad word. */ 6243 STRCPY(goodword, stp->st_word); 6244 vim_strncpy(goodword + stp->st_wordlen, 6245 su->su_badptr + su->su_badlen - lendiff, lendiff); 6246 pgood = goodword; 6247 } 6248 else 6249 pgood = stp->st_word; 6250 6251 /* Sound-fold the word and compute the score for the difference. */ 6252 spell_soundfold(slang, pgood, FALSE, goodsound); 6253 6254 return soundalike_score(goodsound, pbad); 6255 } 6256 6257 /* structure used to store soundfolded words that add_sound_suggest() has 6258 * handled already. */ 6259 typedef struct 6260 { 6261 short sft_score; /* lowest score used */ 6262 char_u sft_word[1]; /* soundfolded word, actually longer */ 6263 } sftword_T; 6264 6265 static sftword_T dumsft; 6266 #define HIKEY2SFT(p) ((sftword_T *)(p - (dumsft.sft_word - (char_u *)&dumsft))) 6267 #define HI2SFT(hi) HIKEY2SFT((hi)->hi_key) 6268 6269 /* 6270 * Prepare for calling suggest_try_soundalike(). 6271 */ 6272 static void 6273 suggest_try_soundalike_prep(void) 6274 { 6275 langp_T *lp; 6276 int lpi; 6277 slang_T *slang; 6278 6279 /* Do this for all languages that support sound folding and for which a 6280 * .sug file has been loaded. */ 6281 for (lpi = 0; lpi < curwin->w_s->b_langp.ga_len; ++lpi) 6282 { 6283 lp = LANGP_ENTRY(curwin->w_s->b_langp, lpi); 6284 slang = lp->lp_slang; 6285 if (slang->sl_sal.ga_len > 0 && slang->sl_sbyts != NULL) 6286 /* prepare the hashtable used by add_sound_suggest() */ 6287 hash_init(&slang->sl_sounddone); 6288 } 6289 } 6290 6291 /* 6292 * Find suggestions by comparing the word in a sound-a-like form. 6293 * Note: This doesn't support postponed prefixes. 6294 */ 6295 static void 6296 suggest_try_soundalike(suginfo_T *su) 6297 { 6298 char_u salword[MAXWLEN]; 6299 langp_T *lp; 6300 int lpi; 6301 slang_T *slang; 6302 6303 /* Do this for all languages that support sound folding and for which a 6304 * .sug file has been loaded. */ 6305 for (lpi = 0; lpi < curwin->w_s->b_langp.ga_len; ++lpi) 6306 { 6307 lp = LANGP_ENTRY(curwin->w_s->b_langp, lpi); 6308 slang = lp->lp_slang; 6309 if (slang->sl_sal.ga_len > 0 && slang->sl_sbyts != NULL) 6310 { 6311 /* soundfold the bad word */ 6312 spell_soundfold(slang, su->su_fbadword, TRUE, salword); 6313 6314 /* try all kinds of inserts/deletes/swaps/etc. */ 6315 /* TODO: also soundfold the next words, so that we can try joining 6316 * and splitting */ 6317 #ifdef SUGGEST_PROFILE 6318 prof_init(); 6319 #endif 6320 suggest_trie_walk(su, lp, salword, TRUE); 6321 #ifdef SUGGEST_PROFILE 6322 prof_report("soundalike"); 6323 #endif 6324 } 6325 } 6326 } 6327 6328 /* 6329 * Finish up after calling suggest_try_soundalike(). 6330 */ 6331 static void 6332 suggest_try_soundalike_finish(void) 6333 { 6334 langp_T *lp; 6335 int lpi; 6336 slang_T *slang; 6337 int todo; 6338 hashitem_T *hi; 6339 6340 /* Do this for all languages that support sound folding and for which a 6341 * .sug file has been loaded. */ 6342 for (lpi = 0; lpi < curwin->w_s->b_langp.ga_len; ++lpi) 6343 { 6344 lp = LANGP_ENTRY(curwin->w_s->b_langp, lpi); 6345 slang = lp->lp_slang; 6346 if (slang->sl_sal.ga_len > 0 && slang->sl_sbyts != NULL) 6347 { 6348 /* Free the info about handled words. */ 6349 todo = (int)slang->sl_sounddone.ht_used; 6350 for (hi = slang->sl_sounddone.ht_array; todo > 0; ++hi) 6351 if (!HASHITEM_EMPTY(hi)) 6352 { 6353 vim_free(HI2SFT(hi)); 6354 --todo; 6355 } 6356 6357 /* Clear the hashtable, it may also be used by another region. */ 6358 hash_clear(&slang->sl_sounddone); 6359 hash_init(&slang->sl_sounddone); 6360 } 6361 } 6362 } 6363 6364 /* 6365 * A match with a soundfolded word is found. Add the good word(s) that 6366 * produce this soundfolded word. 6367 */ 6368 static void 6369 add_sound_suggest( 6370 suginfo_T *su, 6371 char_u *goodword, 6372 int score, /* soundfold score */ 6373 langp_T *lp) 6374 { 6375 slang_T *slang = lp->lp_slang; /* language for sound folding */ 6376 int sfwordnr; 6377 char_u *nrline; 6378 int orgnr; 6379 char_u theword[MAXWLEN]; 6380 int i; 6381 int wlen; 6382 char_u *byts; 6383 idx_T *idxs; 6384 int n; 6385 int wordcount; 6386 int wc; 6387 int goodscore; 6388 hash_T hash; 6389 hashitem_T *hi; 6390 sftword_T *sft; 6391 int bc, gc; 6392 int limit; 6393 6394 /* 6395 * It's very well possible that the same soundfold word is found several 6396 * times with different scores. Since the following is quite slow only do 6397 * the words that have a better score than before. Use a hashtable to 6398 * remember the words that have been done. 6399 */ 6400 hash = hash_hash(goodword); 6401 hi = hash_lookup(&slang->sl_sounddone, goodword, hash); 6402 if (HASHITEM_EMPTY(hi)) 6403 { 6404 sft = (sftword_T *)alloc((unsigned)(sizeof(sftword_T) 6405 + STRLEN(goodword))); 6406 if (sft != NULL) 6407 { 6408 sft->sft_score = score; 6409 STRCPY(sft->sft_word, goodword); 6410 hash_add_item(&slang->sl_sounddone, hi, sft->sft_word, hash); 6411 } 6412 } 6413 else 6414 { 6415 sft = HI2SFT(hi); 6416 if (score >= sft->sft_score) 6417 return; 6418 sft->sft_score = score; 6419 } 6420 6421 /* 6422 * Find the word nr in the soundfold tree. 6423 */ 6424 sfwordnr = soundfold_find(slang, goodword); 6425 if (sfwordnr < 0) 6426 { 6427 internal_error("add_sound_suggest()"); 6428 return; 6429 } 6430 6431 /* 6432 * go over the list of good words that produce this soundfold word 6433 */ 6434 nrline = ml_get_buf(slang->sl_sugbuf, (linenr_T)(sfwordnr + 1), FALSE); 6435 orgnr = 0; 6436 while (*nrline != NUL) 6437 { 6438 /* The wordnr was stored in a minimal nr of bytes as an offset to the 6439 * previous wordnr. */ 6440 orgnr += bytes2offset(&nrline); 6441 6442 byts = slang->sl_fbyts; 6443 idxs = slang->sl_fidxs; 6444 6445 /* Lookup the word "orgnr" one of the two tries. */ 6446 n = 0; 6447 wordcount = 0; 6448 for (wlen = 0; wlen < MAXWLEN - 3; ++wlen) 6449 { 6450 i = 1; 6451 if (wordcount == orgnr && byts[n + 1] == NUL) 6452 break; /* found end of word */ 6453 6454 if (byts[n + 1] == NUL) 6455 ++wordcount; 6456 6457 /* skip over the NUL bytes */ 6458 for ( ; byts[n + i] == NUL; ++i) 6459 if (i > byts[n]) /* safety check */ 6460 { 6461 STRCPY(theword + wlen, "BAD"); 6462 wlen += 3; 6463 goto badword; 6464 } 6465 6466 /* One of the siblings must have the word. */ 6467 for ( ; i < byts[n]; ++i) 6468 { 6469 wc = idxs[idxs[n + i]]; /* nr of words under this byte */ 6470 if (wordcount + wc > orgnr) 6471 break; 6472 wordcount += wc; 6473 } 6474 6475 theword[wlen] = byts[n + i]; 6476 n = idxs[n + i]; 6477 } 6478 badword: 6479 theword[wlen] = NUL; 6480 6481 /* Go over the possible flags and regions. */ 6482 for (; i <= byts[n] && byts[n + i] == NUL; ++i) 6483 { 6484 char_u cword[MAXWLEN]; 6485 char_u *p; 6486 int flags = (int)idxs[n + i]; 6487 6488 /* Skip words with the NOSUGGEST flag */ 6489 if (flags & WF_NOSUGGEST) 6490 continue; 6491 6492 if (flags & WF_KEEPCAP) 6493 { 6494 /* Must find the word in the keep-case tree. */ 6495 find_keepcap_word(slang, theword, cword); 6496 p = cword; 6497 } 6498 else 6499 { 6500 flags |= su->su_badflags; 6501 if ((flags & WF_CAPMASK) != 0) 6502 { 6503 /* Need to fix case according to "flags". */ 6504 make_case_word(theword, cword, flags); 6505 p = cword; 6506 } 6507 else 6508 p = theword; 6509 } 6510 6511 /* Add the suggestion. */ 6512 if (sps_flags & SPS_DOUBLE) 6513 { 6514 /* Add the suggestion if the score isn't too bad. */ 6515 if (score <= su->su_maxscore) 6516 add_suggestion(su, &su->su_sga, p, su->su_badlen, 6517 score, 0, FALSE, slang, FALSE); 6518 } 6519 else 6520 { 6521 /* Add a penalty for words in another region. */ 6522 if ((flags & WF_REGION) 6523 && (((unsigned)flags >> 16) & lp->lp_region) == 0) 6524 goodscore = SCORE_REGION; 6525 else 6526 goodscore = 0; 6527 6528 /* Add a small penalty for changing the first letter from 6529 * lower to upper case. Helps for "tath" -> "Kath", which is 6530 * less common than "tath" -> "path". Don't do it when the 6531 * letter is the same, that has already been counted. */ 6532 gc = PTR2CHAR(p); 6533 if (SPELL_ISUPPER(gc)) 6534 { 6535 bc = PTR2CHAR(su->su_badword); 6536 if (!SPELL_ISUPPER(bc) 6537 && SPELL_TOFOLD(bc) != SPELL_TOFOLD(gc)) 6538 goodscore += SCORE_ICASE / 2; 6539 } 6540 6541 /* Compute the score for the good word. This only does letter 6542 * insert/delete/swap/replace. REP items are not considered, 6543 * which may make the score a bit higher. 6544 * Use a limit for the score to make it work faster. Use 6545 * MAXSCORE(), because RESCORE() will change the score. 6546 * If the limit is very high then the iterative method is 6547 * inefficient, using an array is quicker. */ 6548 limit = MAXSCORE(su->su_sfmaxscore - goodscore, score); 6549 if (limit > SCORE_LIMITMAX) 6550 goodscore += spell_edit_score(slang, su->su_badword, p); 6551 else 6552 goodscore += spell_edit_score_limit(slang, su->su_badword, 6553 p, limit); 6554 6555 /* When going over the limit don't bother to do the rest. */ 6556 if (goodscore < SCORE_MAXMAX) 6557 { 6558 /* Give a bonus to words seen before. */ 6559 goodscore = score_wordcount_adj(slang, goodscore, p, FALSE); 6560 6561 /* Add the suggestion if the score isn't too bad. */ 6562 goodscore = RESCORE(goodscore, score); 6563 if (goodscore <= su->su_sfmaxscore) 6564 add_suggestion(su, &su->su_ga, p, su->su_badlen, 6565 goodscore, score, TRUE, slang, TRUE); 6566 } 6567 } 6568 } 6569 /* smsg("word %s (%d): %s (%d)", sftword, sftnr, theword, orgnr); */ 6570 } 6571 } 6572 6573 /* 6574 * Find word "word" in fold-case tree for "slang" and return the word number. 6575 */ 6576 static int 6577 soundfold_find(slang_T *slang, char_u *word) 6578 { 6579 idx_T arridx = 0; 6580 int len; 6581 int wlen = 0; 6582 int c; 6583 char_u *ptr = word; 6584 char_u *byts; 6585 idx_T *idxs; 6586 int wordnr = 0; 6587 6588 byts = slang->sl_sbyts; 6589 idxs = slang->sl_sidxs; 6590 6591 for (;;) 6592 { 6593 /* First byte is the number of possible bytes. */ 6594 len = byts[arridx++]; 6595 6596 /* If the first possible byte is a zero the word could end here. 6597 * If the word ends we found the word. If not skip the NUL bytes. */ 6598 c = ptr[wlen]; 6599 if (byts[arridx] == NUL) 6600 { 6601 if (c == NUL) 6602 break; 6603 6604 /* Skip over the zeros, there can be several. */ 6605 while (len > 0 && byts[arridx] == NUL) 6606 { 6607 ++arridx; 6608 --len; 6609 } 6610 if (len == 0) 6611 return -1; /* no children, word should have ended here */ 6612 ++wordnr; 6613 } 6614 6615 /* If the word ends we didn't find it. */ 6616 if (c == NUL) 6617 return -1; 6618 6619 /* Perform a binary search in the list of accepted bytes. */ 6620 if (c == TAB) /* <Tab> is handled like <Space> */ 6621 c = ' '; 6622 while (byts[arridx] < c) 6623 { 6624 /* The word count is in the first idxs[] entry of the child. */ 6625 wordnr += idxs[idxs[arridx]]; 6626 ++arridx; 6627 if (--len == 0) /* end of the bytes, didn't find it */ 6628 return -1; 6629 } 6630 if (byts[arridx] != c) /* didn't find the byte */ 6631 return -1; 6632 6633 /* Continue at the child (if there is one). */ 6634 arridx = idxs[arridx]; 6635 ++wlen; 6636 6637 /* One space in the good word may stand for several spaces in the 6638 * checked word. */ 6639 if (c == ' ') 6640 while (ptr[wlen] == ' ' || ptr[wlen] == TAB) 6641 ++wlen; 6642 } 6643 6644 return wordnr; 6645 } 6646 6647 /* 6648 * Copy "fword" to "cword", fixing case according to "flags". 6649 */ 6650 static void 6651 make_case_word(char_u *fword, char_u *cword, int flags) 6652 { 6653 if (flags & WF_ALLCAP) 6654 /* Make it all upper-case */ 6655 allcap_copy(fword, cword); 6656 else if (flags & WF_ONECAP) 6657 /* Make the first letter upper-case */ 6658 onecap_copy(fword, cword, TRUE); 6659 else 6660 /* Use goodword as-is. */ 6661 STRCPY(cword, fword); 6662 } 6663 6664 6665 /* 6666 * Return TRUE if "c1" and "c2" are similar characters according to the MAP 6667 * lines in the .aff file. 6668 */ 6669 static int 6670 similar_chars(slang_T *slang, int c1, int c2) 6671 { 6672 int m1, m2; 6673 #ifdef FEAT_MBYTE 6674 char_u buf[MB_MAXBYTES + 1]; 6675 hashitem_T *hi; 6676 6677 if (c1 >= 256) 6678 { 6679 buf[mb_char2bytes(c1, buf)] = 0; 6680 hi = hash_find(&slang->sl_map_hash, buf); 6681 if (HASHITEM_EMPTY(hi)) 6682 m1 = 0; 6683 else 6684 m1 = mb_ptr2char(hi->hi_key + STRLEN(hi->hi_key) + 1); 6685 } 6686 else 6687 #endif 6688 m1 = slang->sl_map_array[c1]; 6689 if (m1 == 0) 6690 return FALSE; 6691 6692 6693 #ifdef FEAT_MBYTE 6694 if (c2 >= 256) 6695 { 6696 buf[mb_char2bytes(c2, buf)] = 0; 6697 hi = hash_find(&slang->sl_map_hash, buf); 6698 if (HASHITEM_EMPTY(hi)) 6699 m2 = 0; 6700 else 6701 m2 = mb_ptr2char(hi->hi_key + STRLEN(hi->hi_key) + 1); 6702 } 6703 else 6704 #endif 6705 m2 = slang->sl_map_array[c2]; 6706 6707 return m1 == m2; 6708 } 6709 6710 /* 6711 * Add a suggestion to the list of suggestions. 6712 * For a suggestion that is already in the list the lowest score is remembered. 6713 */ 6714 static void 6715 add_suggestion( 6716 suginfo_T *su, 6717 garray_T *gap, /* either su_ga or su_sga */ 6718 char_u *goodword, 6719 int badlenarg, /* len of bad word replaced with "goodword" */ 6720 int score, 6721 int altscore, 6722 int had_bonus, /* value for st_had_bonus */ 6723 slang_T *slang, /* language for sound folding */ 6724 int maxsf) /* su_maxscore applies to soundfold score, 6725 su_sfmaxscore to the total score. */ 6726 { 6727 int goodlen; /* len of goodword changed */ 6728 int badlen; /* len of bad word changed */ 6729 suggest_T *stp; 6730 suggest_T new_sug; 6731 int i; 6732 char_u *pgood, *pbad; 6733 6734 /* Minimize "badlen" for consistency. Avoids that changing "the the" to 6735 * "thee the" is added next to changing the first "the" the "thee". */ 6736 pgood = goodword + STRLEN(goodword); 6737 pbad = su->su_badptr + badlenarg; 6738 for (;;) 6739 { 6740 goodlen = (int)(pgood - goodword); 6741 badlen = (int)(pbad - su->su_badptr); 6742 if (goodlen <= 0 || badlen <= 0) 6743 break; 6744 MB_PTR_BACK(goodword, pgood); 6745 MB_PTR_BACK(su->su_badptr, pbad); 6746 #ifdef FEAT_MBYTE 6747 if (has_mbyte) 6748 { 6749 if (mb_ptr2char(pgood) != mb_ptr2char(pbad)) 6750 break; 6751 } 6752 else 6753 #endif 6754 if (*pgood != *pbad) 6755 break; 6756 } 6757 6758 if (badlen == 0 && goodlen == 0) 6759 /* goodword doesn't change anything; may happen for "the the" changing 6760 * the first "the" to itself. */ 6761 return; 6762 6763 if (gap->ga_len == 0) 6764 i = -1; 6765 else 6766 { 6767 /* Check if the word is already there. Also check the length that is 6768 * being replaced "thes," -> "these" is a different suggestion from 6769 * "thes" -> "these". */ 6770 stp = &SUG(*gap, 0); 6771 for (i = gap->ga_len; --i >= 0; ++stp) 6772 if (stp->st_wordlen == goodlen 6773 && stp->st_orglen == badlen 6774 && STRNCMP(stp->st_word, goodword, goodlen) == 0) 6775 { 6776 /* 6777 * Found it. Remember the word with the lowest score. 6778 */ 6779 if (stp->st_slang == NULL) 6780 stp->st_slang = slang; 6781 6782 new_sug.st_score = score; 6783 new_sug.st_altscore = altscore; 6784 new_sug.st_had_bonus = had_bonus; 6785 6786 if (stp->st_had_bonus != had_bonus) 6787 { 6788 /* Only one of the two had the soundalike score computed. 6789 * Need to do that for the other one now, otherwise the 6790 * scores can't be compared. This happens because 6791 * suggest_try_change() doesn't compute the soundalike 6792 * word to keep it fast, while some special methods set 6793 * the soundalike score to zero. */ 6794 if (had_bonus) 6795 rescore_one(su, stp); 6796 else 6797 { 6798 new_sug.st_word = stp->st_word; 6799 new_sug.st_wordlen = stp->st_wordlen; 6800 new_sug.st_slang = stp->st_slang; 6801 new_sug.st_orglen = badlen; 6802 rescore_one(su, &new_sug); 6803 } 6804 } 6805 6806 if (stp->st_score > new_sug.st_score) 6807 { 6808 stp->st_score = new_sug.st_score; 6809 stp->st_altscore = new_sug.st_altscore; 6810 stp->st_had_bonus = new_sug.st_had_bonus; 6811 } 6812 break; 6813 } 6814 } 6815 6816 if (i < 0 && ga_grow(gap, 1) == OK) 6817 { 6818 /* Add a suggestion. */ 6819 stp = &SUG(*gap, gap->ga_len); 6820 stp->st_word = vim_strnsave(goodword, goodlen); 6821 if (stp->st_word != NULL) 6822 { 6823 stp->st_wordlen = goodlen; 6824 stp->st_score = score; 6825 stp->st_altscore = altscore; 6826 stp->st_had_bonus = had_bonus; 6827 stp->st_orglen = badlen; 6828 stp->st_slang = slang; 6829 ++gap->ga_len; 6830 6831 /* If we have too many suggestions now, sort the list and keep 6832 * the best suggestions. */ 6833 if (gap->ga_len > SUG_MAX_COUNT(su)) 6834 { 6835 if (maxsf) 6836 su->su_sfmaxscore = cleanup_suggestions(gap, 6837 su->su_sfmaxscore, SUG_CLEAN_COUNT(su)); 6838 else 6839 su->su_maxscore = cleanup_suggestions(gap, 6840 su->su_maxscore, SUG_CLEAN_COUNT(su)); 6841 } 6842 } 6843 } 6844 } 6845 6846 /* 6847 * Suggestions may in fact be flagged as errors. Esp. for banned words and 6848 * for split words, such as "the the". Remove these from the list here. 6849 */ 6850 static void 6851 check_suggestions( 6852 suginfo_T *su, 6853 garray_T *gap) /* either su_ga or su_sga */ 6854 { 6855 suggest_T *stp; 6856 int i; 6857 char_u longword[MAXWLEN + 1]; 6858 int len; 6859 hlf_T attr; 6860 6861 stp = &SUG(*gap, 0); 6862 for (i = gap->ga_len - 1; i >= 0; --i) 6863 { 6864 /* Need to append what follows to check for "the the". */ 6865 vim_strncpy(longword, stp[i].st_word, MAXWLEN); 6866 len = stp[i].st_wordlen; 6867 vim_strncpy(longword + len, su->su_badptr + stp[i].st_orglen, 6868 MAXWLEN - len); 6869 attr = HLF_COUNT; 6870 (void)spell_check(curwin, longword, &attr, NULL, FALSE); 6871 if (attr != HLF_COUNT) 6872 { 6873 /* Remove this entry. */ 6874 vim_free(stp[i].st_word); 6875 --gap->ga_len; 6876 if (i < gap->ga_len) 6877 mch_memmove(stp + i, stp + i + 1, 6878 sizeof(suggest_T) * (gap->ga_len - i)); 6879 } 6880 } 6881 } 6882 6883 6884 /* 6885 * Add a word to be banned. 6886 */ 6887 static void 6888 add_banned( 6889 suginfo_T *su, 6890 char_u *word) 6891 { 6892 char_u *s; 6893 hash_T hash; 6894 hashitem_T *hi; 6895 6896 hash = hash_hash(word); 6897 hi = hash_lookup(&su->su_banned, word, hash); 6898 if (HASHITEM_EMPTY(hi)) 6899 { 6900 s = vim_strsave(word); 6901 if (s != NULL) 6902 hash_add_item(&su->su_banned, hi, s, hash); 6903 } 6904 } 6905 6906 /* 6907 * Recompute the score for all suggestions if sound-folding is possible. This 6908 * is slow, thus only done for the final results. 6909 */ 6910 static void 6911 rescore_suggestions(suginfo_T *su) 6912 { 6913 int i; 6914 6915 if (su->su_sallang != NULL) 6916 for (i = 0; i < su->su_ga.ga_len; ++i) 6917 rescore_one(su, &SUG(su->su_ga, i)); 6918 } 6919 6920 /* 6921 * Recompute the score for one suggestion if sound-folding is possible. 6922 */ 6923 static void 6924 rescore_one(suginfo_T *su, suggest_T *stp) 6925 { 6926 slang_T *slang = stp->st_slang; 6927 char_u sal_badword[MAXWLEN]; 6928 char_u *p; 6929 6930 /* Only rescore suggestions that have no sal score yet and do have a 6931 * language. */ 6932 if (slang != NULL && slang->sl_sal.ga_len > 0 && !stp->st_had_bonus) 6933 { 6934 if (slang == su->su_sallang) 6935 p = su->su_sal_badword; 6936 else 6937 { 6938 spell_soundfold(slang, su->su_fbadword, TRUE, sal_badword); 6939 p = sal_badword; 6940 } 6941 6942 stp->st_altscore = stp_sal_score(stp, su, slang, p); 6943 if (stp->st_altscore == SCORE_MAXMAX) 6944 stp->st_altscore = SCORE_BIG; 6945 stp->st_score = RESCORE(stp->st_score, stp->st_altscore); 6946 stp->st_had_bonus = TRUE; 6947 } 6948 } 6949 6950 static int 6951 #ifdef __BORLANDC__ 6952 _RTLENTRYF 6953 #endif 6954 sug_compare(const void *s1, const void *s2); 6955 6956 /* 6957 * Function given to qsort() to sort the suggestions on st_score. 6958 * First on "st_score", then "st_altscore" then alphabetically. 6959 */ 6960 static int 6961 #ifdef __BORLANDC__ 6962 _RTLENTRYF 6963 #endif 6964 sug_compare(const void *s1, const void *s2) 6965 { 6966 suggest_T *p1 = (suggest_T *)s1; 6967 suggest_T *p2 = (suggest_T *)s2; 6968 int n = p1->st_score - p2->st_score; 6969 6970 if (n == 0) 6971 { 6972 n = p1->st_altscore - p2->st_altscore; 6973 if (n == 0) 6974 n = STRICMP(p1->st_word, p2->st_word); 6975 } 6976 return n; 6977 } 6978 6979 /* 6980 * Cleanup the suggestions: 6981 * - Sort on score. 6982 * - Remove words that won't be displayed. 6983 * Returns the maximum score in the list or "maxscore" unmodified. 6984 */ 6985 static int 6986 cleanup_suggestions( 6987 garray_T *gap, 6988 int maxscore, 6989 int keep) /* nr of suggestions to keep */ 6990 { 6991 suggest_T *stp = &SUG(*gap, 0); 6992 int i; 6993 6994 /* Sort the list. */ 6995 qsort(gap->ga_data, (size_t)gap->ga_len, sizeof(suggest_T), sug_compare); 6996 6997 /* Truncate the list to the number of suggestions that will be displayed. */ 6998 if (gap->ga_len > keep) 6999 { 7000 for (i = keep; i < gap->ga_len; ++i) 7001 vim_free(stp[i].st_word); 7002 gap->ga_len = keep; 7003 return stp[keep - 1].st_score; 7004 } 7005 return maxscore; 7006 } 7007 7008 #if defined(FEAT_EVAL) || defined(PROTO) 7009 /* 7010 * Soundfold a string, for soundfold(). 7011 * Result is in allocated memory, NULL for an error. 7012 */ 7013 char_u * 7014 eval_soundfold(char_u *word) 7015 { 7016 langp_T *lp; 7017 char_u sound[MAXWLEN]; 7018 int lpi; 7019 7020 if (curwin->w_p_spell && *curwin->w_s->b_p_spl != NUL) 7021 /* Use the sound-folding of the first language that supports it. */ 7022 for (lpi = 0; lpi < curwin->w_s->b_langp.ga_len; ++lpi) 7023 { 7024 lp = LANGP_ENTRY(curwin->w_s->b_langp, lpi); 7025 if (lp->lp_slang->sl_sal.ga_len > 0) 7026 { 7027 /* soundfold the word */ 7028 spell_soundfold(lp->lp_slang, word, FALSE, sound); 7029 return vim_strsave(sound); 7030 } 7031 } 7032 7033 /* No language with sound folding, return word as-is. */ 7034 return vim_strsave(word); 7035 } 7036 #endif 7037 7038 /* 7039 * Turn "inword" into its sound-a-like equivalent in "res[MAXWLEN]". 7040 * 7041 * There are many ways to turn a word into a sound-a-like representation. The 7042 * oldest is Soundex (1918!). A nice overview can be found in "Approximate 7043 * swedish name matching - survey and test of different algorithms" by Klas 7044 * Erikson. 7045 * 7046 * We support two methods: 7047 * 1. SOFOFROM/SOFOTO do a simple character mapping. 7048 * 2. SAL items define a more advanced sound-folding (and much slower). 7049 */ 7050 void 7051 spell_soundfold( 7052 slang_T *slang, 7053 char_u *inword, 7054 int folded, /* "inword" is already case-folded */ 7055 char_u *res) 7056 { 7057 char_u fword[MAXWLEN]; 7058 char_u *word; 7059 7060 if (slang->sl_sofo) 7061 /* SOFOFROM and SOFOTO used */ 7062 spell_soundfold_sofo(slang, inword, res); 7063 else 7064 { 7065 /* SAL items used. Requires the word to be case-folded. */ 7066 if (folded) 7067 word = inword; 7068 else 7069 { 7070 (void)spell_casefold(inword, (int)STRLEN(inword), fword, MAXWLEN); 7071 word = fword; 7072 } 7073 7074 #ifdef FEAT_MBYTE 7075 if (has_mbyte) 7076 spell_soundfold_wsal(slang, word, res); 7077 else 7078 #endif 7079 spell_soundfold_sal(slang, word, res); 7080 } 7081 } 7082 7083 /* 7084 * Perform sound folding of "inword" into "res" according to SOFOFROM and 7085 * SOFOTO lines. 7086 */ 7087 static void 7088 spell_soundfold_sofo(slang_T *slang, char_u *inword, char_u *res) 7089 { 7090 char_u *s; 7091 int ri = 0; 7092 int c; 7093 7094 #ifdef FEAT_MBYTE 7095 if (has_mbyte) 7096 { 7097 int prevc = 0; 7098 int *ip; 7099 7100 /* The sl_sal_first[] table contains the translation for chars up to 7101 * 255, sl_sal the rest. */ 7102 for (s = inword; *s != NUL; ) 7103 { 7104 c = mb_cptr2char_adv(&s); 7105 if (enc_utf8 ? utf_class(c) == 0 : VIM_ISWHITE(c)) 7106 c = ' '; 7107 else if (c < 256) 7108 c = slang->sl_sal_first[c]; 7109 else 7110 { 7111 ip = ((int **)slang->sl_sal.ga_data)[c & 0xff]; 7112 if (ip == NULL) /* empty list, can't match */ 7113 c = NUL; 7114 else 7115 for (;;) /* find "c" in the list */ 7116 { 7117 if (*ip == 0) /* not found */ 7118 { 7119 c = NUL; 7120 break; 7121 } 7122 if (*ip == c) /* match! */ 7123 { 7124 c = ip[1]; 7125 break; 7126 } 7127 ip += 2; 7128 } 7129 } 7130 7131 if (c != NUL && c != prevc) 7132 { 7133 ri += mb_char2bytes(c, res + ri); 7134 if (ri + MB_MAXBYTES > MAXWLEN) 7135 break; 7136 prevc = c; 7137 } 7138 } 7139 } 7140 else 7141 #endif 7142 { 7143 /* The sl_sal_first[] table contains the translation. */ 7144 for (s = inword; (c = *s) != NUL; ++s) 7145 { 7146 if (VIM_ISWHITE(c)) 7147 c = ' '; 7148 else 7149 c = slang->sl_sal_first[c]; 7150 if (c != NUL && (ri == 0 || res[ri - 1] != c)) 7151 res[ri++] = c; 7152 } 7153 } 7154 7155 res[ri] = NUL; 7156 } 7157 7158 static void 7159 spell_soundfold_sal(slang_T *slang, char_u *inword, char_u *res) 7160 { 7161 salitem_T *smp; 7162 char_u word[MAXWLEN]; 7163 char_u *s = inword; 7164 char_u *t; 7165 char_u *pf; 7166 int i, j, z; 7167 int reslen; 7168 int n, k = 0; 7169 int z0; 7170 int k0; 7171 int n0; 7172 int c; 7173 int pri; 7174 int p0 = -333; 7175 int c0; 7176 7177 /* Remove accents, if wanted. We actually remove all non-word characters. 7178 * But keep white space. We need a copy, the word may be changed here. */ 7179 if (slang->sl_rem_accents) 7180 { 7181 t = word; 7182 while (*s != NUL) 7183 { 7184 if (VIM_ISWHITE(*s)) 7185 { 7186 *t++ = ' '; 7187 s = skipwhite(s); 7188 } 7189 else 7190 { 7191 if (spell_iswordp_nmw(s, curwin)) 7192 *t++ = *s; 7193 ++s; 7194 } 7195 } 7196 *t = NUL; 7197 } 7198 else 7199 vim_strncpy(word, s, MAXWLEN - 1); 7200 7201 smp = (salitem_T *)slang->sl_sal.ga_data; 7202 7203 /* 7204 * This comes from Aspell phonet.cpp. Converted from C++ to C. 7205 * Changed to keep spaces. 7206 */ 7207 i = reslen = z = 0; 7208 while ((c = word[i]) != NUL) 7209 { 7210 /* Start with the first rule that has the character in the word. */ 7211 n = slang->sl_sal_first[c]; 7212 z0 = 0; 7213 7214 if (n >= 0) 7215 { 7216 /* check all rules for the same letter */ 7217 for (; (s = smp[n].sm_lead)[0] == c; ++n) 7218 { 7219 /* Quickly skip entries that don't match the word. Most 7220 * entries are less then three chars, optimize for that. */ 7221 k = smp[n].sm_leadlen; 7222 if (k > 1) 7223 { 7224 if (word[i + 1] != s[1]) 7225 continue; 7226 if (k > 2) 7227 { 7228 for (j = 2; j < k; ++j) 7229 if (word[i + j] != s[j]) 7230 break; 7231 if (j < k) 7232 continue; 7233 } 7234 } 7235 7236 if ((pf = smp[n].sm_oneof) != NULL) 7237 { 7238 /* Check for match with one of the chars in "sm_oneof". */ 7239 while (*pf != NUL && *pf != word[i + k]) 7240 ++pf; 7241 if (*pf == NUL) 7242 continue; 7243 ++k; 7244 } 7245 s = smp[n].sm_rules; 7246 pri = 5; /* default priority */ 7247 7248 p0 = *s; 7249 k0 = k; 7250 while (*s == '-' && k > 1) 7251 { 7252 k--; 7253 s++; 7254 } 7255 if (*s == '<') 7256 s++; 7257 if (VIM_ISDIGIT(*s)) 7258 { 7259 /* determine priority */ 7260 pri = *s - '0'; 7261 s++; 7262 } 7263 if (*s == '^' && *(s + 1) == '^') 7264 s++; 7265 7266 if (*s == NUL 7267 || (*s == '^' 7268 && (i == 0 || !(word[i - 1] == ' ' 7269 || spell_iswordp(word + i - 1, curwin))) 7270 && (*(s + 1) != '$' 7271 || (!spell_iswordp(word + i + k0, curwin)))) 7272 || (*s == '$' && i > 0 7273 && spell_iswordp(word + i - 1, curwin) 7274 && (!spell_iswordp(word + i + k0, curwin)))) 7275 { 7276 /* search for followup rules, if: */ 7277 /* followup and k > 1 and NO '-' in searchstring */ 7278 c0 = word[i + k - 1]; 7279 n0 = slang->sl_sal_first[c0]; 7280 7281 if (slang->sl_followup && k > 1 && n0 >= 0 7282 && p0 != '-' && word[i + k] != NUL) 7283 { 7284 /* test follow-up rule for "word[i + k]" */ 7285 for ( ; (s = smp[n0].sm_lead)[0] == c0; ++n0) 7286 { 7287 /* Quickly skip entries that don't match the word. 7288 * */ 7289 k0 = smp[n0].sm_leadlen; 7290 if (k0 > 1) 7291 { 7292 if (word[i + k] != s[1]) 7293 continue; 7294 if (k0 > 2) 7295 { 7296 pf = word + i + k + 1; 7297 for (j = 2; j < k0; ++j) 7298 if (*pf++ != s[j]) 7299 break; 7300 if (j < k0) 7301 continue; 7302 } 7303 } 7304 k0 += k - 1; 7305 7306 if ((pf = smp[n0].sm_oneof) != NULL) 7307 { 7308 /* Check for match with one of the chars in 7309 * "sm_oneof". */ 7310 while (*pf != NUL && *pf != word[i + k0]) 7311 ++pf; 7312 if (*pf == NUL) 7313 continue; 7314 ++k0; 7315 } 7316 7317 p0 = 5; 7318 s = smp[n0].sm_rules; 7319 while (*s == '-') 7320 { 7321 /* "k0" gets NOT reduced because 7322 * "if (k0 == k)" */ 7323 s++; 7324 } 7325 if (*s == '<') 7326 s++; 7327 if (VIM_ISDIGIT(*s)) 7328 { 7329 p0 = *s - '0'; 7330 s++; 7331 } 7332 7333 if (*s == NUL 7334 /* *s == '^' cuts */ 7335 || (*s == '$' 7336 && !spell_iswordp(word + i + k0, 7337 curwin))) 7338 { 7339 if (k0 == k) 7340 /* this is just a piece of the string */ 7341 continue; 7342 7343 if (p0 < pri) 7344 /* priority too low */ 7345 continue; 7346 /* rule fits; stop search */ 7347 break; 7348 } 7349 } 7350 7351 if (p0 >= pri && smp[n0].sm_lead[0] == c0) 7352 continue; 7353 } 7354 7355 /* replace string */ 7356 s = smp[n].sm_to; 7357 if (s == NULL) 7358 s = (char_u *)""; 7359 pf = smp[n].sm_rules; 7360 p0 = (vim_strchr(pf, '<') != NULL) ? 1 : 0; 7361 if (p0 == 1 && z == 0) 7362 { 7363 /* rule with '<' is used */ 7364 if (reslen > 0 && *s != NUL && (res[reslen - 1] == c 7365 || res[reslen - 1] == *s)) 7366 reslen--; 7367 z0 = 1; 7368 z = 1; 7369 k0 = 0; 7370 while (*s != NUL && word[i + k0] != NUL) 7371 { 7372 word[i + k0] = *s; 7373 k0++; 7374 s++; 7375 } 7376 if (k > k0) 7377 STRMOVE(word + i + k0, word + i + k); 7378 7379 /* new "actual letter" */ 7380 c = word[i]; 7381 } 7382 else 7383 { 7384 /* no '<' rule used */ 7385 i += k - 1; 7386 z = 0; 7387 while (*s != NUL && s[1] != NUL && reslen < MAXWLEN) 7388 { 7389 if (reslen == 0 || res[reslen - 1] != *s) 7390 res[reslen++] = *s; 7391 s++; 7392 } 7393 /* new "actual letter" */ 7394 c = *s; 7395 if (strstr((char *)pf, "^^") != NULL) 7396 { 7397 if (c != NUL) 7398 res[reslen++] = c; 7399 STRMOVE(word, word + i + 1); 7400 i = 0; 7401 z0 = 1; 7402 } 7403 } 7404 break; 7405 } 7406 } 7407 } 7408 else if (VIM_ISWHITE(c)) 7409 { 7410 c = ' '; 7411 k = 1; 7412 } 7413 7414 if (z0 == 0) 7415 { 7416 if (k && !p0 && reslen < MAXWLEN && c != NUL 7417 && (!slang->sl_collapse || reslen == 0 7418 || res[reslen - 1] != c)) 7419 /* condense only double letters */ 7420 res[reslen++] = c; 7421 7422 i++; 7423 z = 0; 7424 k = 0; 7425 } 7426 } 7427 7428 res[reslen] = NUL; 7429 } 7430 7431 #ifdef FEAT_MBYTE 7432 /* 7433 * Turn "inword" into its sound-a-like equivalent in "res[MAXWLEN]". 7434 * Multi-byte version of spell_soundfold(). 7435 */ 7436 static void 7437 spell_soundfold_wsal(slang_T *slang, char_u *inword, char_u *res) 7438 { 7439 salitem_T *smp = (salitem_T *)slang->sl_sal.ga_data; 7440 int word[MAXWLEN]; 7441 int wres[MAXWLEN]; 7442 int l; 7443 char_u *s; 7444 int *ws; 7445 char_u *t; 7446 int *pf; 7447 int i, j, z; 7448 int reslen; 7449 int n, k = 0; 7450 int z0; 7451 int k0; 7452 int n0; 7453 int c; 7454 int pri; 7455 int p0 = -333; 7456 int c0; 7457 int did_white = FALSE; 7458 int wordlen; 7459 7460 7461 /* 7462 * Convert the multi-byte string to a wide-character string. 7463 * Remove accents, if wanted. We actually remove all non-word characters. 7464 * But keep white space. 7465 */ 7466 wordlen = 0; 7467 for (s = inword; *s != NUL; ) 7468 { 7469 t = s; 7470 c = mb_cptr2char_adv(&s); 7471 if (slang->sl_rem_accents) 7472 { 7473 if (enc_utf8 ? utf_class(c) == 0 : VIM_ISWHITE(c)) 7474 { 7475 if (did_white) 7476 continue; 7477 c = ' '; 7478 did_white = TRUE; 7479 } 7480 else 7481 { 7482 did_white = FALSE; 7483 if (!spell_iswordp_nmw(t, curwin)) 7484 continue; 7485 } 7486 } 7487 word[wordlen++] = c; 7488 } 7489 word[wordlen] = NUL; 7490 7491 /* 7492 * This algorithm comes from Aspell phonet.cpp. 7493 * Converted from C++ to C. Added support for multi-byte chars. 7494 * Changed to keep spaces. 7495 */ 7496 i = reslen = z = 0; 7497 while ((c = word[i]) != NUL) 7498 { 7499 /* Start with the first rule that has the character in the word. */ 7500 n = slang->sl_sal_first[c & 0xff]; 7501 z0 = 0; 7502 7503 if (n >= 0) 7504 { 7505 /* Check all rules for the same index byte. 7506 * If c is 0x300 need extra check for the end of the array, as 7507 * (c & 0xff) is NUL. */ 7508 for (; ((ws = smp[n].sm_lead_w)[0] & 0xff) == (c & 0xff) 7509 && ws[0] != NUL; ++n) 7510 { 7511 /* Quickly skip entries that don't match the word. Most 7512 * entries are less then three chars, optimize for that. */ 7513 if (c != ws[0]) 7514 continue; 7515 k = smp[n].sm_leadlen; 7516 if (k > 1) 7517 { 7518 if (word[i + 1] != ws[1]) 7519 continue; 7520 if (k > 2) 7521 { 7522 for (j = 2; j < k; ++j) 7523 if (word[i + j] != ws[j]) 7524 break; 7525 if (j < k) 7526 continue; 7527 } 7528 } 7529 7530 if ((pf = smp[n].sm_oneof_w) != NULL) 7531 { 7532 /* Check for match with one of the chars in "sm_oneof". */ 7533 while (*pf != NUL && *pf != word[i + k]) 7534 ++pf; 7535 if (*pf == NUL) 7536 continue; 7537 ++k; 7538 } 7539 s = smp[n].sm_rules; 7540 pri = 5; /* default priority */ 7541 7542 p0 = *s; 7543 k0 = k; 7544 while (*s == '-' && k > 1) 7545 { 7546 k--; 7547 s++; 7548 } 7549 if (*s == '<') 7550 s++; 7551 if (VIM_ISDIGIT(*s)) 7552 { 7553 /* determine priority */ 7554 pri = *s - '0'; 7555 s++; 7556 } 7557 if (*s == '^' && *(s + 1) == '^') 7558 s++; 7559 7560 if (*s == NUL 7561 || (*s == '^' 7562 && (i == 0 || !(word[i - 1] == ' ' 7563 || spell_iswordp_w(word + i - 1, curwin))) 7564 && (*(s + 1) != '$' 7565 || (!spell_iswordp_w(word + i + k0, curwin)))) 7566 || (*s == '$' && i > 0 7567 && spell_iswordp_w(word + i - 1, curwin) 7568 && (!spell_iswordp_w(word + i + k0, curwin)))) 7569 { 7570 /* search for followup rules, if: */ 7571 /* followup and k > 1 and NO '-' in searchstring */ 7572 c0 = word[i + k - 1]; 7573 n0 = slang->sl_sal_first[c0 & 0xff]; 7574 7575 if (slang->sl_followup && k > 1 && n0 >= 0 7576 && p0 != '-' && word[i + k] != NUL) 7577 { 7578 /* Test follow-up rule for "word[i + k]"; loop over 7579 * all entries with the same index byte. */ 7580 for ( ; ((ws = smp[n0].sm_lead_w)[0] & 0xff) 7581 == (c0 & 0xff); ++n0) 7582 { 7583 /* Quickly skip entries that don't match the word. 7584 */ 7585 if (c0 != ws[0]) 7586 continue; 7587 k0 = smp[n0].sm_leadlen; 7588 if (k0 > 1) 7589 { 7590 if (word[i + k] != ws[1]) 7591 continue; 7592 if (k0 > 2) 7593 { 7594 pf = word + i + k + 1; 7595 for (j = 2; j < k0; ++j) 7596 if (*pf++ != ws[j]) 7597 break; 7598 if (j < k0) 7599 continue; 7600 } 7601 } 7602 k0 += k - 1; 7603 7604 if ((pf = smp[n0].sm_oneof_w) != NULL) 7605 { 7606 /* Check for match with one of the chars in 7607 * "sm_oneof". */ 7608 while (*pf != NUL && *pf != word[i + k0]) 7609 ++pf; 7610 if (*pf == NUL) 7611 continue; 7612 ++k0; 7613 } 7614 7615 p0 = 5; 7616 s = smp[n0].sm_rules; 7617 while (*s == '-') 7618 { 7619 /* "k0" gets NOT reduced because 7620 * "if (k0 == k)" */ 7621 s++; 7622 } 7623 if (*s == '<') 7624 s++; 7625 if (VIM_ISDIGIT(*s)) 7626 { 7627 p0 = *s - '0'; 7628 s++; 7629 } 7630 7631 if (*s == NUL 7632 /* *s == '^' cuts */ 7633 || (*s == '$' 7634 && !spell_iswordp_w(word + i + k0, 7635 curwin))) 7636 { 7637 if (k0 == k) 7638 /* this is just a piece of the string */ 7639 continue; 7640 7641 if (p0 < pri) 7642 /* priority too low */ 7643 continue; 7644 /* rule fits; stop search */ 7645 break; 7646 } 7647 } 7648 7649 if (p0 >= pri && (smp[n0].sm_lead_w[0] & 0xff) 7650 == (c0 & 0xff)) 7651 continue; 7652 } 7653 7654 /* replace string */ 7655 ws = smp[n].sm_to_w; 7656 s = smp[n].sm_rules; 7657 p0 = (vim_strchr(s, '<') != NULL) ? 1 : 0; 7658 if (p0 == 1 && z == 0) 7659 { 7660 /* rule with '<' is used */ 7661 if (reslen > 0 && ws != NULL && *ws != NUL 7662 && (wres[reslen - 1] == c 7663 || wres[reslen - 1] == *ws)) 7664 reslen--; 7665 z0 = 1; 7666 z = 1; 7667 k0 = 0; 7668 if (ws != NULL) 7669 while (*ws != NUL && word[i + k0] != NUL) 7670 { 7671 word[i + k0] = *ws; 7672 k0++; 7673 ws++; 7674 } 7675 if (k > k0) 7676 mch_memmove(word + i + k0, word + i + k, 7677 sizeof(int) * (wordlen - (i + k) + 1)); 7678 7679 /* new "actual letter" */ 7680 c = word[i]; 7681 } 7682 else 7683 { 7684 /* no '<' rule used */ 7685 i += k - 1; 7686 z = 0; 7687 if (ws != NULL) 7688 while (*ws != NUL && ws[1] != NUL 7689 && reslen < MAXWLEN) 7690 { 7691 if (reslen == 0 || wres[reslen - 1] != *ws) 7692 wres[reslen++] = *ws; 7693 ws++; 7694 } 7695 /* new "actual letter" */ 7696 if (ws == NULL) 7697 c = NUL; 7698 else 7699 c = *ws; 7700 if (strstr((char *)s, "^^") != NULL) 7701 { 7702 if (c != NUL) 7703 wres[reslen++] = c; 7704 mch_memmove(word, word + i + 1, 7705 sizeof(int) * (wordlen - (i + 1) + 1)); 7706 i = 0; 7707 z0 = 1; 7708 } 7709 } 7710 break; 7711 } 7712 } 7713 } 7714 else if (VIM_ISWHITE(c)) 7715 { 7716 c = ' '; 7717 k = 1; 7718 } 7719 7720 if (z0 == 0) 7721 { 7722 if (k && !p0 && reslen < MAXWLEN && c != NUL 7723 && (!slang->sl_collapse || reslen == 0 7724 || wres[reslen - 1] != c)) 7725 /* condense only double letters */ 7726 wres[reslen++] = c; 7727 7728 i++; 7729 z = 0; 7730 k = 0; 7731 } 7732 } 7733 7734 /* Convert wide characters in "wres" to a multi-byte string in "res". */ 7735 l = 0; 7736 for (n = 0; n < reslen; ++n) 7737 { 7738 l += mb_char2bytes(wres[n], res + l); 7739 if (l + MB_MAXBYTES > MAXWLEN) 7740 break; 7741 } 7742 res[l] = NUL; 7743 } 7744 #endif 7745 7746 /* 7747 * Compute a score for two sound-a-like words. 7748 * This permits up to two inserts/deletes/swaps/etc. to keep things fast. 7749 * Instead of a generic loop we write out the code. That keeps it fast by 7750 * avoiding checks that will not be possible. 7751 */ 7752 static int 7753 soundalike_score( 7754 char_u *goodstart, /* sound-folded good word */ 7755 char_u *badstart) /* sound-folded bad word */ 7756 { 7757 char_u *goodsound = goodstart; 7758 char_u *badsound = badstart; 7759 int goodlen; 7760 int badlen; 7761 int n; 7762 char_u *pl, *ps; 7763 char_u *pl2, *ps2; 7764 int score = 0; 7765 7766 /* Adding/inserting "*" at the start (word starts with vowel) shouldn't be 7767 * counted so much, vowels halfway the word aren't counted at all. */ 7768 if ((*badsound == '*' || *goodsound == '*') && *badsound != *goodsound) 7769 { 7770 if ((badsound[0] == NUL && goodsound[1] == NUL) 7771 || (goodsound[0] == NUL && badsound[1] == NUL)) 7772 /* changing word with vowel to word without a sound */ 7773 return SCORE_DEL; 7774 if (badsound[0] == NUL || goodsound[0] == NUL) 7775 /* more than two changes */ 7776 return SCORE_MAXMAX; 7777 7778 if (badsound[1] == goodsound[1] 7779 || (badsound[1] != NUL 7780 && goodsound[1] != NUL 7781 && badsound[2] == goodsound[2])) 7782 { 7783 /* handle like a substitute */ 7784 } 7785 else 7786 { 7787 score = 2 * SCORE_DEL / 3; 7788 if (*badsound == '*') 7789 ++badsound; 7790 else 7791 ++goodsound; 7792 } 7793 } 7794 7795 goodlen = (int)STRLEN(goodsound); 7796 badlen = (int)STRLEN(badsound); 7797 7798 /* Return quickly if the lengths are too different to be fixed by two 7799 * changes. */ 7800 n = goodlen - badlen; 7801 if (n < -2 || n > 2) 7802 return SCORE_MAXMAX; 7803 7804 if (n > 0) 7805 { 7806 pl = goodsound; /* goodsound is longest */ 7807 ps = badsound; 7808 } 7809 else 7810 { 7811 pl = badsound; /* badsound is longest */ 7812 ps = goodsound; 7813 } 7814 7815 /* Skip over the identical part. */ 7816 while (*pl == *ps && *pl != NUL) 7817 { 7818 ++pl; 7819 ++ps; 7820 } 7821 7822 switch (n) 7823 { 7824 case -2: 7825 case 2: 7826 /* 7827 * Must delete two characters from "pl". 7828 */ 7829 ++pl; /* first delete */ 7830 while (*pl == *ps) 7831 { 7832 ++pl; 7833 ++ps; 7834 } 7835 /* strings must be equal after second delete */ 7836 if (STRCMP(pl + 1, ps) == 0) 7837 return score + SCORE_DEL * 2; 7838 7839 /* Failed to compare. */ 7840 break; 7841 7842 case -1: 7843 case 1: 7844 /* 7845 * Minimal one delete from "pl" required. 7846 */ 7847 7848 /* 1: delete */ 7849 pl2 = pl + 1; 7850 ps2 = ps; 7851 while (*pl2 == *ps2) 7852 { 7853 if (*pl2 == NUL) /* reached the end */ 7854 return score + SCORE_DEL; 7855 ++pl2; 7856 ++ps2; 7857 } 7858 7859 /* 2: delete then swap, then rest must be equal */ 7860 if (pl2[0] == ps2[1] && pl2[1] == ps2[0] 7861 && STRCMP(pl2 + 2, ps2 + 2) == 0) 7862 return score + SCORE_DEL + SCORE_SWAP; 7863 7864 /* 3: delete then substitute, then the rest must be equal */ 7865 if (STRCMP(pl2 + 1, ps2 + 1) == 0) 7866 return score + SCORE_DEL + SCORE_SUBST; 7867 7868 /* 4: first swap then delete */ 7869 if (pl[0] == ps[1] && pl[1] == ps[0]) 7870 { 7871 pl2 = pl + 2; /* swap, skip two chars */ 7872 ps2 = ps + 2; 7873 while (*pl2 == *ps2) 7874 { 7875 ++pl2; 7876 ++ps2; 7877 } 7878 /* delete a char and then strings must be equal */ 7879 if (STRCMP(pl2 + 1, ps2) == 0) 7880 return score + SCORE_SWAP + SCORE_DEL; 7881 } 7882 7883 /* 5: first substitute then delete */ 7884 pl2 = pl + 1; /* substitute, skip one char */ 7885 ps2 = ps + 1; 7886 while (*pl2 == *ps2) 7887 { 7888 ++pl2; 7889 ++ps2; 7890 } 7891 /* delete a char and then strings must be equal */ 7892 if (STRCMP(pl2 + 1, ps2) == 0) 7893 return score + SCORE_SUBST + SCORE_DEL; 7894 7895 /* Failed to compare. */ 7896 break; 7897 7898 case 0: 7899 /* 7900 * Lengths are equal, thus changes must result in same length: An 7901 * insert is only possible in combination with a delete. 7902 * 1: check if for identical strings 7903 */ 7904 if (*pl == NUL) 7905 return score; 7906 7907 /* 2: swap */ 7908 if (pl[0] == ps[1] && pl[1] == ps[0]) 7909 { 7910 pl2 = pl + 2; /* swap, skip two chars */ 7911 ps2 = ps + 2; 7912 while (*pl2 == *ps2) 7913 { 7914 if (*pl2 == NUL) /* reached the end */ 7915 return score + SCORE_SWAP; 7916 ++pl2; 7917 ++ps2; 7918 } 7919 /* 3: swap and swap again */ 7920 if (pl2[0] == ps2[1] && pl2[1] == ps2[0] 7921 && STRCMP(pl2 + 2, ps2 + 2) == 0) 7922 return score + SCORE_SWAP + SCORE_SWAP; 7923 7924 /* 4: swap and substitute */ 7925 if (STRCMP(pl2 + 1, ps2 + 1) == 0) 7926 return score + SCORE_SWAP + SCORE_SUBST; 7927 } 7928 7929 /* 5: substitute */ 7930 pl2 = pl + 1; 7931 ps2 = ps + 1; 7932 while (*pl2 == *ps2) 7933 { 7934 if (*pl2 == NUL) /* reached the end */ 7935 return score + SCORE_SUBST; 7936 ++pl2; 7937 ++ps2; 7938 } 7939 7940 /* 6: substitute and swap */ 7941 if (pl2[0] == ps2[1] && pl2[1] == ps2[0] 7942 && STRCMP(pl2 + 2, ps2 + 2) == 0) 7943 return score + SCORE_SUBST + SCORE_SWAP; 7944 7945 /* 7: substitute and substitute */ 7946 if (STRCMP(pl2 + 1, ps2 + 1) == 0) 7947 return score + SCORE_SUBST + SCORE_SUBST; 7948 7949 /* 8: insert then delete */ 7950 pl2 = pl; 7951 ps2 = ps + 1; 7952 while (*pl2 == *ps2) 7953 { 7954 ++pl2; 7955 ++ps2; 7956 } 7957 if (STRCMP(pl2 + 1, ps2) == 0) 7958 return score + SCORE_INS + SCORE_DEL; 7959 7960 /* 9: delete then insert */ 7961 pl2 = pl + 1; 7962 ps2 = ps; 7963 while (*pl2 == *ps2) 7964 { 7965 ++pl2; 7966 ++ps2; 7967 } 7968 if (STRCMP(pl2, ps2 + 1) == 0) 7969 return score + SCORE_INS + SCORE_DEL; 7970 7971 /* Failed to compare. */ 7972 break; 7973 } 7974 7975 return SCORE_MAXMAX; 7976 } 7977 7978 /* 7979 * Compute the "edit distance" to turn "badword" into "goodword". The less 7980 * deletes/inserts/substitutes/swaps are required the lower the score. 7981 * 7982 * The algorithm is described by Du and Chang, 1992. 7983 * The implementation of the algorithm comes from Aspell editdist.cpp, 7984 * edit_distance(). It has been converted from C++ to C and modified to 7985 * support multi-byte characters. 7986 */ 7987 static int 7988 spell_edit_score( 7989 slang_T *slang, 7990 char_u *badword, 7991 char_u *goodword) 7992 { 7993 int *cnt; 7994 int badlen, goodlen; /* lengths including NUL */ 7995 int j, i; 7996 int t; 7997 int bc, gc; 7998 int pbc, pgc; 7999 #ifdef FEAT_MBYTE 8000 char_u *p; 8001 int wbadword[MAXWLEN]; 8002 int wgoodword[MAXWLEN]; 8003 8004 if (has_mbyte) 8005 { 8006 /* Get the characters from the multi-byte strings and put them in an 8007 * int array for easy access. */ 8008 for (p = badword, badlen = 0; *p != NUL; ) 8009 wbadword[badlen++] = mb_cptr2char_adv(&p); 8010 wbadword[badlen++] = 0; 8011 for (p = goodword, goodlen = 0; *p != NUL; ) 8012 wgoodword[goodlen++] = mb_cptr2char_adv(&p); 8013 wgoodword[goodlen++] = 0; 8014 } 8015 else 8016 #endif 8017 { 8018 badlen = (int)STRLEN(badword) + 1; 8019 goodlen = (int)STRLEN(goodword) + 1; 8020 } 8021 8022 /* We use "cnt" as an array: CNT(badword_idx, goodword_idx). */ 8023 #define CNT(a, b) cnt[(a) + (b) * (badlen + 1)] 8024 cnt = (int *)lalloc((long_u)(sizeof(int) * (badlen + 1) * (goodlen + 1)), 8025 TRUE); 8026 if (cnt == NULL) 8027 return 0; /* out of memory */ 8028 8029 CNT(0, 0) = 0; 8030 for (j = 1; j <= goodlen; ++j) 8031 CNT(0, j) = CNT(0, j - 1) + SCORE_INS; 8032 8033 for (i = 1; i <= badlen; ++i) 8034 { 8035 CNT(i, 0) = CNT(i - 1, 0) + SCORE_DEL; 8036 for (j = 1; j <= goodlen; ++j) 8037 { 8038 #ifdef FEAT_MBYTE 8039 if (has_mbyte) 8040 { 8041 bc = wbadword[i - 1]; 8042 gc = wgoodword[j - 1]; 8043 } 8044 else 8045 #endif 8046 { 8047 bc = badword[i - 1]; 8048 gc = goodword[j - 1]; 8049 } 8050 if (bc == gc) 8051 CNT(i, j) = CNT(i - 1, j - 1); 8052 else 8053 { 8054 /* Use a better score when there is only a case difference. */ 8055 if (SPELL_TOFOLD(bc) == SPELL_TOFOLD(gc)) 8056 CNT(i, j) = SCORE_ICASE + CNT(i - 1, j - 1); 8057 else 8058 { 8059 /* For a similar character use SCORE_SIMILAR. */ 8060 if (slang != NULL 8061 && slang->sl_has_map 8062 && similar_chars(slang, gc, bc)) 8063 CNT(i, j) = SCORE_SIMILAR + CNT(i - 1, j - 1); 8064 else 8065 CNT(i, j) = SCORE_SUBST + CNT(i - 1, j - 1); 8066 } 8067 8068 if (i > 1 && j > 1) 8069 { 8070 #ifdef FEAT_MBYTE 8071 if (has_mbyte) 8072 { 8073 pbc = wbadword[i - 2]; 8074 pgc = wgoodword[j - 2]; 8075 } 8076 else 8077 #endif 8078 { 8079 pbc = badword[i - 2]; 8080 pgc = goodword[j - 2]; 8081 } 8082 if (bc == pgc && pbc == gc) 8083 { 8084 t = SCORE_SWAP + CNT(i - 2, j - 2); 8085 if (t < CNT(i, j)) 8086 CNT(i, j) = t; 8087 } 8088 } 8089 t = SCORE_DEL + CNT(i - 1, j); 8090 if (t < CNT(i, j)) 8091 CNT(i, j) = t; 8092 t = SCORE_INS + CNT(i, j - 1); 8093 if (t < CNT(i, j)) 8094 CNT(i, j) = t; 8095 } 8096 } 8097 } 8098 8099 i = CNT(badlen - 1, goodlen - 1); 8100 vim_free(cnt); 8101 return i; 8102 } 8103 8104 typedef struct 8105 { 8106 int badi; 8107 int goodi; 8108 int score; 8109 } limitscore_T; 8110 8111 /* 8112 * Like spell_edit_score(), but with a limit on the score to make it faster. 8113 * May return SCORE_MAXMAX when the score is higher than "limit". 8114 * 8115 * This uses a stack for the edits still to be tried. 8116 * The idea comes from Aspell leditdist.cpp. Rewritten in C and added support 8117 * for multi-byte characters. 8118 */ 8119 static int 8120 spell_edit_score_limit( 8121 slang_T *slang, 8122 char_u *badword, 8123 char_u *goodword, 8124 int limit) 8125 { 8126 limitscore_T stack[10]; /* allow for over 3 * 2 edits */ 8127 int stackidx; 8128 int bi, gi; 8129 int bi2, gi2; 8130 int bc, gc; 8131 int score; 8132 int score_off; 8133 int minscore; 8134 int round; 8135 8136 #ifdef FEAT_MBYTE 8137 /* Multi-byte characters require a bit more work, use a different function 8138 * to avoid testing "has_mbyte" quite often. */ 8139 if (has_mbyte) 8140 return spell_edit_score_limit_w(slang, badword, goodword, limit); 8141 #endif 8142 8143 /* 8144 * The idea is to go from start to end over the words. So long as 8145 * characters are equal just continue, this always gives the lowest score. 8146 * When there is a difference try several alternatives. Each alternative 8147 * increases "score" for the edit distance. Some of the alternatives are 8148 * pushed unto a stack and tried later, some are tried right away. At the 8149 * end of the word the score for one alternative is known. The lowest 8150 * possible score is stored in "minscore". 8151 */ 8152 stackidx = 0; 8153 bi = 0; 8154 gi = 0; 8155 score = 0; 8156 minscore = limit + 1; 8157 8158 for (;;) 8159 { 8160 /* Skip over an equal part, score remains the same. */ 8161 for (;;) 8162 { 8163 bc = badword[bi]; 8164 gc = goodword[gi]; 8165 if (bc != gc) /* stop at a char that's different */ 8166 break; 8167 if (bc == NUL) /* both words end */ 8168 { 8169 if (score < minscore) 8170 minscore = score; 8171 goto pop; /* do next alternative */ 8172 } 8173 ++bi; 8174 ++gi; 8175 } 8176 8177 if (gc == NUL) /* goodword ends, delete badword chars */ 8178 { 8179 do 8180 { 8181 if ((score += SCORE_DEL) >= minscore) 8182 goto pop; /* do next alternative */ 8183 } while (badword[++bi] != NUL); 8184 minscore = score; 8185 } 8186 else if (bc == NUL) /* badword ends, insert badword chars */ 8187 { 8188 do 8189 { 8190 if ((score += SCORE_INS) >= minscore) 8191 goto pop; /* do next alternative */ 8192 } while (goodword[++gi] != NUL); 8193 minscore = score; 8194 } 8195 else /* both words continue */ 8196 { 8197 /* If not close to the limit, perform a change. Only try changes 8198 * that may lead to a lower score than "minscore". 8199 * round 0: try deleting a char from badword 8200 * round 1: try inserting a char in badword */ 8201 for (round = 0; round <= 1; ++round) 8202 { 8203 score_off = score + (round == 0 ? SCORE_DEL : SCORE_INS); 8204 if (score_off < minscore) 8205 { 8206 if (score_off + SCORE_EDIT_MIN >= minscore) 8207 { 8208 /* Near the limit, rest of the words must match. We 8209 * can check that right now, no need to push an item 8210 * onto the stack. */ 8211 bi2 = bi + 1 - round; 8212 gi2 = gi + round; 8213 while (goodword[gi2] == badword[bi2]) 8214 { 8215 if (goodword[gi2] == NUL) 8216 { 8217 minscore = score_off; 8218 break; 8219 } 8220 ++bi2; 8221 ++gi2; 8222 } 8223 } 8224 else 8225 { 8226 /* try deleting/inserting a character later */ 8227 stack[stackidx].badi = bi + 1 - round; 8228 stack[stackidx].goodi = gi + round; 8229 stack[stackidx].score = score_off; 8230 ++stackidx; 8231 } 8232 } 8233 } 8234 8235 if (score + SCORE_SWAP < minscore) 8236 { 8237 /* If swapping two characters makes a match then the 8238 * substitution is more expensive, thus there is no need to 8239 * try both. */ 8240 if (gc == badword[bi + 1] && bc == goodword[gi + 1]) 8241 { 8242 /* Swap two characters, that is: skip them. */ 8243 gi += 2; 8244 bi += 2; 8245 score += SCORE_SWAP; 8246 continue; 8247 } 8248 } 8249 8250 /* Substitute one character for another which is the same 8251 * thing as deleting a character from both goodword and badword. 8252 * Use a better score when there is only a case difference. */ 8253 if (SPELL_TOFOLD(bc) == SPELL_TOFOLD(gc)) 8254 score += SCORE_ICASE; 8255 else 8256 { 8257 /* For a similar character use SCORE_SIMILAR. */ 8258 if (slang != NULL 8259 && slang->sl_has_map 8260 && similar_chars(slang, gc, bc)) 8261 score += SCORE_SIMILAR; 8262 else 8263 score += SCORE_SUBST; 8264 } 8265 8266 if (score < minscore) 8267 { 8268 /* Do the substitution. */ 8269 ++gi; 8270 ++bi; 8271 continue; 8272 } 8273 } 8274 pop: 8275 /* 8276 * Get here to try the next alternative, pop it from the stack. 8277 */ 8278 if (stackidx == 0) /* stack is empty, finished */ 8279 break; 8280 8281 /* pop an item from the stack */ 8282 --stackidx; 8283 gi = stack[stackidx].goodi; 8284 bi = stack[stackidx].badi; 8285 score = stack[stackidx].score; 8286 } 8287 8288 /* When the score goes over "limit" it may actually be much higher. 8289 * Return a very large number to avoid going below the limit when giving a 8290 * bonus. */ 8291 if (minscore > limit) 8292 return SCORE_MAXMAX; 8293 return minscore; 8294 } 8295 8296 #ifdef FEAT_MBYTE 8297 /* 8298 * Multi-byte version of spell_edit_score_limit(). 8299 * Keep it in sync with the above! 8300 */ 8301 static int 8302 spell_edit_score_limit_w( 8303 slang_T *slang, 8304 char_u *badword, 8305 char_u *goodword, 8306 int limit) 8307 { 8308 limitscore_T stack[10]; /* allow for over 3 * 2 edits */ 8309 int stackidx; 8310 int bi, gi; 8311 int bi2, gi2; 8312 int bc, gc; 8313 int score; 8314 int score_off; 8315 int minscore; 8316 int round; 8317 char_u *p; 8318 int wbadword[MAXWLEN]; 8319 int wgoodword[MAXWLEN]; 8320 8321 /* Get the characters from the multi-byte strings and put them in an 8322 * int array for easy access. */ 8323 bi = 0; 8324 for (p = badword; *p != NUL; ) 8325 wbadword[bi++] = mb_cptr2char_adv(&p); 8326 wbadword[bi++] = 0; 8327 gi = 0; 8328 for (p = goodword; *p != NUL; ) 8329 wgoodword[gi++] = mb_cptr2char_adv(&p); 8330 wgoodword[gi++] = 0; 8331 8332 /* 8333 * The idea is to go from start to end over the words. So long as 8334 * characters are equal just continue, this always gives the lowest score. 8335 * When there is a difference try several alternatives. Each alternative 8336 * increases "score" for the edit distance. Some of the alternatives are 8337 * pushed unto a stack and tried later, some are tried right away. At the 8338 * end of the word the score for one alternative is known. The lowest 8339 * possible score is stored in "minscore". 8340 */ 8341 stackidx = 0; 8342 bi = 0; 8343 gi = 0; 8344 score = 0; 8345 minscore = limit + 1; 8346 8347 for (;;) 8348 { 8349 /* Skip over an equal part, score remains the same. */ 8350 for (;;) 8351 { 8352 bc = wbadword[bi]; 8353 gc = wgoodword[gi]; 8354 8355 if (bc != gc) /* stop at a char that's different */ 8356 break; 8357 if (bc == NUL) /* both words end */ 8358 { 8359 if (score < minscore) 8360 minscore = score; 8361 goto pop; /* do next alternative */ 8362 } 8363 ++bi; 8364 ++gi; 8365 } 8366 8367 if (gc == NUL) /* goodword ends, delete badword chars */ 8368 { 8369 do 8370 { 8371 if ((score += SCORE_DEL) >= minscore) 8372 goto pop; /* do next alternative */ 8373 } while (wbadword[++bi] != NUL); 8374 minscore = score; 8375 } 8376 else if (bc == NUL) /* badword ends, insert badword chars */ 8377 { 8378 do 8379 { 8380 if ((score += SCORE_INS) >= minscore) 8381 goto pop; /* do next alternative */ 8382 } while (wgoodword[++gi] != NUL); 8383 minscore = score; 8384 } 8385 else /* both words continue */ 8386 { 8387 /* If not close to the limit, perform a change. Only try changes 8388 * that may lead to a lower score than "minscore". 8389 * round 0: try deleting a char from badword 8390 * round 1: try inserting a char in badword */ 8391 for (round = 0; round <= 1; ++round) 8392 { 8393 score_off = score + (round == 0 ? SCORE_DEL : SCORE_INS); 8394 if (score_off < minscore) 8395 { 8396 if (score_off + SCORE_EDIT_MIN >= minscore) 8397 { 8398 /* Near the limit, rest of the words must match. We 8399 * can check that right now, no need to push an item 8400 * onto the stack. */ 8401 bi2 = bi + 1 - round; 8402 gi2 = gi + round; 8403 while (wgoodword[gi2] == wbadword[bi2]) 8404 { 8405 if (wgoodword[gi2] == NUL) 8406 { 8407 minscore = score_off; 8408 break; 8409 } 8410 ++bi2; 8411 ++gi2; 8412 } 8413 } 8414 else 8415 { 8416 /* try deleting a character from badword later */ 8417 stack[stackidx].badi = bi + 1 - round; 8418 stack[stackidx].goodi = gi + round; 8419 stack[stackidx].score = score_off; 8420 ++stackidx; 8421 } 8422 } 8423 } 8424 8425 if (score + SCORE_SWAP < minscore) 8426 { 8427 /* If swapping two characters makes a match then the 8428 * substitution is more expensive, thus there is no need to 8429 * try both. */ 8430 if (gc == wbadword[bi + 1] && bc == wgoodword[gi + 1]) 8431 { 8432 /* Swap two characters, that is: skip them. */ 8433 gi += 2; 8434 bi += 2; 8435 score += SCORE_SWAP; 8436 continue; 8437 } 8438 } 8439 8440 /* Substitute one character for another which is the same 8441 * thing as deleting a character from both goodword and badword. 8442 * Use a better score when there is only a case difference. */ 8443 if (SPELL_TOFOLD(bc) == SPELL_TOFOLD(gc)) 8444 score += SCORE_ICASE; 8445 else 8446 { 8447 /* For a similar character use SCORE_SIMILAR. */ 8448 if (slang != NULL 8449 && slang->sl_has_map 8450 && similar_chars(slang, gc, bc)) 8451 score += SCORE_SIMILAR; 8452 else 8453 score += SCORE_SUBST; 8454 } 8455 8456 if (score < minscore) 8457 { 8458 /* Do the substitution. */ 8459 ++gi; 8460 ++bi; 8461 continue; 8462 } 8463 } 8464 pop: 8465 /* 8466 * Get here to try the next alternative, pop it from the stack. 8467 */ 8468 if (stackidx == 0) /* stack is empty, finished */ 8469 break; 8470 8471 /* pop an item from the stack */ 8472 --stackidx; 8473 gi = stack[stackidx].goodi; 8474 bi = stack[stackidx].badi; 8475 score = stack[stackidx].score; 8476 } 8477 8478 /* When the score goes over "limit" it may actually be much higher. 8479 * Return a very large number to avoid going below the limit when giving a 8480 * bonus. */ 8481 if (minscore > limit) 8482 return SCORE_MAXMAX; 8483 return minscore; 8484 } 8485 #endif 8486 8487 /* 8488 * ":spellinfo" 8489 */ 8490 void 8491 ex_spellinfo(exarg_T *eap UNUSED) 8492 { 8493 int lpi; 8494 langp_T *lp; 8495 char_u *p; 8496 8497 if (no_spell_checking(curwin)) 8498 return; 8499 8500 msg_start(); 8501 for (lpi = 0; lpi < curwin->w_s->b_langp.ga_len && !got_int; ++lpi) 8502 { 8503 lp = LANGP_ENTRY(curwin->w_s->b_langp, lpi); 8504 msg_puts((char_u *)"file: "); 8505 msg_puts(lp->lp_slang->sl_fname); 8506 msg_putchar('\n'); 8507 p = lp->lp_slang->sl_info; 8508 if (p != NULL) 8509 { 8510 msg_puts(p); 8511 msg_putchar('\n'); 8512 } 8513 } 8514 msg_end(); 8515 } 8516 8517 #define DUMPFLAG_KEEPCASE 1 /* round 2: keep-case tree */ 8518 #define DUMPFLAG_COUNT 2 /* include word count */ 8519 #define DUMPFLAG_ICASE 4 /* ignore case when finding matches */ 8520 #define DUMPFLAG_ONECAP 8 /* pattern starts with capital */ 8521 #define DUMPFLAG_ALLCAP 16 /* pattern is all capitals */ 8522 8523 /* 8524 * ":spelldump" 8525 */ 8526 void 8527 ex_spelldump(exarg_T *eap) 8528 { 8529 char_u *spl; 8530 long dummy; 8531 8532 if (no_spell_checking(curwin)) 8533 return; 8534 get_option_value((char_u*)"spl", &dummy, &spl, OPT_LOCAL); 8535 8536 /* Create a new empty buffer in a new window. */ 8537 do_cmdline_cmd((char_u *)"new"); 8538 8539 /* enable spelling locally in the new window */ 8540 set_option_value((char_u*)"spell", TRUE, (char_u*)"", OPT_LOCAL); 8541 set_option_value((char_u*)"spl", dummy, spl, OPT_LOCAL); 8542 vim_free(spl); 8543 8544 if (!BUFEMPTY()) 8545 return; 8546 8547 spell_dump_compl(NULL, 0, NULL, eap->forceit ? DUMPFLAG_COUNT : 0); 8548 8549 /* Delete the empty line that we started with. */ 8550 if (curbuf->b_ml.ml_line_count > 1) 8551 ml_delete(curbuf->b_ml.ml_line_count, FALSE); 8552 8553 redraw_later(NOT_VALID); 8554 } 8555 8556 /* 8557 * Go through all possible words and: 8558 * 1. When "pat" is NULL: dump a list of all words in the current buffer. 8559 * "ic" and "dir" are not used. 8560 * 2. When "pat" is not NULL: add matching words to insert mode completion. 8561 */ 8562 void 8563 spell_dump_compl( 8564 char_u *pat, /* leading part of the word */ 8565 int ic, /* ignore case */ 8566 int *dir, /* direction for adding matches */ 8567 int dumpflags_arg) /* DUMPFLAG_* */ 8568 { 8569 langp_T *lp; 8570 slang_T *slang; 8571 idx_T arridx[MAXWLEN]; 8572 int curi[MAXWLEN]; 8573 char_u word[MAXWLEN]; 8574 int c; 8575 char_u *byts; 8576 idx_T *idxs; 8577 linenr_T lnum = 0; 8578 int round; 8579 int depth; 8580 int n; 8581 int flags; 8582 char_u *region_names = NULL; /* region names being used */ 8583 int do_region = TRUE; /* dump region names and numbers */ 8584 char_u *p; 8585 int lpi; 8586 int dumpflags = dumpflags_arg; 8587 int patlen; 8588 8589 /* When ignoring case or when the pattern starts with capital pass this on 8590 * to dump_word(). */ 8591 if (pat != NULL) 8592 { 8593 if (ic) 8594 dumpflags |= DUMPFLAG_ICASE; 8595 else 8596 { 8597 n = captype(pat, NULL); 8598 if (n == WF_ONECAP) 8599 dumpflags |= DUMPFLAG_ONECAP; 8600 else if (n == WF_ALLCAP 8601 #ifdef FEAT_MBYTE 8602 && (int)STRLEN(pat) > mb_ptr2len(pat) 8603 #else 8604 && (int)STRLEN(pat) > 1 8605 #endif 8606 ) 8607 dumpflags |= DUMPFLAG_ALLCAP; 8608 } 8609 } 8610 8611 /* Find out if we can support regions: All languages must support the same 8612 * regions or none at all. */ 8613 for (lpi = 0; lpi < curwin->w_s->b_langp.ga_len; ++lpi) 8614 { 8615 lp = LANGP_ENTRY(curwin->w_s->b_langp, lpi); 8616 p = lp->lp_slang->sl_regions; 8617 if (p[0] != 0) 8618 { 8619 if (region_names == NULL) /* first language with regions */ 8620 region_names = p; 8621 else if (STRCMP(region_names, p) != 0) 8622 { 8623 do_region = FALSE; /* region names are different */ 8624 break; 8625 } 8626 } 8627 } 8628 8629 if (do_region && region_names != NULL) 8630 { 8631 if (pat == NULL) 8632 { 8633 vim_snprintf((char *)IObuff, IOSIZE, "/regions=%s", region_names); 8634 ml_append(lnum++, IObuff, (colnr_T)0, FALSE); 8635 } 8636 } 8637 else 8638 do_region = FALSE; 8639 8640 /* 8641 * Loop over all files loaded for the entries in 'spelllang'. 8642 */ 8643 for (lpi = 0; lpi < curwin->w_s->b_langp.ga_len; ++lpi) 8644 { 8645 lp = LANGP_ENTRY(curwin->w_s->b_langp, lpi); 8646 slang = lp->lp_slang; 8647 if (slang->sl_fbyts == NULL) /* reloading failed */ 8648 continue; 8649 8650 if (pat == NULL) 8651 { 8652 vim_snprintf((char *)IObuff, IOSIZE, "# file: %s", slang->sl_fname); 8653 ml_append(lnum++, IObuff, (colnr_T)0, FALSE); 8654 } 8655 8656 /* When matching with a pattern and there are no prefixes only use 8657 * parts of the tree that match "pat". */ 8658 if (pat != NULL && slang->sl_pbyts == NULL) 8659 patlen = (int)STRLEN(pat); 8660 else 8661 patlen = -1; 8662 8663 /* round 1: case-folded tree 8664 * round 2: keep-case tree */ 8665 for (round = 1; round <= 2; ++round) 8666 { 8667 if (round == 1) 8668 { 8669 dumpflags &= ~DUMPFLAG_KEEPCASE; 8670 byts = slang->sl_fbyts; 8671 idxs = slang->sl_fidxs; 8672 } 8673 else 8674 { 8675 dumpflags |= DUMPFLAG_KEEPCASE; 8676 byts = slang->sl_kbyts; 8677 idxs = slang->sl_kidxs; 8678 } 8679 if (byts == NULL) 8680 continue; /* array is empty */ 8681 8682 depth = 0; 8683 arridx[0] = 0; 8684 curi[0] = 1; 8685 while (depth >= 0 && !got_int 8686 && (pat == NULL || !compl_interrupted)) 8687 { 8688 if (curi[depth] > byts[arridx[depth]]) 8689 { 8690 /* Done all bytes at this node, go up one level. */ 8691 --depth; 8692 line_breakcheck(); 8693 ins_compl_check_keys(50, FALSE); 8694 } 8695 else 8696 { 8697 /* Do one more byte at this node. */ 8698 n = arridx[depth] + curi[depth]; 8699 ++curi[depth]; 8700 c = byts[n]; 8701 if (c == 0) 8702 { 8703 /* End of word, deal with the word. 8704 * Don't use keep-case words in the fold-case tree, 8705 * they will appear in the keep-case tree. 8706 * Only use the word when the region matches. */ 8707 flags = (int)idxs[n]; 8708 if ((round == 2 || (flags & WF_KEEPCAP) == 0) 8709 && (flags & WF_NEEDCOMP) == 0 8710 && (do_region 8711 || (flags & WF_REGION) == 0 8712 || (((unsigned)flags >> 16) 8713 & lp->lp_region) != 0)) 8714 { 8715 word[depth] = NUL; 8716 if (!do_region) 8717 flags &= ~WF_REGION; 8718 8719 /* Dump the basic word if there is no prefix or 8720 * when it's the first one. */ 8721 c = (unsigned)flags >> 24; 8722 if (c == 0 || curi[depth] == 2) 8723 { 8724 dump_word(slang, word, pat, dir, 8725 dumpflags, flags, lnum); 8726 if (pat == NULL) 8727 ++lnum; 8728 } 8729 8730 /* Apply the prefix, if there is one. */ 8731 if (c != 0) 8732 lnum = dump_prefixes(slang, word, pat, dir, 8733 dumpflags, flags, lnum); 8734 } 8735 } 8736 else 8737 { 8738 /* Normal char, go one level deeper. */ 8739 word[depth++] = c; 8740 arridx[depth] = idxs[n]; 8741 curi[depth] = 1; 8742 8743 /* Check if this characters matches with the pattern. 8744 * If not skip the whole tree below it. 8745 * Always ignore case here, dump_word() will check 8746 * proper case later. This isn't exactly right when 8747 * length changes for multi-byte characters with 8748 * ignore case... */ 8749 if (depth <= patlen 8750 && MB_STRNICMP(word, pat, depth) != 0) 8751 --depth; 8752 } 8753 } 8754 } 8755 } 8756 } 8757 } 8758 8759 /* 8760 * Dump one word: apply case modifications and append a line to the buffer. 8761 * When "lnum" is zero add insert mode completion. 8762 */ 8763 static void 8764 dump_word( 8765 slang_T *slang, 8766 char_u *word, 8767 char_u *pat, 8768 int *dir, 8769 int dumpflags, 8770 int wordflags, 8771 linenr_T lnum) 8772 { 8773 int keepcap = FALSE; 8774 char_u *p; 8775 char_u *tw; 8776 char_u cword[MAXWLEN]; 8777 char_u badword[MAXWLEN + 10]; 8778 int i; 8779 int flags = wordflags; 8780 8781 if (dumpflags & DUMPFLAG_ONECAP) 8782 flags |= WF_ONECAP; 8783 if (dumpflags & DUMPFLAG_ALLCAP) 8784 flags |= WF_ALLCAP; 8785 8786 if ((dumpflags & DUMPFLAG_KEEPCASE) == 0 && (flags & WF_CAPMASK) != 0) 8787 { 8788 /* Need to fix case according to "flags". */ 8789 make_case_word(word, cword, flags); 8790 p = cword; 8791 } 8792 else 8793 { 8794 p = word; 8795 if ((dumpflags & DUMPFLAG_KEEPCASE) 8796 && ((captype(word, NULL) & WF_KEEPCAP) == 0 8797 || (flags & WF_FIXCAP) != 0)) 8798 keepcap = TRUE; 8799 } 8800 tw = p; 8801 8802 if (pat == NULL) 8803 { 8804 /* Add flags and regions after a slash. */ 8805 if ((flags & (WF_BANNED | WF_RARE | WF_REGION)) || keepcap) 8806 { 8807 STRCPY(badword, p); 8808 STRCAT(badword, "/"); 8809 if (keepcap) 8810 STRCAT(badword, "="); 8811 if (flags & WF_BANNED) 8812 STRCAT(badword, "!"); 8813 else if (flags & WF_RARE) 8814 STRCAT(badword, "?"); 8815 if (flags & WF_REGION) 8816 for (i = 0; i < 7; ++i) 8817 if (flags & (0x10000 << i)) 8818 sprintf((char *)badword + STRLEN(badword), "%d", i + 1); 8819 p = badword; 8820 } 8821 8822 if (dumpflags & DUMPFLAG_COUNT) 8823 { 8824 hashitem_T *hi; 8825 8826 /* Include the word count for ":spelldump!". */ 8827 hi = hash_find(&slang->sl_wordcount, tw); 8828 if (!HASHITEM_EMPTY(hi)) 8829 { 8830 vim_snprintf((char *)IObuff, IOSIZE, "%s\t%d", 8831 tw, HI2WC(hi)->wc_count); 8832 p = IObuff; 8833 } 8834 } 8835 8836 ml_append(lnum, p, (colnr_T)0, FALSE); 8837 } 8838 else if (((dumpflags & DUMPFLAG_ICASE) 8839 ? MB_STRNICMP(p, pat, STRLEN(pat)) == 0 8840 : STRNCMP(p, pat, STRLEN(pat)) == 0) 8841 && ins_compl_add_infercase(p, (int)STRLEN(p), 8842 p_ic, NULL, *dir, 0) == OK) 8843 /* if dir was BACKWARD then honor it just once */ 8844 *dir = FORWARD; 8845 } 8846 8847 /* 8848 * For ":spelldump": Find matching prefixes for "word". Prepend each to 8849 * "word" and append a line to the buffer. 8850 * When "lnum" is zero add insert mode completion. 8851 * Return the updated line number. 8852 */ 8853 static linenr_T 8854 dump_prefixes( 8855 slang_T *slang, 8856 char_u *word, /* case-folded word */ 8857 char_u *pat, 8858 int *dir, 8859 int dumpflags, 8860 int flags, /* flags with prefix ID */ 8861 linenr_T startlnum) 8862 { 8863 idx_T arridx[MAXWLEN]; 8864 int curi[MAXWLEN]; 8865 char_u prefix[MAXWLEN]; 8866 char_u word_up[MAXWLEN]; 8867 int has_word_up = FALSE; 8868 int c; 8869 char_u *byts; 8870 idx_T *idxs; 8871 linenr_T lnum = startlnum; 8872 int depth; 8873 int n; 8874 int len; 8875 int i; 8876 8877 /* If the word starts with a lower-case letter make the word with an 8878 * upper-case letter in word_up[]. */ 8879 c = PTR2CHAR(word); 8880 if (SPELL_TOUPPER(c) != c) 8881 { 8882 onecap_copy(word, word_up, TRUE); 8883 has_word_up = TRUE; 8884 } 8885 8886 byts = slang->sl_pbyts; 8887 idxs = slang->sl_pidxs; 8888 if (byts != NULL) /* array not is empty */ 8889 { 8890 /* 8891 * Loop over all prefixes, building them byte-by-byte in prefix[]. 8892 * When at the end of a prefix check that it supports "flags". 8893 */ 8894 depth = 0; 8895 arridx[0] = 0; 8896 curi[0] = 1; 8897 while (depth >= 0 && !got_int) 8898 { 8899 n = arridx[depth]; 8900 len = byts[n]; 8901 if (curi[depth] > len) 8902 { 8903 /* Done all bytes at this node, go up one level. */ 8904 --depth; 8905 line_breakcheck(); 8906 } 8907 else 8908 { 8909 /* Do one more byte at this node. */ 8910 n += curi[depth]; 8911 ++curi[depth]; 8912 c = byts[n]; 8913 if (c == 0) 8914 { 8915 /* End of prefix, find out how many IDs there are. */ 8916 for (i = 1; i < len; ++i) 8917 if (byts[n + i] != 0) 8918 break; 8919 curi[depth] += i - 1; 8920 8921 c = valid_word_prefix(i, n, flags, word, slang, FALSE); 8922 if (c != 0) 8923 { 8924 vim_strncpy(prefix + depth, word, MAXWLEN - depth - 1); 8925 dump_word(slang, prefix, pat, dir, dumpflags, 8926 (c & WF_RAREPFX) ? (flags | WF_RARE) 8927 : flags, lnum); 8928 if (lnum != 0) 8929 ++lnum; 8930 } 8931 8932 /* Check for prefix that matches the word when the 8933 * first letter is upper-case, but only if the prefix has 8934 * a condition. */ 8935 if (has_word_up) 8936 { 8937 c = valid_word_prefix(i, n, flags, word_up, slang, 8938 TRUE); 8939 if (c != 0) 8940 { 8941 vim_strncpy(prefix + depth, word_up, 8942 MAXWLEN - depth - 1); 8943 dump_word(slang, prefix, pat, dir, dumpflags, 8944 (c & WF_RAREPFX) ? (flags | WF_RARE) 8945 : flags, lnum); 8946 if (lnum != 0) 8947 ++lnum; 8948 } 8949 } 8950 } 8951 else 8952 { 8953 /* Normal char, go one level deeper. */ 8954 prefix[depth++] = c; 8955 arridx[depth] = idxs[n]; 8956 curi[depth] = 1; 8957 } 8958 } 8959 } 8960 } 8961 8962 return lnum; 8963 } 8964 8965 /* 8966 * Move "p" to the end of word "start". 8967 * Uses the spell-checking word characters. 8968 */ 8969 char_u * 8970 spell_to_word_end(char_u *start, win_T *win) 8971 { 8972 char_u *p = start; 8973 8974 while (*p != NUL && spell_iswordp(p, win)) 8975 MB_PTR_ADV(p); 8976 return p; 8977 } 8978 8979 #if defined(FEAT_INS_EXPAND) || defined(PROTO) 8980 /* 8981 * For Insert mode completion CTRL-X s: 8982 * Find start of the word in front of column "startcol". 8983 * We don't check if it is badly spelled, with completion we can only change 8984 * the word in front of the cursor. 8985 * Returns the column number of the word. 8986 */ 8987 int 8988 spell_word_start(int startcol) 8989 { 8990 char_u *line; 8991 char_u *p; 8992 int col = 0; 8993 8994 if (no_spell_checking(curwin)) 8995 return startcol; 8996 8997 /* Find a word character before "startcol". */ 8998 line = ml_get_curline(); 8999 for (p = line + startcol; p > line; ) 9000 { 9001 MB_PTR_BACK(line, p); 9002 if (spell_iswordp_nmw(p, curwin)) 9003 break; 9004 } 9005 9006 /* Go back to start of the word. */ 9007 while (p > line) 9008 { 9009 col = (int)(p - line); 9010 MB_PTR_BACK(line, p); 9011 if (!spell_iswordp(p, curwin)) 9012 break; 9013 col = 0; 9014 } 9015 9016 return col; 9017 } 9018 9019 /* 9020 * Need to check for 'spellcapcheck' now, the word is removed before 9021 * expand_spelling() is called. Therefore the ugly global variable. 9022 */ 9023 static int spell_expand_need_cap; 9024 9025 void 9026 spell_expand_check_cap(colnr_T col) 9027 { 9028 spell_expand_need_cap = check_need_cap(curwin->w_cursor.lnum, col); 9029 } 9030 9031 /* 9032 * Get list of spelling suggestions. 9033 * Used for Insert mode completion CTRL-X ?. 9034 * Returns the number of matches. The matches are in "matchp[]", array of 9035 * allocated strings. 9036 */ 9037 int 9038 expand_spelling( 9039 linenr_T lnum UNUSED, 9040 char_u *pat, 9041 char_u ***matchp) 9042 { 9043 garray_T ga; 9044 9045 spell_suggest_list(&ga, pat, 100, spell_expand_need_cap, TRUE); 9046 *matchp = ga.ga_data; 9047 return ga.ga_len; 9048 } 9049 #endif 9050 9051 #endif /* FEAT_SPELL */ 9052