1 /* vi:set ts=8 sts=4 sw=4 noet: 2 * 3 * VIM - Vi IMproved by Bram Moolenaar 4 * 5 * Do ":help uganda" in Vim to read copying and usage conditions. 6 * Do ":help credits" in Vim to see a list of people who contributed. 7 * See README.txt for an overview of the Vim source code. 8 */ 9 10 /* 11 * spell.c: code for spell checking 12 * 13 * See spellfile.c for the Vim spell file format. 14 * 15 * The spell checking mechanism uses a tree (aka trie). Each node in the tree 16 * has a list of bytes that can appear (siblings). For each byte there is a 17 * pointer to the node with the byte that follows in the word (child). 18 * 19 * A NUL byte is used where the word may end. The bytes are sorted, so that 20 * binary searching can be used and the NUL bytes are at the start. The 21 * number of possible bytes is stored before the list of bytes. 22 * 23 * The tree uses two arrays: "byts" stores the characters, "idxs" stores 24 * either the next index or flags. The tree starts at index 0. For example, 25 * to lookup "vi" this sequence is followed: 26 * i = 0 27 * len = byts[i] 28 * n = where "v" appears in byts[i + 1] to byts[i + len] 29 * i = idxs[n] 30 * len = byts[i] 31 * n = where "i" appears in byts[i + 1] to byts[i + len] 32 * i = idxs[n] 33 * len = byts[i] 34 * find that byts[i + 1] is 0, idxs[i + 1] has flags for "vi". 35 * 36 * There are two word trees: one with case-folded words and one with words in 37 * original case. The second one is only used for keep-case words and is 38 * usually small. 39 * 40 * There is one additional tree for when not all prefixes are applied when 41 * generating the .spl file. This tree stores all the possible prefixes, as 42 * if they were words. At each word (prefix) end the prefix nr is stored, the 43 * following word must support this prefix nr. And the condition nr is 44 * stored, used to lookup the condition that the word must match with. 
45 * 46 * Thanks to Olaf Seibert for providing an example implementation of this tree 47 * and the compression mechanism. 48 * LZ trie ideas: 49 * http://www.irb.hr/hr/home/ristov/papers/RistovLZtrieRevision1.pdf 50 * More papers: http://www-igm.univ-mlv.fr/~laporte/publi_en.html 51 * 52 * Matching involves checking the caps type: Onecap ALLCAP KeepCap. 53 * 54 * Why doesn't Vim use aspell/ispell/myspell/etc.? 55 * See ":help develop-spell". 56 */ 57 58 /* 59 * Use this to adjust the score after finding suggestions, based on the 60 * suggested word sounding like the bad word. This is much faster than doing 61 * it for every possible suggestion. 62 * Disadvantage: When "the" is typed as "hte" it sounds quite different ("@" 63 * vs "ht") and goes down in the list. 64 * Used when 'spellsuggest' is set to "best". 65 */ 66 #define RESCORE(word_score, sound_score) ((3 * word_score + sound_score) / 4) 67 68 /* 69 * Do the opposite: based on a maximum end score and a known sound score, 70 * compute the maximum word score that can be used. 71 */ 72 #define MAXSCORE(word_score, sound_score) ((4 * word_score - sound_score) / 3) 73 74 #define IN_SPELL_C 75 #include "vim.h" 76 77 #if defined(FEAT_SPELL) || defined(PROTO) 78 79 #ifndef UNIX /* it's in os_unix.h for Unix */ 80 # include <time.h> /* for time_t */ 81 #endif 82 83 /* only used for su_badflags */ 84 #define WF_MIXCAP 0x20 /* mix of upper and lower case: macaRONI */ 85 86 #define WF_CAPMASK (WF_ONECAP | WF_ALLCAP | WF_KEEPCAP | WF_FIXCAP) 87 88 #define REGION_ALL 0xff /* word valid in all regions */ 89 90 #define VIMSUGMAGIC "VIMsug" /* string at start of Vim .sug file */ 91 #define VIMSUGMAGICL 6 92 #define VIMSUGVERSION 1 93 94 /* Result values. Lower number is accepted over higher one. 
*/ 95 #define SP_BANNED -1 96 #define SP_OK 0 97 #define SP_RARE 1 98 #define SP_LOCAL 2 99 #define SP_BAD 3 100 101 typedef struct wordcount_S 102 { 103 short_u wc_count; /* nr of times word was seen */ 104 char_u wc_word[1]; /* word, actually longer */ 105 } wordcount_T; 106 107 #define WC_KEY_OFF offsetof(wordcount_T, wc_word) 108 #define HI2WC(hi) ((wordcount_T *)((hi)->hi_key - WC_KEY_OFF)) 109 #define MAXWORDCOUNT 0xffff 110 111 /* 112 * Information used when looking for suggestions. 113 */ 114 typedef struct suginfo_S 115 { 116 garray_T su_ga; /* suggestions, contains "suggest_T" */ 117 int su_maxcount; /* max. number of suggestions displayed */ 118 int su_maxscore; /* maximum score for adding to su_ga */ 119 int su_sfmaxscore; /* idem, for when doing soundfold words */ 120 garray_T su_sga; /* like su_ga, sound-folded scoring */ 121 char_u *su_badptr; /* start of bad word in line */ 122 int su_badlen; /* length of detected bad word in line */ 123 int su_badflags; /* caps flags for bad word */ 124 char_u su_badword[MAXWLEN]; /* bad word truncated at su_badlen */ 125 char_u su_fbadword[MAXWLEN]; /* su_badword case-folded */ 126 char_u su_sal_badword[MAXWLEN]; /* su_badword soundfolded */ 127 hashtab_T su_banned; /* table with banned words */ 128 slang_T *su_sallang; /* default language for sound folding */ 129 } suginfo_T; 130 131 /* One word suggestion. Used in "si_ga". 
*/ 132 typedef struct suggest_S 133 { 134 char_u *st_word; /* suggested word, allocated string */ 135 int st_wordlen; /* STRLEN(st_word) */ 136 int st_orglen; /* length of replaced text */ 137 int st_score; /* lower is better */ 138 int st_altscore; /* used when st_score compares equal */ 139 int st_salscore; /* st_score is for soundalike */ 140 int st_had_bonus; /* bonus already included in score */ 141 slang_T *st_slang; /* language used for sound folding */ 142 } suggest_T; 143 144 #define SUG(ga, i) (((suggest_T *)(ga).ga_data)[i]) 145 146 /* TRUE if a word appears in the list of banned words. */ 147 #define WAS_BANNED(su, word) (!HASHITEM_EMPTY(hash_find(&su->su_banned, word))) 148 149 /* Number of suggestions kept when cleaning up. We need to keep more than 150 * what is displayed, because when rescore_suggestions() is called the score 151 * may change and wrong suggestions may be removed later. */ 152 #define SUG_CLEAN_COUNT(su) ((su)->su_maxcount < 130 ? 150 : (su)->su_maxcount + 20) 153 154 /* Threshold for sorting and cleaning up suggestions. Don't want to keep lots 155 * of suggestions that are not going to be displayed. 
*/ 156 #define SUG_MAX_COUNT(su) (SUG_CLEAN_COUNT(su) + 50) 157 158 /* score for various changes */ 159 #define SCORE_SPLIT 149 /* split bad word */ 160 #define SCORE_SPLIT_NO 249 /* split bad word with NOSPLITSUGS */ 161 #define SCORE_ICASE 52 /* slightly different case */ 162 #define SCORE_REGION 200 /* word is for different region */ 163 #define SCORE_RARE 180 /* rare word */ 164 #define SCORE_SWAP 75 /* swap two characters */ 165 #define SCORE_SWAP3 110 /* swap two characters in three */ 166 #define SCORE_REP 65 /* REP replacement */ 167 #define SCORE_SUBST 93 /* substitute a character */ 168 #define SCORE_SIMILAR 33 /* substitute a similar character */ 169 #define SCORE_SUBCOMP 33 /* substitute a composing character */ 170 #define SCORE_DEL 94 /* delete a character */ 171 #define SCORE_DELDUP 66 /* delete a duplicated character */ 172 #define SCORE_DELCOMP 28 /* delete a composing character */ 173 #define SCORE_INS 96 /* insert a character */ 174 #define SCORE_INSDUP 67 /* insert a duplicate character */ 175 #define SCORE_INSCOMP 30 /* insert a composing character */ 176 #define SCORE_NONWORD 103 /* change non-word to word char */ 177 178 #define SCORE_FILE 30 /* suggestion from a file */ 179 #define SCORE_MAXINIT 350 /* Initial maximum score: higher == slower. 180 * 350 allows for about three changes. */ 181 182 #define SCORE_COMMON1 30 /* subtracted for words seen before */ 183 #define SCORE_COMMON2 40 /* subtracted for words often seen */ 184 #define SCORE_COMMON3 50 /* subtracted for words very often seen */ 185 #define SCORE_THRES2 10 /* word count threshold for COMMON2 */ 186 #define SCORE_THRES3 100 /* word count threshold for COMMON3 */ 187 188 /* When trying changed soundfold words it becomes slow when trying more than 189 * two changes. With less then two changes it's slightly faster but we miss a 190 * few good suggestions. In rare cases we need to try three of four changes. 
191 */ 192 #define SCORE_SFMAX1 200 /* maximum score for first try */ 193 #define SCORE_SFMAX2 300 /* maximum score for second try */ 194 #define SCORE_SFMAX3 400 /* maximum score for third try */ 195 196 #define SCORE_BIG SCORE_INS * 3 /* big difference */ 197 #define SCORE_MAXMAX 999999 /* accept any score */ 198 #define SCORE_LIMITMAX 350 /* for spell_edit_score_limit() */ 199 200 /* for spell_edit_score_limit() we need to know the minimum value of 201 * SCORE_ICASE, SCORE_SWAP, SCORE_DEL, SCORE_SIMILAR and SCORE_INS */ 202 #define SCORE_EDIT_MIN SCORE_SIMILAR 203 204 /* 205 * Structure to store info for word matching. 206 */ 207 typedef struct matchinf_S 208 { 209 langp_T *mi_lp; /* info for language and region */ 210 211 /* pointers to original text to be checked */ 212 char_u *mi_word; /* start of word being checked */ 213 char_u *mi_end; /* end of matching word so far */ 214 char_u *mi_fend; /* next char to be added to mi_fword */ 215 char_u *mi_cend; /* char after what was used for 216 mi_capflags */ 217 218 /* case-folded text */ 219 char_u mi_fword[MAXWLEN + 1]; /* mi_word case-folded */ 220 int mi_fwordlen; /* nr of valid bytes in mi_fword */ 221 222 /* for when checking word after a prefix */ 223 int mi_prefarridx; /* index in sl_pidxs with list of 224 affixID/condition */ 225 int mi_prefcnt; /* number of entries at mi_prefarridx */ 226 int mi_prefixlen; /* byte length of prefix */ 227 #ifdef FEAT_MBYTE 228 int mi_cprefixlen; /* byte length of prefix in original 229 case */ 230 #else 231 # define mi_cprefixlen mi_prefixlen /* it's the same value */ 232 #endif 233 234 /* for when checking a compound word */ 235 int mi_compoff; /* start of following word offset */ 236 char_u mi_compflags[MAXWLEN]; /* flags for compound words used */ 237 int mi_complen; /* nr of compound words used */ 238 int mi_compextra; /* nr of COMPOUNDROOT words */ 239 240 /* others */ 241 int mi_result; /* result so far: SP_BAD, SP_OK, etc. 
*/ 242 int mi_capflags; /* WF_ONECAP WF_ALLCAP WF_KEEPCAP */ 243 win_T *mi_win; /* buffer being checked */ 244 245 /* for NOBREAK */ 246 int mi_result2; /* "mi_resul" without following word */ 247 char_u *mi_end2; /* "mi_end" without following word */ 248 } matchinf_T; 249 250 251 static int spell_iswordp(char_u *p, win_T *wp); 252 #ifdef FEAT_MBYTE 253 static int spell_mb_isword_class(int cl, win_T *wp); 254 static int spell_iswordp_w(int *p, win_T *wp); 255 #endif 256 257 /* 258 * For finding suggestions: At each node in the tree these states are tried: 259 */ 260 typedef enum 261 { 262 STATE_START = 0, /* At start of node check for NUL bytes (goodword 263 * ends); if badword ends there is a match, otherwise 264 * try splitting word. */ 265 STATE_NOPREFIX, /* try without prefix */ 266 STATE_SPLITUNDO, /* Undo splitting. */ 267 STATE_ENDNUL, /* Past NUL bytes at start of the node. */ 268 STATE_PLAIN, /* Use each byte of the node. */ 269 STATE_DEL, /* Delete a byte from the bad word. */ 270 STATE_INS_PREP, /* Prepare for inserting bytes. */ 271 STATE_INS, /* Insert a byte in the bad word. */ 272 STATE_SWAP, /* Swap two bytes. */ 273 STATE_UNSWAP, /* Undo swap two characters. */ 274 STATE_SWAP3, /* Swap two characters over three. */ 275 STATE_UNSWAP3, /* Undo Swap two characters over three. */ 276 STATE_UNROT3L, /* Undo rotate three characters left */ 277 STATE_UNROT3R, /* Undo rotate three characters right */ 278 STATE_REP_INI, /* Prepare for using REP items. */ 279 STATE_REP, /* Use matching REP items from the .aff file. */ 280 STATE_REP_UNDO, /* Undo a REP item replacement. */ 281 STATE_FINAL /* End of this node. */ 282 } state_T; 283 284 /* 285 * Struct to keep the state at each level in suggest_try_change(). 
286 */ 287 typedef struct trystate_S 288 { 289 state_T ts_state; /* state at this level, STATE_ */ 290 int ts_score; /* score */ 291 idx_T ts_arridx; /* index in tree array, start of node */ 292 short ts_curi; /* index in list of child nodes */ 293 char_u ts_fidx; /* index in fword[], case-folded bad word */ 294 char_u ts_fidxtry; /* ts_fidx at which bytes may be changed */ 295 char_u ts_twordlen; /* valid length of tword[] */ 296 char_u ts_prefixdepth; /* stack depth for end of prefix or 297 * PFD_PREFIXTREE or PFD_NOPREFIX */ 298 char_u ts_flags; /* TSF_ flags */ 299 #ifdef FEAT_MBYTE 300 char_u ts_tcharlen; /* number of bytes in tword character */ 301 char_u ts_tcharidx; /* current byte index in tword character */ 302 char_u ts_isdiff; /* DIFF_ values */ 303 char_u ts_fcharstart; /* index in fword where badword char started */ 304 #endif 305 char_u ts_prewordlen; /* length of word in "preword[]" */ 306 char_u ts_splitoff; /* index in "tword" after last split */ 307 char_u ts_splitfidx; /* "ts_fidx" at word split */ 308 char_u ts_complen; /* nr of compound words used */ 309 char_u ts_compsplit; /* index for "compflags" where word was spit */ 310 char_u ts_save_badflags; /* su_badflags saved here */ 311 char_u ts_delidx; /* index in fword for char that was deleted, 312 valid when "ts_flags" has TSF_DIDDEL */ 313 } trystate_T; 314 315 /* values for ts_isdiff */ 316 #define DIFF_NONE 0 /* no different byte (yet) */ 317 #define DIFF_YES 1 /* different byte found */ 318 #define DIFF_INSERT 2 /* inserting character */ 319 320 /* values for ts_flags */ 321 #define TSF_PREFIXOK 1 /* already checked that prefix is OK */ 322 #define TSF_DIDSPLIT 2 /* tried split at this point */ 323 #define TSF_DIDDEL 4 /* did a delete, "ts_delidx" has index */ 324 325 /* special values ts_prefixdepth */ 326 #define PFD_NOPREFIX 0xff /* not using prefixes */ 327 #define PFD_PREFIXTREE 0xfe /* walking through the prefix tree */ 328 #define PFD_NOTSPECIAL 0xfd /* highest value that's not 
special */ 329 330 /* mode values for find_word */ 331 #define FIND_FOLDWORD 0 /* find word case-folded */ 332 #define FIND_KEEPWORD 1 /* find keep-case word */ 333 #define FIND_PREFIX 2 /* find word after prefix */ 334 #define FIND_COMPOUND 3 /* find case-folded compound word */ 335 #define FIND_KEEPCOMPOUND 4 /* find keep-case compound word */ 336 337 static void find_word(matchinf_T *mip, int mode); 338 static int match_checkcompoundpattern(char_u *ptr, int wlen, garray_T *gap); 339 static int can_compound(slang_T *slang, char_u *word, char_u *flags); 340 static int can_be_compound(trystate_T *sp, slang_T *slang, char_u *compflags, int flag); 341 static int match_compoundrule(slang_T *slang, char_u *compflags); 342 static int valid_word_prefix(int totprefcnt, int arridx, int flags, char_u *word, slang_T *slang, int cond_req); 343 static void find_prefix(matchinf_T *mip, int mode); 344 static int fold_more(matchinf_T *mip); 345 static int spell_valid_case(int wordflags, int treeflags); 346 static int no_spell_checking(win_T *wp); 347 static void spell_load_lang(char_u *lang); 348 static void int_wordlist_spl(char_u *fname); 349 static void spell_load_cb(char_u *fname, void *cookie); 350 static int score_wordcount_adj(slang_T *slang, int score, char_u *word, int split); 351 static int count_syllables(slang_T *slang, char_u *word); 352 static void clear_midword(win_T *buf); 353 static void use_midword(slang_T *lp, win_T *buf); 354 static int find_region(char_u *rp, char_u *region); 355 static int badword_captype(char_u *word, char_u *end); 356 static int check_need_cap(linenr_T lnum, colnr_T col); 357 static void spell_find_suggest(char_u *badptr, int badlen, suginfo_T *su, int maxcount, int banbadword, int need_cap, int interactive); 358 #ifdef FEAT_EVAL 359 static void spell_suggest_expr(suginfo_T *su, char_u *expr); 360 #endif 361 static void spell_suggest_file(suginfo_T *su, char_u *fname); 362 static void spell_suggest_intern(suginfo_T *su, int interactive); 
363 static void spell_find_cleanup(suginfo_T *su); 364 static void allcap_copy(char_u *word, char_u *wcopy); 365 static void suggest_try_special(suginfo_T *su); 366 static void suggest_try_change(suginfo_T *su); 367 static void suggest_trie_walk(suginfo_T *su, langp_T *lp, char_u *fword, int soundfold); 368 static void go_deeper(trystate_T *stack, int depth, int score_add); 369 #ifdef FEAT_MBYTE 370 static int nofold_len(char_u *fword, int flen, char_u *word); 371 #endif 372 static void find_keepcap_word(slang_T *slang, char_u *fword, char_u *kword); 373 static void score_comp_sal(suginfo_T *su); 374 static void score_combine(suginfo_T *su); 375 static int stp_sal_score(suggest_T *stp, suginfo_T *su, slang_T *slang, char_u *badsound); 376 static void suggest_try_soundalike_prep(void); 377 static void suggest_try_soundalike(suginfo_T *su); 378 static void suggest_try_soundalike_finish(void); 379 static void add_sound_suggest(suginfo_T *su, char_u *goodword, int score, langp_T *lp); 380 static int soundfold_find(slang_T *slang, char_u *word); 381 static void make_case_word(char_u *fword, char_u *cword, int flags); 382 static int similar_chars(slang_T *slang, int c1, int c2); 383 static void add_suggestion(suginfo_T *su, garray_T *gap, char_u *goodword, int badlen, int score, int altscore, int had_bonus, slang_T *slang, int maxsf); 384 static void check_suggestions(suginfo_T *su, garray_T *gap); 385 static void add_banned(suginfo_T *su, char_u *word); 386 static void rescore_suggestions(suginfo_T *su); 387 static void rescore_one(suginfo_T *su, suggest_T *stp); 388 static int cleanup_suggestions(garray_T *gap, int maxscore, int keep); 389 static void spell_soundfold_sofo(slang_T *slang, char_u *inword, char_u *res); 390 static void spell_soundfold_sal(slang_T *slang, char_u *inword, char_u *res); 391 #ifdef FEAT_MBYTE 392 static void spell_soundfold_wsal(slang_T *slang, char_u *inword, char_u *res); 393 #endif 394 static int soundalike_score(char_u *goodsound, char_u 
*badsound); 395 static int spell_edit_score(slang_T *slang, char_u *badword, char_u *goodword); 396 static int spell_edit_score_limit(slang_T *slang, char_u *badword, char_u *goodword, int limit); 397 #ifdef FEAT_MBYTE 398 static int spell_edit_score_limit_w(slang_T *slang, char_u *badword, char_u *goodword, int limit); 399 #endif 400 static void dump_word(slang_T *slang, char_u *word, char_u *pat, int *dir, int round, int flags, linenr_T lnum); 401 static linenr_T dump_prefixes(slang_T *slang, char_u *word, char_u *pat, int *dir, int round, int flags, linenr_T startlnum); 402 403 404 /* Remember what "z?" replaced. */ 405 static char_u *repl_from = NULL; 406 static char_u *repl_to = NULL; 407 408 /* 409 * Main spell-checking function. 410 * "ptr" points to a character that could be the start of a word. 411 * "*attrp" is set to the highlight index for a badly spelled word. For a 412 * non-word or when it's OK it remains unchanged. 413 * This must only be called when 'spelllang' is not empty. 414 * 415 * "capcol" is used to check for a Capitalised word after the end of a 416 * sentence. If it's zero then perform the check. Return the column where to 417 * check next, or -1 when no sentence end was found. If it's NULL then don't 418 * worry. 419 * 420 * Returns the length of the word in bytes, also when it's OK, so that the 421 * caller can skip over the word. 422 */ 423 int 424 spell_check( 425 win_T *wp, /* current window */ 426 char_u *ptr, 427 hlf_T *attrp, 428 int *capcol, /* column to check for Capital */ 429 int docount) /* count good words */ 430 { 431 matchinf_T mi; /* Most things are put in "mi" so that it can 432 be passed to functions quickly. */ 433 int nrlen = 0; /* found a number first */ 434 int c; 435 int wrongcaplen = 0; 436 int lpi; 437 int count_word = docount; 438 439 /* A word never starts at a space or a control character. Return quickly 440 * then, skipping over the character. 
*/ 441 if (*ptr <= ' ') 442 return 1; 443 444 /* Return here when loading language files failed. */ 445 if (wp->w_s->b_langp.ga_len == 0) 446 return 1; 447 448 vim_memset(&mi, 0, sizeof(matchinf_T)); 449 450 /* A number is always OK. Also skip hexadecimal numbers 0xFF99 and 451 * 0X99FF. But always do check spelling to find "3GPP" and "11 452 * julifeest". */ 453 if (*ptr >= '0' && *ptr <= '9') 454 { 455 if (*ptr == '0' && (ptr[1] == 'b' || ptr[1] == 'B')) 456 mi.mi_end = skipbin(ptr + 2); 457 else if (*ptr == '0' && (ptr[1] == 'x' || ptr[1] == 'X')) 458 mi.mi_end = skiphex(ptr + 2); 459 else 460 mi.mi_end = skipdigits(ptr); 461 nrlen = (int)(mi.mi_end - ptr); 462 } 463 464 /* Find the normal end of the word (until the next non-word character). */ 465 mi.mi_word = ptr; 466 mi.mi_fend = ptr; 467 if (spell_iswordp(mi.mi_fend, wp)) 468 { 469 do 470 { 471 mb_ptr_adv(mi.mi_fend); 472 } while (*mi.mi_fend != NUL && spell_iswordp(mi.mi_fend, wp)); 473 474 if (capcol != NULL && *capcol == 0 && wp->w_s->b_cap_prog != NULL) 475 { 476 /* Check word starting with capital letter. */ 477 c = PTR2CHAR(ptr); 478 if (!SPELL_ISUPPER(c)) 479 wrongcaplen = (int)(mi.mi_fend - ptr); 480 } 481 } 482 if (capcol != NULL) 483 *capcol = -1; 484 485 /* We always use the characters up to the next non-word character, 486 * also for bad words. */ 487 mi.mi_end = mi.mi_fend; 488 489 /* Check caps type later. */ 490 mi.mi_capflags = 0; 491 mi.mi_cend = NULL; 492 mi.mi_win = wp; 493 494 /* case-fold the word with one non-word character, so that we can check 495 * for the word end. */ 496 if (*mi.mi_fend != NUL) 497 mb_ptr_adv(mi.mi_fend); 498 499 (void)spell_casefold(ptr, (int)(mi.mi_fend - ptr), mi.mi_fword, 500 MAXWLEN + 1); 501 mi.mi_fwordlen = (int)STRLEN(mi.mi_fword); 502 503 /* The word is bad unless we recognize it. */ 504 mi.mi_result = SP_BAD; 505 mi.mi_result2 = SP_BAD; 506 507 /* 508 * Loop over the languages specified in 'spelllang'. 
509 * We check them all, because a word may be matched longer in another 510 * language. 511 */ 512 for (lpi = 0; lpi < wp->w_s->b_langp.ga_len; ++lpi) 513 { 514 mi.mi_lp = LANGP_ENTRY(wp->w_s->b_langp, lpi); 515 516 /* If reloading fails the language is still in the list but everything 517 * has been cleared. */ 518 if (mi.mi_lp->lp_slang->sl_fidxs == NULL) 519 continue; 520 521 /* Check for a matching word in case-folded words. */ 522 find_word(&mi, FIND_FOLDWORD); 523 524 /* Check for a matching word in keep-case words. */ 525 find_word(&mi, FIND_KEEPWORD); 526 527 /* Check for matching prefixes. */ 528 find_prefix(&mi, FIND_FOLDWORD); 529 530 /* For a NOBREAK language, may want to use a word without a following 531 * word as a backup. */ 532 if (mi.mi_lp->lp_slang->sl_nobreak && mi.mi_result == SP_BAD 533 && mi.mi_result2 != SP_BAD) 534 { 535 mi.mi_result = mi.mi_result2; 536 mi.mi_end = mi.mi_end2; 537 } 538 539 /* Count the word in the first language where it's found to be OK. */ 540 if (count_word && mi.mi_result == SP_OK) 541 { 542 count_common_word(mi.mi_lp->lp_slang, ptr, 543 (int)(mi.mi_end - ptr), 1); 544 count_word = FALSE; 545 } 546 } 547 548 if (mi.mi_result != SP_OK) 549 { 550 /* If we found a number skip over it. Allows for "42nd". Do flag 551 * rare and local words, e.g., "3GPP". */ 552 if (nrlen > 0) 553 { 554 if (mi.mi_result == SP_BAD || mi.mi_result == SP_BANNED) 555 return nrlen; 556 } 557 558 /* When we are at a non-word character there is no error, just 559 * skip over the character (try looking for a word after it). */ 560 else if (!spell_iswordp_nmw(ptr, wp)) 561 { 562 if (capcol != NULL && wp->w_s->b_cap_prog != NULL) 563 { 564 regmatch_T regmatch; 565 int r; 566 567 /* Check for end of sentence. 
*/ 568 regmatch.regprog = wp->w_s->b_cap_prog; 569 regmatch.rm_ic = FALSE; 570 r = vim_regexec(®match, ptr, 0); 571 wp->w_s->b_cap_prog = regmatch.regprog; 572 if (r) 573 *capcol = (int)(regmatch.endp[0] - ptr); 574 } 575 576 #ifdef FEAT_MBYTE 577 if (has_mbyte) 578 return (*mb_ptr2len)(ptr); 579 #endif 580 return 1; 581 } 582 else if (mi.mi_end == ptr) 583 /* Always include at least one character. Required for when there 584 * is a mixup in "midword". */ 585 mb_ptr_adv(mi.mi_end); 586 else if (mi.mi_result == SP_BAD 587 && LANGP_ENTRY(wp->w_s->b_langp, 0)->lp_slang->sl_nobreak) 588 { 589 char_u *p, *fp; 590 int save_result = mi.mi_result; 591 592 /* First language in 'spelllang' is NOBREAK. Find first position 593 * at which any word would be valid. */ 594 mi.mi_lp = LANGP_ENTRY(wp->w_s->b_langp, 0); 595 if (mi.mi_lp->lp_slang->sl_fidxs != NULL) 596 { 597 p = mi.mi_word; 598 fp = mi.mi_fword; 599 for (;;) 600 { 601 mb_ptr_adv(p); 602 mb_ptr_adv(fp); 603 if (p >= mi.mi_end) 604 break; 605 mi.mi_compoff = (int)(fp - mi.mi_fword); 606 find_word(&mi, FIND_COMPOUND); 607 if (mi.mi_result != SP_BAD) 608 { 609 mi.mi_end = p; 610 break; 611 } 612 } 613 mi.mi_result = save_result; 614 } 615 } 616 617 if (mi.mi_result == SP_BAD || mi.mi_result == SP_BANNED) 618 *attrp = HLF_SPB; 619 else if (mi.mi_result == SP_RARE) 620 *attrp = HLF_SPR; 621 else 622 *attrp = HLF_SPL; 623 } 624 625 if (wrongcaplen > 0 && (mi.mi_result == SP_OK || mi.mi_result == SP_RARE)) 626 { 627 /* Report SpellCap only when the word isn't badly spelled. */ 628 *attrp = HLF_SPC; 629 return wrongcaplen; 630 } 631 632 return (int)(mi.mi_end - ptr); 633 } 634 635 /* 636 * Check if the word at "mip->mi_word" is in the tree. 637 * When "mode" is FIND_FOLDWORD check in fold-case word tree. 638 * When "mode" is FIND_KEEPWORD check in keep-case word tree. 639 * When "mode" is FIND_PREFIX check for word after prefix in fold-case word 640 * tree. 641 * 642 * For a match mip->mi_result is updated. 
643 */ 644 static void 645 find_word(matchinf_T *mip, int mode) 646 { 647 idx_T arridx = 0; 648 int endlen[MAXWLEN]; /* length at possible word endings */ 649 idx_T endidx[MAXWLEN]; /* possible word endings */ 650 int endidxcnt = 0; 651 int len; 652 int wlen = 0; 653 int flen; 654 int c; 655 char_u *ptr; 656 idx_T lo, hi, m; 657 #ifdef FEAT_MBYTE 658 char_u *s; 659 #endif 660 char_u *p; 661 int res = SP_BAD; 662 slang_T *slang = mip->mi_lp->lp_slang; 663 unsigned flags; 664 char_u *byts; 665 idx_T *idxs; 666 int word_ends; 667 int prefix_found; 668 int nobreak_result; 669 670 if (mode == FIND_KEEPWORD || mode == FIND_KEEPCOMPOUND) 671 { 672 /* Check for word with matching case in keep-case tree. */ 673 ptr = mip->mi_word; 674 flen = 9999; /* no case folding, always enough bytes */ 675 byts = slang->sl_kbyts; 676 idxs = slang->sl_kidxs; 677 678 if (mode == FIND_KEEPCOMPOUND) 679 /* Skip over the previously found word(s). */ 680 wlen += mip->mi_compoff; 681 } 682 else 683 { 684 /* Check for case-folded in case-folded tree. */ 685 ptr = mip->mi_fword; 686 flen = mip->mi_fwordlen; /* available case-folded bytes */ 687 byts = slang->sl_fbyts; 688 idxs = slang->sl_fidxs; 689 690 if (mode == FIND_PREFIX) 691 { 692 /* Skip over the prefix. */ 693 wlen = mip->mi_prefixlen; 694 flen -= mip->mi_prefixlen; 695 } 696 else if (mode == FIND_COMPOUND) 697 { 698 /* Skip over the previously found word(s). */ 699 wlen = mip->mi_compoff; 700 flen -= mip->mi_compoff; 701 } 702 703 } 704 705 if (byts == NULL) 706 return; /* array is empty */ 707 708 /* 709 * Repeat advancing in the tree until: 710 * - there is a byte that doesn't match, 711 * - we reach the end of the tree, 712 * - or we reach the end of the line. 713 */ 714 for (;;) 715 { 716 if (flen <= 0 && *mip->mi_fend != NUL) 717 flen = fold_more(mip); 718 719 len = byts[arridx++]; 720 721 /* If the first possible byte is a zero the word could end here. 722 * Remember this index, we first check for the longest word. 
*/ 723 if (byts[arridx] == 0) 724 { 725 if (endidxcnt == MAXWLEN) 726 { 727 /* Must be a corrupted spell file. */ 728 EMSG(_(e_format)); 729 return; 730 } 731 endlen[endidxcnt] = wlen; 732 endidx[endidxcnt++] = arridx++; 733 --len; 734 735 /* Skip over the zeros, there can be several flag/region 736 * combinations. */ 737 while (len > 0 && byts[arridx] == 0) 738 { 739 ++arridx; 740 --len; 741 } 742 if (len == 0) 743 break; /* no children, word must end here */ 744 } 745 746 /* Stop looking at end of the line. */ 747 if (ptr[wlen] == NUL) 748 break; 749 750 /* Perform a binary search in the list of accepted bytes. */ 751 c = ptr[wlen]; 752 if (c == TAB) /* <Tab> is handled like <Space> */ 753 c = ' '; 754 lo = arridx; 755 hi = arridx + len - 1; 756 while (lo < hi) 757 { 758 m = (lo + hi) / 2; 759 if (byts[m] > c) 760 hi = m - 1; 761 else if (byts[m] < c) 762 lo = m + 1; 763 else 764 { 765 lo = hi = m; 766 break; 767 } 768 } 769 770 /* Stop if there is no matching byte. */ 771 if (hi < lo || byts[lo] != c) 772 break; 773 774 /* Continue at the child (if there is one). */ 775 arridx = idxs[lo]; 776 ++wlen; 777 --flen; 778 779 /* One space in the good word may stand for several spaces in the 780 * checked word. */ 781 if (c == ' ') 782 { 783 for (;;) 784 { 785 if (flen <= 0 && *mip->mi_fend != NUL) 786 flen = fold_more(mip); 787 if (ptr[wlen] != ' ' && ptr[wlen] != TAB) 788 break; 789 ++wlen; 790 --flen; 791 } 792 } 793 } 794 795 /* 796 * Verify that one of the possible endings is valid. Try the longest 797 * first. 
798 */ 799 while (endidxcnt > 0) 800 { 801 --endidxcnt; 802 arridx = endidx[endidxcnt]; 803 wlen = endlen[endidxcnt]; 804 805 #ifdef FEAT_MBYTE 806 if ((*mb_head_off)(ptr, ptr + wlen) > 0) 807 continue; /* not at first byte of character */ 808 #endif 809 if (spell_iswordp(ptr + wlen, mip->mi_win)) 810 { 811 if (slang->sl_compprog == NULL && !slang->sl_nobreak) 812 continue; /* next char is a word character */ 813 word_ends = FALSE; 814 } 815 else 816 word_ends = TRUE; 817 /* The prefix flag is before compound flags. Once a valid prefix flag 818 * has been found we try compound flags. */ 819 prefix_found = FALSE; 820 821 #ifdef FEAT_MBYTE 822 if (mode != FIND_KEEPWORD && has_mbyte) 823 { 824 /* Compute byte length in original word, length may change 825 * when folding case. This can be slow, take a shortcut when the 826 * case-folded word is equal to the keep-case word. */ 827 p = mip->mi_word; 828 if (STRNCMP(ptr, p, wlen) != 0) 829 { 830 for (s = ptr; s < ptr + wlen; mb_ptr_adv(s)) 831 mb_ptr_adv(p); 832 wlen = (int)(p - mip->mi_word); 833 } 834 } 835 #endif 836 837 /* Check flags and region. For FIND_PREFIX check the condition and 838 * prefix ID. 839 * Repeat this if there are more flags/region alternatives until there 840 * is a match. */ 841 res = SP_BAD; 842 for (len = byts[arridx - 1]; len > 0 && byts[arridx] == 0; 843 --len, ++arridx) 844 { 845 flags = idxs[arridx]; 846 847 /* For the fold-case tree check that the case of the checked word 848 * matches with what the word in the tree requires. 849 * For keep-case tree the case is always right. For prefixes we 850 * don't bother to check. */ 851 if (mode == FIND_FOLDWORD) 852 { 853 if (mip->mi_cend != mip->mi_word + wlen) 854 { 855 /* mi_capflags was set for a different word length, need 856 * to do it again. 
*/ 857 mip->mi_cend = mip->mi_word + wlen; 858 mip->mi_capflags = captype(mip->mi_word, mip->mi_cend); 859 } 860 861 if (mip->mi_capflags == WF_KEEPCAP 862 || !spell_valid_case(mip->mi_capflags, flags)) 863 continue; 864 } 865 866 /* When mode is FIND_PREFIX the word must support the prefix: 867 * check the prefix ID and the condition. Do that for the list at 868 * mip->mi_prefarridx that find_prefix() filled. */ 869 else if (mode == FIND_PREFIX && !prefix_found) 870 { 871 c = valid_word_prefix(mip->mi_prefcnt, mip->mi_prefarridx, 872 flags, 873 mip->mi_word + mip->mi_cprefixlen, slang, 874 FALSE); 875 if (c == 0) 876 continue; 877 878 /* Use the WF_RARE flag for a rare prefix. */ 879 if (c & WF_RAREPFX) 880 flags |= WF_RARE; 881 prefix_found = TRUE; 882 } 883 884 if (slang->sl_nobreak) 885 { 886 if ((mode == FIND_COMPOUND || mode == FIND_KEEPCOMPOUND) 887 && (flags & WF_BANNED) == 0) 888 { 889 /* NOBREAK: found a valid following word. That's all we 890 * need to know, so return. */ 891 mip->mi_result = SP_OK; 892 break; 893 } 894 } 895 896 else if ((mode == FIND_COMPOUND || mode == FIND_KEEPCOMPOUND 897 || !word_ends)) 898 { 899 /* If there is no compound flag or the word is shorter than 900 * COMPOUNDMIN reject it quickly. 901 * Makes you wonder why someone puts a compound flag on a word 902 * that's too short... Myspell compatibility requires this 903 * anyway. */ 904 if (((unsigned)flags >> 24) == 0 905 || wlen - mip->mi_compoff < slang->sl_compminlen) 906 continue; 907 #ifdef FEAT_MBYTE 908 /* For multi-byte chars check character length against 909 * COMPOUNDMIN. */ 910 if (has_mbyte 911 && slang->sl_compminlen > 0 912 && mb_charlen_len(mip->mi_word + mip->mi_compoff, 913 wlen - mip->mi_compoff) < slang->sl_compminlen) 914 continue; 915 #endif 916 917 /* Limit the number of compound words to COMPOUNDWORDMAX if no 918 * maximum for syllables is specified. 
*/ 919 if (!word_ends && mip->mi_complen + mip->mi_compextra + 2 920 > slang->sl_compmax 921 && slang->sl_compsylmax == MAXWLEN) 922 continue; 923 924 /* Don't allow compounding on a side where an affix was added, 925 * unless COMPOUNDPERMITFLAG was used. */ 926 if (mip->mi_complen > 0 && (flags & WF_NOCOMPBEF)) 927 continue; 928 if (!word_ends && (flags & WF_NOCOMPAFT)) 929 continue; 930 931 /* Quickly check if compounding is possible with this flag. */ 932 if (!byte_in_str(mip->mi_complen == 0 933 ? slang->sl_compstartflags 934 : slang->sl_compallflags, 935 ((unsigned)flags >> 24))) 936 continue; 937 938 /* If there is a match with a CHECKCOMPOUNDPATTERN rule 939 * discard the compound word. */ 940 if (match_checkcompoundpattern(ptr, wlen, &slang->sl_comppat)) 941 continue; 942 943 if (mode == FIND_COMPOUND) 944 { 945 int capflags; 946 947 /* Need to check the caps type of the appended compound 948 * word. */ 949 #ifdef FEAT_MBYTE 950 if (has_mbyte && STRNCMP(ptr, mip->mi_word, 951 mip->mi_compoff) != 0) 952 { 953 /* case folding may have changed the length */ 954 p = mip->mi_word; 955 for (s = ptr; s < ptr + mip->mi_compoff; mb_ptr_adv(s)) 956 mb_ptr_adv(p); 957 } 958 else 959 #endif 960 p = mip->mi_word + mip->mi_compoff; 961 capflags = captype(p, mip->mi_word + wlen); 962 if (capflags == WF_KEEPCAP || (capflags == WF_ALLCAP 963 && (flags & WF_FIXCAP) != 0)) 964 continue; 965 966 if (capflags != WF_ALLCAP) 967 { 968 /* When the character before the word is a word 969 * character we do not accept a Onecap word. We do 970 * accept a no-caps word, even when the dictionary 971 * word specifies ONECAP. */ 972 mb_ptr_back(mip->mi_word, p); 973 if (spell_iswordp_nmw(p, mip->mi_win) 974 ? 
capflags == WF_ONECAP 975 : (flags & WF_ONECAP) != 0 976 && capflags != WF_ONECAP) 977 continue; 978 } 979 } 980 981 /* If the word ends the sequence of compound flags of the 982 * words must match with one of the COMPOUNDRULE items and 983 * the number of syllables must not be too large. */ 984 mip->mi_compflags[mip->mi_complen] = ((unsigned)flags >> 24); 985 mip->mi_compflags[mip->mi_complen + 1] = NUL; 986 if (word_ends) 987 { 988 char_u fword[MAXWLEN]; 989 990 if (slang->sl_compsylmax < MAXWLEN) 991 { 992 /* "fword" is only needed for checking syllables. */ 993 if (ptr == mip->mi_word) 994 (void)spell_casefold(ptr, wlen, fword, MAXWLEN); 995 else 996 vim_strncpy(fword, ptr, endlen[endidxcnt]); 997 } 998 if (!can_compound(slang, fword, mip->mi_compflags)) 999 continue; 1000 } 1001 else if (slang->sl_comprules != NULL 1002 && !match_compoundrule(slang, mip->mi_compflags)) 1003 /* The compound flags collected so far do not match any 1004 * COMPOUNDRULE, discard the compounded word. */ 1005 continue; 1006 } 1007 1008 /* Check NEEDCOMPOUND: can't use word without compounding. */ 1009 else if (flags & WF_NEEDCOMP) 1010 continue; 1011 1012 nobreak_result = SP_OK; 1013 1014 if (!word_ends) 1015 { 1016 int save_result = mip->mi_result; 1017 char_u *save_end = mip->mi_end; 1018 langp_T *save_lp = mip->mi_lp; 1019 int lpi; 1020 1021 /* Check that a valid word follows. If there is one and we 1022 * are compounding, it will set "mi_result", thus we are 1023 * always finished here. For NOBREAK we only check that a 1024 * valid word follows. 1025 * Recursive! */ 1026 if (slang->sl_nobreak) 1027 mip->mi_result = SP_BAD; 1028 1029 /* Find following word in case-folded tree. */ 1030 mip->mi_compoff = endlen[endidxcnt]; 1031 #ifdef FEAT_MBYTE 1032 if (has_mbyte && mode == FIND_KEEPWORD) 1033 { 1034 /* Compute byte length in case-folded word from "wlen": 1035 * byte length in keep-case word. Length may change when 1036 * folding case. 
This can be slow, take a shortcut when 1037 * the case-folded word is equal to the keep-case word. */ 1038 p = mip->mi_fword; 1039 if (STRNCMP(ptr, p, wlen) != 0) 1040 { 1041 for (s = ptr; s < ptr + wlen; mb_ptr_adv(s)) 1042 mb_ptr_adv(p); 1043 mip->mi_compoff = (int)(p - mip->mi_fword); 1044 } 1045 } 1046 #endif 1047 #if 0 /* Disabled, see below */ 1048 c = mip->mi_compoff; 1049 #endif 1050 ++mip->mi_complen; 1051 if (flags & WF_COMPROOT) 1052 ++mip->mi_compextra; 1053 1054 /* For NOBREAK we need to try all NOBREAK languages, at least 1055 * to find the ".add" file(s). */ 1056 for (lpi = 0; lpi < mip->mi_win->w_s->b_langp.ga_len; ++lpi) 1057 { 1058 if (slang->sl_nobreak) 1059 { 1060 mip->mi_lp = LANGP_ENTRY(mip->mi_win->w_s->b_langp, lpi); 1061 if (mip->mi_lp->lp_slang->sl_fidxs == NULL 1062 || !mip->mi_lp->lp_slang->sl_nobreak) 1063 continue; 1064 } 1065 1066 find_word(mip, FIND_COMPOUND); 1067 1068 /* When NOBREAK any word that matches is OK. Otherwise we 1069 * need to find the longest match, thus try with keep-case 1070 * and prefix too. */ 1071 if (!slang->sl_nobreak || mip->mi_result == SP_BAD) 1072 { 1073 /* Find following word in keep-case tree. */ 1074 mip->mi_compoff = wlen; 1075 find_word(mip, FIND_KEEPCOMPOUND); 1076 1077 #if 0 /* Disabled, a prefix must not appear halfway a compound word, 1078 unless the COMPOUNDPERMITFLAG is used and then it can't be a 1079 postponed prefix. */ 1080 if (!slang->sl_nobreak || mip->mi_result == SP_BAD) 1081 { 1082 /* Check for following word with prefix. 
*/ 1083 mip->mi_compoff = c; 1084 find_prefix(mip, FIND_COMPOUND); 1085 } 1086 #endif 1087 } 1088 1089 if (!slang->sl_nobreak) 1090 break; 1091 } 1092 --mip->mi_complen; 1093 if (flags & WF_COMPROOT) 1094 --mip->mi_compextra; 1095 mip->mi_lp = save_lp; 1096 1097 if (slang->sl_nobreak) 1098 { 1099 nobreak_result = mip->mi_result; 1100 mip->mi_result = save_result; 1101 mip->mi_end = save_end; 1102 } 1103 else 1104 { 1105 if (mip->mi_result == SP_OK) 1106 break; 1107 continue; 1108 } 1109 } 1110 1111 if (flags & WF_BANNED) 1112 res = SP_BANNED; 1113 else if (flags & WF_REGION) 1114 { 1115 /* Check region. */ 1116 if ((mip->mi_lp->lp_region & (flags >> 16)) != 0) 1117 res = SP_OK; 1118 else 1119 res = SP_LOCAL; 1120 } 1121 else if (flags & WF_RARE) 1122 res = SP_RARE; 1123 else 1124 res = SP_OK; 1125 1126 /* Always use the longest match and the best result. For NOBREAK 1127 * we separately keep the longest match without a following good 1128 * word as a fall-back. */ 1129 if (nobreak_result == SP_BAD) 1130 { 1131 if (mip->mi_result2 > res) 1132 { 1133 mip->mi_result2 = res; 1134 mip->mi_end2 = mip->mi_word + wlen; 1135 } 1136 else if (mip->mi_result2 == res 1137 && mip->mi_end2 < mip->mi_word + wlen) 1138 mip->mi_end2 = mip->mi_word + wlen; 1139 } 1140 else if (mip->mi_result > res) 1141 { 1142 mip->mi_result = res; 1143 mip->mi_end = mip->mi_word + wlen; 1144 } 1145 else if (mip->mi_result == res && mip->mi_end < mip->mi_word + wlen) 1146 mip->mi_end = mip->mi_word + wlen; 1147 1148 if (mip->mi_result == SP_OK) 1149 break; 1150 } 1151 1152 if (mip->mi_result == SP_OK) 1153 break; 1154 } 1155 } 1156 1157 /* 1158 * Return TRUE if there is a match between the word ptr[wlen] and 1159 * CHECKCOMPOUNDPATTERN rules, assuming that we will concatenate with another 1160 * word. 1161 * A match means that the first part of CHECKCOMPOUNDPATTERN matches at the 1162 * end of ptr[wlen] and the second part matches after it. 
1163 */ 1164 static int 1165 match_checkcompoundpattern( 1166 char_u *ptr, 1167 int wlen, 1168 garray_T *gap) /* &sl_comppat */ 1169 { 1170 int i; 1171 char_u *p; 1172 int len; 1173 1174 for (i = 0; i + 1 < gap->ga_len; i += 2) 1175 { 1176 p = ((char_u **)gap->ga_data)[i + 1]; 1177 if (STRNCMP(ptr + wlen, p, STRLEN(p)) == 0) 1178 { 1179 /* Second part matches at start of following compound word, now 1180 * check if first part matches at end of previous word. */ 1181 p = ((char_u **)gap->ga_data)[i]; 1182 len = (int)STRLEN(p); 1183 if (len <= wlen && STRNCMP(ptr + wlen - len, p, len) == 0) 1184 return TRUE; 1185 } 1186 } 1187 return FALSE; 1188 } 1189 1190 /* 1191 * Return TRUE if "flags" is a valid sequence of compound flags and "word" 1192 * does not have too many syllables. 1193 */ 1194 static int 1195 can_compound(slang_T *slang, char_u *word, char_u *flags) 1196 { 1197 #ifdef FEAT_MBYTE 1198 char_u uflags[MAXWLEN * 2]; 1199 int i; 1200 #endif 1201 char_u *p; 1202 1203 if (slang->sl_compprog == NULL) 1204 return FALSE; 1205 #ifdef FEAT_MBYTE 1206 if (enc_utf8) 1207 { 1208 /* Need to convert the single byte flags to utf8 characters. */ 1209 p = uflags; 1210 for (i = 0; flags[i] != NUL; ++i) 1211 p += mb_char2bytes(flags[i], p); 1212 *p = NUL; 1213 p = uflags; 1214 } 1215 else 1216 #endif 1217 p = flags; 1218 if (!vim_regexec_prog(&slang->sl_compprog, FALSE, p, 0)) 1219 return FALSE; 1220 1221 /* Count the number of syllables. This may be slow, do it last. If there 1222 * are too many syllables AND the number of compound words is above 1223 * COMPOUNDWORDMAX then compounding is not allowed. */ 1224 if (slang->sl_compsylmax < MAXWLEN 1225 && count_syllables(slang, word) > slang->sl_compsylmax) 1226 return (int)STRLEN(flags) < slang->sl_compmax; 1227 return TRUE; 1228 } 1229 1230 /* 1231 * Return TRUE when the sequence of flags in "compflags" plus "flag" can 1232 * possibly form a valid compounded word. 
This also checks the COMPOUNDRULE 1233 * lines if they don't contain wildcards. 1234 */ 1235 static int 1236 can_be_compound( 1237 trystate_T *sp, 1238 slang_T *slang, 1239 char_u *compflags, 1240 int flag) 1241 { 1242 /* If the flag doesn't appear in sl_compstartflags or sl_compallflags 1243 * then it can't possibly compound. */ 1244 if (!byte_in_str(sp->ts_complen == sp->ts_compsplit 1245 ? slang->sl_compstartflags : slang->sl_compallflags, flag)) 1246 return FALSE; 1247 1248 /* If there are no wildcards, we can check if the flags collected so far 1249 * possibly can form a match with COMPOUNDRULE patterns. This only 1250 * makes sense when we have two or more words. */ 1251 if (slang->sl_comprules != NULL && sp->ts_complen > sp->ts_compsplit) 1252 { 1253 int v; 1254 1255 compflags[sp->ts_complen] = flag; 1256 compflags[sp->ts_complen + 1] = NUL; 1257 v = match_compoundrule(slang, compflags + sp->ts_compsplit); 1258 compflags[sp->ts_complen] = NUL; 1259 return v; 1260 } 1261 1262 return TRUE; 1263 } 1264 1265 1266 /* 1267 * Return TRUE if the compound flags in compflags[] match the start of any 1268 * compound rule. This is used to stop trying a compound if the flags 1269 * collected so far can't possibly match any compound rule. 1270 * Caller must check that slang->sl_comprules is not NULL. 
1271 */ 1272 static int 1273 match_compoundrule(slang_T *slang, char_u *compflags) 1274 { 1275 char_u *p; 1276 int i; 1277 int c; 1278 1279 /* loop over all the COMPOUNDRULE entries */ 1280 for (p = slang->sl_comprules; *p != NUL; ++p) 1281 { 1282 /* loop over the flags in the compound word we have made, match 1283 * them against the current rule entry */ 1284 for (i = 0; ; ++i) 1285 { 1286 c = compflags[i]; 1287 if (c == NUL) 1288 /* found a rule that matches for the flags we have so far */ 1289 return TRUE; 1290 if (*p == '/' || *p == NUL) 1291 break; /* end of rule, it's too short */ 1292 if (*p == '[') 1293 { 1294 int match = FALSE; 1295 1296 /* compare against all the flags in [] */ 1297 ++p; 1298 while (*p != ']' && *p != NUL) 1299 if (*p++ == c) 1300 match = TRUE; 1301 if (!match) 1302 break; /* none matches */ 1303 } 1304 else if (*p != c) 1305 break; /* flag of word doesn't match flag in pattern */ 1306 ++p; 1307 } 1308 1309 /* Skip to the next "/", where the next pattern starts. */ 1310 p = vim_strchr(p, '/'); 1311 if (p == NULL) 1312 break; 1313 } 1314 1315 /* Checked all the rules and none of them match the flags, so there 1316 * can't possibly be a compound starting with these flags. */ 1317 return FALSE; 1318 } 1319 1320 /* 1321 * Return non-zero if the prefix indicated by "arridx" matches with the prefix 1322 * ID in "flags" for the word "word". 1323 * The WF_RAREPFX flag is included in the return value for a rare prefix. 1324 */ 1325 static int 1326 valid_word_prefix( 1327 int totprefcnt, /* nr of prefix IDs */ 1328 int arridx, /* idx in sl_pidxs[] */ 1329 int flags, 1330 char_u *word, 1331 slang_T *slang, 1332 int cond_req) /* only use prefixes with a condition */ 1333 { 1334 int prefcnt; 1335 int pidx; 1336 regprog_T **rp; 1337 int prefid; 1338 1339 prefid = (unsigned)flags >> 24; 1340 for (prefcnt = totprefcnt - 1; prefcnt >= 0; --prefcnt) 1341 { 1342 pidx = slang->sl_pidxs[arridx + prefcnt]; 1343 1344 /* Check the prefix ID. 
*/ 1345 if (prefid != (pidx & 0xff)) 1346 continue; 1347 1348 /* Check if the prefix doesn't combine and the word already has a 1349 * suffix. */ 1350 if ((flags & WF_HAS_AFF) && (pidx & WF_PFX_NC)) 1351 continue; 1352 1353 /* Check the condition, if there is one. The condition index is 1354 * stored in the two bytes above the prefix ID byte. */ 1355 rp = &slang->sl_prefprog[((unsigned)pidx >> 8) & 0xffff]; 1356 if (*rp != NULL) 1357 { 1358 if (!vim_regexec_prog(rp, FALSE, word, 0)) 1359 continue; 1360 } 1361 else if (cond_req) 1362 continue; 1363 1364 /* It's a match! Return the WF_ flags. */ 1365 return pidx; 1366 } 1367 return 0; 1368 } 1369 1370 /* 1371 * Check if the word at "mip->mi_word" has a matching prefix. 1372 * If it does, then check the following word. 1373 * 1374 * If "mode" is "FIND_COMPOUND" then do the same after another word, find a 1375 * prefix in a compound word. 1376 * 1377 * For a match mip->mi_result is updated. 1378 */ 1379 static void 1380 find_prefix(matchinf_T *mip, int mode) 1381 { 1382 idx_T arridx = 0; 1383 int len; 1384 int wlen = 0; 1385 int flen; 1386 int c; 1387 char_u *ptr; 1388 idx_T lo, hi, m; 1389 slang_T *slang = mip->mi_lp->lp_slang; 1390 char_u *byts; 1391 idx_T *idxs; 1392 1393 byts = slang->sl_pbyts; 1394 if (byts == NULL) 1395 return; /* array is empty */ 1396 1397 /* We use the case-folded word here, since prefixes are always 1398 * case-folded. */ 1399 ptr = mip->mi_fword; 1400 flen = mip->mi_fwordlen; /* available case-folded bytes */ 1401 if (mode == FIND_COMPOUND) 1402 { 1403 /* Skip over the previously found word(s). */ 1404 ptr += mip->mi_compoff; 1405 flen -= mip->mi_compoff; 1406 } 1407 idxs = slang->sl_pidxs; 1408 1409 /* 1410 * Repeat advancing in the tree until: 1411 * - there is a byte that doesn't match, 1412 * - we reach the end of the tree, 1413 * - or we reach the end of the line. 
1414 */ 1415 for (;;) 1416 { 1417 if (flen == 0 && *mip->mi_fend != NUL) 1418 flen = fold_more(mip); 1419 1420 len = byts[arridx++]; 1421 1422 /* If the first possible byte is a zero the prefix could end here. 1423 * Check if the following word matches and supports the prefix. */ 1424 if (byts[arridx] == 0) 1425 { 1426 /* There can be several prefixes with different conditions. We 1427 * try them all, since we don't know which one will give the 1428 * longest match. The word is the same each time, pass the list 1429 * of possible prefixes to find_word(). */ 1430 mip->mi_prefarridx = arridx; 1431 mip->mi_prefcnt = len; 1432 while (len > 0 && byts[arridx] == 0) 1433 { 1434 ++arridx; 1435 --len; 1436 } 1437 mip->mi_prefcnt -= len; 1438 1439 /* Find the word that comes after the prefix. */ 1440 mip->mi_prefixlen = wlen; 1441 if (mode == FIND_COMPOUND) 1442 /* Skip over the previously found word(s). */ 1443 mip->mi_prefixlen += mip->mi_compoff; 1444 1445 #ifdef FEAT_MBYTE 1446 if (has_mbyte) 1447 { 1448 /* Case-folded length may differ from original length. */ 1449 mip->mi_cprefixlen = nofold_len(mip->mi_fword, 1450 mip->mi_prefixlen, mip->mi_word); 1451 } 1452 else 1453 mip->mi_cprefixlen = mip->mi_prefixlen; 1454 #endif 1455 find_word(mip, FIND_PREFIX); 1456 1457 1458 if (len == 0) 1459 break; /* no children, word must end here */ 1460 } 1461 1462 /* Stop looking at end of the line. */ 1463 if (ptr[wlen] == NUL) 1464 break; 1465 1466 /* Perform a binary search in the list of accepted bytes. */ 1467 c = ptr[wlen]; 1468 lo = arridx; 1469 hi = arridx + len - 1; 1470 while (lo < hi) 1471 { 1472 m = (lo + hi) / 2; 1473 if (byts[m] > c) 1474 hi = m - 1; 1475 else if (byts[m] < c) 1476 lo = m + 1; 1477 else 1478 { 1479 lo = hi = m; 1480 break; 1481 } 1482 } 1483 1484 /* Stop if there is no matching byte. */ 1485 if (hi < lo || byts[lo] != c) 1486 break; 1487 1488 /* Continue at the child (if there is one). 
*/ 1489 arridx = idxs[lo]; 1490 ++wlen; 1491 --flen; 1492 } 1493 } 1494 1495 /* 1496 * Need to fold at least one more character. Do until next non-word character 1497 * for efficiency. Include the non-word character too. 1498 * Return the length of the folded chars in bytes. 1499 */ 1500 static int 1501 fold_more(matchinf_T *mip) 1502 { 1503 int flen; 1504 char_u *p; 1505 1506 p = mip->mi_fend; 1507 do 1508 { 1509 mb_ptr_adv(mip->mi_fend); 1510 } while (*mip->mi_fend != NUL && spell_iswordp(mip->mi_fend, mip->mi_win)); 1511 1512 /* Include the non-word character so that we can check for the word end. */ 1513 if (*mip->mi_fend != NUL) 1514 mb_ptr_adv(mip->mi_fend); 1515 1516 (void)spell_casefold(p, (int)(mip->mi_fend - p), 1517 mip->mi_fword + mip->mi_fwordlen, 1518 MAXWLEN - mip->mi_fwordlen); 1519 flen = (int)STRLEN(mip->mi_fword + mip->mi_fwordlen); 1520 mip->mi_fwordlen += flen; 1521 return flen; 1522 } 1523 1524 /* 1525 * Check case flags for a word. Return TRUE if the word has the requested 1526 * case. 1527 */ 1528 static int 1529 spell_valid_case( 1530 int wordflags, /* flags for the checked word. */ 1531 int treeflags) /* flags for the word in the spell tree */ 1532 { 1533 return ((wordflags == WF_ALLCAP && (treeflags & WF_FIXCAP) == 0) 1534 || ((treeflags & (WF_ALLCAP | WF_KEEPCAP)) == 0 1535 && ((treeflags & WF_ONECAP) == 0 1536 || (wordflags & WF_ONECAP) != 0))); 1537 } 1538 1539 /* 1540 * Return TRUE if spell checking is not enabled. 1541 */ 1542 static int 1543 no_spell_checking(win_T *wp) 1544 { 1545 if (!wp->w_p_spell || *wp->w_s->b_p_spl == NUL 1546 || wp->w_s->b_langp.ga_len == 0) 1547 { 1548 EMSG(_("E756: Spell checking is not enabled")); 1549 return TRUE; 1550 } 1551 return FALSE; 1552 } 1553 1554 /* 1555 * Move to next spell error. 1556 * "curline" is FALSE for "[s", "]s", "[S" and "]S". 1557 * "curline" is TRUE to find word under/after cursor in the same line. 
1558 * For Insert mode completion "dir" is BACKWARD and "curline" is TRUE: move 1559 * to after badly spelled word before the cursor. 1560 * Return 0 if not found, length of the badly spelled word otherwise. 1561 */ 1562 int 1563 spell_move_to( 1564 win_T *wp, 1565 int dir, /* FORWARD or BACKWARD */ 1566 int allwords, /* TRUE for "[s"/"]s", FALSE for "[S"/"]S" */ 1567 int curline, 1568 hlf_T *attrp) /* return: attributes of bad word or NULL 1569 (only when "dir" is FORWARD) */ 1570 { 1571 linenr_T lnum; 1572 pos_T found_pos; 1573 int found_len = 0; 1574 char_u *line; 1575 char_u *p; 1576 char_u *endp; 1577 hlf_T attr; 1578 int len; 1579 #ifdef FEAT_SYN_HL 1580 int has_syntax = syntax_present(wp); 1581 #endif 1582 int col; 1583 int can_spell; 1584 char_u *buf = NULL; 1585 int buflen = 0; 1586 int skip = 0; 1587 int capcol = -1; 1588 int found_one = FALSE; 1589 int wrapped = FALSE; 1590 1591 if (no_spell_checking(wp)) 1592 return 0; 1593 1594 /* 1595 * Start looking for bad word at the start of the line, because we can't 1596 * start halfway a word, we don't know where it starts or ends. 1597 * 1598 * When searching backwards, we continue in the line to find the last 1599 * bad word (in the cursor line: before the cursor). 1600 * 1601 * We concatenate the start of the next line, so that wrapped words work 1602 * (e.g. "et<line-break>cetera"). Doesn't work when searching backwards 1603 * though... 1604 */ 1605 lnum = wp->w_cursor.lnum; 1606 clearpos(&found_pos); 1607 1608 while (!got_int) 1609 { 1610 line = ml_get_buf(wp->w_buffer, lnum, FALSE); 1611 1612 len = (int)STRLEN(line); 1613 if (buflen < len + MAXWLEN + 2) 1614 { 1615 vim_free(buf); 1616 buflen = len + MAXWLEN + 2; 1617 buf = alloc(buflen); 1618 if (buf == NULL) 1619 break; 1620 } 1621 1622 /* In first line check first word for Capital. */ 1623 if (lnum == 1) 1624 capcol = 0; 1625 1626 /* For checking first word with a capital skip white space. 
*/ 1627 if (capcol == 0) 1628 capcol = (int)(skipwhite(line) - line); 1629 else if (curline && wp == curwin) 1630 { 1631 /* For spellbadword(): check if first word needs a capital. */ 1632 col = (int)(skipwhite(line) - line); 1633 if (check_need_cap(lnum, col)) 1634 capcol = col; 1635 1636 /* Need to get the line again, may have looked at the previous 1637 * one. */ 1638 line = ml_get_buf(wp->w_buffer, lnum, FALSE); 1639 } 1640 1641 /* Copy the line into "buf" and append the start of the next line if 1642 * possible. */ 1643 STRCPY(buf, line); 1644 if (lnum < wp->w_buffer->b_ml.ml_line_count) 1645 spell_cat_line(buf + STRLEN(buf), 1646 ml_get_buf(wp->w_buffer, lnum + 1, FALSE), MAXWLEN); 1647 1648 p = buf + skip; 1649 endp = buf + len; 1650 while (p < endp) 1651 { 1652 /* When searching backward don't search after the cursor. Unless 1653 * we wrapped around the end of the buffer. */ 1654 if (dir == BACKWARD 1655 && lnum == wp->w_cursor.lnum 1656 && !wrapped 1657 && (colnr_T)(p - buf) >= wp->w_cursor.col) 1658 break; 1659 1660 /* start of word */ 1661 attr = HLF_COUNT; 1662 len = spell_check(wp, p, &attr, &capcol, FALSE); 1663 1664 if (attr != HLF_COUNT) 1665 { 1666 /* We found a bad word. Check the attribute. */ 1667 if (allwords || attr == HLF_SPB) 1668 { 1669 /* When searching forward only accept a bad word after 1670 * the cursor. */ 1671 if (dir == BACKWARD 1672 || lnum != wp->w_cursor.lnum 1673 || (lnum == wp->w_cursor.lnum 1674 && (wrapped 1675 || (colnr_T)(curline ? 
p - buf + len 1676 : p - buf) 1677 > wp->w_cursor.col))) 1678 { 1679 #ifdef FEAT_SYN_HL 1680 if (has_syntax) 1681 { 1682 col = (int)(p - buf); 1683 (void)syn_get_id(wp, lnum, (colnr_T)col, 1684 FALSE, &can_spell, FALSE); 1685 if (!can_spell) 1686 attr = HLF_COUNT; 1687 } 1688 else 1689 #endif 1690 can_spell = TRUE; 1691 1692 if (can_spell) 1693 { 1694 found_one = TRUE; 1695 found_pos.lnum = lnum; 1696 found_pos.col = (int)(p - buf); 1697 #ifdef FEAT_VIRTUALEDIT 1698 found_pos.coladd = 0; 1699 #endif 1700 if (dir == FORWARD) 1701 { 1702 /* No need to search further. */ 1703 wp->w_cursor = found_pos; 1704 vim_free(buf); 1705 if (attrp != NULL) 1706 *attrp = attr; 1707 return len; 1708 } 1709 else if (curline) 1710 /* Insert mode completion: put cursor after 1711 * the bad word. */ 1712 found_pos.col += len; 1713 found_len = len; 1714 } 1715 } 1716 else 1717 found_one = TRUE; 1718 } 1719 } 1720 1721 /* advance to character after the word */ 1722 p += len; 1723 capcol -= len; 1724 } 1725 1726 if (dir == BACKWARD && found_pos.lnum != 0) 1727 { 1728 /* Use the last match in the line (before the cursor). */ 1729 wp->w_cursor = found_pos; 1730 vim_free(buf); 1731 return found_len; 1732 } 1733 1734 if (curline) 1735 break; /* only check cursor line */ 1736 1737 /* Advance to next line. */ 1738 if (dir == BACKWARD) 1739 { 1740 /* If we are back at the starting line and searched it again there 1741 * is no match, give up. */ 1742 if (lnum == wp->w_cursor.lnum && wrapped) 1743 break; 1744 1745 if (lnum > 1) 1746 --lnum; 1747 else if (!p_ws) 1748 break; /* at first line and 'nowrapscan' */ 1749 else 1750 { 1751 /* Wrap around to the end of the buffer. May search the 1752 * starting line again and accept the last match. 
*/ 1753 lnum = wp->w_buffer->b_ml.ml_line_count; 1754 wrapped = TRUE; 1755 if (!shortmess(SHM_SEARCH)) 1756 give_warning((char_u *)_(top_bot_msg), TRUE); 1757 } 1758 capcol = -1; 1759 } 1760 else 1761 { 1762 if (lnum < wp->w_buffer->b_ml.ml_line_count) 1763 ++lnum; 1764 else if (!p_ws) 1765 break; /* at first line and 'nowrapscan' */ 1766 else 1767 { 1768 /* Wrap around to the start of the buffer. May search the 1769 * starting line again and accept the first match. */ 1770 lnum = 1; 1771 wrapped = TRUE; 1772 if (!shortmess(SHM_SEARCH)) 1773 give_warning((char_u *)_(bot_top_msg), TRUE); 1774 } 1775 1776 /* If we are back at the starting line and there is no match then 1777 * give up. */ 1778 if (lnum == wp->w_cursor.lnum && (!found_one || wrapped)) 1779 break; 1780 1781 /* Skip the characters at the start of the next line that were 1782 * included in a match crossing line boundaries. */ 1783 if (attr == HLF_COUNT) 1784 skip = (int)(p - endp); 1785 else 1786 skip = 0; 1787 1788 /* Capcol skips over the inserted space. */ 1789 --capcol; 1790 1791 /* But after empty line check first word in next line */ 1792 if (*skipwhite(line) == NUL) 1793 capcol = 0; 1794 } 1795 1796 line_breakcheck(); 1797 } 1798 1799 vim_free(buf); 1800 return 0; 1801 } 1802 1803 /* 1804 * For spell checking: concatenate the start of the following line "line" into 1805 * "buf", blanking-out special characters. Copy less then "maxlen" bytes. 1806 * Keep the blanks at the start of the next line, this is used in win_line() 1807 * to skip those bytes if the word was OK. 1808 */ 1809 void 1810 spell_cat_line(char_u *buf, char_u *line, int maxlen) 1811 { 1812 char_u *p; 1813 int n; 1814 1815 p = skipwhite(line); 1816 while (vim_strchr((char_u *)"*#/\"\t", *p) != NULL) 1817 p = skipwhite(p + 1); 1818 1819 if (*p != NUL) 1820 { 1821 /* Only worth concatenating if there is something else than spaces to 1822 * concatenate. 
*/ 1823 n = (int)(p - line) + 1; 1824 if (n < maxlen - 1) 1825 { 1826 vim_memset(buf, ' ', n); 1827 vim_strncpy(buf + n, p, maxlen - 1 - n); 1828 } 1829 } 1830 } 1831 1832 /* 1833 * Structure used for the cookie argument of do_in_runtimepath(). 1834 */ 1835 typedef struct spelload_S 1836 { 1837 char_u sl_lang[MAXWLEN + 1]; /* language name */ 1838 slang_T *sl_slang; /* resulting slang_T struct */ 1839 int sl_nobreak; /* NOBREAK language found */ 1840 } spelload_T; 1841 1842 /* 1843 * Load word list(s) for "lang" from Vim spell file(s). 1844 * "lang" must be the language without the region: e.g., "en". 1845 */ 1846 static void 1847 spell_load_lang(char_u *lang) 1848 { 1849 char_u fname_enc[85]; 1850 int r; 1851 spelload_T sl; 1852 #ifdef FEAT_AUTOCMD 1853 int round; 1854 #endif 1855 1856 /* Copy the language name to pass it to spell_load_cb() as a cookie. 1857 * It's truncated when an error is detected. */ 1858 STRCPY(sl.sl_lang, lang); 1859 sl.sl_slang = NULL; 1860 sl.sl_nobreak = FALSE; 1861 1862 #ifdef FEAT_AUTOCMD 1863 /* We may retry when no spell file is found for the language, an 1864 * autocommand may load it then. */ 1865 for (round = 1; round <= 2; ++round) 1866 #endif 1867 { 1868 /* 1869 * Find the first spell file for "lang" in 'runtimepath' and load it. 1870 */ 1871 vim_snprintf((char *)fname_enc, sizeof(fname_enc) - 5, 1872 #ifdef VMS 1873 "spell/%s_%s.spl", 1874 #else 1875 "spell/%s.%s.spl", 1876 #endif 1877 lang, spell_enc()); 1878 r = do_in_runtimepath(fname_enc, 0, spell_load_cb, &sl); 1879 1880 if (r == FAIL && *sl.sl_lang != NUL) 1881 { 1882 /* Try loading the ASCII version. 
*/ 1883 vim_snprintf((char *)fname_enc, sizeof(fname_enc) - 5, 1884 #ifdef VMS 1885 "spell/%s_ascii.spl", 1886 #else 1887 "spell/%s.ascii.spl", 1888 #endif 1889 lang); 1890 r = do_in_runtimepath(fname_enc, 0, spell_load_cb, &sl); 1891 1892 #ifdef FEAT_AUTOCMD 1893 if (r == FAIL && *sl.sl_lang != NUL && round == 1 1894 && apply_autocmds(EVENT_SPELLFILEMISSING, lang, 1895 curbuf->b_fname, FALSE, curbuf)) 1896 continue; 1897 break; 1898 #endif 1899 } 1900 #ifdef FEAT_AUTOCMD 1901 break; 1902 #endif 1903 } 1904 1905 if (r == FAIL) 1906 { 1907 smsg((char_u *) 1908 #ifdef VMS 1909 _("Warning: Cannot find word list \"%s_%s.spl\" or \"%s_ascii.spl\""), 1910 #else 1911 _("Warning: Cannot find word list \"%s.%s.spl\" or \"%s.ascii.spl\""), 1912 #endif 1913 lang, spell_enc(), lang); 1914 } 1915 else if (sl.sl_slang != NULL) 1916 { 1917 /* At least one file was loaded, now load ALL the additions. */ 1918 STRCPY(fname_enc + STRLEN(fname_enc) - 3, "add.spl"); 1919 do_in_runtimepath(fname_enc, DIP_ALL, spell_load_cb, &sl); 1920 } 1921 } 1922 1923 /* 1924 * Return the encoding used for spell checking: Use 'encoding', except that we 1925 * use "latin1" for "latin9". And limit to 60 characters (just in case). 1926 */ 1927 char_u * 1928 spell_enc(void) 1929 { 1930 1931 #ifdef FEAT_MBYTE 1932 if (STRLEN(p_enc) < 60 && STRCMP(p_enc, "iso-8859-15") != 0) 1933 return p_enc; 1934 #endif 1935 return (char_u *)"latin1"; 1936 } 1937 1938 /* 1939 * Get the name of the .spl file for the internal wordlist into 1940 * "fname[MAXPATHL]". 1941 */ 1942 static void 1943 int_wordlist_spl(char_u *fname) 1944 { 1945 vim_snprintf((char *)fname, MAXPATHL, SPL_FNAME_TMPL, 1946 int_wordlist, spell_enc()); 1947 } 1948 1949 /* 1950 * Allocate a new slang_T for language "lang". "lang" can be NULL. 1951 * Caller must fill "sl_next". 
1952 */ 1953 slang_T * 1954 slang_alloc(char_u *lang) 1955 { 1956 slang_T *lp; 1957 1958 lp = (slang_T *)alloc_clear(sizeof(slang_T)); 1959 if (lp != NULL) 1960 { 1961 if (lang != NULL) 1962 lp->sl_name = vim_strsave(lang); 1963 ga_init2(&lp->sl_rep, sizeof(fromto_T), 10); 1964 ga_init2(&lp->sl_repsal, sizeof(fromto_T), 10); 1965 lp->sl_compmax = MAXWLEN; 1966 lp->sl_compsylmax = MAXWLEN; 1967 hash_init(&lp->sl_wordcount); 1968 } 1969 1970 return lp; 1971 } 1972 1973 /* 1974 * Free the contents of an slang_T and the structure itself. 1975 */ 1976 void 1977 slang_free(slang_T *lp) 1978 { 1979 vim_free(lp->sl_name); 1980 vim_free(lp->sl_fname); 1981 slang_clear(lp); 1982 vim_free(lp); 1983 } 1984 1985 /* 1986 * Clear an slang_T so that the file can be reloaded. 1987 */ 1988 void 1989 slang_clear(slang_T *lp) 1990 { 1991 garray_T *gap; 1992 fromto_T *ftp; 1993 salitem_T *smp; 1994 int i; 1995 int round; 1996 1997 vim_free(lp->sl_fbyts); 1998 lp->sl_fbyts = NULL; 1999 vim_free(lp->sl_kbyts); 2000 lp->sl_kbyts = NULL; 2001 vim_free(lp->sl_pbyts); 2002 lp->sl_pbyts = NULL; 2003 2004 vim_free(lp->sl_fidxs); 2005 lp->sl_fidxs = NULL; 2006 vim_free(lp->sl_kidxs); 2007 lp->sl_kidxs = NULL; 2008 vim_free(lp->sl_pidxs); 2009 lp->sl_pidxs = NULL; 2010 2011 for (round = 1; round <= 2; ++round) 2012 { 2013 gap = round == 1 ? &lp->sl_rep : &lp->sl_repsal; 2014 while (gap->ga_len > 0) 2015 { 2016 ftp = &((fromto_T *)gap->ga_data)[--gap->ga_len]; 2017 vim_free(ftp->ft_from); 2018 vim_free(ftp->ft_to); 2019 } 2020 ga_clear(gap); 2021 } 2022 2023 gap = &lp->sl_sal; 2024 if (lp->sl_sofo) 2025 { 2026 /* "ga_len" is set to 1 without adding an item for latin1 */ 2027 if (gap->ga_data != NULL) 2028 /* SOFOFROM and SOFOTO items: free lists of wide characters. 
*/ 2029 for (i = 0; i < gap->ga_len; ++i) 2030 vim_free(((int **)gap->ga_data)[i]); 2031 } 2032 else 2033 /* SAL items: free salitem_T items */ 2034 while (gap->ga_len > 0) 2035 { 2036 smp = &((salitem_T *)gap->ga_data)[--gap->ga_len]; 2037 vim_free(smp->sm_lead); 2038 /* Don't free sm_oneof and sm_rules, they point into sm_lead. */ 2039 vim_free(smp->sm_to); 2040 #ifdef FEAT_MBYTE 2041 vim_free(smp->sm_lead_w); 2042 vim_free(smp->sm_oneof_w); 2043 vim_free(smp->sm_to_w); 2044 #endif 2045 } 2046 ga_clear(gap); 2047 2048 for (i = 0; i < lp->sl_prefixcnt; ++i) 2049 vim_regfree(lp->sl_prefprog[i]); 2050 lp->sl_prefixcnt = 0; 2051 vim_free(lp->sl_prefprog); 2052 lp->sl_prefprog = NULL; 2053 2054 vim_free(lp->sl_info); 2055 lp->sl_info = NULL; 2056 2057 vim_free(lp->sl_midword); 2058 lp->sl_midword = NULL; 2059 2060 vim_regfree(lp->sl_compprog); 2061 vim_free(lp->sl_comprules); 2062 vim_free(lp->sl_compstartflags); 2063 vim_free(lp->sl_compallflags); 2064 lp->sl_compprog = NULL; 2065 lp->sl_comprules = NULL; 2066 lp->sl_compstartflags = NULL; 2067 lp->sl_compallflags = NULL; 2068 2069 vim_free(lp->sl_syllable); 2070 lp->sl_syllable = NULL; 2071 ga_clear(&lp->sl_syl_items); 2072 2073 ga_clear_strings(&lp->sl_comppat); 2074 2075 hash_clear_all(&lp->sl_wordcount, WC_KEY_OFF); 2076 hash_init(&lp->sl_wordcount); 2077 2078 #ifdef FEAT_MBYTE 2079 hash_clear_all(&lp->sl_map_hash, 0); 2080 #endif 2081 2082 /* Clear info from .sug file. */ 2083 slang_clear_sug(lp); 2084 2085 lp->sl_compmax = MAXWLEN; 2086 lp->sl_compminlen = 0; 2087 lp->sl_compsylmax = MAXWLEN; 2088 lp->sl_regions[0] = NUL; 2089 } 2090 2091 /* 2092 * Clear the info from the .sug file in "lp". 
2093 */ 2094 void 2095 slang_clear_sug(slang_T *lp) 2096 { 2097 vim_free(lp->sl_sbyts); 2098 lp->sl_sbyts = NULL; 2099 vim_free(lp->sl_sidxs); 2100 lp->sl_sidxs = NULL; 2101 close_spellbuf(lp->sl_sugbuf); 2102 lp->sl_sugbuf = NULL; 2103 lp->sl_sugloaded = FALSE; 2104 lp->sl_sugtime = 0; 2105 } 2106 2107 /* 2108 * Load one spell file and store the info into a slang_T. 2109 * Invoked through do_in_runtimepath(). 2110 */ 2111 static void 2112 spell_load_cb(char_u *fname, void *cookie) 2113 { 2114 spelload_T *slp = (spelload_T *)cookie; 2115 slang_T *slang; 2116 2117 slang = spell_load_file(fname, slp->sl_lang, NULL, FALSE); 2118 if (slang != NULL) 2119 { 2120 /* When a previously loaded file has NOBREAK also use it for the 2121 * ".add" files. */ 2122 if (slp->sl_nobreak && slang->sl_add) 2123 slang->sl_nobreak = TRUE; 2124 else if (slang->sl_nobreak) 2125 slp->sl_nobreak = TRUE; 2126 2127 slp->sl_slang = slang; 2128 } 2129 } 2130 2131 2132 /* 2133 * Add a word to the hashtable of common words. 2134 * If it's already there then the counter is increased. 
2135 */ 2136 void 2137 count_common_word( 2138 slang_T *lp, 2139 char_u *word, 2140 int len, /* word length, -1 for upto NUL */ 2141 int count) /* 1 to count once, 10 to init */ 2142 { 2143 hash_T hash; 2144 hashitem_T *hi; 2145 wordcount_T *wc; 2146 char_u buf[MAXWLEN]; 2147 char_u *p; 2148 2149 if (len == -1) 2150 p = word; 2151 else 2152 { 2153 vim_strncpy(buf, word, len); 2154 p = buf; 2155 } 2156 2157 hash = hash_hash(p); 2158 hi = hash_lookup(&lp->sl_wordcount, p, hash); 2159 if (HASHITEM_EMPTY(hi)) 2160 { 2161 wc = (wordcount_T *)alloc((unsigned)(sizeof(wordcount_T) + STRLEN(p))); 2162 if (wc == NULL) 2163 return; 2164 STRCPY(wc->wc_word, p); 2165 wc->wc_count = count; 2166 hash_add_item(&lp->sl_wordcount, hi, wc->wc_word, hash); 2167 } 2168 else 2169 { 2170 wc = HI2WC(hi); 2171 if ((wc->wc_count += count) < (unsigned)count) /* check for overflow */ 2172 wc->wc_count = MAXWORDCOUNT; 2173 } 2174 } 2175 2176 /* 2177 * Adjust the score of common words. 2178 */ 2179 static int 2180 score_wordcount_adj( 2181 slang_T *slang, 2182 int score, 2183 char_u *word, 2184 int split) /* word was split, less bonus */ 2185 { 2186 hashitem_T *hi; 2187 wordcount_T *wc; 2188 int bonus; 2189 int newscore; 2190 2191 hi = hash_find(&slang->sl_wordcount, word); 2192 if (!HASHITEM_EMPTY(hi)) 2193 { 2194 wc = HI2WC(hi); 2195 if (wc->wc_count < SCORE_THRES2) 2196 bonus = SCORE_COMMON1; 2197 else if (wc->wc_count < SCORE_THRES3) 2198 bonus = SCORE_COMMON2; 2199 else 2200 bonus = SCORE_COMMON3; 2201 if (split) 2202 newscore = score - bonus / 2; 2203 else 2204 newscore = score - bonus; 2205 if (newscore < 0) 2206 return 0; 2207 return newscore; 2208 } 2209 return score; 2210 } 2211 2212 2213 /* 2214 * Return TRUE if byte "n" appears in "str". 2215 * Like strchr() but independent of locale. 
2216 */ 2217 int 2218 byte_in_str(char_u *str, int n) 2219 { 2220 char_u *p; 2221 2222 for (p = str; *p != NUL; ++p) 2223 if (*p == n) 2224 return TRUE; 2225 return FALSE; 2226 } 2227 2228 #define SY_MAXLEN 30 2229 typedef struct syl_item_S 2230 { 2231 char_u sy_chars[SY_MAXLEN]; /* the sequence of chars */ 2232 int sy_len; 2233 } syl_item_T; 2234 2235 /* 2236 * Truncate "slang->sl_syllable" at the first slash and put the following items 2237 * in "slang->sl_syl_items". 2238 */ 2239 int 2240 init_syl_tab(slang_T *slang) 2241 { 2242 char_u *p; 2243 char_u *s; 2244 int l; 2245 syl_item_T *syl; 2246 2247 ga_init2(&slang->sl_syl_items, sizeof(syl_item_T), 4); 2248 p = vim_strchr(slang->sl_syllable, '/'); 2249 while (p != NULL) 2250 { 2251 *p++ = NUL; 2252 if (*p == NUL) /* trailing slash */ 2253 break; 2254 s = p; 2255 p = vim_strchr(p, '/'); 2256 if (p == NULL) 2257 l = (int)STRLEN(s); 2258 else 2259 l = (int)(p - s); 2260 if (l >= SY_MAXLEN) 2261 return SP_FORMERROR; 2262 if (ga_grow(&slang->sl_syl_items, 1) == FAIL) 2263 return SP_OTHERERROR; 2264 syl = ((syl_item_T *)slang->sl_syl_items.ga_data) 2265 + slang->sl_syl_items.ga_len++; 2266 vim_strncpy(syl->sy_chars, s, l); 2267 syl->sy_len = l; 2268 } 2269 return OK; 2270 } 2271 2272 /* 2273 * Count the number of syllables in "word". 2274 * When "word" contains spaces the syllables after the last space are counted. 2275 * Returns zero if syllables are not defines. 2276 */ 2277 static int 2278 count_syllables(slang_T *slang, char_u *word) 2279 { 2280 int cnt = 0; 2281 int skip = FALSE; 2282 char_u *p; 2283 int len; 2284 int i; 2285 syl_item_T *syl; 2286 int c; 2287 2288 if (slang->sl_syllable == NULL) 2289 return 0; 2290 2291 for (p = word; *p != NUL; p += len) 2292 { 2293 /* When running into a space reset counter. */ 2294 if (*p == ' ') 2295 { 2296 len = 1; 2297 cnt = 0; 2298 continue; 2299 } 2300 2301 /* Find longest match of syllable items. 
*/ 2302 len = 0; 2303 for (i = 0; i < slang->sl_syl_items.ga_len; ++i) 2304 { 2305 syl = ((syl_item_T *)slang->sl_syl_items.ga_data) + i; 2306 if (syl->sy_len > len 2307 && STRNCMP(p, syl->sy_chars, syl->sy_len) == 0) 2308 len = syl->sy_len; 2309 } 2310 if (len != 0) /* found a match, count syllable */ 2311 { 2312 ++cnt; 2313 skip = FALSE; 2314 } 2315 else 2316 { 2317 /* No recognized syllable item, at least a syllable char then? */ 2318 #ifdef FEAT_MBYTE 2319 c = mb_ptr2char(p); 2320 len = (*mb_ptr2len)(p); 2321 #else 2322 c = *p; 2323 len = 1; 2324 #endif 2325 if (vim_strchr(slang->sl_syllable, c) == NULL) 2326 skip = FALSE; /* No, search for next syllable */ 2327 else if (!skip) 2328 { 2329 ++cnt; /* Yes, count it */ 2330 skip = TRUE; /* don't count following syllable chars */ 2331 } 2332 } 2333 } 2334 return cnt; 2335 } 2336 2337 /* 2338 * Parse 'spelllang' and set w_s->b_langp accordingly. 2339 * Returns NULL if it's OK, an error message otherwise. 2340 */ 2341 char_u * 2342 did_set_spelllang(win_T *wp) 2343 { 2344 garray_T ga; 2345 char_u *splp; 2346 char_u *region; 2347 char_u region_cp[3]; 2348 int filename; 2349 int region_mask; 2350 slang_T *slang; 2351 int c; 2352 char_u lang[MAXWLEN + 1]; 2353 char_u spf_name[MAXPATHL]; 2354 int len; 2355 char_u *p; 2356 int round; 2357 char_u *spf; 2358 char_u *use_region = NULL; 2359 int dont_use_region = FALSE; 2360 int nobreak = FALSE; 2361 int i, j; 2362 langp_T *lp, *lp2; 2363 static int recursive = FALSE; 2364 char_u *ret_msg = NULL; 2365 char_u *spl_copy; 2366 #ifdef FEAT_AUTOCMD 2367 bufref_T bufref; 2368 2369 set_bufref(&bufref, wp->w_buffer); 2370 #endif 2371 2372 /* We don't want to do this recursively. May happen when a language is 2373 * not available and the SpellFileMissing autocommand opens a new buffer 2374 * in which 'spell' is set. 
*/ 2375 if (recursive) 2376 return NULL; 2377 recursive = TRUE; 2378 2379 ga_init2(&ga, sizeof(langp_T), 2); 2380 clear_midword(wp); 2381 2382 /* Make a copy of 'spelllang', the SpellFileMissing autocommands may change 2383 * it under our fingers. */ 2384 spl_copy = vim_strsave(wp->w_s->b_p_spl); 2385 if (spl_copy == NULL) 2386 goto theend; 2387 2388 #ifdef FEAT_MBYTE 2389 wp->w_s->b_cjk = 0; 2390 #endif 2391 2392 /* Loop over comma separated language names. */ 2393 for (splp = spl_copy; *splp != NUL; ) 2394 { 2395 /* Get one language name. */ 2396 copy_option_part(&splp, lang, MAXWLEN, ","); 2397 region = NULL; 2398 len = (int)STRLEN(lang); 2399 2400 if (STRCMP(lang, "cjk") == 0) 2401 { 2402 #ifdef FEAT_MBYTE 2403 wp->w_s->b_cjk = 1; 2404 #endif 2405 continue; 2406 } 2407 2408 /* If the name ends in ".spl" use it as the name of the spell file. 2409 * If there is a region name let "region" point to it and remove it 2410 * from the name. */ 2411 if (len > 4 && fnamecmp(lang + len - 4, ".spl") == 0) 2412 { 2413 filename = TRUE; 2414 2415 /* Locate a region and remove it from the file name. */ 2416 p = vim_strchr(gettail(lang), '_'); 2417 if (p != NULL && ASCII_ISALPHA(p[1]) && ASCII_ISALPHA(p[2]) 2418 && !ASCII_ISALPHA(p[3])) 2419 { 2420 vim_strncpy(region_cp, p + 1, 2); 2421 mch_memmove(p, p + 3, len - (p - lang) - 2); 2422 len -= 3; 2423 region = region_cp; 2424 } 2425 else 2426 dont_use_region = TRUE; 2427 2428 /* Check if we loaded this language before. */ 2429 for (slang = first_lang; slang != NULL; slang = slang->sl_next) 2430 if (fullpathcmp(lang, slang->sl_fname, FALSE) == FPC_SAME) 2431 break; 2432 } 2433 else 2434 { 2435 filename = FALSE; 2436 if (len > 3 && lang[len - 3] == '_') 2437 { 2438 region = lang + len - 2; 2439 len -= 3; 2440 lang[len] = NUL; 2441 } 2442 else 2443 dont_use_region = TRUE; 2444 2445 /* Check if we loaded this language before. 
*/ 2446 for (slang = first_lang; slang != NULL; slang = slang->sl_next) 2447 if (STRICMP(lang, slang->sl_name) == 0) 2448 break; 2449 } 2450 2451 if (region != NULL) 2452 { 2453 /* If the region differs from what was used before then don't 2454 * use it for 'spellfile'. */ 2455 if (use_region != NULL && STRCMP(region, use_region) != 0) 2456 dont_use_region = TRUE; 2457 use_region = region; 2458 } 2459 2460 /* If not found try loading the language now. */ 2461 if (slang == NULL) 2462 { 2463 if (filename) 2464 (void)spell_load_file(lang, lang, NULL, FALSE); 2465 else 2466 { 2467 spell_load_lang(lang); 2468 #ifdef FEAT_AUTOCMD 2469 /* SpellFileMissing autocommands may do anything, including 2470 * destroying the buffer we are using... */ 2471 if (!bufref_valid(&bufref)) 2472 { 2473 ret_msg = (char_u *)N_("E797: SpellFileMissing autocommand deleted buffer"); 2474 goto theend; 2475 } 2476 #endif 2477 } 2478 } 2479 2480 /* 2481 * Loop over the languages, there can be several files for "lang". 2482 */ 2483 for (slang = first_lang; slang != NULL; slang = slang->sl_next) 2484 if (filename ? fullpathcmp(lang, slang->sl_fname, FALSE) == FPC_SAME 2485 : STRICMP(lang, slang->sl_name) == 0) 2486 { 2487 region_mask = REGION_ALL; 2488 if (!filename && region != NULL) 2489 { 2490 /* find region in sl_regions */ 2491 c = find_region(slang->sl_regions, region); 2492 if (c == REGION_ALL) 2493 { 2494 if (slang->sl_add) 2495 { 2496 if (*slang->sl_regions != NUL) 2497 /* This addition file is for other regions. */ 2498 region_mask = 0; 2499 } 2500 else 2501 /* This is probably an error. Give a warning and 2502 * accept the words anyway. 
*/ 2503 smsg((char_u *) 2504 _("Warning: region %s not supported"), 2505 region); 2506 } 2507 else 2508 region_mask = 1 << c; 2509 } 2510 2511 if (region_mask != 0) 2512 { 2513 if (ga_grow(&ga, 1) == FAIL) 2514 { 2515 ga_clear(&ga); 2516 ret_msg = e_outofmem; 2517 goto theend; 2518 } 2519 LANGP_ENTRY(ga, ga.ga_len)->lp_slang = slang; 2520 LANGP_ENTRY(ga, ga.ga_len)->lp_region = region_mask; 2521 ++ga.ga_len; 2522 use_midword(slang, wp); 2523 if (slang->sl_nobreak) 2524 nobreak = TRUE; 2525 } 2526 } 2527 } 2528 2529 /* round 0: load int_wordlist, if possible. 2530 * round 1: load first name in 'spellfile'. 2531 * round 2: load second name in 'spellfile. 2532 * etc. */ 2533 spf = curwin->w_s->b_p_spf; 2534 for (round = 0; round == 0 || *spf != NUL; ++round) 2535 { 2536 if (round == 0) 2537 { 2538 /* Internal wordlist, if there is one. */ 2539 if (int_wordlist == NULL) 2540 continue; 2541 int_wordlist_spl(spf_name); 2542 } 2543 else 2544 { 2545 /* One entry in 'spellfile'. */ 2546 copy_option_part(&spf, spf_name, MAXPATHL - 5, ","); 2547 STRCAT(spf_name, ".spl"); 2548 2549 /* If it was already found above then skip it. */ 2550 for (c = 0; c < ga.ga_len; ++c) 2551 { 2552 p = LANGP_ENTRY(ga, c)->lp_slang->sl_fname; 2553 if (p != NULL && fullpathcmp(spf_name, p, FALSE) == FPC_SAME) 2554 break; 2555 } 2556 if (c < ga.ga_len) 2557 continue; 2558 } 2559 2560 /* Check if it was loaded already. */ 2561 for (slang = first_lang; slang != NULL; slang = slang->sl_next) 2562 if (fullpathcmp(spf_name, slang->sl_fname, FALSE) == FPC_SAME) 2563 break; 2564 if (slang == NULL) 2565 { 2566 /* Not loaded, try loading it now. The language name includes the 2567 * region name, the region is ignored otherwise. for int_wordlist 2568 * use an arbitrary name. 
*/ 2569 if (round == 0) 2570 STRCPY(lang, "internal wordlist"); 2571 else 2572 { 2573 vim_strncpy(lang, gettail(spf_name), MAXWLEN); 2574 p = vim_strchr(lang, '.'); 2575 if (p != NULL) 2576 *p = NUL; /* truncate at ".encoding.add" */ 2577 } 2578 slang = spell_load_file(spf_name, lang, NULL, TRUE); 2579 2580 /* If one of the languages has NOBREAK we assume the addition 2581 * files also have this. */ 2582 if (slang != NULL && nobreak) 2583 slang->sl_nobreak = TRUE; 2584 } 2585 if (slang != NULL && ga_grow(&ga, 1) == OK) 2586 { 2587 region_mask = REGION_ALL; 2588 if (use_region != NULL && !dont_use_region) 2589 { 2590 /* find region in sl_regions */ 2591 c = find_region(slang->sl_regions, use_region); 2592 if (c != REGION_ALL) 2593 region_mask = 1 << c; 2594 else if (*slang->sl_regions != NUL) 2595 /* This spell file is for other regions. */ 2596 region_mask = 0; 2597 } 2598 2599 if (region_mask != 0) 2600 { 2601 LANGP_ENTRY(ga, ga.ga_len)->lp_slang = slang; 2602 LANGP_ENTRY(ga, ga.ga_len)->lp_sallang = NULL; 2603 LANGP_ENTRY(ga, ga.ga_len)->lp_replang = NULL; 2604 LANGP_ENTRY(ga, ga.ga_len)->lp_region = region_mask; 2605 ++ga.ga_len; 2606 use_midword(slang, wp); 2607 } 2608 } 2609 } 2610 2611 /* Everything is fine, store the new b_langp value. */ 2612 ga_clear(&wp->w_s->b_langp); 2613 wp->w_s->b_langp = ga; 2614 2615 /* For each language figure out what language to use for sound folding and 2616 * REP items. If the language doesn't support it itself use another one 2617 * with the same name. E.g. for "en-math" use "en". 
*/ 2618 for (i = 0; i < ga.ga_len; ++i) 2619 { 2620 lp = LANGP_ENTRY(ga, i); 2621 2622 /* sound folding */ 2623 if (lp->lp_slang->sl_sal.ga_len > 0) 2624 /* language does sound folding itself */ 2625 lp->lp_sallang = lp->lp_slang; 2626 else 2627 /* find first similar language that does sound folding */ 2628 for (j = 0; j < ga.ga_len; ++j) 2629 { 2630 lp2 = LANGP_ENTRY(ga, j); 2631 if (lp2->lp_slang->sl_sal.ga_len > 0 2632 && STRNCMP(lp->lp_slang->sl_name, 2633 lp2->lp_slang->sl_name, 2) == 0) 2634 { 2635 lp->lp_sallang = lp2->lp_slang; 2636 break; 2637 } 2638 } 2639 2640 /* REP items */ 2641 if (lp->lp_slang->sl_rep.ga_len > 0) 2642 /* language has REP items itself */ 2643 lp->lp_replang = lp->lp_slang; 2644 else 2645 /* find first similar language that has REP items */ 2646 for (j = 0; j < ga.ga_len; ++j) 2647 { 2648 lp2 = LANGP_ENTRY(ga, j); 2649 if (lp2->lp_slang->sl_rep.ga_len > 0 2650 && STRNCMP(lp->lp_slang->sl_name, 2651 lp2->lp_slang->sl_name, 2) == 0) 2652 { 2653 lp->lp_replang = lp2->lp_slang; 2654 break; 2655 } 2656 } 2657 } 2658 2659 theend: 2660 vim_free(spl_copy); 2661 recursive = FALSE; 2662 redraw_win_later(wp, NOT_VALID); 2663 return ret_msg; 2664 } 2665 2666 /* 2667 * Clear the midword characters for buffer "buf". 2668 */ 2669 static void 2670 clear_midword(win_T *wp) 2671 { 2672 vim_memset(wp->w_s->b_spell_ismw, 0, 256); 2673 #ifdef FEAT_MBYTE 2674 vim_free(wp->w_s->b_spell_ismw_mb); 2675 wp->w_s->b_spell_ismw_mb = NULL; 2676 #endif 2677 } 2678 2679 /* 2680 * Use the "sl_midword" field of language "lp" for buffer "buf". 2681 * They add up to any currently used midword characters. 
2682 */ 2683 static void 2684 use_midword(slang_T *lp, win_T *wp) 2685 { 2686 char_u *p; 2687 2688 if (lp->sl_midword == NULL) /* there aren't any */ 2689 return; 2690 2691 for (p = lp->sl_midword; *p != NUL; ) 2692 #ifdef FEAT_MBYTE 2693 if (has_mbyte) 2694 { 2695 int c, l, n; 2696 char_u *bp; 2697 2698 c = mb_ptr2char(p); 2699 l = (*mb_ptr2len)(p); 2700 if (c < 256 && l <= 2) 2701 wp->w_s->b_spell_ismw[c] = TRUE; 2702 else if (wp->w_s->b_spell_ismw_mb == NULL) 2703 /* First multi-byte char in "b_spell_ismw_mb". */ 2704 wp->w_s->b_spell_ismw_mb = vim_strnsave(p, l); 2705 else 2706 { 2707 /* Append multi-byte chars to "b_spell_ismw_mb". */ 2708 n = (int)STRLEN(wp->w_s->b_spell_ismw_mb); 2709 bp = vim_strnsave(wp->w_s->b_spell_ismw_mb, n + l); 2710 if (bp != NULL) 2711 { 2712 vim_free(wp->w_s->b_spell_ismw_mb); 2713 wp->w_s->b_spell_ismw_mb = bp; 2714 vim_strncpy(bp + n, p, l); 2715 } 2716 } 2717 p += l; 2718 } 2719 else 2720 #endif 2721 wp->w_s->b_spell_ismw[*p++] = TRUE; 2722 } 2723 2724 /* 2725 * Find the region "region[2]" in "rp" (points to "sl_regions"). 2726 * Each region is simply stored as the two characters of it's name. 2727 * Returns the index if found (first is 0), REGION_ALL if not found. 2728 */ 2729 static int 2730 find_region(char_u *rp, char_u *region) 2731 { 2732 int i; 2733 2734 for (i = 0; ; i += 2) 2735 { 2736 if (rp[i] == NUL) 2737 return REGION_ALL; 2738 if (rp[i] == region[0] && rp[i + 1] == region[1]) 2739 break; 2740 } 2741 return i / 2; 2742 } 2743 2744 /* 2745 * Return case type of word: 2746 * w word 0 2747 * Word WF_ONECAP 2748 * W WORD WF_ALLCAP 2749 * WoRd wOrd WF_KEEPCAP 2750 */ 2751 int 2752 captype( 2753 char_u *word, 2754 char_u *end) /* When NULL use up to NUL byte. */ 2755 { 2756 char_u *p; 2757 int c; 2758 int firstcap; 2759 int allcap; 2760 int past_second = FALSE; /* past second word char */ 2761 2762 /* find first letter */ 2763 for (p = word; !spell_iswordp_nmw(p, curwin); mb_ptr_adv(p)) 2764 if (end == NULL ? 
*p == NUL : p >= end) 2765 return 0; /* only non-word characters, illegal word */ 2766 #ifdef FEAT_MBYTE 2767 if (has_mbyte) 2768 c = mb_ptr2char_adv(&p); 2769 else 2770 #endif 2771 c = *p++; 2772 firstcap = allcap = SPELL_ISUPPER(c); 2773 2774 /* 2775 * Need to check all letters to find a word with mixed upper/lower. 2776 * But a word with an upper char only at start is a ONECAP. 2777 */ 2778 for ( ; end == NULL ? *p != NUL : p < end; mb_ptr_adv(p)) 2779 if (spell_iswordp_nmw(p, curwin)) 2780 { 2781 c = PTR2CHAR(p); 2782 if (!SPELL_ISUPPER(c)) 2783 { 2784 /* UUl -> KEEPCAP */ 2785 if (past_second && allcap) 2786 return WF_KEEPCAP; 2787 allcap = FALSE; 2788 } 2789 else if (!allcap) 2790 /* UlU -> KEEPCAP */ 2791 return WF_KEEPCAP; 2792 past_second = TRUE; 2793 } 2794 2795 if (allcap) 2796 return WF_ALLCAP; 2797 if (firstcap) 2798 return WF_ONECAP; 2799 return 0; 2800 } 2801 2802 /* 2803 * Like captype() but for a KEEPCAP word add ONECAP if the word starts with a 2804 * capital. So that make_case_word() can turn WOrd into Word. 2805 * Add ALLCAP for "WOrD". 2806 */ 2807 static int 2808 badword_captype(char_u *word, char_u *end) 2809 { 2810 int flags = captype(word, end); 2811 int c; 2812 int l, u; 2813 int first; 2814 char_u *p; 2815 2816 if (flags & WF_KEEPCAP) 2817 { 2818 /* Count the number of UPPER and lower case letters. */ 2819 l = u = 0; 2820 first = FALSE; 2821 for (p = word; p < end; mb_ptr_adv(p)) 2822 { 2823 c = PTR2CHAR(p); 2824 if (SPELL_ISUPPER(c)) 2825 { 2826 ++u; 2827 if (p == word) 2828 first = TRUE; 2829 } 2830 else 2831 ++l; 2832 } 2833 2834 /* If there are more UPPER than lower case letters suggest an 2835 * ALLCAP word. Otherwise, if the first letter is UPPER then 2836 * suggest ONECAP. Exception: "ALl" most likely should be "All", 2837 * require three upper case letters. 
*/ 2838 if (u > l && u > 2) 2839 flags |= WF_ALLCAP; 2840 else if (first) 2841 flags |= WF_ONECAP; 2842 2843 if (u >= 2 && l >= 2) /* maCARONI maCAroni */ 2844 flags |= WF_MIXCAP; 2845 } 2846 return flags; 2847 } 2848 2849 /* 2850 * Delete the internal wordlist and its .spl file. 2851 */ 2852 void 2853 spell_delete_wordlist(void) 2854 { 2855 char_u fname[MAXPATHL]; 2856 2857 if (int_wordlist != NULL) 2858 { 2859 mch_remove(int_wordlist); 2860 int_wordlist_spl(fname); 2861 mch_remove(fname); 2862 vim_free(int_wordlist); 2863 int_wordlist = NULL; 2864 } 2865 } 2866 2867 #if defined(FEAT_MBYTE) || defined(EXITFREE) || defined(PROTO) 2868 /* 2869 * Free all languages. 2870 */ 2871 void 2872 spell_free_all(void) 2873 { 2874 slang_T *slang; 2875 buf_T *buf; 2876 2877 /* Go through all buffers and handle 'spelllang'. <VN> */ 2878 FOR_ALL_BUFFERS(buf) 2879 ga_clear(&buf->b_s.b_langp); 2880 2881 while (first_lang != NULL) 2882 { 2883 slang = first_lang; 2884 first_lang = slang->sl_next; 2885 slang_free(slang); 2886 } 2887 2888 spell_delete_wordlist(); 2889 2890 vim_free(repl_to); 2891 repl_to = NULL; 2892 vim_free(repl_from); 2893 repl_from = NULL; 2894 } 2895 #endif 2896 2897 #if defined(FEAT_MBYTE) || defined(PROTO) 2898 /* 2899 * Clear all spelling tables and reload them. 2900 * Used after 'encoding' is set and when ":mkspell" was used. 2901 */ 2902 void 2903 spell_reload(void) 2904 { 2905 win_T *wp; 2906 2907 /* Initialize the table for spell_iswordp(). */ 2908 init_spell_chartab(); 2909 2910 /* Unload all allocated memory. */ 2911 spell_free_all(); 2912 2913 /* Go through all buffers and handle 'spelllang'. */ 2914 FOR_ALL_WINDOWS(wp) 2915 { 2916 /* Only load the wordlists when 'spelllang' is set and there is a 2917 * window for this buffer in which 'spell' is set. 
*/ 2918 if (*wp->w_s->b_p_spl != NUL) 2919 { 2920 if (wp->w_p_spell) 2921 { 2922 (void)did_set_spelllang(wp); 2923 # ifdef FEAT_WINDOWS 2924 break; 2925 # endif 2926 } 2927 } 2928 } 2929 } 2930 #endif 2931 2932 /* 2933 * Opposite of offset2bytes(). 2934 * "pp" points to the bytes and is advanced over it. 2935 * Returns the offset. 2936 */ 2937 static int 2938 bytes2offset(char_u **pp) 2939 { 2940 char_u *p = *pp; 2941 int nr; 2942 int c; 2943 2944 c = *p++; 2945 if ((c & 0x80) == 0x00) /* 1 byte */ 2946 { 2947 nr = c - 1; 2948 } 2949 else if ((c & 0xc0) == 0x80) /* 2 bytes */ 2950 { 2951 nr = (c & 0x3f) - 1; 2952 nr = nr * 255 + (*p++ - 1); 2953 } 2954 else if ((c & 0xe0) == 0xc0) /* 3 bytes */ 2955 { 2956 nr = (c & 0x1f) - 1; 2957 nr = nr * 255 + (*p++ - 1); 2958 nr = nr * 255 + (*p++ - 1); 2959 } 2960 else /* 4 bytes */ 2961 { 2962 nr = (c & 0x0f) - 1; 2963 nr = nr * 255 + (*p++ - 1); 2964 nr = nr * 255 + (*p++ - 1); 2965 nr = nr * 255 + (*p++ - 1); 2966 } 2967 2968 *pp = p; 2969 return nr; 2970 } 2971 2972 2973 /* 2974 * Open a spell buffer. This is a nameless buffer that is not in the buffer 2975 * list and only contains text lines. Can use a swapfile to reduce memory 2976 * use. 2977 * Most other fields are invalid! Esp. watch out for string options being 2978 * NULL and there is no undo info. 2979 * Returns NULL when out of memory. 2980 */ 2981 buf_T * 2982 open_spellbuf(void) 2983 { 2984 buf_T *buf; 2985 2986 buf = (buf_T *)alloc_clear(sizeof(buf_T)); 2987 if (buf != NULL) 2988 { 2989 buf->b_spell = TRUE; 2990 buf->b_p_swf = TRUE; /* may create a swap file */ 2991 #ifdef FEAT_CRYPT 2992 buf->b_p_key = empty_option; 2993 #endif 2994 ml_open(buf); 2995 ml_open_file(buf); /* create swap file now */ 2996 } 2997 return buf; 2998 } 2999 3000 /* 3001 * Close the buffer used for spell info. 
3002 */ 3003 void 3004 close_spellbuf(buf_T *buf) 3005 { 3006 if (buf != NULL) 3007 { 3008 ml_close(buf, TRUE); 3009 vim_free(buf); 3010 } 3011 } 3012 3013 /* 3014 * Init the chartab used for spelling for ASCII. 3015 * EBCDIC is not supported! 3016 */ 3017 void 3018 clear_spell_chartab(spelltab_T *sp) 3019 { 3020 int i; 3021 3022 /* Init everything to FALSE. */ 3023 vim_memset(sp->st_isw, FALSE, sizeof(sp->st_isw)); 3024 vim_memset(sp->st_isu, FALSE, sizeof(sp->st_isu)); 3025 for (i = 0; i < 256; ++i) 3026 { 3027 sp->st_fold[i] = i; 3028 sp->st_upper[i] = i; 3029 } 3030 3031 /* We include digits. A word shouldn't start with a digit, but handling 3032 * that is done separately. */ 3033 for (i = '0'; i <= '9'; ++i) 3034 sp->st_isw[i] = TRUE; 3035 for (i = 'A'; i <= 'Z'; ++i) 3036 { 3037 sp->st_isw[i] = TRUE; 3038 sp->st_isu[i] = TRUE; 3039 sp->st_fold[i] = i + 0x20; 3040 } 3041 for (i = 'a'; i <= 'z'; ++i) 3042 { 3043 sp->st_isw[i] = TRUE; 3044 sp->st_upper[i] = i - 0x20; 3045 } 3046 } 3047 3048 /* 3049 * Init the chartab used for spelling. Only depends on 'encoding'. 3050 * Called once while starting up and when 'encoding' changes. 3051 * The default is to use isalpha(), but the spell file should define the word 3052 * characters to make it possible that 'encoding' differs from the current 3053 * locale. For utf-8 we don't use isalpha() but our own functions. 3054 */ 3055 void 3056 init_spell_chartab(void) 3057 { 3058 int i; 3059 3060 did_set_spelltab = FALSE; 3061 clear_spell_chartab(&spelltab); 3062 #ifdef FEAT_MBYTE 3063 if (enc_dbcs) 3064 { 3065 /* DBCS: assume double-wide characters are word characters. 
*/ 3066 for (i = 128; i <= 255; ++i) 3067 if (MB_BYTE2LEN(i) == 2) 3068 spelltab.st_isw[i] = TRUE; 3069 } 3070 else if (enc_utf8) 3071 { 3072 for (i = 128; i < 256; ++i) 3073 { 3074 int f = utf_fold(i); 3075 int u = utf_toupper(i); 3076 3077 spelltab.st_isu[i] = utf_isupper(i); 3078 spelltab.st_isw[i] = spelltab.st_isu[i] || utf_islower(i); 3079 /* The folded/upper-cased value is different between latin1 and 3080 * utf8 for 0xb5, causing E763 for no good reason. Use the latin1 3081 * value for utf-8 to avoid this. */ 3082 spelltab.st_fold[i] = (f < 256) ? f : i; 3083 spelltab.st_upper[i] = (u < 256) ? u : i; 3084 } 3085 } 3086 else 3087 #endif 3088 { 3089 /* Rough guess: use locale-dependent library functions. */ 3090 for (i = 128; i < 256; ++i) 3091 { 3092 if (MB_ISUPPER(i)) 3093 { 3094 spelltab.st_isw[i] = TRUE; 3095 spelltab.st_isu[i] = TRUE; 3096 spelltab.st_fold[i] = MB_TOLOWER(i); 3097 } 3098 else if (MB_ISLOWER(i)) 3099 { 3100 spelltab.st_isw[i] = TRUE; 3101 spelltab.st_upper[i] = MB_TOUPPER(i); 3102 } 3103 } 3104 } 3105 } 3106 3107 3108 /* 3109 * Return TRUE if "p" points to a word character. 3110 * As a special case we see "midword" characters as word character when it is 3111 * followed by a word character. This finds they'there but not 'they there'. 3112 * Thus this only works properly when past the first character of the word. 3113 */ 3114 static int 3115 spell_iswordp( 3116 char_u *p, 3117 win_T *wp) /* buffer used */ 3118 { 3119 #ifdef FEAT_MBYTE 3120 char_u *s; 3121 int l; 3122 int c; 3123 3124 if (has_mbyte) 3125 { 3126 l = MB_BYTE2LEN(*p); 3127 s = p; 3128 if (l == 1) 3129 { 3130 /* be quick for ASCII */ 3131 if (wp->w_s->b_spell_ismw[*p]) 3132 s = p + 1; /* skip a mid-word character */ 3133 } 3134 else 3135 { 3136 c = mb_ptr2char(p); 3137 if (c < 256 ? 
wp->w_s->b_spell_ismw[c] 3138 : (wp->w_s->b_spell_ismw_mb != NULL 3139 && vim_strchr(wp->w_s->b_spell_ismw_mb, c) != NULL)) 3140 s = p + l; 3141 } 3142 3143 c = mb_ptr2char(s); 3144 if (c > 255) 3145 return spell_mb_isword_class(mb_get_class(s), wp); 3146 return spelltab.st_isw[c]; 3147 } 3148 #endif 3149 3150 return spelltab.st_isw[wp->w_s->b_spell_ismw[*p] ? p[1] : p[0]]; 3151 } 3152 3153 /* 3154 * Return TRUE if "p" points to a word character. 3155 * Unlike spell_iswordp() this doesn't check for "midword" characters. 3156 */ 3157 int 3158 spell_iswordp_nmw(char_u *p, win_T *wp) 3159 { 3160 #ifdef FEAT_MBYTE 3161 int c; 3162 3163 if (has_mbyte) 3164 { 3165 c = mb_ptr2char(p); 3166 if (c > 255) 3167 return spell_mb_isword_class(mb_get_class(p), wp); 3168 return spelltab.st_isw[c]; 3169 } 3170 #endif 3171 return spelltab.st_isw[*p]; 3172 } 3173 3174 #ifdef FEAT_MBYTE 3175 /* 3176 * Return TRUE if word class indicates a word character. 3177 * Only for characters above 255. 3178 * Unicode subscript and superscript are not considered word characters. 3179 * See also dbcs_class() and utf_class() in mbyte.c. 3180 */ 3181 static int 3182 spell_mb_isword_class(int cl, win_T *wp) 3183 { 3184 if (wp->w_s->b_cjk) 3185 /* East Asian characters are not considered word characters. */ 3186 return cl == 2 || cl == 0x2800; 3187 return cl >= 2 && cl != 0x2070 && cl != 0x2080; 3188 } 3189 3190 /* 3191 * Return TRUE if "p" points to a word character. 3192 * Wide version of spell_iswordp(). 3193 */ 3194 static int 3195 spell_iswordp_w(int *p, win_T *wp) 3196 { 3197 int *s; 3198 3199 if (*p < 256 ? 
wp->w_s->b_spell_ismw[*p] 3200 : (wp->w_s->b_spell_ismw_mb != NULL 3201 && vim_strchr(wp->w_s->b_spell_ismw_mb, *p) != NULL)) 3202 s = p + 1; 3203 else 3204 s = p; 3205 3206 if (*s > 255) 3207 { 3208 if (enc_utf8) 3209 return spell_mb_isword_class(utf_class(*s), wp); 3210 if (enc_dbcs) 3211 return spell_mb_isword_class( 3212 dbcs_class((unsigned)*s >> 8, *s & 0xff), wp); 3213 return 0; 3214 } 3215 return spelltab.st_isw[*s]; 3216 } 3217 #endif 3218 3219 /* 3220 * Case-fold "str[len]" into "buf[buflen]". The result is NUL terminated. 3221 * Uses the character definitions from the .spl file. 3222 * When using a multi-byte 'encoding' the length may change! 3223 * Returns FAIL when something wrong. 3224 */ 3225 int 3226 spell_casefold( 3227 char_u *str, 3228 int len, 3229 char_u *buf, 3230 int buflen) 3231 { 3232 int i; 3233 3234 if (len >= buflen) 3235 { 3236 buf[0] = NUL; 3237 return FAIL; /* result will not fit */ 3238 } 3239 3240 #ifdef FEAT_MBYTE 3241 if (has_mbyte) 3242 { 3243 int outi = 0; 3244 char_u *p; 3245 int c; 3246 3247 /* Fold one character at a time. */ 3248 for (p = str; p < str + len; ) 3249 { 3250 if (outi + MB_MAXBYTES > buflen) 3251 { 3252 buf[outi] = NUL; 3253 return FAIL; 3254 } 3255 c = mb_cptr2char_adv(&p); 3256 outi += mb_char2bytes(SPELL_TOFOLD(c), buf + outi); 3257 } 3258 buf[outi] = NUL; 3259 } 3260 else 3261 #endif 3262 { 3263 /* Be quick for non-multibyte encodings. */ 3264 for (i = 0; i < len; ++i) 3265 buf[i] = spelltab.st_fold[str[i]]; 3266 buf[i] = NUL; 3267 } 3268 3269 return OK; 3270 } 3271 3272 /* values for sps_flags */ 3273 #define SPS_BEST 1 3274 #define SPS_FAST 2 3275 #define SPS_DOUBLE 4 3276 3277 static int sps_flags = SPS_BEST; /* flags from 'spellsuggest' */ 3278 static int sps_limit = 9999; /* max nr of suggestions given */ 3279 3280 /* 3281 * Check the 'spellsuggest' option. Return FAIL if it's wrong. 3282 * Sets "sps_flags" and "sps_limit". 
3283 */ 3284 int 3285 spell_check_sps(void) 3286 { 3287 char_u *p; 3288 char_u *s; 3289 char_u buf[MAXPATHL]; 3290 int f; 3291 3292 sps_flags = 0; 3293 sps_limit = 9999; 3294 3295 for (p = p_sps; *p != NUL; ) 3296 { 3297 copy_option_part(&p, buf, MAXPATHL, ","); 3298 3299 f = 0; 3300 if (VIM_ISDIGIT(*buf)) 3301 { 3302 s = buf; 3303 sps_limit = getdigits(&s); 3304 if (*s != NUL && !VIM_ISDIGIT(*s)) 3305 f = -1; 3306 } 3307 else if (STRCMP(buf, "best") == 0) 3308 f = SPS_BEST; 3309 else if (STRCMP(buf, "fast") == 0) 3310 f = SPS_FAST; 3311 else if (STRCMP(buf, "double") == 0) 3312 f = SPS_DOUBLE; 3313 else if (STRNCMP(buf, "expr:", 5) != 0 3314 && STRNCMP(buf, "file:", 5) != 0) 3315 f = -1; 3316 3317 if (f == -1 || (sps_flags != 0 && f != 0)) 3318 { 3319 sps_flags = SPS_BEST; 3320 sps_limit = 9999; 3321 return FAIL; 3322 } 3323 if (f != 0) 3324 sps_flags = f; 3325 } 3326 3327 if (sps_flags == 0) 3328 sps_flags = SPS_BEST; 3329 3330 return OK; 3331 } 3332 3333 /* 3334 * "z=": Find badly spelled word under or after the cursor. 3335 * Give suggestions for the properly spelled word. 3336 * In Visual mode use the highlighted word as the bad word. 3337 * When "count" is non-zero use that suggestion. 3338 */ 3339 void 3340 spell_suggest(int count) 3341 { 3342 char_u *line; 3343 pos_T prev_cursor = curwin->w_cursor; 3344 char_u wcopy[MAXWLEN + 2]; 3345 char_u *p; 3346 int i; 3347 int c; 3348 suginfo_T sug; 3349 suggest_T *stp; 3350 int mouse_used; 3351 int need_cap; 3352 int limit; 3353 int selected = count; 3354 int badlen = 0; 3355 int msg_scroll_save = msg_scroll; 3356 3357 if (no_spell_checking(curwin)) 3358 return; 3359 3360 if (VIsual_active) 3361 { 3362 /* Use the Visually selected text as the bad word. But reject 3363 * a multi-line selection. 
*/ 3364 if (curwin->w_cursor.lnum != VIsual.lnum) 3365 { 3366 vim_beep(BO_SPELL); 3367 return; 3368 } 3369 badlen = (int)curwin->w_cursor.col - (int)VIsual.col; 3370 if (badlen < 0) 3371 badlen = -badlen; 3372 else 3373 curwin->w_cursor.col = VIsual.col; 3374 ++badlen; 3375 end_visual_mode(); 3376 } 3377 /* Find the start of the badly spelled word. */ 3378 else if (spell_move_to(curwin, FORWARD, TRUE, TRUE, NULL) == 0 3379 || curwin->w_cursor.col > prev_cursor.col) 3380 { 3381 /* No bad word or it starts after the cursor: use the word under the 3382 * cursor. */ 3383 curwin->w_cursor = prev_cursor; 3384 line = ml_get_curline(); 3385 p = line + curwin->w_cursor.col; 3386 /* Backup to before start of word. */ 3387 while (p > line && spell_iswordp_nmw(p, curwin)) 3388 mb_ptr_back(line, p); 3389 /* Forward to start of word. */ 3390 while (*p != NUL && !spell_iswordp_nmw(p, curwin)) 3391 mb_ptr_adv(p); 3392 3393 if (!spell_iswordp_nmw(p, curwin)) /* No word found. */ 3394 { 3395 beep_flush(); 3396 return; 3397 } 3398 curwin->w_cursor.col = (colnr_T)(p - line); 3399 } 3400 3401 /* Get the word and its length. */ 3402 3403 /* Figure out if the word should be capitalised. */ 3404 need_cap = check_need_cap(curwin->w_cursor.lnum, curwin->w_cursor.col); 3405 3406 /* Make a copy of current line since autocommands may free the line. */ 3407 line = vim_strsave(ml_get_curline()); 3408 if (line == NULL) 3409 goto skip; 3410 3411 /* Get the list of suggestions. Limit to 'lines' - 2 or the number in 3412 * 'spellsuggest', whatever is smaller. 
*/ 3413 if (sps_limit > (int)Rows - 2) 3414 limit = (int)Rows - 2; 3415 else 3416 limit = sps_limit; 3417 spell_find_suggest(line + curwin->w_cursor.col, badlen, &sug, limit, 3418 TRUE, need_cap, TRUE); 3419 3420 if (sug.su_ga.ga_len == 0) 3421 MSG(_("Sorry, no suggestions")); 3422 else if (count > 0) 3423 { 3424 if (count > sug.su_ga.ga_len) 3425 smsg((char_u *)_("Sorry, only %ld suggestions"), 3426 (long)sug.su_ga.ga_len); 3427 } 3428 else 3429 { 3430 vim_free(repl_from); 3431 repl_from = NULL; 3432 vim_free(repl_to); 3433 repl_to = NULL; 3434 3435 #ifdef FEAT_RIGHTLEFT 3436 /* When 'rightleft' is set the list is drawn right-left. */ 3437 cmdmsg_rl = curwin->w_p_rl; 3438 if (cmdmsg_rl) 3439 msg_col = Columns - 1; 3440 #endif 3441 3442 /* List the suggestions. */ 3443 msg_start(); 3444 msg_row = Rows - 1; /* for when 'cmdheight' > 1 */ 3445 lines_left = Rows; /* avoid more prompt */ 3446 vim_snprintf((char *)IObuff, IOSIZE, _("Change \"%.*s\" to:"), 3447 sug.su_badlen, sug.su_badptr); 3448 #ifdef FEAT_RIGHTLEFT 3449 if (cmdmsg_rl && STRNCMP(IObuff, "Change", 6) == 0) 3450 { 3451 /* And now the rabbit from the high hat: Avoid showing the 3452 * untranslated message rightleft. */ 3453 vim_snprintf((char *)IObuff, IOSIZE, ":ot \"%.*s\" egnahC", 3454 sug.su_badlen, sug.su_badptr); 3455 } 3456 #endif 3457 msg_puts(IObuff); 3458 msg_clr_eos(); 3459 msg_putchar('\n'); 3460 3461 msg_scroll = TRUE; 3462 for (i = 0; i < sug.su_ga.ga_len; ++i) 3463 { 3464 stp = &SUG(sug.su_ga, i); 3465 3466 /* The suggested word may replace only part of the bad word, add 3467 * the not replaced part. 
*/ 3468 vim_strncpy(wcopy, stp->st_word, MAXWLEN); 3469 if (sug.su_badlen > stp->st_orglen) 3470 vim_strncpy(wcopy + stp->st_wordlen, 3471 sug.su_badptr + stp->st_orglen, 3472 sug.su_badlen - stp->st_orglen); 3473 vim_snprintf((char *)IObuff, IOSIZE, "%2d", i + 1); 3474 #ifdef FEAT_RIGHTLEFT 3475 if (cmdmsg_rl) 3476 rl_mirror(IObuff); 3477 #endif 3478 msg_puts(IObuff); 3479 3480 vim_snprintf((char *)IObuff, IOSIZE, " \"%s\"", wcopy); 3481 msg_puts(IObuff); 3482 3483 /* The word may replace more than "su_badlen". */ 3484 if (sug.su_badlen < stp->st_orglen) 3485 { 3486 vim_snprintf((char *)IObuff, IOSIZE, _(" < \"%.*s\""), 3487 stp->st_orglen, sug.su_badptr); 3488 msg_puts(IObuff); 3489 } 3490 3491 if (p_verbose > 0) 3492 { 3493 /* Add the score. */ 3494 if (sps_flags & (SPS_DOUBLE | SPS_BEST)) 3495 vim_snprintf((char *)IObuff, IOSIZE, " (%s%d - %d)", 3496 stp->st_salscore ? "s " : "", 3497 stp->st_score, stp->st_altscore); 3498 else 3499 vim_snprintf((char *)IObuff, IOSIZE, " (%d)", 3500 stp->st_score); 3501 #ifdef FEAT_RIGHTLEFT 3502 if (cmdmsg_rl) 3503 /* Mirror the numbers, but keep the leading space. */ 3504 rl_mirror(IObuff + 1); 3505 #endif 3506 msg_advance(30); 3507 msg_puts(IObuff); 3508 } 3509 msg_putchar('\n'); 3510 } 3511 3512 #ifdef FEAT_RIGHTLEFT 3513 cmdmsg_rl = FALSE; 3514 msg_col = 0; 3515 #endif 3516 /* Ask for choice. */ 3517 selected = prompt_for_number(&mouse_used); 3518 if (mouse_used) 3519 selected -= lines_left; 3520 lines_left = Rows; /* avoid more prompt */ 3521 /* don't delay for 'smd' in normal_cmd() */ 3522 msg_scroll = msg_scroll_save; 3523 } 3524 3525 if (selected > 0 && selected <= sug.su_ga.ga_len && u_save_cursor() == OK) 3526 { 3527 /* Save the from and to text for :spellrepall. */ 3528 stp = &SUG(sug.su_ga, selected - 1); 3529 if (sug.su_badlen > stp->st_orglen) 3530 { 3531 /* Replacing less than "su_badlen", append the remainder to 3532 * repl_to. 
*/ 3533 repl_from = vim_strnsave(sug.su_badptr, sug.su_badlen); 3534 vim_snprintf((char *)IObuff, IOSIZE, "%s%.*s", stp->st_word, 3535 sug.su_badlen - stp->st_orglen, 3536 sug.su_badptr + stp->st_orglen); 3537 repl_to = vim_strsave(IObuff); 3538 } 3539 else 3540 { 3541 /* Replacing su_badlen or more, use the whole word. */ 3542 repl_from = vim_strnsave(sug.su_badptr, stp->st_orglen); 3543 repl_to = vim_strsave(stp->st_word); 3544 } 3545 3546 /* Replace the word. */ 3547 p = alloc((unsigned)STRLEN(line) - stp->st_orglen 3548 + stp->st_wordlen + 1); 3549 if (p != NULL) 3550 { 3551 c = (int)(sug.su_badptr - line); 3552 mch_memmove(p, line, c); 3553 STRCPY(p + c, stp->st_word); 3554 STRCAT(p, sug.su_badptr + stp->st_orglen); 3555 ml_replace(curwin->w_cursor.lnum, p, FALSE); 3556 curwin->w_cursor.col = c; 3557 3558 /* For redo we use a change-word command. */ 3559 ResetRedobuff(); 3560 AppendToRedobuff((char_u *)"ciw"); 3561 AppendToRedobuffLit(p + c, 3562 stp->st_wordlen + sug.su_badlen - stp->st_orglen); 3563 AppendCharToRedobuff(ESC); 3564 3565 /* After this "p" may be invalid. */ 3566 changed_bytes(curwin->w_cursor.lnum, c); 3567 } 3568 } 3569 else 3570 curwin->w_cursor = prev_cursor; 3571 3572 spell_find_cleanup(&sug); 3573 skip: 3574 vim_free(line); 3575 } 3576 3577 /* 3578 * Check if the word at line "lnum" column "col" is required to start with a 3579 * capital. This uses 'spellcapcheck' of the current buffer. 3580 */ 3581 static int 3582 check_need_cap(linenr_T lnum, colnr_T col) 3583 { 3584 int need_cap = FALSE; 3585 char_u *line; 3586 char_u *line_copy = NULL; 3587 char_u *p; 3588 colnr_T endcol; 3589 regmatch_T regmatch; 3590 3591 if (curwin->w_s->b_cap_prog == NULL) 3592 return FALSE; 3593 3594 line = ml_get_curline(); 3595 endcol = 0; 3596 if ((int)(skipwhite(line) - line) >= (int)col) 3597 { 3598 /* At start of line, check if previous line is empty or sentence 3599 * ends there. 
*/ 3600 if (lnum == 1) 3601 need_cap = TRUE; 3602 else 3603 { 3604 line = ml_get(lnum - 1); 3605 if (*skipwhite(line) == NUL) 3606 need_cap = TRUE; 3607 else 3608 { 3609 /* Append a space in place of the line break. */ 3610 line_copy = concat_str(line, (char_u *)" "); 3611 line = line_copy; 3612 endcol = (colnr_T)STRLEN(line); 3613 } 3614 } 3615 } 3616 else 3617 endcol = col; 3618 3619 if (endcol > 0) 3620 { 3621 /* Check if sentence ends before the bad word. */ 3622 regmatch.regprog = curwin->w_s->b_cap_prog; 3623 regmatch.rm_ic = FALSE; 3624 p = line + endcol; 3625 for (;;) 3626 { 3627 mb_ptr_back(line, p); 3628 if (p == line || spell_iswordp_nmw(p, curwin)) 3629 break; 3630 if (vim_regexec(®match, p, 0) 3631 && regmatch.endp[0] == line + endcol) 3632 { 3633 need_cap = TRUE; 3634 break; 3635 } 3636 } 3637 curwin->w_s->b_cap_prog = regmatch.regprog; 3638 } 3639 3640 vim_free(line_copy); 3641 3642 return need_cap; 3643 } 3644 3645 3646 /* 3647 * ":spellrepall" 3648 */ 3649 void 3650 ex_spellrepall(exarg_T *eap UNUSED) 3651 { 3652 pos_T pos = curwin->w_cursor; 3653 char_u *frompat; 3654 int addlen; 3655 char_u *line; 3656 char_u *p; 3657 int save_ws = p_ws; 3658 linenr_T prev_lnum = 0; 3659 3660 if (repl_from == NULL || repl_to == NULL) 3661 { 3662 EMSG(_("E752: No previous spell replacement")); 3663 return; 3664 } 3665 addlen = (int)(STRLEN(repl_to) - STRLEN(repl_from)); 3666 3667 frompat = alloc((unsigned)STRLEN(repl_from) + 7); 3668 if (frompat == NULL) 3669 return; 3670 sprintf((char *)frompat, "\\V\\<%s\\>", repl_from); 3671 p_ws = FALSE; 3672 3673 sub_nsubs = 0; 3674 sub_nlines = 0; 3675 curwin->w_cursor.lnum = 0; 3676 while (!got_int) 3677 { 3678 if (do_search(NULL, '/', frompat, 1L, SEARCH_KEEP, NULL) == 0 3679 || u_save_cursor() == FAIL) 3680 break; 3681 3682 /* Only replace when the right word isn't there yet. This happens 3683 * when changing "etc" to "etc.". 
*/ 3684 line = ml_get_curline(); 3685 if (addlen <= 0 || STRNCMP(line + curwin->w_cursor.col, 3686 repl_to, STRLEN(repl_to)) != 0) 3687 { 3688 p = alloc((unsigned)STRLEN(line) + addlen + 1); 3689 if (p == NULL) 3690 break; 3691 mch_memmove(p, line, curwin->w_cursor.col); 3692 STRCPY(p + curwin->w_cursor.col, repl_to); 3693 STRCAT(p, line + curwin->w_cursor.col + STRLEN(repl_from)); 3694 ml_replace(curwin->w_cursor.lnum, p, FALSE); 3695 changed_bytes(curwin->w_cursor.lnum, curwin->w_cursor.col); 3696 3697 if (curwin->w_cursor.lnum != prev_lnum) 3698 { 3699 ++sub_nlines; 3700 prev_lnum = curwin->w_cursor.lnum; 3701 } 3702 ++sub_nsubs; 3703 } 3704 curwin->w_cursor.col += (colnr_T)STRLEN(repl_to); 3705 } 3706 3707 p_ws = save_ws; 3708 curwin->w_cursor = pos; 3709 vim_free(frompat); 3710 3711 if (sub_nsubs == 0) 3712 EMSG2(_("E753: Not found: %s"), repl_from); 3713 else 3714 do_sub_msg(FALSE); 3715 } 3716 3717 /* 3718 * Find spell suggestions for "word". Return them in the growarray "*gap" as 3719 * a list of allocated strings. 3720 */ 3721 void 3722 spell_suggest_list( 3723 garray_T *gap, 3724 char_u *word, 3725 int maxcount, /* maximum nr of suggestions */ 3726 int need_cap, /* 'spellcapcheck' matched */ 3727 int interactive) 3728 { 3729 suginfo_T sug; 3730 int i; 3731 suggest_T *stp; 3732 char_u *wcopy; 3733 3734 spell_find_suggest(word, 0, &sug, maxcount, FALSE, need_cap, interactive); 3735 3736 /* Make room in "gap". */ 3737 ga_init2(gap, sizeof(char_u *), sug.su_ga.ga_len + 1); 3738 if (ga_grow(gap, sug.su_ga.ga_len) == OK) 3739 { 3740 for (i = 0; i < sug.su_ga.ga_len; ++i) 3741 { 3742 stp = &SUG(sug.su_ga, i); 3743 3744 /* The suggested word may replace only part of "word", add the not 3745 * replaced part. 
*/ 3746 wcopy = alloc(stp->st_wordlen 3747 + (unsigned)STRLEN(sug.su_badptr + stp->st_orglen) + 1); 3748 if (wcopy == NULL) 3749 break; 3750 STRCPY(wcopy, stp->st_word); 3751 STRCPY(wcopy + stp->st_wordlen, sug.su_badptr + stp->st_orglen); 3752 ((char_u **)gap->ga_data)[gap->ga_len++] = wcopy; 3753 } 3754 } 3755 3756 spell_find_cleanup(&sug); 3757 } 3758 3759 /* 3760 * Find spell suggestions for the word at the start of "badptr". 3761 * Return the suggestions in "su->su_ga". 3762 * The maximum number of suggestions is "maxcount". 3763 * Note: does use info for the current window. 3764 * This is based on the mechanisms of Aspell, but completely reimplemented. 3765 */ 3766 static void 3767 spell_find_suggest( 3768 char_u *badptr, 3769 int badlen, /* length of bad word or 0 if unknown */ 3770 suginfo_T *su, 3771 int maxcount, 3772 int banbadword, /* don't include badword in suggestions */ 3773 int need_cap, /* word should start with capital */ 3774 int interactive) 3775 { 3776 hlf_T attr = HLF_COUNT; 3777 char_u buf[MAXPATHL]; 3778 char_u *p; 3779 int do_combine = FALSE; 3780 char_u *sps_copy; 3781 #ifdef FEAT_EVAL 3782 static int expr_busy = FALSE; 3783 #endif 3784 int c; 3785 int i; 3786 langp_T *lp; 3787 3788 /* 3789 * Set the info in "*su". 
3790 */ 3791 vim_memset(su, 0, sizeof(suginfo_T)); 3792 ga_init2(&su->su_ga, (int)sizeof(suggest_T), 10); 3793 ga_init2(&su->su_sga, (int)sizeof(suggest_T), 10); 3794 if (*badptr == NUL) 3795 return; 3796 hash_init(&su->su_banned); 3797 3798 su->su_badptr = badptr; 3799 if (badlen != 0) 3800 su->su_badlen = badlen; 3801 else 3802 su->su_badlen = spell_check(curwin, su->su_badptr, &attr, NULL, FALSE); 3803 su->su_maxcount = maxcount; 3804 su->su_maxscore = SCORE_MAXINIT; 3805 3806 if (su->su_badlen >= MAXWLEN) 3807 su->su_badlen = MAXWLEN - 1; /* just in case */ 3808 vim_strncpy(su->su_badword, su->su_badptr, su->su_badlen); 3809 (void)spell_casefold(su->su_badptr, su->su_badlen, 3810 su->su_fbadword, MAXWLEN); 3811 /* get caps flags for bad word */ 3812 su->su_badflags = badword_captype(su->su_badptr, 3813 su->su_badptr + su->su_badlen); 3814 if (need_cap) 3815 su->su_badflags |= WF_ONECAP; 3816 3817 /* Find the default language for sound folding. We simply use the first 3818 * one in 'spelllang' that supports sound folding. That's good for when 3819 * using multiple files for one language, it's not that bad when mixing 3820 * languages (e.g., "pl,en"). */ 3821 for (i = 0; i < curbuf->b_s.b_langp.ga_len; ++i) 3822 { 3823 lp = LANGP_ENTRY(curbuf->b_s.b_langp, i); 3824 if (lp->lp_sallang != NULL) 3825 { 3826 su->su_sallang = lp->lp_sallang; 3827 break; 3828 } 3829 } 3830 3831 /* Soundfold the bad word with the default sound folding, so that we don't 3832 * have to do this many times. */ 3833 if (su->su_sallang != NULL) 3834 spell_soundfold(su->su_sallang, su->su_fbadword, TRUE, 3835 su->su_sal_badword); 3836 3837 /* If the word is not capitalised and spell_check() doesn't consider the 3838 * word to be bad then it might need to be capitalised. Add a suggestion 3839 * for that. 
*/ 3840 c = PTR2CHAR(su->su_badptr); 3841 if (!SPELL_ISUPPER(c) && attr == HLF_COUNT) 3842 { 3843 make_case_word(su->su_badword, buf, WF_ONECAP); 3844 add_suggestion(su, &su->su_ga, buf, su->su_badlen, SCORE_ICASE, 3845 0, TRUE, su->su_sallang, FALSE); 3846 } 3847 3848 /* Ban the bad word itself. It may appear in another region. */ 3849 if (banbadword) 3850 add_banned(su, su->su_badword); 3851 3852 /* Make a copy of 'spellsuggest', because the expression may change it. */ 3853 sps_copy = vim_strsave(p_sps); 3854 if (sps_copy == NULL) 3855 return; 3856 3857 /* Loop over the items in 'spellsuggest'. */ 3858 for (p = sps_copy; *p != NUL; ) 3859 { 3860 copy_option_part(&p, buf, MAXPATHL, ","); 3861 3862 if (STRNCMP(buf, "expr:", 5) == 0) 3863 { 3864 #ifdef FEAT_EVAL 3865 /* Evaluate an expression. Skip this when called recursively, 3866 * when using spellsuggest() in the expression. */ 3867 if (!expr_busy) 3868 { 3869 expr_busy = TRUE; 3870 spell_suggest_expr(su, buf + 5); 3871 expr_busy = FALSE; 3872 } 3873 #endif 3874 } 3875 else if (STRNCMP(buf, "file:", 5) == 0) 3876 /* Use list of suggestions in a file. */ 3877 spell_suggest_file(su, buf + 5); 3878 else 3879 { 3880 /* Use internal method. */ 3881 spell_suggest_intern(su, interactive); 3882 if (sps_flags & SPS_DOUBLE) 3883 do_combine = TRUE; 3884 } 3885 } 3886 3887 vim_free(sps_copy); 3888 3889 if (do_combine) 3890 /* Combine the two list of suggestions. This must be done last, 3891 * because sorting changes the order again. */ 3892 score_combine(su); 3893 } 3894 3895 #ifdef FEAT_EVAL 3896 /* 3897 * Find suggestions by evaluating expression "expr". 3898 */ 3899 static void 3900 spell_suggest_expr(suginfo_T *su, char_u *expr) 3901 { 3902 list_T *list; 3903 listitem_T *li; 3904 int score; 3905 char_u *p; 3906 3907 /* The work is split up in a few parts to avoid having to export 3908 * suginfo_T. 3909 * First evaluate the expression and get the resulting list. 
*/ 3910 list = eval_spell_expr(su->su_badword, expr); 3911 if (list != NULL) 3912 { 3913 /* Loop over the items in the list. */ 3914 for (li = list->lv_first; li != NULL; li = li->li_next) 3915 if (li->li_tv.v_type == VAR_LIST) 3916 { 3917 /* Get the word and the score from the items. */ 3918 score = get_spellword(li->li_tv.vval.v_list, &p); 3919 if (score >= 0 && score <= su->su_maxscore) 3920 add_suggestion(su, &su->su_ga, p, su->su_badlen, 3921 score, 0, TRUE, su->su_sallang, FALSE); 3922 } 3923 list_unref(list); 3924 } 3925 3926 /* Remove bogus suggestions, sort and truncate at "maxcount". */ 3927 check_suggestions(su, &su->su_ga); 3928 (void)cleanup_suggestions(&su->su_ga, su->su_maxscore, su->su_maxcount); 3929 } 3930 #endif 3931 3932 /* 3933 * Find suggestions in file "fname". Used for "file:" in 'spellsuggest'. 3934 */ 3935 static void 3936 spell_suggest_file(suginfo_T *su, char_u *fname) 3937 { 3938 FILE *fd; 3939 char_u line[MAXWLEN * 2]; 3940 char_u *p; 3941 int len; 3942 char_u cword[MAXWLEN]; 3943 3944 /* Open the file. */ 3945 fd = mch_fopen((char *)fname, "r"); 3946 if (fd == NULL) 3947 { 3948 EMSG2(_(e_notopen), fname); 3949 return; 3950 } 3951 3952 /* Read it line by line. */ 3953 while (!vim_fgets(line, MAXWLEN * 2, fd) && !got_int) 3954 { 3955 line_breakcheck(); 3956 3957 p = vim_strchr(line, '/'); 3958 if (p == NULL) 3959 continue; /* No Tab found, just skip the line. */ 3960 *p++ = NUL; 3961 if (STRICMP(su->su_badword, line) == 0) 3962 { 3963 /* Match! Isolate the good word, until CR or NL. */ 3964 for (len = 0; p[len] >= ' '; ++len) 3965 ; 3966 p[len] = NUL; 3967 3968 /* If the suggestion doesn't have specific case duplicate the case 3969 * of the bad word. 
*/ 3970 if (captype(p, NULL) == 0) 3971 { 3972 make_case_word(p, cword, su->su_badflags); 3973 p = cword; 3974 } 3975 3976 add_suggestion(su, &su->su_ga, p, su->su_badlen, 3977 SCORE_FILE, 0, TRUE, su->su_sallang, FALSE); 3978 } 3979 } 3980 3981 fclose(fd); 3982 3983 /* Remove bogus suggestions, sort and truncate at "maxcount". */ 3984 check_suggestions(su, &su->su_ga); 3985 (void)cleanup_suggestions(&su->su_ga, su->su_maxscore, su->su_maxcount); 3986 } 3987 3988 /* 3989 * Find suggestions for the internal method indicated by "sps_flags". 3990 */ 3991 static void 3992 spell_suggest_intern(suginfo_T *su, int interactive) 3993 { 3994 /* 3995 * Load the .sug file(s) that are available and not done yet. 3996 */ 3997 suggest_load_files(); 3998 3999 /* 4000 * 1. Try special cases, such as repeating a word: "the the" -> "the". 4001 * 4002 * Set a maximum score to limit the combination of operations that is 4003 * tried. 4004 */ 4005 suggest_try_special(su); 4006 4007 /* 4008 * 2. Try inserting/deleting/swapping/changing a letter, use REP entries 4009 * from the .aff file and inserting a space (split the word). 4010 */ 4011 suggest_try_change(su); 4012 4013 /* For the resulting top-scorers compute the sound-a-like score. */ 4014 if (sps_flags & SPS_DOUBLE) 4015 score_comp_sal(su); 4016 4017 /* 4018 * 3. Try finding sound-a-like words. 4019 */ 4020 if ((sps_flags & SPS_FAST) == 0) 4021 { 4022 if (sps_flags & SPS_BEST) 4023 /* Adjust the word score for the suggestions found so far for how 4024 * they sounds like. */ 4025 rescore_suggestions(su); 4026 4027 /* 4028 * While going through the soundfold tree "su_maxscore" is the score 4029 * for the soundfold word, limits the changes that are being tried, 4030 * and "su_sfmaxscore" the rescored score, which is set by 4031 * cleanup_suggestions(). 4032 * First find words with a small edit distance, because this is much 4033 * faster and often already finds the top-N suggestions. 
If we didn't 4034 * find many suggestions try again with a higher edit distance. 4035 * "sl_sounddone" is used to avoid doing the same word twice. 4036 */ 4037 suggest_try_soundalike_prep(); 4038 su->su_maxscore = SCORE_SFMAX1; 4039 su->su_sfmaxscore = SCORE_MAXINIT * 3; 4040 suggest_try_soundalike(su); 4041 if (su->su_ga.ga_len < SUG_CLEAN_COUNT(su)) 4042 { 4043 /* We didn't find enough matches, try again, allowing more 4044 * changes to the soundfold word. */ 4045 su->su_maxscore = SCORE_SFMAX2; 4046 suggest_try_soundalike(su); 4047 if (su->su_ga.ga_len < SUG_CLEAN_COUNT(su)) 4048 { 4049 /* Still didn't find enough matches, try again, allowing even 4050 * more changes to the soundfold word. */ 4051 su->su_maxscore = SCORE_SFMAX3; 4052 suggest_try_soundalike(su); 4053 } 4054 } 4055 su->su_maxscore = su->su_sfmaxscore; 4056 suggest_try_soundalike_finish(); 4057 } 4058 4059 /* When CTRL-C was hit while searching do show the results. Only clear 4060 * got_int when using a command, not for spellsuggest(). */ 4061 ui_breakcheck(); 4062 if (interactive && got_int) 4063 { 4064 (void)vgetc(); 4065 got_int = FALSE; 4066 } 4067 4068 if ((sps_flags & SPS_DOUBLE) == 0 && su->su_ga.ga_len != 0) 4069 { 4070 if (sps_flags & SPS_BEST) 4071 /* Adjust the word score for how it sounds like. */ 4072 rescore_suggestions(su); 4073 4074 /* Remove bogus suggestions, sort and truncate at "maxcount". */ 4075 check_suggestions(su, &su->su_ga); 4076 (void)cleanup_suggestions(&su->su_ga, su->su_maxscore, su->su_maxcount); 4077 } 4078 } 4079 4080 /* 4081 * Free the info put in "*su" by spell_find_suggest(). 4082 */ 4083 static void 4084 spell_find_cleanup(suginfo_T *su) 4085 { 4086 int i; 4087 4088 /* Free the suggestions. */ 4089 for (i = 0; i < su->su_ga.ga_len; ++i) 4090 vim_free(SUG(su->su_ga, i).st_word); 4091 ga_clear(&su->su_ga); 4092 for (i = 0; i < su->su_sga.ga_len; ++i) 4093 vim_free(SUG(su->su_sga, i).st_word); 4094 ga_clear(&su->su_sga); 4095 4096 /* Free the banned words. 
*/ 4097 hash_clear_all(&su->su_banned, 0); 4098 } 4099 4100 /* 4101 * Make a copy of "word", with the first letter upper or lower cased, to 4102 * "wcopy[MAXWLEN]". "word" must not be empty. 4103 * The result is NUL terminated. 4104 */ 4105 void 4106 onecap_copy( 4107 char_u *word, 4108 char_u *wcopy, 4109 int upper) /* TRUE: first letter made upper case */ 4110 { 4111 char_u *p; 4112 int c; 4113 int l; 4114 4115 p = word; 4116 #ifdef FEAT_MBYTE 4117 if (has_mbyte) 4118 c = mb_cptr2char_adv(&p); 4119 else 4120 #endif 4121 c = *p++; 4122 if (upper) 4123 c = SPELL_TOUPPER(c); 4124 else 4125 c = SPELL_TOFOLD(c); 4126 #ifdef FEAT_MBYTE 4127 if (has_mbyte) 4128 l = mb_char2bytes(c, wcopy); 4129 else 4130 #endif 4131 { 4132 l = 1; 4133 wcopy[0] = c; 4134 } 4135 vim_strncpy(wcopy + l, p, MAXWLEN - l - 1); 4136 } 4137 4138 /* 4139 * Make a copy of "word" with all the letters upper cased into 4140 * "wcopy[MAXWLEN]". The result is NUL terminated. 4141 */ 4142 static void 4143 allcap_copy(char_u *word, char_u *wcopy) 4144 { 4145 char_u *s; 4146 char_u *d; 4147 int c; 4148 4149 d = wcopy; 4150 for (s = word; *s != NUL; ) 4151 { 4152 #ifdef FEAT_MBYTE 4153 if (has_mbyte) 4154 c = mb_cptr2char_adv(&s); 4155 else 4156 #endif 4157 c = *s++; 4158 4159 #ifdef FEAT_MBYTE 4160 /* We only change 0xdf to SS when we are certain latin1 is used. It 4161 * would cause weird errors in other 8-bit encodings. */ 4162 if (enc_latin1like && c == 0xdf) 4163 { 4164 c = 'S'; 4165 if (d - wcopy >= MAXWLEN - 1) 4166 break; 4167 *d++ = c; 4168 } 4169 else 4170 #endif 4171 c = SPELL_TOUPPER(c); 4172 4173 #ifdef FEAT_MBYTE 4174 if (has_mbyte) 4175 { 4176 if (d - wcopy >= MAXWLEN - MB_MAXBYTES) 4177 break; 4178 d += mb_char2bytes(c, d); 4179 } 4180 else 4181 #endif 4182 { 4183 if (d - wcopy >= MAXWLEN - 1) 4184 break; 4185 *d++ = c; 4186 } 4187 } 4188 *d = NUL; 4189 } 4190 4191 /* 4192 * Try finding suggestions by recognizing specific situations. 
4193 */ 4194 static void 4195 suggest_try_special(suginfo_T *su) 4196 { 4197 char_u *p; 4198 size_t len; 4199 int c; 4200 char_u word[MAXWLEN]; 4201 4202 /* 4203 * Recognize a word that is repeated: "the the". 4204 */ 4205 p = skiptowhite(su->su_fbadword); 4206 len = p - su->su_fbadword; 4207 p = skipwhite(p); 4208 if (STRLEN(p) == len && STRNCMP(su->su_fbadword, p, len) == 0) 4209 { 4210 /* Include badflags: if the badword is onecap or allcap 4211 * use that for the goodword too: "The the" -> "The". */ 4212 c = su->su_fbadword[len]; 4213 su->su_fbadword[len] = NUL; 4214 make_case_word(su->su_fbadword, word, su->su_badflags); 4215 su->su_fbadword[len] = c; 4216 4217 /* Give a soundalike score of 0, compute the score as if deleting one 4218 * character. */ 4219 add_suggestion(su, &su->su_ga, word, su->su_badlen, 4220 RESCORE(SCORE_REP, 0), 0, TRUE, su->su_sallang, FALSE); 4221 } 4222 } 4223 4224 /* 4225 * Change the 0 to 1 to measure how much time is spent in each state. 4226 * Output is dumped in "suggestprof". 
4227 */ 4228 #if 0 4229 # define SUGGEST_PROFILE 4230 proftime_T current; 4231 proftime_T total; 4232 proftime_T times[STATE_FINAL + 1]; 4233 long counts[STATE_FINAL + 1]; 4234 4235 static void 4236 prof_init(void) 4237 { 4238 for (int i = 0; i <= STATE_FINAL; ++i) 4239 { 4240 profile_zero(×[i]); 4241 counts[i] = 0; 4242 } 4243 profile_start(¤t); 4244 profile_start(&total); 4245 } 4246 4247 /* call before changing state */ 4248 static void 4249 prof_store(state_T state) 4250 { 4251 profile_end(¤t); 4252 profile_add(×[state], ¤t); 4253 ++counts[state]; 4254 profile_start(¤t); 4255 } 4256 # define PROF_STORE(state) prof_store(state); 4257 4258 static void 4259 prof_report(char *name) 4260 { 4261 FILE *fd = fopen("suggestprof", "a"); 4262 4263 profile_end(&total); 4264 fprintf(fd, "-----------------------\n"); 4265 fprintf(fd, "%s: %s\n", name, profile_msg(&total)); 4266 for (int i = 0; i <= STATE_FINAL; ++i) 4267 fprintf(fd, "%d: %s (%ld)\n", i, profile_msg(×[i]), counts[i]); 4268 fclose(fd); 4269 } 4270 #else 4271 # define PROF_STORE(state) 4272 #endif 4273 4274 /* 4275 * Try finding suggestions by adding/removing/swapping letters. 4276 */ 4277 static void 4278 suggest_try_change(suginfo_T *su) 4279 { 4280 char_u fword[MAXWLEN]; /* copy of the bad word, case-folded */ 4281 int n; 4282 char_u *p; 4283 int lpi; 4284 langp_T *lp; 4285 4286 /* We make a copy of the case-folded bad word, so that we can modify it 4287 * to find matches (esp. REP items). Append some more text, changing 4288 * chars after the bad word may help. */ 4289 STRCPY(fword, su->su_fbadword); 4290 n = (int)STRLEN(fword); 4291 p = su->su_badptr + su->su_badlen; 4292 (void)spell_casefold(p, (int)STRLEN(p), fword + n, MAXWLEN - n); 4293 4294 for (lpi = 0; lpi < curwin->w_s->b_langp.ga_len; ++lpi) 4295 { 4296 lp = LANGP_ENTRY(curwin->w_s->b_langp, lpi); 4297 4298 /* If reloading a spell file fails it's still in the list but 4299 * everything has been cleared. 
*/ 4300 if (lp->lp_slang->sl_fbyts == NULL) 4301 continue; 4302 4303 /* Try it for this language. Will add possible suggestions. */ 4304 #ifdef SUGGEST_PROFILE 4305 prof_init(); 4306 #endif 4307 suggest_trie_walk(su, lp, fword, FALSE); 4308 #ifdef SUGGEST_PROFILE 4309 prof_report("try_change"); 4310 #endif 4311 } 4312 } 4313 4314 /* Check the maximum score, if we go over it we won't try this change. */ 4315 #define TRY_DEEPER(su, stack, depth, add) \ 4316 (stack[depth].ts_score + (add) < su->su_maxscore) 4317 4318 /* 4319 * Try finding suggestions by adding/removing/swapping letters. 4320 * 4321 * This uses a state machine. At each node in the tree we try various 4322 * operations. When trying if an operation works "depth" is increased and the 4323 * stack[] is used to store info. This allows combinations, thus insert one 4324 * character, replace one and delete another. The number of changes is 4325 * limited by su->su_maxscore. 4326 * 4327 * After implementing this I noticed an article by Kemal Oflazer that 4328 * describes something similar: "Error-tolerant Finite State Recognition with 4329 * Applications to Morphological Analysis and Spelling Correction" (1996). 4330 * The implementation in the article is simplified and requires a stack of 4331 * unknown depth. The implementation here only needs a stack depth equal to 4332 * the length of the word. 4333 * 4334 * This is also used for the sound-folded word, "soundfold" is TRUE then. 4335 * The mechanism is the same, but we find a match with a sound-folded word 4336 * that comes from one or more original words. Each of these words may be 4337 * added, this is done by add_sound_suggest(). 
4338 * Don't use: 4339 * the prefix tree or the keep-case tree 4340 * "su->su_badlen" 4341 * anything to do with upper and lower case 4342 * anything to do with word or non-word characters ("spell_iswordp()") 4343 * banned words 4344 * word flags (rare, region, compounding) 4345 * word splitting for now 4346 * "similar_chars()" 4347 * use "slang->sl_repsal" instead of "lp->lp_replang->sl_rep" 4348 */ 4349 static void 4350 suggest_trie_walk( 4351 suginfo_T *su, 4352 langp_T *lp, 4353 char_u *fword, 4354 int soundfold) 4355 { 4356 char_u tword[MAXWLEN]; /* good word collected so far */ 4357 trystate_T stack[MAXWLEN]; 4358 char_u preword[MAXWLEN * 3]; /* word found with proper case; 4359 * concatenation of prefix compound 4360 * words and split word. NUL terminated 4361 * when going deeper but not when coming 4362 * back. */ 4363 char_u compflags[MAXWLEN]; /* compound flags, one for each word */ 4364 trystate_T *sp; 4365 int newscore; 4366 int score; 4367 char_u *byts, *fbyts, *pbyts; 4368 idx_T *idxs, *fidxs, *pidxs; 4369 int depth; 4370 int c, c2, c3; 4371 int n = 0; 4372 int flags; 4373 garray_T *gap; 4374 idx_T arridx; 4375 int len; 4376 char_u *p; 4377 fromto_T *ftp; 4378 int fl = 0, tl; 4379 int repextra = 0; /* extra bytes in fword[] from REP item */ 4380 slang_T *slang = lp->lp_slang; 4381 int fword_ends; 4382 int goodword_ends; 4383 #ifdef DEBUG_TRIEWALK 4384 /* Stores the name of the change made at each level. */ 4385 char_u changename[MAXWLEN][80]; 4386 #endif 4387 int breakcheckcount = 1000; 4388 int compound_ok; 4389 4390 /* 4391 * Go through the whole case-fold tree, try changes at each node. 4392 * "tword[]" contains the word collected from nodes in the tree. 4393 * "fword[]" the word we are trying to match with (initially the bad 4394 * word). 4395 */ 4396 depth = 0; 4397 sp = &stack[0]; 4398 vim_memset(sp, 0, sizeof(trystate_T)); 4399 sp->ts_curi = 1; 4400 4401 if (soundfold) 4402 { 4403 /* Going through the soundfold tree. 
*/ 4404 byts = fbyts = slang->sl_sbyts; 4405 idxs = fidxs = slang->sl_sidxs; 4406 pbyts = NULL; 4407 pidxs = NULL; 4408 sp->ts_prefixdepth = PFD_NOPREFIX; 4409 sp->ts_state = STATE_START; 4410 } 4411 else 4412 { 4413 /* 4414 * When there are postponed prefixes we need to use these first. At 4415 * the end of the prefix we continue in the case-fold tree. 4416 */ 4417 fbyts = slang->sl_fbyts; 4418 fidxs = slang->sl_fidxs; 4419 pbyts = slang->sl_pbyts; 4420 pidxs = slang->sl_pidxs; 4421 if (pbyts != NULL) 4422 { 4423 byts = pbyts; 4424 idxs = pidxs; 4425 sp->ts_prefixdepth = PFD_PREFIXTREE; 4426 sp->ts_state = STATE_NOPREFIX; /* try without prefix first */ 4427 } 4428 else 4429 { 4430 byts = fbyts; 4431 idxs = fidxs; 4432 sp->ts_prefixdepth = PFD_NOPREFIX; 4433 sp->ts_state = STATE_START; 4434 } 4435 } 4436 4437 /* 4438 * Loop to find all suggestions. At each round we either: 4439 * - For the current state try one operation, advance "ts_curi", 4440 * increase "depth". 4441 * - When a state is done go to the next, set "ts_state". 4442 * - When all states are tried decrease "depth". 4443 */ 4444 while (depth >= 0 && !got_int) 4445 { 4446 sp = &stack[depth]; 4447 switch (sp->ts_state) 4448 { 4449 case STATE_START: 4450 case STATE_NOPREFIX: 4451 /* 4452 * Start of node: Deal with NUL bytes, which means 4453 * tword[] may end here. 4454 */ 4455 arridx = sp->ts_arridx; /* current node in the tree */ 4456 len = byts[arridx]; /* bytes in this node */ 4457 arridx += sp->ts_curi; /* index of current byte */ 4458 4459 if (sp->ts_prefixdepth == PFD_PREFIXTREE) 4460 { 4461 /* Skip over the NUL bytes, we use them later. */ 4462 for (n = 0; n < len && byts[arridx + n] == 0; ++n) 4463 ; 4464 sp->ts_curi += n; 4465 4466 /* Always past NUL bytes now. */ 4467 n = (int)sp->ts_state; 4468 PROF_STORE(sp->ts_state) 4469 sp->ts_state = STATE_ENDNUL; 4470 sp->ts_save_badflags = su->su_badflags; 4471 4472 /* At end of a prefix or at start of prefixtree: check for 4473 * following word. 
*/ 4474 if (byts[arridx] == 0 || n == (int)STATE_NOPREFIX) 4475 { 4476 /* Set su->su_badflags to the caps type at this position. 4477 * Use the caps type until here for the prefix itself. */ 4478 #ifdef FEAT_MBYTE 4479 if (has_mbyte) 4480 n = nofold_len(fword, sp->ts_fidx, su->su_badptr); 4481 else 4482 #endif 4483 n = sp->ts_fidx; 4484 flags = badword_captype(su->su_badptr, su->su_badptr + n); 4485 su->su_badflags = badword_captype(su->su_badptr + n, 4486 su->su_badptr + su->su_badlen); 4487 #ifdef DEBUG_TRIEWALK 4488 sprintf(changename[depth], "prefix"); 4489 #endif 4490 go_deeper(stack, depth, 0); 4491 ++depth; 4492 sp = &stack[depth]; 4493 sp->ts_prefixdepth = depth - 1; 4494 byts = fbyts; 4495 idxs = fidxs; 4496 sp->ts_arridx = 0; 4497 4498 /* Move the prefix to preword[] with the right case 4499 * and make find_keepcap_word() works. */ 4500 tword[sp->ts_twordlen] = NUL; 4501 make_case_word(tword + sp->ts_splitoff, 4502 preword + sp->ts_prewordlen, flags); 4503 sp->ts_prewordlen = (char_u)STRLEN(preword); 4504 sp->ts_splitoff = sp->ts_twordlen; 4505 } 4506 break; 4507 } 4508 4509 if (sp->ts_curi > len || byts[arridx] != 0) 4510 { 4511 /* Past bytes in node and/or past NUL bytes. */ 4512 PROF_STORE(sp->ts_state) 4513 sp->ts_state = STATE_ENDNUL; 4514 sp->ts_save_badflags = su->su_badflags; 4515 break; 4516 } 4517 4518 /* 4519 * End of word in tree. 4520 */ 4521 ++sp->ts_curi; /* eat one NUL byte */ 4522 4523 flags = (int)idxs[arridx]; 4524 4525 /* Skip words with the NOSUGGEST flag. */ 4526 if (flags & WF_NOSUGGEST) 4527 break; 4528 4529 fword_ends = (fword[sp->ts_fidx] == NUL 4530 || (soundfold 4531 ? vim_iswhite(fword[sp->ts_fidx]) 4532 : !spell_iswordp(fword + sp->ts_fidx, curwin))); 4533 tword[sp->ts_twordlen] = NUL; 4534 4535 if (sp->ts_prefixdepth <= PFD_NOTSPECIAL 4536 && (sp->ts_flags & TSF_PREFIXOK) == 0) 4537 { 4538 /* There was a prefix before the word. Check that the prefix 4539 * can be used with this word. 
*/ 4540 /* Count the length of the NULs in the prefix. If there are 4541 * none this must be the first try without a prefix. */ 4542 n = stack[sp->ts_prefixdepth].ts_arridx; 4543 len = pbyts[n++]; 4544 for (c = 0; c < len && pbyts[n + c] == 0; ++c) 4545 ; 4546 if (c > 0) 4547 { 4548 c = valid_word_prefix(c, n, flags, 4549 tword + sp->ts_splitoff, slang, FALSE); 4550 if (c == 0) 4551 break; 4552 4553 /* Use the WF_RARE flag for a rare prefix. */ 4554 if (c & WF_RAREPFX) 4555 flags |= WF_RARE; 4556 4557 /* Tricky: when checking for both prefix and compounding 4558 * we run into the prefix flag first. 4559 * Remember that it's OK, so that we accept the prefix 4560 * when arriving at a compound flag. */ 4561 sp->ts_flags |= TSF_PREFIXOK; 4562 } 4563 } 4564 4565 /* Check NEEDCOMPOUND: can't use word without compounding. Do try 4566 * appending another compound word below. */ 4567 if (sp->ts_complen == sp->ts_compsplit && fword_ends 4568 && (flags & WF_NEEDCOMP)) 4569 goodword_ends = FALSE; 4570 else 4571 goodword_ends = TRUE; 4572 4573 p = NULL; 4574 compound_ok = TRUE; 4575 if (sp->ts_complen > sp->ts_compsplit) 4576 { 4577 if (slang->sl_nobreak) 4578 { 4579 /* There was a word before this word. When there was no 4580 * change in this word (it was correct) add the first word 4581 * as a suggestion. If this word was corrected too, we 4582 * need to check if a correct word follows. */ 4583 if (sp->ts_fidx - sp->ts_splitfidx 4584 == sp->ts_twordlen - sp->ts_splitoff 4585 && STRNCMP(fword + sp->ts_splitfidx, 4586 tword + sp->ts_splitoff, 4587 sp->ts_fidx - sp->ts_splitfidx) == 0) 4588 { 4589 preword[sp->ts_prewordlen] = NUL; 4590 newscore = score_wordcount_adj(slang, sp->ts_score, 4591 preword + sp->ts_prewordlen, 4592 sp->ts_prewordlen > 0); 4593 /* Add the suggestion if the score isn't too bad. 
*/ 4594 if (newscore <= su->su_maxscore) 4595 add_suggestion(su, &su->su_ga, preword, 4596 sp->ts_splitfidx - repextra, 4597 newscore, 0, FALSE, 4598 lp->lp_sallang, FALSE); 4599 break; 4600 } 4601 } 4602 else 4603 { 4604 /* There was a compound word before this word. If this 4605 * word does not support compounding then give up 4606 * (splitting is tried for the word without compound 4607 * flag). */ 4608 if (((unsigned)flags >> 24) == 0 4609 || sp->ts_twordlen - sp->ts_splitoff 4610 < slang->sl_compminlen) 4611 break; 4612 #ifdef FEAT_MBYTE 4613 /* For multi-byte chars check character length against 4614 * COMPOUNDMIN. */ 4615 if (has_mbyte 4616 && slang->sl_compminlen > 0 4617 && mb_charlen(tword + sp->ts_splitoff) 4618 < slang->sl_compminlen) 4619 break; 4620 #endif 4621 4622 compflags[sp->ts_complen] = ((unsigned)flags >> 24); 4623 compflags[sp->ts_complen + 1] = NUL; 4624 vim_strncpy(preword + sp->ts_prewordlen, 4625 tword + sp->ts_splitoff, 4626 sp->ts_twordlen - sp->ts_splitoff); 4627 4628 /* Verify CHECKCOMPOUNDPATTERN rules. */ 4629 if (match_checkcompoundpattern(preword, sp->ts_prewordlen, 4630 &slang->sl_comppat)) 4631 compound_ok = FALSE; 4632 4633 if (compound_ok) 4634 { 4635 p = preword; 4636 while (*skiptowhite(p) != NUL) 4637 p = skipwhite(skiptowhite(p)); 4638 if (fword_ends && !can_compound(slang, p, 4639 compflags + sp->ts_compsplit)) 4640 /* Compound is not allowed. But it may still be 4641 * possible if we add another (short) word. */ 4642 compound_ok = FALSE; 4643 } 4644 4645 /* Get pointer to last char of previous word. */ 4646 p = preword + sp->ts_prewordlen; 4647 mb_ptr_back(preword, p); 4648 } 4649 } 4650 4651 /* 4652 * Form the word with proper case in preword. 4653 * If there is a word from a previous split, append. 4654 * For the soundfold tree don't change the case, simply append. 
4655 */ 4656 if (soundfold) 4657 STRCPY(preword + sp->ts_prewordlen, tword + sp->ts_splitoff); 4658 else if (flags & WF_KEEPCAP) 4659 /* Must find the word in the keep-case tree. */ 4660 find_keepcap_word(slang, tword + sp->ts_splitoff, 4661 preword + sp->ts_prewordlen); 4662 else 4663 { 4664 /* Include badflags: If the badword is onecap or allcap 4665 * use that for the goodword too. But if the badword is 4666 * allcap and it's only one char long use onecap. */ 4667 c = su->su_badflags; 4668 if ((c & WF_ALLCAP) 4669 #ifdef FEAT_MBYTE 4670 && su->su_badlen == (*mb_ptr2len)(su->su_badptr) 4671 #else 4672 && su->su_badlen == 1 4673 #endif 4674 ) 4675 c = WF_ONECAP; 4676 c |= flags; 4677 4678 /* When appending a compound word after a word character don't 4679 * use Onecap. */ 4680 if (p != NULL && spell_iswordp_nmw(p, curwin)) 4681 c &= ~WF_ONECAP; 4682 make_case_word(tword + sp->ts_splitoff, 4683 preword + sp->ts_prewordlen, c); 4684 } 4685 4686 if (!soundfold) 4687 { 4688 /* Don't use a banned word. It may appear again as a good 4689 * word, thus remember it. */ 4690 if (flags & WF_BANNED) 4691 { 4692 add_banned(su, preword + sp->ts_prewordlen); 4693 break; 4694 } 4695 if ((sp->ts_complen == sp->ts_compsplit 4696 && WAS_BANNED(su, preword + sp->ts_prewordlen)) 4697 || WAS_BANNED(su, preword)) 4698 { 4699 if (slang->sl_compprog == NULL) 4700 break; 4701 /* the word so far was banned but we may try compounding */ 4702 goodword_ends = FALSE; 4703 } 4704 } 4705 4706 newscore = 0; 4707 if (!soundfold) /* soundfold words don't have flags */ 4708 { 4709 if ((flags & WF_REGION) 4710 && (((unsigned)flags >> 16) & lp->lp_region) == 0) 4711 newscore += SCORE_REGION; 4712 if (flags & WF_RARE) 4713 newscore += SCORE_RARE; 4714 4715 if (!spell_valid_case(su->su_badflags, 4716 captype(preword + sp->ts_prewordlen, NULL))) 4717 newscore += SCORE_ICASE; 4718 } 4719 4720 /* TODO: how about splitting in the soundfold tree? 
*/ 4721 if (fword_ends 4722 && goodword_ends 4723 && sp->ts_fidx >= sp->ts_fidxtry 4724 && compound_ok) 4725 { 4726 /* The badword also ends: add suggestions. */ 4727 #ifdef DEBUG_TRIEWALK 4728 if (soundfold && STRCMP(preword, "smwrd") == 0) 4729 { 4730 int j; 4731 4732 /* print the stack of changes that brought us here */ 4733 smsg("------ %s -------", fword); 4734 for (j = 0; j < depth; ++j) 4735 smsg("%s", changename[j]); 4736 } 4737 #endif 4738 if (soundfold) 4739 { 4740 /* For soundfolded words we need to find the original 4741 * words, the edit distance and then add them. */ 4742 add_sound_suggest(su, preword, sp->ts_score, lp); 4743 } 4744 else if (sp->ts_fidx > 0) 4745 { 4746 /* Give a penalty when changing non-word char to word 4747 * char, e.g., "thes," -> "these". */ 4748 p = fword + sp->ts_fidx; 4749 mb_ptr_back(fword, p); 4750 if (!spell_iswordp(p, curwin)) 4751 { 4752 p = preword + STRLEN(preword); 4753 mb_ptr_back(preword, p); 4754 if (spell_iswordp(p, curwin)) 4755 newscore += SCORE_NONWORD; 4756 } 4757 4758 /* Give a bonus to words seen before. */ 4759 score = score_wordcount_adj(slang, 4760 sp->ts_score + newscore, 4761 preword + sp->ts_prewordlen, 4762 sp->ts_prewordlen > 0); 4763 4764 /* Add the suggestion if the score isn't too bad. */ 4765 if (score <= su->su_maxscore) 4766 { 4767 add_suggestion(su, &su->su_ga, preword, 4768 sp->ts_fidx - repextra, 4769 score, 0, FALSE, lp->lp_sallang, FALSE); 4770 4771 if (su->su_badflags & WF_MIXCAP) 4772 { 4773 /* We really don't know if the word should be 4774 * upper or lower case, add both. */ 4775 c = captype(preword, NULL); 4776 if (c == 0 || c == WF_ALLCAP) 4777 { 4778 make_case_word(tword + sp->ts_splitoff, 4779 preword + sp->ts_prewordlen, 4780 c == 0 ? WF_ALLCAP : 0); 4781 4782 add_suggestion(su, &su->su_ga, preword, 4783 sp->ts_fidx - repextra, 4784 score + SCORE_ICASE, 0, FALSE, 4785 lp->lp_sallang, FALSE); 4786 } 4787 } 4788 } 4789 } 4790 } 4791 4792 /* 4793 * Try word split and/or compounding. 
4794 */ 4795 if ((sp->ts_fidx >= sp->ts_fidxtry || fword_ends) 4796 #ifdef FEAT_MBYTE 4797 /* Don't split halfway a character. */ 4798 && (!has_mbyte || sp->ts_tcharlen == 0) 4799 #endif 4800 ) 4801 { 4802 int try_compound; 4803 int try_split; 4804 4805 /* If past the end of the bad word don't try a split. 4806 * Otherwise try changing the next word. E.g., find 4807 * suggestions for "the the" where the second "the" is 4808 * different. It's done like a split. 4809 * TODO: word split for soundfold words */ 4810 try_split = (sp->ts_fidx - repextra < su->su_badlen) 4811 && !soundfold; 4812 4813 /* Get here in several situations: 4814 * 1. The word in the tree ends: 4815 * If the word allows compounding try that. Otherwise try 4816 * a split by inserting a space. For both check that a 4817 * valid words starts at fword[sp->ts_fidx]. 4818 * For NOBREAK do like compounding to be able to check if 4819 * the next word is valid. 4820 * 2. The badword does end, but it was due to a change (e.g., 4821 * a swap). No need to split, but do check that the 4822 * following word is valid. 4823 * 3. The badword and the word in the tree end. It may still 4824 * be possible to compound another (short) word. 
4825 */ 4826 try_compound = FALSE; 4827 if (!soundfold 4828 && !slang->sl_nocompoundsugs 4829 && slang->sl_compprog != NULL 4830 && ((unsigned)flags >> 24) != 0 4831 && sp->ts_twordlen - sp->ts_splitoff 4832 >= slang->sl_compminlen 4833 #ifdef FEAT_MBYTE 4834 && (!has_mbyte 4835 || slang->sl_compminlen == 0 4836 || mb_charlen(tword + sp->ts_splitoff) 4837 >= slang->sl_compminlen) 4838 #endif 4839 && (slang->sl_compsylmax < MAXWLEN 4840 || sp->ts_complen + 1 - sp->ts_compsplit 4841 < slang->sl_compmax) 4842 && (can_be_compound(sp, slang, 4843 compflags, ((unsigned)flags >> 24)))) 4844 4845 { 4846 try_compound = TRUE; 4847 compflags[sp->ts_complen] = ((unsigned)flags >> 24); 4848 compflags[sp->ts_complen + 1] = NUL; 4849 } 4850 4851 /* For NOBREAK we never try splitting, it won't make any word 4852 * valid. */ 4853 if (slang->sl_nobreak && !slang->sl_nocompoundsugs) 4854 try_compound = TRUE; 4855 4856 /* If we could add a compound word, and it's also possible to 4857 * split at this point, do the split first and set 4858 * TSF_DIDSPLIT to avoid doing it again. */ 4859 else if (!fword_ends 4860 && try_compound 4861 && (sp->ts_flags & TSF_DIDSPLIT) == 0) 4862 { 4863 try_compound = FALSE; 4864 sp->ts_flags |= TSF_DIDSPLIT; 4865 --sp->ts_curi; /* do the same NUL again */ 4866 compflags[sp->ts_complen] = NUL; 4867 } 4868 else 4869 sp->ts_flags &= ~TSF_DIDSPLIT; 4870 4871 if (try_split || try_compound) 4872 { 4873 if (!try_compound && (!fword_ends || !goodword_ends)) 4874 { 4875 /* If we're going to split need to check that the 4876 * words so far are valid for compounding. If there 4877 * is only one word it must not have the NEEDCOMPOUND 4878 * flag. 
*/ 4879 if (sp->ts_complen == sp->ts_compsplit 4880 && (flags & WF_NEEDCOMP)) 4881 break; 4882 p = preword; 4883 while (*skiptowhite(p) != NUL) 4884 p = skipwhite(skiptowhite(p)); 4885 if (sp->ts_complen > sp->ts_compsplit 4886 && !can_compound(slang, p, 4887 compflags + sp->ts_compsplit)) 4888 break; 4889 4890 if (slang->sl_nosplitsugs) 4891 newscore += SCORE_SPLIT_NO; 4892 else 4893 newscore += SCORE_SPLIT; 4894 4895 /* Give a bonus to words seen before. */ 4896 newscore = score_wordcount_adj(slang, newscore, 4897 preword + sp->ts_prewordlen, TRUE); 4898 } 4899 4900 if (TRY_DEEPER(su, stack, depth, newscore)) 4901 { 4902 go_deeper(stack, depth, newscore); 4903 #ifdef DEBUG_TRIEWALK 4904 if (!try_compound && !fword_ends) 4905 sprintf(changename[depth], "%.*s-%s: split", 4906 sp->ts_twordlen, tword, fword + sp->ts_fidx); 4907 else 4908 sprintf(changename[depth], "%.*s-%s: compound", 4909 sp->ts_twordlen, tword, fword + sp->ts_fidx); 4910 #endif 4911 /* Save things to be restored at STATE_SPLITUNDO. */ 4912 sp->ts_save_badflags = su->su_badflags; 4913 PROF_STORE(sp->ts_state) 4914 sp->ts_state = STATE_SPLITUNDO; 4915 4916 ++depth; 4917 sp = &stack[depth]; 4918 4919 /* Append a space to preword when splitting. */ 4920 if (!try_compound && !fword_ends) 4921 STRCAT(preword, " "); 4922 sp->ts_prewordlen = (char_u)STRLEN(preword); 4923 sp->ts_splitoff = sp->ts_twordlen; 4924 sp->ts_splitfidx = sp->ts_fidx; 4925 4926 /* If the badword has a non-word character at this 4927 * position skip it. That means replacing the 4928 * non-word character with a space. Always skip a 4929 * character when the word ends. But only when the 4930 * good word can end. 
*/ 4931 if (((!try_compound && !spell_iswordp_nmw(fword 4932 + sp->ts_fidx, 4933 curwin)) 4934 || fword_ends) 4935 && fword[sp->ts_fidx] != NUL 4936 && goodword_ends) 4937 { 4938 int l; 4939 4940 #ifdef FEAT_MBYTE 4941 if (has_mbyte) 4942 l = MB_BYTE2LEN(fword[sp->ts_fidx]); 4943 else 4944 #endif 4945 l = 1; 4946 if (fword_ends) 4947 { 4948 /* Copy the skipped character to preword. */ 4949 mch_memmove(preword + sp->ts_prewordlen, 4950 fword + sp->ts_fidx, l); 4951 sp->ts_prewordlen += l; 4952 preword[sp->ts_prewordlen] = NUL; 4953 } 4954 else 4955 sp->ts_score -= SCORE_SPLIT - SCORE_SUBST; 4956 sp->ts_fidx += l; 4957 } 4958 4959 /* When compounding include compound flag in 4960 * compflags[] (already set above). When splitting we 4961 * may start compounding over again. */ 4962 if (try_compound) 4963 ++sp->ts_complen; 4964 else 4965 sp->ts_compsplit = sp->ts_complen; 4966 sp->ts_prefixdepth = PFD_NOPREFIX; 4967 4968 /* set su->su_badflags to the caps type at this 4969 * position */ 4970 #ifdef FEAT_MBYTE 4971 if (has_mbyte) 4972 n = nofold_len(fword, sp->ts_fidx, su->su_badptr); 4973 else 4974 #endif 4975 n = sp->ts_fidx; 4976 su->su_badflags = badword_captype(su->su_badptr + n, 4977 su->su_badptr + su->su_badlen); 4978 4979 /* Restart at top of the tree. */ 4980 sp->ts_arridx = 0; 4981 4982 /* If there are postponed prefixes, try these too. */ 4983 if (pbyts != NULL) 4984 { 4985 byts = pbyts; 4986 idxs = pidxs; 4987 sp->ts_prefixdepth = PFD_PREFIXTREE; 4988 PROF_STORE(sp->ts_state) 4989 sp->ts_state = STATE_NOPREFIX; 4990 } 4991 } 4992 } 4993 } 4994 break; 4995 4996 case STATE_SPLITUNDO: 4997 /* Undo the changes done for word split or compound word. */ 4998 su->su_badflags = sp->ts_save_badflags; 4999 5000 /* Continue looking for NUL bytes. */ 5001 PROF_STORE(sp->ts_state) 5002 sp->ts_state = STATE_START; 5003 5004 /* In case we went into the prefix tree. 
*/ 5005 byts = fbyts; 5006 idxs = fidxs; 5007 break; 5008 5009 case STATE_ENDNUL: 5010 /* Past the NUL bytes in the node. */ 5011 su->su_badflags = sp->ts_save_badflags; 5012 if (fword[sp->ts_fidx] == NUL 5013 #ifdef FEAT_MBYTE 5014 && sp->ts_tcharlen == 0 5015 #endif 5016 ) 5017 { 5018 /* The badword ends, can't use STATE_PLAIN. */ 5019 PROF_STORE(sp->ts_state) 5020 sp->ts_state = STATE_DEL; 5021 break; 5022 } 5023 PROF_STORE(sp->ts_state) 5024 sp->ts_state = STATE_PLAIN; 5025 /*FALLTHROUGH*/ 5026 5027 case STATE_PLAIN: 5028 /* 5029 * Go over all possible bytes at this node, add each to tword[] 5030 * and use child node. "ts_curi" is the index. 5031 */ 5032 arridx = sp->ts_arridx; 5033 if (sp->ts_curi > byts[arridx]) 5034 { 5035 /* Done all bytes at this node, do next state. When still at 5036 * already changed bytes skip the other tricks. */ 5037 PROF_STORE(sp->ts_state) 5038 if (sp->ts_fidx >= sp->ts_fidxtry) 5039 sp->ts_state = STATE_DEL; 5040 else 5041 sp->ts_state = STATE_FINAL; 5042 } 5043 else 5044 { 5045 arridx += sp->ts_curi++; 5046 c = byts[arridx]; 5047 5048 /* Normal byte, go one level deeper. If it's not equal to the 5049 * byte in the bad word adjust the score. But don't even try 5050 * when the byte was already changed. And don't try when we 5051 * just deleted this byte, accepting it is always cheaper than 5052 * delete + substitute. 
*/ 5053 if (c == fword[sp->ts_fidx] 5054 #ifdef FEAT_MBYTE 5055 || (sp->ts_tcharlen > 0 && sp->ts_isdiff != DIFF_NONE) 5056 #endif 5057 ) 5058 newscore = 0; 5059 else 5060 newscore = SCORE_SUBST; 5061 if ((newscore == 0 5062 || (sp->ts_fidx >= sp->ts_fidxtry 5063 && ((sp->ts_flags & TSF_DIDDEL) == 0 5064 || c != fword[sp->ts_delidx]))) 5065 && TRY_DEEPER(su, stack, depth, newscore)) 5066 { 5067 go_deeper(stack, depth, newscore); 5068 #ifdef DEBUG_TRIEWALK 5069 if (newscore > 0) 5070 sprintf(changename[depth], "%.*s-%s: subst %c to %c", 5071 sp->ts_twordlen, tword, fword + sp->ts_fidx, 5072 fword[sp->ts_fidx], c); 5073 else 5074 sprintf(changename[depth], "%.*s-%s: accept %c", 5075 sp->ts_twordlen, tword, fword + sp->ts_fidx, 5076 fword[sp->ts_fidx]); 5077 #endif 5078 ++depth; 5079 sp = &stack[depth]; 5080 ++sp->ts_fidx; 5081 tword[sp->ts_twordlen++] = c; 5082 sp->ts_arridx = idxs[arridx]; 5083 #ifdef FEAT_MBYTE 5084 if (newscore == SCORE_SUBST) 5085 sp->ts_isdiff = DIFF_YES; 5086 if (has_mbyte) 5087 { 5088 /* Multi-byte characters are a bit complicated to 5089 * handle: They differ when any of the bytes differ 5090 * and then their length may also differ. */ 5091 if (sp->ts_tcharlen == 0) 5092 { 5093 /* First byte. */ 5094 sp->ts_tcharidx = 0; 5095 sp->ts_tcharlen = MB_BYTE2LEN(c); 5096 sp->ts_fcharstart = sp->ts_fidx - 1; 5097 sp->ts_isdiff = (newscore != 0) 5098 ? DIFF_YES : DIFF_NONE; 5099 } 5100 else if (sp->ts_isdiff == DIFF_INSERT) 5101 /* When inserting trail bytes don't advance in the 5102 * bad word. */ 5103 --sp->ts_fidx; 5104 if (++sp->ts_tcharidx == sp->ts_tcharlen) 5105 { 5106 /* Last byte of character. */ 5107 if (sp->ts_isdiff == DIFF_YES) 5108 { 5109 /* Correct ts_fidx for the byte length of the 5110 * character (we didn't check that before). */ 5111 sp->ts_fidx = sp->ts_fcharstart 5112 + MB_BYTE2LEN( 5113 fword[sp->ts_fcharstart]); 5114 5115 /* For changing a composing character adjust 5116 * the score from SCORE_SUBST to 5117 * SCORE_SUBCOMP. 
*/ 5118 if (enc_utf8 5119 && utf_iscomposing( 5120 mb_ptr2char(tword 5121 + sp->ts_twordlen 5122 - sp->ts_tcharlen)) 5123 && utf_iscomposing( 5124 mb_ptr2char(fword 5125 + sp->ts_fcharstart))) 5126 sp->ts_score -= 5127 SCORE_SUBST - SCORE_SUBCOMP; 5128 5129 /* For a similar character adjust score from 5130 * SCORE_SUBST to SCORE_SIMILAR. */ 5131 else if (!soundfold 5132 && slang->sl_has_map 5133 && similar_chars(slang, 5134 mb_ptr2char(tword 5135 + sp->ts_twordlen 5136 - sp->ts_tcharlen), 5137 mb_ptr2char(fword 5138 + sp->ts_fcharstart))) 5139 sp->ts_score -= 5140 SCORE_SUBST - SCORE_SIMILAR; 5141 } 5142 else if (sp->ts_isdiff == DIFF_INSERT 5143 && sp->ts_twordlen > sp->ts_tcharlen) 5144 { 5145 p = tword + sp->ts_twordlen - sp->ts_tcharlen; 5146 c = mb_ptr2char(p); 5147 if (enc_utf8 && utf_iscomposing(c)) 5148 { 5149 /* Inserting a composing char doesn't 5150 * count that much. */ 5151 sp->ts_score -= SCORE_INS - SCORE_INSCOMP; 5152 } 5153 else 5154 { 5155 /* If the previous character was the same, 5156 * thus doubling a character, give a bonus 5157 * to the score. Also for the soundfold 5158 * tree (might seem illogical but does 5159 * give better scores). */ 5160 mb_ptr_back(tword, p); 5161 if (c == mb_ptr2char(p)) 5162 sp->ts_score -= SCORE_INS 5163 - SCORE_INSDUP; 5164 } 5165 } 5166 5167 /* Starting a new char, reset the length. */ 5168 sp->ts_tcharlen = 0; 5169 } 5170 } 5171 else 5172 #endif 5173 { 5174 /* If we found a similar char adjust the score. 5175 * We do this after calling go_deeper() because 5176 * it's slow. */ 5177 if (newscore != 0 5178 && !soundfold 5179 && slang->sl_has_map 5180 && similar_chars(slang, 5181 c, fword[sp->ts_fidx - 1])) 5182 sp->ts_score -= SCORE_SUBST - SCORE_SIMILAR; 5183 } 5184 } 5185 } 5186 break; 5187 5188 case STATE_DEL: 5189 #ifdef FEAT_MBYTE 5190 /* When past the first byte of a multi-byte char don't try 5191 * delete/insert/swap a character. 
*/ 5192 if (has_mbyte && sp->ts_tcharlen > 0) 5193 { 5194 PROF_STORE(sp->ts_state) 5195 sp->ts_state = STATE_FINAL; 5196 break; 5197 } 5198 #endif 5199 /* 5200 * Try skipping one character in the bad word (delete it). 5201 */ 5202 PROF_STORE(sp->ts_state) 5203 sp->ts_state = STATE_INS_PREP; 5204 sp->ts_curi = 1; 5205 if (soundfold && sp->ts_fidx == 0 && fword[sp->ts_fidx] == '*') 5206 /* Deleting a vowel at the start of a word counts less, see 5207 * soundalike_score(). */ 5208 newscore = 2 * SCORE_DEL / 3; 5209 else 5210 newscore = SCORE_DEL; 5211 if (fword[sp->ts_fidx] != NUL 5212 && TRY_DEEPER(su, stack, depth, newscore)) 5213 { 5214 go_deeper(stack, depth, newscore); 5215 #ifdef DEBUG_TRIEWALK 5216 sprintf(changename[depth], "%.*s-%s: delete %c", 5217 sp->ts_twordlen, tword, fword + sp->ts_fidx, 5218 fword[sp->ts_fidx]); 5219 #endif 5220 ++depth; 5221 5222 /* Remember what character we deleted, so that we can avoid 5223 * inserting it again. */ 5224 stack[depth].ts_flags |= TSF_DIDDEL; 5225 stack[depth].ts_delidx = sp->ts_fidx; 5226 5227 /* Advance over the character in fword[]. Give a bonus to the 5228 * score if the same character is following "nn" -> "n". It's 5229 * a bit illogical for soundfold tree but it does give better 5230 * results. 
*/ 5231 #ifdef FEAT_MBYTE 5232 if (has_mbyte) 5233 { 5234 c = mb_ptr2char(fword + sp->ts_fidx); 5235 stack[depth].ts_fidx += MB_BYTE2LEN(fword[sp->ts_fidx]); 5236 if (enc_utf8 && utf_iscomposing(c)) 5237 stack[depth].ts_score -= SCORE_DEL - SCORE_DELCOMP; 5238 else if (c == mb_ptr2char(fword + stack[depth].ts_fidx)) 5239 stack[depth].ts_score -= SCORE_DEL - SCORE_DELDUP; 5240 } 5241 else 5242 #endif 5243 { 5244 ++stack[depth].ts_fidx; 5245 if (fword[sp->ts_fidx] == fword[sp->ts_fidx + 1]) 5246 stack[depth].ts_score -= SCORE_DEL - SCORE_DELDUP; 5247 } 5248 break; 5249 } 5250 /*FALLTHROUGH*/ 5251 5252 case STATE_INS_PREP: 5253 if (sp->ts_flags & TSF_DIDDEL) 5254 { 5255 /* If we just deleted a byte then inserting won't make sense, 5256 * a substitute is always cheaper. */ 5257 PROF_STORE(sp->ts_state) 5258 sp->ts_state = STATE_SWAP; 5259 break; 5260 } 5261 5262 /* skip over NUL bytes */ 5263 n = sp->ts_arridx; 5264 for (;;) 5265 { 5266 if (sp->ts_curi > byts[n]) 5267 { 5268 /* Only NUL bytes at this node, go to next state. */ 5269 PROF_STORE(sp->ts_state) 5270 sp->ts_state = STATE_SWAP; 5271 break; 5272 } 5273 if (byts[n + sp->ts_curi] != NUL) 5274 { 5275 /* Found a byte to insert. */ 5276 PROF_STORE(sp->ts_state) 5277 sp->ts_state = STATE_INS; 5278 break; 5279 } 5280 ++sp->ts_curi; 5281 } 5282 break; 5283 5284 /*FALLTHROUGH*/ 5285 5286 case STATE_INS: 5287 /* Insert one byte. Repeat this for each possible byte at this 5288 * node. */ 5289 n = sp->ts_arridx; 5290 if (sp->ts_curi > byts[n]) 5291 { 5292 /* Done all bytes at this node, go to next state. */ 5293 PROF_STORE(sp->ts_state) 5294 sp->ts_state = STATE_SWAP; 5295 break; 5296 } 5297 5298 /* Do one more byte at this node, but: 5299 * - Skip NUL bytes. 5300 * - Skip the byte if it's equal to the byte in the word, 5301 * accepting that byte is always better. 
5302 */ 5303 n += sp->ts_curi++; 5304 c = byts[n]; 5305 if (soundfold && sp->ts_twordlen == 0 && c == '*') 5306 /* Inserting a vowel at the start of a word counts less, 5307 * see soundalike_score(). */ 5308 newscore = 2 * SCORE_INS / 3; 5309 else 5310 newscore = SCORE_INS; 5311 if (c != fword[sp->ts_fidx] 5312 && TRY_DEEPER(su, stack, depth, newscore)) 5313 { 5314 go_deeper(stack, depth, newscore); 5315 #ifdef DEBUG_TRIEWALK 5316 sprintf(changename[depth], "%.*s-%s: insert %c", 5317 sp->ts_twordlen, tword, fword + sp->ts_fidx, 5318 c); 5319 #endif 5320 ++depth; 5321 sp = &stack[depth]; 5322 tword[sp->ts_twordlen++] = c; 5323 sp->ts_arridx = idxs[n]; 5324 #ifdef FEAT_MBYTE 5325 if (has_mbyte) 5326 { 5327 fl = MB_BYTE2LEN(c); 5328 if (fl > 1) 5329 { 5330 /* There are following bytes for the same character. 5331 * We must find all bytes before trying 5332 * delete/insert/swap/etc. */ 5333 sp->ts_tcharlen = fl; 5334 sp->ts_tcharidx = 1; 5335 sp->ts_isdiff = DIFF_INSERT; 5336 } 5337 } 5338 else 5339 fl = 1; 5340 if (fl == 1) 5341 #endif 5342 { 5343 /* If the previous character was the same, thus doubling a 5344 * character, give a bonus to the score. Also for 5345 * soundfold words (illogical but does give a better 5346 * score). */ 5347 if (sp->ts_twordlen >= 2 5348 && tword[sp->ts_twordlen - 2] == c) 5349 sp->ts_score -= SCORE_INS - SCORE_INSDUP; 5350 } 5351 } 5352 break; 5353 5354 case STATE_SWAP: 5355 /* 5356 * Swap two bytes in the bad word: "12" -> "21". 5357 * We change "fword" here, it's changed back afterwards at 5358 * STATE_UNSWAP. 5359 */ 5360 p = fword + sp->ts_fidx; 5361 c = *p; 5362 if (c == NUL) 5363 { 5364 /* End of word, can't swap or replace. */ 5365 PROF_STORE(sp->ts_state) 5366 sp->ts_state = STATE_FINAL; 5367 break; 5368 } 5369 5370 /* Don't swap if the first character is not a word character. 5371 * SWAP3 etc. also don't make sense then. 
*/ 5372 if (!soundfold && !spell_iswordp(p, curwin)) 5373 { 5374 PROF_STORE(sp->ts_state) 5375 sp->ts_state = STATE_REP_INI; 5376 break; 5377 } 5378 5379 #ifdef FEAT_MBYTE 5380 if (has_mbyte) 5381 { 5382 n = MB_CPTR2LEN(p); 5383 c = mb_ptr2char(p); 5384 if (p[n] == NUL) 5385 c2 = NUL; 5386 else if (!soundfold && !spell_iswordp(p + n, curwin)) 5387 c2 = c; /* don't swap non-word char */ 5388 else 5389 c2 = mb_ptr2char(p + n); 5390 } 5391 else 5392 #endif 5393 { 5394 if (p[1] == NUL) 5395 c2 = NUL; 5396 else if (!soundfold && !spell_iswordp(p + 1, curwin)) 5397 c2 = c; /* don't swap non-word char */ 5398 else 5399 c2 = p[1]; 5400 } 5401 5402 /* When the second character is NUL we can't swap. */ 5403 if (c2 == NUL) 5404 { 5405 PROF_STORE(sp->ts_state) 5406 sp->ts_state = STATE_REP_INI; 5407 break; 5408 } 5409 5410 /* When characters are identical, swap won't do anything. 5411 * Also get here if the second char is not a word character. */ 5412 if (c == c2) 5413 { 5414 PROF_STORE(sp->ts_state) 5415 sp->ts_state = STATE_SWAP3; 5416 break; 5417 } 5418 if (c2 != NUL && TRY_DEEPER(su, stack, depth, SCORE_SWAP)) 5419 { 5420 go_deeper(stack, depth, SCORE_SWAP); 5421 #ifdef DEBUG_TRIEWALK 5422 sprintf(changename[depth], "%.*s-%s: swap %c and %c", 5423 sp->ts_twordlen, tword, fword + sp->ts_fidx, 5424 c, c2); 5425 #endif 5426 PROF_STORE(sp->ts_state) 5427 sp->ts_state = STATE_UNSWAP; 5428 ++depth; 5429 #ifdef FEAT_MBYTE 5430 if (has_mbyte) 5431 { 5432 fl = mb_char2len(c2); 5433 mch_memmove(p, p + n, fl); 5434 mb_char2bytes(c, p + fl); 5435 stack[depth].ts_fidxtry = sp->ts_fidx + n + fl; 5436 } 5437 else 5438 #endif 5439 { 5440 p[0] = c2; 5441 p[1] = c; 5442 stack[depth].ts_fidxtry = sp->ts_fidx + 2; 5443 } 5444 } 5445 else 5446 { 5447 /* If this swap doesn't work then SWAP3 won't either. */ 5448 PROF_STORE(sp->ts_state) 5449 sp->ts_state = STATE_REP_INI; 5450 } 5451 break; 5452 5453 case STATE_UNSWAP: 5454 /* Undo the STATE_SWAP swap: "21" -> "12". 
*/ 5455 p = fword + sp->ts_fidx; 5456 #ifdef FEAT_MBYTE 5457 if (has_mbyte) 5458 { 5459 n = MB_BYTE2LEN(*p); 5460 c = mb_ptr2char(p + n); 5461 mch_memmove(p + MB_BYTE2LEN(p[n]), p, n); 5462 mb_char2bytes(c, p); 5463 } 5464 else 5465 #endif 5466 { 5467 c = *p; 5468 *p = p[1]; 5469 p[1] = c; 5470 } 5471 /*FALLTHROUGH*/ 5472 5473 case STATE_SWAP3: 5474 /* Swap two bytes, skipping one: "123" -> "321". We change 5475 * "fword" here, it's changed back afterwards at STATE_UNSWAP3. */ 5476 p = fword + sp->ts_fidx; 5477 #ifdef FEAT_MBYTE 5478 if (has_mbyte) 5479 { 5480 n = MB_CPTR2LEN(p); 5481 c = mb_ptr2char(p); 5482 fl = MB_CPTR2LEN(p + n); 5483 c2 = mb_ptr2char(p + n); 5484 if (!soundfold && !spell_iswordp(p + n + fl, curwin)) 5485 c3 = c; /* don't swap non-word char */ 5486 else 5487 c3 = mb_ptr2char(p + n + fl); 5488 } 5489 else 5490 #endif 5491 { 5492 c = *p; 5493 c2 = p[1]; 5494 if (!soundfold && !spell_iswordp(p + 2, curwin)) 5495 c3 = c; /* don't swap non-word char */ 5496 else 5497 c3 = p[2]; 5498 } 5499 5500 /* When characters are identical: "121" then SWAP3 result is 5501 * identical, ROT3L result is same as SWAP: "211", ROT3L result is 5502 * same as SWAP on next char: "112". Thus skip all swapping. 5503 * Also skip when c3 is NUL. 5504 * Also get here when the third character is not a word character. 
5505 * Second character may any char: "a.b" -> "b.a" */ 5506 if (c == c3 || c3 == NUL) 5507 { 5508 PROF_STORE(sp->ts_state) 5509 sp->ts_state = STATE_REP_INI; 5510 break; 5511 } 5512 if (TRY_DEEPER(su, stack, depth, SCORE_SWAP3)) 5513 { 5514 go_deeper(stack, depth, SCORE_SWAP3); 5515 #ifdef DEBUG_TRIEWALK 5516 sprintf(changename[depth], "%.*s-%s: swap3 %c and %c", 5517 sp->ts_twordlen, tword, fword + sp->ts_fidx, 5518 c, c3); 5519 #endif 5520 PROF_STORE(sp->ts_state) 5521 sp->ts_state = STATE_UNSWAP3; 5522 ++depth; 5523 #ifdef FEAT_MBYTE 5524 if (has_mbyte) 5525 { 5526 tl = mb_char2len(c3); 5527 mch_memmove(p, p + n + fl, tl); 5528 mb_char2bytes(c2, p + tl); 5529 mb_char2bytes(c, p + fl + tl); 5530 stack[depth].ts_fidxtry = sp->ts_fidx + n + fl + tl; 5531 } 5532 else 5533 #endif 5534 { 5535 p[0] = p[2]; 5536 p[2] = c; 5537 stack[depth].ts_fidxtry = sp->ts_fidx + 3; 5538 } 5539 } 5540 else 5541 { 5542 PROF_STORE(sp->ts_state) 5543 sp->ts_state = STATE_REP_INI; 5544 } 5545 break; 5546 5547 case STATE_UNSWAP3: 5548 /* Undo STATE_SWAP3: "321" -> "123" */ 5549 p = fword + sp->ts_fidx; 5550 #ifdef FEAT_MBYTE 5551 if (has_mbyte) 5552 { 5553 n = MB_BYTE2LEN(*p); 5554 c2 = mb_ptr2char(p + n); 5555 fl = MB_BYTE2LEN(p[n]); 5556 c = mb_ptr2char(p + n + fl); 5557 tl = MB_BYTE2LEN(p[n + fl]); 5558 mch_memmove(p + fl + tl, p, n); 5559 mb_char2bytes(c, p); 5560 mb_char2bytes(c2, p + tl); 5561 p = p + tl; 5562 } 5563 else 5564 #endif 5565 { 5566 c = *p; 5567 *p = p[2]; 5568 p[2] = c; 5569 ++p; 5570 } 5571 5572 if (!soundfold && !spell_iswordp(p, curwin)) 5573 { 5574 /* Middle char is not a word char, skip the rotate. First and 5575 * third char were already checked at swap and swap3. */ 5576 PROF_STORE(sp->ts_state) 5577 sp->ts_state = STATE_REP_INI; 5578 break; 5579 } 5580 5581 /* Rotate three characters left: "123" -> "231". We change 5582 * "fword" here, it's changed back afterwards at STATE_UNROT3L. 
*/ 5583 if (TRY_DEEPER(su, stack, depth, SCORE_SWAP3)) 5584 { 5585 go_deeper(stack, depth, SCORE_SWAP3); 5586 #ifdef DEBUG_TRIEWALK 5587 p = fword + sp->ts_fidx; 5588 sprintf(changename[depth], "%.*s-%s: rotate left %c%c%c", 5589 sp->ts_twordlen, tword, fword + sp->ts_fidx, 5590 p[0], p[1], p[2]); 5591 #endif 5592 PROF_STORE(sp->ts_state) 5593 sp->ts_state = STATE_UNROT3L; 5594 ++depth; 5595 p = fword + sp->ts_fidx; 5596 #ifdef FEAT_MBYTE 5597 if (has_mbyte) 5598 { 5599 n = MB_CPTR2LEN(p); 5600 c = mb_ptr2char(p); 5601 fl = MB_CPTR2LEN(p + n); 5602 fl += MB_CPTR2LEN(p + n + fl); 5603 mch_memmove(p, p + n, fl); 5604 mb_char2bytes(c, p + fl); 5605 stack[depth].ts_fidxtry = sp->ts_fidx + n + fl; 5606 } 5607 else 5608 #endif 5609 { 5610 c = *p; 5611 *p = p[1]; 5612 p[1] = p[2]; 5613 p[2] = c; 5614 stack[depth].ts_fidxtry = sp->ts_fidx + 3; 5615 } 5616 } 5617 else 5618 { 5619 PROF_STORE(sp->ts_state) 5620 sp->ts_state = STATE_REP_INI; 5621 } 5622 break; 5623 5624 case STATE_UNROT3L: 5625 /* Undo ROT3L: "231" -> "123" */ 5626 p = fword + sp->ts_fidx; 5627 #ifdef FEAT_MBYTE 5628 if (has_mbyte) 5629 { 5630 n = MB_BYTE2LEN(*p); 5631 n += MB_BYTE2LEN(p[n]); 5632 c = mb_ptr2char(p + n); 5633 tl = MB_BYTE2LEN(p[n]); 5634 mch_memmove(p + tl, p, n); 5635 mb_char2bytes(c, p); 5636 } 5637 else 5638 #endif 5639 { 5640 c = p[2]; 5641 p[2] = p[1]; 5642 p[1] = *p; 5643 *p = c; 5644 } 5645 5646 /* Rotate three bytes right: "123" -> "312". We change "fword" 5647 * here, it's changed back afterwards at STATE_UNROT3R. 
*/ 5648 if (TRY_DEEPER(su, stack, depth, SCORE_SWAP3)) 5649 { 5650 go_deeper(stack, depth, SCORE_SWAP3); 5651 #ifdef DEBUG_TRIEWALK 5652 p = fword + sp->ts_fidx; 5653 sprintf(changename[depth], "%.*s-%s: rotate right %c%c%c", 5654 sp->ts_twordlen, tword, fword + sp->ts_fidx, 5655 p[0], p[1], p[2]); 5656 #endif 5657 PROF_STORE(sp->ts_state) 5658 sp->ts_state = STATE_UNROT3R; 5659 ++depth; 5660 p = fword + sp->ts_fidx; 5661 #ifdef FEAT_MBYTE 5662 if (has_mbyte) 5663 { 5664 n = MB_CPTR2LEN(p); 5665 n += MB_CPTR2LEN(p + n); 5666 c = mb_ptr2char(p + n); 5667 tl = MB_CPTR2LEN(p + n); 5668 mch_memmove(p + tl, p, n); 5669 mb_char2bytes(c, p); 5670 stack[depth].ts_fidxtry = sp->ts_fidx + n + tl; 5671 } 5672 else 5673 #endif 5674 { 5675 c = p[2]; 5676 p[2] = p[1]; 5677 p[1] = *p; 5678 *p = c; 5679 stack[depth].ts_fidxtry = sp->ts_fidx + 3; 5680 } 5681 } 5682 else 5683 { 5684 PROF_STORE(sp->ts_state) 5685 sp->ts_state = STATE_REP_INI; 5686 } 5687 break; 5688 5689 case STATE_UNROT3R: 5690 /* Undo ROT3R: "312" -> "123" */ 5691 p = fword + sp->ts_fidx; 5692 #ifdef FEAT_MBYTE 5693 if (has_mbyte) 5694 { 5695 c = mb_ptr2char(p); 5696 tl = MB_BYTE2LEN(*p); 5697 n = MB_BYTE2LEN(p[tl]); 5698 n += MB_BYTE2LEN(p[tl + n]); 5699 mch_memmove(p, p + tl, n); 5700 mb_char2bytes(c, p + n); 5701 } 5702 else 5703 #endif 5704 { 5705 c = *p; 5706 *p = p[1]; 5707 p[1] = p[2]; 5708 p[2] = c; 5709 } 5710 /*FALLTHROUGH*/ 5711 5712 case STATE_REP_INI: 5713 /* Check if matching with REP items from the .aff file would work. 
5714 * Quickly skip if: 5715 * - there are no REP items and we are not in the soundfold trie 5716 * - the score is going to be too high anyway 5717 * - already applied a REP item or swapped here */ 5718 if ((lp->lp_replang == NULL && !soundfold) 5719 || sp->ts_score + SCORE_REP >= su->su_maxscore 5720 || sp->ts_fidx < sp->ts_fidxtry) 5721 { 5722 PROF_STORE(sp->ts_state) 5723 sp->ts_state = STATE_FINAL; 5724 break; 5725 } 5726 5727 /* Use the first byte to quickly find the first entry that may 5728 * match. If the index is -1 there is none. */ 5729 if (soundfold) 5730 sp->ts_curi = slang->sl_repsal_first[fword[sp->ts_fidx]]; 5731 else 5732 sp->ts_curi = lp->lp_replang->sl_rep_first[fword[sp->ts_fidx]]; 5733 5734 if (sp->ts_curi < 0) 5735 { 5736 PROF_STORE(sp->ts_state) 5737 sp->ts_state = STATE_FINAL; 5738 break; 5739 } 5740 5741 PROF_STORE(sp->ts_state) 5742 sp->ts_state = STATE_REP; 5743 /*FALLTHROUGH*/ 5744 5745 case STATE_REP: 5746 /* Try matching with REP items from the .aff file. For each match 5747 * replace the characters and check if the resulting word is 5748 * valid. */ 5749 p = fword + sp->ts_fidx; 5750 5751 if (soundfold) 5752 gap = &slang->sl_repsal; 5753 else 5754 gap = &lp->lp_replang->sl_rep; 5755 while (sp->ts_curi < gap->ga_len) 5756 { 5757 ftp = (fromto_T *)gap->ga_data + sp->ts_curi++; 5758 if (*ftp->ft_from != *p) 5759 { 5760 /* past possible matching entries */ 5761 sp->ts_curi = gap->ga_len; 5762 break; 5763 } 5764 if (STRNCMP(ftp->ft_from, p, STRLEN(ftp->ft_from)) == 0 5765 && TRY_DEEPER(su, stack, depth, SCORE_REP)) 5766 { 5767 go_deeper(stack, depth, SCORE_REP); 5768 #ifdef DEBUG_TRIEWALK 5769 sprintf(changename[depth], "%.*s-%s: replace %s with %s", 5770 sp->ts_twordlen, tword, fword + sp->ts_fidx, 5771 ftp->ft_from, ftp->ft_to); 5772 #endif 5773 /* Need to undo this afterwards. */ 5774 PROF_STORE(sp->ts_state) 5775 sp->ts_state = STATE_REP_UNDO; 5776 5777 /* Change the "from" to the "to" string. 
*/ 5778 ++depth; 5779 fl = (int)STRLEN(ftp->ft_from); 5780 tl = (int)STRLEN(ftp->ft_to); 5781 if (fl != tl) 5782 { 5783 STRMOVE(p + tl, p + fl); 5784 repextra += tl - fl; 5785 } 5786 mch_memmove(p, ftp->ft_to, tl); 5787 stack[depth].ts_fidxtry = sp->ts_fidx + tl; 5788 #ifdef FEAT_MBYTE 5789 stack[depth].ts_tcharlen = 0; 5790 #endif 5791 break; 5792 } 5793 } 5794 5795 if (sp->ts_curi >= gap->ga_len && sp->ts_state == STATE_REP) 5796 { 5797 /* No (more) matches. */ 5798 PROF_STORE(sp->ts_state) 5799 sp->ts_state = STATE_FINAL; 5800 } 5801 5802 break; 5803 5804 case STATE_REP_UNDO: 5805 /* Undo a REP replacement and continue with the next one. */ 5806 if (soundfold) 5807 gap = &slang->sl_repsal; 5808 else 5809 gap = &lp->lp_replang->sl_rep; 5810 ftp = (fromto_T *)gap->ga_data + sp->ts_curi - 1; 5811 fl = (int)STRLEN(ftp->ft_from); 5812 tl = (int)STRLEN(ftp->ft_to); 5813 p = fword + sp->ts_fidx; 5814 if (fl != tl) 5815 { 5816 STRMOVE(p + fl, p + tl); 5817 repextra -= tl - fl; 5818 } 5819 mch_memmove(p, ftp->ft_from, fl); 5820 PROF_STORE(sp->ts_state) 5821 sp->ts_state = STATE_REP; 5822 break; 5823 5824 default: 5825 /* Did all possible states at this level, go up one level. */ 5826 --depth; 5827 5828 if (depth >= 0 && stack[depth].ts_prefixdepth == PFD_PREFIXTREE) 5829 { 5830 /* Continue in or go back to the prefix tree. */ 5831 byts = pbyts; 5832 idxs = pidxs; 5833 } 5834 5835 /* Don't check for CTRL-C too often, it takes time. */ 5836 if (--breakcheckcount == 0) 5837 { 5838 ui_breakcheck(); 5839 breakcheckcount = 1000; 5840 } 5841 } 5842 } 5843 } 5844 5845 5846 /* 5847 * Go one level deeper in the tree. 
5848 */ 5849 static void 5850 go_deeper(trystate_T *stack, int depth, int score_add) 5851 { 5852 stack[depth + 1] = stack[depth]; 5853 stack[depth + 1].ts_state = STATE_START; 5854 stack[depth + 1].ts_score = stack[depth].ts_score + score_add; 5855 stack[depth + 1].ts_curi = 1; /* start just after length byte */ 5856 stack[depth + 1].ts_flags = 0; 5857 } 5858 5859 #ifdef FEAT_MBYTE 5860 /* 5861 * Case-folding may change the number of bytes: Count nr of chars in 5862 * fword[flen] and return the byte length of that many chars in "word". 5863 */ 5864 static int 5865 nofold_len(char_u *fword, int flen, char_u *word) 5866 { 5867 char_u *p; 5868 int i = 0; 5869 5870 for (p = fword; p < fword + flen; mb_ptr_adv(p)) 5871 ++i; 5872 for (p = word; i > 0; mb_ptr_adv(p)) 5873 --i; 5874 return (int)(p - word); 5875 } 5876 #endif 5877 5878 /* 5879 * "fword" is a good word with case folded. Find the matching keep-case 5880 * words and put it in "kword". 5881 * Theoretically there could be several keep-case words that result in the 5882 * same case-folded word, but we only find one... 5883 */ 5884 static void 5885 find_keepcap_word(slang_T *slang, char_u *fword, char_u *kword) 5886 { 5887 char_u uword[MAXWLEN]; /* "fword" in upper-case */ 5888 int depth; 5889 idx_T tryidx; 5890 5891 /* The following arrays are used at each depth in the tree. */ 5892 idx_T arridx[MAXWLEN]; 5893 int round[MAXWLEN]; 5894 int fwordidx[MAXWLEN]; 5895 int uwordidx[MAXWLEN]; 5896 int kwordlen[MAXWLEN]; 5897 5898 int flen, ulen; 5899 int l; 5900 int len; 5901 int c; 5902 idx_T lo, hi, m; 5903 char_u *p; 5904 char_u *byts = slang->sl_kbyts; /* array with bytes of the words */ 5905 idx_T *idxs = slang->sl_kidxs; /* array with indexes */ 5906 5907 if (byts == NULL) 5908 { 5909 /* array is empty: "cannot happen" */ 5910 *kword = NUL; 5911 return; 5912 } 5913 5914 /* Make an all-cap version of "fword". 
*/ 5915 allcap_copy(fword, uword); 5916 5917 /* 5918 * Each character needs to be tried both case-folded and upper-case. 5919 * All this gets very complicated if we keep in mind that changing case 5920 * may change the byte length of a multi-byte character... 5921 */ 5922 depth = 0; 5923 arridx[0] = 0; 5924 round[0] = 0; 5925 fwordidx[0] = 0; 5926 uwordidx[0] = 0; 5927 kwordlen[0] = 0; 5928 while (depth >= 0) 5929 { 5930 if (fword[fwordidx[depth]] == NUL) 5931 { 5932 /* We are at the end of "fword". If the tree allows a word to end 5933 * here we have found a match. */ 5934 if (byts[arridx[depth] + 1] == 0) 5935 { 5936 kword[kwordlen[depth]] = NUL; 5937 return; 5938 } 5939 5940 /* kword is getting too long, continue one level up */ 5941 --depth; 5942 } 5943 else if (++round[depth] > 2) 5944 { 5945 /* tried both fold-case and upper-case character, continue one 5946 * level up */ 5947 --depth; 5948 } 5949 else 5950 { 5951 /* 5952 * round[depth] == 1: Try using the folded-case character. 5953 * round[depth] == 2: Try using the upper-case character. 5954 */ 5955 #ifdef FEAT_MBYTE 5956 if (has_mbyte) 5957 { 5958 flen = MB_CPTR2LEN(fword + fwordidx[depth]); 5959 ulen = MB_CPTR2LEN(uword + uwordidx[depth]); 5960 } 5961 else 5962 #endif 5963 ulen = flen = 1; 5964 if (round[depth] == 1) 5965 { 5966 p = fword + fwordidx[depth]; 5967 l = flen; 5968 } 5969 else 5970 { 5971 p = uword + uwordidx[depth]; 5972 l = ulen; 5973 } 5974 5975 for (tryidx = arridx[depth]; l > 0; --l) 5976 { 5977 /* Perform a binary search in the list of accepted bytes. */ 5978 len = byts[tryidx++]; 5979 c = *p++; 5980 lo = tryidx; 5981 hi = tryidx + len - 1; 5982 while (lo < hi) 5983 { 5984 m = (lo + hi) / 2; 5985 if (byts[m] > c) 5986 hi = m - 1; 5987 else if (byts[m] < c) 5988 lo = m + 1; 5989 else 5990 { 5991 lo = hi = m; 5992 break; 5993 } 5994 } 5995 5996 /* Stop if there is no matching byte. */ 5997 if (hi < lo || byts[lo] != c) 5998 break; 5999 6000 /* Continue at the child (if there is one). 
*/ 6001 tryidx = idxs[lo]; 6002 } 6003 6004 if (l == 0) 6005 { 6006 /* 6007 * Found the matching char. Copy it to "kword" and go a 6008 * level deeper. 6009 */ 6010 if (round[depth] == 1) 6011 { 6012 STRNCPY(kword + kwordlen[depth], fword + fwordidx[depth], 6013 flen); 6014 kwordlen[depth + 1] = kwordlen[depth] + flen; 6015 } 6016 else 6017 { 6018 STRNCPY(kword + kwordlen[depth], uword + uwordidx[depth], 6019 ulen); 6020 kwordlen[depth + 1] = kwordlen[depth] + ulen; 6021 } 6022 fwordidx[depth + 1] = fwordidx[depth] + flen; 6023 uwordidx[depth + 1] = uwordidx[depth] + ulen; 6024 6025 ++depth; 6026 arridx[depth] = tryidx; 6027 round[depth] = 0; 6028 } 6029 } 6030 } 6031 6032 /* Didn't find it: "cannot happen". */ 6033 *kword = NUL; 6034 } 6035 6036 /* 6037 * Compute the sound-a-like score for suggestions in su->su_ga and add them to 6038 * su->su_sga. 6039 */ 6040 static void 6041 score_comp_sal(suginfo_T *su) 6042 { 6043 langp_T *lp; 6044 char_u badsound[MAXWLEN]; 6045 int i; 6046 suggest_T *stp; 6047 suggest_T *sstp; 6048 int score; 6049 int lpi; 6050 6051 if (ga_grow(&su->su_sga, su->su_ga.ga_len) == FAIL) 6052 return; 6053 6054 /* Use the sound-folding of the first language that supports it. */ 6055 for (lpi = 0; lpi < curwin->w_s->b_langp.ga_len; ++lpi) 6056 { 6057 lp = LANGP_ENTRY(curwin->w_s->b_langp, lpi); 6058 if (lp->lp_slang->sl_sal.ga_len > 0) 6059 { 6060 /* soundfold the bad word */ 6061 spell_soundfold(lp->lp_slang, su->su_fbadword, TRUE, badsound); 6062 6063 for (i = 0; i < su->su_ga.ga_len; ++i) 6064 { 6065 stp = &SUG(su->su_ga, i); 6066 6067 /* Case-fold the suggested word, sound-fold it and compute the 6068 * sound-a-like score. */ 6069 score = stp_sal_score(stp, su, lp->lp_slang, badsound); 6070 if (score < SCORE_MAXMAX) 6071 { 6072 /* Add the suggestion. 
*/ 6073 sstp = &SUG(su->su_sga, su->su_sga.ga_len); 6074 sstp->st_word = vim_strsave(stp->st_word); 6075 if (sstp->st_word != NULL) 6076 { 6077 sstp->st_wordlen = stp->st_wordlen; 6078 sstp->st_score = score; 6079 sstp->st_altscore = 0; 6080 sstp->st_orglen = stp->st_orglen; 6081 ++su->su_sga.ga_len; 6082 } 6083 } 6084 } 6085 break; 6086 } 6087 } 6088 } 6089 6090 /* 6091 * Combine the list of suggestions in su->su_ga and su->su_sga. 6092 * They are entwined. 6093 */ 6094 static void 6095 score_combine(suginfo_T *su) 6096 { 6097 int i; 6098 int j; 6099 garray_T ga; 6100 garray_T *gap; 6101 langp_T *lp; 6102 suggest_T *stp; 6103 char_u *p; 6104 char_u badsound[MAXWLEN]; 6105 int round; 6106 int lpi; 6107 slang_T *slang = NULL; 6108 6109 /* Add the alternate score to su_ga. */ 6110 for (lpi = 0; lpi < curwin->w_s->b_langp.ga_len; ++lpi) 6111 { 6112 lp = LANGP_ENTRY(curwin->w_s->b_langp, lpi); 6113 if (lp->lp_slang->sl_sal.ga_len > 0) 6114 { 6115 /* soundfold the bad word */ 6116 slang = lp->lp_slang; 6117 spell_soundfold(slang, su->su_fbadword, TRUE, badsound); 6118 6119 for (i = 0; i < su->su_ga.ga_len; ++i) 6120 { 6121 stp = &SUG(su->su_ga, i); 6122 stp->st_altscore = stp_sal_score(stp, su, slang, badsound); 6123 if (stp->st_altscore == SCORE_MAXMAX) 6124 stp->st_score = (stp->st_score * 3 + SCORE_BIG) / 4; 6125 else 6126 stp->st_score = (stp->st_score * 3 6127 + stp->st_altscore) / 4; 6128 stp->st_salscore = FALSE; 6129 } 6130 break; 6131 } 6132 } 6133 6134 if (slang == NULL) /* Using "double" without sound folding. */ 6135 { 6136 (void)cleanup_suggestions(&su->su_ga, su->su_maxscore, 6137 su->su_maxcount); 6138 return; 6139 } 6140 6141 /* Add the alternate score to su_sga. 
*/ 6142 for (i = 0; i < su->su_sga.ga_len; ++i) 6143 { 6144 stp = &SUG(su->su_sga, i); 6145 stp->st_altscore = spell_edit_score(slang, 6146 su->su_badword, stp->st_word); 6147 if (stp->st_score == SCORE_MAXMAX) 6148 stp->st_score = (SCORE_BIG * 7 + stp->st_altscore) / 8; 6149 else 6150 stp->st_score = (stp->st_score * 7 + stp->st_altscore) / 8; 6151 stp->st_salscore = TRUE; 6152 } 6153 6154 /* Remove bad suggestions, sort the suggestions and truncate at "maxcount" 6155 * for both lists. */ 6156 check_suggestions(su, &su->su_ga); 6157 (void)cleanup_suggestions(&su->su_ga, su->su_maxscore, su->su_maxcount); 6158 check_suggestions(su, &su->su_sga); 6159 (void)cleanup_suggestions(&su->su_sga, su->su_maxscore, su->su_maxcount); 6160 6161 ga_init2(&ga, (int)sizeof(suginfo_T), 1); 6162 if (ga_grow(&ga, su->su_ga.ga_len + su->su_sga.ga_len) == FAIL) 6163 return; 6164 6165 stp = &SUG(ga, 0); 6166 for (i = 0; i < su->su_ga.ga_len || i < su->su_sga.ga_len; ++i) 6167 { 6168 /* round 1: get a suggestion from su_ga 6169 * round 2: get a suggestion from su_sga */ 6170 for (round = 1; round <= 2; ++round) 6171 { 6172 gap = round == 1 ? &su->su_ga : &su->su_sga; 6173 if (i < gap->ga_len) 6174 { 6175 /* Don't add a word if it's already there. */ 6176 p = SUG(*gap, i).st_word; 6177 for (j = 0; j < ga.ga_len; ++j) 6178 if (STRCMP(stp[j].st_word, p) == 0) 6179 break; 6180 if (j == ga.ga_len) 6181 stp[ga.ga_len++] = SUG(*gap, i); 6182 else 6183 vim_free(p); 6184 } 6185 } 6186 } 6187 6188 ga_clear(&su->su_ga); 6189 ga_clear(&su->su_sga); 6190 6191 /* Truncate the list to the number of suggestions that will be displayed. */ 6192 if (ga.ga_len > su->su_maxcount) 6193 { 6194 for (i = su->su_maxcount; i < ga.ga_len; ++i) 6195 vim_free(stp[i].st_word); 6196 ga.ga_len = su->su_maxcount; 6197 } 6198 6199 su->su_ga = ga; 6200 } 6201 6202 /* 6203 * For the goodword in "stp" compute the soundalike score compared to the 6204 * badword. 
6205 */ 6206 static int 6207 stp_sal_score( 6208 suggest_T *stp, 6209 suginfo_T *su, 6210 slang_T *slang, 6211 char_u *badsound) /* sound-folded badword */ 6212 { 6213 char_u *p; 6214 char_u *pbad; 6215 char_u *pgood; 6216 char_u badsound2[MAXWLEN]; 6217 char_u fword[MAXWLEN]; 6218 char_u goodsound[MAXWLEN]; 6219 char_u goodword[MAXWLEN]; 6220 int lendiff; 6221 6222 lendiff = (int)(su->su_badlen - stp->st_orglen); 6223 if (lendiff >= 0) 6224 pbad = badsound; 6225 else 6226 { 6227 /* soundfold the bad word with more characters following */ 6228 (void)spell_casefold(su->su_badptr, stp->st_orglen, fword, MAXWLEN); 6229 6230 /* When joining two words the sound often changes a lot. E.g., "t he" 6231 * sounds like "t h" while "the" sounds like "@". Avoid that by 6232 * removing the space. Don't do it when the good word also contains a 6233 * space. */ 6234 if (vim_iswhite(su->su_badptr[su->su_badlen]) 6235 && *skiptowhite(stp->st_word) == NUL) 6236 for (p = fword; *(p = skiptowhite(p)) != NUL; ) 6237 STRMOVE(p, p + 1); 6238 6239 spell_soundfold(slang, fword, TRUE, badsound2); 6240 pbad = badsound2; 6241 } 6242 6243 if (lendiff > 0 && stp->st_wordlen + lendiff < MAXWLEN) 6244 { 6245 /* Add part of the bad word to the good word, so that we soundfold 6246 * what replaces the bad word. */ 6247 STRCPY(goodword, stp->st_word); 6248 vim_strncpy(goodword + stp->st_wordlen, 6249 su->su_badptr + su->su_badlen - lendiff, lendiff); 6250 pgood = goodword; 6251 } 6252 else 6253 pgood = stp->st_word; 6254 6255 /* Sound-fold the word and compute the score for the difference. */ 6256 spell_soundfold(slang, pgood, FALSE, goodsound); 6257 6258 return soundalike_score(goodsound, pbad); 6259 } 6260 6261 /* structure used to store soundfolded words that add_sound_suggest() has 6262 * handled already. 
*/ 6263 typedef struct 6264 { 6265 short sft_score; /* lowest score used */ 6266 char_u sft_word[1]; /* soundfolded word, actually longer */ 6267 } sftword_T; 6268 6269 static sftword_T dumsft; 6270 #define HIKEY2SFT(p) ((sftword_T *)(p - (dumsft.sft_word - (char_u *)&dumsft))) 6271 #define HI2SFT(hi) HIKEY2SFT((hi)->hi_key) 6272 6273 /* 6274 * Prepare for calling suggest_try_soundalike(). 6275 */ 6276 static void 6277 suggest_try_soundalike_prep(void) 6278 { 6279 langp_T *lp; 6280 int lpi; 6281 slang_T *slang; 6282 6283 /* Do this for all languages that support sound folding and for which a 6284 * .sug file has been loaded. */ 6285 for (lpi = 0; lpi < curwin->w_s->b_langp.ga_len; ++lpi) 6286 { 6287 lp = LANGP_ENTRY(curwin->w_s->b_langp, lpi); 6288 slang = lp->lp_slang; 6289 if (slang->sl_sal.ga_len > 0 && slang->sl_sbyts != NULL) 6290 /* prepare the hashtable used by add_sound_suggest() */ 6291 hash_init(&slang->sl_sounddone); 6292 } 6293 } 6294 6295 /* 6296 * Find suggestions by comparing the word in a sound-a-like form. 6297 * Note: This doesn't support postponed prefixes. 6298 */ 6299 static void 6300 suggest_try_soundalike(suginfo_T *su) 6301 { 6302 char_u salword[MAXWLEN]; 6303 langp_T *lp; 6304 int lpi; 6305 slang_T *slang; 6306 6307 /* Do this for all languages that support sound folding and for which a 6308 * .sug file has been loaded. */ 6309 for (lpi = 0; lpi < curwin->w_s->b_langp.ga_len; ++lpi) 6310 { 6311 lp = LANGP_ENTRY(curwin->w_s->b_langp, lpi); 6312 slang = lp->lp_slang; 6313 if (slang->sl_sal.ga_len > 0 && slang->sl_sbyts != NULL) 6314 { 6315 /* soundfold the bad word */ 6316 spell_soundfold(slang, su->su_fbadword, TRUE, salword); 6317 6318 /* try all kinds of inserts/deletes/swaps/etc. 
*/ 6319 /* TODO: also soundfold the next words, so that we can try joining 6320 * and splitting */ 6321 #ifdef SUGGEST_PROFILE 6322 prof_init(); 6323 #endif 6324 suggest_trie_walk(su, lp, salword, TRUE); 6325 #ifdef SUGGEST_PROFILE 6326 prof_report("soundalike"); 6327 #endif 6328 } 6329 } 6330 } 6331 6332 /* 6333 * Finish up after calling suggest_try_soundalike(). 6334 */ 6335 static void 6336 suggest_try_soundalike_finish(void) 6337 { 6338 langp_T *lp; 6339 int lpi; 6340 slang_T *slang; 6341 int todo; 6342 hashitem_T *hi; 6343 6344 /* Do this for all languages that support sound folding and for which a 6345 * .sug file has been loaded. */ 6346 for (lpi = 0; lpi < curwin->w_s->b_langp.ga_len; ++lpi) 6347 { 6348 lp = LANGP_ENTRY(curwin->w_s->b_langp, lpi); 6349 slang = lp->lp_slang; 6350 if (slang->sl_sal.ga_len > 0 && slang->sl_sbyts != NULL) 6351 { 6352 /* Free the info about handled words. */ 6353 todo = (int)slang->sl_sounddone.ht_used; 6354 for (hi = slang->sl_sounddone.ht_array; todo > 0; ++hi) 6355 if (!HASHITEM_EMPTY(hi)) 6356 { 6357 vim_free(HI2SFT(hi)); 6358 --todo; 6359 } 6360 6361 /* Clear the hashtable, it may also be used by another region. */ 6362 hash_clear(&slang->sl_sounddone); 6363 hash_init(&slang->sl_sounddone); 6364 } 6365 } 6366 } 6367 6368 /* 6369 * A match with a soundfolded word is found. Add the good word(s) that 6370 * produce this soundfolded word. 
6371 */ 6372 static void 6373 add_sound_suggest( 6374 suginfo_T *su, 6375 char_u *goodword, 6376 int score, /* soundfold score */ 6377 langp_T *lp) 6378 { 6379 slang_T *slang = lp->lp_slang; /* language for sound folding */ 6380 int sfwordnr; 6381 char_u *nrline; 6382 int orgnr; 6383 char_u theword[MAXWLEN]; 6384 int i; 6385 int wlen; 6386 char_u *byts; 6387 idx_T *idxs; 6388 int n; 6389 int wordcount; 6390 int wc; 6391 int goodscore; 6392 hash_T hash; 6393 hashitem_T *hi; 6394 sftword_T *sft; 6395 int bc, gc; 6396 int limit; 6397 6398 /* 6399 * It's very well possible that the same soundfold word is found several 6400 * times with different scores. Since the following is quite slow only do 6401 * the words that have a better score than before. Use a hashtable to 6402 * remember the words that have been done. 6403 */ 6404 hash = hash_hash(goodword); 6405 hi = hash_lookup(&slang->sl_sounddone, goodword, hash); 6406 if (HASHITEM_EMPTY(hi)) 6407 { 6408 sft = (sftword_T *)alloc((unsigned)(sizeof(sftword_T) 6409 + STRLEN(goodword))); 6410 if (sft != NULL) 6411 { 6412 sft->sft_score = score; 6413 STRCPY(sft->sft_word, goodword); 6414 hash_add_item(&slang->sl_sounddone, hi, sft->sft_word, hash); 6415 } 6416 } 6417 else 6418 { 6419 sft = HI2SFT(hi); 6420 if (score >= sft->sft_score) 6421 return; 6422 sft->sft_score = score; 6423 } 6424 6425 /* 6426 * Find the word nr in the soundfold tree. 6427 */ 6428 sfwordnr = soundfold_find(slang, goodword); 6429 if (sfwordnr < 0) 6430 { 6431 internal_error("add_sound_suggest()"); 6432 return; 6433 } 6434 6435 /* 6436 * go over the list of good words that produce this soundfold word 6437 */ 6438 nrline = ml_get_buf(slang->sl_sugbuf, (linenr_T)(sfwordnr + 1), FALSE); 6439 orgnr = 0; 6440 while (*nrline != NUL) 6441 { 6442 /* The wordnr was stored in a minimal nr of bytes as an offset to the 6443 * previous wordnr. 
*/ 6444 orgnr += bytes2offset(&nrline); 6445 6446 byts = slang->sl_fbyts; 6447 idxs = slang->sl_fidxs; 6448 6449 /* Lookup the word "orgnr" one of the two tries. */ 6450 n = 0; 6451 wordcount = 0; 6452 for (wlen = 0; wlen < MAXWLEN - 3; ++wlen) 6453 { 6454 i = 1; 6455 if (wordcount == orgnr && byts[n + 1] == NUL) 6456 break; /* found end of word */ 6457 6458 if (byts[n + 1] == NUL) 6459 ++wordcount; 6460 6461 /* skip over the NUL bytes */ 6462 for ( ; byts[n + i] == NUL; ++i) 6463 if (i > byts[n]) /* safety check */ 6464 { 6465 STRCPY(theword + wlen, "BAD"); 6466 wlen += 3; 6467 goto badword; 6468 } 6469 6470 /* One of the siblings must have the word. */ 6471 for ( ; i < byts[n]; ++i) 6472 { 6473 wc = idxs[idxs[n + i]]; /* nr of words under this byte */ 6474 if (wordcount + wc > orgnr) 6475 break; 6476 wordcount += wc; 6477 } 6478 6479 theword[wlen] = byts[n + i]; 6480 n = idxs[n + i]; 6481 } 6482 badword: 6483 theword[wlen] = NUL; 6484 6485 /* Go over the possible flags and regions. */ 6486 for (; i <= byts[n] && byts[n + i] == NUL; ++i) 6487 { 6488 char_u cword[MAXWLEN]; 6489 char_u *p; 6490 int flags = (int)idxs[n + i]; 6491 6492 /* Skip words with the NOSUGGEST flag */ 6493 if (flags & WF_NOSUGGEST) 6494 continue; 6495 6496 if (flags & WF_KEEPCAP) 6497 { 6498 /* Must find the word in the keep-case tree. */ 6499 find_keepcap_word(slang, theword, cword); 6500 p = cword; 6501 } 6502 else 6503 { 6504 flags |= su->su_badflags; 6505 if ((flags & WF_CAPMASK) != 0) 6506 { 6507 /* Need to fix case according to "flags". */ 6508 make_case_word(theword, cword, flags); 6509 p = cword; 6510 } 6511 else 6512 p = theword; 6513 } 6514 6515 /* Add the suggestion. */ 6516 if (sps_flags & SPS_DOUBLE) 6517 { 6518 /* Add the suggestion if the score isn't too bad. */ 6519 if (score <= su->su_maxscore) 6520 add_suggestion(su, &su->su_sga, p, su->su_badlen, 6521 score, 0, FALSE, slang, FALSE); 6522 } 6523 else 6524 { 6525 /* Add a penalty for words in another region. 
*/ 6526 if ((flags & WF_REGION) 6527 && (((unsigned)flags >> 16) & lp->lp_region) == 0) 6528 goodscore = SCORE_REGION; 6529 else 6530 goodscore = 0; 6531 6532 /* Add a small penalty for changing the first letter from 6533 * lower to upper case. Helps for "tath" -> "Kath", which is 6534 * less common than "tath" -> "path". Don't do it when the 6535 * letter is the same, that has already been counted. */ 6536 gc = PTR2CHAR(p); 6537 if (SPELL_ISUPPER(gc)) 6538 { 6539 bc = PTR2CHAR(su->su_badword); 6540 if (!SPELL_ISUPPER(bc) 6541 && SPELL_TOFOLD(bc) != SPELL_TOFOLD(gc)) 6542 goodscore += SCORE_ICASE / 2; 6543 } 6544 6545 /* Compute the score for the good word. This only does letter 6546 * insert/delete/swap/replace. REP items are not considered, 6547 * which may make the score a bit higher. 6548 * Use a limit for the score to make it work faster. Use 6549 * MAXSCORE(), because RESCORE() will change the score. 6550 * If the limit is very high then the iterative method is 6551 * inefficient, using an array is quicker. */ 6552 limit = MAXSCORE(su->su_sfmaxscore - goodscore, score); 6553 if (limit > SCORE_LIMITMAX) 6554 goodscore += spell_edit_score(slang, su->su_badword, p); 6555 else 6556 goodscore += spell_edit_score_limit(slang, su->su_badword, 6557 p, limit); 6558 6559 /* When going over the limit don't bother to do the rest. */ 6560 if (goodscore < SCORE_MAXMAX) 6561 { 6562 /* Give a bonus to words seen before. */ 6563 goodscore = score_wordcount_adj(slang, goodscore, p, FALSE); 6564 6565 /* Add the suggestion if the score isn't too bad. */ 6566 goodscore = RESCORE(goodscore, score); 6567 if (goodscore <= su->su_sfmaxscore) 6568 add_suggestion(su, &su->su_ga, p, su->su_badlen, 6569 goodscore, score, TRUE, slang, TRUE); 6570 } 6571 } 6572 } 6573 /* smsg("word %s (%d): %s (%d)", sftword, sftnr, theword, orgnr); */ 6574 } 6575 } 6576 6577 /* 6578 * Find word "word" in fold-case tree for "slang" and return the word number. 
6579 */ 6580 static int 6581 soundfold_find(slang_T *slang, char_u *word) 6582 { 6583 idx_T arridx = 0; 6584 int len; 6585 int wlen = 0; 6586 int c; 6587 char_u *ptr = word; 6588 char_u *byts; 6589 idx_T *idxs; 6590 int wordnr = 0; 6591 6592 byts = slang->sl_sbyts; 6593 idxs = slang->sl_sidxs; 6594 6595 for (;;) 6596 { 6597 /* First byte is the number of possible bytes. */ 6598 len = byts[arridx++]; 6599 6600 /* If the first possible byte is a zero the word could end here. 6601 * If the word ends we found the word. If not skip the NUL bytes. */ 6602 c = ptr[wlen]; 6603 if (byts[arridx] == NUL) 6604 { 6605 if (c == NUL) 6606 break; 6607 6608 /* Skip over the zeros, there can be several. */ 6609 while (len > 0 && byts[arridx] == NUL) 6610 { 6611 ++arridx; 6612 --len; 6613 } 6614 if (len == 0) 6615 return -1; /* no children, word should have ended here */ 6616 ++wordnr; 6617 } 6618 6619 /* If the word ends we didn't find it. */ 6620 if (c == NUL) 6621 return -1; 6622 6623 /* Perform a binary search in the list of accepted bytes. */ 6624 if (c == TAB) /* <Tab> is handled like <Space> */ 6625 c = ' '; 6626 while (byts[arridx] < c) 6627 { 6628 /* The word count is in the first idxs[] entry of the child. */ 6629 wordnr += idxs[idxs[arridx]]; 6630 ++arridx; 6631 if (--len == 0) /* end of the bytes, didn't find it */ 6632 return -1; 6633 } 6634 if (byts[arridx] != c) /* didn't find the byte */ 6635 return -1; 6636 6637 /* Continue at the child (if there is one). */ 6638 arridx = idxs[arridx]; 6639 ++wlen; 6640 6641 /* One space in the good word may stand for several spaces in the 6642 * checked word. */ 6643 if (c == ' ') 6644 while (ptr[wlen] == ' ' || ptr[wlen] == TAB) 6645 ++wlen; 6646 } 6647 6648 return wordnr; 6649 } 6650 6651 /* 6652 * Copy "fword" to "cword", fixing case according to "flags". 
6653 */ 6654 static void 6655 make_case_word(char_u *fword, char_u *cword, int flags) 6656 { 6657 if (flags & WF_ALLCAP) 6658 /* Make it all upper-case */ 6659 allcap_copy(fword, cword); 6660 else if (flags & WF_ONECAP) 6661 /* Make the first letter upper-case */ 6662 onecap_copy(fword, cword, TRUE); 6663 else 6664 /* Use goodword as-is. */ 6665 STRCPY(cword, fword); 6666 } 6667 6668 6669 /* 6670 * Return TRUE if "c1" and "c2" are similar characters according to the MAP 6671 * lines in the .aff file. 6672 */ 6673 static int 6674 similar_chars(slang_T *slang, int c1, int c2) 6675 { 6676 int m1, m2; 6677 #ifdef FEAT_MBYTE 6678 char_u buf[MB_MAXBYTES + 1]; 6679 hashitem_T *hi; 6680 6681 if (c1 >= 256) 6682 { 6683 buf[mb_char2bytes(c1, buf)] = 0; 6684 hi = hash_find(&slang->sl_map_hash, buf); 6685 if (HASHITEM_EMPTY(hi)) 6686 m1 = 0; 6687 else 6688 m1 = mb_ptr2char(hi->hi_key + STRLEN(hi->hi_key) + 1); 6689 } 6690 else 6691 #endif 6692 m1 = slang->sl_map_array[c1]; 6693 if (m1 == 0) 6694 return FALSE; 6695 6696 6697 #ifdef FEAT_MBYTE 6698 if (c2 >= 256) 6699 { 6700 buf[mb_char2bytes(c2, buf)] = 0; 6701 hi = hash_find(&slang->sl_map_hash, buf); 6702 if (HASHITEM_EMPTY(hi)) 6703 m2 = 0; 6704 else 6705 m2 = mb_ptr2char(hi->hi_key + STRLEN(hi->hi_key) + 1); 6706 } 6707 else 6708 #endif 6709 m2 = slang->sl_map_array[c2]; 6710 6711 return m1 == m2; 6712 } 6713 6714 /* 6715 * Add a suggestion to the list of suggestions. 6716 * For a suggestion that is already in the list the lowest score is remembered. 6717 */ 6718 static void 6719 add_suggestion( 6720 suginfo_T *su, 6721 garray_T *gap, /* either su_ga or su_sga */ 6722 char_u *goodword, 6723 int badlenarg, /* len of bad word replaced with "goodword" */ 6724 int score, 6725 int altscore, 6726 int had_bonus, /* value for st_had_bonus */ 6727 slang_T *slang, /* language for sound folding */ 6728 int maxsf) /* su_maxscore applies to soundfold score, 6729 su_sfmaxscore to the total score. 
*/ 6730 { 6731 int goodlen; /* len of goodword changed */ 6732 int badlen; /* len of bad word changed */ 6733 suggest_T *stp; 6734 suggest_T new_sug; 6735 int i; 6736 char_u *pgood, *pbad; 6737 6738 /* Minimize "badlen" for consistency. Avoids that changing "the the" to 6739 * "thee the" is added next to changing the first "the" the "thee". */ 6740 pgood = goodword + STRLEN(goodword); 6741 pbad = su->su_badptr + badlenarg; 6742 for (;;) 6743 { 6744 goodlen = (int)(pgood - goodword); 6745 badlen = (int)(pbad - su->su_badptr); 6746 if (goodlen <= 0 || badlen <= 0) 6747 break; 6748 mb_ptr_back(goodword, pgood); 6749 mb_ptr_back(su->su_badptr, pbad); 6750 #ifdef FEAT_MBYTE 6751 if (has_mbyte) 6752 { 6753 if (mb_ptr2char(pgood) != mb_ptr2char(pbad)) 6754 break; 6755 } 6756 else 6757 #endif 6758 if (*pgood != *pbad) 6759 break; 6760 } 6761 6762 if (badlen == 0 && goodlen == 0) 6763 /* goodword doesn't change anything; may happen for "the the" changing 6764 * the first "the" to itself. */ 6765 return; 6766 6767 if (gap->ga_len == 0) 6768 i = -1; 6769 else 6770 { 6771 /* Check if the word is already there. Also check the length that is 6772 * being replaced "thes," -> "these" is a different suggestion from 6773 * "thes" -> "these". */ 6774 stp = &SUG(*gap, 0); 6775 for (i = gap->ga_len; --i >= 0; ++stp) 6776 if (stp->st_wordlen == goodlen 6777 && stp->st_orglen == badlen 6778 && STRNCMP(stp->st_word, goodword, goodlen) == 0) 6779 { 6780 /* 6781 * Found it. Remember the word with the lowest score. 6782 */ 6783 if (stp->st_slang == NULL) 6784 stp->st_slang = slang; 6785 6786 new_sug.st_score = score; 6787 new_sug.st_altscore = altscore; 6788 new_sug.st_had_bonus = had_bonus; 6789 6790 if (stp->st_had_bonus != had_bonus) 6791 { 6792 /* Only one of the two had the soundalike score computed. 6793 * Need to do that for the other one now, otherwise the 6794 * scores can't be compared. 
This happens because 6795 * suggest_try_change() doesn't compute the soundalike 6796 * word to keep it fast, while some special methods set 6797 * the soundalike score to zero. */ 6798 if (had_bonus) 6799 rescore_one(su, stp); 6800 else 6801 { 6802 new_sug.st_word = stp->st_word; 6803 new_sug.st_wordlen = stp->st_wordlen; 6804 new_sug.st_slang = stp->st_slang; 6805 new_sug.st_orglen = badlen; 6806 rescore_one(su, &new_sug); 6807 } 6808 } 6809 6810 if (stp->st_score > new_sug.st_score) 6811 { 6812 stp->st_score = new_sug.st_score; 6813 stp->st_altscore = new_sug.st_altscore; 6814 stp->st_had_bonus = new_sug.st_had_bonus; 6815 } 6816 break; 6817 } 6818 } 6819 6820 if (i < 0 && ga_grow(gap, 1) == OK) 6821 { 6822 /* Add a suggestion. */ 6823 stp = &SUG(*gap, gap->ga_len); 6824 stp->st_word = vim_strnsave(goodword, goodlen); 6825 if (stp->st_word != NULL) 6826 { 6827 stp->st_wordlen = goodlen; 6828 stp->st_score = score; 6829 stp->st_altscore = altscore; 6830 stp->st_had_bonus = had_bonus; 6831 stp->st_orglen = badlen; 6832 stp->st_slang = slang; 6833 ++gap->ga_len; 6834 6835 /* If we have too many suggestions now, sort the list and keep 6836 * the best suggestions. */ 6837 if (gap->ga_len > SUG_MAX_COUNT(su)) 6838 { 6839 if (maxsf) 6840 su->su_sfmaxscore = cleanup_suggestions(gap, 6841 su->su_sfmaxscore, SUG_CLEAN_COUNT(su)); 6842 else 6843 su->su_maxscore = cleanup_suggestions(gap, 6844 su->su_maxscore, SUG_CLEAN_COUNT(su)); 6845 } 6846 } 6847 } 6848 } 6849 6850 /* 6851 * Suggestions may in fact be flagged as errors. Esp. for banned words and 6852 * for split words, such as "the the". Remove these from the list here. 
6853 */ 6854 static void 6855 check_suggestions( 6856 suginfo_T *su, 6857 garray_T *gap) /* either su_ga or su_sga */ 6858 { 6859 suggest_T *stp; 6860 int i; 6861 char_u longword[MAXWLEN + 1]; 6862 int len; 6863 hlf_T attr; 6864 6865 stp = &SUG(*gap, 0); 6866 for (i = gap->ga_len - 1; i >= 0; --i) 6867 { 6868 /* Need to append what follows to check for "the the". */ 6869 vim_strncpy(longword, stp[i].st_word, MAXWLEN); 6870 len = stp[i].st_wordlen; 6871 vim_strncpy(longword + len, su->su_badptr + stp[i].st_orglen, 6872 MAXWLEN - len); 6873 attr = HLF_COUNT; 6874 (void)spell_check(curwin, longword, &attr, NULL, FALSE); 6875 if (attr != HLF_COUNT) 6876 { 6877 /* Remove this entry. */ 6878 vim_free(stp[i].st_word); 6879 --gap->ga_len; 6880 if (i < gap->ga_len) 6881 mch_memmove(stp + i, stp + i + 1, 6882 sizeof(suggest_T) * (gap->ga_len - i)); 6883 } 6884 } 6885 } 6886 6887 6888 /* 6889 * Add a word to be banned. 6890 */ 6891 static void 6892 add_banned( 6893 suginfo_T *su, 6894 char_u *word) 6895 { 6896 char_u *s; 6897 hash_T hash; 6898 hashitem_T *hi; 6899 6900 hash = hash_hash(word); 6901 hi = hash_lookup(&su->su_banned, word, hash); 6902 if (HASHITEM_EMPTY(hi)) 6903 { 6904 s = vim_strsave(word); 6905 if (s != NULL) 6906 hash_add_item(&su->su_banned, hi, s, hash); 6907 } 6908 } 6909 6910 /* 6911 * Recompute the score for all suggestions if sound-folding is possible. This 6912 * is slow, thus only done for the final results. 6913 */ 6914 static void 6915 rescore_suggestions(suginfo_T *su) 6916 { 6917 int i; 6918 6919 if (su->su_sallang != NULL) 6920 for (i = 0; i < su->su_ga.ga_len; ++i) 6921 rescore_one(su, &SUG(su->su_ga, i)); 6922 } 6923 6924 /* 6925 * Recompute the score for one suggestion if sound-folding is possible. 
6926 */ 6927 static void 6928 rescore_one(suginfo_T *su, suggest_T *stp) 6929 { 6930 slang_T *slang = stp->st_slang; 6931 char_u sal_badword[MAXWLEN]; 6932 char_u *p; 6933 6934 /* Only rescore suggestions that have no sal score yet and do have a 6935 * language. */ 6936 if (slang != NULL && slang->sl_sal.ga_len > 0 && !stp->st_had_bonus) 6937 { 6938 if (slang == su->su_sallang) 6939 p = su->su_sal_badword; 6940 else 6941 { 6942 spell_soundfold(slang, su->su_fbadword, TRUE, sal_badword); 6943 p = sal_badword; 6944 } 6945 6946 stp->st_altscore = stp_sal_score(stp, su, slang, p); 6947 if (stp->st_altscore == SCORE_MAXMAX) 6948 stp->st_altscore = SCORE_BIG; 6949 stp->st_score = RESCORE(stp->st_score, stp->st_altscore); 6950 stp->st_had_bonus = TRUE; 6951 } 6952 } 6953 6954 static int 6955 #ifdef __BORLANDC__ 6956 _RTLENTRYF 6957 #endif 6958 sug_compare(const void *s1, const void *s2); 6959 6960 /* 6961 * Function given to qsort() to sort the suggestions on st_score. 6962 * First on "st_score", then "st_altscore" then alphabetically. 6963 */ 6964 static int 6965 #ifdef __BORLANDC__ 6966 _RTLENTRYF 6967 #endif 6968 sug_compare(const void *s1, const void *s2) 6969 { 6970 suggest_T *p1 = (suggest_T *)s1; 6971 suggest_T *p2 = (suggest_T *)s2; 6972 int n = p1->st_score - p2->st_score; 6973 6974 if (n == 0) 6975 { 6976 n = p1->st_altscore - p2->st_altscore; 6977 if (n == 0) 6978 n = STRICMP(p1->st_word, p2->st_word); 6979 } 6980 return n; 6981 } 6982 6983 /* 6984 * Cleanup the suggestions: 6985 * - Sort on score. 6986 * - Remove words that won't be displayed. 6987 * Returns the maximum score in the list or "maxscore" unmodified. 6988 */ 6989 static int 6990 cleanup_suggestions( 6991 garray_T *gap, 6992 int maxscore, 6993 int keep) /* nr of suggestions to keep */ 6994 { 6995 suggest_T *stp = &SUG(*gap, 0); 6996 int i; 6997 6998 /* Sort the list. 
*/ 6999 qsort(gap->ga_data, (size_t)gap->ga_len, sizeof(suggest_T), sug_compare); 7000 7001 /* Truncate the list to the number of suggestions that will be displayed. */ 7002 if (gap->ga_len > keep) 7003 { 7004 for (i = keep; i < gap->ga_len; ++i) 7005 vim_free(stp[i].st_word); 7006 gap->ga_len = keep; 7007 return stp[keep - 1].st_score; 7008 } 7009 return maxscore; 7010 } 7011 7012 #if defined(FEAT_EVAL) || defined(PROTO) 7013 /* 7014 * Soundfold a string, for soundfold(). 7015 * Result is in allocated memory, NULL for an error. 7016 */ 7017 char_u * 7018 eval_soundfold(char_u *word) 7019 { 7020 langp_T *lp; 7021 char_u sound[MAXWLEN]; 7022 int lpi; 7023 7024 if (curwin->w_p_spell && *curwin->w_s->b_p_spl != NUL) 7025 /* Use the sound-folding of the first language that supports it. */ 7026 for (lpi = 0; lpi < curwin->w_s->b_langp.ga_len; ++lpi) 7027 { 7028 lp = LANGP_ENTRY(curwin->w_s->b_langp, lpi); 7029 if (lp->lp_slang->sl_sal.ga_len > 0) 7030 { 7031 /* soundfold the word */ 7032 spell_soundfold(lp->lp_slang, word, FALSE, sound); 7033 return vim_strsave(sound); 7034 } 7035 } 7036 7037 /* No language with sound folding, return word as-is. */ 7038 return vim_strsave(word); 7039 } 7040 #endif 7041 7042 /* 7043 * Turn "inword" into its sound-a-like equivalent in "res[MAXWLEN]". 7044 * 7045 * There are many ways to turn a word into a sound-a-like representation. The 7046 * oldest is Soundex (1918!). A nice overview can be found in "Approximate 7047 * swedish name matching - survey and test of different algorithms" by Klas 7048 * Erikson. 7049 * 7050 * We support two methods: 7051 * 1. SOFOFROM/SOFOTO do a simple character mapping. 7052 * 2. SAL items define a more advanced sound-folding (and much slower). 
 *
 * Which method is used depends on "slang->sl_sofo".  For the SAL method the
 * word is case-folded first unless "folded" is TRUE, and the multi-byte
 * variant is used when "has_mbyte" is set.
 */
    void
spell_soundfold(
    slang_T     *slang,
    char_u      *inword,
    int         folded,     /* "inword" is already case-folded */
    char_u      *res)
{
    char_u      fword[MAXWLEN];
    char_u      *word;

    if (slang->sl_sofo)
        /* SOFOFROM and SOFOTO used */
        spell_soundfold_sofo(slang, inword, res);
    else
    {
        /* SAL items used.  Requires the word to be case-folded. */
        if (folded)
            word = inword;
        else
        {
            (void)spell_casefold(inword, (int)STRLEN(inword), fword, MAXWLEN);
            word = fword;
        }

#ifdef FEAT_MBYTE
        if (has_mbyte)
            spell_soundfold_wsal(slang, word, res);
        else
#endif
            spell_soundfold_sal(slang, word, res);
    }
}

/*
 * Perform sound folding of "inword" into "res" according to SOFOFROM and
 * SOFOTO lines.
 * Each character is mapped through the tables in "slang"; white space
 * becomes a single space.  Characters that map to NUL are dropped and a
 * character equal to the previously stored one is not stored again.
 */
    static void
spell_soundfold_sofo(slang_T *slang, char_u *inword, char_u *res)
{
    char_u      *s;
    int         ri = 0;         /* number of bytes stored in "res" */
    int         c;

#ifdef FEAT_MBYTE
    if (has_mbyte)
    {
        int     prevc = 0;      /* last character stored in "res" */
        int     *ip;

        /* The sl_sal_first[] table contains the translation for chars up to
         * 255, sl_sal the rest. */
        for (s = inword; *s != NUL; )
        {
            c = mb_cptr2char_adv(&s);
            if (enc_utf8 ? utf_class(c) == 0 : vim_iswhite(c))
                c = ' ';
            else if (c < 256)
                c = slang->sl_sal_first[c];
            else
            {
                /* The list is selected by the low byte of the character and
                 * holds from/to pairs, terminated by a zero "from". */
                ip = ((int **)slang->sl_sal.ga_data)[c & 0xff];
                if (ip == NULL)         /* empty list, can't match */
                    c = NUL;
                else
                    for (;;)            /* find "c" in the list */
                    {
                        if (*ip == 0)   /* not found */
                        {
                            c = NUL;
                            break;
                        }
                        if (*ip == c)   /* match! */
                        {
                            c = ip[1];
                            break;
                        }
                        ip += 2;
                    }
            }

            if (c != NUL && c != prevc)
            {
                ri += mb_char2bytes(c, res + ri);
                /* Stop while there is still room for one more character
                 * and the terminating NUL. */
                if (ri + MB_MAXBYTES > MAXWLEN)
                    break;
                prevc = c;
            }
        }
    }
    else
#endif
    {
        /* The sl_sal_first[] table contains the translation. */
        /* NOTE(review): no check of "ri" against MAXWLEN in this branch;
         * presumably callers pass words shorter than MAXWLEN - confirm. */
        for (s = inword; (c = *s) != NUL; ++s)
        {
            if (vim_iswhite(c))
                c = ' ';
            else
                c = slang->sl_sal_first[c];
            if (c != NUL && (ri == 0 || res[ri - 1] != c))
                res[ri++] = c;
        }
    }

    res[ri] = NUL;
}

/*
 * Perform sound folding of "inword" into "res" using the SAL items of
 * "slang".  Single-byte version, see spell_soundfold_wsal() for the
 * multi-byte one.
 *
 * Locals used by the rule loop:
 *   i       index in "word" of the character being handled
 *   reslen  number of bytes stored in "res"
 *   n       index in the SAL item array of the rule being tried
 *   k       number of characters of "word" matched by the rule
 *   pri     priority of the rule, 5 unless the rule gives a digit
 *   n0, k0, p0  the same for a follow-up rule; "p0" is also used as a flag
 *           for a rule containing '<'
 *   z, z0   flags set when a '<' or "^^" rule changed "word", in which case
 *           the character at "i" is handled again
 */
    static void
spell_soundfold_sal(slang_T *slang, char_u *inword, char_u *res)
{
    salitem_T   *smp;
    char_u      word[MAXWLEN];
    char_u      *s = inword;
    char_u      *t;
    char_u      *pf;
    int         i, j, z;
    int         reslen;
    int         n, k = 0;
    int         z0;
    int         k0;
    int         n0;
    int         c;
    int         pri;
    int         p0 = -333;
    int         c0;

    /* Remove accents, if wanted.  We actually remove all non-word characters.
     * But keep white space.  We need a copy, the word may be changed here. */
    if (slang->sl_rem_accents)
    {
        t = word;
        while (*s != NUL)
        {
            if (vim_iswhite(*s))
            {
                /* A run of white space becomes one space. */
                *t++ = ' ';
                s = skipwhite(s);
            }
            else
            {
                if (spell_iswordp_nmw(s, curwin))
                    *t++ = *s;
                ++s;
            }
        }
        *t = NUL;
    }
    else
        vim_strncpy(word, s, MAXWLEN - 1);

    smp = (salitem_T *)slang->sl_sal.ga_data;

    /*
     * This comes from Aspell phonet.cpp.  Converted from C++ to C.
     * Changed to keep spaces.
     */
    i = reslen = z = 0;
    while ((c = word[i]) != NUL)
    {
        /* Start with the first rule that has the character in the word. */
        n = slang->sl_sal_first[c];
        z0 = 0;

        if (n >= 0)
        {
            /* check all rules for the same letter */
            for (; (s = smp[n].sm_lead)[0] == c; ++n)
            {
                /* Quickly skip entries that don't match the word.  Most
                 * entries are less than three chars, optimize for that. */
                k = smp[n].sm_leadlen;
                if (k > 1)
                {
                    if (word[i + 1] != s[1])
                        continue;
                    if (k > 2)
                    {
                        for (j = 2; j < k; ++j)
                            if (word[i + j] != s[j])
                                break;
                        if (j < k)
                            continue;
                    }
                }

                if ((pf = smp[n].sm_oneof) != NULL)
                {
                    /* Check for match with one of the chars in "sm_oneof". */
                    while (*pf != NUL && *pf != word[i + k])
                        ++pf;
                    if (*pf == NUL)
                        continue;
                    ++k;
                }
                s = smp[n].sm_rules;
                pri = 5;    /* default priority */

                /* Remember the first rule character and the full match
                 * length; every leading '-' shortens "k" by one. */
                p0 = *s;
                k0 = k;
                while (*s == '-' && k > 1)
                {
                    k--;
                    s++;
                }
                if (*s == '<')
                    s++;
                if (VIM_ISDIGIT(*s))
                {
                    /* determine priority */
                    pri = *s - '0';
                    s++;
                }
                if (*s == '^' && *(s + 1) == '^')
                    s++;

                /* The rule applies when nothing is left in the rule string,
                 * or when its '^' or '$' condition holds at this position. */
                if (*s == NUL
                        || (*s == '^'
                            && (i == 0 || !(word[i - 1] == ' '
                                      || spell_iswordp(word + i - 1, curwin)))
                            && (*(s + 1) != '$'
                                || (!spell_iswordp(word + i + k0, curwin))))
                        || (*s == '$' && i > 0
                            && spell_iswordp(word + i - 1, curwin)
                            && (!spell_iswordp(word + i + k0, curwin))))
                {
                    /* search for followup rules, if: */
                    /* followup and k > 1  and  NO '-' in searchstring */
                    c0 = word[i + k - 1];
                    n0 = slang->sl_sal_first[c0];

                    if (slang->sl_followup && k > 1 && n0 >= 0
                                           && p0 != '-' && word[i + k] != NUL)
                    {
                        /* test follow-up rule for "word[i + k]" */
                        for ( ; (s = smp[n0].sm_lead)[0] == c0; ++n0)
                        {
                            /* Quickly skip entries that don't match the
                             * word. */
                            k0 = smp[n0].sm_leadlen;
                            if (k0 > 1)
                            {
                                if (word[i + k] != s[1])
                                    continue;
                                if (k0 > 2)
                                {
                                    pf = word + i + k + 1;
                                    for (j = 2; j < k0; ++j)
                                        if (*pf++ != s[j])
                                            break;
                                    if (j < k0)
                                        continue;
                                }
                            }
                            k0 += k - 1;

                            if ((pf = smp[n0].sm_oneof) != NULL)
                            {
                                /* Check for match with one of the chars in
                                 * "sm_oneof". */
                                while (*pf != NUL && *pf != word[i + k0])
                                    ++pf;
                                if (*pf == NUL)
                                    continue;
                                ++k0;
                            }

                            p0 = 5;
                            s = smp[n0].sm_rules;
                            while (*s == '-')
                            {
                                /* "k0" gets NOT reduced because
                                 * "if (k0 == k)" */
                                s++;
                            }
                            if (*s == '<')
                                s++;
                            if (VIM_ISDIGIT(*s))
                            {
                                p0 = *s - '0';
                                s++;
                            }

                            if (*s == NUL
                                    /* *s == '^' cuts */
                                    || (*s == '$'
                                            && !spell_iswordp(word + i + k0,
                                                                     curwin)))
                            {
                                if (k0 == k)
                                    /* this is just a piece of the string */
                                    continue;

                                if (p0 < pri)
                                    /* priority too low */
                                    continue;
                                /* rule fits; stop search */
                                break;
                            }
                        }

                        /* A follow-up rule was found (the loop stopped on
                         * an entry for "c0") with a priority that is not
                         * lower: try the next rule for "c" instead. */
                        if (p0 >= pri && smp[n0].sm_lead[0] == c0)
                            continue;
                    }

                    /* replace string */
                    s = smp[n].sm_to;
                    if (s == NULL)
                        s = (char_u *)"";
                    pf = smp[n].sm_rules;
                    p0 = (vim_strchr(pf, '<') != NULL) ? 1 : 0;
                    if (p0 == 1 && z == 0)
                    {
                        /* rule with '<' is used: the replacement is written
                         * into "word" and handled again */
                        if (reslen > 0 && *s != NUL && (res[reslen - 1] == c
                                                    || res[reslen - 1] == *s))
                            reslen--;
                        z0 = 1;
                        z = 1;
                        k0 = 0;
                        while (*s != NUL && word[i + k0] != NUL)
                        {
                            word[i + k0] = *s;
                            k0++;
                            s++;
                        }
                        /* Replacement is shorter than the match: move the
                         * rest of the word down. */
                        if (k > k0)
                            STRMOVE(word + i + k0, word + i + k);

                        /* new "actual letter" */
                        c = word[i];
                    }
                    else
                    {
                        /* no '<' rule used */
                        i += k - 1;
                        z = 0;
                        /* Store all but the last replacement character,
                         * skipping one equal to the previous result byte. */
                        while (*s != NUL && s[1] != NUL && reslen < MAXWLEN)
                        {
                            if (reslen == 0 || res[reslen - 1] != *s)
                                res[reslen++] = *s;
                            s++;
                        }
                        /* new "actual letter" */
                        c = *s;
                        if (strstr((char *)pf, "^^") != NULL)
                        {
                            /* NOTE(review): this store is not guarded by
                             * "reslen < MAXWLEN" like the others - confirm
                             * it cannot overflow "res". */
                            if (c != NUL)
                                res[reslen++] = c;
                            STRMOVE(word, word + i + 1);
                            i = 0;
                            z0 = 1;
                        }
                    }
                    break;
                }
            }
        }
        else if (vim_iswhite(c))
        {
            c = ' ';
            k = 1;
        }

        if (z0 == 0)
        {
            if (k && !p0 && reslen < MAXWLEN && c != NUL
                    && (!slang->sl_collapse || reslen == 0
                                                 || res[reslen - 1] != c))
                /* condense only double letters */
                res[reslen++] = c;

            i++;
            z = 0;
            k = 0;
        }
    }

    /* NOTE(review): "reslen" can be MAXWLEN here while "res" is documented
     * as res[MAXWLEN] - confirm the terminating NUL always fits. */
    res[reslen] = NUL;
}

#ifdef FEAT_MBYTE
/*
 * Turn "inword" into its sound-a-like equivalent in "res[MAXWLEN]".
 * Multi-byte version of spell_soundfold().
 *
 * "inword" is first converted to an array of character values, the SAL
 * rules are applied to that array with the result going to "wres", and
 * "wres" is finally converted back to a multi-byte string in "res".
 * The wide-character fields of the SAL items (sm_lead_w, sm_oneof_w and
 * sm_to_w) are used; rules are looked up through sl_sal_first[] by the low
 * byte of the character.
 */
    static void
spell_soundfold_wsal(slang_T *slang, char_u *inword, char_u *res)
{
    salitem_T   *smp = (salitem_T *)slang->sl_sal.ga_data;
    int         word[MAXWLEN];
    int         wres[MAXWLEN];
    int         l;
    char_u      *s;
    int         *ws;
    char_u      *t;
    int         *pf;
    int         i, j, z;
    int         reslen;
    int         n, k = 0;
    int         z0;
    int         k0;
    int         n0;
    int         c;
    int         pri;
    int         p0 = -333;
    int         c0;
    int         did_white = FALSE;
    int         wordlen;


    /*
     * Convert the multi-byte string to a wide-character string.
     * Remove accents, if wanted.  We actually remove all non-word characters.
     * But keep white space.
     */
    /* NOTE(review): "wordlen" is not checked against MAXWLEN here;
     * presumably "inword" is short enough - confirm with the callers. */
    wordlen = 0;
    for (s = inword; *s != NUL; )
    {
        t = s;
        c = mb_cptr2char_adv(&s);
        if (slang->sl_rem_accents)
        {
            if (enc_utf8 ? utf_class(c) == 0 : vim_iswhite(c))
            {
                /* A run of white space becomes one space. */
                if (did_white)
                    continue;
                c = ' ';
                did_white = TRUE;
            }
            else
            {
                did_white = FALSE;
                if (!spell_iswordp_nmw(t, curwin))
                    continue;
            }
        }
        word[wordlen++] = c;
    }
    word[wordlen] = NUL;

    /*
     * This algorithm comes from Aspell phonet.cpp.
     * Converted from C++ to C.  Added support for multi-byte chars.
     * Changed to keep spaces.
     */
    i = reslen = z = 0;
    while ((c = word[i]) != NUL)
    {
        /* Start with the first rule that has the character in the word. */
        n = slang->sl_sal_first[c & 0xff];
        z0 = 0;

        if (n >= 0)
        {
            /* Check all rules for the same index byte.
             * If c is 0x300 need extra check for the end of the array, as
             * (c & 0xff) is NUL. */
            for (; ((ws = smp[n].sm_lead_w)[0] & 0xff) == (c & 0xff)
                                                         && ws[0] != NUL; ++n)
            {
                /* Quickly skip entries that don't match the word.  Most
                 * entries are less than three chars, optimize for that. */
                if (c != ws[0])
                    continue;
                k = smp[n].sm_leadlen;
                if (k > 1)
                {
                    if (word[i + 1] != ws[1])
                        continue;
                    if (k > 2)
                    {
                        for (j = 2; j < k; ++j)
                            if (word[i + j] != ws[j])
                                break;
                        if (j < k)
                            continue;
                    }
                }

                if ((pf = smp[n].sm_oneof_w) != NULL)
                {
                    /* Check for match with one of the chars in "sm_oneof". */
                    while (*pf != NUL && *pf != word[i + k])
                        ++pf;
                    if (*pf == NUL)
                        continue;
                    ++k;
                }
                s = smp[n].sm_rules;
                pri = 5;    /* default priority */

                /* Remember the first rule character and the full match
                 * length; every leading '-' shortens "k" by one. */
                p0 = *s;
                k0 = k;
                while (*s == '-' && k > 1)
                {
                    k--;
                    s++;
                }
                if (*s == '<')
                    s++;
                if (VIM_ISDIGIT(*s))
                {
                    /* determine priority */
                    pri = *s - '0';
                    s++;
                }
                if (*s == '^' && *(s + 1) == '^')
                    s++;

                /* The rule applies when nothing is left in the rule string,
                 * or when its '^' or '$' condition holds at this position. */
                if (*s == NUL
                        || (*s == '^'
                            && (i == 0 || !(word[i - 1] == ' '
                                    || spell_iswordp_w(word + i - 1, curwin)))
                            && (*(s + 1) != '$'
                                || (!spell_iswordp_w(word + i + k0, curwin))))
                        || (*s == '$' && i > 0
                            && spell_iswordp_w(word + i - 1, curwin)
                            && (!spell_iswordp_w(word + i + k0, curwin))))
                {
                    /* search for followup rules, if: */
                    /* followup and k > 1  and  NO '-' in searchstring */
                    c0 = word[i + k - 1];
                    n0 = slang->sl_sal_first[c0 & 0xff];

                    if (slang->sl_followup && k > 1 && n0 >= 0
                                           && p0 != '-' && word[i + k] != NUL)
                    {
                        /* Test follow-up rule for "word[i + k]"; loop over
                         * all entries with the same index byte. */
                        for ( ; ((ws = smp[n0].sm_lead_w)[0] & 0xff)
                                                         == (c0 & 0xff); ++n0)
                        {
                            /* Quickly skip entries that don't match the word.
                             */
                            if (c0 != ws[0])
                                continue;
                            k0 = smp[n0].sm_leadlen;
                            if (k0 > 1)
                            {
                                if (word[i + k] != ws[1])
                                    continue;
                                if (k0 > 2)
                                {
                                    pf = word + i + k + 1;
                                    for (j = 2; j < k0; ++j)
                                        if (*pf++ != ws[j])
                                            break;
                                    if (j < k0)
                                        continue;
                                }
                            }
                            k0 += k - 1;

                            if ((pf = smp[n0].sm_oneof_w) != NULL)
                            {
                                /* Check for match with one of the chars in
                                 * "sm_oneof". */
                                while (*pf != NUL && *pf != word[i + k0])
                                    ++pf;
                                if (*pf == NUL)
                                    continue;
                                ++k0;
                            }

                            p0 = 5;
                            s = smp[n0].sm_rules;
                            while (*s == '-')
                            {
                                /* "k0" gets NOT reduced because
                                 * "if (k0 == k)" */
                                s++;
                            }
                            if (*s == '<')
                                s++;
                            if (VIM_ISDIGIT(*s))
                            {
                                p0 = *s - '0';
                                s++;
                            }

                            if (*s == NUL
                                    /* *s == '^' cuts */
                                    || (*s == '$'
                                         && !spell_iswordp_w(word + i + k0,
                                                                     curwin)))
                            {
                                if (k0 == k)
                                    /* this is just a piece of the string */
                                    continue;

                                if (p0 < pri)
                                    /* priority too low */
                                    continue;
                                /* rule fits; stop search */
                                break;
                            }
                        }

                        /* A follow-up rule was found with a priority that
                         * is not lower: try the next rule for "c" instead. */
                        if (p0 >= pri && (smp[n0].sm_lead_w[0] & 0xff)
                                                               == (c0 & 0xff))
                            continue;
                    }

                    /* replace string */
                    ws = smp[n].sm_to_w;
                    s = smp[n].sm_rules;
                    p0 = (vim_strchr(s, '<') != NULL) ? 1 : 0;
                    if (p0 == 1 && z == 0)
                    {
                        /* rule with '<' is used: the replacement is written
                         * into "word" and handled again */
                        if (reslen > 0 && ws != NULL && *ws != NUL
                                && (wres[reslen - 1] == c
                                                  || wres[reslen - 1] == *ws))
                            reslen--;
                        z0 = 1;
                        z = 1;
                        k0 = 0;
                        if (ws != NULL)
                            while (*ws != NUL && word[i + k0] != NUL)
                            {
                                word[i + k0] = *ws;
                                k0++;
                                ws++;
                            }
                        /* Replacement is shorter than the match: move the
                         * rest of the word, including the NUL, down. */
                        if (k > k0)
                            mch_memmove(word + i + k0, word + i + k,
                                    sizeof(int) * (wordlen - (i + k) + 1));

                        /* new "actual letter" */
                        c = word[i];
                    }
                    else
                    {
                        /* no '<' rule used */
                        i += k - 1;
                        z = 0;
                        /* Store all but the last replacement character,
                         * skipping one equal to the previous result char. */
                        if (ws != NULL)
                            while (*ws != NUL && ws[1] != NUL
                                                          && reslen < MAXWLEN)
                            {
                                if (reslen == 0 || wres[reslen - 1] != *ws)
                                    wres[reslen++] = *ws;
                                ws++;
                            }
                        /* new "actual letter" */
                        if (ws == NULL)
                            c = NUL;
                        else
                            c = *ws;
                        if (strstr((char *)s, "^^") != NULL)
                        {
                            /* NOTE(review): this store is not guarded by
                             * "reslen < MAXWLEN" like the others - confirm
                             * it cannot overflow "wres". */
                            if (c != NUL)
                                wres[reslen++] = c;
                            mch_memmove(word, word + i + 1,
                                    sizeof(int) * (wordlen - (i + 1) + 1));
                            i = 0;
                            z0 = 1;
                        }
                    }
                    break;
                }
            }
        }
        else if (vim_iswhite(c))
        {
            c = ' ';
            k = 1;
        }

        if (z0 == 0)
        {
            if (k && !p0 && reslen < MAXWLEN && c != NUL
                    && (!slang->sl_collapse || reslen == 0
                                                || wres[reslen - 1] != c))
                /* condense only double letters */
                wres[reslen++] = c;

            i++;
            z = 0;
            k = 0;
        }
    }

    /* Convert wide characters in "wres" to a multi-byte string in "res". */
    l = 0;
    for (n = 0; n < reslen; ++n)
    {
        l += mb_char2bytes(wres[n], res + l);
        /* Stop while there is still room for one more character and the
         * terminating NUL. */
        if (l + MB_MAXBYTES > MAXWLEN)
            break;
    }
    res[l] = NUL;
}
#endif

/*
 * Compute a score for two sound-a-like words.
 * This permits up to two inserts/deletes/swaps/etc. to keep things fast.
 * Instead of a generic loop we write out the code.
 * That keeps it fast by avoiding checks that will not be possible.
 *
 * Returns the sum of the SCORE_ values for the edits that were found, zero
 * for identical strings, or SCORE_MAXMAX when the words cannot be made equal
 * with the combinations that are tried.
 */
    static int
soundalike_score(
    char_u      *goodstart,     /* sound-folded good word */
    char_u      *badstart)      /* sound-folded bad word */
{
    char_u      *goodsound = goodstart;
    char_u      *badsound = badstart;
    int         goodlen;
    int         badlen;
    int         n;              /* length difference: good minus bad */
    char_u      *pl, *ps;       /* position in the longer / shorter word */
    char_u      *pl2, *ps2;     /* scratch copies of "pl" and "ps" */
    int         score = 0;

    /* Adding/inserting "*" at the start (word starts with vowel) shouldn't be
     * counted so much, vowels halfway the word aren't counted at all. */
    if ((*badsound == '*' || *goodsound == '*') && *badsound != *goodsound)
    {
        if ((badsound[0] == NUL && goodsound[1] == NUL)
            || (goodsound[0] == NUL && badsound[1] == NUL))
            /* changing word with vowel to word without a sound */
            return SCORE_DEL;
        if (badsound[0] == NUL || goodsound[0] == NUL)
            /* more than two changes */
            return SCORE_MAXMAX;

        if (badsound[1] == goodsound[1]
                || (badsound[1] != NUL
                    && goodsound[1] != NUL
                    && badsound[2] == goodsound[2]))
        {
            /* handle like a substitute */
        }
        else
        {
            /* Charge a reduced score and skip the leading "*". */
            score = 2 * SCORE_DEL / 3;
            if (*badsound == '*')
                ++badsound;
            else
                ++goodsound;
        }
    }

    goodlen = (int)STRLEN(goodsound);
    badlen = (int)STRLEN(badsound);

    /* Return quickly if the lengths are too different to be fixed by two
     * changes. */
    n = goodlen - badlen;
    if (n < -2 || n > 2)
        return SCORE_MAXMAX;

    if (n > 0)
    {
        pl = goodsound;     /* goodsound is longest */
        ps = badsound;
    }
    else
    {
        pl = badsound;      /* badsound is longest */
        ps = goodsound;
    }

    /* Skip over the identical part. */
    while (*pl == *ps && *pl != NUL)
    {
        ++pl;
        ++ps;
    }

    switch (n)
    {
        case -2:
        case 2:
            /*
             * Must delete two characters from "pl".
             */
            ++pl;       /* first delete */
            while (*pl == *ps)
            {
                ++pl;
                ++ps;
            }
            /* strings must be equal after second delete */
            if (STRCMP(pl + 1, ps) == 0)
                return score + SCORE_DEL * 2;

            /* Failed to compare. */
            break;

        case -1:
        case 1:
            /*
             * Minimal one delete from "pl" required.
             */

            /* 1: delete */
            pl2 = pl + 1;
            ps2 = ps;
            while (*pl2 == *ps2)
            {
                if (*pl2 == NUL)        /* reached the end */
                    return score + SCORE_DEL;
                ++pl2;
                ++ps2;
            }

            /* 2: delete then swap, then rest must be equal */
            if (pl2[0] == ps2[1] && pl2[1] == ps2[0]
                                             && STRCMP(pl2 + 2, ps2 + 2) == 0)
                return score + SCORE_DEL + SCORE_SWAP;

            /* 3: delete then substitute, then the rest must be equal */
            if (STRCMP(pl2 + 1, ps2 + 1) == 0)
                return score + SCORE_DEL + SCORE_SUBST;

            /* 4: first swap then delete */
            if (pl[0] == ps[1] && pl[1] == ps[0])
            {
                pl2 = pl + 2;       /* swap, skip two chars */
                ps2 = ps + 2;
                while (*pl2 == *ps2)
                {
                    ++pl2;
                    ++ps2;
                }
                /* delete a char and then strings must be equal */
                if (STRCMP(pl2 + 1, ps2) == 0)
                    return score + SCORE_SWAP + SCORE_DEL;
            }

            /* 5: first substitute then delete */
            pl2 = pl + 1;           /* substitute, skip one char */
            ps2 = ps + 1;
            while (*pl2 == *ps2)
            {
                ++pl2;
                ++ps2;
            }
            /* delete a char and then strings must be equal */
            if (STRCMP(pl2 + 1, ps2) == 0)
                return score + SCORE_SUBST + SCORE_DEL;

            /* Failed to compare. */
            break;

        case 0:
            /*
             * Lengths are equal, thus changes must result in same length: An
             * insert is only possible in combination with a delete.
             * 1: check for identical strings
             */
            if (*pl == NUL)
                return score;

            /* 2: swap */
            if (pl[0] == ps[1] && pl[1] == ps[0])
            {
                pl2 = pl + 2;       /* swap, skip two chars */
                ps2 = ps + 2;
                while (*pl2 == *ps2)
                {
                    if (*pl2 == NUL)    /* reached the end */
                        return score + SCORE_SWAP;
                    ++pl2;
                    ++ps2;
                }
                /* 3: swap and swap again */
                if (pl2[0] == ps2[1] && pl2[1] == ps2[0]
                                             && STRCMP(pl2 + 2, ps2 + 2) == 0)
                    return score + SCORE_SWAP + SCORE_SWAP;

                /* 4: swap and substitute */
                if (STRCMP(pl2 + 1, ps2 + 1) == 0)
                    return score + SCORE_SWAP + SCORE_SUBST;
            }

            /* 5: substitute */
            pl2 = pl + 1;
            ps2 = ps + 1;
            while (*pl2 == *ps2)
            {
                if (*pl2 == NUL)        /* reached the end */
                    return score + SCORE_SUBST;
                ++pl2;
                ++ps2;
            }

            /* 6: substitute and swap */
            if (pl2[0] == ps2[1] && pl2[1] == ps2[0]
                                             && STRCMP(pl2 + 2, ps2 + 2) == 0)
                return score + SCORE_SUBST + SCORE_SWAP;

            /* 7: substitute and substitute */
            if (STRCMP(pl2 + 1, ps2 + 1) == 0)
                return score + SCORE_SUBST + SCORE_SUBST;

            /* 8: insert then delete */
            pl2 = pl;
            ps2 = ps + 1;
            while (*pl2 == *ps2)
            {
                ++pl2;
                ++ps2;
            }
            if (STRCMP(pl2 + 1, ps2) == 0)
                return score + SCORE_INS + SCORE_DEL;

            /* 9: delete then insert */
            pl2 = pl + 1;
            ps2 = ps;
            while (*pl2 == *ps2)
            {
                ++pl2;
                ++ps2;
            }
            if (STRCMP(pl2, ps2 + 1) == 0)
                return score + SCORE_INS + SCORE_DEL;

            /* Failed to compare. */
            break;
    }

    return SCORE_MAXMAX;
}

/*
 * Compute the "edit distance" to turn "badword" into "goodword".  The less
 * deletes/inserts/substitutes/swaps are required the lower the score.
 *
 * The algorithm is described by Du and Chang, 1992.
 * The implementation of the algorithm comes from Aspell editdist.cpp,
 * edit_distance().  It has been converted from C++ to C and modified to
 * support multi-byte characters.
 *
 * "slang" may be NULL, in which case similar characters are scored like any
 * other substitution.
 * Returns the score, or zero when the table cannot be allocated.
 */
    static int
spell_edit_score(
    slang_T     *slang,
    char_u      *badword,
    char_u      *goodword)
{
    int         *cnt;
    int         badlen, goodlen;        /* lengths including NUL */
    int         j, i;
    int         t;
    int         bc, gc;
    int         pbc, pgc;
#ifdef FEAT_MBYTE
    char_u      *p;
    int         wbadword[MAXWLEN];
    int         wgoodword[MAXWLEN];

    if (has_mbyte)
    {
        /* Get the characters from the multi-byte strings and put them in an
         * int array for easy access. */
        /* NOTE(review): the number of characters is not checked against
         * MAXWLEN here - presumably guaranteed by the callers, confirm. */
        for (p = badword, badlen = 0; *p != NUL; )
            wbadword[badlen++] = mb_cptr2char_adv(&p);
        wbadword[badlen++] = 0;
        for (p = goodword, goodlen = 0; *p != NUL; )
            wgoodword[goodlen++] = mb_cptr2char_adv(&p);
        wgoodword[goodlen++] = 0;
    }
    else
#endif
    {
        badlen = (int)STRLEN(badword) + 1;
        goodlen = (int)STRLEN(goodword) + 1;
    }

    /* We use "cnt" as an array: CNT(badword_idx, goodword_idx). */
#define CNT(a, b)   cnt[(a) + (b) * (badlen + 1)]
    cnt = (int *)lalloc((long_u)(sizeof(int) * (badlen + 1) * (goodlen + 1)),
                                                                        TRUE);
    if (cnt == NULL)
        return 0;       /* out of memory */

    /* First row: reaching goodword from nothing takes only inserts. */
    CNT(0, 0) = 0;
    for (j = 1; j <= goodlen; ++j)
        CNT(0, j) = CNT(0, j - 1) + SCORE_INS;

    for (i = 1; i <= badlen; ++i)
    {
        /* First column: reaching nothing from badword takes only deletes. */
        CNT(i, 0) = CNT(i - 1, 0) + SCORE_DEL;
        for (j = 1; j <= goodlen; ++j)
        {
#ifdef FEAT_MBYTE
            if (has_mbyte)
            {
                bc = wbadword[i - 1];
                gc = wgoodword[j - 1];
            }
            else
#endif
            {
                bc = badword[i - 1];
                gc = goodword[j - 1];
            }
            if (bc == gc)
                CNT(i, j) = CNT(i - 1, j - 1);
            else
            {
                /* Use a better score when there is only a case difference. */
                if (SPELL_TOFOLD(bc) == SPELL_TOFOLD(gc))
                    CNT(i, j) = SCORE_ICASE + CNT(i - 1, j - 1);
                else
                {
                    /* For a similar character use SCORE_SIMILAR. */
                    if (slang != NULL
                            && slang->sl_has_map
                            && similar_chars(slang, gc, bc))
                        CNT(i, j) = SCORE_SIMILAR + CNT(i - 1, j - 1);
                    else
                        CNT(i, j) = SCORE_SUBST + CNT(i - 1, j - 1);
                }

                /* Try a swap with the previous character. */
                if (i > 1 && j > 1)
                {
#ifdef FEAT_MBYTE
                    if (has_mbyte)
                    {
                        pbc = wbadword[i - 2];
                        pgc = wgoodword[j - 2];
                    }
                    else
#endif
                    {
                        pbc = badword[i - 2];
                        pgc = goodword[j - 2];
                    }
                    if (bc == pgc && pbc == gc)
                    {
                        t = SCORE_SWAP + CNT(i - 2, j - 2);
                        if (t < CNT(i, j))
                            CNT(i, j) = t;
                    }
                }
                /* Try a delete and an insert, keep the lowest score. */
                t = SCORE_DEL + CNT(i - 1, j);
                if (t < CNT(i, j))
                    CNT(i, j) = t;
                t = SCORE_INS + CNT(i, j - 1);
                if (t < CNT(i, j))
                    CNT(i, j) = t;
            }
        }
    }

    /* The lengths include the NUL, so this is the entry for the whole of
     * both words. */
    i = CNT(badlen - 1, goodlen - 1);
    vim_free(cnt);
    return i;
}

/* Item on the stack of alternatives still to be tried by
 * spell_edit_score_limit(): a position in both words and the score so far. */
typedef struct
{
    int         badi;
    int         goodi;
    int         score;
} limitscore_T;

/*
 * Like spell_edit_score(), but with a limit on the score to make it faster.
 * May return SCORE_MAXMAX when the score is higher than "limit".
 *
 * This uses a stack for the edits still to be tried.
 * The idea comes from Aspell leditdist.cpp.  Rewritten in C and added support
 * for multi-byte characters.
 */
    static int
spell_edit_score_limit(
    slang_T     *slang,
    char_u      *badword,
    char_u      *goodword,
    int         limit)
{
    /* NOTE(review): "stackidx" is not checked against the size of "stack"
     * below; presumably "limit" keeps the depth small - confirm. */
    limitscore_T    stack[10];          /* allow for over 3 * 2 edits */
    int             stackidx;
    int             bi, gi;
    int             bi2, gi2;
    int             bc, gc;
    int             score;
    int             score_off;
    int             minscore;
    int             round;

#ifdef FEAT_MBYTE
    /* Multi-byte characters require a bit more work, use a different function
     * to avoid testing "has_mbyte" quite often. */
    if (has_mbyte)
        return spell_edit_score_limit_w(slang, badword, goodword, limit);
#endif

    /*
     * The idea is to go from start to end over the words.  So long as
     * characters are equal just continue, this always gives the lowest score.
     * When there is a difference try several alternatives.  Each alternative
     * increases "score" for the edit distance.  Some of the alternatives are
     * pushed onto a stack and tried later, some are tried right away.  At the
     * end of the word the score for one alternative is known.  The lowest
     * possible score is stored in "minscore".
     */
    stackidx = 0;
    bi = 0;
    gi = 0;
    score = 0;
    minscore = limit + 1;

    for (;;)
    {
        /* Skip over an equal part, score remains the same. */
        for (;;)
        {
            bc = badword[bi];
            gc = goodword[gi];
            if (bc != gc)       /* stop at a char that's different */
                break;
            if (bc == NUL)      /* both words end */
            {
                if (score < minscore)
                    minscore = score;
                goto pop;       /* do next alternative */
            }
            ++bi;
            ++gi;
        }

        if (gc == NUL)    /* goodword ends, delete badword chars */
        {
            do
            {
                if ((score += SCORE_DEL) >= minscore)
                    goto pop;       /* do next alternative */
            } while (badword[++bi] != NUL);
            minscore = score;
        }
        else if (bc == NUL) /* badword ends, insert the remaining goodword
                               chars */
        {
            do
            {
                if ((score += SCORE_INS) >= minscore)
                    goto pop;       /* do next alternative */
            } while (goodword[++gi] != NUL);
            minscore = score;
        }
        else                /* both words continue */
        {
            /* If not close to the limit, perform a change.  Only try changes
             * that may lead to a lower score than "minscore".
             * round 0: try deleting a char from badword
             * round 1: try inserting a char in badword */
            for (round = 0; round <= 1; ++round)
            {
                score_off = score + (round == 0 ? SCORE_DEL : SCORE_INS);
                if (score_off < minscore)
                {
                    if (score_off + SCORE_EDIT_MIN >= minscore)
                    {
                        /* Near the limit, rest of the words must match.  We
                         * can check that right now, no need to push an item
                         * onto the stack. */
                        bi2 = bi + 1 - round;
                        gi2 = gi + round;
                        while (goodword[gi2] == badword[bi2])
                        {
                            if (goodword[gi2] == NUL)
                            {
                                minscore = score_off;
                                break;
                            }
                            ++bi2;
                            ++gi2;
                        }
                    }
                    else
                    {
                        /* try deleting/inserting a character later */
                        stack[stackidx].badi = bi + 1 - round;
                        stack[stackidx].goodi = gi + round;
                        stack[stackidx].score = score_off;
                        ++stackidx;
                    }
                }
            }

            if (score + SCORE_SWAP < minscore)
            {
                /* If swapping two characters makes a match then the
                 * substitution is more expensive, thus there is no need to
                 * try both. */
                if (gc == badword[bi + 1] && bc == goodword[gi + 1])
                {
                    /* Swap two characters, that is: skip them. */
                    gi += 2;
                    bi += 2;
                    score += SCORE_SWAP;
                    continue;
                }
            }

            /* Substitute one character for another which is the same
             * thing as deleting a character from both goodword and badword.
             * Use a better score when there is only a case difference. */
            if (SPELL_TOFOLD(bc) == SPELL_TOFOLD(gc))
                score += SCORE_ICASE;
            else
            {
                /* For a similar character use SCORE_SIMILAR. */
                if (slang != NULL
                        && slang->sl_has_map
                        && similar_chars(slang, gc, bc))
                    score += SCORE_SIMILAR;
                else
                    score += SCORE_SUBST;
            }

            if (score < minscore)
            {
                /* Do the substitution. */
                ++gi;
                ++bi;
                continue;
            }
        }
pop:
        /*
         * Get here to try the next alternative, pop it from the stack.
         */
        if (stackidx == 0)              /* stack is empty, finished */
            break;

        /* pop an item from the stack */
        --stackidx;
        gi = stack[stackidx].goodi;
        bi = stack[stackidx].badi;
        score = stack[stackidx].score;
    }

    /* When the score goes over "limit" it may actually be much higher.
     * Return a very large number to avoid going below the limit when giving a
     * bonus. */
    if (minscore > limit)
        return SCORE_MAXMAX;
    return minscore;
}

#ifdef FEAT_MBYTE
/*
 * Multi-byte version of spell_edit_score_limit().
 * Keep it in sync with the above!
 */
    static int
spell_edit_score_limit_w(
    slang_T     *slang,
    char_u      *badword,
    char_u      *goodword,
    int         limit)
{
    limitscore_T    stack[10];          /* allow for over 3 * 2 edits */
    int             stackidx;
    int             bi, gi;
    int             bi2, gi2;
    int             bc, gc;
    int             score;
    int             score_off;
    int             minscore;
    int             round;
    char_u          *p;
    int             wbadword[MAXWLEN];
    int             wgoodword[MAXWLEN];

    /* Get the characters from the multi-byte strings and put them in an
     * int array for easy access. */
    bi = 0;
    for (p = badword; *p != NUL; )
        wbadword[bi++] = mb_cptr2char_adv(&p);
    wbadword[bi++] = 0;
    gi = 0;
    for (p = goodword; *p != NUL; )
        wgoodword[gi++] = mb_cptr2char_adv(&p);
    wgoodword[gi++] = 0;

    /*
     * The idea is to go from start to end over the words.  So long as
     * characters are equal just continue, this always gives the lowest score.
     * When there is a difference try several alternatives.  Each alternative
     * increases "score" for the edit distance.  Some of the alternatives are
     * pushed unto a stack and tried later, some are tried right away.  At the
     * end of the word the score for one alternative is known.  The lowest
     * possible score is stored in "minscore".
8344 */ 8345 stackidx = 0; 8346 bi = 0; 8347 gi = 0; 8348 score = 0; 8349 minscore = limit + 1; 8350 8351 for (;;) 8352 { 8353 /* Skip over an equal part, score remains the same. */ 8354 for (;;) 8355 { 8356 bc = wbadword[bi]; 8357 gc = wgoodword[gi]; 8358 8359 if (bc != gc) /* stop at a char that's different */ 8360 break; 8361 if (bc == NUL) /* both words end */ 8362 { 8363 if (score < minscore) 8364 minscore = score; 8365 goto pop; /* do next alternative */ 8366 } 8367 ++bi; 8368 ++gi; 8369 } 8370 8371 if (gc == NUL) /* goodword ends, delete badword chars */ 8372 { 8373 do 8374 { 8375 if ((score += SCORE_DEL) >= minscore) 8376 goto pop; /* do next alternative */ 8377 } while (wbadword[++bi] != NUL); 8378 minscore = score; 8379 } 8380 else if (bc == NUL) /* badword ends, insert badword chars */ 8381 { 8382 do 8383 { 8384 if ((score += SCORE_INS) >= minscore) 8385 goto pop; /* do next alternative */ 8386 } while (wgoodword[++gi] != NUL); 8387 minscore = score; 8388 } 8389 else /* both words continue */ 8390 { 8391 /* If not close to the limit, perform a change. Only try changes 8392 * that may lead to a lower score than "minscore". 8393 * round 0: try deleting a char from badword 8394 * round 1: try inserting a char in badword */ 8395 for (round = 0; round <= 1; ++round) 8396 { 8397 score_off = score + (round == 0 ? SCORE_DEL : SCORE_INS); 8398 if (score_off < minscore) 8399 { 8400 if (score_off + SCORE_EDIT_MIN >= minscore) 8401 { 8402 /* Near the limit, rest of the words must match. We 8403 * can check that right now, no need to push an item 8404 * onto the stack. 
*/ 8405 bi2 = bi + 1 - round; 8406 gi2 = gi + round; 8407 while (wgoodword[gi2] == wbadword[bi2]) 8408 { 8409 if (wgoodword[gi2] == NUL) 8410 { 8411 minscore = score_off; 8412 break; 8413 } 8414 ++bi2; 8415 ++gi2; 8416 } 8417 } 8418 else 8419 { 8420 /* try deleting a character from badword later */ 8421 stack[stackidx].badi = bi + 1 - round; 8422 stack[stackidx].goodi = gi + round; 8423 stack[stackidx].score = score_off; 8424 ++stackidx; 8425 } 8426 } 8427 } 8428 8429 if (score + SCORE_SWAP < minscore) 8430 { 8431 /* If swapping two characters makes a match then the 8432 * substitution is more expensive, thus there is no need to 8433 * try both. */ 8434 if (gc == wbadword[bi + 1] && bc == wgoodword[gi + 1]) 8435 { 8436 /* Swap two characters, that is: skip them. */ 8437 gi += 2; 8438 bi += 2; 8439 score += SCORE_SWAP; 8440 continue; 8441 } 8442 } 8443 8444 /* Substitute one character for another which is the same 8445 * thing as deleting a character from both goodword and badword. 8446 * Use a better score when there is only a case difference. */ 8447 if (SPELL_TOFOLD(bc) == SPELL_TOFOLD(gc)) 8448 score += SCORE_ICASE; 8449 else 8450 { 8451 /* For a similar character use SCORE_SIMILAR. */ 8452 if (slang != NULL 8453 && slang->sl_has_map 8454 && similar_chars(slang, gc, bc)) 8455 score += SCORE_SIMILAR; 8456 else 8457 score += SCORE_SUBST; 8458 } 8459 8460 if (score < minscore) 8461 { 8462 /* Do the substitution. */ 8463 ++gi; 8464 ++bi; 8465 continue; 8466 } 8467 } 8468 pop: 8469 /* 8470 * Get here to try the next alternative, pop it from the stack. 8471 */ 8472 if (stackidx == 0) /* stack is empty, finished */ 8473 break; 8474 8475 /* pop an item from the stack */ 8476 --stackidx; 8477 gi = stack[stackidx].goodi; 8478 bi = stack[stackidx].badi; 8479 score = stack[stackidx].score; 8480 } 8481 8482 /* When the score goes over "limit" it may actually be much higher. 8483 * Return a very large number to avoid going below the limit when giving a 8484 * bonus. 
*/ 8485 if (minscore > limit) 8486 return SCORE_MAXMAX; 8487 return minscore; 8488 } 8489 #endif 8490 8491 /* 8492 * ":spellinfo" 8493 */ 8494 void 8495 ex_spellinfo(exarg_T *eap UNUSED) 8496 { 8497 int lpi; 8498 langp_T *lp; 8499 char_u *p; 8500 8501 if (no_spell_checking(curwin)) 8502 return; 8503 8504 msg_start(); 8505 for (lpi = 0; lpi < curwin->w_s->b_langp.ga_len && !got_int; ++lpi) 8506 { 8507 lp = LANGP_ENTRY(curwin->w_s->b_langp, lpi); 8508 msg_puts((char_u *)"file: "); 8509 msg_puts(lp->lp_slang->sl_fname); 8510 msg_putchar('\n'); 8511 p = lp->lp_slang->sl_info; 8512 if (p != NULL) 8513 { 8514 msg_puts(p); 8515 msg_putchar('\n'); 8516 } 8517 } 8518 msg_end(); 8519 } 8520 8521 #define DUMPFLAG_KEEPCASE 1 /* round 2: keep-case tree */ 8522 #define DUMPFLAG_COUNT 2 /* include word count */ 8523 #define DUMPFLAG_ICASE 4 /* ignore case when finding matches */ 8524 #define DUMPFLAG_ONECAP 8 /* pattern starts with capital */ 8525 #define DUMPFLAG_ALLCAP 16 /* pattern is all capitals */ 8526 8527 /* 8528 * ":spelldump" 8529 */ 8530 void 8531 ex_spelldump(exarg_T *eap) 8532 { 8533 char_u *spl; 8534 long dummy; 8535 8536 if (no_spell_checking(curwin)) 8537 return; 8538 get_option_value((char_u*)"spl", &dummy, &spl, OPT_LOCAL); 8539 8540 /* Create a new empty buffer in a new window. */ 8541 do_cmdline_cmd((char_u *)"new"); 8542 8543 /* enable spelling locally in the new window */ 8544 set_option_value((char_u*)"spell", TRUE, (char_u*)"", OPT_LOCAL); 8545 set_option_value((char_u*)"spl", dummy, spl, OPT_LOCAL); 8546 vim_free(spl); 8547 8548 if (!bufempty()) 8549 return; 8550 8551 spell_dump_compl(NULL, 0, NULL, eap->forceit ? DUMPFLAG_COUNT : 0); 8552 8553 /* Delete the empty line that we started with. */ 8554 if (curbuf->b_ml.ml_line_count > 1) 8555 ml_delete(curbuf->b_ml.ml_line_count, FALSE); 8556 8557 redraw_later(NOT_VALID); 8558 } 8559 8560 /* 8561 * Go through all possible words and: 8562 * 1. When "pat" is NULL: dump a list of all words in the current buffer. 
8563 * "ic" and "dir" are not used. 8564 * 2. When "pat" is not NULL: add matching words to insert mode completion. 8565 */ 8566 void 8567 spell_dump_compl( 8568 char_u *pat, /* leading part of the word */ 8569 int ic, /* ignore case */ 8570 int *dir, /* direction for adding matches */ 8571 int dumpflags_arg) /* DUMPFLAG_* */ 8572 { 8573 langp_T *lp; 8574 slang_T *slang; 8575 idx_T arridx[MAXWLEN]; 8576 int curi[MAXWLEN]; 8577 char_u word[MAXWLEN]; 8578 int c; 8579 char_u *byts; 8580 idx_T *idxs; 8581 linenr_T lnum = 0; 8582 int round; 8583 int depth; 8584 int n; 8585 int flags; 8586 char_u *region_names = NULL; /* region names being used */ 8587 int do_region = TRUE; /* dump region names and numbers */ 8588 char_u *p; 8589 int lpi; 8590 int dumpflags = dumpflags_arg; 8591 int patlen; 8592 8593 /* When ignoring case or when the pattern starts with capital pass this on 8594 * to dump_word(). */ 8595 if (pat != NULL) 8596 { 8597 if (ic) 8598 dumpflags |= DUMPFLAG_ICASE; 8599 else 8600 { 8601 n = captype(pat, NULL); 8602 if (n == WF_ONECAP) 8603 dumpflags |= DUMPFLAG_ONECAP; 8604 else if (n == WF_ALLCAP 8605 #ifdef FEAT_MBYTE 8606 && (int)STRLEN(pat) > mb_ptr2len(pat) 8607 #else 8608 && (int)STRLEN(pat) > 1 8609 #endif 8610 ) 8611 dumpflags |= DUMPFLAG_ALLCAP; 8612 } 8613 } 8614 8615 /* Find out if we can support regions: All languages must support the same 8616 * regions or none at all. 
*/ 8617 for (lpi = 0; lpi < curwin->w_s->b_langp.ga_len; ++lpi) 8618 { 8619 lp = LANGP_ENTRY(curwin->w_s->b_langp, lpi); 8620 p = lp->lp_slang->sl_regions; 8621 if (p[0] != 0) 8622 { 8623 if (region_names == NULL) /* first language with regions */ 8624 region_names = p; 8625 else if (STRCMP(region_names, p) != 0) 8626 { 8627 do_region = FALSE; /* region names are different */ 8628 break; 8629 } 8630 } 8631 } 8632 8633 if (do_region && region_names != NULL) 8634 { 8635 if (pat == NULL) 8636 { 8637 vim_snprintf((char *)IObuff, IOSIZE, "/regions=%s", region_names); 8638 ml_append(lnum++, IObuff, (colnr_T)0, FALSE); 8639 } 8640 } 8641 else 8642 do_region = FALSE; 8643 8644 /* 8645 * Loop over all files loaded for the entries in 'spelllang'. 8646 */ 8647 for (lpi = 0; lpi < curwin->w_s->b_langp.ga_len; ++lpi) 8648 { 8649 lp = LANGP_ENTRY(curwin->w_s->b_langp, lpi); 8650 slang = lp->lp_slang; 8651 if (slang->sl_fbyts == NULL) /* reloading failed */ 8652 continue; 8653 8654 if (pat == NULL) 8655 { 8656 vim_snprintf((char *)IObuff, IOSIZE, "# file: %s", slang->sl_fname); 8657 ml_append(lnum++, IObuff, (colnr_T)0, FALSE); 8658 } 8659 8660 /* When matching with a pattern and there are no prefixes only use 8661 * parts of the tree that match "pat". 
*/ 8662 if (pat != NULL && slang->sl_pbyts == NULL) 8663 patlen = (int)STRLEN(pat); 8664 else 8665 patlen = -1; 8666 8667 /* round 1: case-folded tree 8668 * round 2: keep-case tree */ 8669 for (round = 1; round <= 2; ++round) 8670 { 8671 if (round == 1) 8672 { 8673 dumpflags &= ~DUMPFLAG_KEEPCASE; 8674 byts = slang->sl_fbyts; 8675 idxs = slang->sl_fidxs; 8676 } 8677 else 8678 { 8679 dumpflags |= DUMPFLAG_KEEPCASE; 8680 byts = slang->sl_kbyts; 8681 idxs = slang->sl_kidxs; 8682 } 8683 if (byts == NULL) 8684 continue; /* array is empty */ 8685 8686 depth = 0; 8687 arridx[0] = 0; 8688 curi[0] = 1; 8689 while (depth >= 0 && !got_int 8690 && (pat == NULL || !compl_interrupted)) 8691 { 8692 if (curi[depth] > byts[arridx[depth]]) 8693 { 8694 /* Done all bytes at this node, go up one level. */ 8695 --depth; 8696 line_breakcheck(); 8697 ins_compl_check_keys(50, FALSE); 8698 } 8699 else 8700 { 8701 /* Do one more byte at this node. */ 8702 n = arridx[depth] + curi[depth]; 8703 ++curi[depth]; 8704 c = byts[n]; 8705 if (c == 0) 8706 { 8707 /* End of word, deal with the word. 8708 * Don't use keep-case words in the fold-case tree, 8709 * they will appear in the keep-case tree. 8710 * Only use the word when the region matches. */ 8711 flags = (int)idxs[n]; 8712 if ((round == 2 || (flags & WF_KEEPCAP) == 0) 8713 && (flags & WF_NEEDCOMP) == 0 8714 && (do_region 8715 || (flags & WF_REGION) == 0 8716 || (((unsigned)flags >> 16) 8717 & lp->lp_region) != 0)) 8718 { 8719 word[depth] = NUL; 8720 if (!do_region) 8721 flags &= ~WF_REGION; 8722 8723 /* Dump the basic word if there is no prefix or 8724 * when it's the first one. */ 8725 c = (unsigned)flags >> 24; 8726 if (c == 0 || curi[depth] == 2) 8727 { 8728 dump_word(slang, word, pat, dir, 8729 dumpflags, flags, lnum); 8730 if (pat == NULL) 8731 ++lnum; 8732 } 8733 8734 /* Apply the prefix, if there is one. 
*/ 8735 if (c != 0) 8736 lnum = dump_prefixes(slang, word, pat, dir, 8737 dumpflags, flags, lnum); 8738 } 8739 } 8740 else 8741 { 8742 /* Normal char, go one level deeper. */ 8743 word[depth++] = c; 8744 arridx[depth] = idxs[n]; 8745 curi[depth] = 1; 8746 8747 /* Check if this characters matches with the pattern. 8748 * If not skip the whole tree below it. 8749 * Always ignore case here, dump_word() will check 8750 * proper case later. This isn't exactly right when 8751 * length changes for multi-byte characters with 8752 * ignore case... */ 8753 if (depth <= patlen 8754 && MB_STRNICMP(word, pat, depth) != 0) 8755 --depth; 8756 } 8757 } 8758 } 8759 } 8760 } 8761 } 8762 8763 /* 8764 * Dump one word: apply case modifications and append a line to the buffer. 8765 * When "lnum" is zero add insert mode completion. 8766 */ 8767 static void 8768 dump_word( 8769 slang_T *slang, 8770 char_u *word, 8771 char_u *pat, 8772 int *dir, 8773 int dumpflags, 8774 int wordflags, 8775 linenr_T lnum) 8776 { 8777 int keepcap = FALSE; 8778 char_u *p; 8779 char_u *tw; 8780 char_u cword[MAXWLEN]; 8781 char_u badword[MAXWLEN + 10]; 8782 int i; 8783 int flags = wordflags; 8784 8785 if (dumpflags & DUMPFLAG_ONECAP) 8786 flags |= WF_ONECAP; 8787 if (dumpflags & DUMPFLAG_ALLCAP) 8788 flags |= WF_ALLCAP; 8789 8790 if ((dumpflags & DUMPFLAG_KEEPCASE) == 0 && (flags & WF_CAPMASK) != 0) 8791 { 8792 /* Need to fix case according to "flags". */ 8793 make_case_word(word, cword, flags); 8794 p = cword; 8795 } 8796 else 8797 { 8798 p = word; 8799 if ((dumpflags & DUMPFLAG_KEEPCASE) 8800 && ((captype(word, NULL) & WF_KEEPCAP) == 0 8801 || (flags & WF_FIXCAP) != 0)) 8802 keepcap = TRUE; 8803 } 8804 tw = p; 8805 8806 if (pat == NULL) 8807 { 8808 /* Add flags and regions after a slash. 
*/ 8809 if ((flags & (WF_BANNED | WF_RARE | WF_REGION)) || keepcap) 8810 { 8811 STRCPY(badword, p); 8812 STRCAT(badword, "/"); 8813 if (keepcap) 8814 STRCAT(badword, "="); 8815 if (flags & WF_BANNED) 8816 STRCAT(badword, "!"); 8817 else if (flags & WF_RARE) 8818 STRCAT(badword, "?"); 8819 if (flags & WF_REGION) 8820 for (i = 0; i < 7; ++i) 8821 if (flags & (0x10000 << i)) 8822 sprintf((char *)badword + STRLEN(badword), "%d", i + 1); 8823 p = badword; 8824 } 8825 8826 if (dumpflags & DUMPFLAG_COUNT) 8827 { 8828 hashitem_T *hi; 8829 8830 /* Include the word count for ":spelldump!". */ 8831 hi = hash_find(&slang->sl_wordcount, tw); 8832 if (!HASHITEM_EMPTY(hi)) 8833 { 8834 vim_snprintf((char *)IObuff, IOSIZE, "%s\t%d", 8835 tw, HI2WC(hi)->wc_count); 8836 p = IObuff; 8837 } 8838 } 8839 8840 ml_append(lnum, p, (colnr_T)0, FALSE); 8841 } 8842 else if (((dumpflags & DUMPFLAG_ICASE) 8843 ? MB_STRNICMP(p, pat, STRLEN(pat)) == 0 8844 : STRNCMP(p, pat, STRLEN(pat)) == 0) 8845 && ins_compl_add_infercase(p, (int)STRLEN(p), 8846 p_ic, NULL, *dir, 0) == OK) 8847 /* if dir was BACKWARD then honor it just once */ 8848 *dir = FORWARD; 8849 } 8850 8851 /* 8852 * For ":spelldump": Find matching prefixes for "word". Prepend each to 8853 * "word" and append a line to the buffer. 8854 * When "lnum" is zero add insert mode completion. 8855 * Return the updated line number. 
8856 */ 8857 static linenr_T 8858 dump_prefixes( 8859 slang_T *slang, 8860 char_u *word, /* case-folded word */ 8861 char_u *pat, 8862 int *dir, 8863 int dumpflags, 8864 int flags, /* flags with prefix ID */ 8865 linenr_T startlnum) 8866 { 8867 idx_T arridx[MAXWLEN]; 8868 int curi[MAXWLEN]; 8869 char_u prefix[MAXWLEN]; 8870 char_u word_up[MAXWLEN]; 8871 int has_word_up = FALSE; 8872 int c; 8873 char_u *byts; 8874 idx_T *idxs; 8875 linenr_T lnum = startlnum; 8876 int depth; 8877 int n; 8878 int len; 8879 int i; 8880 8881 /* If the word starts with a lower-case letter make the word with an 8882 * upper-case letter in word_up[]. */ 8883 c = PTR2CHAR(word); 8884 if (SPELL_TOUPPER(c) != c) 8885 { 8886 onecap_copy(word, word_up, TRUE); 8887 has_word_up = TRUE; 8888 } 8889 8890 byts = slang->sl_pbyts; 8891 idxs = slang->sl_pidxs; 8892 if (byts != NULL) /* array not is empty */ 8893 { 8894 /* 8895 * Loop over all prefixes, building them byte-by-byte in prefix[]. 8896 * When at the end of a prefix check that it supports "flags". 8897 */ 8898 depth = 0; 8899 arridx[0] = 0; 8900 curi[0] = 1; 8901 while (depth >= 0 && !got_int) 8902 { 8903 n = arridx[depth]; 8904 len = byts[n]; 8905 if (curi[depth] > len) 8906 { 8907 /* Done all bytes at this node, go up one level. */ 8908 --depth; 8909 line_breakcheck(); 8910 } 8911 else 8912 { 8913 /* Do one more byte at this node. */ 8914 n += curi[depth]; 8915 ++curi[depth]; 8916 c = byts[n]; 8917 if (c == 0) 8918 { 8919 /* End of prefix, find out how many IDs there are. */ 8920 for (i = 1; i < len; ++i) 8921 if (byts[n + i] != 0) 8922 break; 8923 curi[depth] += i - 1; 8924 8925 c = valid_word_prefix(i, n, flags, word, slang, FALSE); 8926 if (c != 0) 8927 { 8928 vim_strncpy(prefix + depth, word, MAXWLEN - depth - 1); 8929 dump_word(slang, prefix, pat, dir, dumpflags, 8930 (c & WF_RAREPFX) ? 
(flags | WF_RARE) 8931 : flags, lnum); 8932 if (lnum != 0) 8933 ++lnum; 8934 } 8935 8936 /* Check for prefix that matches the word when the 8937 * first letter is upper-case, but only if the prefix has 8938 * a condition. */ 8939 if (has_word_up) 8940 { 8941 c = valid_word_prefix(i, n, flags, word_up, slang, 8942 TRUE); 8943 if (c != 0) 8944 { 8945 vim_strncpy(prefix + depth, word_up, 8946 MAXWLEN - depth - 1); 8947 dump_word(slang, prefix, pat, dir, dumpflags, 8948 (c & WF_RAREPFX) ? (flags | WF_RARE) 8949 : flags, lnum); 8950 if (lnum != 0) 8951 ++lnum; 8952 } 8953 } 8954 } 8955 else 8956 { 8957 /* Normal char, go one level deeper. */ 8958 prefix[depth++] = c; 8959 arridx[depth] = idxs[n]; 8960 curi[depth] = 1; 8961 } 8962 } 8963 } 8964 } 8965 8966 return lnum; 8967 } 8968 8969 /* 8970 * Move "p" to the end of word "start". 8971 * Uses the spell-checking word characters. 8972 */ 8973 char_u * 8974 spell_to_word_end(char_u *start, win_T *win) 8975 { 8976 char_u *p = start; 8977 8978 while (*p != NUL && spell_iswordp(p, win)) 8979 mb_ptr_adv(p); 8980 return p; 8981 } 8982 8983 #if defined(FEAT_INS_EXPAND) || defined(PROTO) 8984 /* 8985 * For Insert mode completion CTRL-X s: 8986 * Find start of the word in front of column "startcol". 8987 * We don't check if it is badly spelled, with completion we can only change 8988 * the word in front of the cursor. 8989 * Returns the column number of the word. 8990 */ 8991 int 8992 spell_word_start(int startcol) 8993 { 8994 char_u *line; 8995 char_u *p; 8996 int col = 0; 8997 8998 if (no_spell_checking(curwin)) 8999 return startcol; 9000 9001 /* Find a word character before "startcol". */ 9002 line = ml_get_curline(); 9003 for (p = line + startcol; p > line; ) 9004 { 9005 mb_ptr_back(line, p); 9006 if (spell_iswordp_nmw(p, curwin)) 9007 break; 9008 } 9009 9010 /* Go back to start of the word. 
*/ 9011 while (p > line) 9012 { 9013 col = (int)(p - line); 9014 mb_ptr_back(line, p); 9015 if (!spell_iswordp(p, curwin)) 9016 break; 9017 col = 0; 9018 } 9019 9020 return col; 9021 } 9022 9023 /* 9024 * Need to check for 'spellcapcheck' now, the word is removed before 9025 * expand_spelling() is called. Therefore the ugly global variable. 9026 */ 9027 static int spell_expand_need_cap; 9028 9029 void 9030 spell_expand_check_cap(colnr_T col) 9031 { 9032 spell_expand_need_cap = check_need_cap(curwin->w_cursor.lnum, col); 9033 } 9034 9035 /* 9036 * Get list of spelling suggestions. 9037 * Used for Insert mode completion CTRL-X ?. 9038 * Returns the number of matches. The matches are in "matchp[]", array of 9039 * allocated strings. 9040 */ 9041 int 9042 expand_spelling( 9043 linenr_T lnum UNUSED, 9044 char_u *pat, 9045 char_u ***matchp) 9046 { 9047 garray_T ga; 9048 9049 spell_suggest_list(&ga, pat, 100, spell_expand_need_cap, TRUE); 9050 *matchp = ga.ga_data; 9051 return ga.ga_len; 9052 } 9053 #endif 9054 9055 #endif /* FEAT_SPELL */ 9056