1 /* vi:set ts=8 sts=4 sw=4 noet: 2 * 3 * VIM - Vi IMproved by Bram Moolenaar 4 * 5 * Do ":help uganda" in Vim to read copying and usage conditions. 6 * Do ":help credits" in Vim to see a list of people who contributed. 7 * See README.txt for an overview of the Vim source code. 8 */ 9 10 /* 11 * spell.c: code for spell checking 12 * 13 * See spellfile.c for the Vim spell file format. 14 * 15 * The spell checking mechanism uses a tree (aka trie). Each node in the tree 16 * has a list of bytes that can appear (siblings). For each byte there is a 17 * pointer to the node with the byte that follows in the word (child). 18 * 19 * A NUL byte is used where the word may end. The bytes are sorted, so that 20 * binary searching can be used and the NUL bytes are at the start. The 21 * number of possible bytes is stored before the list of bytes. 22 * 23 * The tree uses two arrays: "byts" stores the characters, "idxs" stores 24 * either the next index or flags. The tree starts at index 0. For example, 25 * to lookup "vi" this sequence is followed: 26 * i = 0 27 * len = byts[i] 28 * n = where "v" appears in byts[i + 1] to byts[i + len] 29 * i = idxs[n] 30 * len = byts[i] 31 * n = where "i" appears in byts[i + 1] to byts[i + len] 32 * i = idxs[n] 33 * len = byts[i] 34 * find that byts[i + 1] is 0, idxs[i + 1] has flags for "vi". 35 * 36 * There are two word trees: one with case-folded words and one with words in 37 * original case. The second one is only used for keep-case words and is 38 * usually small. 39 * 40 * There is one additional tree for when not all prefixes are applied when 41 * generating the .spl file. This tree stores all the possible prefixes, as 42 * if they were words. At each word (prefix) end the prefix nr is stored, the 43 * following word must support this prefix nr. And the condition nr is 44 * stored, used to lookup the condition that the word must match with. 45 * 46 * Thanks to Olaf Seibert for providing an example implementation of this tree 47 * and the compression mechanism. 48 * LZ trie ideas: 49 * http://www.irb.hr/hr/home/ristov/papers/RistovLZtrieRevision1.pdf 50 * More papers: http://www-igm.univ-mlv.fr/~laporte/publi_en.html 51 * 52 * Matching involves checking the caps type: Onecap ALLCAP KeepCap. 53 * 54 * Why doesn't Vim use aspell/ispell/myspell/etc.? 55 * See ":help develop-spell". 56 */ 57 58 /* 59 * Use this to adjust the score after finding suggestions, based on the 60 * suggested word sounding like the bad word. This is much faster than doing 61 * it for every possible suggestion. 62 * Disadvantage: When "the" is typed as "hte" it sounds quite different ("@" 63 * vs "ht") and goes down in the list. 64 * Used when 'spellsuggest' is set to "best". 65 */ 66 #define RESCORE(word_score, sound_score) ((3 * word_score + sound_score) / 4) 67 68 /* 69 * Do the opposite: based on a maximum end score and a known sound score, 70 * compute the maximum word score that can be used. 71 */ 72 #define MAXSCORE(word_score, sound_score) ((4 * word_score - sound_score) / 3) 73 74 #define IN_SPELL_C 75 #include "vim.h" 76 77 #if defined(FEAT_SPELL) || defined(PROTO) 78 79 #ifndef UNIX /* it's in os_unix.h for Unix */ 80 # include <time.h> /* for time_t */ 81 #endif 82 83 /* only used for su_badflags */ 84 #define WF_MIXCAP 0x20 /* mix of upper and lower case: macaRONI */ 85 86 #define WF_CAPMASK (WF_ONECAP | WF_ALLCAP | WF_KEEPCAP | WF_FIXCAP) 87 88 #define REGION_ALL 0xff /* word valid in all regions */ 89 90 #define VIMSUGMAGIC "VIMsug" /* string at start of Vim .sug file */ 91 #define VIMSUGMAGICL 6 92 #define VIMSUGVERSION 1 93 94 /* Result values. Lower number is accepted over higher one. */ 95 #define SP_BANNED -1 96 #define SP_OK 0 97 #define SP_RARE 1 98 #define SP_LOCAL 2 99 #define SP_BAD 3 100 101 typedef struct wordcount_S 102 { 103 short_u wc_count; /* nr of times word was seen */ 104 char_u wc_word[1]; /* word, actually longer */ 105 } wordcount_T; 106 107 #define WC_KEY_OFF offsetof(wordcount_T, wc_word) 108 #define HI2WC(hi) ((wordcount_T *)((hi)->hi_key - WC_KEY_OFF)) 109 #define MAXWORDCOUNT 0xffff 110 111 /* 112 * Information used when looking for suggestions. 113 */ 114 typedef struct suginfo_S 115 { 116 garray_T su_ga; /* suggestions, contains "suggest_T" */ 117 int su_maxcount; /* max. number of suggestions displayed */ 118 int su_maxscore; /* maximum score for adding to su_ga */ 119 int su_sfmaxscore; /* idem, for when doing soundfold words */ 120 garray_T su_sga; /* like su_ga, sound-folded scoring */ 121 char_u *su_badptr; /* start of bad word in line */ 122 int su_badlen; /* length of detected bad word in line */ 123 int su_badflags; /* caps flags for bad word */ 124 char_u su_badword[MAXWLEN]; /* bad word truncated at su_badlen */ 125 char_u su_fbadword[MAXWLEN]; /* su_badword case-folded */ 126 char_u su_sal_badword[MAXWLEN]; /* su_badword soundfolded */ 127 hashtab_T su_banned; /* table with banned words */ 128 slang_T *su_sallang; /* default language for sound folding */ 129 } suginfo_T; 130 131 /* One word suggestion. Used in "si_ga". */ 132 typedef struct suggest_S 133 { 134 char_u *st_word; /* suggested word, allocated string */ 135 int st_wordlen; /* STRLEN(st_word) */ 136 int st_orglen; /* length of replaced text */ 137 int st_score; /* lower is better */ 138 int st_altscore; /* used when st_score compares equal */ 139 int st_salscore; /* st_score is for soundalike */ 140 int st_had_bonus; /* bonus already included in score */ 141 slang_T *st_slang; /* language used for sound folding */ 142 } suggest_T; 143 144 #define SUG(ga, i) (((suggest_T *)(ga).ga_data)[i]) 145 146 /* TRUE if a word appears in the list of banned words. */ 147 #define WAS_BANNED(su, word) (!HASHITEM_EMPTY(hash_find(&su->su_banned, word))) 148 149 /* Number of suggestions kept when cleaning up. We need to keep more than 150 * what is displayed, because when rescore_suggestions() is called the score 151 * may change and wrong suggestions may be removed later. */ 152 #define SUG_CLEAN_COUNT(su) ((su)->su_maxcount < 130 ? 150 : (su)->su_maxcount + 20) 153 154 /* Threshold for sorting and cleaning up suggestions. Don't want to keep lots 155 * of suggestions that are not going to be displayed. */ 156 #define SUG_MAX_COUNT(su) (SUG_CLEAN_COUNT(su) + 50) 157 158 /* score for various changes */ 159 #define SCORE_SPLIT 149 /* split bad word */ 160 #define SCORE_SPLIT_NO 249 /* split bad word with NOSPLITSUGS */ 161 #define SCORE_ICASE 52 /* slightly different case */ 162 #define SCORE_REGION 200 /* word is for different region */ 163 #define SCORE_RARE 180 /* rare word */ 164 #define SCORE_SWAP 75 /* swap two characters */ 165 #define SCORE_SWAP3 110 /* swap two characters in three */ 166 #define SCORE_REP 65 /* REP replacement */ 167 #define SCORE_SUBST 93 /* substitute a character */ 168 #define SCORE_SIMILAR 33 /* substitute a similar character */ 169 #define SCORE_SUBCOMP 33 /* substitute a composing character */ 170 #define SCORE_DEL 94 /* delete a character */ 171 #define SCORE_DELDUP 66 /* delete a duplicated character */ 172 #define SCORE_DELCOMP 28 /* delete a composing character */ 173 #define SCORE_INS 96 /* insert a character */ 174 #define SCORE_INSDUP 67 /* insert a duplicate character */ 175 #define SCORE_INSCOMP 30 /* insert a composing character */ 176 #define SCORE_NONWORD 103 /* change non-word to word char */ 177 178 #define SCORE_FILE 30 /* suggestion from a file */ 179 #define SCORE_MAXINIT 350 /* Initial maximum score: higher == slower. 180 * 350 allows for about three changes. */ 181 182 #define SCORE_COMMON1 30 /* subtracted for words seen before */ 183 #define SCORE_COMMON2 40 /* subtracted for words often seen */ 184 #define SCORE_COMMON3 50 /* subtracted for words very often seen */ 185 #define SCORE_THRES2 10 /* word count threshold for COMMON2 */ 186 #define SCORE_THRES3 100 /* word count threshold for COMMON3 */ 187 188 /* When trying changed soundfold words it becomes slow when trying more than 189 * two changes. With less then two changes it's slightly faster but we miss a 190 * few good suggestions. In rare cases we need to try three of four changes. 191 */ 192 #define SCORE_SFMAX1 200 /* maximum score for first try */ 193 #define SCORE_SFMAX2 300 /* maximum score for second try */ 194 #define SCORE_SFMAX3 400 /* maximum score for third try */ 195 196 #define SCORE_BIG SCORE_INS * 3 /* big difference */ 197 #define SCORE_MAXMAX 999999 /* accept any score */ 198 #define SCORE_LIMITMAX 350 /* for spell_edit_score_limit() */ 199 200 /* for spell_edit_score_limit() we need to know the minimum value of 201 * SCORE_ICASE, SCORE_SWAP, SCORE_DEL, SCORE_SIMILAR and SCORE_INS */ 202 #define SCORE_EDIT_MIN SCORE_SIMILAR 203 204 /* 205 * Structure to store info for word matching. 206 */ 207 typedef struct matchinf_S 208 { 209 langp_T *mi_lp; /* info for language and region */ 210 211 /* pointers to original text to be checked */ 212 char_u *mi_word; /* start of word being checked */ 213 char_u *mi_end; /* end of matching word so far */ 214 char_u *mi_fend; /* next char to be added to mi_fword */ 215 char_u *mi_cend; /* char after what was used for 216 mi_capflags */ 217 218 /* case-folded text */ 219 char_u mi_fword[MAXWLEN + 1]; /* mi_word case-folded */ 220 int mi_fwordlen; /* nr of valid bytes in mi_fword */ 221 222 /* for when checking word after a prefix */ 223 int mi_prefarridx; /* index in sl_pidxs with list of 224 affixID/condition */ 225 int mi_prefcnt; /* number of entries at mi_prefarridx */ 226 int mi_prefixlen; /* byte length of prefix */ 227 #ifdef FEAT_MBYTE 228 int mi_cprefixlen; /* byte length of prefix in original 229 case */ 230 #else 231 # define mi_cprefixlen mi_prefixlen /* it's the same value */ 232 #endif 233 234 /* for when checking a compound word */ 235 int mi_compoff; /* start of following word offset */ 236 char_u mi_compflags[MAXWLEN]; /* flags for compound words used */ 237 int mi_complen; /* nr of compound words used */ 238 int mi_compextra; /* nr of COMPOUNDROOT words */ 239 240 /* others */ 241 int mi_result; /* result so far: SP_BAD, SP_OK, etc. */ 242 int mi_capflags; /* WF_ONECAP WF_ALLCAP WF_KEEPCAP */ 243 win_T *mi_win; /* buffer being checked */ 244 245 /* for NOBREAK */ 246 int mi_result2; /* "mi_resul" without following word */ 247 char_u *mi_end2; /* "mi_end" without following word */ 248 } matchinf_T; 249 250 251 static int spell_iswordp(char_u *p, win_T *wp); 252 #ifdef FEAT_MBYTE 253 static int spell_mb_isword_class(int cl, win_T *wp); 254 static int spell_iswordp_w(int *p, win_T *wp); 255 #endif 256 257 /* 258 * For finding suggestions: At each node in the tree these states are tried: 259 */ 260 typedef enum 261 { 262 STATE_START = 0, /* At start of node check for NUL bytes (goodword 263 * ends); if badword ends there is a match, otherwise 264 * try splitting word. */ 265 STATE_NOPREFIX, /* try without prefix */ 266 STATE_SPLITUNDO, /* Undo splitting. */ 267 STATE_ENDNUL, /* Past NUL bytes at start of the node. */ 268 STATE_PLAIN, /* Use each byte of the node. */ 269 STATE_DEL, /* Delete a byte from the bad word. */ 270 STATE_INS_PREP, /* Prepare for inserting bytes. */ 271 STATE_INS, /* Insert a byte in the bad word. */ 272 STATE_SWAP, /* Swap two bytes. */ 273 STATE_UNSWAP, /* Undo swap two characters. */ 274 STATE_SWAP3, /* Swap two characters over three. */ 275 STATE_UNSWAP3, /* Undo Swap two characters over three. */ 276 STATE_UNROT3L, /* Undo rotate three characters left */ 277 STATE_UNROT3R, /* Undo rotate three characters right */ 278 STATE_REP_INI, /* Prepare for using REP items. */ 279 STATE_REP, /* Use matching REP items from the .aff file. */ 280 STATE_REP_UNDO, /* Undo a REP item replacement. */ 281 STATE_FINAL /* End of this node. */ 282 } state_T; 283 284 /* 285 * Struct to keep the state at each level in suggest_try_change(). 286 */ 287 typedef struct trystate_S 288 { 289 state_T ts_state; /* state at this level, STATE_ */ 290 int ts_score; /* score */ 291 idx_T ts_arridx; /* index in tree array, start of node */ 292 short ts_curi; /* index in list of child nodes */ 293 char_u ts_fidx; /* index in fword[], case-folded bad word */ 294 char_u ts_fidxtry; /* ts_fidx at which bytes may be changed */ 295 char_u ts_twordlen; /* valid length of tword[] */ 296 char_u ts_prefixdepth; /* stack depth for end of prefix or 297 * PFD_PREFIXTREE or PFD_NOPREFIX */ 298 char_u ts_flags; /* TSF_ flags */ 299 #ifdef FEAT_MBYTE 300 char_u ts_tcharlen; /* number of bytes in tword character */ 301 char_u ts_tcharidx; /* current byte index in tword character */ 302 char_u ts_isdiff; /* DIFF_ values */ 303 char_u ts_fcharstart; /* index in fword where badword char started */ 304 #endif 305 char_u ts_prewordlen; /* length of word in "preword[]" */ 306 char_u ts_splitoff; /* index in "tword" after last split */ 307 char_u ts_splitfidx; /* "ts_fidx" at word split */ 308 char_u ts_complen; /* nr of compound words used */ 309 char_u ts_compsplit; /* index for "compflags" where word was spit */ 310 char_u ts_save_badflags; /* su_badflags saved here */ 311 char_u ts_delidx; /* index in fword for char that was deleted, 312 valid when "ts_flags" has TSF_DIDDEL */ 313 } trystate_T; 314 315 /* values for ts_isdiff */ 316 #define DIFF_NONE 0 /* no different byte (yet) */ 317 #define DIFF_YES 1 /* different byte found */ 318 #define DIFF_INSERT 2 /* inserting character */ 319 320 /* values for ts_flags */ 321 #define TSF_PREFIXOK 1 /* already checked that prefix is OK */ 322 #define TSF_DIDSPLIT 2 /* tried split at this point */ 323 #define TSF_DIDDEL 4 /* did a delete, "ts_delidx" has index */ 324 325 /* special values ts_prefixdepth */ 326 #define PFD_NOPREFIX 0xff /* not using prefixes */ 327 #define PFD_PREFIXTREE 0xfe /* walking through the prefix tree */ 328 #define PFD_NOTSPECIAL 0xfd /* highest value that's not special */ 329 330 /* mode values for find_word */ 331 #define FIND_FOLDWORD 0 /* find word case-folded */ 332 #define FIND_KEEPWORD 1 /* find keep-case word */ 333 #define FIND_PREFIX 2 /* find word after prefix */ 334 #define FIND_COMPOUND 3 /* find case-folded compound word */ 335 #define FIND_KEEPCOMPOUND 4 /* find keep-case compound word */ 336 337 static void find_word(matchinf_T *mip, int mode); 338 static int match_checkcompoundpattern(char_u *ptr, int wlen, garray_T *gap); 339 static int can_compound(slang_T *slang, char_u *word, char_u *flags); 340 static int can_be_compound(trystate_T *sp, slang_T *slang, char_u *compflags, int flag); 341 static int match_compoundrule(slang_T *slang, char_u *compflags); 342 static int valid_word_prefix(int totprefcnt, int arridx, int flags, char_u *word, slang_T *slang, int cond_req); 343 static void find_prefix(matchinf_T *mip, int mode); 344 static int fold_more(matchinf_T *mip); 345 static int spell_valid_case(int wordflags, int treeflags); 346 static int no_spell_checking(win_T *wp); 347 static void spell_load_lang(char_u *lang); 348 static void int_wordlist_spl(char_u *fname); 349 static void spell_load_cb(char_u *fname, void *cookie); 350 static int score_wordcount_adj(slang_T *slang, int score, char_u *word, int split); 351 static int count_syllables(slang_T *slang, char_u *word); 352 static void clear_midword(win_T *buf); 353 static void use_midword(slang_T *lp, win_T *buf); 354 static int find_region(char_u *rp, char_u *region); 355 static int badword_captype(char_u *word, char_u *end); 356 static int check_need_cap(linenr_T lnum, colnr_T col); 357 static void spell_find_suggest(char_u *badptr, int badlen, suginfo_T *su, int maxcount, int banbadword, int need_cap, int interactive); 358 #ifdef FEAT_EVAL 359 static void spell_suggest_expr(suginfo_T *su, char_u *expr); 360 #endif 361 static void spell_suggest_file(suginfo_T *su, char_u *fname); 362 static void spell_suggest_intern(suginfo_T *su, int interactive); 363 static void spell_find_cleanup(suginfo_T *su); 364 static void allcap_copy(char_u *word, char_u *wcopy); 365 static void suggest_try_special(suginfo_T *su); 366 static void suggest_try_change(suginfo_T *su); 367 static void suggest_trie_walk(suginfo_T *su, langp_T *lp, char_u *fword, int soundfold); 368 static void go_deeper(trystate_T *stack, int depth, int score_add); 369 #ifdef FEAT_MBYTE 370 static int nofold_len(char_u *fword, int flen, char_u *word); 371 #endif 372 static void find_keepcap_word(slang_T *slang, char_u *fword, char_u *kword); 373 static void score_comp_sal(suginfo_T *su); 374 static void score_combine(suginfo_T *su); 375 static int stp_sal_score(suggest_T *stp, suginfo_T *su, slang_T *slang, char_u *badsound); 376 static void suggest_try_soundalike_prep(void); 377 static void suggest_try_soundalike(suginfo_T *su); 378 static void suggest_try_soundalike_finish(void); 379 static void add_sound_suggest(suginfo_T *su, char_u *goodword, int score, langp_T *lp); 380 static int soundfold_find(slang_T *slang, char_u *word); 381 static void make_case_word(char_u *fword, char_u *cword, int flags); 382 static int similar_chars(slang_T *slang, int c1, int c2); 383 static void add_suggestion(suginfo_T *su, garray_T *gap, char_u *goodword, int badlen, int score, int altscore, int had_bonus, slang_T *slang, int maxsf); 384 static void check_suggestions(suginfo_T *su, garray_T *gap); 385 static void add_banned(suginfo_T *su, char_u *word); 386 static void rescore_suggestions(suginfo_T *su); 387 static void rescore_one(suginfo_T *su, suggest_T *stp); 388 static int cleanup_suggestions(garray_T *gap, int maxscore, int keep); 389 static void spell_soundfold_sofo(slang_T *slang, char_u *inword, char_u *res); 390 static void spell_soundfold_sal(slang_T *slang, char_u *inword, char_u *res); 391 #ifdef FEAT_MBYTE 392 static void spell_soundfold_wsal(slang_T *slang, char_u *inword, char_u *res); 393 #endif 394 static int soundalike_score(char_u *goodsound, char_u *badsound); 395 static int spell_edit_score(slang_T *slang, char_u *badword, char_u *goodword); 396 static int spell_edit_score_limit(slang_T *slang, char_u *badword, char_u *goodword, int limit); 397 #ifdef FEAT_MBYTE 398 static int spell_edit_score_limit_w(slang_T *slang, char_u *badword, char_u *goodword, int limit); 399 #endif 400 static void dump_word(slang_T *slang, char_u *word, char_u *pat, int *dir, int round, int flags, linenr_T lnum); 401 static linenr_T dump_prefixes(slang_T *slang, char_u *word, char_u *pat, int *dir, int round, int flags, linenr_T startlnum); 402 403 404 /* Remember what "z?" replaced. */ 405 static char_u *repl_from = NULL; 406 static char_u *repl_to = NULL; 407 408 /* 409 * Main spell-checking function. 410 * "ptr" points to a character that could be the start of a word. 411 * "*attrp" is set to the highlight index for a badly spelled word. For a 412 * non-word or when it's OK it remains unchanged. 413 * This must only be called when 'spelllang' is not empty. 414 * 415 * "capcol" is used to check for a Capitalised word after the end of a 416 * sentence. If it's zero then perform the check. Return the column where to 417 * check next, or -1 when no sentence end was found. If it's NULL then don't 418 * worry. 419 * 420 * Returns the length of the word in bytes, also when it's OK, so that the 421 * caller can skip over the word. 422 */ 423 int 424 spell_check( 425 win_T *wp, /* current window */ 426 char_u *ptr, 427 hlf_T *attrp, 428 int *capcol, /* column to check for Capital */ 429 int docount) /* count good words */ 430 { 431 matchinf_T mi; /* Most things are put in "mi" so that it can 432 be passed to functions quickly. */ 433 int nrlen = 0; /* found a number first */ 434 int c; 435 int wrongcaplen = 0; 436 int lpi; 437 int count_word = docount; 438 439 /* A word never starts at a space or a control character. Return quickly 440 * then, skipping over the character. */ 441 if (*ptr <= ' ') 442 return 1; 443 444 /* Return here when loading language files failed. */ 445 if (wp->w_s->b_langp.ga_len == 0) 446 return 1; 447 448 vim_memset(&mi, 0, sizeof(matchinf_T)); 449 450 /* A number is always OK. Also skip hexadecimal numbers 0xFF99 and 451 * 0X99FF. But always do check spelling to find "3GPP" and "11 452 * julifeest". */ 453 if (*ptr >= '0' && *ptr <= '9') 454 { 455 if (*ptr == '0' && (ptr[1] == 'b' || ptr[1] == 'B')) 456 mi.mi_end = skipbin(ptr + 2); 457 else if (*ptr == '0' && (ptr[1] == 'x' || ptr[1] == 'X')) 458 mi.mi_end = skiphex(ptr + 2); 459 else 460 mi.mi_end = skipdigits(ptr); 461 nrlen = (int)(mi.mi_end - ptr); 462 } 463 464 /* Find the normal end of the word (until the next non-word character). */ 465 mi.mi_word = ptr; 466 mi.mi_fend = ptr; 467 if (spell_iswordp(mi.mi_fend, wp)) 468 { 469 do 470 { 471 MB_PTR_ADV(mi.mi_fend); 472 } while (*mi.mi_fend != NUL && spell_iswordp(mi.mi_fend, wp)); 473 474 if (capcol != NULL && *capcol == 0 && wp->w_s->b_cap_prog != NULL) 475 { 476 /* Check word starting with capital letter. */ 477 c = PTR2CHAR(ptr); 478 if (!SPELL_ISUPPER(c)) 479 wrongcaplen = (int)(mi.mi_fend - ptr); 480 } 481 } 482 if (capcol != NULL) 483 *capcol = -1; 484 485 /* We always use the characters up to the next non-word character, 486 * also for bad words. */ 487 mi.mi_end = mi.mi_fend; 488 489 /* Check caps type later. */ 490 mi.mi_capflags = 0; 491 mi.mi_cend = NULL; 492 mi.mi_win = wp; 493 494 /* case-fold the word with one non-word character, so that we can check 495 * for the word end. */ 496 if (*mi.mi_fend != NUL) 497 MB_PTR_ADV(mi.mi_fend); 498 499 (void)spell_casefold(ptr, (int)(mi.mi_fend - ptr), mi.mi_fword, 500 MAXWLEN + 1); 501 mi.mi_fwordlen = (int)STRLEN(mi.mi_fword); 502 503 /* The word is bad unless we recognize it. */ 504 mi.mi_result = SP_BAD; 505 mi.mi_result2 = SP_BAD; 506 507 /* 508 * Loop over the languages specified in 'spelllang'. 509 * We check them all, because a word may be matched longer in another 510 * language. 511 */ 512 for (lpi = 0; lpi < wp->w_s->b_langp.ga_len; ++lpi) 513 { 514 mi.mi_lp = LANGP_ENTRY(wp->w_s->b_langp, lpi); 515 516 /* If reloading fails the language is still in the list but everything 517 * has been cleared. */ 518 if (mi.mi_lp->lp_slang->sl_fidxs == NULL) 519 continue; 520 521 /* Check for a matching word in case-folded words. */ 522 find_word(&mi, FIND_FOLDWORD); 523 524 /* Check for a matching word in keep-case words. */ 525 find_word(&mi, FIND_KEEPWORD); 526 527 /* Check for matching prefixes. */ 528 find_prefix(&mi, FIND_FOLDWORD); 529 530 /* For a NOBREAK language, may want to use a word without a following 531 * word as a backup. */ 532 if (mi.mi_lp->lp_slang->sl_nobreak && mi.mi_result == SP_BAD 533 && mi.mi_result2 != SP_BAD) 534 { 535 mi.mi_result = mi.mi_result2; 536 mi.mi_end = mi.mi_end2; 537 } 538 539 /* Count the word in the first language where it's found to be OK. */ 540 if (count_word && mi.mi_result == SP_OK) 541 { 542 count_common_word(mi.mi_lp->lp_slang, ptr, 543 (int)(mi.mi_end - ptr), 1); 544 count_word = FALSE; 545 } 546 } 547 548 if (mi.mi_result != SP_OK) 549 { 550 /* If we found a number skip over it. Allows for "42nd". Do flag 551 * rare and local words, e.g., "3GPP". */ 552 if (nrlen > 0) 553 { 554 if (mi.mi_result == SP_BAD || mi.mi_result == SP_BANNED) 555 return nrlen; 556 } 557 558 /* When we are at a non-word character there is no error, just 559 * skip over the character (try looking for a word after it). */ 560 else if (!spell_iswordp_nmw(ptr, wp)) 561 { 562 if (capcol != NULL && wp->w_s->b_cap_prog != NULL) 563 { 564 regmatch_T regmatch; 565 int r; 566 567 /* Check for end of sentence. */ 568 regmatch.regprog = wp->w_s->b_cap_prog; 569 regmatch.rm_ic = FALSE; 570 r = vim_regexec(®match, ptr, 0); 571 wp->w_s->b_cap_prog = regmatch.regprog; 572 if (r) 573 *capcol = (int)(regmatch.endp[0] - ptr); 574 } 575 576 #ifdef FEAT_MBYTE 577 if (has_mbyte) 578 return (*mb_ptr2len)(ptr); 579 #endif 580 return 1; 581 } 582 else if (mi.mi_end == ptr) 583 /* Always include at least one character. Required for when there 584 * is a mixup in "midword". */ 585 MB_PTR_ADV(mi.mi_end); 586 else if (mi.mi_result == SP_BAD 587 && LANGP_ENTRY(wp->w_s->b_langp, 0)->lp_slang->sl_nobreak) 588 { 589 char_u *p, *fp; 590 int save_result = mi.mi_result; 591 592 /* First language in 'spelllang' is NOBREAK. Find first position 593 * at which any word would be valid. */ 594 mi.mi_lp = LANGP_ENTRY(wp->w_s->b_langp, 0); 595 if (mi.mi_lp->lp_slang->sl_fidxs != NULL) 596 { 597 p = mi.mi_word; 598 fp = mi.mi_fword; 599 for (;;) 600 { 601 MB_PTR_ADV(p); 602 MB_PTR_ADV(fp); 603 if (p >= mi.mi_end) 604 break; 605 mi.mi_compoff = (int)(fp - mi.mi_fword); 606 find_word(&mi, FIND_COMPOUND); 607 if (mi.mi_result != SP_BAD) 608 { 609 mi.mi_end = p; 610 break; 611 } 612 } 613 mi.mi_result = save_result; 614 } 615 } 616 617 if (mi.mi_result == SP_BAD || mi.mi_result == SP_BANNED) 618 *attrp = HLF_SPB; 619 else if (mi.mi_result == SP_RARE) 620 *attrp = HLF_SPR; 621 else 622 *attrp = HLF_SPL; 623 } 624 625 if (wrongcaplen > 0 && (mi.mi_result == SP_OK || mi.mi_result == SP_RARE)) 626 { 627 /* Report SpellCap only when the word isn't badly spelled. */ 628 *attrp = HLF_SPC; 629 return wrongcaplen; 630 } 631 632 return (int)(mi.mi_end - ptr); 633 } 634 635 /* 636 * Check if the word at "mip->mi_word" is in the tree. 637 * When "mode" is FIND_FOLDWORD check in fold-case word tree. 638 * When "mode" is FIND_KEEPWORD check in keep-case word tree. 639 * When "mode" is FIND_PREFIX check for word after prefix in fold-case word 640 * tree. 641 * 642 * For a match mip->mi_result is updated. 643 */ 644 static void 645 find_word(matchinf_T *mip, int mode) 646 { 647 idx_T arridx = 0; 648 int endlen[MAXWLEN]; /* length at possible word endings */ 649 idx_T endidx[MAXWLEN]; /* possible word endings */ 650 int endidxcnt = 0; 651 int len; 652 int wlen = 0; 653 int flen; 654 int c; 655 char_u *ptr; 656 idx_T lo, hi, m; 657 #ifdef FEAT_MBYTE 658 char_u *s; 659 #endif 660 char_u *p; 661 int res = SP_BAD; 662 slang_T *slang = mip->mi_lp->lp_slang; 663 unsigned flags; 664 char_u *byts; 665 idx_T *idxs; 666 int word_ends; 667 int prefix_found; 668 int nobreak_result; 669 670 if (mode == FIND_KEEPWORD || mode == FIND_KEEPCOMPOUND) 671 { 672 /* Check for word with matching case in keep-case tree. */ 673 ptr = mip->mi_word; 674 flen = 9999; /* no case folding, always enough bytes */ 675 byts = slang->sl_kbyts; 676 idxs = slang->sl_kidxs; 677 678 if (mode == FIND_KEEPCOMPOUND) 679 /* Skip over the previously found word(s). */ 680 wlen += mip->mi_compoff; 681 } 682 else 683 { 684 /* Check for case-folded in case-folded tree. */ 685 ptr = mip->mi_fword; 686 flen = mip->mi_fwordlen; /* available case-folded bytes */ 687 byts = slang->sl_fbyts; 688 idxs = slang->sl_fidxs; 689 690 if (mode == FIND_PREFIX) 691 { 692 /* Skip over the prefix. */ 693 wlen = mip->mi_prefixlen; 694 flen -= mip->mi_prefixlen; 695 } 696 else if (mode == FIND_COMPOUND) 697 { 698 /* Skip over the previously found word(s). */ 699 wlen = mip->mi_compoff; 700 flen -= mip->mi_compoff; 701 } 702 703 } 704 705 if (byts == NULL) 706 return; /* array is empty */ 707 708 /* 709 * Repeat advancing in the tree until: 710 * - there is a byte that doesn't match, 711 * - we reach the end of the tree, 712 * - or we reach the end of the line. 713 */ 714 for (;;) 715 { 716 if (flen <= 0 && *mip->mi_fend != NUL) 717 flen = fold_more(mip); 718 719 len = byts[arridx++]; 720 721 /* If the first possible byte is a zero the word could end here. 722 * Remember this index, we first check for the longest word. */ 723 if (byts[arridx] == 0) 724 { 725 if (endidxcnt == MAXWLEN) 726 { 727 /* Must be a corrupted spell file. */ 728 EMSG(_(e_format)); 729 return; 730 } 731 endlen[endidxcnt] = wlen; 732 endidx[endidxcnt++] = arridx++; 733 --len; 734 735 /* Skip over the zeros, there can be several flag/region 736 * combinations. */ 737 while (len > 0 && byts[arridx] == 0) 738 { 739 ++arridx; 740 --len; 741 } 742 if (len == 0) 743 break; /* no children, word must end here */ 744 } 745 746 /* Stop looking at end of the line. */ 747 if (ptr[wlen] == NUL) 748 break; 749 750 /* Perform a binary search in the list of accepted bytes. */ 751 c = ptr[wlen]; 752 if (c == TAB) /* <Tab> is handled like <Space> */ 753 c = ' '; 754 lo = arridx; 755 hi = arridx + len - 1; 756 while (lo < hi) 757 { 758 m = (lo + hi) / 2; 759 if (byts[m] > c) 760 hi = m - 1; 761 else if (byts[m] < c) 762 lo = m + 1; 763 else 764 { 765 lo = hi = m; 766 break; 767 } 768 } 769 770 /* Stop if there is no matching byte. */ 771 if (hi < lo || byts[lo] != c) 772 break; 773 774 /* Continue at the child (if there is one). */ 775 arridx = idxs[lo]; 776 ++wlen; 777 --flen; 778 779 /* One space in the good word may stand for several spaces in the 780 * checked word. */ 781 if (c == ' ') 782 { 783 for (;;) 784 { 785 if (flen <= 0 && *mip->mi_fend != NUL) 786 flen = fold_more(mip); 787 if (ptr[wlen] != ' ' && ptr[wlen] != TAB) 788 break; 789 ++wlen; 790 --flen; 791 } 792 } 793 } 794 795 /* 796 * Verify that one of the possible endings is valid. Try the longest 797 * first. 798 */ 799 while (endidxcnt > 0) 800 { 801 --endidxcnt; 802 arridx = endidx[endidxcnt]; 803 wlen = endlen[endidxcnt]; 804 805 #ifdef FEAT_MBYTE 806 if ((*mb_head_off)(ptr, ptr + wlen) > 0) 807 continue; /* not at first byte of character */ 808 #endif 809 if (spell_iswordp(ptr + wlen, mip->mi_win)) 810 { 811 if (slang->sl_compprog == NULL && !slang->sl_nobreak) 812 continue; /* next char is a word character */ 813 word_ends = FALSE; 814 } 815 else 816 word_ends = TRUE; 817 /* The prefix flag is before compound flags. Once a valid prefix flag 818 * has been found we try compound flags. */ 819 prefix_found = FALSE; 820 821 #ifdef FEAT_MBYTE 822 if (mode != FIND_KEEPWORD && has_mbyte) 823 { 824 /* Compute byte length in original word, length may change 825 * when folding case. This can be slow, take a shortcut when the 826 * case-folded word is equal to the keep-case word. */ 827 p = mip->mi_word; 828 if (STRNCMP(ptr, p, wlen) != 0) 829 { 830 for (s = ptr; s < ptr + wlen; MB_PTR_ADV(s)) 831 MB_PTR_ADV(p); 832 wlen = (int)(p - mip->mi_word); 833 } 834 } 835 #endif 836 837 /* Check flags and region. For FIND_PREFIX check the condition and 838 * prefix ID. 839 * Repeat this if there are more flags/region alternatives until there 840 * is a match. */ 841 res = SP_BAD; 842 for (len = byts[arridx - 1]; len > 0 && byts[arridx] == 0; 843 --len, ++arridx) 844 { 845 flags = idxs[arridx]; 846 847 /* For the fold-case tree check that the case of the checked word 848 * matches with what the word in the tree requires. 849 * For keep-case tree the case is always right. For prefixes we 850 * don't bother to check. */ 851 if (mode == FIND_FOLDWORD) 852 { 853 if (mip->mi_cend != mip->mi_word + wlen) 854 { 855 /* mi_capflags was set for a different word length, need 856 * to do it again. */ 857 mip->mi_cend = mip->mi_word + wlen; 858 mip->mi_capflags = captype(mip->mi_word, mip->mi_cend); 859 } 860 861 if (mip->mi_capflags == WF_KEEPCAP 862 || !spell_valid_case(mip->mi_capflags, flags)) 863 continue; 864 } 865 866 /* When mode is FIND_PREFIX the word must support the prefix: 867 * check the prefix ID and the condition. Do that for the list at 868 * mip->mi_prefarridx that find_prefix() filled. */ 869 else if (mode == FIND_PREFIX && !prefix_found) 870 { 871 c = valid_word_prefix(mip->mi_prefcnt, mip->mi_prefarridx, 872 flags, 873 mip->mi_word + mip->mi_cprefixlen, slang, 874 FALSE); 875 if (c == 0) 876 continue; 877 878 /* Use the WF_RARE flag for a rare prefix. */ 879 if (c & WF_RAREPFX) 880 flags |= WF_RARE; 881 prefix_found = TRUE; 882 } 883 884 if (slang->sl_nobreak) 885 { 886 if ((mode == FIND_COMPOUND || mode == FIND_KEEPCOMPOUND) 887 && (flags & WF_BANNED) == 0) 888 { 889 /* NOBREAK: found a valid following word. That's all we 890 * need to know, so return. */ 891 mip->mi_result = SP_OK; 892 break; 893 } 894 } 895 896 else if ((mode == FIND_COMPOUND || mode == FIND_KEEPCOMPOUND 897 || !word_ends)) 898 { 899 /* If there is no compound flag or the word is shorter than 900 * COMPOUNDMIN reject it quickly. 901 * Makes you wonder why someone puts a compound flag on a word 902 * that's too short... Myspell compatibility requires this 903 * anyway. */ 904 if (((unsigned)flags >> 24) == 0 905 || wlen - mip->mi_compoff < slang->sl_compminlen) 906 continue; 907 #ifdef FEAT_MBYTE 908 /* For multi-byte chars check character length against 909 * COMPOUNDMIN. */ 910 if (has_mbyte 911 && slang->sl_compminlen > 0 912 && mb_charlen_len(mip->mi_word + mip->mi_compoff, 913 wlen - mip->mi_compoff) < slang->sl_compminlen) 914 continue; 915 #endif 916 917 /* Limit the number of compound words to COMPOUNDWORDMAX if no 918 * maximum for syllables is specified. */ 919 if (!word_ends && mip->mi_complen + mip->mi_compextra + 2 920 > slang->sl_compmax 921 && slang->sl_compsylmax == MAXWLEN) 922 continue; 923 924 /* Don't allow compounding on a side where an affix was added, 925 * unless COMPOUNDPERMITFLAG was used. */ 926 if (mip->mi_complen > 0 && (flags & WF_NOCOMPBEF)) 927 continue; 928 if (!word_ends && (flags & WF_NOCOMPAFT)) 929 continue; 930 931 /* Quickly check if compounding is possible with this flag. */ 932 if (!byte_in_str(mip->mi_complen == 0 933 ? slang->sl_compstartflags 934 : slang->sl_compallflags, 935 ((unsigned)flags >> 24))) 936 continue; 937 938 /* If there is a match with a CHECKCOMPOUNDPATTERN rule 939 * discard the compound word. */ 940 if (match_checkcompoundpattern(ptr, wlen, &slang->sl_comppat)) 941 continue; 942 943 if (mode == FIND_COMPOUND) 944 { 945 int capflags; 946 947 /* Need to check the caps type of the appended compound 948 * word. */ 949 #ifdef FEAT_MBYTE 950 if (has_mbyte && STRNCMP(ptr, mip->mi_word, 951 mip->mi_compoff) != 0) 952 { 953 /* case folding may have changed the length */ 954 p = mip->mi_word; 955 for (s = ptr; s < ptr + mip->mi_compoff; MB_PTR_ADV(s)) 956 MB_PTR_ADV(p); 957 } 958 else 959 #endif 960 p = mip->mi_word + mip->mi_compoff; 961 capflags = captype(p, mip->mi_word + wlen); 962 if (capflags == WF_KEEPCAP || (capflags == WF_ALLCAP 963 && (flags & WF_FIXCAP) != 0)) 964 continue; 965 966 if (capflags != WF_ALLCAP) 967 { 968 /* When the character before the word is a word 969 * character we do not accept a Onecap word. We do 970 * accept a no-caps word, even when the dictionary 971 * word specifies ONECAP. */ 972 MB_PTR_BACK(mip->mi_word, p); 973 if (spell_iswordp_nmw(p, mip->mi_win) 974 ? capflags == WF_ONECAP 975 : (flags & WF_ONECAP) != 0 976 && capflags != WF_ONECAP) 977 continue; 978 } 979 } 980 981 /* If the word ends the sequence of compound flags of the 982 * words must match with one of the COMPOUNDRULE items and 983 * the number of syllables must not be too large. */ 984 mip->mi_compflags[mip->mi_complen] = ((unsigned)flags >> 24); 985 mip->mi_compflags[mip->mi_complen + 1] = NUL; 986 if (word_ends) 987 { 988 char_u fword[MAXWLEN]; 989 990 if (slang->sl_compsylmax < MAXWLEN) 991 { 992 /* "fword" is only needed for checking syllables. */ 993 if (ptr == mip->mi_word) 994 (void)spell_casefold(ptr, wlen, fword, MAXWLEN); 995 else 996 vim_strncpy(fword, ptr, endlen[endidxcnt]); 997 } 998 if (!can_compound(slang, fword, mip->mi_compflags)) 999 continue; 1000 } 1001 else if (slang->sl_comprules != NULL 1002 && !match_compoundrule(slang, mip->mi_compflags)) 1003 /* The compound flags collected so far do not match any 1004 * COMPOUNDRULE, discard the compounded word. */ 1005 continue; 1006 } 1007 1008 /* Check NEEDCOMPOUND: can't use word without compounding. */ 1009 else if (flags & WF_NEEDCOMP) 1010 continue; 1011 1012 nobreak_result = SP_OK; 1013 1014 if (!word_ends) 1015 { 1016 int save_result = mip->mi_result; 1017 char_u *save_end = mip->mi_end; 1018 langp_T *save_lp = mip->mi_lp; 1019 int lpi; 1020 1021 /* Check that a valid word follows. If there is one and we 1022 * are compounding, it will set "mi_result", thus we are 1023 * always finished here. For NOBREAK we only check that a 1024 * valid word follows. 1025 * Recursive! */ 1026 if (slang->sl_nobreak) 1027 mip->mi_result = SP_BAD; 1028 1029 /* Find following word in case-folded tree. */ 1030 mip->mi_compoff = endlen[endidxcnt]; 1031 #ifdef FEAT_MBYTE 1032 if (has_mbyte && mode == FIND_KEEPWORD) 1033 { 1034 /* Compute byte length in case-folded word from "wlen": 1035 * byte length in keep-case word. Length may change when 1036 * folding case. This can be slow, take a shortcut when 1037 * the case-folded word is equal to the keep-case word. */ 1038 p = mip->mi_fword; 1039 if (STRNCMP(ptr, p, wlen) != 0) 1040 { 1041 for (s = ptr; s < ptr + wlen; MB_PTR_ADV(s)) 1042 MB_PTR_ADV(p); 1043 mip->mi_compoff = (int)(p - mip->mi_fword); 1044 } 1045 } 1046 #endif 1047 #if 0 /* Disabled, see below */ 1048 c = mip->mi_compoff; 1049 #endif 1050 ++mip->mi_complen; 1051 if (flags & WF_COMPROOT) 1052 ++mip->mi_compextra; 1053 1054 /* For NOBREAK we need to try all NOBREAK languages, at least 1055 * to find the ".add" file(s). */ 1056 for (lpi = 0; lpi < mip->mi_win->w_s->b_langp.ga_len; ++lpi) 1057 { 1058 if (slang->sl_nobreak) 1059 { 1060 mip->mi_lp = LANGP_ENTRY(mip->mi_win->w_s->b_langp, lpi); 1061 if (mip->mi_lp->lp_slang->sl_fidxs == NULL 1062 || !mip->mi_lp->lp_slang->sl_nobreak) 1063 continue; 1064 } 1065 1066 find_word(mip, FIND_COMPOUND); 1067 1068 /* When NOBREAK any word that matches is OK. Otherwise we 1069 * need to find the longest match, thus try with keep-case 1070 * and prefix too. */ 1071 if (!slang->sl_nobreak || mip->mi_result == SP_BAD) 1072 { 1073 /* Find following word in keep-case tree. */ 1074 mip->mi_compoff = wlen; 1075 find_word(mip, FIND_KEEPCOMPOUND); 1076 1077 #if 0 /* Disabled, a prefix must not appear halfway a compound word, 1078 unless the COMPOUNDPERMITFLAG is used and then it can't be a 1079 postponed prefix. */ 1080 if (!slang->sl_nobreak || mip->mi_result == SP_BAD) 1081 { 1082 /* Check for following word with prefix. */ 1083 mip->mi_compoff = c; 1084 find_prefix(mip, FIND_COMPOUND); 1085 } 1086 #endif 1087 } 1088 1089 if (!slang->sl_nobreak) 1090 break; 1091 } 1092 --mip->mi_complen; 1093 if (flags & WF_COMPROOT) 1094 --mip->mi_compextra; 1095 mip->mi_lp = save_lp; 1096 1097 if (slang->sl_nobreak) 1098 { 1099 nobreak_result = mip->mi_result; 1100 mip->mi_result = save_result; 1101 mip->mi_end = save_end; 1102 } 1103 else 1104 { 1105 if (mip->mi_result == SP_OK) 1106 break; 1107 continue; 1108 } 1109 } 1110 1111 if (flags & WF_BANNED) 1112 res = SP_BANNED; 1113 else if (flags & WF_REGION) 1114 { 1115 /* Check region. */ 1116 if ((mip->mi_lp->lp_region & (flags >> 16)) != 0) 1117 res = SP_OK; 1118 else 1119 res = SP_LOCAL; 1120 } 1121 else if (flags & WF_RARE) 1122 res = SP_RARE; 1123 else 1124 res = SP_OK; 1125 1126 /* Always use the longest match and the best result. For NOBREAK 1127 * we separately keep the longest match without a following good 1128 * word as a fall-back. */ 1129 if (nobreak_result == SP_BAD) 1130 { 1131 if (mip->mi_result2 > res) 1132 { 1133 mip->mi_result2 = res; 1134 mip->mi_end2 = mip->mi_word + wlen; 1135 } 1136 else if (mip->mi_result2 == res 1137 && mip->mi_end2 < mip->mi_word + wlen) 1138 mip->mi_end2 = mip->mi_word + wlen; 1139 } 1140 else if (mip->mi_result > res) 1141 { 1142 mip->mi_result = res; 1143 mip->mi_end = mip->mi_word + wlen; 1144 } 1145 else if (mip->mi_result == res && mip->mi_end < mip->mi_word + wlen) 1146 mip->mi_end = mip->mi_word + wlen; 1147 1148 if (mip->mi_result == SP_OK) 1149 break; 1150 } 1151 1152 if (mip->mi_result == SP_OK) 1153 break; 1154 } 1155 } 1156 1157 /* 1158 * Return TRUE if there is a match between the word ptr[wlen] and 1159 * CHECKCOMPOUNDPATTERN rules, assuming that we will concatenate with another 1160 * word. 1161 * A match means that the first part of CHECKCOMPOUNDPATTERN matches at the 1162 * end of ptr[wlen] and the second part matches after it. 1163 */ 1164 static int 1165 match_checkcompoundpattern( 1166 char_u *ptr, 1167 int wlen, 1168 garray_T *gap) /* &sl_comppat */ 1169 { 1170 int i; 1171 char_u *p; 1172 int len; 1173 1174 for (i = 0; i + 1 < gap->ga_len; i += 2) 1175 { 1176 p = ((char_u **)gap->ga_data)[i + 1]; 1177 if (STRNCMP(ptr + wlen, p, STRLEN(p)) == 0) 1178 { 1179 /* Second part matches at start of following compound word, now 1180 * check if first part matches at end of previous word. */ 1181 p = ((char_u **)gap->ga_data)[i]; 1182 len = (int)STRLEN(p); 1183 if (len <= wlen && STRNCMP(ptr + wlen - len, p, len) == 0) 1184 return TRUE; 1185 } 1186 } 1187 return FALSE; 1188 } 1189 1190 /* 1191 * Return TRUE if "flags" is a valid sequence of compound flags and "word" 1192 * does not have too many syllables. 1193 */ 1194 static int 1195 can_compound(slang_T *slang, char_u *word, char_u *flags) 1196 { 1197 #ifdef FEAT_MBYTE 1198 char_u uflags[MAXWLEN * 2]; 1199 int i; 1200 #endif 1201 char_u *p; 1202 1203 if (slang->sl_compprog == NULL) 1204 return FALSE; 1205 #ifdef FEAT_MBYTE 1206 if (enc_utf8) 1207 { 1208 /* Need to convert the single byte flags to utf8 characters. */ 1209 p = uflags; 1210 for (i = 0; flags[i] != NUL; ++i) 1211 p += utf_char2bytes(flags[i], p); 1212 *p = NUL; 1213 p = uflags; 1214 } 1215 else 1216 #endif 1217 p = flags; 1218 if (!vim_regexec_prog(&slang->sl_compprog, FALSE, p, 0)) 1219 return FALSE; 1220 1221 /* Count the number of syllables. This may be slow, do it last. If there 1222 * are too many syllables AND the number of compound words is above 1223 * COMPOUNDWORDMAX then compounding is not allowed. */ 1224 if (slang->sl_compsylmax < MAXWLEN 1225 && count_syllables(slang, word) > slang->sl_compsylmax) 1226 return (int)STRLEN(flags) < slang->sl_compmax; 1227 return TRUE; 1228 } 1229 1230 /* 1231 * Return TRUE when the sequence of flags in "compflags" plus "flag" can 1232 * possibly form a valid compounded word. This also checks the COMPOUNDRULE 1233 * lines if they don't contain wildcards. 1234 */ 1235 static int 1236 can_be_compound( 1237 trystate_T *sp, 1238 slang_T *slang, 1239 char_u *compflags, 1240 int flag) 1241 { 1242 /* If the flag doesn't appear in sl_compstartflags or sl_compallflags 1243 * then it can't possibly compound. */ 1244 if (!byte_in_str(sp->ts_complen == sp->ts_compsplit 1245 ? slang->sl_compstartflags : slang->sl_compallflags, flag)) 1246 return FALSE; 1247 1248 /* If there are no wildcards, we can check if the flags collected so far 1249 * possibly can form a match with COMPOUNDRULE patterns. This only 1250 * makes sense when we have two or more words. */ 1251 if (slang->sl_comprules != NULL && sp->ts_complen > sp->ts_compsplit) 1252 { 1253 int v; 1254 1255 compflags[sp->ts_complen] = flag; 1256 compflags[sp->ts_complen + 1] = NUL; 1257 v = match_compoundrule(slang, compflags + sp->ts_compsplit); 1258 compflags[sp->ts_complen] = NUL; 1259 return v; 1260 } 1261 1262 return TRUE; 1263 } 1264 1265 1266 /* 1267 * Return TRUE if the compound flags in compflags[] match the start of any 1268 * compound rule. This is used to stop trying a compound if the flags 1269 * collected so far can't possibly match any compound rule. 1270 * Caller must check that slang->sl_comprules is not NULL. 1271 */ 1272 static int 1273 match_compoundrule(slang_T *slang, char_u *compflags) 1274 { 1275 char_u *p; 1276 int i; 1277 int c; 1278 1279 /* loop over all the COMPOUNDRULE entries */ 1280 for (p = slang->sl_comprules; *p != NUL; ++p) 1281 { 1282 /* loop over the flags in the compound word we have made, match 1283 * them against the current rule entry */ 1284 for (i = 0; ; ++i) 1285 { 1286 c = compflags[i]; 1287 if (c == NUL) 1288 /* found a rule that matches for the flags we have so far */ 1289 return TRUE; 1290 if (*p == '/' || *p == NUL) 1291 break; /* end of rule, it's too short */ 1292 if (*p == '[') 1293 { 1294 int match = FALSE; 1295 1296 /* compare against all the flags in [] */ 1297 ++p; 1298 while (*p != ']' && *p != NUL) 1299 if (*p++ == c) 1300 match = TRUE; 1301 if (!match) 1302 break; /* none matches */ 1303 } 1304 else if (*p != c) 1305 break; /* flag of word doesn't match flag in pattern */ 1306 ++p; 1307 } 1308 1309 /* Skip to the next "/", where the next pattern starts. */ 1310 p = vim_strchr(p, '/'); 1311 if (p == NULL) 1312 break; 1313 } 1314 1315 /* Checked all the rules and none of them match the flags, so there 1316 * can't possibly be a compound starting with these flags. */ 1317 return FALSE; 1318 } 1319 1320 /* 1321 * Return non-zero if the prefix indicated by "arridx" matches with the prefix 1322 * ID in "flags" for the word "word". 1323 * The WF_RAREPFX flag is included in the return value for a rare prefix. 1324 */ 1325 static int 1326 valid_word_prefix( 1327 int totprefcnt, /* nr of prefix IDs */ 1328 int arridx, /* idx in sl_pidxs[] */ 1329 int flags, 1330 char_u *word, 1331 slang_T *slang, 1332 int cond_req) /* only use prefixes with a condition */ 1333 { 1334 int prefcnt; 1335 int pidx; 1336 regprog_T **rp; 1337 int prefid; 1338 1339 prefid = (unsigned)flags >> 24; 1340 for (prefcnt = totprefcnt - 1; prefcnt >= 0; --prefcnt) 1341 { 1342 pidx = slang->sl_pidxs[arridx + prefcnt]; 1343 1344 /* Check the prefix ID. */ 1345 if (prefid != (pidx & 0xff)) 1346 continue; 1347 1348 /* Check if the prefix doesn't combine and the word already has a 1349 * suffix. */ 1350 if ((flags & WF_HAS_AFF) && (pidx & WF_PFX_NC)) 1351 continue; 1352 1353 /* Check the condition, if there is one. The condition index is 1354 * stored in the two bytes above the prefix ID byte. */ 1355 rp = &slang->sl_prefprog[((unsigned)pidx >> 8) & 0xffff]; 1356 if (*rp != NULL) 1357 { 1358 if (!vim_regexec_prog(rp, FALSE, word, 0)) 1359 continue; 1360 } 1361 else if (cond_req) 1362 continue; 1363 1364 /* It's a match! Return the WF_ flags. */ 1365 return pidx; 1366 } 1367 return 0; 1368 } 1369 1370 /* 1371 * Check if the word at "mip->mi_word" has a matching prefix. 1372 * If it does, then check the following word. 1373 * 1374 * If "mode" is "FIND_COMPOUND" then do the same after another word, find a 1375 * prefix in a compound word. 1376 * 1377 * For a match mip->mi_result is updated. 1378 */ 1379 static void 1380 find_prefix(matchinf_T *mip, int mode) 1381 { 1382 idx_T arridx = 0; 1383 int len; 1384 int wlen = 0; 1385 int flen; 1386 int c; 1387 char_u *ptr; 1388 idx_T lo, hi, m; 1389 slang_T *slang = mip->mi_lp->lp_slang; 1390 char_u *byts; 1391 idx_T *idxs; 1392 1393 byts = slang->sl_pbyts; 1394 if (byts == NULL) 1395 return; /* array is empty */ 1396 1397 /* We use the case-folded word here, since prefixes are always 1398 * case-folded. */ 1399 ptr = mip->mi_fword; 1400 flen = mip->mi_fwordlen; /* available case-folded bytes */ 1401 if (mode == FIND_COMPOUND) 1402 { 1403 /* Skip over the previously found word(s). */ 1404 ptr += mip->mi_compoff; 1405 flen -= mip->mi_compoff; 1406 } 1407 idxs = slang->sl_pidxs; 1408 1409 /* 1410 * Repeat advancing in the tree until: 1411 * - there is a byte that doesn't match, 1412 * - we reach the end of the tree, 1413 * - or we reach the end of the line. 1414 */ 1415 for (;;) 1416 { 1417 if (flen == 0 && *mip->mi_fend != NUL) 1418 flen = fold_more(mip); 1419 1420 len = byts[arridx++]; 1421 1422 /* If the first possible byte is a zero the prefix could end here. 1423 * Check if the following word matches and supports the prefix. */ 1424 if (byts[arridx] == 0) 1425 { 1426 /* There can be several prefixes with different conditions. We 1427 * try them all, since we don't know which one will give the 1428 * longest match. The word is the same each time, pass the list 1429 * of possible prefixes to find_word(). */ 1430 mip->mi_prefarridx = arridx; 1431 mip->mi_prefcnt = len; 1432 while (len > 0 && byts[arridx] == 0) 1433 { 1434 ++arridx; 1435 --len; 1436 } 1437 mip->mi_prefcnt -= len; 1438 1439 /* Find the word that comes after the prefix. */ 1440 mip->mi_prefixlen = wlen; 1441 if (mode == FIND_COMPOUND) 1442 /* Skip over the previously found word(s). */ 1443 mip->mi_prefixlen += mip->mi_compoff; 1444 1445 #ifdef FEAT_MBYTE 1446 if (has_mbyte) 1447 { 1448 /* Case-folded length may differ from original length. */ 1449 mip->mi_cprefixlen = nofold_len(mip->mi_fword, 1450 mip->mi_prefixlen, mip->mi_word); 1451 } 1452 else 1453 mip->mi_cprefixlen = mip->mi_prefixlen; 1454 #endif 1455 find_word(mip, FIND_PREFIX); 1456 1457 1458 if (len == 0) 1459 break; /* no children, word must end here */ 1460 } 1461 1462 /* Stop looking at end of the line. */ 1463 if (ptr[wlen] == NUL) 1464 break; 1465 1466 /* Perform a binary search in the list of accepted bytes. */ 1467 c = ptr[wlen]; 1468 lo = arridx; 1469 hi = arridx + len - 1; 1470 while (lo < hi) 1471 { 1472 m = (lo + hi) / 2; 1473 if (byts[m] > c) 1474 hi = m - 1; 1475 else if (byts[m] < c) 1476 lo = m + 1; 1477 else 1478 { 1479 lo = hi = m; 1480 break; 1481 } 1482 } 1483 1484 /* Stop if there is no matching byte. */ 1485 if (hi < lo || byts[lo] != c) 1486 break; 1487 1488 /* Continue at the child (if there is one). */ 1489 arridx = idxs[lo]; 1490 ++wlen; 1491 --flen; 1492 } 1493 } 1494 1495 /* 1496 * Need to fold at least one more character. Do until next non-word character 1497 * for efficiency. Include the non-word character too. 1498 * Return the length of the folded chars in bytes. 1499 */ 1500 static int 1501 fold_more(matchinf_T *mip) 1502 { 1503 int flen; 1504 char_u *p; 1505 1506 p = mip->mi_fend; 1507 do 1508 { 1509 MB_PTR_ADV(mip->mi_fend); 1510 } while (*mip->mi_fend != NUL && spell_iswordp(mip->mi_fend, mip->mi_win)); 1511 1512 /* Include the non-word character so that we can check for the word end. */ 1513 if (*mip->mi_fend != NUL) 1514 MB_PTR_ADV(mip->mi_fend); 1515 1516 (void)spell_casefold(p, (int)(mip->mi_fend - p), 1517 mip->mi_fword + mip->mi_fwordlen, 1518 MAXWLEN - mip->mi_fwordlen); 1519 flen = (int)STRLEN(mip->mi_fword + mip->mi_fwordlen); 1520 mip->mi_fwordlen += flen; 1521 return flen; 1522 } 1523 1524 /* 1525 * Check case flags for a word. Return TRUE if the word has the requested 1526 * case. 1527 */ 1528 static int 1529 spell_valid_case( 1530 int wordflags, /* flags for the checked word. */ 1531 int treeflags) /* flags for the word in the spell tree */ 1532 { 1533 return ((wordflags == WF_ALLCAP && (treeflags & WF_FIXCAP) == 0) 1534 || ((treeflags & (WF_ALLCAP | WF_KEEPCAP)) == 0 1535 && ((treeflags & WF_ONECAP) == 0 1536 || (wordflags & WF_ONECAP) != 0))); 1537 } 1538 1539 /* 1540 * Return TRUE if spell checking is not enabled. 1541 */ 1542 static int 1543 no_spell_checking(win_T *wp) 1544 { 1545 if (!wp->w_p_spell || *wp->w_s->b_p_spl == NUL 1546 || wp->w_s->b_langp.ga_len == 0) 1547 { 1548 EMSG(_("E756: Spell checking is not enabled")); 1549 return TRUE; 1550 } 1551 return FALSE; 1552 } 1553 1554 /* 1555 * Move to next spell error. 1556 * "curline" is FALSE for "[s", "]s", "[S" and "]S". 1557 * "curline" is TRUE to find word under/after cursor in the same line. 1558 * For Insert mode completion "dir" is BACKWARD and "curline" is TRUE: move 1559 * to after badly spelled word before the cursor. 1560 * Return 0 if not found, length of the badly spelled word otherwise. 1561 */ 1562 int 1563 spell_move_to( 1564 win_T *wp, 1565 int dir, /* FORWARD or BACKWARD */ 1566 int allwords, /* TRUE for "[s"/"]s", FALSE for "[S"/"]S" */ 1567 int curline, 1568 hlf_T *attrp) /* return: attributes of bad word or NULL 1569 (only when "dir" is FORWARD) */ 1570 { 1571 linenr_T lnum; 1572 pos_T found_pos; 1573 int found_len = 0; 1574 char_u *line; 1575 char_u *p; 1576 char_u *endp; 1577 hlf_T attr; 1578 int len; 1579 #ifdef FEAT_SYN_HL 1580 int has_syntax = syntax_present(wp); 1581 #endif 1582 int col; 1583 int can_spell; 1584 char_u *buf = NULL; 1585 int buflen = 0; 1586 int skip = 0; 1587 int capcol = -1; 1588 int found_one = FALSE; 1589 int wrapped = FALSE; 1590 1591 if (no_spell_checking(wp)) 1592 return 0; 1593 1594 /* 1595 * Start looking for bad word at the start of the line, because we can't 1596 * start halfway a word, we don't know where it starts or ends. 1597 * 1598 * When searching backwards, we continue in the line to find the last 1599 * bad word (in the cursor line: before the cursor). 1600 * 1601 * We concatenate the start of the next line, so that wrapped words work 1602 * (e.g. "et<line-break>cetera"). Doesn't work when searching backwards 1603 * though... 1604 */ 1605 lnum = wp->w_cursor.lnum; 1606 CLEAR_POS(&found_pos); 1607 1608 while (!got_int) 1609 { 1610 line = ml_get_buf(wp->w_buffer, lnum, FALSE); 1611 1612 len = (int)STRLEN(line); 1613 if (buflen < len + MAXWLEN + 2) 1614 { 1615 vim_free(buf); 1616 buflen = len + MAXWLEN + 2; 1617 buf = alloc(buflen); 1618 if (buf == NULL) 1619 break; 1620 } 1621 1622 /* In first line check first word for Capital. */ 1623 if (lnum == 1) 1624 capcol = 0; 1625 1626 /* For checking first word with a capital skip white space. */ 1627 if (capcol == 0) 1628 capcol = (int)(skipwhite(line) - line); 1629 else if (curline && wp == curwin) 1630 { 1631 /* For spellbadword(): check if first word needs a capital. */ 1632 col = (int)(skipwhite(line) - line); 1633 if (check_need_cap(lnum, col)) 1634 capcol = col; 1635 1636 /* Need to get the line again, may have looked at the previous 1637 * one. */ 1638 line = ml_get_buf(wp->w_buffer, lnum, FALSE); 1639 } 1640 1641 /* Copy the line into "buf" and append the start of the next line if 1642 * possible. */ 1643 STRCPY(buf, line); 1644 if (lnum < wp->w_buffer->b_ml.ml_line_count) 1645 spell_cat_line(buf + STRLEN(buf), 1646 ml_get_buf(wp->w_buffer, lnum + 1, FALSE), MAXWLEN); 1647 1648 p = buf + skip; 1649 endp = buf + len; 1650 while (p < endp) 1651 { 1652 /* When searching backward don't search after the cursor. Unless 1653 * we wrapped around the end of the buffer. */ 1654 if (dir == BACKWARD 1655 && lnum == wp->w_cursor.lnum 1656 && !wrapped 1657 && (colnr_T)(p - buf) >= wp->w_cursor.col) 1658 break; 1659 1660 /* start of word */ 1661 attr = HLF_COUNT; 1662 len = spell_check(wp, p, &attr, &capcol, FALSE); 1663 1664 if (attr != HLF_COUNT) 1665 { 1666 /* We found a bad word. Check the attribute. */ 1667 if (allwords || attr == HLF_SPB) 1668 { 1669 /* When searching forward only accept a bad word after 1670 * the cursor. */ 1671 if (dir == BACKWARD 1672 || lnum != wp->w_cursor.lnum 1673 || (lnum == wp->w_cursor.lnum 1674 && (wrapped 1675 || (colnr_T)(curline ? p - buf + len 1676 : p - buf) 1677 > wp->w_cursor.col))) 1678 { 1679 #ifdef FEAT_SYN_HL 1680 if (has_syntax) 1681 { 1682 col = (int)(p - buf); 1683 (void)syn_get_id(wp, lnum, (colnr_T)col, 1684 FALSE, &can_spell, FALSE); 1685 if (!can_spell) 1686 attr = HLF_COUNT; 1687 } 1688 else 1689 #endif 1690 can_spell = TRUE; 1691 1692 if (can_spell) 1693 { 1694 found_one = TRUE; 1695 found_pos.lnum = lnum; 1696 found_pos.col = (int)(p - buf); 1697 #ifdef FEAT_VIRTUALEDIT 1698 found_pos.coladd = 0; 1699 #endif 1700 if (dir == FORWARD) 1701 { 1702 /* No need to search further. */ 1703 wp->w_cursor = found_pos; 1704 vim_free(buf); 1705 if (attrp != NULL) 1706 *attrp = attr; 1707 return len; 1708 } 1709 else if (curline) 1710 /* Insert mode completion: put cursor after 1711 * the bad word. */ 1712 found_pos.col += len; 1713 found_len = len; 1714 } 1715 } 1716 else 1717 found_one = TRUE; 1718 } 1719 } 1720 1721 /* advance to character after the word */ 1722 p += len; 1723 capcol -= len; 1724 } 1725 1726 if (dir == BACKWARD && found_pos.lnum != 0) 1727 { 1728 /* Use the last match in the line (before the cursor). */ 1729 wp->w_cursor = found_pos; 1730 vim_free(buf); 1731 return found_len; 1732 } 1733 1734 if (curline) 1735 break; /* only check cursor line */ 1736 1737 /* If we are back at the starting line and searched it again there 1738 * is no match, give up. */ 1739 if (lnum == wp->w_cursor.lnum && wrapped) 1740 break; 1741 1742 /* Advance to next line. */ 1743 if (dir == BACKWARD) 1744 { 1745 if (lnum > 1) 1746 --lnum; 1747 else if (!p_ws) 1748 break; /* at first line and 'nowrapscan' */ 1749 else 1750 { 1751 /* Wrap around to the end of the buffer. May search the 1752 * starting line again and accept the last match. */ 1753 lnum = wp->w_buffer->b_ml.ml_line_count; 1754 wrapped = TRUE; 1755 if (!shortmess(SHM_SEARCH)) 1756 give_warning((char_u *)_(top_bot_msg), TRUE); 1757 } 1758 capcol = -1; 1759 } 1760 else 1761 { 1762 if (lnum < wp->w_buffer->b_ml.ml_line_count) 1763 ++lnum; 1764 else if (!p_ws) 1765 break; /* at first line and 'nowrapscan' */ 1766 else 1767 { 1768 /* Wrap around to the start of the buffer. May search the 1769 * starting line again and accept the first match. */ 1770 lnum = 1; 1771 wrapped = TRUE; 1772 if (!shortmess(SHM_SEARCH)) 1773 give_warning((char_u *)_(bot_top_msg), TRUE); 1774 } 1775 1776 /* If we are back at the starting line and there is no match then 1777 * give up. */ 1778 if (lnum == wp->w_cursor.lnum && !found_one) 1779 break; 1780 1781 /* Skip the characters at the start of the next line that were 1782 * included in a match crossing line boundaries. */ 1783 if (attr == HLF_COUNT) 1784 skip = (int)(p - endp); 1785 else 1786 skip = 0; 1787 1788 /* Capcol skips over the inserted space. */ 1789 --capcol; 1790 1791 /* But after empty line check first word in next line */ 1792 if (*skipwhite(line) == NUL) 1793 capcol = 0; 1794 } 1795 1796 line_breakcheck(); 1797 } 1798 1799 vim_free(buf); 1800 return 0; 1801 } 1802 1803 /* 1804 * For spell checking: concatenate the start of the following line "line" into 1805 * "buf", blanking-out special characters. Copy less then "maxlen" bytes. 1806 * Keep the blanks at the start of the next line, this is used in win_line() 1807 * to skip those bytes if the word was OK. 1808 */ 1809 void 1810 spell_cat_line(char_u *buf, char_u *line, int maxlen) 1811 { 1812 char_u *p; 1813 int n; 1814 1815 p = skipwhite(line); 1816 while (vim_strchr((char_u *)"*#/\"\t", *p) != NULL) 1817 p = skipwhite(p + 1); 1818 1819 if (*p != NUL) 1820 { 1821 /* Only worth concatenating if there is something else than spaces to 1822 * concatenate. */ 1823 n = (int)(p - line) + 1; 1824 if (n < maxlen - 1) 1825 { 1826 vim_memset(buf, ' ', n); 1827 vim_strncpy(buf + n, p, maxlen - 1 - n); 1828 } 1829 } 1830 } 1831 1832 /* 1833 * Structure used for the cookie argument of do_in_runtimepath(). 1834 */ 1835 typedef struct spelload_S 1836 { 1837 char_u sl_lang[MAXWLEN + 1]; /* language name */ 1838 slang_T *sl_slang; /* resulting slang_T struct */ 1839 int sl_nobreak; /* NOBREAK language found */ 1840 } spelload_T; 1841 1842 /* 1843 * Load word list(s) for "lang" from Vim spell file(s). 1844 * "lang" must be the language without the region: e.g., "en". 1845 */ 1846 static void 1847 spell_load_lang(char_u *lang) 1848 { 1849 char_u fname_enc[85]; 1850 int r; 1851 spelload_T sl; 1852 #ifdef FEAT_AUTOCMD 1853 int round; 1854 #endif 1855 1856 /* Copy the language name to pass it to spell_load_cb() as a cookie. 1857 * It's truncated when an error is detected. */ 1858 STRCPY(sl.sl_lang, lang); 1859 sl.sl_slang = NULL; 1860 sl.sl_nobreak = FALSE; 1861 1862 #ifdef FEAT_AUTOCMD 1863 /* We may retry when no spell file is found for the language, an 1864 * autocommand may load it then. */ 1865 for (round = 1; round <= 2; ++round) 1866 #endif 1867 { 1868 /* 1869 * Find the first spell file for "lang" in 'runtimepath' and load it. 1870 */ 1871 vim_snprintf((char *)fname_enc, sizeof(fname_enc) - 5, 1872 #ifdef VMS 1873 "spell/%s_%s.spl", 1874 #else 1875 "spell/%s.%s.spl", 1876 #endif 1877 lang, spell_enc()); 1878 r = do_in_runtimepath(fname_enc, 0, spell_load_cb, &sl); 1879 1880 if (r == FAIL && *sl.sl_lang != NUL) 1881 { 1882 /* Try loading the ASCII version. */ 1883 vim_snprintf((char *)fname_enc, sizeof(fname_enc) - 5, 1884 #ifdef VMS 1885 "spell/%s_ascii.spl", 1886 #else 1887 "spell/%s.ascii.spl", 1888 #endif 1889 lang); 1890 r = do_in_runtimepath(fname_enc, 0, spell_load_cb, &sl); 1891 1892 #ifdef FEAT_AUTOCMD 1893 if (r == FAIL && *sl.sl_lang != NUL && round == 1 1894 && apply_autocmds(EVENT_SPELLFILEMISSING, lang, 1895 curbuf->b_fname, FALSE, curbuf)) 1896 continue; 1897 break; 1898 #endif 1899 } 1900 #ifdef FEAT_AUTOCMD 1901 break; 1902 #endif 1903 } 1904 1905 if (r == FAIL) 1906 { 1907 smsg((char_u *) 1908 #ifdef VMS 1909 _("Warning: Cannot find word list \"%s_%s.spl\" or \"%s_ascii.spl\""), 1910 #else 1911 _("Warning: Cannot find word list \"%s.%s.spl\" or \"%s.ascii.spl\""), 1912 #endif 1913 lang, spell_enc(), lang); 1914 } 1915 else if (sl.sl_slang != NULL) 1916 { 1917 /* At least one file was loaded, now load ALL the additions. */ 1918 STRCPY(fname_enc + STRLEN(fname_enc) - 3, "add.spl"); 1919 do_in_runtimepath(fname_enc, DIP_ALL, spell_load_cb, &sl); 1920 } 1921 } 1922 1923 /* 1924 * Return the encoding used for spell checking: Use 'encoding', except that we 1925 * use "latin1" for "latin9". And limit to 60 characters (just in case). 1926 */ 1927 char_u * 1928 spell_enc(void) 1929 { 1930 1931 #ifdef FEAT_MBYTE 1932 if (STRLEN(p_enc) < 60 && STRCMP(p_enc, "iso-8859-15") != 0) 1933 return p_enc; 1934 #endif 1935 return (char_u *)"latin1"; 1936 } 1937 1938 /* 1939 * Get the name of the .spl file for the internal wordlist into 1940 * "fname[MAXPATHL]". 1941 */ 1942 static void 1943 int_wordlist_spl(char_u *fname) 1944 { 1945 vim_snprintf((char *)fname, MAXPATHL, SPL_FNAME_TMPL, 1946 int_wordlist, spell_enc()); 1947 } 1948 1949 /* 1950 * Allocate a new slang_T for language "lang". "lang" can be NULL. 1951 * Caller must fill "sl_next". 1952 */ 1953 slang_T * 1954 slang_alloc(char_u *lang) 1955 { 1956 slang_T *lp; 1957 1958 lp = (slang_T *)alloc_clear(sizeof(slang_T)); 1959 if (lp != NULL) 1960 { 1961 if (lang != NULL) 1962 lp->sl_name = vim_strsave(lang); 1963 ga_init2(&lp->sl_rep, sizeof(fromto_T), 10); 1964 ga_init2(&lp->sl_repsal, sizeof(fromto_T), 10); 1965 lp->sl_compmax = MAXWLEN; 1966 lp->sl_compsylmax = MAXWLEN; 1967 hash_init(&lp->sl_wordcount); 1968 } 1969 1970 return lp; 1971 } 1972 1973 /* 1974 * Free the contents of an slang_T and the structure itself. 1975 */ 1976 void 1977 slang_free(slang_T *lp) 1978 { 1979 vim_free(lp->sl_name); 1980 vim_free(lp->sl_fname); 1981 slang_clear(lp); 1982 vim_free(lp); 1983 } 1984 1985 /* 1986 * Clear an slang_T so that the file can be reloaded. 1987 */ 1988 void 1989 slang_clear(slang_T *lp) 1990 { 1991 garray_T *gap; 1992 fromto_T *ftp; 1993 salitem_T *smp; 1994 int i; 1995 int round; 1996 1997 vim_free(lp->sl_fbyts); 1998 lp->sl_fbyts = NULL; 1999 vim_free(lp->sl_kbyts); 2000 lp->sl_kbyts = NULL; 2001 vim_free(lp->sl_pbyts); 2002 lp->sl_pbyts = NULL; 2003 2004 vim_free(lp->sl_fidxs); 2005 lp->sl_fidxs = NULL; 2006 vim_free(lp->sl_kidxs); 2007 lp->sl_kidxs = NULL; 2008 vim_free(lp->sl_pidxs); 2009 lp->sl_pidxs = NULL; 2010 2011 for (round = 1; round <= 2; ++round) 2012 { 2013 gap = round == 1 ? &lp->sl_rep : &lp->sl_repsal; 2014 while (gap->ga_len > 0) 2015 { 2016 ftp = &((fromto_T *)gap->ga_data)[--gap->ga_len]; 2017 vim_free(ftp->ft_from); 2018 vim_free(ftp->ft_to); 2019 } 2020 ga_clear(gap); 2021 } 2022 2023 gap = &lp->sl_sal; 2024 if (lp->sl_sofo) 2025 { 2026 /* "ga_len" is set to 1 without adding an item for latin1 */ 2027 if (gap->ga_data != NULL) 2028 /* SOFOFROM and SOFOTO items: free lists of wide characters. */ 2029 for (i = 0; i < gap->ga_len; ++i) 2030 vim_free(((int **)gap->ga_data)[i]); 2031 } 2032 else 2033 /* SAL items: free salitem_T items */ 2034 while (gap->ga_len > 0) 2035 { 2036 smp = &((salitem_T *)gap->ga_data)[--gap->ga_len]; 2037 vim_free(smp->sm_lead); 2038 /* Don't free sm_oneof and sm_rules, they point into sm_lead. */ 2039 vim_free(smp->sm_to); 2040 #ifdef FEAT_MBYTE 2041 vim_free(smp->sm_lead_w); 2042 vim_free(smp->sm_oneof_w); 2043 vim_free(smp->sm_to_w); 2044 #endif 2045 } 2046 ga_clear(gap); 2047 2048 for (i = 0; i < lp->sl_prefixcnt; ++i) 2049 vim_regfree(lp->sl_prefprog[i]); 2050 lp->sl_prefixcnt = 0; 2051 vim_free(lp->sl_prefprog); 2052 lp->sl_prefprog = NULL; 2053 2054 vim_free(lp->sl_info); 2055 lp->sl_info = NULL; 2056 2057 vim_free(lp->sl_midword); 2058 lp->sl_midword = NULL; 2059 2060 vim_regfree(lp->sl_compprog); 2061 vim_free(lp->sl_comprules); 2062 vim_free(lp->sl_compstartflags); 2063 vim_free(lp->sl_compallflags); 2064 lp->sl_compprog = NULL; 2065 lp->sl_comprules = NULL; 2066 lp->sl_compstartflags = NULL; 2067 lp->sl_compallflags = NULL; 2068 2069 vim_free(lp->sl_syllable); 2070 lp->sl_syllable = NULL; 2071 ga_clear(&lp->sl_syl_items); 2072 2073 ga_clear_strings(&lp->sl_comppat); 2074 2075 hash_clear_all(&lp->sl_wordcount, WC_KEY_OFF); 2076 hash_init(&lp->sl_wordcount); 2077 2078 #ifdef FEAT_MBYTE 2079 hash_clear_all(&lp->sl_map_hash, 0); 2080 #endif 2081 2082 /* Clear info from .sug file. */ 2083 slang_clear_sug(lp); 2084 2085 lp->sl_compmax = MAXWLEN; 2086 lp->sl_compminlen = 0; 2087 lp->sl_compsylmax = MAXWLEN; 2088 lp->sl_regions[0] = NUL; 2089 } 2090 2091 /* 2092 * Clear the info from the .sug file in "lp". 2093 */ 2094 void 2095 slang_clear_sug(slang_T *lp) 2096 { 2097 vim_free(lp->sl_sbyts); 2098 lp->sl_sbyts = NULL; 2099 vim_free(lp->sl_sidxs); 2100 lp->sl_sidxs = NULL; 2101 close_spellbuf(lp->sl_sugbuf); 2102 lp->sl_sugbuf = NULL; 2103 lp->sl_sugloaded = FALSE; 2104 lp->sl_sugtime = 0; 2105 } 2106 2107 /* 2108 * Load one spell file and store the info into a slang_T. 2109 * Invoked through do_in_runtimepath(). 2110 */ 2111 static void 2112 spell_load_cb(char_u *fname, void *cookie) 2113 { 2114 spelload_T *slp = (spelload_T *)cookie; 2115 slang_T *slang; 2116 2117 slang = spell_load_file(fname, slp->sl_lang, NULL, FALSE); 2118 if (slang != NULL) 2119 { 2120 /* When a previously loaded file has NOBREAK also use it for the 2121 * ".add" files. */ 2122 if (slp->sl_nobreak && slang->sl_add) 2123 slang->sl_nobreak = TRUE; 2124 else if (slang->sl_nobreak) 2125 slp->sl_nobreak = TRUE; 2126 2127 slp->sl_slang = slang; 2128 } 2129 } 2130 2131 2132 /* 2133 * Add a word to the hashtable of common words. 2134 * If it's already there then the counter is increased. 2135 */ 2136 void 2137 count_common_word( 2138 slang_T *lp, 2139 char_u *word, 2140 int len, /* word length, -1 for upto NUL */ 2141 int count) /* 1 to count once, 10 to init */ 2142 { 2143 hash_T hash; 2144 hashitem_T *hi; 2145 wordcount_T *wc; 2146 char_u buf[MAXWLEN]; 2147 char_u *p; 2148 2149 if (len == -1) 2150 p = word; 2151 else 2152 { 2153 vim_strncpy(buf, word, len); 2154 p = buf; 2155 } 2156 2157 hash = hash_hash(p); 2158 hi = hash_lookup(&lp->sl_wordcount, p, hash); 2159 if (HASHITEM_EMPTY(hi)) 2160 { 2161 wc = (wordcount_T *)alloc((unsigned)(sizeof(wordcount_T) + STRLEN(p))); 2162 if (wc == NULL) 2163 return; 2164 STRCPY(wc->wc_word, p); 2165 wc->wc_count = count; 2166 hash_add_item(&lp->sl_wordcount, hi, wc->wc_word, hash); 2167 } 2168 else 2169 { 2170 wc = HI2WC(hi); 2171 if ((wc->wc_count += count) < (unsigned)count) /* check for overflow */ 2172 wc->wc_count = MAXWORDCOUNT; 2173 } 2174 } 2175 2176 /* 2177 * Adjust the score of common words. 2178 */ 2179 static int 2180 score_wordcount_adj( 2181 slang_T *slang, 2182 int score, 2183 char_u *word, 2184 int split) /* word was split, less bonus */ 2185 { 2186 hashitem_T *hi; 2187 wordcount_T *wc; 2188 int bonus; 2189 int newscore; 2190 2191 hi = hash_find(&slang->sl_wordcount, word); 2192 if (!HASHITEM_EMPTY(hi)) 2193 { 2194 wc = HI2WC(hi); 2195 if (wc->wc_count < SCORE_THRES2) 2196 bonus = SCORE_COMMON1; 2197 else if (wc->wc_count < SCORE_THRES3) 2198 bonus = SCORE_COMMON2; 2199 else 2200 bonus = SCORE_COMMON3; 2201 if (split) 2202 newscore = score - bonus / 2; 2203 else 2204 newscore = score - bonus; 2205 if (newscore < 0) 2206 return 0; 2207 return newscore; 2208 } 2209 return score; 2210 } 2211 2212 2213 /* 2214 * Return TRUE if byte "n" appears in "str". 2215 * Like strchr() but independent of locale. 2216 */ 2217 int 2218 byte_in_str(char_u *str, int n) 2219 { 2220 char_u *p; 2221 2222 for (p = str; *p != NUL; ++p) 2223 if (*p == n) 2224 return TRUE; 2225 return FALSE; 2226 } 2227 2228 #define SY_MAXLEN 30 2229 typedef struct syl_item_S 2230 { 2231 char_u sy_chars[SY_MAXLEN]; /* the sequence of chars */ 2232 int sy_len; 2233 } syl_item_T; 2234 2235 /* 2236 * Truncate "slang->sl_syllable" at the first slash and put the following items 2237 * in "slang->sl_syl_items". 2238 */ 2239 int 2240 init_syl_tab(slang_T *slang) 2241 { 2242 char_u *p; 2243 char_u *s; 2244 int l; 2245 syl_item_T *syl; 2246 2247 ga_init2(&slang->sl_syl_items, sizeof(syl_item_T), 4); 2248 p = vim_strchr(slang->sl_syllable, '/'); 2249 while (p != NULL) 2250 { 2251 *p++ = NUL; 2252 if (*p == NUL) /* trailing slash */ 2253 break; 2254 s = p; 2255 p = vim_strchr(p, '/'); 2256 if (p == NULL) 2257 l = (int)STRLEN(s); 2258 else 2259 l = (int)(p - s); 2260 if (l >= SY_MAXLEN) 2261 return SP_FORMERROR; 2262 if (ga_grow(&slang->sl_syl_items, 1) == FAIL) 2263 return SP_OTHERERROR; 2264 syl = ((syl_item_T *)slang->sl_syl_items.ga_data) 2265 + slang->sl_syl_items.ga_len++; 2266 vim_strncpy(syl->sy_chars, s, l); 2267 syl->sy_len = l; 2268 } 2269 return OK; 2270 } 2271 2272 /* 2273 * Count the number of syllables in "word". 2274 * When "word" contains spaces the syllables after the last space are counted. 2275 * Returns zero if syllables are not defines. 2276 */ 2277 static int 2278 count_syllables(slang_T *slang, char_u *word) 2279 { 2280 int cnt = 0; 2281 int skip = FALSE; 2282 char_u *p; 2283 int len; 2284 int i; 2285 syl_item_T *syl; 2286 int c; 2287 2288 if (slang->sl_syllable == NULL) 2289 return 0; 2290 2291 for (p = word; *p != NUL; p += len) 2292 { 2293 /* When running into a space reset counter. */ 2294 if (*p == ' ') 2295 { 2296 len = 1; 2297 cnt = 0; 2298 continue; 2299 } 2300 2301 /* Find longest match of syllable items. */ 2302 len = 0; 2303 for (i = 0; i < slang->sl_syl_items.ga_len; ++i) 2304 { 2305 syl = ((syl_item_T *)slang->sl_syl_items.ga_data) + i; 2306 if (syl->sy_len > len 2307 && STRNCMP(p, syl->sy_chars, syl->sy_len) == 0) 2308 len = syl->sy_len; 2309 } 2310 if (len != 0) /* found a match, count syllable */ 2311 { 2312 ++cnt; 2313 skip = FALSE; 2314 } 2315 else 2316 { 2317 /* No recognized syllable item, at least a syllable char then? */ 2318 #ifdef FEAT_MBYTE 2319 c = mb_ptr2char(p); 2320 len = (*mb_ptr2len)(p); 2321 #else 2322 c = *p; 2323 len = 1; 2324 #endif 2325 if (vim_strchr(slang->sl_syllable, c) == NULL) 2326 skip = FALSE; /* No, search for next syllable */ 2327 else if (!skip) 2328 { 2329 ++cnt; /* Yes, count it */ 2330 skip = TRUE; /* don't count following syllable chars */ 2331 } 2332 } 2333 } 2334 return cnt; 2335 } 2336 2337 /* 2338 * Parse 'spelllang' and set w_s->b_langp accordingly. 2339 * Returns NULL if it's OK, an error message otherwise. 2340 */ 2341 char_u * 2342 did_set_spelllang(win_T *wp) 2343 { 2344 garray_T ga; 2345 char_u *splp; 2346 char_u *region; 2347 char_u region_cp[3]; 2348 int filename; 2349 int region_mask; 2350 slang_T *slang; 2351 int c; 2352 char_u lang[MAXWLEN + 1]; 2353 char_u spf_name[MAXPATHL]; 2354 int len; 2355 char_u *p; 2356 int round; 2357 char_u *spf; 2358 char_u *use_region = NULL; 2359 int dont_use_region = FALSE; 2360 int nobreak = FALSE; 2361 int i, j; 2362 langp_T *lp, *lp2; 2363 static int recursive = FALSE; 2364 char_u *ret_msg = NULL; 2365 char_u *spl_copy; 2366 #ifdef FEAT_AUTOCMD 2367 bufref_T bufref; 2368 2369 set_bufref(&bufref, wp->w_buffer); 2370 #endif 2371 2372 /* We don't want to do this recursively. May happen when a language is 2373 * not available and the SpellFileMissing autocommand opens a new buffer 2374 * in which 'spell' is set. */ 2375 if (recursive) 2376 return NULL; 2377 recursive = TRUE; 2378 2379 ga_init2(&ga, sizeof(langp_T), 2); 2380 clear_midword(wp); 2381 2382 /* Make a copy of 'spelllang', the SpellFileMissing autocommands may change 2383 * it under our fingers. */ 2384 spl_copy = vim_strsave(wp->w_s->b_p_spl); 2385 if (spl_copy == NULL) 2386 goto theend; 2387 2388 #ifdef FEAT_MBYTE 2389 wp->w_s->b_cjk = 0; 2390 #endif 2391 2392 /* Loop over comma separated language names. */ 2393 for (splp = spl_copy; *splp != NUL; ) 2394 { 2395 /* Get one language name. */ 2396 copy_option_part(&splp, lang, MAXWLEN, ","); 2397 region = NULL; 2398 len = (int)STRLEN(lang); 2399 2400 if (STRCMP(lang, "cjk") == 0) 2401 { 2402 #ifdef FEAT_MBYTE 2403 wp->w_s->b_cjk = 1; 2404 #endif 2405 continue; 2406 } 2407 2408 /* If the name ends in ".spl" use it as the name of the spell file. 2409 * If there is a region name let "region" point to it and remove it 2410 * from the name. */ 2411 if (len > 4 && fnamecmp(lang + len - 4, ".spl") == 0) 2412 { 2413 filename = TRUE; 2414 2415 /* Locate a region and remove it from the file name. */ 2416 p = vim_strchr(gettail(lang), '_'); 2417 if (p != NULL && ASCII_ISALPHA(p[1]) && ASCII_ISALPHA(p[2]) 2418 && !ASCII_ISALPHA(p[3])) 2419 { 2420 vim_strncpy(region_cp, p + 1, 2); 2421 mch_memmove(p, p + 3, len - (p - lang) - 2); 2422 len -= 3; 2423 region = region_cp; 2424 } 2425 else 2426 dont_use_region = TRUE; 2427 2428 /* Check if we loaded this language before. */ 2429 for (slang = first_lang; slang != NULL; slang = slang->sl_next) 2430 if (fullpathcmp(lang, slang->sl_fname, FALSE) == FPC_SAME) 2431 break; 2432 } 2433 else 2434 { 2435 filename = FALSE; 2436 if (len > 3 && lang[len - 3] == '_') 2437 { 2438 region = lang + len - 2; 2439 len -= 3; 2440 lang[len] = NUL; 2441 } 2442 else 2443 dont_use_region = TRUE; 2444 2445 /* Check if we loaded this language before. */ 2446 for (slang = first_lang; slang != NULL; slang = slang->sl_next) 2447 if (STRICMP(lang, slang->sl_name) == 0) 2448 break; 2449 } 2450 2451 if (region != NULL) 2452 { 2453 /* If the region differs from what was used before then don't 2454 * use it for 'spellfile'. */ 2455 if (use_region != NULL && STRCMP(region, use_region) != 0) 2456 dont_use_region = TRUE; 2457 use_region = region; 2458 } 2459 2460 /* If not found try loading the language now. */ 2461 if (slang == NULL) 2462 { 2463 if (filename) 2464 (void)spell_load_file(lang, lang, NULL, FALSE); 2465 else 2466 { 2467 spell_load_lang(lang); 2468 #ifdef FEAT_AUTOCMD 2469 /* SpellFileMissing autocommands may do anything, including 2470 * destroying the buffer we are using... */ 2471 if (!bufref_valid(&bufref)) 2472 { 2473 ret_msg = (char_u *)N_("E797: SpellFileMissing autocommand deleted buffer"); 2474 goto theend; 2475 } 2476 #endif 2477 } 2478 } 2479 2480 /* 2481 * Loop over the languages, there can be several files for "lang". 2482 */ 2483 for (slang = first_lang; slang != NULL; slang = slang->sl_next) 2484 if (filename ? fullpathcmp(lang, slang->sl_fname, FALSE) == FPC_SAME 2485 : STRICMP(lang, slang->sl_name) == 0) 2486 { 2487 region_mask = REGION_ALL; 2488 if (!filename && region != NULL) 2489 { 2490 /* find region in sl_regions */ 2491 c = find_region(slang->sl_regions, region); 2492 if (c == REGION_ALL) 2493 { 2494 if (slang->sl_add) 2495 { 2496 if (*slang->sl_regions != NUL) 2497 /* This addition file is for other regions. */ 2498 region_mask = 0; 2499 } 2500 else 2501 /* This is probably an error. Give a warning and 2502 * accept the words anyway. */ 2503 smsg((char_u *) 2504 _("Warning: region %s not supported"), 2505 region); 2506 } 2507 else 2508 region_mask = 1 << c; 2509 } 2510 2511 if (region_mask != 0) 2512 { 2513 if (ga_grow(&ga, 1) == FAIL) 2514 { 2515 ga_clear(&ga); 2516 ret_msg = e_outofmem; 2517 goto theend; 2518 } 2519 LANGP_ENTRY(ga, ga.ga_len)->lp_slang = slang; 2520 LANGP_ENTRY(ga, ga.ga_len)->lp_region = region_mask; 2521 ++ga.ga_len; 2522 use_midword(slang, wp); 2523 if (slang->sl_nobreak) 2524 nobreak = TRUE; 2525 } 2526 } 2527 } 2528 2529 /* round 0: load int_wordlist, if possible. 2530 * round 1: load first name in 'spellfile'. 2531 * round 2: load second name in 'spellfile. 2532 * etc. */ 2533 spf = curwin->w_s->b_p_spf; 2534 for (round = 0; round == 0 || *spf != NUL; ++round) 2535 { 2536 if (round == 0) 2537 { 2538 /* Internal wordlist, if there is one. */ 2539 if (int_wordlist == NULL) 2540 continue; 2541 int_wordlist_spl(spf_name); 2542 } 2543 else 2544 { 2545 /* One entry in 'spellfile'. */ 2546 copy_option_part(&spf, spf_name, MAXPATHL - 5, ","); 2547 STRCAT(spf_name, ".spl"); 2548 2549 /* If it was already found above then skip it. */ 2550 for (c = 0; c < ga.ga_len; ++c) 2551 { 2552 p = LANGP_ENTRY(ga, c)->lp_slang->sl_fname; 2553 if (p != NULL && fullpathcmp(spf_name, p, FALSE) == FPC_SAME) 2554 break; 2555 } 2556 if (c < ga.ga_len) 2557 continue; 2558 } 2559 2560 /* Check if it was loaded already. */ 2561 for (slang = first_lang; slang != NULL; slang = slang->sl_next) 2562 if (fullpathcmp(spf_name, slang->sl_fname, FALSE) == FPC_SAME) 2563 break; 2564 if (slang == NULL) 2565 { 2566 /* Not loaded, try loading it now. The language name includes the 2567 * region name, the region is ignored otherwise. for int_wordlist 2568 * use an arbitrary name. */ 2569 if (round == 0) 2570 STRCPY(lang, "internal wordlist"); 2571 else 2572 { 2573 vim_strncpy(lang, gettail(spf_name), MAXWLEN); 2574 p = vim_strchr(lang, '.'); 2575 if (p != NULL) 2576 *p = NUL; /* truncate at ".encoding.add" */ 2577 } 2578 slang = spell_load_file(spf_name, lang, NULL, TRUE); 2579 2580 /* If one of the languages has NOBREAK we assume the addition 2581 * files also have this. */ 2582 if (slang != NULL && nobreak) 2583 slang->sl_nobreak = TRUE; 2584 } 2585 if (slang != NULL && ga_grow(&ga, 1) == OK) 2586 { 2587 region_mask = REGION_ALL; 2588 if (use_region != NULL && !dont_use_region) 2589 { 2590 /* find region in sl_regions */ 2591 c = find_region(slang->sl_regions, use_region); 2592 if (c != REGION_ALL) 2593 region_mask = 1 << c; 2594 else if (*slang->sl_regions != NUL) 2595 /* This spell file is for other regions. */ 2596 region_mask = 0; 2597 } 2598 2599 if (region_mask != 0) 2600 { 2601 LANGP_ENTRY(ga, ga.ga_len)->lp_slang = slang; 2602 LANGP_ENTRY(ga, ga.ga_len)->lp_sallang = NULL; 2603 LANGP_ENTRY(ga, ga.ga_len)->lp_replang = NULL; 2604 LANGP_ENTRY(ga, ga.ga_len)->lp_region = region_mask; 2605 ++ga.ga_len; 2606 use_midword(slang, wp); 2607 } 2608 } 2609 } 2610 2611 /* Everything is fine, store the new b_langp value. */ 2612 ga_clear(&wp->w_s->b_langp); 2613 wp->w_s->b_langp = ga; 2614 2615 /* For each language figure out what language to use for sound folding and 2616 * REP items. If the language doesn't support it itself use another one 2617 * with the same name. E.g. for "en-math" use "en". */ 2618 for (i = 0; i < ga.ga_len; ++i) 2619 { 2620 lp = LANGP_ENTRY(ga, i); 2621 2622 /* sound folding */ 2623 if (lp->lp_slang->sl_sal.ga_len > 0) 2624 /* language does sound folding itself */ 2625 lp->lp_sallang = lp->lp_slang; 2626 else 2627 /* find first similar language that does sound folding */ 2628 for (j = 0; j < ga.ga_len; ++j) 2629 { 2630 lp2 = LANGP_ENTRY(ga, j); 2631 if (lp2->lp_slang->sl_sal.ga_len > 0 2632 && STRNCMP(lp->lp_slang->sl_name, 2633 lp2->lp_slang->sl_name, 2) == 0) 2634 { 2635 lp->lp_sallang = lp2->lp_slang; 2636 break; 2637 } 2638 } 2639 2640 /* REP items */ 2641 if (lp->lp_slang->sl_rep.ga_len > 0) 2642 /* language has REP items itself */ 2643 lp->lp_replang = lp->lp_slang; 2644 else 2645 /* find first similar language that has REP items */ 2646 for (j = 0; j < ga.ga_len; ++j) 2647 { 2648 lp2 = LANGP_ENTRY(ga, j); 2649 if (lp2->lp_slang->sl_rep.ga_len > 0 2650 && STRNCMP(lp->lp_slang->sl_name, 2651 lp2->lp_slang->sl_name, 2) == 0) 2652 { 2653 lp->lp_replang = lp2->lp_slang; 2654 break; 2655 } 2656 } 2657 } 2658 2659 theend: 2660 vim_free(spl_copy); 2661 recursive = FALSE; 2662 redraw_win_later(wp, NOT_VALID); 2663 return ret_msg; 2664 } 2665 2666 /* 2667 * Clear the midword characters for buffer "buf". 2668 */ 2669 static void 2670 clear_midword(win_T *wp) 2671 { 2672 vim_memset(wp->w_s->b_spell_ismw, 0, 256); 2673 #ifdef FEAT_MBYTE 2674 vim_free(wp->w_s->b_spell_ismw_mb); 2675 wp->w_s->b_spell_ismw_mb = NULL; 2676 #endif 2677 } 2678 2679 /* 2680 * Use the "sl_midword" field of language "lp" for buffer "buf". 2681 * They add up to any currently used midword characters. 2682 */ 2683 static void 2684 use_midword(slang_T *lp, win_T *wp) 2685 { 2686 char_u *p; 2687 2688 if (lp->sl_midword == NULL) /* there aren't any */ 2689 return; 2690 2691 for (p = lp->sl_midword; *p != NUL; ) 2692 #ifdef FEAT_MBYTE 2693 if (has_mbyte) 2694 { 2695 int c, l, n; 2696 char_u *bp; 2697 2698 c = mb_ptr2char(p); 2699 l = (*mb_ptr2len)(p); 2700 if (c < 256 && l <= 2) 2701 wp->w_s->b_spell_ismw[c] = TRUE; 2702 else if (wp->w_s->b_spell_ismw_mb == NULL) 2703 /* First multi-byte char in "b_spell_ismw_mb". */ 2704 wp->w_s->b_spell_ismw_mb = vim_strnsave(p, l); 2705 else 2706 { 2707 /* Append multi-byte chars to "b_spell_ismw_mb". */ 2708 n = (int)STRLEN(wp->w_s->b_spell_ismw_mb); 2709 bp = vim_strnsave(wp->w_s->b_spell_ismw_mb, n + l); 2710 if (bp != NULL) 2711 { 2712 vim_free(wp->w_s->b_spell_ismw_mb); 2713 wp->w_s->b_spell_ismw_mb = bp; 2714 vim_strncpy(bp + n, p, l); 2715 } 2716 } 2717 p += l; 2718 } 2719 else 2720 #endif 2721 wp->w_s->b_spell_ismw[*p++] = TRUE; 2722 } 2723 2724 /* 2725 * Find the region "region[2]" in "rp" (points to "sl_regions"). 2726 * Each region is simply stored as the two characters of it's name. 2727 * Returns the index if found (first is 0), REGION_ALL if not found. 2728 */ 2729 static int 2730 find_region(char_u *rp, char_u *region) 2731 { 2732 int i; 2733 2734 for (i = 0; ; i += 2) 2735 { 2736 if (rp[i] == NUL) 2737 return REGION_ALL; 2738 if (rp[i] == region[0] && rp[i + 1] == region[1]) 2739 break; 2740 } 2741 return i / 2; 2742 } 2743 2744 /* 2745 * Return case type of word: 2746 * w word 0 2747 * Word WF_ONECAP 2748 * W WORD WF_ALLCAP 2749 * WoRd wOrd WF_KEEPCAP 2750 */ 2751 int 2752 captype( 2753 char_u *word, 2754 char_u *end) /* When NULL use up to NUL byte. */ 2755 { 2756 char_u *p; 2757 int c; 2758 int firstcap; 2759 int allcap; 2760 int past_second = FALSE; /* past second word char */ 2761 2762 /* find first letter */ 2763 for (p = word; !spell_iswordp_nmw(p, curwin); MB_PTR_ADV(p)) 2764 if (end == NULL ? *p == NUL : p >= end) 2765 return 0; /* only non-word characters, illegal word */ 2766 #ifdef FEAT_MBYTE 2767 if (has_mbyte) 2768 c = mb_ptr2char_adv(&p); 2769 else 2770 #endif 2771 c = *p++; 2772 firstcap = allcap = SPELL_ISUPPER(c); 2773 2774 /* 2775 * Need to check all letters to find a word with mixed upper/lower. 2776 * But a word with an upper char only at start is a ONECAP. 2777 */ 2778 for ( ; end == NULL ? *p != NUL : p < end; MB_PTR_ADV(p)) 2779 if (spell_iswordp_nmw(p, curwin)) 2780 { 2781 c = PTR2CHAR(p); 2782 if (!SPELL_ISUPPER(c)) 2783 { 2784 /* UUl -> KEEPCAP */ 2785 if (past_second && allcap) 2786 return WF_KEEPCAP; 2787 allcap = FALSE; 2788 } 2789 else if (!allcap) 2790 /* UlU -> KEEPCAP */ 2791 return WF_KEEPCAP; 2792 past_second = TRUE; 2793 } 2794 2795 if (allcap) 2796 return WF_ALLCAP; 2797 if (firstcap) 2798 return WF_ONECAP; 2799 return 0; 2800 } 2801 2802 /* 2803 * Like captype() but for a KEEPCAP word add ONECAP if the word starts with a 2804 * capital. So that make_case_word() can turn WOrd into Word. 2805 * Add ALLCAP for "WOrD". 2806 */ 2807 static int 2808 badword_captype(char_u *word, char_u *end) 2809 { 2810 int flags = captype(word, end); 2811 int c; 2812 int l, u; 2813 int first; 2814 char_u *p; 2815 2816 if (flags & WF_KEEPCAP) 2817 { 2818 /* Count the number of UPPER and lower case letters. */ 2819 l = u = 0; 2820 first = FALSE; 2821 for (p = word; p < end; MB_PTR_ADV(p)) 2822 { 2823 c = PTR2CHAR(p); 2824 if (SPELL_ISUPPER(c)) 2825 { 2826 ++u; 2827 if (p == word) 2828 first = TRUE; 2829 } 2830 else 2831 ++l; 2832 } 2833 2834 /* If there are more UPPER than lower case letters suggest an 2835 * ALLCAP word. Otherwise, if the first letter is UPPER then 2836 * suggest ONECAP. Exception: "ALl" most likely should be "All", 2837 * require three upper case letters. */ 2838 if (u > l && u > 2) 2839 flags |= WF_ALLCAP; 2840 else if (first) 2841 flags |= WF_ONECAP; 2842 2843 if (u >= 2 && l >= 2) /* maCARONI maCAroni */ 2844 flags |= WF_MIXCAP; 2845 } 2846 return flags; 2847 } 2848 2849 /* 2850 * Delete the internal wordlist and its .spl file. 2851 */ 2852 void 2853 spell_delete_wordlist(void) 2854 { 2855 char_u fname[MAXPATHL]; 2856 2857 if (int_wordlist != NULL) 2858 { 2859 mch_remove(int_wordlist); 2860 int_wordlist_spl(fname); 2861 mch_remove(fname); 2862 vim_free(int_wordlist); 2863 int_wordlist = NULL; 2864 } 2865 } 2866 2867 #if defined(FEAT_MBYTE) || defined(EXITFREE) || defined(PROTO) 2868 /* 2869 * Free all languages. 2870 */ 2871 void 2872 spell_free_all(void) 2873 { 2874 slang_T *slang; 2875 buf_T *buf; 2876 2877 /* Go through all buffers and handle 'spelllang'. <VN> */ 2878 FOR_ALL_BUFFERS(buf) 2879 ga_clear(&buf->b_s.b_langp); 2880 2881 while (first_lang != NULL) 2882 { 2883 slang = first_lang; 2884 first_lang = slang->sl_next; 2885 slang_free(slang); 2886 } 2887 2888 spell_delete_wordlist(); 2889 2890 vim_free(repl_to); 2891 repl_to = NULL; 2892 vim_free(repl_from); 2893 repl_from = NULL; 2894 } 2895 #endif 2896 2897 #if defined(FEAT_MBYTE) || defined(PROTO) 2898 /* 2899 * Clear all spelling tables and reload them. 2900 * Used after 'encoding' is set and when ":mkspell" was used. 2901 */ 2902 void 2903 spell_reload(void) 2904 { 2905 win_T *wp; 2906 2907 /* Initialize the table for spell_iswordp(). */ 2908 init_spell_chartab(); 2909 2910 /* Unload all allocated memory. */ 2911 spell_free_all(); 2912 2913 /* Go through all buffers and handle 'spelllang'. */ 2914 FOR_ALL_WINDOWS(wp) 2915 { 2916 /* Only load the wordlists when 'spelllang' is set and there is a 2917 * window for this buffer in which 'spell' is set. */ 2918 if (*wp->w_s->b_p_spl != NUL) 2919 { 2920 if (wp->w_p_spell) 2921 { 2922 (void)did_set_spelllang(wp); 2923 # ifdef FEAT_WINDOWS 2924 break; 2925 # endif 2926 } 2927 } 2928 } 2929 } 2930 #endif 2931 2932 /* 2933 * Opposite of offset2bytes(). 2934 * "pp" points to the bytes and is advanced over it. 2935 * Returns the offset. 2936 */ 2937 static int 2938 bytes2offset(char_u **pp) 2939 { 2940 char_u *p = *pp; 2941 int nr; 2942 int c; 2943 2944 c = *p++; 2945 if ((c & 0x80) == 0x00) /* 1 byte */ 2946 { 2947 nr = c - 1; 2948 } 2949 else if ((c & 0xc0) == 0x80) /* 2 bytes */ 2950 { 2951 nr = (c & 0x3f) - 1; 2952 nr = nr * 255 + (*p++ - 1); 2953 } 2954 else if ((c & 0xe0) == 0xc0) /* 3 bytes */ 2955 { 2956 nr = (c & 0x1f) - 1; 2957 nr = nr * 255 + (*p++ - 1); 2958 nr = nr * 255 + (*p++ - 1); 2959 } 2960 else /* 4 bytes */ 2961 { 2962 nr = (c & 0x0f) - 1; 2963 nr = nr * 255 + (*p++ - 1); 2964 nr = nr * 255 + (*p++ - 1); 2965 nr = nr * 255 + (*p++ - 1); 2966 } 2967 2968 *pp = p; 2969 return nr; 2970 } 2971 2972 2973 /* 2974 * Open a spell buffer. This is a nameless buffer that is not in the buffer 2975 * list and only contains text lines. Can use a swapfile to reduce memory 2976 * use. 2977 * Most other fields are invalid! Esp. watch out for string options being 2978 * NULL and there is no undo info. 2979 * Returns NULL when out of memory. 2980 */ 2981 buf_T * 2982 open_spellbuf(void) 2983 { 2984 buf_T *buf; 2985 2986 buf = (buf_T *)alloc_clear(sizeof(buf_T)); 2987 if (buf != NULL) 2988 { 2989 buf->b_spell = TRUE; 2990 buf->b_p_swf = TRUE; /* may create a swap file */ 2991 #ifdef FEAT_CRYPT 2992 buf->b_p_key = empty_option; 2993 #endif 2994 ml_open(buf); 2995 ml_open_file(buf); /* create swap file now */ 2996 } 2997 return buf; 2998 } 2999 3000 /* 3001 * Close the buffer used for spell info. 3002 */ 3003 void 3004 close_spellbuf(buf_T *buf) 3005 { 3006 if (buf != NULL) 3007 { 3008 ml_close(buf, TRUE); 3009 vim_free(buf); 3010 } 3011 } 3012 3013 /* 3014 * Init the chartab used for spelling for ASCII. 3015 * EBCDIC is not supported! 3016 */ 3017 void 3018 clear_spell_chartab(spelltab_T *sp) 3019 { 3020 int i; 3021 3022 /* Init everything to FALSE. */ 3023 vim_memset(sp->st_isw, FALSE, sizeof(sp->st_isw)); 3024 vim_memset(sp->st_isu, FALSE, sizeof(sp->st_isu)); 3025 for (i = 0; i < 256; ++i) 3026 { 3027 sp->st_fold[i] = i; 3028 sp->st_upper[i] = i; 3029 } 3030 3031 /* We include digits. A word shouldn't start with a digit, but handling 3032 * that is done separately. */ 3033 for (i = '0'; i <= '9'; ++i) 3034 sp->st_isw[i] = TRUE; 3035 for (i = 'A'; i <= 'Z'; ++i) 3036 { 3037 sp->st_isw[i] = TRUE; 3038 sp->st_isu[i] = TRUE; 3039 sp->st_fold[i] = i + 0x20; 3040 } 3041 for (i = 'a'; i <= 'z'; ++i) 3042 { 3043 sp->st_isw[i] = TRUE; 3044 sp->st_upper[i] = i - 0x20; 3045 } 3046 } 3047 3048 /* 3049 * Init the chartab used for spelling. Only depends on 'encoding'. 3050 * Called once while starting up and when 'encoding' changes. 3051 * The default is to use isalpha(), but the spell file should define the word 3052 * characters to make it possible that 'encoding' differs from the current 3053 * locale. For utf-8 we don't use isalpha() but our own functions. 3054 */ 3055 void 3056 init_spell_chartab(void) 3057 { 3058 int i; 3059 3060 did_set_spelltab = FALSE; 3061 clear_spell_chartab(&spelltab); 3062 #ifdef FEAT_MBYTE 3063 if (enc_dbcs) 3064 { 3065 /* DBCS: assume double-wide characters are word characters. */ 3066 for (i = 128; i <= 255; ++i) 3067 if (MB_BYTE2LEN(i) == 2) 3068 spelltab.st_isw[i] = TRUE; 3069 } 3070 else if (enc_utf8) 3071 { 3072 for (i = 128; i < 256; ++i) 3073 { 3074 int f = utf_fold(i); 3075 int u = utf_toupper(i); 3076 3077 spelltab.st_isu[i] = utf_isupper(i); 3078 spelltab.st_isw[i] = spelltab.st_isu[i] || utf_islower(i); 3079 /* The folded/upper-cased value is different between latin1 and 3080 * utf8 for 0xb5, causing E763 for no good reason. Use the latin1 3081 * value for utf-8 to avoid this. */ 3082 spelltab.st_fold[i] = (f < 256) ? f : i; 3083 spelltab.st_upper[i] = (u < 256) ? u : i; 3084 } 3085 } 3086 else 3087 #endif 3088 { 3089 /* Rough guess: use locale-dependent library functions. */ 3090 for (i = 128; i < 256; ++i) 3091 { 3092 if (MB_ISUPPER(i)) 3093 { 3094 spelltab.st_isw[i] = TRUE; 3095 spelltab.st_isu[i] = TRUE; 3096 spelltab.st_fold[i] = MB_TOLOWER(i); 3097 } 3098 else if (MB_ISLOWER(i)) 3099 { 3100 spelltab.st_isw[i] = TRUE; 3101 spelltab.st_upper[i] = MB_TOUPPER(i); 3102 } 3103 } 3104 } 3105 } 3106 3107 3108 /* 3109 * Return TRUE if "p" points to a word character. 3110 * As a special case we see "midword" characters as word character when it is 3111 * followed by a word character. This finds they'there but not 'they there'. 3112 * Thus this only works properly when past the first character of the word. 3113 */ 3114 static int 3115 spell_iswordp( 3116 char_u *p, 3117 win_T *wp) /* buffer used */ 3118 { 3119 #ifdef FEAT_MBYTE 3120 char_u *s; 3121 int l; 3122 int c; 3123 3124 if (has_mbyte) 3125 { 3126 l = MB_PTR2LEN(p); 3127 s = p; 3128 if (l == 1) 3129 { 3130 /* be quick for ASCII */ 3131 if (wp->w_s->b_spell_ismw[*p]) 3132 s = p + 1; /* skip a mid-word character */ 3133 } 3134 else 3135 { 3136 c = mb_ptr2char(p); 3137 if (c < 256 ? wp->w_s->b_spell_ismw[c] 3138 : (wp->w_s->b_spell_ismw_mb != NULL 3139 && vim_strchr(wp->w_s->b_spell_ismw_mb, c) != NULL)) 3140 s = p + l; 3141 } 3142 3143 c = mb_ptr2char(s); 3144 if (c > 255) 3145 return spell_mb_isword_class(mb_get_class(s), wp); 3146 return spelltab.st_isw[c]; 3147 } 3148 #endif 3149 3150 return spelltab.st_isw[wp->w_s->b_spell_ismw[*p] ? p[1] : p[0]]; 3151 } 3152 3153 /* 3154 * Return TRUE if "p" points to a word character. 3155 * Unlike spell_iswordp() this doesn't check for "midword" characters. 3156 */ 3157 int 3158 spell_iswordp_nmw(char_u *p, win_T *wp) 3159 { 3160 #ifdef FEAT_MBYTE 3161 int c; 3162 3163 if (has_mbyte) 3164 { 3165 c = mb_ptr2char(p); 3166 if (c > 255) 3167 return spell_mb_isword_class(mb_get_class(p), wp); 3168 return spelltab.st_isw[c]; 3169 } 3170 #endif 3171 return spelltab.st_isw[*p]; 3172 } 3173 3174 #ifdef FEAT_MBYTE 3175 /* 3176 * Return TRUE if word class indicates a word character. 3177 * Only for characters above 255. 3178 * Unicode subscript and superscript are not considered word characters. 3179 * See also dbcs_class() and utf_class() in mbyte.c. 3180 */ 3181 static int 3182 spell_mb_isword_class(int cl, win_T *wp) 3183 { 3184 if (wp->w_s->b_cjk) 3185 /* East Asian characters are not considered word characters. */ 3186 return cl == 2 || cl == 0x2800; 3187 return cl >= 2 && cl != 0x2070 && cl != 0x2080; 3188 } 3189 3190 /* 3191 * Return TRUE if "p" points to a word character. 3192 * Wide version of spell_iswordp(). 3193 */ 3194 static int 3195 spell_iswordp_w(int *p, win_T *wp) 3196 { 3197 int *s; 3198 3199 if (*p < 256 ? wp->w_s->b_spell_ismw[*p] 3200 : (wp->w_s->b_spell_ismw_mb != NULL 3201 && vim_strchr(wp->w_s->b_spell_ismw_mb, *p) != NULL)) 3202 s = p + 1; 3203 else 3204 s = p; 3205 3206 if (*s > 255) 3207 { 3208 if (enc_utf8) 3209 return spell_mb_isword_class(utf_class(*s), wp); 3210 if (enc_dbcs) 3211 return spell_mb_isword_class( 3212 dbcs_class((unsigned)*s >> 8, *s & 0xff), wp); 3213 return 0; 3214 } 3215 return spelltab.st_isw[*s]; 3216 } 3217 #endif 3218 3219 /* 3220 * Case-fold "str[len]" into "buf[buflen]". The result is NUL terminated. 3221 * Uses the character definitions from the .spl file. 3222 * When using a multi-byte 'encoding' the length may change! 3223 * Returns FAIL when something wrong. 3224 */ 3225 int 3226 spell_casefold( 3227 char_u *str, 3228 int len, 3229 char_u *buf, 3230 int buflen) 3231 { 3232 int i; 3233 3234 if (len >= buflen) 3235 { 3236 buf[0] = NUL; 3237 return FAIL; /* result will not fit */ 3238 } 3239 3240 #ifdef FEAT_MBYTE 3241 if (has_mbyte) 3242 { 3243 int outi = 0; 3244 char_u *p; 3245 int c; 3246 3247 /* Fold one character at a time. */ 3248 for (p = str; p < str + len; ) 3249 { 3250 if (outi + MB_MAXBYTES > buflen) 3251 { 3252 buf[outi] = NUL; 3253 return FAIL; 3254 } 3255 c = mb_cptr2char_adv(&p); 3256 outi += mb_char2bytes(SPELL_TOFOLD(c), buf + outi); 3257 } 3258 buf[outi] = NUL; 3259 } 3260 else 3261 #endif 3262 { 3263 /* Be quick for non-multibyte encodings. */ 3264 for (i = 0; i < len; ++i) 3265 buf[i] = spelltab.st_fold[str[i]]; 3266 buf[i] = NUL; 3267 } 3268 3269 return OK; 3270 } 3271 3272 /* values for sps_flags */ 3273 #define SPS_BEST 1 3274 #define SPS_FAST 2 3275 #define SPS_DOUBLE 4 3276 3277 static int sps_flags = SPS_BEST; /* flags from 'spellsuggest' */ 3278 static int sps_limit = 9999; /* max nr of suggestions given */ 3279 3280 /* 3281 * Check the 'spellsuggest' option. Return FAIL if it's wrong. 3282 * Sets "sps_flags" and "sps_limit". 3283 */ 3284 int 3285 spell_check_sps(void) 3286 { 3287 char_u *p; 3288 char_u *s; 3289 char_u buf[MAXPATHL]; 3290 int f; 3291 3292 sps_flags = 0; 3293 sps_limit = 9999; 3294 3295 for (p = p_sps; *p != NUL; ) 3296 { 3297 copy_option_part(&p, buf, MAXPATHL, ","); 3298 3299 f = 0; 3300 if (VIM_ISDIGIT(*buf)) 3301 { 3302 s = buf; 3303 sps_limit = getdigits(&s); 3304 if (*s != NUL && !VIM_ISDIGIT(*s)) 3305 f = -1; 3306 } 3307 else if (STRCMP(buf, "best") == 0) 3308 f = SPS_BEST; 3309 else if (STRCMP(buf, "fast") == 0) 3310 f = SPS_FAST; 3311 else if (STRCMP(buf, "double") == 0) 3312 f = SPS_DOUBLE; 3313 else if (STRNCMP(buf, "expr:", 5) != 0 3314 && STRNCMP(buf, "file:", 5) != 0) 3315 f = -1; 3316 3317 if (f == -1 || (sps_flags != 0 && f != 0)) 3318 { 3319 sps_flags = SPS_BEST; 3320 sps_limit = 9999; 3321 return FAIL; 3322 } 3323 if (f != 0) 3324 sps_flags = f; 3325 } 3326 3327 if (sps_flags == 0) 3328 sps_flags = SPS_BEST; 3329 3330 return OK; 3331 } 3332 3333 /* 3334 * "z=": Find badly spelled word under or after the cursor. 3335 * Give suggestions for the properly spelled word. 3336 * In Visual mode use the highlighted word as the bad word. 3337 * When "count" is non-zero use that suggestion. 3338 */ 3339 void 3340 spell_suggest(int count) 3341 { 3342 char_u *line; 3343 pos_T prev_cursor = curwin->w_cursor; 3344 char_u wcopy[MAXWLEN + 2]; 3345 char_u *p; 3346 int i; 3347 int c; 3348 suginfo_T sug; 3349 suggest_T *stp; 3350 int mouse_used; 3351 int need_cap; 3352 int limit; 3353 int selected = count; 3354 int badlen = 0; 3355 int msg_scroll_save = msg_scroll; 3356 3357 if (no_spell_checking(curwin)) 3358 return; 3359 3360 if (VIsual_active) 3361 { 3362 /* Use the Visually selected text as the bad word. But reject 3363 * a multi-line selection. */ 3364 if (curwin->w_cursor.lnum != VIsual.lnum) 3365 { 3366 vim_beep(BO_SPELL); 3367 return; 3368 } 3369 badlen = (int)curwin->w_cursor.col - (int)VIsual.col; 3370 if (badlen < 0) 3371 badlen = -badlen; 3372 else 3373 curwin->w_cursor.col = VIsual.col; 3374 ++badlen; 3375 end_visual_mode(); 3376 } 3377 /* Find the start of the badly spelled word. */ 3378 else if (spell_move_to(curwin, FORWARD, TRUE, TRUE, NULL) == 0 3379 || curwin->w_cursor.col > prev_cursor.col) 3380 { 3381 /* No bad word or it starts after the cursor: use the word under the 3382 * cursor. */ 3383 curwin->w_cursor = prev_cursor; 3384 line = ml_get_curline(); 3385 p = line + curwin->w_cursor.col; 3386 /* Backup to before start of word. */ 3387 while (p > line && spell_iswordp_nmw(p, curwin)) 3388 MB_PTR_BACK(line, p); 3389 /* Forward to start of word. */ 3390 while (*p != NUL && !spell_iswordp_nmw(p, curwin)) 3391 MB_PTR_ADV(p); 3392 3393 if (!spell_iswordp_nmw(p, curwin)) /* No word found. */ 3394 { 3395 beep_flush(); 3396 return; 3397 } 3398 curwin->w_cursor.col = (colnr_T)(p - line); 3399 } 3400 3401 /* Get the word and its length. */ 3402 3403 /* Figure out if the word should be capitalised. */ 3404 need_cap = check_need_cap(curwin->w_cursor.lnum, curwin->w_cursor.col); 3405 3406 /* Make a copy of current line since autocommands may free the line. */ 3407 line = vim_strsave(ml_get_curline()); 3408 if (line == NULL) 3409 goto skip; 3410 3411 /* Get the list of suggestions. Limit to 'lines' - 2 or the number in 3412 * 'spellsuggest', whatever is smaller. */ 3413 if (sps_limit > (int)Rows - 2) 3414 limit = (int)Rows - 2; 3415 else 3416 limit = sps_limit; 3417 spell_find_suggest(line + curwin->w_cursor.col, badlen, &sug, limit, 3418 TRUE, need_cap, TRUE); 3419 3420 if (sug.su_ga.ga_len == 0) 3421 MSG(_("Sorry, no suggestions")); 3422 else if (count > 0) 3423 { 3424 if (count > sug.su_ga.ga_len) 3425 smsg((char_u *)_("Sorry, only %ld suggestions"), 3426 (long)sug.su_ga.ga_len); 3427 } 3428 else 3429 { 3430 vim_free(repl_from); 3431 repl_from = NULL; 3432 vim_free(repl_to); 3433 repl_to = NULL; 3434 3435 #ifdef FEAT_RIGHTLEFT 3436 /* When 'rightleft' is set the list is drawn right-left. */ 3437 cmdmsg_rl = curwin->w_p_rl; 3438 if (cmdmsg_rl) 3439 msg_col = Columns - 1; 3440 #endif 3441 3442 /* List the suggestions. */ 3443 msg_start(); 3444 msg_row = Rows - 1; /* for when 'cmdheight' > 1 */ 3445 lines_left = Rows; /* avoid more prompt */ 3446 vim_snprintf((char *)IObuff, IOSIZE, _("Change \"%.*s\" to:"), 3447 sug.su_badlen, sug.su_badptr); 3448 #ifdef FEAT_RIGHTLEFT 3449 if (cmdmsg_rl && STRNCMP(IObuff, "Change", 6) == 0) 3450 { 3451 /* And now the rabbit from the high hat: Avoid showing the 3452 * untranslated message rightleft. */ 3453 vim_snprintf((char *)IObuff, IOSIZE, ":ot \"%.*s\" egnahC", 3454 sug.su_badlen, sug.su_badptr); 3455 } 3456 #endif 3457 msg_puts(IObuff); 3458 msg_clr_eos(); 3459 msg_putchar('\n'); 3460 3461 msg_scroll = TRUE; 3462 for (i = 0; i < sug.su_ga.ga_len; ++i) 3463 { 3464 stp = &SUG(sug.su_ga, i); 3465 3466 /* The suggested word may replace only part of the bad word, add 3467 * the not replaced part. */ 3468 vim_strncpy(wcopy, stp->st_word, MAXWLEN); 3469 if (sug.su_badlen > stp->st_orglen) 3470 vim_strncpy(wcopy + stp->st_wordlen, 3471 sug.su_badptr + stp->st_orglen, 3472 sug.su_badlen - stp->st_orglen); 3473 vim_snprintf((char *)IObuff, IOSIZE, "%2d", i + 1); 3474 #ifdef FEAT_RIGHTLEFT 3475 if (cmdmsg_rl) 3476 rl_mirror(IObuff); 3477 #endif 3478 msg_puts(IObuff); 3479 3480 vim_snprintf((char *)IObuff, IOSIZE, " \"%s\"", wcopy); 3481 msg_puts(IObuff); 3482 3483 /* The word may replace more than "su_badlen". */ 3484 if (sug.su_badlen < stp->st_orglen) 3485 { 3486 vim_snprintf((char *)IObuff, IOSIZE, _(" < \"%.*s\""), 3487 stp->st_orglen, sug.su_badptr); 3488 msg_puts(IObuff); 3489 } 3490 3491 if (p_verbose > 0) 3492 { 3493 /* Add the score. */ 3494 if (sps_flags & (SPS_DOUBLE | SPS_BEST)) 3495 vim_snprintf((char *)IObuff, IOSIZE, " (%s%d - %d)", 3496 stp->st_salscore ? "s " : "", 3497 stp->st_score, stp->st_altscore); 3498 else 3499 vim_snprintf((char *)IObuff, IOSIZE, " (%d)", 3500 stp->st_score); 3501 #ifdef FEAT_RIGHTLEFT 3502 if (cmdmsg_rl) 3503 /* Mirror the numbers, but keep the leading space. */ 3504 rl_mirror(IObuff + 1); 3505 #endif 3506 msg_advance(30); 3507 msg_puts(IObuff); 3508 } 3509 msg_putchar('\n'); 3510 } 3511 3512 #ifdef FEAT_RIGHTLEFT 3513 cmdmsg_rl = FALSE; 3514 msg_col = 0; 3515 #endif 3516 /* Ask for choice. */ 3517 selected = prompt_for_number(&mouse_used); 3518 if (mouse_used) 3519 selected -= lines_left; 3520 lines_left = Rows; /* avoid more prompt */ 3521 /* don't delay for 'smd' in normal_cmd() */ 3522 msg_scroll = msg_scroll_save; 3523 } 3524 3525 if (selected > 0 && selected <= sug.su_ga.ga_len && u_save_cursor() == OK) 3526 { 3527 /* Save the from and to text for :spellrepall. */ 3528 stp = &SUG(sug.su_ga, selected - 1); 3529 if (sug.su_badlen > stp->st_orglen) 3530 { 3531 /* Replacing less than "su_badlen", append the remainder to 3532 * repl_to. */ 3533 repl_from = vim_strnsave(sug.su_badptr, sug.su_badlen); 3534 vim_snprintf((char *)IObuff, IOSIZE, "%s%.*s", stp->st_word, 3535 sug.su_badlen - stp->st_orglen, 3536 sug.su_badptr + stp->st_orglen); 3537 repl_to = vim_strsave(IObuff); 3538 } 3539 else 3540 { 3541 /* Replacing su_badlen or more, use the whole word. */ 3542 repl_from = vim_strnsave(sug.su_badptr, stp->st_orglen); 3543 repl_to = vim_strsave(stp->st_word); 3544 } 3545 3546 /* Replace the word. */ 3547 p = alloc((unsigned)STRLEN(line) - stp->st_orglen 3548 + stp->st_wordlen + 1); 3549 if (p != NULL) 3550 { 3551 c = (int)(sug.su_badptr - line); 3552 mch_memmove(p, line, c); 3553 STRCPY(p + c, stp->st_word); 3554 STRCAT(p, sug.su_badptr + stp->st_orglen); 3555 ml_replace(curwin->w_cursor.lnum, p, FALSE); 3556 curwin->w_cursor.col = c; 3557 3558 /* For redo we use a change-word command. */ 3559 ResetRedobuff(); 3560 AppendToRedobuff((char_u *)"ciw"); 3561 AppendToRedobuffLit(p + c, 3562 stp->st_wordlen + sug.su_badlen - stp->st_orglen); 3563 AppendCharToRedobuff(ESC); 3564 3565 /* After this "p" may be invalid. */ 3566 changed_bytes(curwin->w_cursor.lnum, c); 3567 } 3568 } 3569 else 3570 curwin->w_cursor = prev_cursor; 3571 3572 spell_find_cleanup(&sug); 3573 skip: 3574 vim_free(line); 3575 } 3576 3577 /* 3578 * Check if the word at line "lnum" column "col" is required to start with a 3579 * capital. This uses 'spellcapcheck' of the current buffer. 3580 */ 3581 static int 3582 check_need_cap(linenr_T lnum, colnr_T col) 3583 { 3584 int need_cap = FALSE; 3585 char_u *line; 3586 char_u *line_copy = NULL; 3587 char_u *p; 3588 colnr_T endcol; 3589 regmatch_T regmatch; 3590 3591 if (curwin->w_s->b_cap_prog == NULL) 3592 return FALSE; 3593 3594 line = ml_get_curline(); 3595 endcol = 0; 3596 if ((int)(skipwhite(line) - line) >= (int)col) 3597 { 3598 /* At start of line, check if previous line is empty or sentence 3599 * ends there. */ 3600 if (lnum == 1) 3601 need_cap = TRUE; 3602 else 3603 { 3604 line = ml_get(lnum - 1); 3605 if (*skipwhite(line) == NUL) 3606 need_cap = TRUE; 3607 else 3608 { 3609 /* Append a space in place of the line break. */ 3610 line_copy = concat_str(line, (char_u *)" "); 3611 line = line_copy; 3612 endcol = (colnr_T)STRLEN(line); 3613 } 3614 } 3615 } 3616 else 3617 endcol = col; 3618 3619 if (endcol > 0) 3620 { 3621 /* Check if sentence ends before the bad word. */ 3622 regmatch.regprog = curwin->w_s->b_cap_prog; 3623 regmatch.rm_ic = FALSE; 3624 p = line + endcol; 3625 for (;;) 3626 { 3627 MB_PTR_BACK(line, p); 3628 if (p == line || spell_iswordp_nmw(p, curwin)) 3629 break; 3630 if (vim_regexec(®match, p, 0) 3631 && regmatch.endp[0] == line + endcol) 3632 { 3633 need_cap = TRUE; 3634 break; 3635 } 3636 } 3637 curwin->w_s->b_cap_prog = regmatch.regprog; 3638 } 3639 3640 vim_free(line_copy); 3641 3642 return need_cap; 3643 } 3644 3645 3646 /* 3647 * ":spellrepall" 3648 */ 3649 void 3650 ex_spellrepall(exarg_T *eap UNUSED) 3651 { 3652 pos_T pos = curwin->w_cursor; 3653 char_u *frompat; 3654 int addlen; 3655 char_u *line; 3656 char_u *p; 3657 int save_ws = p_ws; 3658 linenr_T prev_lnum = 0; 3659 3660 if (repl_from == NULL || repl_to == NULL) 3661 { 3662 EMSG(_("E752: No previous spell replacement")); 3663 return; 3664 } 3665 addlen = (int)(STRLEN(repl_to) - STRLEN(repl_from)); 3666 3667 frompat = alloc((unsigned)STRLEN(repl_from) + 7); 3668 if (frompat == NULL) 3669 return; 3670 sprintf((char *)frompat, "\\V\\<%s\\>", repl_from); 3671 p_ws = FALSE; 3672 3673 sub_nsubs = 0; 3674 sub_nlines = 0; 3675 curwin->w_cursor.lnum = 0; 3676 while (!got_int) 3677 { 3678 if (do_search(NULL, '/', frompat, 1L, SEARCH_KEEP, NULL, NULL) == 0 3679 || u_save_cursor() == FAIL) 3680 break; 3681 3682 /* Only replace when the right word isn't there yet. This happens 3683 * when changing "etc" to "etc.". */ 3684 line = ml_get_curline(); 3685 if (addlen <= 0 || STRNCMP(line + curwin->w_cursor.col, 3686 repl_to, STRLEN(repl_to)) != 0) 3687 { 3688 p = alloc((unsigned)STRLEN(line) + addlen + 1); 3689 if (p == NULL) 3690 break; 3691 mch_memmove(p, line, curwin->w_cursor.col); 3692 STRCPY(p + curwin->w_cursor.col, repl_to); 3693 STRCAT(p, line + curwin->w_cursor.col + STRLEN(repl_from)); 3694 ml_replace(curwin->w_cursor.lnum, p, FALSE); 3695 changed_bytes(curwin->w_cursor.lnum, curwin->w_cursor.col); 3696 3697 if (curwin->w_cursor.lnum != prev_lnum) 3698 { 3699 ++sub_nlines; 3700 prev_lnum = curwin->w_cursor.lnum; 3701 } 3702 ++sub_nsubs; 3703 } 3704 curwin->w_cursor.col += (colnr_T)STRLEN(repl_to); 3705 } 3706 3707 p_ws = save_ws; 3708 curwin->w_cursor = pos; 3709 vim_free(frompat); 3710 3711 if (sub_nsubs == 0) 3712 EMSG2(_("E753: Not found: %s"), repl_from); 3713 else 3714 do_sub_msg(FALSE); 3715 } 3716 3717 /* 3718 * Find spell suggestions for "word". Return them in the growarray "*gap" as 3719 * a list of allocated strings. 3720 */ 3721 void 3722 spell_suggest_list( 3723 garray_T *gap, 3724 char_u *word, 3725 int maxcount, /* maximum nr of suggestions */ 3726 int need_cap, /* 'spellcapcheck' matched */ 3727 int interactive) 3728 { 3729 suginfo_T sug; 3730 int i; 3731 suggest_T *stp; 3732 char_u *wcopy; 3733 3734 spell_find_suggest(word, 0, &sug, maxcount, FALSE, need_cap, interactive); 3735 3736 /* Make room in "gap". */ 3737 ga_init2(gap, sizeof(char_u *), sug.su_ga.ga_len + 1); 3738 if (ga_grow(gap, sug.su_ga.ga_len) == OK) 3739 { 3740 for (i = 0; i < sug.su_ga.ga_len; ++i) 3741 { 3742 stp = &SUG(sug.su_ga, i); 3743 3744 /* The suggested word may replace only part of "word", add the not 3745 * replaced part. */ 3746 wcopy = alloc(stp->st_wordlen 3747 + (unsigned)STRLEN(sug.su_badptr + stp->st_orglen) + 1); 3748 if (wcopy == NULL) 3749 break; 3750 STRCPY(wcopy, stp->st_word); 3751 STRCPY(wcopy + stp->st_wordlen, sug.su_badptr + stp->st_orglen); 3752 ((char_u **)gap->ga_data)[gap->ga_len++] = wcopy; 3753 } 3754 } 3755 3756 spell_find_cleanup(&sug); 3757 } 3758 3759 /* 3760 * Find spell suggestions for the word at the start of "badptr". 3761 * Return the suggestions in "su->su_ga". 3762 * The maximum number of suggestions is "maxcount". 3763 * Note: does use info for the current window. 3764 * This is based on the mechanisms of Aspell, but completely reimplemented. 3765 */ 3766 static void 3767 spell_find_suggest( 3768 char_u *badptr, 3769 int badlen, /* length of bad word or 0 if unknown */ 3770 suginfo_T *su, 3771 int maxcount, 3772 int banbadword, /* don't include badword in suggestions */ 3773 int need_cap, /* word should start with capital */ 3774 int interactive) 3775 { 3776 hlf_T attr = HLF_COUNT; 3777 char_u buf[MAXPATHL]; 3778 char_u *p; 3779 int do_combine = FALSE; 3780 char_u *sps_copy; 3781 #ifdef FEAT_EVAL 3782 static int expr_busy = FALSE; 3783 #endif 3784 int c; 3785 int i; 3786 langp_T *lp; 3787 3788 /* 3789 * Set the info in "*su". 3790 */ 3791 vim_memset(su, 0, sizeof(suginfo_T)); 3792 ga_init2(&su->su_ga, (int)sizeof(suggest_T), 10); 3793 ga_init2(&su->su_sga, (int)sizeof(suggest_T), 10); 3794 if (*badptr == NUL) 3795 return; 3796 hash_init(&su->su_banned); 3797 3798 su->su_badptr = badptr; 3799 if (badlen != 0) 3800 su->su_badlen = badlen; 3801 else 3802 su->su_badlen = spell_check(curwin, su->su_badptr, &attr, NULL, FALSE); 3803 su->su_maxcount = maxcount; 3804 su->su_maxscore = SCORE_MAXINIT; 3805 3806 if (su->su_badlen >= MAXWLEN) 3807 su->su_badlen = MAXWLEN - 1; /* just in case */ 3808 vim_strncpy(su->su_badword, su->su_badptr, su->su_badlen); 3809 (void)spell_casefold(su->su_badptr, su->su_badlen, 3810 su->su_fbadword, MAXWLEN); 3811 /* TODO: make this work if the case-folded text is longer than the original 3812 * text. Currently an illegal byte causes wrong pointer computations. */ 3813 su->su_fbadword[su->su_badlen] = NUL; 3814 3815 /* get caps flags for bad word */ 3816 su->su_badflags = badword_captype(su->su_badptr, 3817 su->su_badptr + su->su_badlen); 3818 if (need_cap) 3819 su->su_badflags |= WF_ONECAP; 3820 3821 /* Find the default language for sound folding. We simply use the first 3822 * one in 'spelllang' that supports sound folding. That's good for when 3823 * using multiple files for one language, it's not that bad when mixing 3824 * languages (e.g., "pl,en"). */ 3825 for (i = 0; i < curbuf->b_s.b_langp.ga_len; ++i) 3826 { 3827 lp = LANGP_ENTRY(curbuf->b_s.b_langp, i); 3828 if (lp->lp_sallang != NULL) 3829 { 3830 su->su_sallang = lp->lp_sallang; 3831 break; 3832 } 3833 } 3834 3835 /* Soundfold the bad word with the default sound folding, so that we don't 3836 * have to do this many times. */ 3837 if (su->su_sallang != NULL) 3838 spell_soundfold(su->su_sallang, su->su_fbadword, TRUE, 3839 su->su_sal_badword); 3840 3841 /* If the word is not capitalised and spell_check() doesn't consider the 3842 * word to be bad then it might need to be capitalised. Add a suggestion 3843 * for that. */ 3844 c = PTR2CHAR(su->su_badptr); 3845 if (!SPELL_ISUPPER(c) && attr == HLF_COUNT) 3846 { 3847 make_case_word(su->su_badword, buf, WF_ONECAP); 3848 add_suggestion(su, &su->su_ga, buf, su->su_badlen, SCORE_ICASE, 3849 0, TRUE, su->su_sallang, FALSE); 3850 } 3851 3852 /* Ban the bad word itself. It may appear in another region. */ 3853 if (banbadword) 3854 add_banned(su, su->su_badword); 3855 3856 /* Make a copy of 'spellsuggest', because the expression may change it. */ 3857 sps_copy = vim_strsave(p_sps); 3858 if (sps_copy == NULL) 3859 return; 3860 3861 /* Loop over the items in 'spellsuggest'. */ 3862 for (p = sps_copy; *p != NUL; ) 3863 { 3864 copy_option_part(&p, buf, MAXPATHL, ","); 3865 3866 if (STRNCMP(buf, "expr:", 5) == 0) 3867 { 3868 #ifdef FEAT_EVAL 3869 /* Evaluate an expression. Skip this when called recursively, 3870 * when using spellsuggest() in the expression. */ 3871 if (!expr_busy) 3872 { 3873 expr_busy = TRUE; 3874 spell_suggest_expr(su, buf + 5); 3875 expr_busy = FALSE; 3876 } 3877 #endif 3878 } 3879 else if (STRNCMP(buf, "file:", 5) == 0) 3880 /* Use list of suggestions in a file. */ 3881 spell_suggest_file(su, buf + 5); 3882 else 3883 { 3884 /* Use internal method. */ 3885 spell_suggest_intern(su, interactive); 3886 if (sps_flags & SPS_DOUBLE) 3887 do_combine = TRUE; 3888 } 3889 } 3890 3891 vim_free(sps_copy); 3892 3893 if (do_combine) 3894 /* Combine the two list of suggestions. This must be done last, 3895 * because sorting changes the order again. */ 3896 score_combine(su); 3897 } 3898 3899 #ifdef FEAT_EVAL 3900 /* 3901 * Find suggestions by evaluating expression "expr". 3902 */ 3903 static void 3904 spell_suggest_expr(suginfo_T *su, char_u *expr) 3905 { 3906 list_T *list; 3907 listitem_T *li; 3908 int score; 3909 char_u *p; 3910 3911 /* The work is split up in a few parts to avoid having to export 3912 * suginfo_T. 3913 * First evaluate the expression and get the resulting list. */ 3914 list = eval_spell_expr(su->su_badword, expr); 3915 if (list != NULL) 3916 { 3917 /* Loop over the items in the list. */ 3918 for (li = list->lv_first; li != NULL; li = li->li_next) 3919 if (li->li_tv.v_type == VAR_LIST) 3920 { 3921 /* Get the word and the score from the items. */ 3922 score = get_spellword(li->li_tv.vval.v_list, &p); 3923 if (score >= 0 && score <= su->su_maxscore) 3924 add_suggestion(su, &su->su_ga, p, su->su_badlen, 3925 score, 0, TRUE, su->su_sallang, FALSE); 3926 } 3927 list_unref(list); 3928 } 3929 3930 /* Remove bogus suggestions, sort and truncate at "maxcount". */ 3931 check_suggestions(su, &su->su_ga); 3932 (void)cleanup_suggestions(&su->su_ga, su->su_maxscore, su->su_maxcount); 3933 } 3934 #endif 3935 3936 /* 3937 * Find suggestions in file "fname". Used for "file:" in 'spellsuggest'. 3938 */ 3939 static void 3940 spell_suggest_file(suginfo_T *su, char_u *fname) 3941 { 3942 FILE *fd; 3943 char_u line[MAXWLEN * 2]; 3944 char_u *p; 3945 int len; 3946 char_u cword[MAXWLEN]; 3947 3948 /* Open the file. */ 3949 fd = mch_fopen((char *)fname, "r"); 3950 if (fd == NULL) 3951 { 3952 EMSG2(_(e_notopen), fname); 3953 return; 3954 } 3955 3956 /* Read it line by line. */ 3957 while (!vim_fgets(line, MAXWLEN * 2, fd) && !got_int) 3958 { 3959 line_breakcheck(); 3960 3961 p = vim_strchr(line, '/'); 3962 if (p == NULL) 3963 continue; /* No Tab found, just skip the line. */ 3964 *p++ = NUL; 3965 if (STRICMP(su->su_badword, line) == 0) 3966 { 3967 /* Match! Isolate the good word, until CR or NL. */ 3968 for (len = 0; p[len] >= ' '; ++len) 3969 ; 3970 p[len] = NUL; 3971 3972 /* If the suggestion doesn't have specific case duplicate the case 3973 * of the bad word. */ 3974 if (captype(p, NULL) == 0) 3975 { 3976 make_case_word(p, cword, su->su_badflags); 3977 p = cword; 3978 } 3979 3980 add_suggestion(su, &su->su_ga, p, su->su_badlen, 3981 SCORE_FILE, 0, TRUE, su->su_sallang, FALSE); 3982 } 3983 } 3984 3985 fclose(fd); 3986 3987 /* Remove bogus suggestions, sort and truncate at "maxcount". */ 3988 check_suggestions(su, &su->su_ga); 3989 (void)cleanup_suggestions(&su->su_ga, su->su_maxscore, su->su_maxcount); 3990 } 3991 3992 /* 3993 * Find suggestions for the internal method indicated by "sps_flags". 3994 */ 3995 static void 3996 spell_suggest_intern(suginfo_T *su, int interactive) 3997 { 3998 /* 3999 * Load the .sug file(s) that are available and not done yet. 4000 */ 4001 suggest_load_files(); 4002 4003 /* 4004 * 1. Try special cases, such as repeating a word: "the the" -> "the". 4005 * 4006 * Set a maximum score to limit the combination of operations that is 4007 * tried. 4008 */ 4009 suggest_try_special(su); 4010 4011 /* 4012 * 2. Try inserting/deleting/swapping/changing a letter, use REP entries 4013 * from the .aff file and inserting a space (split the word). 4014 */ 4015 suggest_try_change(su); 4016 4017 /* For the resulting top-scorers compute the sound-a-like score. */ 4018 if (sps_flags & SPS_DOUBLE) 4019 score_comp_sal(su); 4020 4021 /* 4022 * 3. Try finding sound-a-like words. 4023 */ 4024 if ((sps_flags & SPS_FAST) == 0) 4025 { 4026 if (sps_flags & SPS_BEST) 4027 /* Adjust the word score for the suggestions found so far for how 4028 * they sounds like. */ 4029 rescore_suggestions(su); 4030 4031 /* 4032 * While going through the soundfold tree "su_maxscore" is the score 4033 * for the soundfold word, limits the changes that are being tried, 4034 * and "su_sfmaxscore" the rescored score, which is set by 4035 * cleanup_suggestions(). 4036 * First find words with a small edit distance, because this is much 4037 * faster and often already finds the top-N suggestions. If we didn't 4038 * find many suggestions try again with a higher edit distance. 4039 * "sl_sounddone" is used to avoid doing the same word twice. 4040 */ 4041 suggest_try_soundalike_prep(); 4042 su->su_maxscore = SCORE_SFMAX1; 4043 su->su_sfmaxscore = SCORE_MAXINIT * 3; 4044 suggest_try_soundalike(su); 4045 if (su->su_ga.ga_len < SUG_CLEAN_COUNT(su)) 4046 { 4047 /* We didn't find enough matches, try again, allowing more 4048 * changes to the soundfold word. */ 4049 su->su_maxscore = SCORE_SFMAX2; 4050 suggest_try_soundalike(su); 4051 if (su->su_ga.ga_len < SUG_CLEAN_COUNT(su)) 4052 { 4053 /* Still didn't find enough matches, try again, allowing even 4054 * more changes to the soundfold word. */ 4055 su->su_maxscore = SCORE_SFMAX3; 4056 suggest_try_soundalike(su); 4057 } 4058 } 4059 su->su_maxscore = su->su_sfmaxscore; 4060 suggest_try_soundalike_finish(); 4061 } 4062 4063 /* When CTRL-C was hit while searching do show the results. Only clear 4064 * got_int when using a command, not for spellsuggest(). */ 4065 ui_breakcheck(); 4066 if (interactive && got_int) 4067 { 4068 (void)vgetc(); 4069 got_int = FALSE; 4070 } 4071 4072 if ((sps_flags & SPS_DOUBLE) == 0 && su->su_ga.ga_len != 0) 4073 { 4074 if (sps_flags & SPS_BEST) 4075 /* Adjust the word score for how it sounds like. */ 4076 rescore_suggestions(su); 4077 4078 /* Remove bogus suggestions, sort and truncate at "maxcount". */ 4079 check_suggestions(su, &su->su_ga); 4080 (void)cleanup_suggestions(&su->su_ga, su->su_maxscore, su->su_maxcount); 4081 } 4082 } 4083 4084 /* 4085 * Free the info put in "*su" by spell_find_suggest(). 4086 */ 4087 static void 4088 spell_find_cleanup(suginfo_T *su) 4089 { 4090 int i; 4091 4092 /* Free the suggestions. */ 4093 for (i = 0; i < su->su_ga.ga_len; ++i) 4094 vim_free(SUG(su->su_ga, i).st_word); 4095 ga_clear(&su->su_ga); 4096 for (i = 0; i < su->su_sga.ga_len; ++i) 4097 vim_free(SUG(su->su_sga, i).st_word); 4098 ga_clear(&su->su_sga); 4099 4100 /* Free the banned words. */ 4101 hash_clear_all(&su->su_banned, 0); 4102 } 4103 4104 /* 4105 * Make a copy of "word", with the first letter upper or lower cased, to 4106 * "wcopy[MAXWLEN]". "word" must not be empty. 4107 * The result is NUL terminated. 4108 */ 4109 void 4110 onecap_copy( 4111 char_u *word, 4112 char_u *wcopy, 4113 int upper) /* TRUE: first letter made upper case */ 4114 { 4115 char_u *p; 4116 int c; 4117 int l; 4118 4119 p = word; 4120 #ifdef FEAT_MBYTE 4121 if (has_mbyte) 4122 c = mb_cptr2char_adv(&p); 4123 else 4124 #endif 4125 c = *p++; 4126 if (upper) 4127 c = SPELL_TOUPPER(c); 4128 else 4129 c = SPELL_TOFOLD(c); 4130 #ifdef FEAT_MBYTE 4131 if (has_mbyte) 4132 l = mb_char2bytes(c, wcopy); 4133 else 4134 #endif 4135 { 4136 l = 1; 4137 wcopy[0] = c; 4138 } 4139 vim_strncpy(wcopy + l, p, MAXWLEN - l - 1); 4140 } 4141 4142 /* 4143 * Make a copy of "word" with all the letters upper cased into 4144 * "wcopy[MAXWLEN]". The result is NUL terminated. 4145 */ 4146 static void 4147 allcap_copy(char_u *word, char_u *wcopy) 4148 { 4149 char_u *s; 4150 char_u *d; 4151 int c; 4152 4153 d = wcopy; 4154 for (s = word; *s != NUL; ) 4155 { 4156 #ifdef FEAT_MBYTE 4157 if (has_mbyte) 4158 c = mb_cptr2char_adv(&s); 4159 else 4160 #endif 4161 c = *s++; 4162 4163 #ifdef FEAT_MBYTE 4164 /* We only change 0xdf to SS when we are certain latin1 is used. It 4165 * would cause weird errors in other 8-bit encodings. */ 4166 if (enc_latin1like && c == 0xdf) 4167 { 4168 c = 'S'; 4169 if (d - wcopy >= MAXWLEN - 1) 4170 break; 4171 *d++ = c; 4172 } 4173 else 4174 #endif 4175 c = SPELL_TOUPPER(c); 4176 4177 #ifdef FEAT_MBYTE 4178 if (has_mbyte) 4179 { 4180 if (d - wcopy >= MAXWLEN - MB_MAXBYTES) 4181 break; 4182 d += mb_char2bytes(c, d); 4183 } 4184 else 4185 #endif 4186 { 4187 if (d - wcopy >= MAXWLEN - 1) 4188 break; 4189 *d++ = c; 4190 } 4191 } 4192 *d = NUL; 4193 } 4194 4195 /* 4196 * Try finding suggestions by recognizing specific situations. 4197 */ 4198 static void 4199 suggest_try_special(suginfo_T *su) 4200 { 4201 char_u *p; 4202 size_t len; 4203 int c; 4204 char_u word[MAXWLEN]; 4205 4206 /* 4207 * Recognize a word that is repeated: "the the". 4208 */ 4209 p = skiptowhite(su->su_fbadword); 4210 len = p - su->su_fbadword; 4211 p = skipwhite(p); 4212 if (STRLEN(p) == len && STRNCMP(su->su_fbadword, p, len) == 0) 4213 { 4214 /* Include badflags: if the badword is onecap or allcap 4215 * use that for the goodword too: "The the" -> "The". */ 4216 c = su->su_fbadword[len]; 4217 su->su_fbadword[len] = NUL; 4218 make_case_word(su->su_fbadword, word, su->su_badflags); 4219 su->su_fbadword[len] = c; 4220 4221 /* Give a soundalike score of 0, compute the score as if deleting one 4222 * character. */ 4223 add_suggestion(su, &su->su_ga, word, su->su_badlen, 4224 RESCORE(SCORE_REP, 0), 0, TRUE, su->su_sallang, FALSE); 4225 } 4226 } 4227 4228 /* 4229 * Change the 0 to 1 to measure how much time is spent in each state. 4230 * Output is dumped in "suggestprof". 4231 */ 4232 #if 0 4233 # define SUGGEST_PROFILE 4234 proftime_T current; 4235 proftime_T total; 4236 proftime_T times[STATE_FINAL + 1]; 4237 long counts[STATE_FINAL + 1]; 4238 4239 static void 4240 prof_init(void) 4241 { 4242 for (int i = 0; i <= STATE_FINAL; ++i) 4243 { 4244 profile_zero(×[i]); 4245 counts[i] = 0; 4246 } 4247 profile_start(¤t); 4248 profile_start(&total); 4249 } 4250 4251 /* call before changing state */ 4252 static void 4253 prof_store(state_T state) 4254 { 4255 profile_end(¤t); 4256 profile_add(×[state], ¤t); 4257 ++counts[state]; 4258 profile_start(¤t); 4259 } 4260 # define PROF_STORE(state) prof_store(state); 4261 4262 static void 4263 prof_report(char *name) 4264 { 4265 FILE *fd = fopen("suggestprof", "a"); 4266 4267 profile_end(&total); 4268 fprintf(fd, "-----------------------\n"); 4269 fprintf(fd, "%s: %s\n", name, profile_msg(&total)); 4270 for (int i = 0; i <= STATE_FINAL; ++i) 4271 fprintf(fd, "%d: %s (%ld)\n", i, profile_msg(×[i]), counts[i]); 4272 fclose(fd); 4273 } 4274 #else 4275 # define PROF_STORE(state) 4276 #endif 4277 4278 /* 4279 * Try finding suggestions by adding/removing/swapping letters. 4280 */ 4281 static void 4282 suggest_try_change(suginfo_T *su) 4283 { 4284 char_u fword[MAXWLEN]; /* copy of the bad word, case-folded */ 4285 int n; 4286 char_u *p; 4287 int lpi; 4288 langp_T *lp; 4289 4290 /* We make a copy of the case-folded bad word, so that we can modify it 4291 * to find matches (esp. REP items). Append some more text, changing 4292 * chars after the bad word may help. */ 4293 STRCPY(fword, su->su_fbadword); 4294 n = (int)STRLEN(fword); 4295 p = su->su_badptr + su->su_badlen; 4296 (void)spell_casefold(p, (int)STRLEN(p), fword + n, MAXWLEN - n); 4297 4298 for (lpi = 0; lpi < curwin->w_s->b_langp.ga_len; ++lpi) 4299 { 4300 lp = LANGP_ENTRY(curwin->w_s->b_langp, lpi); 4301 4302 /* If reloading a spell file fails it's still in the list but 4303 * everything has been cleared. */ 4304 if (lp->lp_slang->sl_fbyts == NULL) 4305 continue; 4306 4307 /* Try it for this language. Will add possible suggestions. */ 4308 #ifdef SUGGEST_PROFILE 4309 prof_init(); 4310 #endif 4311 suggest_trie_walk(su, lp, fword, FALSE); 4312 #ifdef SUGGEST_PROFILE 4313 prof_report("try_change"); 4314 #endif 4315 } 4316 } 4317 4318 /* Check the maximum score, if we go over it we won't try this change. */ 4319 #define TRY_DEEPER(su, stack, depth, add) \ 4320 (stack[depth].ts_score + (add) < su->su_maxscore) 4321 4322 /* 4323 * Try finding suggestions by adding/removing/swapping letters. 4324 * 4325 * This uses a state machine. At each node in the tree we try various 4326 * operations. When trying if an operation works "depth" is increased and the 4327 * stack[] is used to store info. This allows combinations, thus insert one 4328 * character, replace one and delete another. The number of changes is 4329 * limited by su->su_maxscore. 4330 * 4331 * After implementing this I noticed an article by Kemal Oflazer that 4332 * describes something similar: "Error-tolerant Finite State Recognition with 4333 * Applications to Morphological Analysis and Spelling Correction" (1996). 4334 * The implementation in the article is simplified and requires a stack of 4335 * unknown depth. The implementation here only needs a stack depth equal to 4336 * the length of the word. 4337 * 4338 * This is also used for the sound-folded word, "soundfold" is TRUE then. 4339 * The mechanism is the same, but we find a match with a sound-folded word 4340 * that comes from one or more original words. Each of these words may be 4341 * added, this is done by add_sound_suggest(). 4342 * Don't use: 4343 * the prefix tree or the keep-case tree 4344 * "su->su_badlen" 4345 * anything to do with upper and lower case 4346 * anything to do with word or non-word characters ("spell_iswordp()") 4347 * banned words 4348 * word flags (rare, region, compounding) 4349 * word splitting for now 4350 * "similar_chars()" 4351 * use "slang->sl_repsal" instead of "lp->lp_replang->sl_rep" 4352 */ 4353 static void 4354 suggest_trie_walk( 4355 suginfo_T *su, 4356 langp_T *lp, 4357 char_u *fword, 4358 int soundfold) 4359 { 4360 char_u tword[MAXWLEN]; /* good word collected so far */ 4361 trystate_T stack[MAXWLEN]; 4362 char_u preword[MAXWLEN * 3]; /* word found with proper case; 4363 * concatenation of prefix compound 4364 * words and split word. NUL terminated 4365 * when going deeper but not when coming 4366 * back. */ 4367 char_u compflags[MAXWLEN]; /* compound flags, one for each word */ 4368 trystate_T *sp; 4369 int newscore; 4370 int score; 4371 char_u *byts, *fbyts, *pbyts; 4372 idx_T *idxs, *fidxs, *pidxs; 4373 int depth; 4374 int c, c2, c3; 4375 int n = 0; 4376 int flags; 4377 garray_T *gap; 4378 idx_T arridx; 4379 int len; 4380 char_u *p; 4381 fromto_T *ftp; 4382 int fl = 0, tl; 4383 int repextra = 0; /* extra bytes in fword[] from REP item */ 4384 slang_T *slang = lp->lp_slang; 4385 int fword_ends; 4386 int goodword_ends; 4387 #ifdef DEBUG_TRIEWALK 4388 /* Stores the name of the change made at each level. */ 4389 char_u changename[MAXWLEN][80]; 4390 #endif 4391 int breakcheckcount = 1000; 4392 int compound_ok; 4393 4394 /* 4395 * Go through the whole case-fold tree, try changes at each node. 4396 * "tword[]" contains the word collected from nodes in the tree. 4397 * "fword[]" the word we are trying to match with (initially the bad 4398 * word). 4399 */ 4400 depth = 0; 4401 sp = &stack[0]; 4402 vim_memset(sp, 0, sizeof(trystate_T)); 4403 sp->ts_curi = 1; 4404 4405 if (soundfold) 4406 { 4407 /* Going through the soundfold tree. */ 4408 byts = fbyts = slang->sl_sbyts; 4409 idxs = fidxs = slang->sl_sidxs; 4410 pbyts = NULL; 4411 pidxs = NULL; 4412 sp->ts_prefixdepth = PFD_NOPREFIX; 4413 sp->ts_state = STATE_START; 4414 } 4415 else 4416 { 4417 /* 4418 * When there are postponed prefixes we need to use these first. At 4419 * the end of the prefix we continue in the case-fold tree. 4420 */ 4421 fbyts = slang->sl_fbyts; 4422 fidxs = slang->sl_fidxs; 4423 pbyts = slang->sl_pbyts; 4424 pidxs = slang->sl_pidxs; 4425 if (pbyts != NULL) 4426 { 4427 byts = pbyts; 4428 idxs = pidxs; 4429 sp->ts_prefixdepth = PFD_PREFIXTREE; 4430 sp->ts_state = STATE_NOPREFIX; /* try without prefix first */ 4431 } 4432 else 4433 { 4434 byts = fbyts; 4435 idxs = fidxs; 4436 sp->ts_prefixdepth = PFD_NOPREFIX; 4437 sp->ts_state = STATE_START; 4438 } 4439 } 4440 4441 /* 4442 * Loop to find all suggestions. At each round we either: 4443 * - For the current state try one operation, advance "ts_curi", 4444 * increase "depth". 4445 * - When a state is done go to the next, set "ts_state". 4446 * - When all states are tried decrease "depth". 4447 */ 4448 while (depth >= 0 && !got_int) 4449 { 4450 sp = &stack[depth]; 4451 switch (sp->ts_state) 4452 { 4453 case STATE_START: 4454 case STATE_NOPREFIX: 4455 /* 4456 * Start of node: Deal with NUL bytes, which means 4457 * tword[] may end here. 4458 */ 4459 arridx = sp->ts_arridx; /* current node in the tree */ 4460 len = byts[arridx]; /* bytes in this node */ 4461 arridx += sp->ts_curi; /* index of current byte */ 4462 4463 if (sp->ts_prefixdepth == PFD_PREFIXTREE) 4464 { 4465 /* Skip over the NUL bytes, we use them later. */ 4466 for (n = 0; n < len && byts[arridx + n] == 0; ++n) 4467 ; 4468 sp->ts_curi += n; 4469 4470 /* Always past NUL bytes now. */ 4471 n = (int)sp->ts_state; 4472 PROF_STORE(sp->ts_state) 4473 sp->ts_state = STATE_ENDNUL; 4474 sp->ts_save_badflags = su->su_badflags; 4475 4476 /* At end of a prefix or at start of prefixtree: check for 4477 * following word. */ 4478 if (byts[arridx] == 0 || n == (int)STATE_NOPREFIX) 4479 { 4480 /* Set su->su_badflags to the caps type at this position. 4481 * Use the caps type until here for the prefix itself. */ 4482 #ifdef FEAT_MBYTE 4483 if (has_mbyte) 4484 n = nofold_len(fword, sp->ts_fidx, su->su_badptr); 4485 else 4486 #endif 4487 n = sp->ts_fidx; 4488 flags = badword_captype(su->su_badptr, su->su_badptr + n); 4489 su->su_badflags = badword_captype(su->su_badptr + n, 4490 su->su_badptr + su->su_badlen); 4491 #ifdef DEBUG_TRIEWALK 4492 sprintf(changename[depth], "prefix"); 4493 #endif 4494 go_deeper(stack, depth, 0); 4495 ++depth; 4496 sp = &stack[depth]; 4497 sp->ts_prefixdepth = depth - 1; 4498 byts = fbyts; 4499 idxs = fidxs; 4500 sp->ts_arridx = 0; 4501 4502 /* Move the prefix to preword[] with the right case 4503 * and make find_keepcap_word() works. */ 4504 tword[sp->ts_twordlen] = NUL; 4505 make_case_word(tword + sp->ts_splitoff, 4506 preword + sp->ts_prewordlen, flags); 4507 sp->ts_prewordlen = (char_u)STRLEN(preword); 4508 sp->ts_splitoff = sp->ts_twordlen; 4509 } 4510 break; 4511 } 4512 4513 if (sp->ts_curi > len || byts[arridx] != 0) 4514 { 4515 /* Past bytes in node and/or past NUL bytes. */ 4516 PROF_STORE(sp->ts_state) 4517 sp->ts_state = STATE_ENDNUL; 4518 sp->ts_save_badflags = su->su_badflags; 4519 break; 4520 } 4521 4522 /* 4523 * End of word in tree. 4524 */ 4525 ++sp->ts_curi; /* eat one NUL byte */ 4526 4527 flags = (int)idxs[arridx]; 4528 4529 /* Skip words with the NOSUGGEST flag. */ 4530 if (flags & WF_NOSUGGEST) 4531 break; 4532 4533 fword_ends = (fword[sp->ts_fidx] == NUL 4534 || (soundfold 4535 ? VIM_ISWHITE(fword[sp->ts_fidx]) 4536 : !spell_iswordp(fword + sp->ts_fidx, curwin))); 4537 tword[sp->ts_twordlen] = NUL; 4538 4539 if (sp->ts_prefixdepth <= PFD_NOTSPECIAL 4540 && (sp->ts_flags & TSF_PREFIXOK) == 0) 4541 { 4542 /* There was a prefix before the word. Check that the prefix 4543 * can be used with this word. */ 4544 /* Count the length of the NULs in the prefix. If there are 4545 * none this must be the first try without a prefix. */ 4546 n = stack[sp->ts_prefixdepth].ts_arridx; 4547 len = pbyts[n++]; 4548 for (c = 0; c < len && pbyts[n + c] == 0; ++c) 4549 ; 4550 if (c > 0) 4551 { 4552 c = valid_word_prefix(c, n, flags, 4553 tword + sp->ts_splitoff, slang, FALSE); 4554 if (c == 0) 4555 break; 4556 4557 /* Use the WF_RARE flag for a rare prefix. */ 4558 if (c & WF_RAREPFX) 4559 flags |= WF_RARE; 4560 4561 /* Tricky: when checking for both prefix and compounding 4562 * we run into the prefix flag first. 4563 * Remember that it's OK, so that we accept the prefix 4564 * when arriving at a compound flag. */ 4565 sp->ts_flags |= TSF_PREFIXOK; 4566 } 4567 } 4568 4569 /* Check NEEDCOMPOUND: can't use word without compounding. Do try 4570 * appending another compound word below. */ 4571 if (sp->ts_complen == sp->ts_compsplit && fword_ends 4572 && (flags & WF_NEEDCOMP)) 4573 goodword_ends = FALSE; 4574 else 4575 goodword_ends = TRUE; 4576 4577 p = NULL; 4578 compound_ok = TRUE; 4579 if (sp->ts_complen > sp->ts_compsplit) 4580 { 4581 if (slang->sl_nobreak) 4582 { 4583 /* There was a word before this word. When there was no 4584 * change in this word (it was correct) add the first word 4585 * as a suggestion. If this word was corrected too, we 4586 * need to check if a correct word follows. */ 4587 if (sp->ts_fidx - sp->ts_splitfidx 4588 == sp->ts_twordlen - sp->ts_splitoff 4589 && STRNCMP(fword + sp->ts_splitfidx, 4590 tword + sp->ts_splitoff, 4591 sp->ts_fidx - sp->ts_splitfidx) == 0) 4592 { 4593 preword[sp->ts_prewordlen] = NUL; 4594 newscore = score_wordcount_adj(slang, sp->ts_score, 4595 preword + sp->ts_prewordlen, 4596 sp->ts_prewordlen > 0); 4597 /* Add the suggestion if the score isn't too bad. */ 4598 if (newscore <= su->su_maxscore) 4599 add_suggestion(su, &su->su_ga, preword, 4600 sp->ts_splitfidx - repextra, 4601 newscore, 0, FALSE, 4602 lp->lp_sallang, FALSE); 4603 break; 4604 } 4605 } 4606 else 4607 { 4608 /* There was a compound word before this word. If this 4609 * word does not support compounding then give up 4610 * (splitting is tried for the word without compound 4611 * flag). */ 4612 if (((unsigned)flags >> 24) == 0 4613 || sp->ts_twordlen - sp->ts_splitoff 4614 < slang->sl_compminlen) 4615 break; 4616 #ifdef FEAT_MBYTE 4617 /* For multi-byte chars check character length against 4618 * COMPOUNDMIN. */ 4619 if (has_mbyte 4620 && slang->sl_compminlen > 0 4621 && mb_charlen(tword + sp->ts_splitoff) 4622 < slang->sl_compminlen) 4623 break; 4624 #endif 4625 4626 compflags[sp->ts_complen] = ((unsigned)flags >> 24); 4627 compflags[sp->ts_complen + 1] = NUL; 4628 vim_strncpy(preword + sp->ts_prewordlen, 4629 tword + sp->ts_splitoff, 4630 sp->ts_twordlen - sp->ts_splitoff); 4631 4632 /* Verify CHECKCOMPOUNDPATTERN rules. */ 4633 if (match_checkcompoundpattern(preword, sp->ts_prewordlen, 4634 &slang->sl_comppat)) 4635 compound_ok = FALSE; 4636 4637 if (compound_ok) 4638 { 4639 p = preword; 4640 while (*skiptowhite(p) != NUL) 4641 p = skipwhite(skiptowhite(p)); 4642 if (fword_ends && !can_compound(slang, p, 4643 compflags + sp->ts_compsplit)) 4644 /* Compound is not allowed. But it may still be 4645 * possible if we add another (short) word. */ 4646 compound_ok = FALSE; 4647 } 4648 4649 /* Get pointer to last char of previous word. */ 4650 p = preword + sp->ts_prewordlen; 4651 MB_PTR_BACK(preword, p); 4652 } 4653 } 4654 4655 /* 4656 * Form the word with proper case in preword. 4657 * If there is a word from a previous split, append. 4658 * For the soundfold tree don't change the case, simply append. 4659 */ 4660 if (soundfold) 4661 STRCPY(preword + sp->ts_prewordlen, tword + sp->ts_splitoff); 4662 else if (flags & WF_KEEPCAP) 4663 /* Must find the word in the keep-case tree. */ 4664 find_keepcap_word(slang, tword + sp->ts_splitoff, 4665 preword + sp->ts_prewordlen); 4666 else 4667 { 4668 /* Include badflags: If the badword is onecap or allcap 4669 * use that for the goodword too. But if the badword is 4670 * allcap and it's only one char long use onecap. */ 4671 c = su->su_badflags; 4672 if ((c & WF_ALLCAP) 4673 #ifdef FEAT_MBYTE 4674 && su->su_badlen == (*mb_ptr2len)(su->su_badptr) 4675 #else 4676 && su->su_badlen == 1 4677 #endif 4678 ) 4679 c = WF_ONECAP; 4680 c |= flags; 4681 4682 /* When appending a compound word after a word character don't 4683 * use Onecap. */ 4684 if (p != NULL && spell_iswordp_nmw(p, curwin)) 4685 c &= ~WF_ONECAP; 4686 make_case_word(tword + sp->ts_splitoff, 4687 preword + sp->ts_prewordlen, c); 4688 } 4689 4690 if (!soundfold) 4691 { 4692 /* Don't use a banned word. It may appear again as a good 4693 * word, thus remember it. */ 4694 if (flags & WF_BANNED) 4695 { 4696 add_banned(su, preword + sp->ts_prewordlen); 4697 break; 4698 } 4699 if ((sp->ts_complen == sp->ts_compsplit 4700 && WAS_BANNED(su, preword + sp->ts_prewordlen)) 4701 || WAS_BANNED(su, preword)) 4702 { 4703 if (slang->sl_compprog == NULL) 4704 break; 4705 /* the word so far was banned but we may try compounding */ 4706 goodword_ends = FALSE; 4707 } 4708 } 4709 4710 newscore = 0; 4711 if (!soundfold) /* soundfold words don't have flags */ 4712 { 4713 if ((flags & WF_REGION) 4714 && (((unsigned)flags >> 16) & lp->lp_region) == 0) 4715 newscore += SCORE_REGION; 4716 if (flags & WF_RARE) 4717 newscore += SCORE_RARE; 4718 4719 if (!spell_valid_case(su->su_badflags, 4720 captype(preword + sp->ts_prewordlen, NULL))) 4721 newscore += SCORE_ICASE; 4722 } 4723 4724 /* TODO: how about splitting in the soundfold tree? */ 4725 if (fword_ends 4726 && goodword_ends 4727 && sp->ts_fidx >= sp->ts_fidxtry 4728 && compound_ok) 4729 { 4730 /* The badword also ends: add suggestions. */ 4731 #ifdef DEBUG_TRIEWALK 4732 if (soundfold && STRCMP(preword, "smwrd") == 0) 4733 { 4734 int j; 4735 4736 /* print the stack of changes that brought us here */ 4737 smsg("------ %s -------", fword); 4738 for (j = 0; j < depth; ++j) 4739 smsg("%s", changename[j]); 4740 } 4741 #endif 4742 if (soundfold) 4743 { 4744 /* For soundfolded words we need to find the original 4745 * words, the edit distance and then add them. */ 4746 add_sound_suggest(su, preword, sp->ts_score, lp); 4747 } 4748 else if (sp->ts_fidx > 0) 4749 { 4750 /* Give a penalty when changing non-word char to word 4751 * char, e.g., "thes," -> "these". */ 4752 p = fword + sp->ts_fidx; 4753 MB_PTR_BACK(fword, p); 4754 if (!spell_iswordp(p, curwin)) 4755 { 4756 p = preword + STRLEN(preword); 4757 MB_PTR_BACK(preword, p); 4758 if (spell_iswordp(p, curwin)) 4759 newscore += SCORE_NONWORD; 4760 } 4761 4762 /* Give a bonus to words seen before. */ 4763 score = score_wordcount_adj(slang, 4764 sp->ts_score + newscore, 4765 preword + sp->ts_prewordlen, 4766 sp->ts_prewordlen > 0); 4767 4768 /* Add the suggestion if the score isn't too bad. */ 4769 if (score <= su->su_maxscore) 4770 { 4771 add_suggestion(su, &su->su_ga, preword, 4772 sp->ts_fidx - repextra, 4773 score, 0, FALSE, lp->lp_sallang, FALSE); 4774 4775 if (su->su_badflags & WF_MIXCAP) 4776 { 4777 /* We really don't know if the word should be 4778 * upper or lower case, add both. */ 4779 c = captype(preword, NULL); 4780 if (c == 0 || c == WF_ALLCAP) 4781 { 4782 make_case_word(tword + sp->ts_splitoff, 4783 preword + sp->ts_prewordlen, 4784 c == 0 ? WF_ALLCAP : 0); 4785 4786 add_suggestion(su, &su->su_ga, preword, 4787 sp->ts_fidx - repextra, 4788 score + SCORE_ICASE, 0, FALSE, 4789 lp->lp_sallang, FALSE); 4790 } 4791 } 4792 } 4793 } 4794 } 4795 4796 /* 4797 * Try word split and/or compounding. 4798 */ 4799 if ((sp->ts_fidx >= sp->ts_fidxtry || fword_ends) 4800 #ifdef FEAT_MBYTE 4801 /* Don't split halfway a character. */ 4802 && (!has_mbyte || sp->ts_tcharlen == 0) 4803 #endif 4804 ) 4805 { 4806 int try_compound; 4807 int try_split; 4808 4809 /* If past the end of the bad word don't try a split. 4810 * Otherwise try changing the next word. E.g., find 4811 * suggestions for "the the" where the second "the" is 4812 * different. It's done like a split. 4813 * TODO: word split for soundfold words */ 4814 try_split = (sp->ts_fidx - repextra < su->su_badlen) 4815 && !soundfold; 4816 4817 /* Get here in several situations: 4818 * 1. The word in the tree ends: 4819 * If the word allows compounding try that. Otherwise try 4820 * a split by inserting a space. For both check that a 4821 * valid words starts at fword[sp->ts_fidx]. 4822 * For NOBREAK do like compounding to be able to check if 4823 * the next word is valid. 4824 * 2. The badword does end, but it was due to a change (e.g., 4825 * a swap). No need to split, but do check that the 4826 * following word is valid. 4827 * 3. The badword and the word in the tree end. It may still 4828 * be possible to compound another (short) word. 4829 */ 4830 try_compound = FALSE; 4831 if (!soundfold 4832 && !slang->sl_nocompoundsugs 4833 && slang->sl_compprog != NULL 4834 && ((unsigned)flags >> 24) != 0 4835 && sp->ts_twordlen - sp->ts_splitoff 4836 >= slang->sl_compminlen 4837 #ifdef FEAT_MBYTE 4838 && (!has_mbyte 4839 || slang->sl_compminlen == 0 4840 || mb_charlen(tword + sp->ts_splitoff) 4841 >= slang->sl_compminlen) 4842 #endif 4843 && (slang->sl_compsylmax < MAXWLEN 4844 || sp->ts_complen + 1 - sp->ts_compsplit 4845 < slang->sl_compmax) 4846 && (can_be_compound(sp, slang, 4847 compflags, ((unsigned)flags >> 24)))) 4848 4849 { 4850 try_compound = TRUE; 4851 compflags[sp->ts_complen] = ((unsigned)flags >> 24); 4852 compflags[sp->ts_complen + 1] = NUL; 4853 } 4854 4855 /* For NOBREAK we never try splitting, it won't make any word 4856 * valid. */ 4857 if (slang->sl_nobreak && !slang->sl_nocompoundsugs) 4858 try_compound = TRUE; 4859 4860 /* If we could add a compound word, and it's also possible to 4861 * split at this point, do the split first and set 4862 * TSF_DIDSPLIT to avoid doing it again. */ 4863 else if (!fword_ends 4864 && try_compound 4865 && (sp->ts_flags & TSF_DIDSPLIT) == 0) 4866 { 4867 try_compound = FALSE; 4868 sp->ts_flags |= TSF_DIDSPLIT; 4869 --sp->ts_curi; /* do the same NUL again */ 4870 compflags[sp->ts_complen] = NUL; 4871 } 4872 else 4873 sp->ts_flags &= ~TSF_DIDSPLIT; 4874 4875 if (try_split || try_compound) 4876 { 4877 if (!try_compound && (!fword_ends || !goodword_ends)) 4878 { 4879 /* If we're going to split need to check that the 4880 * words so far are valid for compounding. If there 4881 * is only one word it must not have the NEEDCOMPOUND 4882 * flag. */ 4883 if (sp->ts_complen == sp->ts_compsplit 4884 && (flags & WF_NEEDCOMP)) 4885 break; 4886 p = preword; 4887 while (*skiptowhite(p) != NUL) 4888 p = skipwhite(skiptowhite(p)); 4889 if (sp->ts_complen > sp->ts_compsplit 4890 && !can_compound(slang, p, 4891 compflags + sp->ts_compsplit)) 4892 break; 4893 4894 if (slang->sl_nosplitsugs) 4895 newscore += SCORE_SPLIT_NO; 4896 else 4897 newscore += SCORE_SPLIT; 4898 4899 /* Give a bonus to words seen before. */ 4900 newscore = score_wordcount_adj(slang, newscore, 4901 preword + sp->ts_prewordlen, TRUE); 4902 } 4903 4904 if (TRY_DEEPER(su, stack, depth, newscore)) 4905 { 4906 go_deeper(stack, depth, newscore); 4907 #ifdef DEBUG_TRIEWALK 4908 if (!try_compound && !fword_ends) 4909 sprintf(changename[depth], "%.*s-%s: split", 4910 sp->ts_twordlen, tword, fword + sp->ts_fidx); 4911 else 4912 sprintf(changename[depth], "%.*s-%s: compound", 4913 sp->ts_twordlen, tword, fword + sp->ts_fidx); 4914 #endif 4915 /* Save things to be restored at STATE_SPLITUNDO. */ 4916 sp->ts_save_badflags = su->su_badflags; 4917 PROF_STORE(sp->ts_state) 4918 sp->ts_state = STATE_SPLITUNDO; 4919 4920 ++depth; 4921 sp = &stack[depth]; 4922 4923 /* Append a space to preword when splitting. */ 4924 if (!try_compound && !fword_ends) 4925 STRCAT(preword, " "); 4926 sp->ts_prewordlen = (char_u)STRLEN(preword); 4927 sp->ts_splitoff = sp->ts_twordlen; 4928 sp->ts_splitfidx = sp->ts_fidx; 4929 4930 /* If the badword has a non-word character at this 4931 * position skip it. That means replacing the 4932 * non-word character with a space. Always skip a 4933 * character when the word ends. But only when the 4934 * good word can end. */ 4935 if (((!try_compound && !spell_iswordp_nmw(fword 4936 + sp->ts_fidx, 4937 curwin)) 4938 || fword_ends) 4939 && fword[sp->ts_fidx] != NUL 4940 && goodword_ends) 4941 { 4942 int l; 4943 4944 l = MB_PTR2LEN(fword + sp->ts_fidx); 4945 if (fword_ends) 4946 { 4947 /* Copy the skipped character to preword. */ 4948 mch_memmove(preword + sp->ts_prewordlen, 4949 fword + sp->ts_fidx, l); 4950 sp->ts_prewordlen += l; 4951 preword[sp->ts_prewordlen] = NUL; 4952 } 4953 else 4954 sp->ts_score -= SCORE_SPLIT - SCORE_SUBST; 4955 sp->ts_fidx += l; 4956 } 4957 4958 /* When compounding include compound flag in 4959 * compflags[] (already set above). When splitting we 4960 * may start compounding over again. */ 4961 if (try_compound) 4962 ++sp->ts_complen; 4963 else 4964 sp->ts_compsplit = sp->ts_complen; 4965 sp->ts_prefixdepth = PFD_NOPREFIX; 4966 4967 /* set su->su_badflags to the caps type at this 4968 * position */ 4969 #ifdef FEAT_MBYTE 4970 if (has_mbyte) 4971 n = nofold_len(fword, sp->ts_fidx, su->su_badptr); 4972 else 4973 #endif 4974 n = sp->ts_fidx; 4975 su->su_badflags = badword_captype(su->su_badptr + n, 4976 su->su_badptr + su->su_badlen); 4977 4978 /* Restart at top of the tree. */ 4979 sp->ts_arridx = 0; 4980 4981 /* If there are postponed prefixes, try these too. */ 4982 if (pbyts != NULL) 4983 { 4984 byts = pbyts; 4985 idxs = pidxs; 4986 sp->ts_prefixdepth = PFD_PREFIXTREE; 4987 PROF_STORE(sp->ts_state) 4988 sp->ts_state = STATE_NOPREFIX; 4989 } 4990 } 4991 } 4992 } 4993 break; 4994 4995 case STATE_SPLITUNDO: 4996 /* Undo the changes done for word split or compound word. */ 4997 su->su_badflags = sp->ts_save_badflags; 4998 4999 /* Continue looking for NUL bytes. */ 5000 PROF_STORE(sp->ts_state) 5001 sp->ts_state = STATE_START; 5002 5003 /* In case we went into the prefix tree. */ 5004 byts = fbyts; 5005 idxs = fidxs; 5006 break; 5007 5008 case STATE_ENDNUL: 5009 /* Past the NUL bytes in the node. */ 5010 su->su_badflags = sp->ts_save_badflags; 5011 if (fword[sp->ts_fidx] == NUL 5012 #ifdef FEAT_MBYTE 5013 && sp->ts_tcharlen == 0 5014 #endif 5015 ) 5016 { 5017 /* The badword ends, can't use STATE_PLAIN. */ 5018 PROF_STORE(sp->ts_state) 5019 sp->ts_state = STATE_DEL; 5020 break; 5021 } 5022 PROF_STORE(sp->ts_state) 5023 sp->ts_state = STATE_PLAIN; 5024 /*FALLTHROUGH*/ 5025 5026 case STATE_PLAIN: 5027 /* 5028 * Go over all possible bytes at this node, add each to tword[] 5029 * and use child node. "ts_curi" is the index. 5030 */ 5031 arridx = sp->ts_arridx; 5032 if (sp->ts_curi > byts[arridx]) 5033 { 5034 /* Done all bytes at this node, do next state. When still at 5035 * already changed bytes skip the other tricks. */ 5036 PROF_STORE(sp->ts_state) 5037 if (sp->ts_fidx >= sp->ts_fidxtry) 5038 sp->ts_state = STATE_DEL; 5039 else 5040 sp->ts_state = STATE_FINAL; 5041 } 5042 else 5043 { 5044 arridx += sp->ts_curi++; 5045 c = byts[arridx]; 5046 5047 /* Normal byte, go one level deeper. If it's not equal to the 5048 * byte in the bad word adjust the score. But don't even try 5049 * when the byte was already changed. And don't try when we 5050 * just deleted this byte, accepting it is always cheaper than 5051 * delete + substitute. */ 5052 if (c == fword[sp->ts_fidx] 5053 #ifdef FEAT_MBYTE 5054 || (sp->ts_tcharlen > 0 && sp->ts_isdiff != DIFF_NONE) 5055 #endif 5056 ) 5057 newscore = 0; 5058 else 5059 newscore = SCORE_SUBST; 5060 if ((newscore == 0 5061 || (sp->ts_fidx >= sp->ts_fidxtry 5062 && ((sp->ts_flags & TSF_DIDDEL) == 0 5063 || c != fword[sp->ts_delidx]))) 5064 && TRY_DEEPER(su, stack, depth, newscore)) 5065 { 5066 go_deeper(stack, depth, newscore); 5067 #ifdef DEBUG_TRIEWALK 5068 if (newscore > 0) 5069 sprintf(changename[depth], "%.*s-%s: subst %c to %c", 5070 sp->ts_twordlen, tword, fword + sp->ts_fidx, 5071 fword[sp->ts_fidx], c); 5072 else 5073 sprintf(changename[depth], "%.*s-%s: accept %c", 5074 sp->ts_twordlen, tword, fword + sp->ts_fidx, 5075 fword[sp->ts_fidx]); 5076 #endif 5077 ++depth; 5078 sp = &stack[depth]; 5079 ++sp->ts_fidx; 5080 tword[sp->ts_twordlen++] = c; 5081 sp->ts_arridx = idxs[arridx]; 5082 #ifdef FEAT_MBYTE 5083 if (newscore == SCORE_SUBST) 5084 sp->ts_isdiff = DIFF_YES; 5085 if (has_mbyte) 5086 { 5087 /* Multi-byte characters are a bit complicated to 5088 * handle: They differ when any of the bytes differ 5089 * and then their length may also differ. */ 5090 if (sp->ts_tcharlen == 0) 5091 { 5092 /* First byte. */ 5093 sp->ts_tcharidx = 0; 5094 sp->ts_tcharlen = MB_BYTE2LEN(c); 5095 sp->ts_fcharstart = sp->ts_fidx - 1; 5096 sp->ts_isdiff = (newscore != 0) 5097 ? DIFF_YES : DIFF_NONE; 5098 } 5099 else if (sp->ts_isdiff == DIFF_INSERT) 5100 /* When inserting trail bytes don't advance in the 5101 * bad word. */ 5102 --sp->ts_fidx; 5103 if (++sp->ts_tcharidx == sp->ts_tcharlen) 5104 { 5105 /* Last byte of character. */ 5106 if (sp->ts_isdiff == DIFF_YES) 5107 { 5108 /* Correct ts_fidx for the byte length of the 5109 * character (we didn't check that before). */ 5110 sp->ts_fidx = sp->ts_fcharstart 5111 + MB_PTR2LEN( 5112 fword + sp->ts_fcharstart); 5113 /* For changing a composing character adjust 5114 * the score from SCORE_SUBST to 5115 * SCORE_SUBCOMP. */ 5116 if (enc_utf8 5117 && utf_iscomposing( 5118 utf_ptr2char(tword 5119 + sp->ts_twordlen 5120 - sp->ts_tcharlen)) 5121 && utf_iscomposing( 5122 utf_ptr2char(fword 5123 + sp->ts_fcharstart))) 5124 sp->ts_score -= 5125 SCORE_SUBST - SCORE_SUBCOMP; 5126 5127 /* For a similar character adjust score from 5128 * SCORE_SUBST to SCORE_SIMILAR. */ 5129 else if (!soundfold 5130 && slang->sl_has_map 5131 && similar_chars(slang, 5132 mb_ptr2char(tword 5133 + sp->ts_twordlen 5134 - sp->ts_tcharlen), 5135 mb_ptr2char(fword 5136 + sp->ts_fcharstart))) 5137 sp->ts_score -= 5138 SCORE_SUBST - SCORE_SIMILAR; 5139 } 5140 else if (sp->ts_isdiff == DIFF_INSERT 5141 && sp->ts_twordlen > sp->ts_tcharlen) 5142 { 5143 p = tword + sp->ts_twordlen - sp->ts_tcharlen; 5144 c = mb_ptr2char(p); 5145 if (enc_utf8 && utf_iscomposing(c)) 5146 { 5147 /* Inserting a composing char doesn't 5148 * count that much. */ 5149 sp->ts_score -= SCORE_INS - SCORE_INSCOMP; 5150 } 5151 else 5152 { 5153 /* If the previous character was the same, 5154 * thus doubling a character, give a bonus 5155 * to the score. Also for the soundfold 5156 * tree (might seem illogical but does 5157 * give better scores). */ 5158 MB_PTR_BACK(tword, p); 5159 if (c == mb_ptr2char(p)) 5160 sp->ts_score -= SCORE_INS 5161 - SCORE_INSDUP; 5162 } 5163 } 5164 5165 /* Starting a new char, reset the length. */ 5166 sp->ts_tcharlen = 0; 5167 } 5168 } 5169 else 5170 #endif 5171 { 5172 /* If we found a similar char adjust the score. 5173 * We do this after calling go_deeper() because 5174 * it's slow. */ 5175 if (newscore != 0 5176 && !soundfold 5177 && slang->sl_has_map 5178 && similar_chars(slang, 5179 c, fword[sp->ts_fidx - 1])) 5180 sp->ts_score -= SCORE_SUBST - SCORE_SIMILAR; 5181 } 5182 } 5183 } 5184 break; 5185 5186 case STATE_DEL: 5187 #ifdef FEAT_MBYTE 5188 /* When past the first byte of a multi-byte char don't try 5189 * delete/insert/swap a character. */ 5190 if (has_mbyte && sp->ts_tcharlen > 0) 5191 { 5192 PROF_STORE(sp->ts_state) 5193 sp->ts_state = STATE_FINAL; 5194 break; 5195 } 5196 #endif 5197 /* 5198 * Try skipping one character in the bad word (delete it). 5199 */ 5200 PROF_STORE(sp->ts_state) 5201 sp->ts_state = STATE_INS_PREP; 5202 sp->ts_curi = 1; 5203 if (soundfold && sp->ts_fidx == 0 && fword[sp->ts_fidx] == '*') 5204 /* Deleting a vowel at the start of a word counts less, see 5205 * soundalike_score(). */ 5206 newscore = 2 * SCORE_DEL / 3; 5207 else 5208 newscore = SCORE_DEL; 5209 if (fword[sp->ts_fidx] != NUL 5210 && TRY_DEEPER(su, stack, depth, newscore)) 5211 { 5212 go_deeper(stack, depth, newscore); 5213 #ifdef DEBUG_TRIEWALK 5214 sprintf(changename[depth], "%.*s-%s: delete %c", 5215 sp->ts_twordlen, tword, fword + sp->ts_fidx, 5216 fword[sp->ts_fidx]); 5217 #endif 5218 ++depth; 5219 5220 /* Remember what character we deleted, so that we can avoid 5221 * inserting it again. */ 5222 stack[depth].ts_flags |= TSF_DIDDEL; 5223 stack[depth].ts_delidx = sp->ts_fidx; 5224 5225 /* Advance over the character in fword[]. Give a bonus to the 5226 * score if the same character is following "nn" -> "n". It's 5227 * a bit illogical for soundfold tree but it does give better 5228 * results. */ 5229 #ifdef FEAT_MBYTE 5230 if (has_mbyte) 5231 { 5232 c = mb_ptr2char(fword + sp->ts_fidx); 5233 stack[depth].ts_fidx += MB_PTR2LEN(fword + sp->ts_fidx); 5234 if (enc_utf8 && utf_iscomposing(c)) 5235 stack[depth].ts_score -= SCORE_DEL - SCORE_DELCOMP; 5236 else if (c == mb_ptr2char(fword + stack[depth].ts_fidx)) 5237 stack[depth].ts_score -= SCORE_DEL - SCORE_DELDUP; 5238 } 5239 else 5240 #endif 5241 { 5242 ++stack[depth].ts_fidx; 5243 if (fword[sp->ts_fidx] == fword[sp->ts_fidx + 1]) 5244 stack[depth].ts_score -= SCORE_DEL - SCORE_DELDUP; 5245 } 5246 break; 5247 } 5248 /*FALLTHROUGH*/ 5249 5250 case STATE_INS_PREP: 5251 if (sp->ts_flags & TSF_DIDDEL) 5252 { 5253 /* If we just deleted a byte then inserting won't make sense, 5254 * a substitute is always cheaper. */ 5255 PROF_STORE(sp->ts_state) 5256 sp->ts_state = STATE_SWAP; 5257 break; 5258 } 5259 5260 /* skip over NUL bytes */ 5261 n = sp->ts_arridx; 5262 for (;;) 5263 { 5264 if (sp->ts_curi > byts[n]) 5265 { 5266 /* Only NUL bytes at this node, go to next state. */ 5267 PROF_STORE(sp->ts_state) 5268 sp->ts_state = STATE_SWAP; 5269 break; 5270 } 5271 if (byts[n + sp->ts_curi] != NUL) 5272 { 5273 /* Found a byte to insert. */ 5274 PROF_STORE(sp->ts_state) 5275 sp->ts_state = STATE_INS; 5276 break; 5277 } 5278 ++sp->ts_curi; 5279 } 5280 break; 5281 5282 /*FALLTHROUGH*/ 5283 5284 case STATE_INS: 5285 /* Insert one byte. Repeat this for each possible byte at this 5286 * node. */ 5287 n = sp->ts_arridx; 5288 if (sp->ts_curi > byts[n]) 5289 { 5290 /* Done all bytes at this node, go to next state. */ 5291 PROF_STORE(sp->ts_state) 5292 sp->ts_state = STATE_SWAP; 5293 break; 5294 } 5295 5296 /* Do one more byte at this node, but: 5297 * - Skip NUL bytes. 5298 * - Skip the byte if it's equal to the byte in the word, 5299 * accepting that byte is always better. 5300 */ 5301 n += sp->ts_curi++; 5302 c = byts[n]; 5303 if (soundfold && sp->ts_twordlen == 0 && c == '*') 5304 /* Inserting a vowel at the start of a word counts less, 5305 * see soundalike_score(). */ 5306 newscore = 2 * SCORE_INS / 3; 5307 else 5308 newscore = SCORE_INS; 5309 if (c != fword[sp->ts_fidx] 5310 && TRY_DEEPER(su, stack, depth, newscore)) 5311 { 5312 go_deeper(stack, depth, newscore); 5313 #ifdef DEBUG_TRIEWALK 5314 sprintf(changename[depth], "%.*s-%s: insert %c", 5315 sp->ts_twordlen, tword, fword + sp->ts_fidx, 5316 c); 5317 #endif 5318 ++depth; 5319 sp = &stack[depth]; 5320 tword[sp->ts_twordlen++] = c; 5321 sp->ts_arridx = idxs[n]; 5322 #ifdef FEAT_MBYTE 5323 if (has_mbyte) 5324 { 5325 fl = MB_BYTE2LEN(c); 5326 if (fl > 1) 5327 { 5328 /* There are following bytes for the same character. 5329 * We must find all bytes before trying 5330 * delete/insert/swap/etc. */ 5331 sp->ts_tcharlen = fl; 5332 sp->ts_tcharidx = 1; 5333 sp->ts_isdiff = DIFF_INSERT; 5334 } 5335 } 5336 else 5337 fl = 1; 5338 if (fl == 1) 5339 #endif 5340 { 5341 /* If the previous character was the same, thus doubling a 5342 * character, give a bonus to the score. Also for 5343 * soundfold words (illogical but does give a better 5344 * score). */ 5345 if (sp->ts_twordlen >= 2 5346 && tword[sp->ts_twordlen - 2] == c) 5347 sp->ts_score -= SCORE_INS - SCORE_INSDUP; 5348 } 5349 } 5350 break; 5351 5352 case STATE_SWAP: 5353 /* 5354 * Swap two bytes in the bad word: "12" -> "21". 5355 * We change "fword" here, it's changed back afterwards at 5356 * STATE_UNSWAP. 5357 */ 5358 p = fword + sp->ts_fidx; 5359 c = *p; 5360 if (c == NUL) 5361 { 5362 /* End of word, can't swap or replace. */ 5363 PROF_STORE(sp->ts_state) 5364 sp->ts_state = STATE_FINAL; 5365 break; 5366 } 5367 5368 /* Don't swap if the first character is not a word character. 5369 * SWAP3 etc. also don't make sense then. */ 5370 if (!soundfold && !spell_iswordp(p, curwin)) 5371 { 5372 PROF_STORE(sp->ts_state) 5373 sp->ts_state = STATE_REP_INI; 5374 break; 5375 } 5376 5377 #ifdef FEAT_MBYTE 5378 if (has_mbyte) 5379 { 5380 n = MB_CPTR2LEN(p); 5381 c = mb_ptr2char(p); 5382 if (p[n] == NUL) 5383 c2 = NUL; 5384 else if (!soundfold && !spell_iswordp(p + n, curwin)) 5385 c2 = c; /* don't swap non-word char */ 5386 else 5387 c2 = mb_ptr2char(p + n); 5388 } 5389 else 5390 #endif 5391 { 5392 if (p[1] == NUL) 5393 c2 = NUL; 5394 else if (!soundfold && !spell_iswordp(p + 1, curwin)) 5395 c2 = c; /* don't swap non-word char */ 5396 else 5397 c2 = p[1]; 5398 } 5399 5400 /* When the second character is NUL we can't swap. */ 5401 if (c2 == NUL) 5402 { 5403 PROF_STORE(sp->ts_state) 5404 sp->ts_state = STATE_REP_INI; 5405 break; 5406 } 5407 5408 /* When characters are identical, swap won't do anything. 5409 * Also get here if the second char is not a word character. */ 5410 if (c == c2) 5411 { 5412 PROF_STORE(sp->ts_state) 5413 sp->ts_state = STATE_SWAP3; 5414 break; 5415 } 5416 if (c2 != NUL && TRY_DEEPER(su, stack, depth, SCORE_SWAP)) 5417 { 5418 go_deeper(stack, depth, SCORE_SWAP); 5419 #ifdef DEBUG_TRIEWALK 5420 sprintf(changename[depth], "%.*s-%s: swap %c and %c", 5421 sp->ts_twordlen, tword, fword + sp->ts_fidx, 5422 c, c2); 5423 #endif 5424 PROF_STORE(sp->ts_state) 5425 sp->ts_state = STATE_UNSWAP; 5426 ++depth; 5427 #ifdef FEAT_MBYTE 5428 if (has_mbyte) 5429 { 5430 fl = mb_char2len(c2); 5431 mch_memmove(p, p + n, fl); 5432 mb_char2bytes(c, p + fl); 5433 stack[depth].ts_fidxtry = sp->ts_fidx + n + fl; 5434 } 5435 else 5436 #endif 5437 { 5438 p[0] = c2; 5439 p[1] = c; 5440 stack[depth].ts_fidxtry = sp->ts_fidx + 2; 5441 } 5442 } 5443 else 5444 { 5445 /* If this swap doesn't work then SWAP3 won't either. */ 5446 PROF_STORE(sp->ts_state) 5447 sp->ts_state = STATE_REP_INI; 5448 } 5449 break; 5450 5451 case STATE_UNSWAP: 5452 /* Undo the STATE_SWAP swap: "21" -> "12". */ 5453 p = fword + sp->ts_fidx; 5454 #ifdef FEAT_MBYTE 5455 if (has_mbyte) 5456 { 5457 n = MB_PTR2LEN(p); 5458 c = mb_ptr2char(p + n); 5459 mch_memmove(p + MB_PTR2LEN(p + n), p, n); 5460 mb_char2bytes(c, p); 5461 } 5462 else 5463 #endif 5464 { 5465 c = *p; 5466 *p = p[1]; 5467 p[1] = c; 5468 } 5469 /*FALLTHROUGH*/ 5470 5471 case STATE_SWAP3: 5472 /* Swap two bytes, skipping one: "123" -> "321". We change 5473 * "fword" here, it's changed back afterwards at STATE_UNSWAP3. */ 5474 p = fword + sp->ts_fidx; 5475 #ifdef FEAT_MBYTE 5476 if (has_mbyte) 5477 { 5478 n = MB_CPTR2LEN(p); 5479 c = mb_ptr2char(p); 5480 fl = MB_CPTR2LEN(p + n); 5481 c2 = mb_ptr2char(p + n); 5482 if (!soundfold && !spell_iswordp(p + n + fl, curwin)) 5483 c3 = c; /* don't swap non-word char */ 5484 else 5485 c3 = mb_ptr2char(p + n + fl); 5486 } 5487 else 5488 #endif 5489 { 5490 c = *p; 5491 c2 = p[1]; 5492 if (!soundfold && !spell_iswordp(p + 2, curwin)) 5493 c3 = c; /* don't swap non-word char */ 5494 else 5495 c3 = p[2]; 5496 } 5497 5498 /* When characters are identical: "121" then SWAP3 result is 5499 * identical, ROT3L result is same as SWAP: "211", ROT3L result is 5500 * same as SWAP on next char: "112". Thus skip all swapping. 5501 * Also skip when c3 is NUL. 5502 * Also get here when the third character is not a word character. 5503 * Second character may any char: "a.b" -> "b.a" */ 5504 if (c == c3 || c3 == NUL) 5505 { 5506 PROF_STORE(sp->ts_state) 5507 sp->ts_state = STATE_REP_INI; 5508 break; 5509 } 5510 if (TRY_DEEPER(su, stack, depth, SCORE_SWAP3)) 5511 { 5512 go_deeper(stack, depth, SCORE_SWAP3); 5513 #ifdef DEBUG_TRIEWALK 5514 sprintf(changename[depth], "%.*s-%s: swap3 %c and %c", 5515 sp->ts_twordlen, tword, fword + sp->ts_fidx, 5516 c, c3); 5517 #endif 5518 PROF_STORE(sp->ts_state) 5519 sp->ts_state = STATE_UNSWAP3; 5520 ++depth; 5521 #ifdef FEAT_MBYTE 5522 if (has_mbyte) 5523 { 5524 tl = mb_char2len(c3); 5525 mch_memmove(p, p + n + fl, tl); 5526 mb_char2bytes(c2, p + tl); 5527 mb_char2bytes(c, p + fl + tl); 5528 stack[depth].ts_fidxtry = sp->ts_fidx + n + fl + tl; 5529 } 5530 else 5531 #endif 5532 { 5533 p[0] = p[2]; 5534 p[2] = c; 5535 stack[depth].ts_fidxtry = sp->ts_fidx + 3; 5536 } 5537 } 5538 else 5539 { 5540 PROF_STORE(sp->ts_state) 5541 sp->ts_state = STATE_REP_INI; 5542 } 5543 break; 5544 5545 case STATE_UNSWAP3: 5546 /* Undo STATE_SWAP3: "321" -> "123" */ 5547 p = fword + sp->ts_fidx; 5548 #ifdef FEAT_MBYTE 5549 if (has_mbyte) 5550 { 5551 n = MB_PTR2LEN(p); 5552 c2 = mb_ptr2char(p + n); 5553 fl = MB_PTR2LEN(p + n); 5554 c = mb_ptr2char(p + n + fl); 5555 tl = MB_PTR2LEN(p + n + fl); 5556 mch_memmove(p + fl + tl, p, n); 5557 mb_char2bytes(c, p); 5558 mb_char2bytes(c2, p + tl); 5559 p = p + tl; 5560 } 5561 else 5562 #endif 5563 { 5564 c = *p; 5565 *p = p[2]; 5566 p[2] = c; 5567 ++p; 5568 } 5569 5570 if (!soundfold && !spell_iswordp(p, curwin)) 5571 { 5572 /* Middle char is not a word char, skip the rotate. First and 5573 * third char were already checked at swap and swap3. */ 5574 PROF_STORE(sp->ts_state) 5575 sp->ts_state = STATE_REP_INI; 5576 break; 5577 } 5578 5579 /* Rotate three characters left: "123" -> "231". We change 5580 * "fword" here, it's changed back afterwards at STATE_UNROT3L. */ 5581 if (TRY_DEEPER(su, stack, depth, SCORE_SWAP3)) 5582 { 5583 go_deeper(stack, depth, SCORE_SWAP3); 5584 #ifdef DEBUG_TRIEWALK 5585 p = fword + sp->ts_fidx; 5586 sprintf(changename[depth], "%.*s-%s: rotate left %c%c%c", 5587 sp->ts_twordlen, tword, fword + sp->ts_fidx, 5588 p[0], p[1], p[2]); 5589 #endif 5590 PROF_STORE(sp->ts_state) 5591 sp->ts_state = STATE_UNROT3L; 5592 ++depth; 5593 p = fword + sp->ts_fidx; 5594 #ifdef FEAT_MBYTE 5595 if (has_mbyte) 5596 { 5597 n = MB_CPTR2LEN(p); 5598 c = mb_ptr2char(p); 5599 fl = MB_CPTR2LEN(p + n); 5600 fl += MB_CPTR2LEN(p + n + fl); 5601 mch_memmove(p, p + n, fl); 5602 mb_char2bytes(c, p + fl); 5603 stack[depth].ts_fidxtry = sp->ts_fidx + n + fl; 5604 } 5605 else 5606 #endif 5607 { 5608 c = *p; 5609 *p = p[1]; 5610 p[1] = p[2]; 5611 p[2] = c; 5612 stack[depth].ts_fidxtry = sp->ts_fidx + 3; 5613 } 5614 } 5615 else 5616 { 5617 PROF_STORE(sp->ts_state) 5618 sp->ts_state = STATE_REP_INI; 5619 } 5620 break; 5621 5622 case STATE_UNROT3L: 5623 /* Undo ROT3L: "231" -> "123" */ 5624 p = fword + sp->ts_fidx; 5625 #ifdef FEAT_MBYTE 5626 if (has_mbyte) 5627 { 5628 n = MB_PTR2LEN(p); 5629 n += MB_PTR2LEN(p + n); 5630 c = mb_ptr2char(p + n); 5631 tl = MB_PTR2LEN(p + n); 5632 mch_memmove(p + tl, p, n); 5633 mb_char2bytes(c, p); 5634 } 5635 else 5636 #endif 5637 { 5638 c = p[2]; 5639 p[2] = p[1]; 5640 p[1] = *p; 5641 *p = c; 5642 } 5643 5644 /* Rotate three bytes right: "123" -> "312". We change "fword" 5645 * here, it's changed back afterwards at STATE_UNROT3R. */ 5646 if (TRY_DEEPER(su, stack, depth, SCORE_SWAP3)) 5647 { 5648 go_deeper(stack, depth, SCORE_SWAP3); 5649 #ifdef DEBUG_TRIEWALK 5650 p = fword + sp->ts_fidx; 5651 sprintf(changename[depth], "%.*s-%s: rotate right %c%c%c", 5652 sp->ts_twordlen, tword, fword + sp->ts_fidx, 5653 p[0], p[1], p[2]); 5654 #endif 5655 PROF_STORE(sp->ts_state) 5656 sp->ts_state = STATE_UNROT3R; 5657 ++depth; 5658 p = fword + sp->ts_fidx; 5659 #ifdef FEAT_MBYTE 5660 if (has_mbyte) 5661 { 5662 n = MB_CPTR2LEN(p); 5663 n += MB_CPTR2LEN(p + n); 5664 c = mb_ptr2char(p + n); 5665 tl = MB_CPTR2LEN(p + n); 5666 mch_memmove(p + tl, p, n); 5667 mb_char2bytes(c, p); 5668 stack[depth].ts_fidxtry = sp->ts_fidx + n + tl; 5669 } 5670 else 5671 #endif 5672 { 5673 c = p[2]; 5674 p[2] = p[1]; 5675 p[1] = *p; 5676 *p = c; 5677 stack[depth].ts_fidxtry = sp->ts_fidx + 3; 5678 } 5679 } 5680 else 5681 { 5682 PROF_STORE(sp->ts_state) 5683 sp->ts_state = STATE_REP_INI; 5684 } 5685 break; 5686 5687 case STATE_UNROT3R: 5688 /* Undo ROT3R: "312" -> "123" */ 5689 p = fword + sp->ts_fidx; 5690 #ifdef FEAT_MBYTE 5691 if (has_mbyte) 5692 { 5693 c = mb_ptr2char(p); 5694 tl = MB_PTR2LEN(p); 5695 n = MB_PTR2LEN(p + tl); 5696 n += MB_PTR2LEN(p + tl + n); 5697 mch_memmove(p, p + tl, n); 5698 mb_char2bytes(c, p + n); 5699 } 5700 else 5701 #endif 5702 { 5703 c = *p; 5704 *p = p[1]; 5705 p[1] = p[2]; 5706 p[2] = c; 5707 } 5708 /*FALLTHROUGH*/ 5709 5710 case STATE_REP_INI: 5711 /* Check if matching with REP items from the .aff file would work. 5712 * Quickly skip if: 5713 * - there are no REP items and we are not in the soundfold trie 5714 * - the score is going to be too high anyway 5715 * - already applied a REP item or swapped here */ 5716 if ((lp->lp_replang == NULL && !soundfold) 5717 || sp->ts_score + SCORE_REP >= su->su_maxscore 5718 || sp->ts_fidx < sp->ts_fidxtry) 5719 { 5720 PROF_STORE(sp->ts_state) 5721 sp->ts_state = STATE_FINAL; 5722 break; 5723 } 5724 5725 /* Use the first byte to quickly find the first entry that may 5726 * match. If the index is -1 there is none. */ 5727 if (soundfold) 5728 sp->ts_curi = slang->sl_repsal_first[fword[sp->ts_fidx]]; 5729 else 5730 sp->ts_curi = lp->lp_replang->sl_rep_first[fword[sp->ts_fidx]]; 5731 5732 if (sp->ts_curi < 0) 5733 { 5734 PROF_STORE(sp->ts_state) 5735 sp->ts_state = STATE_FINAL; 5736 break; 5737 } 5738 5739 PROF_STORE(sp->ts_state) 5740 sp->ts_state = STATE_REP; 5741 /*FALLTHROUGH*/ 5742 5743 case STATE_REP: 5744 /* Try matching with REP items from the .aff file. For each match 5745 * replace the characters and check if the resulting word is 5746 * valid. */ 5747 p = fword + sp->ts_fidx; 5748 5749 if (soundfold) 5750 gap = &slang->sl_repsal; 5751 else 5752 gap = &lp->lp_replang->sl_rep; 5753 while (sp->ts_curi < gap->ga_len) 5754 { 5755 ftp = (fromto_T *)gap->ga_data + sp->ts_curi++; 5756 if (*ftp->ft_from != *p) 5757 { 5758 /* past possible matching entries */ 5759 sp->ts_curi = gap->ga_len; 5760 break; 5761 } 5762 if (STRNCMP(ftp->ft_from, p, STRLEN(ftp->ft_from)) == 0 5763 && TRY_DEEPER(su, stack, depth, SCORE_REP)) 5764 { 5765 go_deeper(stack, depth, SCORE_REP); 5766 #ifdef DEBUG_TRIEWALK 5767 sprintf(changename[depth], "%.*s-%s: replace %s with %s", 5768 sp->ts_twordlen, tword, fword + sp->ts_fidx, 5769 ftp->ft_from, ftp->ft_to); 5770 #endif 5771 /* Need to undo this afterwards. */ 5772 PROF_STORE(sp->ts_state) 5773 sp->ts_state = STATE_REP_UNDO; 5774 5775 /* Change the "from" to the "to" string. */ 5776 ++depth; 5777 fl = (int)STRLEN(ftp->ft_from); 5778 tl = (int)STRLEN(ftp->ft_to); 5779 if (fl != tl) 5780 { 5781 STRMOVE(p + tl, p + fl); 5782 repextra += tl - fl; 5783 } 5784 mch_memmove(p, ftp->ft_to, tl); 5785 stack[depth].ts_fidxtry = sp->ts_fidx + tl; 5786 #ifdef FEAT_MBYTE 5787 stack[depth].ts_tcharlen = 0; 5788 #endif 5789 break; 5790 } 5791 } 5792 5793 if (sp->ts_curi >= gap->ga_len && sp->ts_state == STATE_REP) 5794 { 5795 /* No (more) matches. */ 5796 PROF_STORE(sp->ts_state) 5797 sp->ts_state = STATE_FINAL; 5798 } 5799 5800 break; 5801 5802 case STATE_REP_UNDO: 5803 /* Undo a REP replacement and continue with the next one. */ 5804 if (soundfold) 5805 gap = &slang->sl_repsal; 5806 else 5807 gap = &lp->lp_replang->sl_rep; 5808 ftp = (fromto_T *)gap->ga_data + sp->ts_curi - 1; 5809 fl = (int)STRLEN(ftp->ft_from); 5810 tl = (int)STRLEN(ftp->ft_to); 5811 p = fword + sp->ts_fidx; 5812 if (fl != tl) 5813 { 5814 STRMOVE(p + fl, p + tl); 5815 repextra -= tl - fl; 5816 } 5817 mch_memmove(p, ftp->ft_from, fl); 5818 PROF_STORE(sp->ts_state) 5819 sp->ts_state = STATE_REP; 5820 break; 5821 5822 default: 5823 /* Did all possible states at this level, go up one level. */ 5824 --depth; 5825 5826 if (depth >= 0 && stack[depth].ts_prefixdepth == PFD_PREFIXTREE) 5827 { 5828 /* Continue in or go back to the prefix tree. */ 5829 byts = pbyts; 5830 idxs = pidxs; 5831 } 5832 5833 /* Don't check for CTRL-C too often, it takes time. */ 5834 if (--breakcheckcount == 0) 5835 { 5836 ui_breakcheck(); 5837 breakcheckcount = 1000; 5838 } 5839 } 5840 } 5841 } 5842 5843 5844 /* 5845 * Go one level deeper in the tree. 5846 */ 5847 static void 5848 go_deeper(trystate_T *stack, int depth, int score_add) 5849 { 5850 stack[depth + 1] = stack[depth]; 5851 stack[depth + 1].ts_state = STATE_START; 5852 stack[depth + 1].ts_score = stack[depth].ts_score + score_add; 5853 stack[depth + 1].ts_curi = 1; /* start just after length byte */ 5854 stack[depth + 1].ts_flags = 0; 5855 } 5856 5857 #ifdef FEAT_MBYTE 5858 /* 5859 * Case-folding may change the number of bytes: Count nr of chars in 5860 * fword[flen] and return the byte length of that many chars in "word". 5861 */ 5862 static int 5863 nofold_len(char_u *fword, int flen, char_u *word) 5864 { 5865 char_u *p; 5866 int i = 0; 5867 5868 for (p = fword; p < fword + flen; MB_PTR_ADV(p)) 5869 ++i; 5870 for (p = word; i > 0; MB_PTR_ADV(p)) 5871 --i; 5872 return (int)(p - word); 5873 } 5874 #endif 5875 5876 /* 5877 * "fword" is a good word with case folded. Find the matching keep-case 5878 * words and put it in "kword". 5879 * Theoretically there could be several keep-case words that result in the 5880 * same case-folded word, but we only find one... 5881 */ 5882 static void 5883 find_keepcap_word(slang_T *slang, char_u *fword, char_u *kword) 5884 { 5885 char_u uword[MAXWLEN]; /* "fword" in upper-case */ 5886 int depth; 5887 idx_T tryidx; 5888 5889 /* The following arrays are used at each depth in the tree. */ 5890 idx_T arridx[MAXWLEN]; 5891 int round[MAXWLEN]; 5892 int fwordidx[MAXWLEN]; 5893 int uwordidx[MAXWLEN]; 5894 int kwordlen[MAXWLEN]; 5895 5896 int flen, ulen; 5897 int l; 5898 int len; 5899 int c; 5900 idx_T lo, hi, m; 5901 char_u *p; 5902 char_u *byts = slang->sl_kbyts; /* array with bytes of the words */ 5903 idx_T *idxs = slang->sl_kidxs; /* array with indexes */ 5904 5905 if (byts == NULL) 5906 { 5907 /* array is empty: "cannot happen" */ 5908 *kword = NUL; 5909 return; 5910 } 5911 5912 /* Make an all-cap version of "fword". */ 5913 allcap_copy(fword, uword); 5914 5915 /* 5916 * Each character needs to be tried both case-folded and upper-case. 5917 * All this gets very complicated if we keep in mind that changing case 5918 * may change the byte length of a multi-byte character... 5919 */ 5920 depth = 0; 5921 arridx[0] = 0; 5922 round[0] = 0; 5923 fwordidx[0] = 0; 5924 uwordidx[0] = 0; 5925 kwordlen[0] = 0; 5926 while (depth >= 0) 5927 { 5928 if (fword[fwordidx[depth]] == NUL) 5929 { 5930 /* We are at the end of "fword". If the tree allows a word to end 5931 * here we have found a match. */ 5932 if (byts[arridx[depth] + 1] == 0) 5933 { 5934 kword[kwordlen[depth]] = NUL; 5935 return; 5936 } 5937 5938 /* kword is getting too long, continue one level up */ 5939 --depth; 5940 } 5941 else if (++round[depth] > 2) 5942 { 5943 /* tried both fold-case and upper-case character, continue one 5944 * level up */ 5945 --depth; 5946 } 5947 else 5948 { 5949 /* 5950 * round[depth] == 1: Try using the folded-case character. 5951 * round[depth] == 2: Try using the upper-case character. 5952 */ 5953 #ifdef FEAT_MBYTE 5954 if (has_mbyte) 5955 { 5956 flen = MB_CPTR2LEN(fword + fwordidx[depth]); 5957 ulen = MB_CPTR2LEN(uword + uwordidx[depth]); 5958 } 5959 else 5960 #endif 5961 ulen = flen = 1; 5962 if (round[depth] == 1) 5963 { 5964 p = fword + fwordidx[depth]; 5965 l = flen; 5966 } 5967 else 5968 { 5969 p = uword + uwordidx[depth]; 5970 l = ulen; 5971 } 5972 5973 for (tryidx = arridx[depth]; l > 0; --l) 5974 { 5975 /* Perform a binary search in the list of accepted bytes. */ 5976 len = byts[tryidx++]; 5977 c = *p++; 5978 lo = tryidx; 5979 hi = tryidx + len - 1; 5980 while (lo < hi) 5981 { 5982 m = (lo + hi) / 2; 5983 if (byts[m] > c) 5984 hi = m - 1; 5985 else if (byts[m] < c) 5986 lo = m + 1; 5987 else 5988 { 5989 lo = hi = m; 5990 break; 5991 } 5992 } 5993 5994 /* Stop if there is no matching byte. */ 5995 if (hi < lo || byts[lo] != c) 5996 break; 5997 5998 /* Continue at the child (if there is one). */ 5999 tryidx = idxs[lo]; 6000 } 6001 6002 if (l == 0) 6003 { 6004 /* 6005 * Found the matching char. Copy it to "kword" and go a 6006 * level deeper. 6007 */ 6008 if (round[depth] == 1) 6009 { 6010 STRNCPY(kword + kwordlen[depth], fword + fwordidx[depth], 6011 flen); 6012 kwordlen[depth + 1] = kwordlen[depth] + flen; 6013 } 6014 else 6015 { 6016 STRNCPY(kword + kwordlen[depth], uword + uwordidx[depth], 6017 ulen); 6018 kwordlen[depth + 1] = kwordlen[depth] + ulen; 6019 } 6020 fwordidx[depth + 1] = fwordidx[depth] + flen; 6021 uwordidx[depth + 1] = uwordidx[depth] + ulen; 6022 6023 ++depth; 6024 arridx[depth] = tryidx; 6025 round[depth] = 0; 6026 } 6027 } 6028 } 6029 6030 /* Didn't find it: "cannot happen". */ 6031 *kword = NUL; 6032 } 6033 6034 /* 6035 * Compute the sound-a-like score for suggestions in su->su_ga and add them to 6036 * su->su_sga. 6037 */ 6038 static void 6039 score_comp_sal(suginfo_T *su) 6040 { 6041 langp_T *lp; 6042 char_u badsound[MAXWLEN]; 6043 int i; 6044 suggest_T *stp; 6045 suggest_T *sstp; 6046 int score; 6047 int lpi; 6048 6049 if (ga_grow(&su->su_sga, su->su_ga.ga_len) == FAIL) 6050 return; 6051 6052 /* Use the sound-folding of the first language that supports it. */ 6053 for (lpi = 0; lpi < curwin->w_s->b_langp.ga_len; ++lpi) 6054 { 6055 lp = LANGP_ENTRY(curwin->w_s->b_langp, lpi); 6056 if (lp->lp_slang->sl_sal.ga_len > 0) 6057 { 6058 /* soundfold the bad word */ 6059 spell_soundfold(lp->lp_slang, su->su_fbadword, TRUE, badsound); 6060 6061 for (i = 0; i < su->su_ga.ga_len; ++i) 6062 { 6063 stp = &SUG(su->su_ga, i); 6064 6065 /* Case-fold the suggested word, sound-fold it and compute the 6066 * sound-a-like score. */ 6067 score = stp_sal_score(stp, su, lp->lp_slang, badsound); 6068 if (score < SCORE_MAXMAX) 6069 { 6070 /* Add the suggestion. */ 6071 sstp = &SUG(su->su_sga, su->su_sga.ga_len); 6072 sstp->st_word = vim_strsave(stp->st_word); 6073 if (sstp->st_word != NULL) 6074 { 6075 sstp->st_wordlen = stp->st_wordlen; 6076 sstp->st_score = score; 6077 sstp->st_altscore = 0; 6078 sstp->st_orglen = stp->st_orglen; 6079 ++su->su_sga.ga_len; 6080 } 6081 } 6082 } 6083 break; 6084 } 6085 } 6086 } 6087 6088 /* 6089 * Combine the list of suggestions in su->su_ga and su->su_sga. 6090 * They are entwined. 6091 */ 6092 static void 6093 score_combine(suginfo_T *su) 6094 { 6095 int i; 6096 int j; 6097 garray_T ga; 6098 garray_T *gap; 6099 langp_T *lp; 6100 suggest_T *stp; 6101 char_u *p; 6102 char_u badsound[MAXWLEN]; 6103 int round; 6104 int lpi; 6105 slang_T *slang = NULL; 6106 6107 /* Add the alternate score to su_ga. */ 6108 for (lpi = 0; lpi < curwin->w_s->b_langp.ga_len; ++lpi) 6109 { 6110 lp = LANGP_ENTRY(curwin->w_s->b_langp, lpi); 6111 if (lp->lp_slang->sl_sal.ga_len > 0) 6112 { 6113 /* soundfold the bad word */ 6114 slang = lp->lp_slang; 6115 spell_soundfold(slang, su->su_fbadword, TRUE, badsound); 6116 6117 for (i = 0; i < su->su_ga.ga_len; ++i) 6118 { 6119 stp = &SUG(su->su_ga, i); 6120 stp->st_altscore = stp_sal_score(stp, su, slang, badsound); 6121 if (stp->st_altscore == SCORE_MAXMAX) 6122 stp->st_score = (stp->st_score * 3 + SCORE_BIG) / 4; 6123 else 6124 stp->st_score = (stp->st_score * 3 6125 + stp->st_altscore) / 4; 6126 stp->st_salscore = FALSE; 6127 } 6128 break; 6129 } 6130 } 6131 6132 if (slang == NULL) /* Using "double" without sound folding. */ 6133 { 6134 (void)cleanup_suggestions(&su->su_ga, su->su_maxscore, 6135 su->su_maxcount); 6136 return; 6137 } 6138 6139 /* Add the alternate score to su_sga. */ 6140 for (i = 0; i < su->su_sga.ga_len; ++i) 6141 { 6142 stp = &SUG(su->su_sga, i); 6143 stp->st_altscore = spell_edit_score(slang, 6144 su->su_badword, stp->st_word); 6145 if (stp->st_score == SCORE_MAXMAX) 6146 stp->st_score = (SCORE_BIG * 7 + stp->st_altscore) / 8; 6147 else 6148 stp->st_score = (stp->st_score * 7 + stp->st_altscore) / 8; 6149 stp->st_salscore = TRUE; 6150 } 6151 6152 /* Remove bad suggestions, sort the suggestions and truncate at "maxcount" 6153 * for both lists. */ 6154 check_suggestions(su, &su->su_ga); 6155 (void)cleanup_suggestions(&su->su_ga, su->su_maxscore, su->su_maxcount); 6156 check_suggestions(su, &su->su_sga); 6157 (void)cleanup_suggestions(&su->su_sga, su->su_maxscore, su->su_maxcount); 6158 6159 ga_init2(&ga, (int)sizeof(suginfo_T), 1); 6160 if (ga_grow(&ga, su->su_ga.ga_len + su->su_sga.ga_len) == FAIL) 6161 return; 6162 6163 stp = &SUG(ga, 0); 6164 for (i = 0; i < su->su_ga.ga_len || i < su->su_sga.ga_len; ++i) 6165 { 6166 /* round 1: get a suggestion from su_ga 6167 * round 2: get a suggestion from su_sga */ 6168 for (round = 1; round <= 2; ++round) 6169 { 6170 gap = round == 1 ? &su->su_ga : &su->su_sga; 6171 if (i < gap->ga_len) 6172 { 6173 /* Don't add a word if it's already there. */ 6174 p = SUG(*gap, i).st_word; 6175 for (j = 0; j < ga.ga_len; ++j) 6176 if (STRCMP(stp[j].st_word, p) == 0) 6177 break; 6178 if (j == ga.ga_len) 6179 stp[ga.ga_len++] = SUG(*gap, i); 6180 else 6181 vim_free(p); 6182 } 6183 } 6184 } 6185 6186 ga_clear(&su->su_ga); 6187 ga_clear(&su->su_sga); 6188 6189 /* Truncate the list to the number of suggestions that will be displayed. */ 6190 if (ga.ga_len > su->su_maxcount) 6191 { 6192 for (i = su->su_maxcount; i < ga.ga_len; ++i) 6193 vim_free(stp[i].st_word); 6194 ga.ga_len = su->su_maxcount; 6195 } 6196 6197 su->su_ga = ga; 6198 } 6199 6200 /* 6201 * For the goodword in "stp" compute the soundalike score compared to the 6202 * badword. 6203 */ 6204 static int 6205 stp_sal_score( 6206 suggest_T *stp, 6207 suginfo_T *su, 6208 slang_T *slang, 6209 char_u *badsound) /* sound-folded badword */ 6210 { 6211 char_u *p; 6212 char_u *pbad; 6213 char_u *pgood; 6214 char_u badsound2[MAXWLEN]; 6215 char_u fword[MAXWLEN]; 6216 char_u goodsound[MAXWLEN]; 6217 char_u goodword[MAXWLEN]; 6218 int lendiff; 6219 6220 lendiff = (int)(su->su_badlen - stp->st_orglen); 6221 if (lendiff >= 0) 6222 pbad = badsound; 6223 else 6224 { 6225 /* soundfold the bad word with more characters following */ 6226 (void)spell_casefold(su->su_badptr, stp->st_orglen, fword, MAXWLEN); 6227 6228 /* When joining two words the sound often changes a lot. E.g., "t he" 6229 * sounds like "t h" while "the" sounds like "@". Avoid that by 6230 * removing the space. Don't do it when the good word also contains a 6231 * space. */ 6232 if (VIM_ISWHITE(su->su_badptr[su->su_badlen]) 6233 && *skiptowhite(stp->st_word) == NUL) 6234 for (p = fword; *(p = skiptowhite(p)) != NUL; ) 6235 STRMOVE(p, p + 1); 6236 6237 spell_soundfold(slang, fword, TRUE, badsound2); 6238 pbad = badsound2; 6239 } 6240 6241 if (lendiff > 0 && stp->st_wordlen + lendiff < MAXWLEN) 6242 { 6243 /* Add part of the bad word to the good word, so that we soundfold 6244 * what replaces the bad word. */ 6245 STRCPY(goodword, stp->st_word); 6246 vim_strncpy(goodword + stp->st_wordlen, 6247 su->su_badptr + su->su_badlen - lendiff, lendiff); 6248 pgood = goodword; 6249 } 6250 else 6251 pgood = stp->st_word; 6252 6253 /* Sound-fold the word and compute the score for the difference. */ 6254 spell_soundfold(slang, pgood, FALSE, goodsound); 6255 6256 return soundalike_score(goodsound, pbad); 6257 } 6258 6259 /* structure used to store soundfolded words that add_sound_suggest() has 6260 * handled already. */ 6261 typedef struct 6262 { 6263 short sft_score; /* lowest score used */ 6264 char_u sft_word[1]; /* soundfolded word, actually longer */ 6265 } sftword_T; 6266 6267 static sftword_T dumsft; 6268 #define HIKEY2SFT(p) ((sftword_T *)(p - (dumsft.sft_word - (char_u *)&dumsft))) 6269 #define HI2SFT(hi) HIKEY2SFT((hi)->hi_key) 6270 6271 /* 6272 * Prepare for calling suggest_try_soundalike(). 6273 */ 6274 static void 6275 suggest_try_soundalike_prep(void) 6276 { 6277 langp_T *lp; 6278 int lpi; 6279 slang_T *slang; 6280 6281 /* Do this for all languages that support sound folding and for which a 6282 * .sug file has been loaded. */ 6283 for (lpi = 0; lpi < curwin->w_s->b_langp.ga_len; ++lpi) 6284 { 6285 lp = LANGP_ENTRY(curwin->w_s->b_langp, lpi); 6286 slang = lp->lp_slang; 6287 if (slang->sl_sal.ga_len > 0 && slang->sl_sbyts != NULL) 6288 /* prepare the hashtable used by add_sound_suggest() */ 6289 hash_init(&slang->sl_sounddone); 6290 } 6291 } 6292 6293 /* 6294 * Find suggestions by comparing the word in a sound-a-like form. 6295 * Note: This doesn't support postponed prefixes. 6296 */ 6297 static void 6298 suggest_try_soundalike(suginfo_T *su) 6299 { 6300 char_u salword[MAXWLEN]; 6301 langp_T *lp; 6302 int lpi; 6303 slang_T *slang; 6304 6305 /* Do this for all languages that support sound folding and for which a 6306 * .sug file has been loaded. */ 6307 for (lpi = 0; lpi < curwin->w_s->b_langp.ga_len; ++lpi) 6308 { 6309 lp = LANGP_ENTRY(curwin->w_s->b_langp, lpi); 6310 slang = lp->lp_slang; 6311 if (slang->sl_sal.ga_len > 0 && slang->sl_sbyts != NULL) 6312 { 6313 /* soundfold the bad word */ 6314 spell_soundfold(slang, su->su_fbadword, TRUE, salword); 6315 6316 /* try all kinds of inserts/deletes/swaps/etc. */ 6317 /* TODO: also soundfold the next words, so that we can try joining 6318 * and splitting */ 6319 #ifdef SUGGEST_PROFILE 6320 prof_init(); 6321 #endif 6322 suggest_trie_walk(su, lp, salword, TRUE); 6323 #ifdef SUGGEST_PROFILE 6324 prof_report("soundalike"); 6325 #endif 6326 } 6327 } 6328 } 6329 6330 /* 6331 * Finish up after calling suggest_try_soundalike(). 6332 */ 6333 static void 6334 suggest_try_soundalike_finish(void) 6335 { 6336 langp_T *lp; 6337 int lpi; 6338 slang_T *slang; 6339 int todo; 6340 hashitem_T *hi; 6341 6342 /* Do this for all languages that support sound folding and for which a 6343 * .sug file has been loaded. */ 6344 for (lpi = 0; lpi < curwin->w_s->b_langp.ga_len; ++lpi) 6345 { 6346 lp = LANGP_ENTRY(curwin->w_s->b_langp, lpi); 6347 slang = lp->lp_slang; 6348 if (slang->sl_sal.ga_len > 0 && slang->sl_sbyts != NULL) 6349 { 6350 /* Free the info about handled words. */ 6351 todo = (int)slang->sl_sounddone.ht_used; 6352 for (hi = slang->sl_sounddone.ht_array; todo > 0; ++hi) 6353 if (!HASHITEM_EMPTY(hi)) 6354 { 6355 vim_free(HI2SFT(hi)); 6356 --todo; 6357 } 6358 6359 /* Clear the hashtable, it may also be used by another region. */ 6360 hash_clear(&slang->sl_sounddone); 6361 hash_init(&slang->sl_sounddone); 6362 } 6363 } 6364 } 6365 6366 /* 6367 * A match with a soundfolded word is found. Add the good word(s) that 6368 * produce this soundfolded word. 6369 */ 6370 static void 6371 add_sound_suggest( 6372 suginfo_T *su, 6373 char_u *goodword, 6374 int score, /* soundfold score */ 6375 langp_T *lp) 6376 { 6377 slang_T *slang = lp->lp_slang; /* language for sound folding */ 6378 int sfwordnr; 6379 char_u *nrline; 6380 int orgnr; 6381 char_u theword[MAXWLEN]; 6382 int i; 6383 int wlen; 6384 char_u *byts; 6385 idx_T *idxs; 6386 int n; 6387 int wordcount; 6388 int wc; 6389 int goodscore; 6390 hash_T hash; 6391 hashitem_T *hi; 6392 sftword_T *sft; 6393 int bc, gc; 6394 int limit; 6395 6396 /* 6397 * It's very well possible that the same soundfold word is found several 6398 * times with different scores. Since the following is quite slow only do 6399 * the words that have a better score than before. Use a hashtable to 6400 * remember the words that have been done. 6401 */ 6402 hash = hash_hash(goodword); 6403 hi = hash_lookup(&slang->sl_sounddone, goodword, hash); 6404 if (HASHITEM_EMPTY(hi)) 6405 { 6406 sft = (sftword_T *)alloc((unsigned)(sizeof(sftword_T) 6407 + STRLEN(goodword))); 6408 if (sft != NULL) 6409 { 6410 sft->sft_score = score; 6411 STRCPY(sft->sft_word, goodword); 6412 hash_add_item(&slang->sl_sounddone, hi, sft->sft_word, hash); 6413 } 6414 } 6415 else 6416 { 6417 sft = HI2SFT(hi); 6418 if (score >= sft->sft_score) 6419 return; 6420 sft->sft_score = score; 6421 } 6422 6423 /* 6424 * Find the word nr in the soundfold tree. 6425 */ 6426 sfwordnr = soundfold_find(slang, goodword); 6427 if (sfwordnr < 0) 6428 { 6429 internal_error("add_sound_suggest()"); 6430 return; 6431 } 6432 6433 /* 6434 * go over the list of good words that produce this soundfold word 6435 */ 6436 nrline = ml_get_buf(slang->sl_sugbuf, (linenr_T)(sfwordnr + 1), FALSE); 6437 orgnr = 0; 6438 while (*nrline != NUL) 6439 { 6440 /* The wordnr was stored in a minimal nr of bytes as an offset to the 6441 * previous wordnr. */ 6442 orgnr += bytes2offset(&nrline); 6443 6444 byts = slang->sl_fbyts; 6445 idxs = slang->sl_fidxs; 6446 6447 /* Lookup the word "orgnr" one of the two tries. */ 6448 n = 0; 6449 wordcount = 0; 6450 for (wlen = 0; wlen < MAXWLEN - 3; ++wlen) 6451 { 6452 i = 1; 6453 if (wordcount == orgnr && byts[n + 1] == NUL) 6454 break; /* found end of word */ 6455 6456 if (byts[n + 1] == NUL) 6457 ++wordcount; 6458 6459 /* skip over the NUL bytes */ 6460 for ( ; byts[n + i] == NUL; ++i) 6461 if (i > byts[n]) /* safety check */ 6462 { 6463 STRCPY(theword + wlen, "BAD"); 6464 wlen += 3; 6465 goto badword; 6466 } 6467 6468 /* One of the siblings must have the word. */ 6469 for ( ; i < byts[n]; ++i) 6470 { 6471 wc = idxs[idxs[n + i]]; /* nr of words under this byte */ 6472 if (wordcount + wc > orgnr) 6473 break; 6474 wordcount += wc; 6475 } 6476 6477 theword[wlen] = byts[n + i]; 6478 n = idxs[n + i]; 6479 } 6480 badword: 6481 theword[wlen] = NUL; 6482 6483 /* Go over the possible flags and regions. */ 6484 for (; i <= byts[n] && byts[n + i] == NUL; ++i) 6485 { 6486 char_u cword[MAXWLEN]; 6487 char_u *p; 6488 int flags = (int)idxs[n + i]; 6489 6490 /* Skip words with the NOSUGGEST flag */ 6491 if (flags & WF_NOSUGGEST) 6492 continue; 6493 6494 if (flags & WF_KEEPCAP) 6495 { 6496 /* Must find the word in the keep-case tree. */ 6497 find_keepcap_word(slang, theword, cword); 6498 p = cword; 6499 } 6500 else 6501 { 6502 flags |= su->su_badflags; 6503 if ((flags & WF_CAPMASK) != 0) 6504 { 6505 /* Need to fix case according to "flags". */ 6506 make_case_word(theword, cword, flags); 6507 p = cword; 6508 } 6509 else 6510 p = theword; 6511 } 6512 6513 /* Add the suggestion. */ 6514 if (sps_flags & SPS_DOUBLE) 6515 { 6516 /* Add the suggestion if the score isn't too bad. */ 6517 if (score <= su->su_maxscore) 6518 add_suggestion(su, &su->su_sga, p, su->su_badlen, 6519 score, 0, FALSE, slang, FALSE); 6520 } 6521 else 6522 { 6523 /* Add a penalty for words in another region. */ 6524 if ((flags & WF_REGION) 6525 && (((unsigned)flags >> 16) & lp->lp_region) == 0) 6526 goodscore = SCORE_REGION; 6527 else 6528 goodscore = 0; 6529 6530 /* Add a small penalty for changing the first letter from 6531 * lower to upper case. Helps for "tath" -> "Kath", which is 6532 * less common than "tath" -> "path". Don't do it when the 6533 * letter is the same, that has already been counted. */ 6534 gc = PTR2CHAR(p); 6535 if (SPELL_ISUPPER(gc)) 6536 { 6537 bc = PTR2CHAR(su->su_badword); 6538 if (!SPELL_ISUPPER(bc) 6539 && SPELL_TOFOLD(bc) != SPELL_TOFOLD(gc)) 6540 goodscore += SCORE_ICASE / 2; 6541 } 6542 6543 /* Compute the score for the good word. This only does letter 6544 * insert/delete/swap/replace. REP items are not considered, 6545 * which may make the score a bit higher. 6546 * Use a limit for the score to make it work faster. Use 6547 * MAXSCORE(), because RESCORE() will change the score. 6548 * If the limit is very high then the iterative method is 6549 * inefficient, using an array is quicker. */ 6550 limit = MAXSCORE(su->su_sfmaxscore - goodscore, score); 6551 if (limit > SCORE_LIMITMAX) 6552 goodscore += spell_edit_score(slang, su->su_badword, p); 6553 else 6554 goodscore += spell_edit_score_limit(slang, su->su_badword, 6555 p, limit); 6556 6557 /* When going over the limit don't bother to do the rest. */ 6558 if (goodscore < SCORE_MAXMAX) 6559 { 6560 /* Give a bonus to words seen before. */ 6561 goodscore = score_wordcount_adj(slang, goodscore, p, FALSE); 6562 6563 /* Add the suggestion if the score isn't too bad. */ 6564 goodscore = RESCORE(goodscore, score); 6565 if (goodscore <= su->su_sfmaxscore) 6566 add_suggestion(su, &su->su_ga, p, su->su_badlen, 6567 goodscore, score, TRUE, slang, TRUE); 6568 } 6569 } 6570 } 6571 /* smsg("word %s (%d): %s (%d)", sftword, sftnr, theword, orgnr); */ 6572 } 6573 } 6574 6575 /* 6576 * Find word "word" in fold-case tree for "slang" and return the word number. 6577 */ 6578 static int 6579 soundfold_find(slang_T *slang, char_u *word) 6580 { 6581 idx_T arridx = 0; 6582 int len; 6583 int wlen = 0; 6584 int c; 6585 char_u *ptr = word; 6586 char_u *byts; 6587 idx_T *idxs; 6588 int wordnr = 0; 6589 6590 byts = slang->sl_sbyts; 6591 idxs = slang->sl_sidxs; 6592 6593 for (;;) 6594 { 6595 /* First byte is the number of possible bytes. */ 6596 len = byts[arridx++]; 6597 6598 /* If the first possible byte is a zero the word could end here. 6599 * If the word ends we found the word. If not skip the NUL bytes. */ 6600 c = ptr[wlen]; 6601 if (byts[arridx] == NUL) 6602 { 6603 if (c == NUL) 6604 break; 6605 6606 /* Skip over the zeros, there can be several. */ 6607 while (len > 0 && byts[arridx] == NUL) 6608 { 6609 ++arridx; 6610 --len; 6611 } 6612 if (len == 0) 6613 return -1; /* no children, word should have ended here */ 6614 ++wordnr; 6615 } 6616 6617 /* If the word ends we didn't find it. */ 6618 if (c == NUL) 6619 return -1; 6620 6621 /* Perform a binary search in the list of accepted bytes. */ 6622 if (c == TAB) /* <Tab> is handled like <Space> */ 6623 c = ' '; 6624 while (byts[arridx] < c) 6625 { 6626 /* The word count is in the first idxs[] entry of the child. */ 6627 wordnr += idxs[idxs[arridx]]; 6628 ++arridx; 6629 if (--len == 0) /* end of the bytes, didn't find it */ 6630 return -1; 6631 } 6632 if (byts[arridx] != c) /* didn't find the byte */ 6633 return -1; 6634 6635 /* Continue at the child (if there is one). */ 6636 arridx = idxs[arridx]; 6637 ++wlen; 6638 6639 /* One space in the good word may stand for several spaces in the 6640 * checked word. */ 6641 if (c == ' ') 6642 while (ptr[wlen] == ' ' || ptr[wlen] == TAB) 6643 ++wlen; 6644 } 6645 6646 return wordnr; 6647 } 6648 6649 /* 6650 * Copy "fword" to "cword", fixing case according to "flags". 6651 */ 6652 static void 6653 make_case_word(char_u *fword, char_u *cword, int flags) 6654 { 6655 if (flags & WF_ALLCAP) 6656 /* Make it all upper-case */ 6657 allcap_copy(fword, cword); 6658 else if (flags & WF_ONECAP) 6659 /* Make the first letter upper-case */ 6660 onecap_copy(fword, cword, TRUE); 6661 else 6662 /* Use goodword as-is. */ 6663 STRCPY(cword, fword); 6664 } 6665 6666 6667 /* 6668 * Return TRUE if "c1" and "c2" are similar characters according to the MAP 6669 * lines in the .aff file. 6670 */ 6671 static int 6672 similar_chars(slang_T *slang, int c1, int c2) 6673 { 6674 int m1, m2; 6675 #ifdef FEAT_MBYTE 6676 char_u buf[MB_MAXBYTES + 1]; 6677 hashitem_T *hi; 6678 6679 if (c1 >= 256) 6680 { 6681 buf[mb_char2bytes(c1, buf)] = 0; 6682 hi = hash_find(&slang->sl_map_hash, buf); 6683 if (HASHITEM_EMPTY(hi)) 6684 m1 = 0; 6685 else 6686 m1 = mb_ptr2char(hi->hi_key + STRLEN(hi->hi_key) + 1); 6687 } 6688 else 6689 #endif 6690 m1 = slang->sl_map_array[c1]; 6691 if (m1 == 0) 6692 return FALSE; 6693 6694 6695 #ifdef FEAT_MBYTE 6696 if (c2 >= 256) 6697 { 6698 buf[mb_char2bytes(c2, buf)] = 0; 6699 hi = hash_find(&slang->sl_map_hash, buf); 6700 if (HASHITEM_EMPTY(hi)) 6701 m2 = 0; 6702 else 6703 m2 = mb_ptr2char(hi->hi_key + STRLEN(hi->hi_key) + 1); 6704 } 6705 else 6706 #endif 6707 m2 = slang->sl_map_array[c2]; 6708 6709 return m1 == m2; 6710 } 6711 6712 /* 6713 * Add a suggestion to the list of suggestions. 6714 * For a suggestion that is already in the list the lowest score is remembered. 6715 */ 6716 static void 6717 add_suggestion( 6718 suginfo_T *su, 6719 garray_T *gap, /* either su_ga or su_sga */ 6720 char_u *goodword, 6721 int badlenarg, /* len of bad word replaced with "goodword" */ 6722 int score, 6723 int altscore, 6724 int had_bonus, /* value for st_had_bonus */ 6725 slang_T *slang, /* language for sound folding */ 6726 int maxsf) /* su_maxscore applies to soundfold score, 6727 su_sfmaxscore to the total score. */ 6728 { 6729 int goodlen; /* len of goodword changed */ 6730 int badlen; /* len of bad word changed */ 6731 suggest_T *stp; 6732 suggest_T new_sug; 6733 int i; 6734 char_u *pgood, *pbad; 6735 6736 /* Minimize "badlen" for consistency. Avoids that changing "the the" to 6737 * "thee the" is added next to changing the first "the" the "thee". */ 6738 pgood = goodword + STRLEN(goodword); 6739 pbad = su->su_badptr + badlenarg; 6740 for (;;) 6741 { 6742 goodlen = (int)(pgood - goodword); 6743 badlen = (int)(pbad - su->su_badptr); 6744 if (goodlen <= 0 || badlen <= 0) 6745 break; 6746 MB_PTR_BACK(goodword, pgood); 6747 MB_PTR_BACK(su->su_badptr, pbad); 6748 #ifdef FEAT_MBYTE 6749 if (has_mbyte) 6750 { 6751 if (mb_ptr2char(pgood) != mb_ptr2char(pbad)) 6752 break; 6753 } 6754 else 6755 #endif 6756 if (*pgood != *pbad) 6757 break; 6758 } 6759 6760 if (badlen == 0 && goodlen == 0) 6761 /* goodword doesn't change anything; may happen for "the the" changing 6762 * the first "the" to itself. */ 6763 return; 6764 6765 if (gap->ga_len == 0) 6766 i = -1; 6767 else 6768 { 6769 /* Check if the word is already there. Also check the length that is 6770 * being replaced "thes," -> "these" is a different suggestion from 6771 * "thes" -> "these". */ 6772 stp = &SUG(*gap, 0); 6773 for (i = gap->ga_len; --i >= 0; ++stp) 6774 if (stp->st_wordlen == goodlen 6775 && stp->st_orglen == badlen 6776 && STRNCMP(stp->st_word, goodword, goodlen) == 0) 6777 { 6778 /* 6779 * Found it. Remember the word with the lowest score. 6780 */ 6781 if (stp->st_slang == NULL) 6782 stp->st_slang = slang; 6783 6784 new_sug.st_score = score; 6785 new_sug.st_altscore = altscore; 6786 new_sug.st_had_bonus = had_bonus; 6787 6788 if (stp->st_had_bonus != had_bonus) 6789 { 6790 /* Only one of the two had the soundalike score computed. 6791 * Need to do that for the other one now, otherwise the 6792 * scores can't be compared. This happens because 6793 * suggest_try_change() doesn't compute the soundalike 6794 * word to keep it fast, while some special methods set 6795 * the soundalike score to zero. */ 6796 if (had_bonus) 6797 rescore_one(su, stp); 6798 else 6799 { 6800 new_sug.st_word = stp->st_word; 6801 new_sug.st_wordlen = stp->st_wordlen; 6802 new_sug.st_slang = stp->st_slang; 6803 new_sug.st_orglen = badlen; 6804 rescore_one(su, &new_sug); 6805 } 6806 } 6807 6808 if (stp->st_score > new_sug.st_score) 6809 { 6810 stp->st_score = new_sug.st_score; 6811 stp->st_altscore = new_sug.st_altscore; 6812 stp->st_had_bonus = new_sug.st_had_bonus; 6813 } 6814 break; 6815 } 6816 } 6817 6818 if (i < 0 && ga_grow(gap, 1) == OK) 6819 { 6820 /* Add a suggestion. */ 6821 stp = &SUG(*gap, gap->ga_len); 6822 stp->st_word = vim_strnsave(goodword, goodlen); 6823 if (stp->st_word != NULL) 6824 { 6825 stp->st_wordlen = goodlen; 6826 stp->st_score = score; 6827 stp->st_altscore = altscore; 6828 stp->st_had_bonus = had_bonus; 6829 stp->st_orglen = badlen; 6830 stp->st_slang = slang; 6831 ++gap->ga_len; 6832 6833 /* If we have too many suggestions now, sort the list and keep 6834 * the best suggestions. */ 6835 if (gap->ga_len > SUG_MAX_COUNT(su)) 6836 { 6837 if (maxsf) 6838 su->su_sfmaxscore = cleanup_suggestions(gap, 6839 su->su_sfmaxscore, SUG_CLEAN_COUNT(su)); 6840 else 6841 su->su_maxscore = cleanup_suggestions(gap, 6842 su->su_maxscore, SUG_CLEAN_COUNT(su)); 6843 } 6844 } 6845 } 6846 } 6847 6848 /* 6849 * Suggestions may in fact be flagged as errors. Esp. for banned words and 6850 * for split words, such as "the the". Remove these from the list here. 6851 */ 6852 static void 6853 check_suggestions( 6854 suginfo_T *su, 6855 garray_T *gap) /* either su_ga or su_sga */ 6856 { 6857 suggest_T *stp; 6858 int i; 6859 char_u longword[MAXWLEN + 1]; 6860 int len; 6861 hlf_T attr; 6862 6863 stp = &SUG(*gap, 0); 6864 for (i = gap->ga_len - 1; i >= 0; --i) 6865 { 6866 /* Need to append what follows to check for "the the". */ 6867 vim_strncpy(longword, stp[i].st_word, MAXWLEN); 6868 len = stp[i].st_wordlen; 6869 vim_strncpy(longword + len, su->su_badptr + stp[i].st_orglen, 6870 MAXWLEN - len); 6871 attr = HLF_COUNT; 6872 (void)spell_check(curwin, longword, &attr, NULL, FALSE); 6873 if (attr != HLF_COUNT) 6874 { 6875 /* Remove this entry. */ 6876 vim_free(stp[i].st_word); 6877 --gap->ga_len; 6878 if (i < gap->ga_len) 6879 mch_memmove(stp + i, stp + i + 1, 6880 sizeof(suggest_T) * (gap->ga_len - i)); 6881 } 6882 } 6883 } 6884 6885 6886 /* 6887 * Add a word to be banned. 6888 */ 6889 static void 6890 add_banned( 6891 suginfo_T *su, 6892 char_u *word) 6893 { 6894 char_u *s; 6895 hash_T hash; 6896 hashitem_T *hi; 6897 6898 hash = hash_hash(word); 6899 hi = hash_lookup(&su->su_banned, word, hash); 6900 if (HASHITEM_EMPTY(hi)) 6901 { 6902 s = vim_strsave(word); 6903 if (s != NULL) 6904 hash_add_item(&su->su_banned, hi, s, hash); 6905 } 6906 } 6907 6908 /* 6909 * Recompute the score for all suggestions if sound-folding is possible. This 6910 * is slow, thus only done for the final results. 6911 */ 6912 static void 6913 rescore_suggestions(suginfo_T *su) 6914 { 6915 int i; 6916 6917 if (su->su_sallang != NULL) 6918 for (i = 0; i < su->su_ga.ga_len; ++i) 6919 rescore_one(su, &SUG(su->su_ga, i)); 6920 } 6921 6922 /* 6923 * Recompute the score for one suggestion if sound-folding is possible. 6924 */ 6925 static void 6926 rescore_one(suginfo_T *su, suggest_T *stp) 6927 { 6928 slang_T *slang = stp->st_slang; 6929 char_u sal_badword[MAXWLEN]; 6930 char_u *p; 6931 6932 /* Only rescore suggestions that have no sal score yet and do have a 6933 * language. */ 6934 if (slang != NULL && slang->sl_sal.ga_len > 0 && !stp->st_had_bonus) 6935 { 6936 if (slang == su->su_sallang) 6937 p = su->su_sal_badword; 6938 else 6939 { 6940 spell_soundfold(slang, su->su_fbadword, TRUE, sal_badword); 6941 p = sal_badword; 6942 } 6943 6944 stp->st_altscore = stp_sal_score(stp, su, slang, p); 6945 if (stp->st_altscore == SCORE_MAXMAX) 6946 stp->st_altscore = SCORE_BIG; 6947 stp->st_score = RESCORE(stp->st_score, stp->st_altscore); 6948 stp->st_had_bonus = TRUE; 6949 } 6950 } 6951 6952 static int 6953 #ifdef __BORLANDC__ 6954 _RTLENTRYF 6955 #endif 6956 sug_compare(const void *s1, const void *s2); 6957 6958 /* 6959 * Function given to qsort() to sort the suggestions on st_score. 6960 * First on "st_score", then "st_altscore" then alphabetically. 6961 */ 6962 static int 6963 #ifdef __BORLANDC__ 6964 _RTLENTRYF 6965 #endif 6966 sug_compare(const void *s1, const void *s2) 6967 { 6968 suggest_T *p1 = (suggest_T *)s1; 6969 suggest_T *p2 = (suggest_T *)s2; 6970 int n = p1->st_score - p2->st_score; 6971 6972 if (n == 0) 6973 { 6974 n = p1->st_altscore - p2->st_altscore; 6975 if (n == 0) 6976 n = STRICMP(p1->st_word, p2->st_word); 6977 } 6978 return n; 6979 } 6980 6981 /* 6982 * Cleanup the suggestions: 6983 * - Sort on score. 6984 * - Remove words that won't be displayed. 6985 * Returns the maximum score in the list or "maxscore" unmodified. 6986 */ 6987 static int 6988 cleanup_suggestions( 6989 garray_T *gap, 6990 int maxscore, 6991 int keep) /* nr of suggestions to keep */ 6992 { 6993 suggest_T *stp = &SUG(*gap, 0); 6994 int i; 6995 6996 /* Sort the list. */ 6997 qsort(gap->ga_data, (size_t)gap->ga_len, sizeof(suggest_T), sug_compare); 6998 6999 /* Truncate the list to the number of suggestions that will be displayed. */ 7000 if (gap->ga_len > keep) 7001 { 7002 for (i = keep; i < gap->ga_len; ++i) 7003 vim_free(stp[i].st_word); 7004 gap->ga_len = keep; 7005 return stp[keep - 1].st_score; 7006 } 7007 return maxscore; 7008 } 7009 7010 #if defined(FEAT_EVAL) || defined(PROTO) 7011 /* 7012 * Soundfold a string, for soundfold(). 7013 * Result is in allocated memory, NULL for an error. 7014 */ 7015 char_u * 7016 eval_soundfold(char_u *word) 7017 { 7018 langp_T *lp; 7019 char_u sound[MAXWLEN]; 7020 int lpi; 7021 7022 if (curwin->w_p_spell && *curwin->w_s->b_p_spl != NUL) 7023 /* Use the sound-folding of the first language that supports it. */ 7024 for (lpi = 0; lpi < curwin->w_s->b_langp.ga_len; ++lpi) 7025 { 7026 lp = LANGP_ENTRY(curwin->w_s->b_langp, lpi); 7027 if (lp->lp_slang->sl_sal.ga_len > 0) 7028 { 7029 /* soundfold the word */ 7030 spell_soundfold(lp->lp_slang, word, FALSE, sound); 7031 return vim_strsave(sound); 7032 } 7033 } 7034 7035 /* No language with sound folding, return word as-is. */ 7036 return vim_strsave(word); 7037 } 7038 #endif 7039 7040 /* 7041 * Turn "inword" into its sound-a-like equivalent in "res[MAXWLEN]". 7042 * 7043 * There are many ways to turn a word into a sound-a-like representation. The 7044 * oldest is Soundex (1918!). A nice overview can be found in "Approximate 7045 * swedish name matching - survey and test of different algorithms" by Klas 7046 * Erikson. 7047 * 7048 * We support two methods: 7049 * 1. SOFOFROM/SOFOTO do a simple character mapping. 7050 * 2. SAL items define a more advanced sound-folding (and much slower). 7051 */ 7052 void 7053 spell_soundfold( 7054 slang_T *slang, 7055 char_u *inword, 7056 int folded, /* "inword" is already case-folded */ 7057 char_u *res) 7058 { 7059 char_u fword[MAXWLEN]; 7060 char_u *word; 7061 7062 if (slang->sl_sofo) 7063 /* SOFOFROM and SOFOTO used */ 7064 spell_soundfold_sofo(slang, inword, res); 7065 else 7066 { 7067 /* SAL items used. Requires the word to be case-folded. */ 7068 if (folded) 7069 word = inword; 7070 else 7071 { 7072 (void)spell_casefold(inword, (int)STRLEN(inword), fword, MAXWLEN); 7073 word = fword; 7074 } 7075 7076 #ifdef FEAT_MBYTE 7077 if (has_mbyte) 7078 spell_soundfold_wsal(slang, word, res); 7079 else 7080 #endif 7081 spell_soundfold_sal(slang, word, res); 7082 } 7083 } 7084 7085 /* 7086 * Perform sound folding of "inword" into "res" according to SOFOFROM and 7087 * SOFOTO lines. 7088 */ 7089 static void 7090 spell_soundfold_sofo(slang_T *slang, char_u *inword, char_u *res) 7091 { 7092 char_u *s; 7093 int ri = 0; 7094 int c; 7095 7096 #ifdef FEAT_MBYTE 7097 if (has_mbyte) 7098 { 7099 int prevc = 0; 7100 int *ip; 7101 7102 /* The sl_sal_first[] table contains the translation for chars up to 7103 * 255, sl_sal the rest. */ 7104 for (s = inword; *s != NUL; ) 7105 { 7106 c = mb_cptr2char_adv(&s); 7107 if (enc_utf8 ? utf_class(c) == 0 : VIM_ISWHITE(c)) 7108 c = ' '; 7109 else if (c < 256) 7110 c = slang->sl_sal_first[c]; 7111 else 7112 { 7113 ip = ((int **)slang->sl_sal.ga_data)[c & 0xff]; 7114 if (ip == NULL) /* empty list, can't match */ 7115 c = NUL; 7116 else 7117 for (;;) /* find "c" in the list */ 7118 { 7119 if (*ip == 0) /* not found */ 7120 { 7121 c = NUL; 7122 break; 7123 } 7124 if (*ip == c) /* match! */ 7125 { 7126 c = ip[1]; 7127 break; 7128 } 7129 ip += 2; 7130 } 7131 } 7132 7133 if (c != NUL && c != prevc) 7134 { 7135 ri += mb_char2bytes(c, res + ri); 7136 if (ri + MB_MAXBYTES > MAXWLEN) 7137 break; 7138 prevc = c; 7139 } 7140 } 7141 } 7142 else 7143 #endif 7144 { 7145 /* The sl_sal_first[] table contains the translation. */ 7146 for (s = inword; (c = *s) != NUL; ++s) 7147 { 7148 if (VIM_ISWHITE(c)) 7149 c = ' '; 7150 else 7151 c = slang->sl_sal_first[c]; 7152 if (c != NUL && (ri == 0 || res[ri - 1] != c)) 7153 res[ri++] = c; 7154 } 7155 } 7156 7157 res[ri] = NUL; 7158 } 7159 7160 static void 7161 spell_soundfold_sal(slang_T *slang, char_u *inword, char_u *res) 7162 { 7163 salitem_T *smp; 7164 char_u word[MAXWLEN]; 7165 char_u *s = inword; 7166 char_u *t; 7167 char_u *pf; 7168 int i, j, z; 7169 int reslen; 7170 int n, k = 0; 7171 int z0; 7172 int k0; 7173 int n0; 7174 int c; 7175 int pri; 7176 int p0 = -333; 7177 int c0; 7178 7179 /* Remove accents, if wanted. We actually remove all non-word characters. 7180 * But keep white space. We need a copy, the word may be changed here. */ 7181 if (slang->sl_rem_accents) 7182 { 7183 t = word; 7184 while (*s != NUL) 7185 { 7186 if (VIM_ISWHITE(*s)) 7187 { 7188 *t++ = ' '; 7189 s = skipwhite(s); 7190 } 7191 else 7192 { 7193 if (spell_iswordp_nmw(s, curwin)) 7194 *t++ = *s; 7195 ++s; 7196 } 7197 } 7198 *t = NUL; 7199 } 7200 else 7201 vim_strncpy(word, s, MAXWLEN - 1); 7202 7203 smp = (salitem_T *)slang->sl_sal.ga_data; 7204 7205 /* 7206 * This comes from Aspell phonet.cpp. Converted from C++ to C. 7207 * Changed to keep spaces. 7208 */ 7209 i = reslen = z = 0; 7210 while ((c = word[i]) != NUL) 7211 { 7212 /* Start with the first rule that has the character in the word. */ 7213 n = slang->sl_sal_first[c]; 7214 z0 = 0; 7215 7216 if (n >= 0) 7217 { 7218 /* check all rules for the same letter */ 7219 for (; (s = smp[n].sm_lead)[0] == c; ++n) 7220 { 7221 /* Quickly skip entries that don't match the word. Most 7222 * entries are less then three chars, optimize for that. */ 7223 k = smp[n].sm_leadlen; 7224 if (k > 1) 7225 { 7226 if (word[i + 1] != s[1]) 7227 continue; 7228 if (k > 2) 7229 { 7230 for (j = 2; j < k; ++j) 7231 if (word[i + j] != s[j]) 7232 break; 7233 if (j < k) 7234 continue; 7235 } 7236 } 7237 7238 if ((pf = smp[n].sm_oneof) != NULL) 7239 { 7240 /* Check for match with one of the chars in "sm_oneof". */ 7241 while (*pf != NUL && *pf != word[i + k]) 7242 ++pf; 7243 if (*pf == NUL) 7244 continue; 7245 ++k; 7246 } 7247 s = smp[n].sm_rules; 7248 pri = 5; /* default priority */ 7249 7250 p0 = *s; 7251 k0 = k; 7252 while (*s == '-' && k > 1) 7253 { 7254 k--; 7255 s++; 7256 } 7257 if (*s == '<') 7258 s++; 7259 if (VIM_ISDIGIT(*s)) 7260 { 7261 /* determine priority */ 7262 pri = *s - '0'; 7263 s++; 7264 } 7265 if (*s == '^' && *(s + 1) == '^') 7266 s++; 7267 7268 if (*s == NUL 7269 || (*s == '^' 7270 && (i == 0 || !(word[i - 1] == ' ' 7271 || spell_iswordp(word + i - 1, curwin))) 7272 && (*(s + 1) != '$' 7273 || (!spell_iswordp(word + i + k0, curwin)))) 7274 || (*s == '$' && i > 0 7275 && spell_iswordp(word + i - 1, curwin) 7276 && (!spell_iswordp(word + i + k0, curwin)))) 7277 { 7278 /* search for followup rules, if: */ 7279 /* followup and k > 1 and NO '-' in searchstring */ 7280 c0 = word[i + k - 1]; 7281 n0 = slang->sl_sal_first[c0]; 7282 7283 if (slang->sl_followup && k > 1 && n0 >= 0 7284 && p0 != '-' && word[i + k] != NUL) 7285 { 7286 /* test follow-up rule for "word[i + k]" */ 7287 for ( ; (s = smp[n0].sm_lead)[0] == c0; ++n0) 7288 { 7289 /* Quickly skip entries that don't match the word. 7290 * */ 7291 k0 = smp[n0].sm_leadlen; 7292 if (k0 > 1) 7293 { 7294 if (word[i + k] != s[1]) 7295 continue; 7296 if (k0 > 2) 7297 { 7298 pf = word + i + k + 1; 7299 for (j = 2; j < k0; ++j) 7300 if (*pf++ != s[j]) 7301 break; 7302 if (j < k0) 7303 continue; 7304 } 7305 } 7306 k0 += k - 1; 7307 7308 if ((pf = smp[n0].sm_oneof) != NULL) 7309 { 7310 /* Check for match with one of the chars in 7311 * "sm_oneof". */ 7312 while (*pf != NUL && *pf != word[i + k0]) 7313 ++pf; 7314 if (*pf == NUL) 7315 continue; 7316 ++k0; 7317 } 7318 7319 p0 = 5; 7320 s = smp[n0].sm_rules; 7321 while (*s == '-') 7322 { 7323 /* "k0" gets NOT reduced because 7324 * "if (k0 == k)" */ 7325 s++; 7326 } 7327 if (*s == '<') 7328 s++; 7329 if (VIM_ISDIGIT(*s)) 7330 { 7331 p0 = *s - '0'; 7332 s++; 7333 } 7334 7335 if (*s == NUL 7336 /* *s == '^' cuts */ 7337 || (*s == '$' 7338 && !spell_iswordp(word + i + k0, 7339 curwin))) 7340 { 7341 if (k0 == k) 7342 /* this is just a piece of the string */ 7343 continue; 7344 7345 if (p0 < pri) 7346 /* priority too low */ 7347 continue; 7348 /* rule fits; stop search */ 7349 break; 7350 } 7351 } 7352 7353 if (p0 >= pri && smp[n0].sm_lead[0] == c0) 7354 continue; 7355 } 7356 7357 /* replace string */ 7358 s = smp[n].sm_to; 7359 if (s == NULL) 7360 s = (char_u *)""; 7361 pf = smp[n].sm_rules; 7362 p0 = (vim_strchr(pf, '<') != NULL) ? 1 : 0; 7363 if (p0 == 1 && z == 0) 7364 { 7365 /* rule with '<' is used */ 7366 if (reslen > 0 && *s != NUL && (res[reslen - 1] == c 7367 || res[reslen - 1] == *s)) 7368 reslen--; 7369 z0 = 1; 7370 z = 1; 7371 k0 = 0; 7372 while (*s != NUL && word[i + k0] != NUL) 7373 { 7374 word[i + k0] = *s; 7375 k0++; 7376 s++; 7377 } 7378 if (k > k0) 7379 STRMOVE(word + i + k0, word + i + k); 7380 7381 /* new "actual letter" */ 7382 c = word[i]; 7383 } 7384 else 7385 { 7386 /* no '<' rule used */ 7387 i += k - 1; 7388 z = 0; 7389 while (*s != NUL && s[1] != NUL && reslen < MAXWLEN) 7390 { 7391 if (reslen == 0 || res[reslen - 1] != *s) 7392 res[reslen++] = *s; 7393 s++; 7394 } 7395 /* new "actual letter" */ 7396 c = *s; 7397 if (strstr((char *)pf, "^^") != NULL) 7398 { 7399 if (c != NUL) 7400 res[reslen++] = c; 7401 STRMOVE(word, word + i + 1); 7402 i = 0; 7403 z0 = 1; 7404 } 7405 } 7406 break; 7407 } 7408 } 7409 } 7410 else if (VIM_ISWHITE(c)) 7411 { 7412 c = ' '; 7413 k = 1; 7414 } 7415 7416 if (z0 == 0) 7417 { 7418 if (k && !p0 && reslen < MAXWLEN && c != NUL 7419 && (!slang->sl_collapse || reslen == 0 7420 || res[reslen - 1] != c)) 7421 /* condense only double letters */ 7422 res[reslen++] = c; 7423 7424 i++; 7425 z = 0; 7426 k = 0; 7427 } 7428 } 7429 7430 res[reslen] = NUL; 7431 } 7432 7433 #ifdef FEAT_MBYTE 7434 /* 7435 * Turn "inword" into its sound-a-like equivalent in "res[MAXWLEN]". 7436 * Multi-byte version of spell_soundfold(). 7437 */ 7438 static void 7439 spell_soundfold_wsal(slang_T *slang, char_u *inword, char_u *res) 7440 { 7441 salitem_T *smp = (salitem_T *)slang->sl_sal.ga_data; 7442 int word[MAXWLEN]; 7443 int wres[MAXWLEN]; 7444 int l; 7445 char_u *s; 7446 int *ws; 7447 char_u *t; 7448 int *pf; 7449 int i, j, z; 7450 int reslen; 7451 int n, k = 0; 7452 int z0; 7453 int k0; 7454 int n0; 7455 int c; 7456 int pri; 7457 int p0 = -333; 7458 int c0; 7459 int did_white = FALSE; 7460 int wordlen; 7461 7462 7463 /* 7464 * Convert the multi-byte string to a wide-character string. 7465 * Remove accents, if wanted. We actually remove all non-word characters. 7466 * But keep white space. 7467 */ 7468 wordlen = 0; 7469 for (s = inword; *s != NUL; ) 7470 { 7471 t = s; 7472 c = mb_cptr2char_adv(&s); 7473 if (slang->sl_rem_accents) 7474 { 7475 if (enc_utf8 ? utf_class(c) == 0 : VIM_ISWHITE(c)) 7476 { 7477 if (did_white) 7478 continue; 7479 c = ' '; 7480 did_white = TRUE; 7481 } 7482 else 7483 { 7484 did_white = FALSE; 7485 if (!spell_iswordp_nmw(t, curwin)) 7486 continue; 7487 } 7488 } 7489 word[wordlen++] = c; 7490 } 7491 word[wordlen] = NUL; 7492 7493 /* 7494 * This algorithm comes from Aspell phonet.cpp. 7495 * Converted from C++ to C. Added support for multi-byte chars. 7496 * Changed to keep spaces. 7497 */ 7498 i = reslen = z = 0; 7499 while ((c = word[i]) != NUL) 7500 { 7501 /* Start with the first rule that has the character in the word. */ 7502 n = slang->sl_sal_first[c & 0xff]; 7503 z0 = 0; 7504 7505 if (n >= 0) 7506 { 7507 /* Check all rules for the same index byte. 7508 * If c is 0x300 need extra check for the end of the array, as 7509 * (c & 0xff) is NUL. */ 7510 for (; ((ws = smp[n].sm_lead_w)[0] & 0xff) == (c & 0xff) 7511 && ws[0] != NUL; ++n) 7512 { 7513 /* Quickly skip entries that don't match the word. Most 7514 * entries are less then three chars, optimize for that. */ 7515 if (c != ws[0]) 7516 continue; 7517 k = smp[n].sm_leadlen; 7518 if (k > 1) 7519 { 7520 if (word[i + 1] != ws[1]) 7521 continue; 7522 if (k > 2) 7523 { 7524 for (j = 2; j < k; ++j) 7525 if (word[i + j] != ws[j]) 7526 break; 7527 if (j < k) 7528 continue; 7529 } 7530 } 7531 7532 if ((pf = smp[n].sm_oneof_w) != NULL) 7533 { 7534 /* Check for match with one of the chars in "sm_oneof". */ 7535 while (*pf != NUL && *pf != word[i + k]) 7536 ++pf; 7537 if (*pf == NUL) 7538 continue; 7539 ++k; 7540 } 7541 s = smp[n].sm_rules; 7542 pri = 5; /* default priority */ 7543 7544 p0 = *s; 7545 k0 = k; 7546 while (*s == '-' && k > 1) 7547 { 7548 k--; 7549 s++; 7550 } 7551 if (*s == '<') 7552 s++; 7553 if (VIM_ISDIGIT(*s)) 7554 { 7555 /* determine priority */ 7556 pri = *s - '0'; 7557 s++; 7558 } 7559 if (*s == '^' && *(s + 1) == '^') 7560 s++; 7561 7562 if (*s == NUL 7563 || (*s == '^' 7564 && (i == 0 || !(word[i - 1] == ' ' 7565 || spell_iswordp_w(word + i - 1, curwin))) 7566 && (*(s + 1) != '$' 7567 || (!spell_iswordp_w(word + i + k0, curwin)))) 7568 || (*s == '$' && i > 0 7569 && spell_iswordp_w(word + i - 1, curwin) 7570 && (!spell_iswordp_w(word + i + k0, curwin)))) 7571 { 7572 /* search for followup rules, if: */ 7573 /* followup and k > 1 and NO '-' in searchstring */ 7574 c0 = word[i + k - 1]; 7575 n0 = slang->sl_sal_first[c0 & 0xff]; 7576 7577 if (slang->sl_followup && k > 1 && n0 >= 0 7578 && p0 != '-' && word[i + k] != NUL) 7579 { 7580 /* Test follow-up rule for "word[i + k]"; loop over 7581 * all entries with the same index byte. */ 7582 for ( ; ((ws = smp[n0].sm_lead_w)[0] & 0xff) 7583 == (c0 & 0xff); ++n0) 7584 { 7585 /* Quickly skip entries that don't match the word. 7586 */ 7587 if (c0 != ws[0]) 7588 continue; 7589 k0 = smp[n0].sm_leadlen; 7590 if (k0 > 1) 7591 { 7592 if (word[i + k] != ws[1]) 7593 continue; 7594 if (k0 > 2) 7595 { 7596 pf = word + i + k + 1; 7597 for (j = 2; j < k0; ++j) 7598 if (*pf++ != ws[j]) 7599 break; 7600 if (j < k0) 7601 continue; 7602 } 7603 } 7604 k0 += k - 1; 7605 7606 if ((pf = smp[n0].sm_oneof_w) != NULL) 7607 { 7608 /* Check for match with one of the chars in 7609 * "sm_oneof". */ 7610 while (*pf != NUL && *pf != word[i + k0]) 7611 ++pf; 7612 if (*pf == NUL) 7613 continue; 7614 ++k0; 7615 } 7616 7617 p0 = 5; 7618 s = smp[n0].sm_rules; 7619 while (*s == '-') 7620 { 7621 /* "k0" gets NOT reduced because 7622 * "if (k0 == k)" */ 7623 s++; 7624 } 7625 if (*s == '<') 7626 s++; 7627 if (VIM_ISDIGIT(*s)) 7628 { 7629 p0 = *s - '0'; 7630 s++; 7631 } 7632 7633 if (*s == NUL 7634 /* *s == '^' cuts */ 7635 || (*s == '$' 7636 && !spell_iswordp_w(word + i + k0, 7637 curwin))) 7638 { 7639 if (k0 == k) 7640 /* this is just a piece of the string */ 7641 continue; 7642 7643 if (p0 < pri) 7644 /* priority too low */ 7645 continue; 7646 /* rule fits; stop search */ 7647 break; 7648 } 7649 } 7650 7651 if (p0 >= pri && (smp[n0].sm_lead_w[0] & 0xff) 7652 == (c0 & 0xff)) 7653 continue; 7654 } 7655 7656 /* replace string */ 7657 ws = smp[n].sm_to_w; 7658 s = smp[n].sm_rules; 7659 p0 = (vim_strchr(s, '<') != NULL) ? 1 : 0; 7660 if (p0 == 1 && z == 0) 7661 { 7662 /* rule with '<' is used */ 7663 if (reslen > 0 && ws != NULL && *ws != NUL 7664 && (wres[reslen - 1] == c 7665 || wres[reslen - 1] == *ws)) 7666 reslen--; 7667 z0 = 1; 7668 z = 1; 7669 k0 = 0; 7670 if (ws != NULL) 7671 while (*ws != NUL && word[i + k0] != NUL) 7672 { 7673 word[i + k0] = *ws; 7674 k0++; 7675 ws++; 7676 } 7677 if (k > k0) 7678 mch_memmove(word + i + k0, word + i + k, 7679 sizeof(int) * (wordlen - (i + k) + 1)); 7680 7681 /* new "actual letter" */ 7682 c = word[i]; 7683 } 7684 else 7685 { 7686 /* no '<' rule used */ 7687 i += k - 1; 7688 z = 0; 7689 if (ws != NULL) 7690 while (*ws != NUL && ws[1] != NUL 7691 && reslen < MAXWLEN) 7692 { 7693 if (reslen == 0 || wres[reslen - 1] != *ws) 7694 wres[reslen++] = *ws; 7695 ws++; 7696 } 7697 /* new "actual letter" */ 7698 if (ws == NULL) 7699 c = NUL; 7700 else 7701 c = *ws; 7702 if (strstr((char *)s, "^^") != NULL) 7703 { 7704 if (c != NUL) 7705 wres[reslen++] = c; 7706 mch_memmove(word, word + i + 1, 7707 sizeof(int) * (wordlen - (i + 1) + 1)); 7708 i = 0; 7709 z0 = 1; 7710 } 7711 } 7712 break; 7713 } 7714 } 7715 } 7716 else if (VIM_ISWHITE(c)) 7717 { 7718 c = ' '; 7719 k = 1; 7720 } 7721 7722 if (z0 == 0) 7723 { 7724 if (k && !p0 && reslen < MAXWLEN && c != NUL 7725 && (!slang->sl_collapse || reslen == 0 7726 || wres[reslen - 1] != c)) 7727 /* condense only double letters */ 7728 wres[reslen++] = c; 7729 7730 i++; 7731 z = 0; 7732 k = 0; 7733 } 7734 } 7735 7736 /* Convert wide characters in "wres" to a multi-byte string in "res". */ 7737 l = 0; 7738 for (n = 0; n < reslen; ++n) 7739 { 7740 l += mb_char2bytes(wres[n], res + l); 7741 if (l + MB_MAXBYTES > MAXWLEN) 7742 break; 7743 } 7744 res[l] = NUL; 7745 } 7746 #endif 7747 7748 /* 7749 * Compute a score for two sound-a-like words. 7750 * This permits up to two inserts/deletes/swaps/etc. to keep things fast. 7751 * Instead of a generic loop we write out the code. That keeps it fast by 7752 * avoiding checks that will not be possible. 7753 */ 7754 static int 7755 soundalike_score( 7756 char_u *goodstart, /* sound-folded good word */ 7757 char_u *badstart) /* sound-folded bad word */ 7758 { 7759 char_u *goodsound = goodstart; 7760 char_u *badsound = badstart; 7761 int goodlen; 7762 int badlen; 7763 int n; 7764 char_u *pl, *ps; 7765 char_u *pl2, *ps2; 7766 int score = 0; 7767 7768 /* Adding/inserting "*" at the start (word starts with vowel) shouldn't be 7769 * counted so much, vowels halfway the word aren't counted at all. */ 7770 if ((*badsound == '*' || *goodsound == '*') && *badsound != *goodsound) 7771 { 7772 if ((badsound[0] == NUL && goodsound[1] == NUL) 7773 || (goodsound[0] == NUL && badsound[1] == NUL)) 7774 /* changing word with vowel to word without a sound */ 7775 return SCORE_DEL; 7776 if (badsound[0] == NUL || goodsound[0] == NUL) 7777 /* more than two changes */ 7778 return SCORE_MAXMAX; 7779 7780 if (badsound[1] == goodsound[1] 7781 || (badsound[1] != NUL 7782 && goodsound[1] != NUL 7783 && badsound[2] == goodsound[2])) 7784 { 7785 /* handle like a substitute */ 7786 } 7787 else 7788 { 7789 score = 2 * SCORE_DEL / 3; 7790 if (*badsound == '*') 7791 ++badsound; 7792 else 7793 ++goodsound; 7794 } 7795 } 7796 7797 goodlen = (int)STRLEN(goodsound); 7798 badlen = (int)STRLEN(badsound); 7799 7800 /* Return quickly if the lengths are too different to be fixed by two 7801 * changes. */ 7802 n = goodlen - badlen; 7803 if (n < -2 || n > 2) 7804 return SCORE_MAXMAX; 7805 7806 if (n > 0) 7807 { 7808 pl = goodsound; /* goodsound is longest */ 7809 ps = badsound; 7810 } 7811 else 7812 { 7813 pl = badsound; /* badsound is longest */ 7814 ps = goodsound; 7815 } 7816 7817 /* Skip over the identical part. */ 7818 while (*pl == *ps && *pl != NUL) 7819 { 7820 ++pl; 7821 ++ps; 7822 } 7823 7824 switch (n) 7825 { 7826 case -2: 7827 case 2: 7828 /* 7829 * Must delete two characters from "pl". 7830 */ 7831 ++pl; /* first delete */ 7832 while (*pl == *ps) 7833 { 7834 ++pl; 7835 ++ps; 7836 } 7837 /* strings must be equal after second delete */ 7838 if (STRCMP(pl + 1, ps) == 0) 7839 return score + SCORE_DEL * 2; 7840 7841 /* Failed to compare. */ 7842 break; 7843 7844 case -1: 7845 case 1: 7846 /* 7847 * Minimal one delete from "pl" required. 7848 */ 7849 7850 /* 1: delete */ 7851 pl2 = pl + 1; 7852 ps2 = ps; 7853 while (*pl2 == *ps2) 7854 { 7855 if (*pl2 == NUL) /* reached the end */ 7856 return score + SCORE_DEL; 7857 ++pl2; 7858 ++ps2; 7859 } 7860 7861 /* 2: delete then swap, then rest must be equal */ 7862 if (pl2[0] == ps2[1] && pl2[1] == ps2[0] 7863 && STRCMP(pl2 + 2, ps2 + 2) == 0) 7864 return score + SCORE_DEL + SCORE_SWAP; 7865 7866 /* 3: delete then substitute, then the rest must be equal */ 7867 if (STRCMP(pl2 + 1, ps2 + 1) == 0) 7868 return score + SCORE_DEL + SCORE_SUBST; 7869 7870 /* 4: first swap then delete */ 7871 if (pl[0] == ps[1] && pl[1] == ps[0]) 7872 { 7873 pl2 = pl + 2; /* swap, skip two chars */ 7874 ps2 = ps + 2; 7875 while (*pl2 == *ps2) 7876 { 7877 ++pl2; 7878 ++ps2; 7879 } 7880 /* delete a char and then strings must be equal */ 7881 if (STRCMP(pl2 + 1, ps2) == 0) 7882 return score + SCORE_SWAP + SCORE_DEL; 7883 } 7884 7885 /* 5: first substitute then delete */ 7886 pl2 = pl + 1; /* substitute, skip one char */ 7887 ps2 = ps + 1; 7888 while (*pl2 == *ps2) 7889 { 7890 ++pl2; 7891 ++ps2; 7892 } 7893 /* delete a char and then strings must be equal */ 7894 if (STRCMP(pl2 + 1, ps2) == 0) 7895 return score + SCORE_SUBST + SCORE_DEL; 7896 7897 /* Failed to compare. */ 7898 break; 7899 7900 case 0: 7901 /* 7902 * Lengths are equal, thus changes must result in same length: An 7903 * insert is only possible in combination with a delete. 7904 * 1: check if for identical strings 7905 */ 7906 if (*pl == NUL) 7907 return score; 7908 7909 /* 2: swap */ 7910 if (pl[0] == ps[1] && pl[1] == ps[0]) 7911 { 7912 pl2 = pl + 2; /* swap, skip two chars */ 7913 ps2 = ps + 2; 7914 while (*pl2 == *ps2) 7915 { 7916 if (*pl2 == NUL) /* reached the end */ 7917 return score + SCORE_SWAP; 7918 ++pl2; 7919 ++ps2; 7920 } 7921 /* 3: swap and swap again */ 7922 if (pl2[0] == ps2[1] && pl2[1] == ps2[0] 7923 && STRCMP(pl2 + 2, ps2 + 2) == 0) 7924 return score + SCORE_SWAP + SCORE_SWAP; 7925 7926 /* 4: swap and substitute */ 7927 if (STRCMP(pl2 + 1, ps2 + 1) == 0) 7928 return score + SCORE_SWAP + SCORE_SUBST; 7929 } 7930 7931 /* 5: substitute */ 7932 pl2 = pl + 1; 7933 ps2 = ps + 1; 7934 while (*pl2 == *ps2) 7935 { 7936 if (*pl2 == NUL) /* reached the end */ 7937 return score + SCORE_SUBST; 7938 ++pl2; 7939 ++ps2; 7940 } 7941 7942 /* 6: substitute and swap */ 7943 if (pl2[0] == ps2[1] && pl2[1] == ps2[0] 7944 && STRCMP(pl2 + 2, ps2 + 2) == 0) 7945 return score + SCORE_SUBST + SCORE_SWAP; 7946 7947 /* 7: substitute and substitute */ 7948 if (STRCMP(pl2 + 1, ps2 + 1) == 0) 7949 return score + SCORE_SUBST + SCORE_SUBST; 7950 7951 /* 8: insert then delete */ 7952 pl2 = pl; 7953 ps2 = ps + 1; 7954 while (*pl2 == *ps2) 7955 { 7956 ++pl2; 7957 ++ps2; 7958 } 7959 if (STRCMP(pl2 + 1, ps2) == 0) 7960 return score + SCORE_INS + SCORE_DEL; 7961 7962 /* 9: delete then insert */ 7963 pl2 = pl + 1; 7964 ps2 = ps; 7965 while (*pl2 == *ps2) 7966 { 7967 ++pl2; 7968 ++ps2; 7969 } 7970 if (STRCMP(pl2, ps2 + 1) == 0) 7971 return score + SCORE_INS + SCORE_DEL; 7972 7973 /* Failed to compare. */ 7974 break; 7975 } 7976 7977 return SCORE_MAXMAX; 7978 } 7979 7980 /* 7981 * Compute the "edit distance" to turn "badword" into "goodword". The less 7982 * deletes/inserts/substitutes/swaps are required the lower the score. 7983 * 7984 * The algorithm is described by Du and Chang, 1992. 7985 * The implementation of the algorithm comes from Aspell editdist.cpp, 7986 * edit_distance(). It has been converted from C++ to C and modified to 7987 * support multi-byte characters. 7988 */ 7989 static int 7990 spell_edit_score( 7991 slang_T *slang, 7992 char_u *badword, 7993 char_u *goodword) 7994 { 7995 int *cnt; 7996 int badlen, goodlen; /* lengths including NUL */ 7997 int j, i; 7998 int t; 7999 int bc, gc; 8000 int pbc, pgc; 8001 #ifdef FEAT_MBYTE 8002 char_u *p; 8003 int wbadword[MAXWLEN]; 8004 int wgoodword[MAXWLEN]; 8005 8006 if (has_mbyte) 8007 { 8008 /* Get the characters from the multi-byte strings and put them in an 8009 * int array for easy access. */ 8010 for (p = badword, badlen = 0; *p != NUL; ) 8011 wbadword[badlen++] = mb_cptr2char_adv(&p); 8012 wbadword[badlen++] = 0; 8013 for (p = goodword, goodlen = 0; *p != NUL; ) 8014 wgoodword[goodlen++] = mb_cptr2char_adv(&p); 8015 wgoodword[goodlen++] = 0; 8016 } 8017 else 8018 #endif 8019 { 8020 badlen = (int)STRLEN(badword) + 1; 8021 goodlen = (int)STRLEN(goodword) + 1; 8022 } 8023 8024 /* We use "cnt" as an array: CNT(badword_idx, goodword_idx). */ 8025 #define CNT(a, b) cnt[(a) + (b) * (badlen + 1)] 8026 cnt = (int *)lalloc((long_u)(sizeof(int) * (badlen + 1) * (goodlen + 1)), 8027 TRUE); 8028 if (cnt == NULL) 8029 return 0; /* out of memory */ 8030 8031 CNT(0, 0) = 0; 8032 for (j = 1; j <= goodlen; ++j) 8033 CNT(0, j) = CNT(0, j - 1) + SCORE_INS; 8034 8035 for (i = 1; i <= badlen; ++i) 8036 { 8037 CNT(i, 0) = CNT(i - 1, 0) + SCORE_DEL; 8038 for (j = 1; j <= goodlen; ++j) 8039 { 8040 #ifdef FEAT_MBYTE 8041 if (has_mbyte) 8042 { 8043 bc = wbadword[i - 1]; 8044 gc = wgoodword[j - 1]; 8045 } 8046 else 8047 #endif 8048 { 8049 bc = badword[i - 1]; 8050 gc = goodword[j - 1]; 8051 } 8052 if (bc == gc) 8053 CNT(i, j) = CNT(i - 1, j - 1); 8054 else 8055 { 8056 /* Use a better score when there is only a case difference. */ 8057 if (SPELL_TOFOLD(bc) == SPELL_TOFOLD(gc)) 8058 CNT(i, j) = SCORE_ICASE + CNT(i - 1, j - 1); 8059 else 8060 { 8061 /* For a similar character use SCORE_SIMILAR. */ 8062 if (slang != NULL 8063 && slang->sl_has_map 8064 && similar_chars(slang, gc, bc)) 8065 CNT(i, j) = SCORE_SIMILAR + CNT(i - 1, j - 1); 8066 else 8067 CNT(i, j) = SCORE_SUBST + CNT(i - 1, j - 1); 8068 } 8069 8070 if (i > 1 && j > 1) 8071 { 8072 #ifdef FEAT_MBYTE 8073 if (has_mbyte) 8074 { 8075 pbc = wbadword[i - 2]; 8076 pgc = wgoodword[j - 2]; 8077 } 8078 else 8079 #endif 8080 { 8081 pbc = badword[i - 2]; 8082 pgc = goodword[j - 2]; 8083 } 8084 if (bc == pgc && pbc == gc) 8085 { 8086 t = SCORE_SWAP + CNT(i - 2, j - 2); 8087 if (t < CNT(i, j)) 8088 CNT(i, j) = t; 8089 } 8090 } 8091 t = SCORE_DEL + CNT(i - 1, j); 8092 if (t < CNT(i, j)) 8093 CNT(i, j) = t; 8094 t = SCORE_INS + CNT(i, j - 1); 8095 if (t < CNT(i, j)) 8096 CNT(i, j) = t; 8097 } 8098 } 8099 } 8100 8101 i = CNT(badlen - 1, goodlen - 1); 8102 vim_free(cnt); 8103 return i; 8104 } 8105 8106 typedef struct 8107 { 8108 int badi; 8109 int goodi; 8110 int score; 8111 } limitscore_T; 8112 8113 /* 8114 * Like spell_edit_score(), but with a limit on the score to make it faster. 8115 * May return SCORE_MAXMAX when the score is higher than "limit". 8116 * 8117 * This uses a stack for the edits still to be tried. 8118 * The idea comes from Aspell leditdist.cpp. Rewritten in C and added support 8119 * for multi-byte characters. 8120 */ 8121 static int 8122 spell_edit_score_limit( 8123 slang_T *slang, 8124 char_u *badword, 8125 char_u *goodword, 8126 int limit) 8127 { 8128 limitscore_T stack[10]; /* allow for over 3 * 2 edits */ 8129 int stackidx; 8130 int bi, gi; 8131 int bi2, gi2; 8132 int bc, gc; 8133 int score; 8134 int score_off; 8135 int minscore; 8136 int round; 8137 8138 #ifdef FEAT_MBYTE 8139 /* Multi-byte characters require a bit more work, use a different function 8140 * to avoid testing "has_mbyte" quite often. */ 8141 if (has_mbyte) 8142 return spell_edit_score_limit_w(slang, badword, goodword, limit); 8143 #endif 8144 8145 /* 8146 * The idea is to go from start to end over the words. So long as 8147 * characters are equal just continue, this always gives the lowest score. 8148 * When there is a difference try several alternatives. Each alternative 8149 * increases "score" for the edit distance. Some of the alternatives are 8150 * pushed unto a stack and tried later, some are tried right away. At the 8151 * end of the word the score for one alternative is known. The lowest 8152 * possible score is stored in "minscore". 8153 */ 8154 stackidx = 0; 8155 bi = 0; 8156 gi = 0; 8157 score = 0; 8158 minscore = limit + 1; 8159 8160 for (;;) 8161 { 8162 /* Skip over an equal part, score remains the same. */ 8163 for (;;) 8164 { 8165 bc = badword[bi]; 8166 gc = goodword[gi]; 8167 if (bc != gc) /* stop at a char that's different */ 8168 break; 8169 if (bc == NUL) /* both words end */ 8170 { 8171 if (score < minscore) 8172 minscore = score; 8173 goto pop; /* do next alternative */ 8174 } 8175 ++bi; 8176 ++gi; 8177 } 8178 8179 if (gc == NUL) /* goodword ends, delete badword chars */ 8180 { 8181 do 8182 { 8183 if ((score += SCORE_DEL) >= minscore) 8184 goto pop; /* do next alternative */ 8185 } while (badword[++bi] != NUL); 8186 minscore = score; 8187 } 8188 else if (bc == NUL) /* badword ends, insert badword chars */ 8189 { 8190 do 8191 { 8192 if ((score += SCORE_INS) >= minscore) 8193 goto pop; /* do next alternative */ 8194 } while (goodword[++gi] != NUL); 8195 minscore = score; 8196 } 8197 else /* both words continue */ 8198 { 8199 /* If not close to the limit, perform a change. Only try changes 8200 * that may lead to a lower score than "minscore". 8201 * round 0: try deleting a char from badword 8202 * round 1: try inserting a char in badword */ 8203 for (round = 0; round <= 1; ++round) 8204 { 8205 score_off = score + (round == 0 ? SCORE_DEL : SCORE_INS); 8206 if (score_off < minscore) 8207 { 8208 if (score_off + SCORE_EDIT_MIN >= minscore) 8209 { 8210 /* Near the limit, rest of the words must match. We 8211 * can check that right now, no need to push an item 8212 * onto the stack. */ 8213 bi2 = bi + 1 - round; 8214 gi2 = gi + round; 8215 while (goodword[gi2] == badword[bi2]) 8216 { 8217 if (goodword[gi2] == NUL) 8218 { 8219 minscore = score_off; 8220 break; 8221 } 8222 ++bi2; 8223 ++gi2; 8224 } 8225 } 8226 else 8227 { 8228 /* try deleting/inserting a character later */ 8229 stack[stackidx].badi = bi + 1 - round; 8230 stack[stackidx].goodi = gi + round; 8231 stack[stackidx].score = score_off; 8232 ++stackidx; 8233 } 8234 } 8235 } 8236 8237 if (score + SCORE_SWAP < minscore) 8238 { 8239 /* If swapping two characters makes a match then the 8240 * substitution is more expensive, thus there is no need to 8241 * try both. */ 8242 if (gc == badword[bi + 1] && bc == goodword[gi + 1]) 8243 { 8244 /* Swap two characters, that is: skip them. */ 8245 gi += 2; 8246 bi += 2; 8247 score += SCORE_SWAP; 8248 continue; 8249 } 8250 } 8251 8252 /* Substitute one character for another which is the same 8253 * thing as deleting a character from both goodword and badword. 8254 * Use a better score when there is only a case difference. */ 8255 if (SPELL_TOFOLD(bc) == SPELL_TOFOLD(gc)) 8256 score += SCORE_ICASE; 8257 else 8258 { 8259 /* For a similar character use SCORE_SIMILAR. */ 8260 if (slang != NULL 8261 && slang->sl_has_map 8262 && similar_chars(slang, gc, bc)) 8263 score += SCORE_SIMILAR; 8264 else 8265 score += SCORE_SUBST; 8266 } 8267 8268 if (score < minscore) 8269 { 8270 /* Do the substitution. */ 8271 ++gi; 8272 ++bi; 8273 continue; 8274 } 8275 } 8276 pop: 8277 /* 8278 * Get here to try the next alternative, pop it from the stack. 8279 */ 8280 if (stackidx == 0) /* stack is empty, finished */ 8281 break; 8282 8283 /* pop an item from the stack */ 8284 --stackidx; 8285 gi = stack[stackidx].goodi; 8286 bi = stack[stackidx].badi; 8287 score = stack[stackidx].score; 8288 } 8289 8290 /* When the score goes over "limit" it may actually be much higher. 8291 * Return a very large number to avoid going below the limit when giving a 8292 * bonus. */ 8293 if (minscore > limit) 8294 return SCORE_MAXMAX; 8295 return minscore; 8296 } 8297 8298 #ifdef FEAT_MBYTE 8299 /* 8300 * Multi-byte version of spell_edit_score_limit(). 8301 * Keep it in sync with the above! 8302 */ 8303 static int 8304 spell_edit_score_limit_w( 8305 slang_T *slang, 8306 char_u *badword, 8307 char_u *goodword, 8308 int limit) 8309 { 8310 limitscore_T stack[10]; /* allow for over 3 * 2 edits */ 8311 int stackidx; 8312 int bi, gi; 8313 int bi2, gi2; 8314 int bc, gc; 8315 int score; 8316 int score_off; 8317 int minscore; 8318 int round; 8319 char_u *p; 8320 int wbadword[MAXWLEN]; 8321 int wgoodword[MAXWLEN]; 8322 8323 /* Get the characters from the multi-byte strings and put them in an 8324 * int array for easy access. */ 8325 bi = 0; 8326 for (p = badword; *p != NUL; ) 8327 wbadword[bi++] = mb_cptr2char_adv(&p); 8328 wbadword[bi++] = 0; 8329 gi = 0; 8330 for (p = goodword; *p != NUL; ) 8331 wgoodword[gi++] = mb_cptr2char_adv(&p); 8332 wgoodword[gi++] = 0; 8333 8334 /* 8335 * The idea is to go from start to end over the words. So long as 8336 * characters are equal just continue, this always gives the lowest score. 8337 * When there is a difference try several alternatives. Each alternative 8338 * increases "score" for the edit distance. Some of the alternatives are 8339 * pushed unto a stack and tried later, some are tried right away. At the 8340 * end of the word the score for one alternative is known. The lowest 8341 * possible score is stored in "minscore". 8342 */ 8343 stackidx = 0; 8344 bi = 0; 8345 gi = 0; 8346 score = 0; 8347 minscore = limit + 1; 8348 8349 for (;;) 8350 { 8351 /* Skip over an equal part, score remains the same. */ 8352 for (;;) 8353 { 8354 bc = wbadword[bi]; 8355 gc = wgoodword[gi]; 8356 8357 if (bc != gc) /* stop at a char that's different */ 8358 break; 8359 if (bc == NUL) /* both words end */ 8360 { 8361 if (score < minscore) 8362 minscore = score; 8363 goto pop; /* do next alternative */ 8364 } 8365 ++bi; 8366 ++gi; 8367 } 8368 8369 if (gc == NUL) /* goodword ends, delete badword chars */ 8370 { 8371 do 8372 { 8373 if ((score += SCORE_DEL) >= minscore) 8374 goto pop; /* do next alternative */ 8375 } while (wbadword[++bi] != NUL); 8376 minscore = score; 8377 } 8378 else if (bc == NUL) /* badword ends, insert badword chars */ 8379 { 8380 do 8381 { 8382 if ((score += SCORE_INS) >= minscore) 8383 goto pop; /* do next alternative */ 8384 } while (wgoodword[++gi] != NUL); 8385 minscore = score; 8386 } 8387 else /* both words continue */ 8388 { 8389 /* If not close to the limit, perform a change. Only try changes 8390 * that may lead to a lower score than "minscore". 8391 * round 0: try deleting a char from badword 8392 * round 1: try inserting a char in badword */ 8393 for (round = 0; round <= 1; ++round) 8394 { 8395 score_off = score + (round == 0 ? SCORE_DEL : SCORE_INS); 8396 if (score_off < minscore) 8397 { 8398 if (score_off + SCORE_EDIT_MIN >= minscore) 8399 { 8400 /* Near the limit, rest of the words must match. We 8401 * can check that right now, no need to push an item 8402 * onto the stack. */ 8403 bi2 = bi + 1 - round; 8404 gi2 = gi + round; 8405 while (wgoodword[gi2] == wbadword[bi2]) 8406 { 8407 if (wgoodword[gi2] == NUL) 8408 { 8409 minscore = score_off; 8410 break; 8411 } 8412 ++bi2; 8413 ++gi2; 8414 } 8415 } 8416 else 8417 { 8418 /* try deleting a character from badword later */ 8419 stack[stackidx].badi = bi + 1 - round; 8420 stack[stackidx].goodi = gi + round; 8421 stack[stackidx].score = score_off; 8422 ++stackidx; 8423 } 8424 } 8425 } 8426 8427 if (score + SCORE_SWAP < minscore) 8428 { 8429 /* If swapping two characters makes a match then the 8430 * substitution is more expensive, thus there is no need to 8431 * try both. */ 8432 if (gc == wbadword[bi + 1] && bc == wgoodword[gi + 1]) 8433 { 8434 /* Swap two characters, that is: skip them. */ 8435 gi += 2; 8436 bi += 2; 8437 score += SCORE_SWAP; 8438 continue; 8439 } 8440 } 8441 8442 /* Substitute one character for another which is the same 8443 * thing as deleting a character from both goodword and badword. 8444 * Use a better score when there is only a case difference. */ 8445 if (SPELL_TOFOLD(bc) == SPELL_TOFOLD(gc)) 8446 score += SCORE_ICASE; 8447 else 8448 { 8449 /* For a similar character use SCORE_SIMILAR. */ 8450 if (slang != NULL 8451 && slang->sl_has_map 8452 && similar_chars(slang, gc, bc)) 8453 score += SCORE_SIMILAR; 8454 else 8455 score += SCORE_SUBST; 8456 } 8457 8458 if (score < minscore) 8459 { 8460 /* Do the substitution. */ 8461 ++gi; 8462 ++bi; 8463 continue; 8464 } 8465 } 8466 pop: 8467 /* 8468 * Get here to try the next alternative, pop it from the stack. 8469 */ 8470 if (stackidx == 0) /* stack is empty, finished */ 8471 break; 8472 8473 /* pop an item from the stack */ 8474 --stackidx; 8475 gi = stack[stackidx].goodi; 8476 bi = stack[stackidx].badi; 8477 score = stack[stackidx].score; 8478 } 8479 8480 /* When the score goes over "limit" it may actually be much higher. 8481 * Return a very large number to avoid going below the limit when giving a 8482 * bonus. */ 8483 if (minscore > limit) 8484 return SCORE_MAXMAX; 8485 return minscore; 8486 } 8487 #endif 8488 8489 /* 8490 * ":spellinfo" 8491 */ 8492 void 8493 ex_spellinfo(exarg_T *eap UNUSED) 8494 { 8495 int lpi; 8496 langp_T *lp; 8497 char_u *p; 8498 8499 if (no_spell_checking(curwin)) 8500 return; 8501 8502 msg_start(); 8503 for (lpi = 0; lpi < curwin->w_s->b_langp.ga_len && !got_int; ++lpi) 8504 { 8505 lp = LANGP_ENTRY(curwin->w_s->b_langp, lpi); 8506 msg_puts((char_u *)"file: "); 8507 msg_puts(lp->lp_slang->sl_fname); 8508 msg_putchar('\n'); 8509 p = lp->lp_slang->sl_info; 8510 if (p != NULL) 8511 { 8512 msg_puts(p); 8513 msg_putchar('\n'); 8514 } 8515 } 8516 msg_end(); 8517 } 8518 8519 #define DUMPFLAG_KEEPCASE 1 /* round 2: keep-case tree */ 8520 #define DUMPFLAG_COUNT 2 /* include word count */ 8521 #define DUMPFLAG_ICASE 4 /* ignore case when finding matches */ 8522 #define DUMPFLAG_ONECAP 8 /* pattern starts with capital */ 8523 #define DUMPFLAG_ALLCAP 16 /* pattern is all capitals */ 8524 8525 /* 8526 * ":spelldump" 8527 */ 8528 void 8529 ex_spelldump(exarg_T *eap) 8530 { 8531 char_u *spl; 8532 long dummy; 8533 8534 if (no_spell_checking(curwin)) 8535 return; 8536 get_option_value((char_u*)"spl", &dummy, &spl, OPT_LOCAL); 8537 8538 /* Create a new empty buffer in a new window. */ 8539 do_cmdline_cmd((char_u *)"new"); 8540 8541 /* enable spelling locally in the new window */ 8542 set_option_value((char_u*)"spell", TRUE, (char_u*)"", OPT_LOCAL); 8543 set_option_value((char_u*)"spl", dummy, spl, OPT_LOCAL); 8544 vim_free(spl); 8545 8546 if (!BUFEMPTY()) 8547 return; 8548 8549 spell_dump_compl(NULL, 0, NULL, eap->forceit ? DUMPFLAG_COUNT : 0); 8550 8551 /* Delete the empty line that we started with. */ 8552 if (curbuf->b_ml.ml_line_count > 1) 8553 ml_delete(curbuf->b_ml.ml_line_count, FALSE); 8554 8555 redraw_later(NOT_VALID); 8556 } 8557 8558 /* 8559 * Go through all possible words and: 8560 * 1. When "pat" is NULL: dump a list of all words in the current buffer. 8561 * "ic" and "dir" are not used. 8562 * 2. When "pat" is not NULL: add matching words to insert mode completion. 8563 */ 8564 void 8565 spell_dump_compl( 8566 char_u *pat, /* leading part of the word */ 8567 int ic, /* ignore case */ 8568 int *dir, /* direction for adding matches */ 8569 int dumpflags_arg) /* DUMPFLAG_* */ 8570 { 8571 langp_T *lp; 8572 slang_T *slang; 8573 idx_T arridx[MAXWLEN]; 8574 int curi[MAXWLEN]; 8575 char_u word[MAXWLEN]; 8576 int c; 8577 char_u *byts; 8578 idx_T *idxs; 8579 linenr_T lnum = 0; 8580 int round; 8581 int depth; 8582 int n; 8583 int flags; 8584 char_u *region_names = NULL; /* region names being used */ 8585 int do_region = TRUE; /* dump region names and numbers */ 8586 char_u *p; 8587 int lpi; 8588 int dumpflags = dumpflags_arg; 8589 int patlen; 8590 8591 /* When ignoring case or when the pattern starts with capital pass this on 8592 * to dump_word(). */ 8593 if (pat != NULL) 8594 { 8595 if (ic) 8596 dumpflags |= DUMPFLAG_ICASE; 8597 else 8598 { 8599 n = captype(pat, NULL); 8600 if (n == WF_ONECAP) 8601 dumpflags |= DUMPFLAG_ONECAP; 8602 else if (n == WF_ALLCAP 8603 #ifdef FEAT_MBYTE 8604 && (int)STRLEN(pat) > mb_ptr2len(pat) 8605 #else 8606 && (int)STRLEN(pat) > 1 8607 #endif 8608 ) 8609 dumpflags |= DUMPFLAG_ALLCAP; 8610 } 8611 } 8612 8613 /* Find out if we can support regions: All languages must support the same 8614 * regions or none at all. */ 8615 for (lpi = 0; lpi < curwin->w_s->b_langp.ga_len; ++lpi) 8616 { 8617 lp = LANGP_ENTRY(curwin->w_s->b_langp, lpi); 8618 p = lp->lp_slang->sl_regions; 8619 if (p[0] != 0) 8620 { 8621 if (region_names == NULL) /* first language with regions */ 8622 region_names = p; 8623 else if (STRCMP(region_names, p) != 0) 8624 { 8625 do_region = FALSE; /* region names are different */ 8626 break; 8627 } 8628 } 8629 } 8630 8631 if (do_region && region_names != NULL) 8632 { 8633 if (pat == NULL) 8634 { 8635 vim_snprintf((char *)IObuff, IOSIZE, "/regions=%s", region_names); 8636 ml_append(lnum++, IObuff, (colnr_T)0, FALSE); 8637 } 8638 } 8639 else 8640 do_region = FALSE; 8641 8642 /* 8643 * Loop over all files loaded for the entries in 'spelllang'. 8644 */ 8645 for (lpi = 0; lpi < curwin->w_s->b_langp.ga_len; ++lpi) 8646 { 8647 lp = LANGP_ENTRY(curwin->w_s->b_langp, lpi); 8648 slang = lp->lp_slang; 8649 if (slang->sl_fbyts == NULL) /* reloading failed */ 8650 continue; 8651 8652 if (pat == NULL) 8653 { 8654 vim_snprintf((char *)IObuff, IOSIZE, "# file: %s", slang->sl_fname); 8655 ml_append(lnum++, IObuff, (colnr_T)0, FALSE); 8656 } 8657 8658 /* When matching with a pattern and there are no prefixes only use 8659 * parts of the tree that match "pat". */ 8660 if (pat != NULL && slang->sl_pbyts == NULL) 8661 patlen = (int)STRLEN(pat); 8662 else 8663 patlen = -1; 8664 8665 /* round 1: case-folded tree 8666 * round 2: keep-case tree */ 8667 for (round = 1; round <= 2; ++round) 8668 { 8669 if (round == 1) 8670 { 8671 dumpflags &= ~DUMPFLAG_KEEPCASE; 8672 byts = slang->sl_fbyts; 8673 idxs = slang->sl_fidxs; 8674 } 8675 else 8676 { 8677 dumpflags |= DUMPFLAG_KEEPCASE; 8678 byts = slang->sl_kbyts; 8679 idxs = slang->sl_kidxs; 8680 } 8681 if (byts == NULL) 8682 continue; /* array is empty */ 8683 8684 depth = 0; 8685 arridx[0] = 0; 8686 curi[0] = 1; 8687 while (depth >= 0 && !got_int 8688 && (pat == NULL || !compl_interrupted)) 8689 { 8690 if (curi[depth] > byts[arridx[depth]]) 8691 { 8692 /* Done all bytes at this node, go up one level. */ 8693 --depth; 8694 line_breakcheck(); 8695 ins_compl_check_keys(50, FALSE); 8696 } 8697 else 8698 { 8699 /* Do one more byte at this node. */ 8700 n = arridx[depth] + curi[depth]; 8701 ++curi[depth]; 8702 c = byts[n]; 8703 if (c == 0) 8704 { 8705 /* End of word, deal with the word. 8706 * Don't use keep-case words in the fold-case tree, 8707 * they will appear in the keep-case tree. 8708 * Only use the word when the region matches. */ 8709 flags = (int)idxs[n]; 8710 if ((round == 2 || (flags & WF_KEEPCAP) == 0) 8711 && (flags & WF_NEEDCOMP) == 0 8712 && (do_region 8713 || (flags & WF_REGION) == 0 8714 || (((unsigned)flags >> 16) 8715 & lp->lp_region) != 0)) 8716 { 8717 word[depth] = NUL; 8718 if (!do_region) 8719 flags &= ~WF_REGION; 8720 8721 /* Dump the basic word if there is no prefix or 8722 * when it's the first one. */ 8723 c = (unsigned)flags >> 24; 8724 if (c == 0 || curi[depth] == 2) 8725 { 8726 dump_word(slang, word, pat, dir, 8727 dumpflags, flags, lnum); 8728 if (pat == NULL) 8729 ++lnum; 8730 } 8731 8732 /* Apply the prefix, if there is one. */ 8733 if (c != 0) 8734 lnum = dump_prefixes(slang, word, pat, dir, 8735 dumpflags, flags, lnum); 8736 } 8737 } 8738 else 8739 { 8740 /* Normal char, go one level deeper. */ 8741 word[depth++] = c; 8742 arridx[depth] = idxs[n]; 8743 curi[depth] = 1; 8744 8745 /* Check if this characters matches with the pattern. 8746 * If not skip the whole tree below it. 8747 * Always ignore case here, dump_word() will check 8748 * proper case later. This isn't exactly right when 8749 * length changes for multi-byte characters with 8750 * ignore case... */ 8751 if (depth <= patlen 8752 && MB_STRNICMP(word, pat, depth) != 0) 8753 --depth; 8754 } 8755 } 8756 } 8757 } 8758 } 8759 } 8760 8761 /* 8762 * Dump one word: apply case modifications and append a line to the buffer. 8763 * When "lnum" is zero add insert mode completion. 8764 */ 8765 static void 8766 dump_word( 8767 slang_T *slang, 8768 char_u *word, 8769 char_u *pat, 8770 int *dir, 8771 int dumpflags, 8772 int wordflags, 8773 linenr_T lnum) 8774 { 8775 int keepcap = FALSE; 8776 char_u *p; 8777 char_u *tw; 8778 char_u cword[MAXWLEN]; 8779 char_u badword[MAXWLEN + 10]; 8780 int i; 8781 int flags = wordflags; 8782 8783 if (dumpflags & DUMPFLAG_ONECAP) 8784 flags |= WF_ONECAP; 8785 if (dumpflags & DUMPFLAG_ALLCAP) 8786 flags |= WF_ALLCAP; 8787 8788 if ((dumpflags & DUMPFLAG_KEEPCASE) == 0 && (flags & WF_CAPMASK) != 0) 8789 { 8790 /* Need to fix case according to "flags". */ 8791 make_case_word(word, cword, flags); 8792 p = cword; 8793 } 8794 else 8795 { 8796 p = word; 8797 if ((dumpflags & DUMPFLAG_KEEPCASE) 8798 && ((captype(word, NULL) & WF_KEEPCAP) == 0 8799 || (flags & WF_FIXCAP) != 0)) 8800 keepcap = TRUE; 8801 } 8802 tw = p; 8803 8804 if (pat == NULL) 8805 { 8806 /* Add flags and regions after a slash. */ 8807 if ((flags & (WF_BANNED | WF_RARE | WF_REGION)) || keepcap) 8808 { 8809 STRCPY(badword, p); 8810 STRCAT(badword, "/"); 8811 if (keepcap) 8812 STRCAT(badword, "="); 8813 if (flags & WF_BANNED) 8814 STRCAT(badword, "!"); 8815 else if (flags & WF_RARE) 8816 STRCAT(badword, "?"); 8817 if (flags & WF_REGION) 8818 for (i = 0; i < 7; ++i) 8819 if (flags & (0x10000 << i)) 8820 sprintf((char *)badword + STRLEN(badword), "%d", i + 1); 8821 p = badword; 8822 } 8823 8824 if (dumpflags & DUMPFLAG_COUNT) 8825 { 8826 hashitem_T *hi; 8827 8828 /* Include the word count for ":spelldump!". */ 8829 hi = hash_find(&slang->sl_wordcount, tw); 8830 if (!HASHITEM_EMPTY(hi)) 8831 { 8832 vim_snprintf((char *)IObuff, IOSIZE, "%s\t%d", 8833 tw, HI2WC(hi)->wc_count); 8834 p = IObuff; 8835 } 8836 } 8837 8838 ml_append(lnum, p, (colnr_T)0, FALSE); 8839 } 8840 else if (((dumpflags & DUMPFLAG_ICASE) 8841 ? MB_STRNICMP(p, pat, STRLEN(pat)) == 0 8842 : STRNCMP(p, pat, STRLEN(pat)) == 0) 8843 && ins_compl_add_infercase(p, (int)STRLEN(p), 8844 p_ic, NULL, *dir, 0) == OK) 8845 /* if dir was BACKWARD then honor it just once */ 8846 *dir = FORWARD; 8847 } 8848 8849 /* 8850 * For ":spelldump": Find matching prefixes for "word". Prepend each to 8851 * "word" and append a line to the buffer. 8852 * When "lnum" is zero add insert mode completion. 8853 * Return the updated line number. 8854 */ 8855 static linenr_T 8856 dump_prefixes( 8857 slang_T *slang, 8858 char_u *word, /* case-folded word */ 8859 char_u *pat, 8860 int *dir, 8861 int dumpflags, 8862 int flags, /* flags with prefix ID */ 8863 linenr_T startlnum) 8864 { 8865 idx_T arridx[MAXWLEN]; 8866 int curi[MAXWLEN]; 8867 char_u prefix[MAXWLEN]; 8868 char_u word_up[MAXWLEN]; 8869 int has_word_up = FALSE; 8870 int c; 8871 char_u *byts; 8872 idx_T *idxs; 8873 linenr_T lnum = startlnum; 8874 int depth; 8875 int n; 8876 int len; 8877 int i; 8878 8879 /* If the word starts with a lower-case letter make the word with an 8880 * upper-case letter in word_up[]. */ 8881 c = PTR2CHAR(word); 8882 if (SPELL_TOUPPER(c) != c) 8883 { 8884 onecap_copy(word, word_up, TRUE); 8885 has_word_up = TRUE; 8886 } 8887 8888 byts = slang->sl_pbyts; 8889 idxs = slang->sl_pidxs; 8890 if (byts != NULL) /* array not is empty */ 8891 { 8892 /* 8893 * Loop over all prefixes, building them byte-by-byte in prefix[]. 8894 * When at the end of a prefix check that it supports "flags". 8895 */ 8896 depth = 0; 8897 arridx[0] = 0; 8898 curi[0] = 1; 8899 while (depth >= 0 && !got_int) 8900 { 8901 n = arridx[depth]; 8902 len = byts[n]; 8903 if (curi[depth] > len) 8904 { 8905 /* Done all bytes at this node, go up one level. */ 8906 --depth; 8907 line_breakcheck(); 8908 } 8909 else 8910 { 8911 /* Do one more byte at this node. */ 8912 n += curi[depth]; 8913 ++curi[depth]; 8914 c = byts[n]; 8915 if (c == 0) 8916 { 8917 /* End of prefix, find out how many IDs there are. */ 8918 for (i = 1; i < len; ++i) 8919 if (byts[n + i] != 0) 8920 break; 8921 curi[depth] += i - 1; 8922 8923 c = valid_word_prefix(i, n, flags, word, slang, FALSE); 8924 if (c != 0) 8925 { 8926 vim_strncpy(prefix + depth, word, MAXWLEN - depth - 1); 8927 dump_word(slang, prefix, pat, dir, dumpflags, 8928 (c & WF_RAREPFX) ? (flags | WF_RARE) 8929 : flags, lnum); 8930 if (lnum != 0) 8931 ++lnum; 8932 } 8933 8934 /* Check for prefix that matches the word when the 8935 * first letter is upper-case, but only if the prefix has 8936 * a condition. */ 8937 if (has_word_up) 8938 { 8939 c = valid_word_prefix(i, n, flags, word_up, slang, 8940 TRUE); 8941 if (c != 0) 8942 { 8943 vim_strncpy(prefix + depth, word_up, 8944 MAXWLEN - depth - 1); 8945 dump_word(slang, prefix, pat, dir, dumpflags, 8946 (c & WF_RAREPFX) ? (flags | WF_RARE) 8947 : flags, lnum); 8948 if (lnum != 0) 8949 ++lnum; 8950 } 8951 } 8952 } 8953 else 8954 { 8955 /* Normal char, go one level deeper. */ 8956 prefix[depth++] = c; 8957 arridx[depth] = idxs[n]; 8958 curi[depth] = 1; 8959 } 8960 } 8961 } 8962 } 8963 8964 return lnum; 8965 } 8966 8967 /* 8968 * Move "p" to the end of word "start". 8969 * Uses the spell-checking word characters. 8970 */ 8971 char_u * 8972 spell_to_word_end(char_u *start, win_T *win) 8973 { 8974 char_u *p = start; 8975 8976 while (*p != NUL && spell_iswordp(p, win)) 8977 MB_PTR_ADV(p); 8978 return p; 8979 } 8980 8981 #if defined(FEAT_INS_EXPAND) || defined(PROTO) 8982 /* 8983 * For Insert mode completion CTRL-X s: 8984 * Find start of the word in front of column "startcol". 8985 * We don't check if it is badly spelled, with completion we can only change 8986 * the word in front of the cursor. 8987 * Returns the column number of the word. 8988 */ 8989 int 8990 spell_word_start(int startcol) 8991 { 8992 char_u *line; 8993 char_u *p; 8994 int col = 0; 8995 8996 if (no_spell_checking(curwin)) 8997 return startcol; 8998 8999 /* Find a word character before "startcol". */ 9000 line = ml_get_curline(); 9001 for (p = line + startcol; p > line; ) 9002 { 9003 MB_PTR_BACK(line, p); 9004 if (spell_iswordp_nmw(p, curwin)) 9005 break; 9006 } 9007 9008 /* Go back to start of the word. */ 9009 while (p > line) 9010 { 9011 col = (int)(p - line); 9012 MB_PTR_BACK(line, p); 9013 if (!spell_iswordp(p, curwin)) 9014 break; 9015 col = 0; 9016 } 9017 9018 return col; 9019 } 9020 9021 /* 9022 * Need to check for 'spellcapcheck' now, the word is removed before 9023 * expand_spelling() is called. Therefore the ugly global variable. 9024 */ 9025 static int spell_expand_need_cap; 9026 9027 void 9028 spell_expand_check_cap(colnr_T col) 9029 { 9030 spell_expand_need_cap = check_need_cap(curwin->w_cursor.lnum, col); 9031 } 9032 9033 /* 9034 * Get list of spelling suggestions. 9035 * Used for Insert mode completion CTRL-X ?. 9036 * Returns the number of matches. The matches are in "matchp[]", array of 9037 * allocated strings. 9038 */ 9039 int 9040 expand_spelling( 9041 linenr_T lnum UNUSED, 9042 char_u *pat, 9043 char_u ***matchp) 9044 { 9045 garray_T ga; 9046 9047 spell_suggest_list(&ga, pat, 100, spell_expand_need_cap, TRUE); 9048 *matchp = ga.ga_data; 9049 return ga.ga_len; 9050 } 9051 #endif 9052 9053 #endif /* FEAT_SPELL */ 9054