1 /* vi:set ts=8 sts=4 sw=4: 2 * 3 * VIM - Vi IMproved by Bram Moolenaar 4 * 5 * Do ":help uganda" in Vim to read copying and usage conditions. 6 * Do ":help credits" in Vim to see a list of people who contributed. 7 * See README.txt for an overview of the Vim source code. 8 */ 9 10 /* 11 * spell.c: code for spell checking 12 * 13 * The spell checking mechanism uses a tree (aka trie). Each node in the tree 14 * has a list of bytes that can appear (siblings). For each byte there is a 15 * pointer to the node with the byte that follows in the word (child). 16 * 17 * A NUL byte is used where the word may end. The bytes are sorted, so that 18 * binary searching can be used and the NUL bytes are at the start. The 19 * number of possible bytes is stored before the list of bytes. 20 * 21 * The tree uses two arrays: "byts" stores the characters, "idxs" stores 22 * either the next index or flags. The tree starts at index 0. For example, 23 * to lookup "vi" this sequence is followed: 24 * i = 0 25 * len = byts[i] 26 * n = where "v" appears in byts[i + 1] to byts[i + len] 27 * i = idxs[n] 28 * len = byts[i] 29 * n = where "i" appears in byts[i + 1] to byts[i + len] 30 * i = idxs[n] 31 * len = byts[i] 32 * find that byts[i + 1] is 0, idxs[i + 1] has flags for "vi". 33 * 34 * There are two word trees: one with case-folded words and one with words in 35 * original case. The second one is only used for keep-case words and is 36 * usually small. 37 * 38 * There is one additional tree for when not all prefixes are applied when 39 * generating the .spl file. This tree stores all the possible prefixes, as 40 * if they were words. At each word (prefix) end the prefix nr is stored, the 41 * following word must support this prefix nr. And the condition nr is 42 * stored, used to lookup the condition that the word must match with. 43 * 44 * Thanks to Olaf Seibert for providing an example implementation of this tree 45 * and the compression mechanism. 
46 * 47 * Matching involves checking the caps type: Onecap ALLCAP KeepCap. 48 * 49 * Why doesn't Vim use aspell/ispell/myspell/etc.? 50 * See ":help develop-spell". 51 */ 52 53 /* Use SPELL_PRINTTREE for debugging: dump the word tree after adding a word. 54 * Only use it for small word lists! */ 55 #if 0 56 # define SPELL_PRINTTREE 57 #endif 58 59 /* 60 * Use this to adjust the score after finding suggestions, based on the 61 * suggested word sounding like the bad word. This is much faster than doing 62 * it for every possible suggestion. 63 * Disadvantage: When "the" is typed as "hte" it sounds different and goes 64 * down in the list. 65 * Used when 'spellsuggest' is set to "best". 66 */ 67 #define RESCORE(word_score, sound_score) ((3 * word_score + sound_score) / 4) 68 69 /* 70 * Vim spell file format: <HEADER> 71 * <SECTIONS> 72 * <LWORDTREE> 73 * <KWORDTREE> 74 * <PREFIXTREE> 75 * 76 * <HEADER>: <fileID> <versionnr> 77 * 78 * <fileID> 8 bytes "VIMspell" 79 * <versionnr> 1 byte VIMSPELLVERSION 80 * 81 * 82 * Sections make it possible to add information to the .spl file without 83 * making it incompatible with previous versions. There are two kinds of 84 * sections: 85 * 1. Not essential for correct spell checking. E.g. for making suggestions. 86 * These are skipped when not supported. 87 * 2. Optional information, but essential for spell checking when present. 88 * E.g. conditions for affixes. When this section is present but not 89 * supported an error message is given. 90 * 91 * <SECTIONS>: <section> ... <sectionend> 92 * 93 * <section>: <sectionID> <sectionflags> <sectionlen> (section contents) 94 * 95 * <sectionID> 1 byte number from 0 to 254 identifying the section 96 * 97 * <sectionflags> 1 byte SNF_REQUIRED: this section is required for correct 98 * spell checking 99 * 100 * <sectionlen> 4 bytes length of section contents, MSB first 101 * 102 * <sectionend> 1 byte SN_END 103 * 104 * 105 * sectionID == SN_REGION: <regionname> ... 
106 * <regionname> 2 bytes Up to 8 region names: ca, au, etc. Lower case. 107 * First <regionname> is region 1. 108 * 109 * sectionID == SN_CHARFLAGS: <charflagslen> <charflags> 110 * <folcharslen> <folchars> 111 * <charflagslen> 1 byte Number of bytes in <charflags> (should be 128). 112 * <charflags> N bytes List of flags (first one is for character 128): 113 * 0x01 word character CF_WORD 114 * 0x02 upper-case character CF_UPPER 115 * <folcharslen> 2 bytes Number of bytes in <folchars>. 116 * <folchars> N bytes Folded characters, first one is for character 128. 117 * 118 * sectionID == SN_MIDWORD: <midword> 119 * <midword> N bytes Characters that are word characters only when used 120 * in the middle of a word. 121 * 122 * sectionID == SN_PREFCOND: <prefcondcnt> <prefcond> ... 123 * <prefcondcnt> 2 bytes Number of <prefcond> items following. 124 * <prefcond> : <condlen> <condstr> 125 * <condlen> 1 byte Length of <condstr>. 126 * <condstr> N bytes Condition for the prefix. 127 * 128 * sectionID == SN_REP: <repcount> <rep> ... 129 * <repcount> 2 bytes number of <rep> items, MSB first. 130 * <rep> : <repfromlen> <repfrom> <reptolen> <repto> 131 * <repfromlen> 1 byte length of <repfrom> 132 * <repfrom> N bytes "from" part of replacement 133 * <reptolen> 1 byte length of <repto> 134 * <repto> N bytes "to" part of replacement 135 * 136 * sectionID == SN_SAL: <salflags> <salcount> <sal> ... 
137 * <salflags> 1 byte flags for soundsalike conversion: 138 * SAL_F0LLOWUP 139 * SAL_COLLAPSE 140 * SAL_REM_ACCENTS 141 * <salcount> 2 bytes number of <sal> items following 142 * <sal> : <salfromlen> <salfrom> <saltolen> <salto> 143 * <salfromlen> 1 byte length of <salfrom> 144 * <salfrom> N bytes "from" part of soundsalike 145 * <saltolen> 1 byte length of <salto> 146 * <salto> N bytes "to" part of soundsalike 147 * 148 * sectionID == SN_SOFO: <sofofromlen> <sofofrom> <sofotolen> <sofoto> 149 * <sofofromlen> 2 bytes length of <sofofrom> 150 * <sofofrom> N bytes "from" part of soundfold 151 * <sofotolen> 2 bytes length of <sofoto> 152 * <sofoto> N bytes "to" part of soundfold 153 * 154 * sectionID == SN_MAP: <mapstr> 155 * <mapstr> N bytes String with sequences of similar characters, 156 * separated by slashes. 157 * 158 * sectionID == SN_COMPOUND: <compmax> <compminlen> <compsylmax> <compflags> 159 * <compmax> 1 byte Maximum nr of words in compound word. 160 * <compminlen> 1 byte Minimal word length for compounding. 161 * <compsylmax> 1 byte Maximum nr of syllables in compound word. 162 * <compflags> N bytes Flags from COMPOUNDFLAGS items, separated by 163 * slashes. 164 * 165 * sectionID == SN_NOBREAK: (empty, its presence is enough) 166 * 167 * sectionID == SN_SYLLABLE: <syllable> 168 * <syllable> N bytes String from SYLLABLE item. 169 * 170 * <LWORDTREE>: <wordtree> 171 * 172 * <KWORDTREE>: <wordtree> 173 * 174 * <PREFIXTREE>: <wordtree> 175 * 176 * 177 * <wordtree>: <nodecount> <nodedata> ... 178 * 179 * <nodecount> 4 bytes Number of nodes following. MSB first. 180 * 181 * <nodedata>: <siblingcount> <sibling> ... 182 * 183 * <siblingcount> 1 byte Number of siblings in this node. The siblings 184 * follow in sorted order. 185 * 186 * <sibling>: <byte> [ <nodeidx> <xbyte> 187 * | <flags> [<flags2>] [<region>] [<affixID>] 188 * | [<pflags>] <affixID> <prefcondnr> ] 189 * 190 * <byte> 1 byte Byte value of the sibling. 
Special cases: 191 * BY_NOFLAGS: End of word without flags and for all 192 * regions. 193 * For PREFIXTREE <affixID> and 194 * <prefcondnr> follow. 195 * BY_FLAGS: End of word, <flags> follow. 196 * For PREFIXTREE <pflags>, <affixID> 197 * and <prefcondnr> follow. 198 * BY_FLAGS2: End of word, <flags> and <flags2> 199 * follow. Not used in PREFIXTREE. 200 * BY_INDEX: Child of sibling is shared, <nodeidx> 201 * and <xbyte> follow. 202 * 203 * <nodeidx> 3 bytes Index of child for this sibling, MSB first. 204 * 205 * <xbyte> 1 byte byte value of the sibling. 206 * 207 * <flags> 1 byte bitmask of: 208 * WF_ALLCAP word must have only capitals 209 * WF_ONECAP first char of word must be capital 210 * WF_KEEPCAP keep-case word 211 * WF_FIXCAP keep-case word, all caps not allowed 212 * WF_RARE rare word 213 * WF_BANNED bad word 214 * WF_REGION <region> follows 215 * WF_AFX <affixID> follows 216 * 217 * <flags2> 1 byte Bitmask of: 218 * WF_HAS_AFF >> 8 word includes affix 219 * WF_NEEDCOMP >> 8 word only valid in compound 220 * 221 * <pflags> 1 byte bitmask of: 222 * WFP_RARE rare prefix 223 * WFP_NC non-combining prefix 224 * WFP_UP letter after prefix made upper case 225 * 226 * <region> 1 byte Bitmask for regions in which word is valid. When 227 * omitted it's valid in all regions. 228 * Lowest bit is for region 1. 229 * 230 * <affixID> 1 byte ID of affix that can be used with this word. In 231 * PREFIXTREE used for the required prefix ID. 232 * 233 * <prefcondnr> 2 bytes Prefix condition number, index in <prefcond> list 234 * from HEADER. 235 * 236 * All text characters are in 'encoding', but stored as single bytes. 237 */ 238 239 #if defined(MSDOS) || defined(WIN16) || defined(WIN32) || defined(_WIN64) 240 # include <io.h> /* for lseek(), must be before vim.h */ 241 #endif 242 243 #include "vim.h" 244 245 #if defined(FEAT_SYN_HL) || defined(PROTO) 246 247 #ifdef HAVE_FCNTL_H 248 # include <fcntl.h> 249 #endif 250 251 #define MAXWLEN 250 /* Assume max. 
word len is this many bytes.
                                   Some places assume a word length fits in a
                                   byte, thus it can't be above 255. */

/* The type used for indexes in the word tree needs to be at least 4 bytes.
 * If int is 8 bytes we could use something smaller, but what? */
#if SIZEOF_INT > 3
typedef int idx_T;
#else
typedef long idx_T;
#endif

/* Flags used for a word.  Only the lowest byte can be used, the region byte
 * comes above it. */
#define WF_REGION   0x01        /* region byte follows */
#define WF_ONECAP   0x02        /* word with one capital (or all capitals) */
#define WF_ALLCAP   0x04        /* word must be all capitals */
#define WF_RARE     0x08        /* rare word */
#define WF_BANNED   0x10        /* bad word */
#define WF_AFX      0x20        /* affix ID follows */
#define WF_FIXCAP   0x40        /* keep-case word, allcap not allowed */
#define WF_KEEPCAP  0x80        /* keep-case word */

/* for <flags2>, shifted up one byte to be used in wn_flags */
#define WF_HAS_AFF  0x0100      /* word includes affix */
#define WF_NEEDCOMP 0x0200      /* word only valid in compound */

/* Mask with all the word flags that are about the case of the word. */
#define WF_CAPMASK (WF_ONECAP | WF_ALLCAP | WF_KEEPCAP | WF_FIXCAP)

/* flags for <pflags> */
#define WFP_RARE    0x01        /* rare prefix */
#define WFP_NC      0x02        /* prefix is not combining */
#define WFP_UP      0x04        /* to-upper prefix */

/* Flags for postponed prefixes.  Must be above affixID (one byte)
 * and prefcondnr (two bytes), thus the <pflags> values are shifted up by
 * 24 bits. */
#define WF_RAREPFX  (WFP_RARE << 24)    /* in sl_pidxs: flag for rare
                                         * postponed prefix */
#define WF_PFX_NC   (WFP_NC << 24)      /* in sl_pidxs: flag for non-combining
                                         * postponed prefix */
#define WF_PFX_UP   (WFP_UP << 24)      /* in sl_pidxs: flag for to-upper
                                         * postponed prefix */

/* Special byte values for <byte>.  Some are only used in the tree for
 * postponed prefixes, some only in the other trees.  This is a bit messy...
*/ 296 #define BY_NOFLAGS 0 /* end of word without flags or region; for 297 * postponed prefix: no <pflags> */ 298 #define BY_INDEX 1 /* child is shared, index follows */ 299 #define BY_FLAGS 2 /* end of word, <flags> byte follows; for 300 * postponed prefix: <pflags> follows */ 301 #define BY_FLAGS2 3 /* end of word, <flags> and <flags2> bytes 302 * follow; never used in prefix tree */ 303 #define BY_SPECIAL BY_FLAGS2 /* highest special byte value */ 304 305 /* Info from "REP" and "SAL" entries in ".aff" file used in si_rep, sl_rep, 306 * and si_sal. Not for sl_sal! 307 * One replacement: from "ft_from" to "ft_to". */ 308 typedef struct fromto_S 309 { 310 char_u *ft_from; 311 char_u *ft_to; 312 } fromto_T; 313 314 /* Info from "SAL" entries in ".aff" file used in sl_sal. 315 * The info is split for quick processing by spell_soundfold(). 316 * Note that "sm_oneof" and "sm_rules" point into sm_lead. */ 317 typedef struct salitem_S 318 { 319 char_u *sm_lead; /* leading letters */ 320 int sm_leadlen; /* length of "sm_lead" */ 321 char_u *sm_oneof; /* letters from () or NULL */ 322 char_u *sm_rules; /* rules like ^, $, priority */ 323 char_u *sm_to; /* replacement. */ 324 #ifdef FEAT_MBYTE 325 int *sm_lead_w; /* wide character copy of "sm_lead" */ 326 int *sm_oneof_w; /* wide character copy of "sm_oneof" */ 327 int *sm_to_w; /* wide character copy of "sm_to" */ 328 #endif 329 } salitem_T; 330 331 #ifdef FEAT_MBYTE 332 typedef int salfirst_T; 333 #else 334 typedef short salfirst_T; 335 #endif 336 337 /* Values for SP_*ERROR are negative, positive values are used by 338 * read_cnt_string(). */ 339 #define SP_TRUNCERROR -1 /* spell file truncated error */ 340 #define SP_FORMERROR -2 /* format error in spell file */ 341 #define SP_OTHERERROR -3 /* other error while reading spell file */ 342 343 /* 344 * Structure used to store words and other info for one language, loaded from 345 * a .spl file. 
* The main access is through the tree in "sl_fbyts/sl_fidxs", storing the
 * case-folded words.  "sl_kbyts/sl_kidxs" is for keep-case words.
 *
 * The "byts" array stores the possible bytes in each tree node, preceded by
 * the number of possible bytes, sorted on byte value:
 *      <len> <byte1> <byte2> ...
 * The "idxs" array stores the index of the child node corresponding to the
 * byte in "byts".
 * Exception: when the byte is zero, the word may end here and "idxs" holds
 * the flags, region mask and affixID for the word.  There may be several
 * zeros in sequence for alternative flag/region/affixID combinations.
 */
typedef struct slang_S slang_T;
struct slang_S
{
    slang_T     *sl_next;       /* next language */
    char_u      *sl_name;       /* language name "en", "en.rare", "nl", etc. */
    char_u      *sl_fname;      /* name of .spl file */
    int         sl_add;         /* TRUE if it's a .add file. */

    char_u      *sl_fbyts;      /* case-folded word bytes */
    idx_T       *sl_fidxs;      /* case-folded word indexes */
    char_u      *sl_kbyts;      /* keep-case word bytes */
    idx_T       *sl_kidxs;      /* keep-case word indexes */
    char_u      *sl_pbyts;      /* prefix tree word bytes */
    idx_T       *sl_pidxs;      /* prefix tree word indexes */

    char_u      sl_regions[17]; /* table with up to 8 region names (two bytes
                                 * each) plus NUL */

    char_u      *sl_midword;    /* MIDWORD string or NULL */

    int         sl_compmax;     /* COMPOUNDMAX (default: MAXWLEN) */
    int         sl_compminlen;  /* COMPOUNDMIN (default: 0) */
    int         sl_compsylmax;  /* COMPOUNDSYLMAX (default: MAXWLEN) */
    regprog_T   *sl_compprog;   /* COMPOUNDFLAGS turned into a regexp program
                                 * (NULL when no compounding) */
    char_u      *sl_compstartflags; /* flags for first compound word */
    char_u      *sl_compallflags; /* all flags for compound words */
    char_u      sl_nobreak;     /* When TRUE: no spaces between words */
    char_u      *sl_syllable;   /* SYLLABLE repeatable chars or NULL */
    garray_T    sl_syl_items;   /* syllable items */

    int         sl_prefixcnt;   /* number of items in "sl_prefprog" */
    regprog_T   **sl_prefprog;  /* table with regprogs for prefixes */

    garray_T    sl_rep;         /* list of fromto_T entries from REP lines */
    short       sl_rep_first[256];  /* indexes where byte first appears, -1 if
                                       there is none */
    garray_T    sl_sal;         /* list of salitem_T entries from SAL lines */
    salfirst_T  sl_sal_first[256];  /* indexes where byte first appears, -1 if
                                       there is none */
    int         sl_sofo;        /* SOFOFROM and SOFOTO instead of SAL items:
                                 * "sl_sal_first" maps chars, when has_mbyte
                                 * "sl_sal" is a list of wide char lists. */
    int         sl_followup;    /* SAL followup */
    int         sl_collapse;    /* SAL collapse_result */
    int         sl_rem_accents; /* SAL remove_accents */
    int         sl_has_map;     /* TRUE if there is a MAP line */
#ifdef FEAT_MBYTE
    hashtab_T   sl_map_hash;    /* MAP for multi-byte chars */
    int         sl_map_array[256];  /* MAP for first 256 chars */
#else
    char_u      sl_map_array[256];  /* MAP for first 256 chars */
#endif
};

/* First language that is loaded, start of the linked list of loaded
 * languages. */
static slang_T *first_lang = NULL;

/* Flags used in .spl file for soundsalike flags.
 * Note that SAL_F0LLOWUP is spelled with a digit zero. */
#define SAL_F0LLOWUP            1
#define SAL_COLLAPSE            2
#define SAL_REM_ACCENTS         4

/*
 * Structure used in "b_langp", filled from 'spelllang'.
 */
typedef struct langp_S
{
    slang_T     *lp_slang;      /* info for this language */
    slang_T     *lp_sallang;    /* language used for sound folding or NULL */
    slang_T     *lp_replang;    /* language used for REP items or NULL */
    int         lp_region;      /* bitmask for region or REGION_ALL */
} langp_T;

/* Pointer to entry "i" of growarray "ga", which holds langp_T items. */
#define LANGP_ENTRY(ga, i)      (((langp_T *)(ga).ga_data) + (i))

#define REGION_ALL 0xff         /* word valid in all regions */

#define VIMSPELLMAGIC "VIMspell"  /* string at start of Vim spell file */
#define VIMSPELLMAGICL 8          /* number of bytes in VIMSPELLMAGIC */
#define VIMSPELLVERSION 50

/* Section IDs.
Only renumber them when VIMSPELLVERSION changes! */ 441 #define SN_REGION 0 /* <regionname> section */ 442 #define SN_CHARFLAGS 1 /* charflags section */ 443 #define SN_MIDWORD 2 /* <midword> section */ 444 #define SN_PREFCOND 3 /* <prefcond> section */ 445 #define SN_REP 4 /* REP items section */ 446 #define SN_SAL 5 /* SAL items section */ 447 #define SN_SOFO 6 /* soundfolding section */ 448 #define SN_MAP 7 /* MAP items section */ 449 #define SN_COMPOUND 8 /* compound words section */ 450 #define SN_SYLLABLE 9 /* syllable section */ 451 #define SN_NOBREAK 10 /* NOBREAK section */ 452 #define SN_END 255 /* end of sections */ 453 454 #define SNF_REQUIRED 1 /* <sectionflags>: required section */ 455 456 /* Result values. Lower number is accepted over higher one. */ 457 #define SP_BANNED -1 458 #define SP_OK 0 459 #define SP_RARE 1 460 #define SP_LOCAL 2 461 #define SP_BAD 3 462 463 /* file used for "zG" and "zW" */ 464 static char_u *int_wordlist = NULL; 465 466 /* 467 * Information used when looking for suggestions. 468 */ 469 typedef struct suginfo_S 470 { 471 garray_T su_ga; /* suggestions, contains "suggest_T" */ 472 int su_maxcount; /* max. number of suggestions displayed */ 473 int su_maxscore; /* maximum score for adding to su_ga */ 474 garray_T su_sga; /* like su_ga, sound-folded scoring */ 475 char_u *su_badptr; /* start of bad word in line */ 476 int su_badlen; /* length of detected bad word in line */ 477 int su_badflags; /* caps flags for bad word */ 478 char_u su_badword[MAXWLEN]; /* bad word truncated at su_badlen */ 479 char_u su_fbadword[MAXWLEN]; /* su_badword case-folded */ 480 char_u su_sal_badword[MAXWLEN]; /* su_badword soundfolded */ 481 slang_T *su_slang_first; /* slang_T used for su_sal_badword */ 482 hashtab_T su_banned; /* table with banned words */ 483 slang_T *su_sallang; /* default language for sound folding */ 484 } suginfo_T; 485 486 /* One word suggestion. Used in "si_ga". 
*/ 487 typedef struct suggest_S 488 { 489 char_u *st_word; /* suggested word, allocated string */ 490 int st_orglen; /* length of replaced text */ 491 int st_score; /* lower is better */ 492 int st_altscore; /* used when st_score compares equal */ 493 int st_salscore; /* st_score is for soundalike */ 494 int st_had_bonus; /* bonus already included in score */ 495 slang_T *st_slang; /* language used for sound folding */ 496 } suggest_T; 497 498 #define SUG(ga, i) (((suggest_T *)(ga).ga_data)[i]) 499 500 /* Number of suggestions kept when cleaning up. When rescore_suggestions() is 501 * called the score may change, thus we need to keep more than what is 502 * displayed. */ 503 #define SUG_CLEAN_COUNT(su) ((su)->su_maxcount < 50 ? 50 : (su)->su_maxcount) 504 505 /* Threshold for sorting and cleaning up suggestions. Don't want to keep lots 506 * of suggestions that are not going to be displayed. */ 507 #define SUG_MAX_COUNT(su) ((su)->su_maxcount + 50) 508 509 /* score for various changes */ 510 #define SCORE_SPLIT 149 /* split bad word */ 511 #define SCORE_ICASE 52 /* slightly different case */ 512 #define SCORE_REGION 200 /* word is for different region */ 513 #define SCORE_RARE 180 /* rare word */ 514 #define SCORE_SWAP 90 /* swap two characters */ 515 #define SCORE_SWAP3 110 /* swap two characters in three */ 516 #define SCORE_REP 65 /* REP replacement */ 517 #define SCORE_SUBST 93 /* substitute a character */ 518 #define SCORE_SIMILAR 33 /* substitute a similar character */ 519 #define SCORE_SUBCOMP 33 /* substitute a composing character */ 520 #define SCORE_DEL 94 /* delete a character */ 521 #define SCORE_DELDUP 66 /* delete a duplicated character */ 522 #define SCORE_DELCOMP 28 /* delete a composing character */ 523 #define SCORE_INS 96 /* insert a character */ 524 #define SCORE_INSDUP 67 /* insert a duplicate character */ 525 #define SCORE_INSCOMP 30 /* insert a composing character */ 526 #define SCORE_NONWORD 103 /* change non-word to word char */ 527 528 
#define SCORE_FILE 30 /* suggestion from a file */ 529 #define SCORE_MAXINIT 350 /* Initial maximum score: higher == slower. 530 * 350 allows for about three changes. */ 531 532 #define SCORE_BIG SCORE_INS * 3 /* big difference */ 533 #define SCORE_MAXMAX 999999 /* accept any score */ 534 535 /* 536 * Structure to store info for word matching. 537 */ 538 typedef struct matchinf_S 539 { 540 langp_T *mi_lp; /* info for language and region */ 541 542 /* pointers to original text to be checked */ 543 char_u *mi_word; /* start of word being checked */ 544 char_u *mi_end; /* end of matching word so far */ 545 char_u *mi_fend; /* next char to be added to mi_fword */ 546 char_u *mi_cend; /* char after what was used for 547 mi_capflags */ 548 549 /* case-folded text */ 550 char_u mi_fword[MAXWLEN + 1]; /* mi_word case-folded */ 551 int mi_fwordlen; /* nr of valid bytes in mi_fword */ 552 553 /* for when checking word after a prefix */ 554 int mi_prefarridx; /* index in sl_pidxs with list of 555 affixID/condition */ 556 int mi_prefcnt; /* number of entries at mi_prefarridx */ 557 int mi_prefixlen; /* byte length of prefix */ 558 #ifdef FEAT_MBYTE 559 int mi_cprefixlen; /* byte length of prefix in original 560 case */ 561 #else 562 # define mi_cprefixlen mi_prefixlen /* it's the same value */ 563 #endif 564 565 /* for when checking a compound word */ 566 int mi_compoff; /* start of following word offset */ 567 char_u mi_compflags[MAXWLEN]; /* flags for compound words used */ 568 int mi_complen; /* nr of compound words used */ 569 570 /* others */ 571 int mi_result; /* result so far: SP_BAD, SP_OK, etc. */ 572 int mi_capflags; /* WF_ONECAP WF_ALLCAP WF_KEEPCAP */ 573 buf_T *mi_buf; /* buffer being checked */ 574 575 /* for NOBREAK */ 576 int mi_result2; /* "mi_resul" without following word */ 577 char_u *mi_end2; /* "mi_end" without following word */ 578 } matchinf_T; 579 580 /* 581 * The tables used for recognizing word characters according to spelling. 
* These are only used for the first 256 characters of 'encoding'.
 */
typedef struct spelltab_S
{
    char_u  st_isw[256];        /* flags: is word char */
    char_u  st_isu[256];        /* flags: is uppercase char */
    char_u  st_fold[256];       /* chars: folded case */
    char_u  st_upper[256];      /* chars: upper case */
} spelltab_T;

static spelltab_T   spelltab;
static int          did_set_spelltab;

/* Bits used in <charflags> of the SN_CHARFLAGS section. */
#define CF_WORD         0x01    /* word character */
#define CF_UPPER        0x02    /* upper-case character */

static void clear_spell_chartab __ARGS((spelltab_T *sp));
static int set_spell_finish __ARGS((spelltab_T *new_st));
static int spell_iswordp __ARGS((char_u *p, buf_T *buf));
static int spell_iswordp_nmw __ARGS((char_u *p));
#ifdef FEAT_MBYTE
static int spell_iswordp_w __ARGS((int *p, buf_T *buf));
#endif
static int write_spell_prefcond __ARGS((FILE *fd, garray_T *gap));

/*
 * For finding suggestions: At each node in the tree these states are tried:
 */
typedef enum
{
    STATE_START = 0,    /* At start of node check for NUL bytes (goodword
                         * ends); if badword ends there is a match, otherwise
                         * try splitting word. */
    STATE_NOPREFIX,     /* try without prefix */
    STATE_SPLITUNDO,    /* Undo splitting. */
    STATE_ENDNUL,       /* Past NUL bytes at start of the node. */
    STATE_PLAIN,        /* Use each byte of the node. */
    STATE_DEL,          /* Delete a byte from the bad word. */
    STATE_INS,          /* Insert a byte in the bad word. */
    STATE_SWAP,         /* Swap two bytes. */
    STATE_UNSWAP,       /* Undo swap two characters. */
    STATE_SWAP3,        /* Swap two characters over three. */
    STATE_UNSWAP3,      /* Undo Swap two characters over three. */
    STATE_UNROT3L,      /* Undo rotate three characters left */
    STATE_UNROT3R,      /* Undo rotate three characters right */
    STATE_REP_INI,      /* Prepare for using REP items. */
    STATE_REP,          /* Use matching REP items from the .aff file. */
    STATE_REP_UNDO,     /* Undo a REP item replacement. */
    STATE_FINAL         /* End of this node. */
} state_T;

/*
 * Struct to keep the state at each level in suggest_try_change().
 */
typedef struct trystate_S
{
    state_T     ts_state;       /* state at this level, STATE_ */
    int         ts_score;       /* score */
    idx_T       ts_arridx;      /* index in tree array, start of node */
    short       ts_curi;        /* index in list of child nodes */
    char_u      ts_fidx;        /* index in fword[], case-folded bad word */
    char_u      ts_fidxtry;     /* ts_fidx at which bytes may be changed */
    char_u      ts_twordlen;    /* valid length of tword[] */
    char_u      ts_prefixdepth; /* stack depth for end of prefix or
                                 * PFD_PREFIXTREE or PFD_NOPREFIX */
    char_u      ts_flags;       /* TSF_ flags */
#ifdef FEAT_MBYTE
    char_u      ts_tcharlen;    /* number of bytes in tword character */
    char_u      ts_tcharidx;    /* current byte index in tword character */
    char_u      ts_isdiff;      /* DIFF_ values */
    char_u      ts_fcharstart;  /* index in fword where badword char started */
#endif
    char_u      ts_prewordlen;  /* length of word in "preword[]" */
    char_u      ts_splitoff;    /* index in "tword" after last split */
    char_u      ts_splitfidx;   /* "ts_fidx" at word split */
    char_u      ts_complen;     /* nr of compound words used */
    char_u      ts_compsplit;   /* index for "compflags" where word was split */
    char_u      ts_save_badflags;   /* su_badflags saved here */
} trystate_T;

/* values for ts_isdiff */
#define DIFF_NONE       0       /* no different byte (yet) */
#define DIFF_YES        1       /* different byte found */
#define DIFF_INSERT     2       /* inserting character */

/* values for ts_flags */
#define TSF_PREFIXOK    1       /* already checked that prefix is OK */
#define TSF_DIDSPLIT    2       /* tried split at this point */

/* special values ts_prefixdepth */
#define PFD_NOPREFIX    0xff    /* not using prefixes */
#define PFD_PREFIXTREE  0xfe    /* walking through the prefix tree */
#define PFD_NOTSPECIAL  0xfd    /* first value that's not special */

/* mode values for find_word */
#define FIND_FOLDWORD       0   /*
find word case-folded */ 678 #define FIND_KEEPWORD 1 /* find keep-case word */ 679 #define FIND_PREFIX 2 /* find word after prefix */ 680 #define FIND_COMPOUND 3 /* find case-folded compound word */ 681 #define FIND_KEEPCOMPOUND 4 /* find keep-case compound word */ 682 683 static slang_T *slang_alloc __ARGS((char_u *lang)); 684 static void slang_free __ARGS((slang_T *lp)); 685 static void slang_clear __ARGS((slang_T *lp)); 686 static void find_word __ARGS((matchinf_T *mip, int mode)); 687 static int can_compound __ARGS((slang_T *slang, char_u *word, char_u *flags)); 688 static int valid_word_prefix __ARGS((int totprefcnt, int arridx, int flags, char_u *word, slang_T *slang, int cond_req)); 689 static void find_prefix __ARGS((matchinf_T *mip, int mode)); 690 static int fold_more __ARGS((matchinf_T *mip)); 691 static int spell_valid_case __ARGS((int wordflags, int treeflags)); 692 static int no_spell_checking __ARGS((win_T *wp)); 693 static void spell_load_lang __ARGS((char_u *lang)); 694 static char_u *spell_enc __ARGS((void)); 695 static void int_wordlist_spl __ARGS((char_u *fname)); 696 static void spell_load_cb __ARGS((char_u *fname, void *cookie)); 697 static slang_T *spell_load_file __ARGS((char_u *fname, char_u *lang, slang_T *old_lp, int silent)); 698 static char_u *read_cnt_string __ARGS((FILE *fd, int cnt_bytes, int *lenp)); 699 static char_u *read_string __ARGS((FILE *fd, int cnt)); 700 static int read_region_section __ARGS((FILE *fd, slang_T *slang, int len)); 701 static int read_charflags_section __ARGS((FILE *fd)); 702 static int read_prefcond_section __ARGS((FILE *fd, slang_T *lp)); 703 static int read_rep_section __ARGS((FILE *fd, slang_T *slang)); 704 static int read_sal_section __ARGS((FILE *fd, slang_T *slang)); 705 static int read_sofo_section __ARGS((FILE *fd, slang_T *slang)); 706 static int read_compound __ARGS((FILE *fd, slang_T *slang, int len)); 707 static int byte_in_str __ARGS((char_u *str, int byte)); 708 static int init_syl_tab 
__ARGS((slang_T *slang)); 709 static int count_syllables __ARGS((slang_T *slang, char_u *word)); 710 static int set_sofo __ARGS((slang_T *lp, char_u *from, char_u *to)); 711 static void set_sal_first __ARGS((slang_T *lp)); 712 #ifdef FEAT_MBYTE 713 static int *mb_str2wide __ARGS((char_u *s)); 714 #endif 715 static idx_T read_tree __ARGS((FILE *fd, char_u *byts, idx_T *idxs, int maxidx, int startidx, int prefixtree, int maxprefcondnr)); 716 static void clear_midword __ARGS((buf_T *buf)); 717 static void use_midword __ARGS((slang_T *lp, buf_T *buf)); 718 static int find_region __ARGS((char_u *rp, char_u *region)); 719 static int captype __ARGS((char_u *word, char_u *end)); 720 static int badword_captype __ARGS((char_u *word, char_u *end)); 721 static void spell_reload_one __ARGS((char_u *fname, int added_word)); 722 static void set_spell_charflags __ARGS((char_u *flags, int cnt, char_u *upp)); 723 static int set_spell_chartab __ARGS((char_u *fol, char_u *low, char_u *upp)); 724 static int spell_casefold __ARGS((char_u *p, int len, char_u *buf, int buflen)); 725 static int check_need_cap __ARGS((linenr_T lnum, colnr_T col)); 726 static void spell_find_suggest __ARGS((char_u *badptr, suginfo_T *su, int maxcount, int banbadword, int need_cap)); 727 #ifdef FEAT_EVAL 728 static void spell_suggest_expr __ARGS((suginfo_T *su, char_u *expr)); 729 #endif 730 static void spell_suggest_file __ARGS((suginfo_T *su, char_u *fname)); 731 static void spell_suggest_intern __ARGS((suginfo_T *su)); 732 static void spell_find_cleanup __ARGS((suginfo_T *su)); 733 static void onecap_copy __ARGS((char_u *word, char_u *wcopy, int upper)); 734 static void allcap_copy __ARGS((char_u *word, char_u *wcopy)); 735 static void suggest_try_special __ARGS((suginfo_T *su)); 736 static void suggest_try_change __ARGS((suginfo_T *su)); 737 static int try_deeper __ARGS((suginfo_T *su, trystate_T *stack, int depth, int score_add)); 738 #ifdef FEAT_MBYTE 739 static int nofold_len __ARGS((char_u *fword, int 
flen, char_u *word)); 740 #endif 741 static void find_keepcap_word __ARGS((slang_T *slang, char_u *fword, char_u *kword)); 742 static void score_comp_sal __ARGS((suginfo_T *su)); 743 static void score_combine __ARGS((suginfo_T *su)); 744 static int stp_sal_score __ARGS((suggest_T *stp, suginfo_T *su, slang_T *slang, char_u *badsound)); 745 static void suggest_try_soundalike __ARGS((suginfo_T *su)); 746 static void make_case_word __ARGS((char_u *fword, char_u *cword, int flags)); 747 static void set_map_str __ARGS((slang_T *lp, char_u *map)); 748 static int similar_chars __ARGS((slang_T *slang, int c1, int c2)); 749 static void add_suggestion __ARGS((suginfo_T *su, garray_T *gap, char_u *goodword, int badlen, int score, int altscore, int had_bonus, slang_T *slang)); 750 static void add_banned __ARGS((suginfo_T *su, char_u *word)); 751 static int was_banned __ARGS((suginfo_T *su, char_u *word)); 752 static void free_banned __ARGS((suginfo_T *su)); 753 static void rescore_suggestions __ARGS((suginfo_T *su)); 754 static void rescore_one __ARGS((suginfo_T *su, suggest_T *stp)); 755 static int cleanup_suggestions __ARGS((garray_T *gap, int maxscore, int keep)); 756 static void spell_soundfold __ARGS((slang_T *slang, char_u *inword, int folded, char_u *res)); 757 static void spell_soundfold_sofo __ARGS((slang_T *slang, char_u *inword, char_u *res)); 758 static void spell_soundfold_sal __ARGS((slang_T *slang, char_u *inword, char_u *res)); 759 #ifdef FEAT_MBYTE 760 static void spell_soundfold_wsal __ARGS((slang_T *slang, char_u *inword, char_u *res)); 761 #endif 762 static int soundalike_score __ARGS((char_u *goodsound, char_u *badsound)); 763 static int spell_edit_score __ARGS((char_u *badword, char_u *goodword)); 764 static void dump_word __ARGS((char_u *word, int round, int flags, linenr_T lnum)); 765 static linenr_T dump_prefixes __ARGS((slang_T *slang, char_u *word, int round, int flags, linenr_T startlnum)); 766 767 /* 768 * Use our own character-case definitions, 
because the current locale may 769 * differ from what the .spl file uses. 770 * These must not be called with negative number! 771 */ 772 #ifndef FEAT_MBYTE 773 /* Non-multi-byte implementation. */ 774 # define SPELL_TOFOLD(c) ((c) < 256 ? spelltab.st_fold[c] : (c)) 775 # define SPELL_TOUPPER(c) ((c) < 256 ? spelltab.st_upper[c] : (c)) 776 # define SPELL_ISUPPER(c) ((c) < 256 ? spelltab.st_isu[c] : FALSE) 777 #else 778 # if defined(HAVE_WCHAR_H) 779 # include <wchar.h> /* for towupper() and towlower() */ 780 # endif 781 /* Multi-byte implementation. For Unicode we can call utf_*(), but don't do 782 * that for ASCII, because we don't want to use 'casemap' here. Otherwise use 783 * the "w" library function for characters above 255 if available. */ 784 # ifdef HAVE_TOWLOWER 785 # define SPELL_TOFOLD(c) (enc_utf8 && (c) >= 128 ? utf_fold(c) \ 786 : (c) < 256 ? spelltab.st_fold[c] : towlower(c)) 787 # else 788 # define SPELL_TOFOLD(c) (enc_utf8 && (c) >= 128 ? utf_fold(c) \ 789 : (c) < 256 ? spelltab.st_fold[c] : (c)) 790 # endif 791 792 # ifdef HAVE_TOWUPPER 793 # define SPELL_TOUPPER(c) (enc_utf8 && (c) >= 128 ? utf_toupper(c) \ 794 : (c) < 256 ? spelltab.st_upper[c] : towupper(c)) 795 # else 796 # define SPELL_TOUPPER(c) (enc_utf8 && (c) >= 128 ? utf_toupper(c) \ 797 : (c) < 256 ? spelltab.st_upper[c] : (c)) 798 # endif 799 800 # ifdef HAVE_ISWUPPER 801 # define SPELL_ISUPPER(c) (enc_utf8 && (c) >= 128 ? utf_isupper(c) \ 802 : (c) < 256 ? spelltab.st_isu[c] : iswupper(c)) 803 # else 804 # define SPELL_ISUPPER(c) (enc_utf8 && (c) >= 128 ? utf_isupper(c) \ 805 : (c) < 256 ? 
spelltab.st_isu[c] : (FALSE)) 806 # endif 807 #endif 808 809 810 static char *e_format = N_("E759: Format error in spell file"); 811 static char *e_spell_trunc = N_("E758: Truncated spell file"); 812 static char *e_afftrailing = N_("Trailing text in %s line %d: %s"); 813 static char *e_affname = N_("Affix name too long in %s line %d: %s"); 814 static char *e_affform = N_("E761: Format error in affix file FOL, LOW or UPP"); 815 static char *e_affrange = N_("E762: Character in FOL, LOW or UPP is out of range"); 816 static char *msg_compressing = N_("Compressing word tree..."); 817 818 /* 819 * Main spell-checking function. 820 * "ptr" points to a character that could be the start of a word. 821 * "*attrp" is set to the highlight index for a badly spelled word. For a 822 * non-word or when it's OK it remains unchanged. 823 * This must only be called when 'spelllang' is not empty. 824 * 825 * "capcol" is used to check for a Capitalised word after the end of a 826 * sentence. If it's zero then perform the check. Return the column where to 827 * check next, or -1 when no sentence end was found. If it's NULL then don't 828 * worry. 829 * 830 * Returns the length of the word in bytes, also when it's OK, so that the 831 * caller can skip over the word. 832 */ 833 int 834 spell_check(wp, ptr, attrp, capcol) 835 win_T *wp; /* current window */ 836 char_u *ptr; 837 hlf_T *attrp; 838 int *capcol; /* column to check for Capital */ 839 { 840 matchinf_T mi; /* Most things are put in "mi" so that it can 841 be passed to functions quickly. */ 842 int nrlen = 0; /* found a number first */ 843 int c; 844 int wrongcaplen = 0; 845 int lpi; 846 847 /* A word never starts at a space or a control character. Return quickly 848 * then, skipping over the character. */ 849 if (*ptr <= ' ') 850 return 1; 851 vim_memset(&mi, 0, sizeof(matchinf_T)); 852 853 /* A number is always OK. Also skip hexadecimal numbers 0xFF99 and 854 * 0X99FF. 
But when a word character follows do check spelling to find 855 * "3GPP". */ 856 if (*ptr >= '0' && *ptr <= '9') 857 { 858 if (*ptr == '0' && (ptr[1] == 'x' || ptr[1] == 'X')) 859 mi.mi_end = skiphex(ptr + 2); 860 else 861 { 862 mi.mi_end = skipdigits(ptr); 863 nrlen = mi.mi_end - ptr; 864 } 865 if (!spell_iswordp(mi.mi_end, wp->w_buffer)) 866 return (int)(mi.mi_end - ptr); 867 868 /* Try including the digits in the word. */ 869 mi.mi_fend = ptr + nrlen; 870 } 871 else 872 mi.mi_fend = ptr; 873 874 /* Find the normal end of the word (until the next non-word character). */ 875 mi.mi_word = ptr; 876 if (spell_iswordp(mi.mi_fend, wp->w_buffer)) 877 { 878 do 879 { 880 mb_ptr_adv(mi.mi_fend); 881 } while (*mi.mi_fend != NUL && spell_iswordp(mi.mi_fend, wp->w_buffer)); 882 883 if (capcol != NULL && *capcol == 0 && wp->w_buffer->b_cap_prog != NULL) 884 { 885 /* Check word starting with capital letter. */ 886 c = PTR2CHAR(ptr); 887 if (!SPELL_ISUPPER(c)) 888 wrongcaplen = (int)(mi.mi_fend - ptr); 889 } 890 } 891 if (capcol != NULL) 892 *capcol = -1; 893 894 /* We always use the characters up to the next non-word character, 895 * also for bad words. */ 896 mi.mi_end = mi.mi_fend; 897 898 /* Check caps type later. */ 899 mi.mi_buf = wp->w_buffer; 900 901 /* case-fold the word with one non-word character, so that we can check 902 * for the word end. */ 903 if (*mi.mi_fend != NUL) 904 mb_ptr_adv(mi.mi_fend); 905 906 (void)spell_casefold(ptr, (int)(mi.mi_fend - ptr), mi.mi_fword, 907 MAXWLEN + 1); 908 mi.mi_fwordlen = STRLEN(mi.mi_fword); 909 910 /* The word is bad unless we recognize it. */ 911 mi.mi_result = SP_BAD; 912 mi.mi_result2 = SP_BAD; 913 914 /* 915 * Loop over the languages specified in 'spelllang'. 916 * We check them all, because a matching word may be longer than an 917 * already found matching word. 
918 */ 919 for (lpi = 0; lpi < wp->w_buffer->b_langp.ga_len; ++lpi) 920 { 921 mi.mi_lp = LANGP_ENTRY(wp->w_buffer->b_langp, lpi); 922 923 /* If reloading fails the language is still in the list but everything 924 * has been cleared. */ 925 if (mi.mi_lp->lp_slang->sl_fidxs == NULL) 926 continue; 927 928 /* Check for a matching word in case-folded words. */ 929 find_word(&mi, FIND_FOLDWORD); 930 931 /* Check for a matching word in keep-case words. */ 932 find_word(&mi, FIND_KEEPWORD); 933 934 /* Check for matching prefixes. */ 935 find_prefix(&mi, FIND_FOLDWORD); 936 937 /* For a NOBREAK language, may want to use a word without a following 938 * word as a backup. */ 939 if (mi.mi_lp->lp_slang->sl_nobreak && mi.mi_result == SP_BAD 940 && mi.mi_result2 != SP_BAD) 941 { 942 mi.mi_result = mi.mi_result2; 943 mi.mi_end = mi.mi_end2; 944 } 945 } 946 947 if (mi.mi_result != SP_OK) 948 { 949 /* If we found a number skip over it. Allows for "42nd". Do flag 950 * rare and local words, e.g., "3GPP". */ 951 if (nrlen > 0) 952 { 953 if (mi.mi_result == SP_BAD || mi.mi_result == SP_BANNED) 954 return nrlen; 955 } 956 957 /* When we are at a non-word character there is no error, just 958 * skip over the character (try looking for a word after it). */ 959 else if (!spell_iswordp_nmw(ptr)) 960 { 961 if (capcol != NULL && wp->w_buffer->b_cap_prog != NULL) 962 { 963 regmatch_T regmatch; 964 965 /* Check for end of sentence. */ 966 regmatch.regprog = wp->w_buffer->b_cap_prog; 967 regmatch.rm_ic = FALSE; 968 if (vim_regexec(®match, ptr, 0)) 969 *capcol = (int)(regmatch.endp[0] - ptr); 970 } 971 972 #ifdef FEAT_MBYTE 973 if (has_mbyte) 974 return (*mb_ptr2len)(ptr); 975 #endif 976 return 1; 977 } 978 else if (mi.mi_end == ptr) 979 /* Always include at least one character. Required for when there 980 * is a mixup in "midword". 
*/ 981 mb_ptr_adv(mi.mi_end); 982 else if (mi.mi_result == SP_BAD 983 && LANGP_ENTRY(wp->w_buffer->b_langp, 0)->lp_slang->sl_nobreak) 984 { 985 char_u *p, *fp; 986 int save_result = mi.mi_result; 987 988 /* First language in 'spelllang' is NOBREAK. Find first position 989 * at which any word would be valid. */ 990 mi.mi_lp = LANGP_ENTRY(wp->w_buffer->b_langp, 0); 991 if (mi.mi_lp->lp_slang->sl_fidxs != NULL) 992 { 993 p = mi.mi_word; 994 fp = mi.mi_fword; 995 for (;;) 996 { 997 mb_ptr_adv(p); 998 mb_ptr_adv(fp); 999 if (p >= mi.mi_end) 1000 break; 1001 mi.mi_compoff = fp - mi.mi_fword; 1002 find_word(&mi, FIND_COMPOUND); 1003 if (mi.mi_result != SP_BAD) 1004 { 1005 mi.mi_end = p; 1006 break; 1007 } 1008 } 1009 mi.mi_result = save_result; 1010 } 1011 } 1012 1013 if (mi.mi_result == SP_BAD || mi.mi_result == SP_BANNED) 1014 *attrp = HLF_SPB; 1015 else if (mi.mi_result == SP_RARE) 1016 *attrp = HLF_SPR; 1017 else 1018 *attrp = HLF_SPL; 1019 } 1020 1021 if (wrongcaplen > 0 && (mi.mi_result == SP_OK || mi.mi_result == SP_RARE)) 1022 { 1023 /* Report SpellCap only when the word isn't badly spelled. */ 1024 *attrp = HLF_SPC; 1025 return wrongcaplen; 1026 } 1027 1028 return (int)(mi.mi_end - ptr); 1029 } 1030 1031 /* 1032 * Check if the word at "mip->mi_word" is in the tree. 1033 * When "mode" is FIND_FOLDWORD check in fold-case word tree. 1034 * When "mode" is FIND_KEEPWORD check in keep-case word tree. 1035 * When "mode" is FIND_PREFIX check for word after prefix in fold-case word 1036 * tree. 1037 * 1038 * For a match mip->mi_result is updated. 
1039 */ 1040 static void 1041 find_word(mip, mode) 1042 matchinf_T *mip; 1043 int mode; 1044 { 1045 idx_T arridx = 0; 1046 int endlen[MAXWLEN]; /* length at possible word endings */ 1047 idx_T endidx[MAXWLEN]; /* possible word endings */ 1048 int endidxcnt = 0; 1049 int len; 1050 int wlen = 0; 1051 int flen; 1052 int c; 1053 char_u *ptr; 1054 idx_T lo, hi, m; 1055 #ifdef FEAT_MBYTE 1056 char_u *s; 1057 #endif 1058 char_u *p; 1059 int res = SP_BAD; 1060 slang_T *slang = mip->mi_lp->lp_slang; 1061 unsigned flags; 1062 char_u *byts; 1063 idx_T *idxs; 1064 int word_ends; 1065 int prefix_found; 1066 int nobreak_result; 1067 1068 if (mode == FIND_KEEPWORD || mode == FIND_KEEPCOMPOUND) 1069 { 1070 /* Check for word with matching case in keep-case tree. */ 1071 ptr = mip->mi_word; 1072 flen = 9999; /* no case folding, always enough bytes */ 1073 byts = slang->sl_kbyts; 1074 idxs = slang->sl_kidxs; 1075 1076 if (mode == FIND_KEEPCOMPOUND) 1077 /* Skip over the previously found word(s). */ 1078 wlen += mip->mi_compoff; 1079 } 1080 else 1081 { 1082 /* Check for case-folded in case-folded tree. */ 1083 ptr = mip->mi_fword; 1084 flen = mip->mi_fwordlen; /* available case-folded bytes */ 1085 byts = slang->sl_fbyts; 1086 idxs = slang->sl_fidxs; 1087 1088 if (mode == FIND_PREFIX) 1089 { 1090 /* Skip over the prefix. */ 1091 wlen = mip->mi_prefixlen; 1092 flen -= mip->mi_prefixlen; 1093 } 1094 else if (mode == FIND_COMPOUND) 1095 { 1096 /* Skip over the previously found word(s). */ 1097 wlen = mip->mi_compoff; 1098 flen -= mip->mi_compoff; 1099 } 1100 1101 } 1102 1103 if (byts == NULL) 1104 return; /* array is empty */ 1105 1106 /* 1107 * Repeat advancing in the tree until: 1108 * - there is a byte that doesn't match, 1109 * - we reach the end of the tree, 1110 * - or we reach the end of the line. 
1111 */ 1112 for (;;) 1113 { 1114 if (flen <= 0 && *mip->mi_fend != NUL) 1115 flen = fold_more(mip); 1116 1117 len = byts[arridx++]; 1118 1119 /* If the first possible byte is a zero the word could end here. 1120 * Remember this index, we first check for the longest word. */ 1121 if (byts[arridx] == 0) 1122 { 1123 if (endidxcnt == MAXWLEN) 1124 { 1125 /* Must be a corrupted spell file. */ 1126 EMSG(_(e_format)); 1127 return; 1128 } 1129 endlen[endidxcnt] = wlen; 1130 endidx[endidxcnt++] = arridx++; 1131 --len; 1132 1133 /* Skip over the zeros, there can be several flag/region 1134 * combinations. */ 1135 while (len > 0 && byts[arridx] == 0) 1136 { 1137 ++arridx; 1138 --len; 1139 } 1140 if (len == 0) 1141 break; /* no children, word must end here */ 1142 } 1143 1144 /* Stop looking at end of the line. */ 1145 if (ptr[wlen] == NUL) 1146 break; 1147 1148 /* Perform a binary search in the list of accepted bytes. */ 1149 c = ptr[wlen]; 1150 if (c == TAB) /* <Tab> is handled like <Space> */ 1151 c = ' '; 1152 lo = arridx; 1153 hi = arridx + len - 1; 1154 while (lo < hi) 1155 { 1156 m = (lo + hi) / 2; 1157 if (byts[m] > c) 1158 hi = m - 1; 1159 else if (byts[m] < c) 1160 lo = m + 1; 1161 else 1162 { 1163 lo = hi = m; 1164 break; 1165 } 1166 } 1167 1168 /* Stop if there is no matching byte. */ 1169 if (hi < lo || byts[lo] != c) 1170 break; 1171 1172 /* Continue at the child (if there is one). */ 1173 arridx = idxs[lo]; 1174 ++wlen; 1175 --flen; 1176 1177 /* One space in the good word may stand for several spaces in the 1178 * checked word. */ 1179 if (c == ' ') 1180 { 1181 for (;;) 1182 { 1183 if (flen <= 0 && *mip->mi_fend != NUL) 1184 flen = fold_more(mip); 1185 if (ptr[wlen] != ' ' && ptr[wlen] != TAB) 1186 break; 1187 ++wlen; 1188 --flen; 1189 } 1190 } 1191 } 1192 1193 /* 1194 * Verify that one of the possible endings is valid. Try the longest 1195 * first. 
1196 */ 1197 while (endidxcnt > 0) 1198 { 1199 --endidxcnt; 1200 arridx = endidx[endidxcnt]; 1201 wlen = endlen[endidxcnt]; 1202 1203 #ifdef FEAT_MBYTE 1204 if ((*mb_head_off)(ptr, ptr + wlen) > 0) 1205 continue; /* not at first byte of character */ 1206 #endif 1207 if (spell_iswordp(ptr + wlen, mip->mi_buf)) 1208 { 1209 if (slang->sl_compprog == NULL && !slang->sl_nobreak) 1210 continue; /* next char is a word character */ 1211 word_ends = FALSE; 1212 } 1213 else 1214 word_ends = TRUE; 1215 /* The prefix flag is before compound flags. Once a valid prefix flag 1216 * has been found we try compound flags. */ 1217 prefix_found = FALSE; 1218 1219 #ifdef FEAT_MBYTE 1220 if (mode != FIND_KEEPWORD && has_mbyte) 1221 { 1222 /* Compute byte length in original word, length may change 1223 * when folding case. This can be slow, take a shortcut when the 1224 * case-folded word is equal to the keep-case word. */ 1225 p = mip->mi_word; 1226 if (STRNCMP(ptr, p, wlen) != 0) 1227 { 1228 for (s = ptr; s < ptr + wlen; mb_ptr_adv(s)) 1229 mb_ptr_adv(p); 1230 wlen = p - mip->mi_word; 1231 } 1232 } 1233 #endif 1234 1235 /* Check flags and region. For FIND_PREFIX check the condition and 1236 * prefix ID. 1237 * Repeat this if there are more flags/region alternatives until there 1238 * is a match. */ 1239 res = SP_BAD; 1240 for (len = byts[arridx - 1]; len > 0 && byts[arridx] == 0; 1241 --len, ++arridx) 1242 { 1243 flags = idxs[arridx]; 1244 1245 /* For the fold-case tree check that the case of the checked word 1246 * matches with what the word in the tree requires. 1247 * For keep-case tree the case is always right. For prefixes we 1248 * don't bother to check. */ 1249 if (mode == FIND_FOLDWORD) 1250 { 1251 if (mip->mi_cend != mip->mi_word + wlen) 1252 { 1253 /* mi_capflags was set for a different word length, need 1254 * to do it again. 
*/ 1255 mip->mi_cend = mip->mi_word + wlen; 1256 mip->mi_capflags = captype(mip->mi_word, mip->mi_cend); 1257 } 1258 1259 if (mip->mi_capflags == WF_KEEPCAP 1260 || !spell_valid_case(mip->mi_capflags, flags)) 1261 continue; 1262 } 1263 1264 /* When mode is FIND_PREFIX the word must support the prefix: 1265 * check the prefix ID and the condition. Do that for the list at 1266 * mip->mi_prefarridx that find_prefix() filled. */ 1267 else if (mode == FIND_PREFIX && !prefix_found) 1268 { 1269 c = valid_word_prefix(mip->mi_prefcnt, mip->mi_prefarridx, 1270 flags, 1271 mip->mi_word + mip->mi_cprefixlen, slang, 1272 FALSE); 1273 if (c == 0) 1274 continue; 1275 1276 /* Use the WF_RARE flag for a rare prefix. */ 1277 if (c & WF_RAREPFX) 1278 flags |= WF_RARE; 1279 prefix_found = TRUE; 1280 } 1281 1282 if (slang->sl_nobreak) 1283 { 1284 if ((mode == FIND_COMPOUND || mode == FIND_KEEPCOMPOUND) 1285 && (flags & WF_BANNED) == 0) 1286 { 1287 /* NOBREAK: found a valid following word. That's all we 1288 * need to know, so return. */ 1289 mip->mi_result = SP_OK; 1290 break; 1291 } 1292 } 1293 1294 else if ((mode == FIND_COMPOUND || mode == FIND_KEEPCOMPOUND 1295 || !word_ends)) 1296 { 1297 /* If there is no flag or the word is shorter than 1298 * COMPOUNDMIN reject it quickly. 1299 * Makes you wonder why someone puts a compound flag on a word 1300 * that's too short... Myspell compatibility requires this 1301 * anyway. */ 1302 if (((unsigned)flags >> 24) == 0 1303 || wlen - mip->mi_compoff < slang->sl_compminlen) 1304 continue; 1305 #ifdef FEAT_MBYTE 1306 /* For multi-byte chars check character length against 1307 * COMPOUNDMIN. */ 1308 if (has_mbyte 1309 && slang->sl_compminlen > 0 1310 && mb_charlen_len(mip->mi_word + mip->mi_compoff, 1311 wlen - mip->mi_compoff) < slang->sl_compminlen) 1312 continue; 1313 #endif 1314 1315 /* Limit the number of compound words to COMPOUNDMAX if no 1316 * maximum for syllables is specified. 
*/ 1317 if (!word_ends && mip->mi_complen + 2 > slang->sl_compmax 1318 && slang->sl_compsylmax == MAXWLEN) 1319 continue; 1320 1321 /* Quickly check if compounding is possible with this flag. */ 1322 if (!byte_in_str(mip->mi_complen == 0 1323 ? slang->sl_compstartflags 1324 : slang->sl_compallflags, 1325 ((unsigned)flags >> 24))) 1326 continue; 1327 1328 if (mode == FIND_COMPOUND) 1329 { 1330 int capflags; 1331 1332 /* Need to check the caps type of the appended compound 1333 * word. */ 1334 #ifdef FEAT_MBYTE 1335 if (has_mbyte && STRNCMP(ptr, mip->mi_word, 1336 mip->mi_compoff) != 0) 1337 { 1338 /* case folding may have changed the length */ 1339 p = mip->mi_word; 1340 for (s = ptr; s < ptr + mip->mi_compoff; mb_ptr_adv(s)) 1341 mb_ptr_adv(p); 1342 } 1343 else 1344 #endif 1345 p = mip->mi_word + mip->mi_compoff; 1346 capflags = captype(p, mip->mi_word + wlen); 1347 if (capflags == WF_KEEPCAP || (capflags == WF_ALLCAP 1348 && (flags & WF_FIXCAP) != 0)) 1349 continue; 1350 1351 if (capflags != WF_ALLCAP) 1352 { 1353 /* When the character before the word is a word 1354 * character we do not accept a Onecap word. We do 1355 * accept a no-caps word, even when the dictionary 1356 * word specifies ONECAP. */ 1357 mb_ptr_back(mip->mi_word, p); 1358 if (spell_iswordp_nmw(p) 1359 ? capflags == WF_ONECAP 1360 : (flags & WF_ONECAP) != 0 1361 && capflags != WF_ONECAP) 1362 continue; 1363 } 1364 } 1365 1366 /* If the word ends the sequence of compound flags of the 1367 * words must match with one of the COMPOUNDFLAGS items and 1368 * the number of syllables must not be too large. */ 1369 mip->mi_compflags[mip->mi_complen] = ((unsigned)flags >> 24); 1370 mip->mi_compflags[mip->mi_complen + 1] = NUL; 1371 if (word_ends) 1372 { 1373 char_u fword[MAXWLEN]; 1374 1375 if (slang->sl_compsylmax < MAXWLEN) 1376 { 1377 /* "fword" is only needed for checking syllables. 
*/ 1378 if (ptr == mip->mi_word) 1379 (void)spell_casefold(ptr, wlen, fword, MAXWLEN); 1380 else 1381 vim_strncpy(fword, ptr, endlen[endidxcnt]); 1382 } 1383 if (!can_compound(slang, fword, mip->mi_compflags)) 1384 continue; 1385 } 1386 } 1387 1388 /* Check NEEDCOMPOUND: can't use word without compounding. */ 1389 else if (flags & WF_NEEDCOMP) 1390 continue; 1391 1392 nobreak_result = SP_OK; 1393 1394 if (!word_ends) 1395 { 1396 int save_result = mip->mi_result; 1397 char_u *save_end = mip->mi_end; 1398 langp_T *save_lp = mip->mi_lp; 1399 int lpi; 1400 1401 /* Check that a valid word follows. If there is one and we 1402 * are compounding, it will set "mi_result", thus we are 1403 * always finished here. For NOBREAK we only check that a 1404 * valid word follows. 1405 * Recursive! */ 1406 if (slang->sl_nobreak) 1407 mip->mi_result = SP_BAD; 1408 1409 /* Find following word in case-folded tree. */ 1410 mip->mi_compoff = endlen[endidxcnt]; 1411 #ifdef FEAT_MBYTE 1412 if (has_mbyte && mode == FIND_KEEPWORD) 1413 { 1414 /* Compute byte length in case-folded word from "wlen": 1415 * byte length in keep-case word. Length may change when 1416 * folding case. This can be slow, take a shortcut when 1417 * the case-folded word is equal to the keep-case word. */ 1418 p = mip->mi_fword; 1419 if (STRNCMP(ptr, p, wlen) != 0) 1420 { 1421 for (s = ptr; s < ptr + wlen; mb_ptr_adv(s)) 1422 mb_ptr_adv(p); 1423 mip->mi_compoff = p - mip->mi_fword; 1424 } 1425 } 1426 #endif 1427 c = mip->mi_compoff; 1428 ++mip->mi_complen; 1429 1430 /* For NOBREAK we need to try all NOBREAK languages, at least 1431 * to find the ".add" file(s). 
*/ 1432 for (lpi = 0; lpi < mip->mi_buf->b_langp.ga_len; ++lpi) 1433 { 1434 if (slang->sl_nobreak) 1435 { 1436 mip->mi_lp = LANGP_ENTRY(mip->mi_buf->b_langp, lpi); 1437 if (mip->mi_lp->lp_slang->sl_fidxs == NULL 1438 || !mip->mi_lp->lp_slang->sl_nobreak) 1439 continue; 1440 } 1441 1442 find_word(mip, FIND_COMPOUND); 1443 1444 /* When NOBREAK any word that matches is OK. Otherwise we 1445 * need to find the longest match, thus try with keep-case 1446 * and prefix too. */ 1447 if (!slang->sl_nobreak || mip->mi_result == SP_BAD) 1448 { 1449 /* Find following word in keep-case tree. */ 1450 mip->mi_compoff = wlen; 1451 find_word(mip, FIND_KEEPCOMPOUND); 1452 1453 if (!slang->sl_nobreak || mip->mi_result == SP_BAD) 1454 { 1455 /* Check for following word with prefix. */ 1456 mip->mi_compoff = c; 1457 find_prefix(mip, FIND_COMPOUND); 1458 } 1459 } 1460 1461 if (!slang->sl_nobreak) 1462 break; 1463 } 1464 --mip->mi_complen; 1465 mip->mi_lp = save_lp; 1466 1467 if (slang->sl_nobreak) 1468 { 1469 nobreak_result = mip->mi_result; 1470 mip->mi_result = save_result; 1471 mip->mi_end = save_end; 1472 } 1473 else 1474 { 1475 if (mip->mi_result == SP_OK) 1476 break; 1477 continue; 1478 } 1479 } 1480 1481 if (flags & WF_BANNED) 1482 res = SP_BANNED; 1483 else if (flags & WF_REGION) 1484 { 1485 /* Check region. */ 1486 if ((mip->mi_lp->lp_region & (flags >> 16)) != 0) 1487 res = SP_OK; 1488 else 1489 res = SP_LOCAL; 1490 } 1491 else if (flags & WF_RARE) 1492 res = SP_RARE; 1493 else 1494 res = SP_OK; 1495 1496 /* Always use the longest match and the best result. For NOBREAK 1497 * we separately keep the longest match without a following good 1498 * word as a fall-back. 
*/ 1499 if (nobreak_result == SP_BAD) 1500 { 1501 if (mip->mi_result2 > res) 1502 { 1503 mip->mi_result2 = res; 1504 mip->mi_end2 = mip->mi_word + wlen; 1505 } 1506 else if (mip->mi_result2 == res 1507 && mip->mi_end2 < mip->mi_word + wlen) 1508 mip->mi_end2 = mip->mi_word + wlen; 1509 } 1510 else if (mip->mi_result > res) 1511 { 1512 mip->mi_result = res; 1513 mip->mi_end = mip->mi_word + wlen; 1514 } 1515 else if (mip->mi_result == res && mip->mi_end < mip->mi_word + wlen) 1516 mip->mi_end = mip->mi_word + wlen; 1517 1518 if (mip->mi_result == SP_OK) 1519 break; 1520 } 1521 1522 if (mip->mi_result == SP_OK) 1523 break; 1524 } 1525 } 1526 1527 /* 1528 * Return TRUE if "flags" is a valid sequence of compound flags and 1529 * "word[len]" does not have too many syllables. 1530 */ 1531 static int 1532 can_compound(slang, word, flags) 1533 slang_T *slang; 1534 char_u *word; 1535 char_u *flags; 1536 { 1537 regmatch_T regmatch; 1538 #ifdef FEAT_MBYTE 1539 char_u uflags[MAXWLEN * 2]; 1540 int i; 1541 #endif 1542 char_u *p; 1543 1544 if (slang->sl_compprog == NULL) 1545 return FALSE; 1546 #ifdef FEAT_MBYTE 1547 if (enc_utf8) 1548 { 1549 /* Need to convert the single byte flags to utf8 characters. */ 1550 p = uflags; 1551 for (i = 0; flags[i] != NUL; ++i) 1552 p += mb_char2bytes(flags[i], p); 1553 *p = NUL; 1554 p = uflags; 1555 } 1556 else 1557 #endif 1558 p = flags; 1559 regmatch.regprog = slang->sl_compprog; 1560 regmatch.rm_ic = FALSE; 1561 if (!vim_regexec(®match, p, 0)) 1562 return FALSE; 1563 1564 /* Count the number of syllables. This may be slow, do it last. If there 1565 * are too many syllables AND the number of compound words is above 1566 * COMPOUNDMAX then compounding is not allowed. 
*/ 1567 if (slang->sl_compsylmax < MAXWLEN 1568 && count_syllables(slang, word) > slang->sl_compsylmax) 1569 return (int)STRLEN(flags) < slang->sl_compmax; 1570 return TRUE; 1571 } 1572 1573 /* 1574 * Return non-zero if the prefix indicated by "arridx" matches with the prefix 1575 * ID in "flags" for the word "word". 1576 * The WF_RAREPFX flag is included in the return value for a rare prefix. 1577 */ 1578 static int 1579 valid_word_prefix(totprefcnt, arridx, flags, word, slang, cond_req) 1580 int totprefcnt; /* nr of prefix IDs */ 1581 int arridx; /* idx in sl_pidxs[] */ 1582 int flags; 1583 char_u *word; 1584 slang_T *slang; 1585 int cond_req; /* only use prefixes with a condition */ 1586 { 1587 int prefcnt; 1588 int pidx; 1589 regprog_T *rp; 1590 regmatch_T regmatch; 1591 int prefid; 1592 1593 prefid = (unsigned)flags >> 24; 1594 for (prefcnt = totprefcnt - 1; prefcnt >= 0; --prefcnt) 1595 { 1596 pidx = slang->sl_pidxs[arridx + prefcnt]; 1597 1598 /* Check the prefix ID. */ 1599 if (prefid != (pidx & 0xff)) 1600 continue; 1601 1602 /* Check if the prefix doesn't combine and the word already has a 1603 * suffix. */ 1604 if ((flags & WF_HAS_AFF) && (pidx & WF_PFX_NC)) 1605 continue; 1606 1607 /* Check the condition, if there is one. The condition index is 1608 * stored in the two bytes above the prefix ID byte. */ 1609 rp = slang->sl_prefprog[((unsigned)pidx >> 8) & 0xffff]; 1610 if (rp != NULL) 1611 { 1612 regmatch.regprog = rp; 1613 regmatch.rm_ic = FALSE; 1614 if (!vim_regexec(®match, word, 0)) 1615 continue; 1616 } 1617 else if (cond_req) 1618 continue; 1619 1620 /* It's a match! Return the WF_ flags. */ 1621 return pidx; 1622 } 1623 return 0; 1624 } 1625 1626 /* 1627 * Check if the word at "mip->mi_word" has a matching prefix. 1628 * If it does, then check the following word. 1629 * 1630 * If "mode" is "FIND_COMPOUND" then do the same after another word, find a 1631 * prefix in a compound word. 1632 * 1633 * For a match mip->mi_result is updated. 
1634 */ 1635 static void 1636 find_prefix(mip, mode) 1637 matchinf_T *mip; 1638 int mode; 1639 { 1640 idx_T arridx = 0; 1641 int len; 1642 int wlen = 0; 1643 int flen; 1644 int c; 1645 char_u *ptr; 1646 idx_T lo, hi, m; 1647 slang_T *slang = mip->mi_lp->lp_slang; 1648 char_u *byts; 1649 idx_T *idxs; 1650 1651 byts = slang->sl_pbyts; 1652 if (byts == NULL) 1653 return; /* array is empty */ 1654 1655 /* We use the case-folded word here, since prefixes are always 1656 * case-folded. */ 1657 ptr = mip->mi_fword; 1658 flen = mip->mi_fwordlen; /* available case-folded bytes */ 1659 if (mode == FIND_COMPOUND) 1660 { 1661 /* Skip over the previously found word(s). */ 1662 ptr += mip->mi_compoff; 1663 flen -= mip->mi_compoff; 1664 } 1665 idxs = slang->sl_pidxs; 1666 1667 /* 1668 * Repeat advancing in the tree until: 1669 * - there is a byte that doesn't match, 1670 * - we reach the end of the tree, 1671 * - or we reach the end of the line. 1672 */ 1673 for (;;) 1674 { 1675 if (flen == 0 && *mip->mi_fend != NUL) 1676 flen = fold_more(mip); 1677 1678 len = byts[arridx++]; 1679 1680 /* If the first possible byte is a zero the prefix could end here. 1681 * Check if the following word matches and supports the prefix. */ 1682 if (byts[arridx] == 0) 1683 { 1684 /* There can be several prefixes with different conditions. We 1685 * try them all, since we don't know which one will give the 1686 * longest match. The word is the same each time, pass the list 1687 * of possible prefixes to find_word(). */ 1688 mip->mi_prefarridx = arridx; 1689 mip->mi_prefcnt = len; 1690 while (len > 0 && byts[arridx] == 0) 1691 { 1692 ++arridx; 1693 --len; 1694 } 1695 mip->mi_prefcnt -= len; 1696 1697 /* Find the word that comes after the prefix. */ 1698 mip->mi_prefixlen = wlen; 1699 if (mode == FIND_COMPOUND) 1700 /* Skip over the previously found word(s). 
*/ 1701 mip->mi_prefixlen += mip->mi_compoff; 1702 1703 #ifdef FEAT_MBYTE 1704 if (has_mbyte) 1705 { 1706 /* Case-folded length may differ from original length. */ 1707 mip->mi_cprefixlen = nofold_len(mip->mi_fword, 1708 mip->mi_prefixlen, mip->mi_word); 1709 } 1710 else 1711 mip->mi_cprefixlen = mip->mi_prefixlen; 1712 #endif 1713 find_word(mip, FIND_PREFIX); 1714 1715 1716 if (len == 0) 1717 break; /* no children, word must end here */ 1718 } 1719 1720 /* Stop looking at end of the line. */ 1721 if (ptr[wlen] == NUL) 1722 break; 1723 1724 /* Perform a binary search in the list of accepted bytes. */ 1725 c = ptr[wlen]; 1726 lo = arridx; 1727 hi = arridx + len - 1; 1728 while (lo < hi) 1729 { 1730 m = (lo + hi) / 2; 1731 if (byts[m] > c) 1732 hi = m - 1; 1733 else if (byts[m] < c) 1734 lo = m + 1; 1735 else 1736 { 1737 lo = hi = m; 1738 break; 1739 } 1740 } 1741 1742 /* Stop if there is no matching byte. */ 1743 if (hi < lo || byts[lo] != c) 1744 break; 1745 1746 /* Continue at the child (if there is one). */ 1747 arridx = idxs[lo]; 1748 ++wlen; 1749 --flen; 1750 } 1751 } 1752 1753 /* 1754 * Need to fold at least one more character. Do until next non-word character 1755 * for efficiency. 1756 * Return the length of the folded chars in bytes. 1757 */ 1758 static int 1759 fold_more(mip) 1760 matchinf_T *mip; 1761 { 1762 int flen; 1763 char_u *p; 1764 1765 p = mip->mi_fend; 1766 do 1767 { 1768 mb_ptr_adv(mip->mi_fend); 1769 } while (*mip->mi_fend != NUL && spell_iswordp(mip->mi_fend, mip->mi_buf)); 1770 1771 /* Include the non-word character so that we can check for the 1772 * word end. */ 1773 if (*mip->mi_fend != NUL) 1774 mb_ptr_adv(mip->mi_fend); 1775 1776 (void)spell_casefold(p, (int)(mip->mi_fend - p), 1777 mip->mi_fword + mip->mi_fwordlen, 1778 MAXWLEN - mip->mi_fwordlen); 1779 flen = STRLEN(mip->mi_fword + mip->mi_fwordlen); 1780 mip->mi_fwordlen += flen; 1781 return flen; 1782 } 1783 1784 /* 1785 * Check case flags for a word. 
Return TRUE if the word has the requested 1786 * case. 1787 */ 1788 static int 1789 spell_valid_case(wordflags, treeflags) 1790 int wordflags; /* flags for the checked word. */ 1791 int treeflags; /* flags for the word in the spell tree */ 1792 { 1793 return ((wordflags == WF_ALLCAP && (treeflags & WF_FIXCAP) == 0) 1794 || ((treeflags & (WF_ALLCAP | WF_KEEPCAP)) == 0 1795 && ((treeflags & WF_ONECAP) == 0 1796 || (wordflags & WF_ONECAP) != 0))); 1797 } 1798 1799 /* 1800 * Return TRUE if spell checking is not enabled. 1801 */ 1802 static int 1803 no_spell_checking(wp) 1804 win_T *wp; 1805 { 1806 if (!wp->w_p_spell || *wp->w_buffer->b_p_spl == NUL) 1807 { 1808 EMSG(_("E756: Spell checking is not enabled")); 1809 return TRUE; 1810 } 1811 return FALSE; 1812 } 1813 1814 /* 1815 * Move to next spell error. 1816 * "curline" is FALSE for "[s", "]s", "[S" and "]S". 1817 * "curline" is TRUE to find word under/after cursor in the same line. 1818 * For Insert mode completion "dir" is BACKWARD and "curline" is TRUE: move 1819 * to after badly spelled word before the cursor. 1820 * Return 0 if not found, length of the badly spelled word otherwise. 
1821 */ 1822 int 1823 spell_move_to(wp, dir, allwords, curline, attrp) 1824 win_T *wp; 1825 int dir; /* FORWARD or BACKWARD */ 1826 int allwords; /* TRUE for "[s"/"]s", FALSE for "[S"/"]S" */ 1827 int curline; 1828 hlf_T *attrp; /* return: attributes of bad word or NULL 1829 (only when "dir" is FORWARD) */ 1830 { 1831 linenr_T lnum; 1832 pos_T found_pos; 1833 int found_len = 0; 1834 char_u *line; 1835 char_u *p; 1836 char_u *endp; 1837 hlf_T attr; 1838 int len; 1839 int has_syntax = syntax_present(wp->w_buffer); 1840 int col; 1841 int can_spell; 1842 char_u *buf = NULL; 1843 int buflen = 0; 1844 int skip = 0; 1845 int capcol = -1; 1846 int found_one = FALSE; 1847 int wrapped = FALSE; 1848 1849 if (no_spell_checking(wp)) 1850 return 0; 1851 1852 /* 1853 * Start looking for bad word at the start of the line, because we can't 1854 * start halfway a word, we don't know where the it starts or ends. 1855 * 1856 * When searching backwards, we continue in the line to find the last 1857 * bad word (in the cursor line: before the cursor). 1858 * 1859 * We concatenate the start of the next line, so that wrapped words work 1860 * (e.g. "et<line-break>cetera"). Doesn't work when searching backwards 1861 * though... 1862 */ 1863 lnum = wp->w_cursor.lnum; 1864 found_pos.lnum = 0; 1865 1866 while (!got_int) 1867 { 1868 line = ml_get_buf(wp->w_buffer, lnum, FALSE); 1869 1870 len = STRLEN(line); 1871 if (buflen < len + MAXWLEN + 2) 1872 { 1873 vim_free(buf); 1874 buflen = len + MAXWLEN + 2; 1875 buf = alloc(buflen); 1876 if (buf == NULL) 1877 break; 1878 } 1879 1880 /* In first line check first word for Capital. */ 1881 if (lnum == 1) 1882 capcol = 0; 1883 1884 /* For checking first word with a capital skip white space. */ 1885 if (capcol == 0) 1886 capcol = skipwhite(line) - line; 1887 1888 /* Copy the line into "buf" and append the start of the next line if 1889 * possible. 
*/ 1890 STRCPY(buf, line); 1891 if (lnum < wp->w_buffer->b_ml.ml_line_count) 1892 spell_cat_line(buf + STRLEN(buf), ml_get(lnum + 1), MAXWLEN); 1893 1894 p = buf + skip; 1895 endp = buf + len; 1896 while (p < endp) 1897 { 1898 /* When searching backward don't search after the cursor. Unless 1899 * we wrapped around the end of the buffer. */ 1900 if (dir == BACKWARD 1901 && lnum == wp->w_cursor.lnum 1902 && !wrapped 1903 && (colnr_T)(p - buf) >= wp->w_cursor.col) 1904 break; 1905 1906 /* start of word */ 1907 attr = HLF_COUNT; 1908 len = spell_check(wp, p, &attr, &capcol); 1909 1910 if (attr != HLF_COUNT) 1911 { 1912 /* We found a bad word. Check the attribute. */ 1913 if (allwords || attr == HLF_SPB) 1914 { 1915 found_one = TRUE; 1916 1917 /* When searching forward only accept a bad word after 1918 * the cursor. */ 1919 if (dir == BACKWARD 1920 || lnum != wp->w_cursor.lnum 1921 || (lnum == wp->w_cursor.lnum 1922 && (wrapped 1923 || (colnr_T)(curline ? p - buf + len 1924 : p - buf) 1925 > wp->w_cursor.col))) 1926 { 1927 if (has_syntax) 1928 { 1929 col = p - buf; 1930 (void)syn_get_id(wp, lnum, (colnr_T)col, 1931 FALSE, &can_spell); 1932 } 1933 else 1934 can_spell = TRUE; 1935 1936 if (can_spell) 1937 { 1938 found_pos.lnum = lnum; 1939 found_pos.col = p - buf; 1940 #ifdef FEAT_VIRTUALEDIT 1941 found_pos.coladd = 0; 1942 #endif 1943 if (dir == FORWARD) 1944 { 1945 /* No need to search further. */ 1946 wp->w_cursor = found_pos; 1947 vim_free(buf); 1948 if (attrp != NULL) 1949 *attrp = attr; 1950 return len; 1951 } 1952 else if (curline) 1953 /* Insert mode completion: put cursor after 1954 * the bad word. */ 1955 found_pos.col += len; 1956 found_len = len; 1957 } 1958 } 1959 } 1960 } 1961 1962 /* advance to character after the word */ 1963 p += len; 1964 capcol -= len; 1965 } 1966 1967 if (dir == BACKWARD && found_pos.lnum != 0) 1968 { 1969 /* Use the last match in the line (before the cursor). 
*/ 1970 wp->w_cursor = found_pos; 1971 vim_free(buf); 1972 return found_len; 1973 } 1974 1975 if (curline) 1976 break; /* only check cursor line */ 1977 1978 /* Advance to next line. */ 1979 if (dir == BACKWARD) 1980 { 1981 /* If we are back at the starting line and searched it again there 1982 * is no match, give up. */ 1983 if (lnum == wp->w_cursor.lnum && wrapped) 1984 break; 1985 1986 if (lnum > 1) 1987 --lnum; 1988 else if (!p_ws) 1989 break; /* at first line and 'nowrapscan' */ 1990 else 1991 { 1992 /* Wrap around to the end of the buffer. May search the 1993 * starting line again and accept the last match. */ 1994 lnum = wp->w_buffer->b_ml.ml_line_count; 1995 wrapped = TRUE; 1996 if (!shortmess(SHM_SEARCH)) 1997 give_warning((char_u *)_(top_bot_msg), TRUE); 1998 } 1999 capcol = -1; 2000 } 2001 else 2002 { 2003 if (lnum < wp->w_buffer->b_ml.ml_line_count) 2004 ++lnum; 2005 else if (!p_ws) 2006 break; /* at first line and 'nowrapscan' */ 2007 else 2008 { 2009 /* Wrap around to the start of the buffer. May search the 2010 * starting line again and accept the first match. */ 2011 lnum = 1; 2012 wrapped = TRUE; 2013 if (!shortmess(SHM_SEARCH)) 2014 give_warning((char_u *)_(bot_top_msg), TRUE); 2015 } 2016 2017 /* If we are back at the starting line and there is no match then 2018 * give up. */ 2019 if (lnum == wp->w_cursor.lnum && !found_one) 2020 break; 2021 2022 /* Skip the characters at the start of the next line that were 2023 * included in a match crossing line boundaries. */ 2024 if (attr == HLF_COUNT) 2025 skip = p - endp; 2026 else 2027 skip = 0; 2028 2029 /* Capscol skips over the inserted space. 
*/ 2030 --capcol; 2031 2032 /* But after empty line check first word in next line */ 2033 if (*skipwhite(line) == NUL) 2034 capcol = 0; 2035 } 2036 2037 line_breakcheck(); 2038 } 2039 2040 vim_free(buf); 2041 return 0; 2042 } 2043 2044 /* 2045 * For spell checking: concatenate the start of the following line "line" into 2046 * "buf", blanking-out special characters. Copy less then "maxlen" bytes. 2047 */ 2048 void 2049 spell_cat_line(buf, line, maxlen) 2050 char_u *buf; 2051 char_u *line; 2052 int maxlen; 2053 { 2054 char_u *p; 2055 int n; 2056 2057 p = skipwhite(line); 2058 while (vim_strchr((char_u *)"*#/\"\t", *p) != NULL) 2059 p = skipwhite(p + 1); 2060 2061 if (*p != NUL) 2062 { 2063 *buf = ' '; 2064 vim_strncpy(buf + 1, line, maxlen - 2); 2065 n = p - line; 2066 if (n >= maxlen) 2067 n = maxlen - 1; 2068 vim_memset(buf + 1, ' ', n); 2069 } 2070 } 2071 2072 typedef struct spelload_S 2073 { 2074 char_u sl_lang[MAXWLEN + 1]; /* language name */ 2075 slang_T *sl_slang; /* resulting slang_T struct */ 2076 int sl_nobreak; /* NOBREAK language found */ 2077 } spelload_T; 2078 2079 /* 2080 * Load word list(s) for "lang" from Vim spell file(s). 2081 * "lang" must be the language without the region: e.g., "en". 2082 */ 2083 static void 2084 spell_load_lang(lang) 2085 char_u *lang; 2086 { 2087 char_u fname_enc[85]; 2088 int r; 2089 spelload_T sl; 2090 2091 /* Copy the language name to pass it to spell_load_cb() as a cookie. 2092 * It's truncated when an error is detected. */ 2093 STRCPY(sl.sl_lang, lang); 2094 sl.sl_slang = NULL; 2095 sl.sl_nobreak = FALSE; 2096 2097 /* 2098 * Find the first spell file for "lang" in 'runtimepath' and load it. 2099 */ 2100 vim_snprintf((char *)fname_enc, sizeof(fname_enc) - 5, 2101 "spell/%s.%s.spl", lang, spell_enc()); 2102 r = do_in_runtimepath(fname_enc, FALSE, spell_load_cb, &sl); 2103 2104 if (r == FAIL && *sl.sl_lang != NUL) 2105 { 2106 /* Try loading the ASCII version. 
*/ 2107 vim_snprintf((char *)fname_enc, sizeof(fname_enc) - 5, 2108 "spell/%s.ascii.spl", lang); 2109 r = do_in_runtimepath(fname_enc, FALSE, spell_load_cb, &sl); 2110 } 2111 2112 if (r == FAIL) 2113 smsg((char_u *)_("Warning: Cannot find word list \"%s.%s.spl\" or \"%s.ascii.spl\""), 2114 lang, spell_enc(), lang); 2115 else if (sl.sl_slang != NULL) 2116 { 2117 /* At least one file was loaded, now load all the additions. */ 2118 STRCPY(fname_enc + STRLEN(fname_enc) - 3, "add.spl"); 2119 do_in_runtimepath(fname_enc, TRUE, spell_load_cb, &sl); 2120 } 2121 } 2122 2123 /* 2124 * Return the encoding used for spell checking: Use 'encoding', except that we 2125 * use "latin1" for "latin9". And limit to 60 characters (just in case). 2126 */ 2127 static char_u * 2128 spell_enc() 2129 { 2130 2131 #ifdef FEAT_MBYTE 2132 if (STRLEN(p_enc) < 60 && STRCMP(p_enc, "iso-8859-15") != 0) 2133 return p_enc; 2134 #endif 2135 return (char_u *)"latin1"; 2136 } 2137 2138 /* 2139 * Get the name of the .spl file for the internal wordlist into 2140 * "fname[MAXPATHL]". 2141 */ 2142 static void 2143 int_wordlist_spl(fname) 2144 char_u *fname; 2145 { 2146 vim_snprintf((char *)fname, MAXPATHL, "%s.%s.spl", 2147 int_wordlist, spell_enc()); 2148 } 2149 2150 /* 2151 * Allocate a new slang_T. 2152 * Caller must fill "sl_next". 2153 */ 2154 static slang_T * 2155 slang_alloc(lang) 2156 char_u *lang; 2157 { 2158 slang_T *lp; 2159 2160 lp = (slang_T *)alloc_clear(sizeof(slang_T)); 2161 if (lp != NULL) 2162 { 2163 lp->sl_name = vim_strsave(lang); 2164 ga_init2(&lp->sl_rep, sizeof(fromto_T), 10); 2165 lp->sl_compmax = MAXWLEN; 2166 lp->sl_compsylmax = MAXWLEN; 2167 } 2168 return lp; 2169 } 2170 2171 /* 2172 * Free the contents of an slang_T and the structure itself. 
2173 */ 2174 static void 2175 slang_free(lp) 2176 slang_T *lp; 2177 { 2178 vim_free(lp->sl_name); 2179 vim_free(lp->sl_fname); 2180 slang_clear(lp); 2181 vim_free(lp); 2182 } 2183 2184 /* 2185 * Clear an slang_T so that the file can be reloaded. 2186 */ 2187 static void 2188 slang_clear(lp) 2189 slang_T *lp; 2190 { 2191 garray_T *gap; 2192 fromto_T *ftp; 2193 salitem_T *smp; 2194 int i; 2195 2196 vim_free(lp->sl_fbyts); 2197 lp->sl_fbyts = NULL; 2198 vim_free(lp->sl_kbyts); 2199 lp->sl_kbyts = NULL; 2200 vim_free(lp->sl_pbyts); 2201 lp->sl_pbyts = NULL; 2202 2203 vim_free(lp->sl_fidxs); 2204 lp->sl_fidxs = NULL; 2205 vim_free(lp->sl_kidxs); 2206 lp->sl_kidxs = NULL; 2207 vim_free(lp->sl_pidxs); 2208 lp->sl_pidxs = NULL; 2209 2210 gap = &lp->sl_rep; 2211 while (gap->ga_len > 0) 2212 { 2213 ftp = &((fromto_T *)gap->ga_data)[--gap->ga_len]; 2214 vim_free(ftp->ft_from); 2215 vim_free(ftp->ft_to); 2216 } 2217 ga_clear(gap); 2218 2219 gap = &lp->sl_sal; 2220 if (lp->sl_sofo) 2221 { 2222 /* "ga_len" is set to 1 without adding an item for latin1 */ 2223 if (gap->ga_data != NULL) 2224 /* SOFOFROM and SOFOTO items: free lists of wide characters. */ 2225 for (i = 0; i < gap->ga_len; ++i) 2226 vim_free(((int **)gap->ga_data)[i]); 2227 } 2228 else 2229 /* SAL items: free salitem_T items */ 2230 while (gap->ga_len > 0) 2231 { 2232 smp = &((salitem_T *)gap->ga_data)[--gap->ga_len]; 2233 vim_free(smp->sm_lead); 2234 /* Don't free sm_oneof and sm_rules, they point into sm_lead. 
*/ 2235 vim_free(smp->sm_to); 2236 #ifdef FEAT_MBYTE 2237 vim_free(smp->sm_lead_w); 2238 vim_free(smp->sm_oneof_w); 2239 vim_free(smp->sm_to_w); 2240 #endif 2241 } 2242 ga_clear(gap); 2243 2244 for (i = 0; i < lp->sl_prefixcnt; ++i) 2245 vim_free(lp->sl_prefprog[i]); 2246 lp->sl_prefixcnt = 0; 2247 vim_free(lp->sl_prefprog); 2248 lp->sl_prefprog = NULL; 2249 2250 vim_free(lp->sl_midword); 2251 lp->sl_midword = NULL; 2252 2253 vim_free(lp->sl_compprog); 2254 vim_free(lp->sl_compstartflags); 2255 vim_free(lp->sl_compallflags); 2256 lp->sl_compprog = NULL; 2257 lp->sl_compstartflags = NULL; 2258 lp->sl_compallflags = NULL; 2259 2260 vim_free(lp->sl_syllable); 2261 lp->sl_syllable = NULL; 2262 ga_clear(&lp->sl_syl_items); 2263 2264 #ifdef FEAT_MBYTE 2265 { 2266 int todo = lp->sl_map_hash.ht_used; 2267 hashitem_T *hi; 2268 2269 for (hi = lp->sl_map_hash.ht_array; todo > 0; ++hi) 2270 if (!HASHITEM_EMPTY(hi)) 2271 { 2272 --todo; 2273 vim_free(hi->hi_key); 2274 } 2275 } 2276 hash_clear(&lp->sl_map_hash); 2277 #endif 2278 2279 lp->sl_compmax = MAXWLEN; 2280 lp->sl_compminlen = 0; 2281 lp->sl_compsylmax = MAXWLEN; 2282 lp->sl_regions[0] = NUL; 2283 } 2284 2285 /* 2286 * Load one spell file and store the info into a slang_T. 2287 * Invoked through do_in_runtimepath(). 2288 */ 2289 static void 2290 spell_load_cb(fname, cookie) 2291 char_u *fname; 2292 void *cookie; 2293 { 2294 spelload_T *slp = (spelload_T *)cookie; 2295 slang_T *slang; 2296 2297 slang = spell_load_file(fname, slp->sl_lang, NULL, FALSE); 2298 if (slang != NULL) 2299 { 2300 /* When a previously loaded file has NOBREAK also use it for the 2301 * ".add" files. */ 2302 if (slp->sl_nobreak && slang->sl_add) 2303 slang->sl_nobreak = TRUE; 2304 else if (slang->sl_nobreak) 2305 slp->sl_nobreak = TRUE; 2306 2307 slp->sl_slang = slang; 2308 } 2309 } 2310 2311 /* 2312 * Load one spell file and store the info into a slang_T. 
2313 * 2314 * This is invoked in two ways: 2315 * - From spell_load_cb() to load a spell file for the first time. "lang" is 2316 * the language name, "old_lp" is NULL. Will allocate an slang_T. 2317 * - To reload a spell file that was changed. "lang" is NULL and "old_lp" 2318 * points to the existing slang_T. 2319 * Returns the slang_T the spell file was loaded into. NULL for error. 2320 */ 2321 static slang_T * 2322 spell_load_file(fname, lang, old_lp, silent) 2323 char_u *fname; 2324 char_u *lang; 2325 slang_T *old_lp; 2326 int silent; /* no error if file doesn't exist */ 2327 { 2328 FILE *fd; 2329 char_u buf[VIMSPELLMAGICL]; 2330 char_u *p; 2331 char_u *bp; 2332 idx_T *ip; 2333 int i; 2334 int n; 2335 int len; 2336 int round; 2337 char_u *save_sourcing_name = sourcing_name; 2338 linenr_T save_sourcing_lnum = sourcing_lnum; 2339 slang_T *lp = NULL; 2340 idx_T idx; 2341 int c = 0; 2342 int res; 2343 2344 fd = mch_fopen((char *)fname, "r"); 2345 if (fd == NULL) 2346 { 2347 if (!silent) 2348 EMSG2(_(e_notopen), fname); 2349 else if (p_verbose > 2) 2350 { 2351 verbose_enter(); 2352 smsg((char_u *)e_notopen, fname); 2353 verbose_leave(); 2354 } 2355 goto endFAIL; 2356 } 2357 if (p_verbose > 2) 2358 { 2359 verbose_enter(); 2360 smsg((char_u *)_("Reading spell file \"%s\""), fname); 2361 verbose_leave(); 2362 } 2363 2364 if (old_lp == NULL) 2365 { 2366 lp = slang_alloc(lang); 2367 if (lp == NULL) 2368 goto endFAIL; 2369 2370 /* Remember the file name, used to reload the file when it's updated. */ 2371 lp->sl_fname = vim_strsave(fname); 2372 if (lp->sl_fname == NULL) 2373 goto endFAIL; 2374 2375 /* Check for .add.spl. */ 2376 lp->sl_add = strstr((char *)gettail(fname), ".add.") != NULL; 2377 } 2378 else 2379 lp = old_lp; 2380 2381 /* Set sourcing_name, so that error messages mention the file name. 
*/ 2382 sourcing_name = fname; 2383 sourcing_lnum = 0; 2384 2385 /* <HEADER>: <fileID> 2386 */ 2387 for (i = 0; i < VIMSPELLMAGICL; ++i) 2388 buf[i] = getc(fd); /* <fileID> */ 2389 if (STRNCMP(buf, VIMSPELLMAGIC, VIMSPELLMAGICL) != 0) 2390 { 2391 EMSG(_("E757: This does not look like a spell file")); 2392 goto endFAIL; 2393 } 2394 c = getc(fd); /* <versionnr> */ 2395 if (c < VIMSPELLVERSION) 2396 { 2397 EMSG(_("E771: Old spell file, needs to be updated")); 2398 goto endFAIL; 2399 } 2400 else if (c > VIMSPELLVERSION) 2401 { 2402 EMSG(_("E772: Spell file is for newer version of Vim")); 2403 goto endFAIL; 2404 } 2405 2406 2407 /* 2408 * <SECTIONS>: <section> ... <sectionend> 2409 * <section>: <sectionID> <sectionflags> <sectionlen> (section contents) 2410 */ 2411 for (;;) 2412 { 2413 n = getc(fd); /* <sectionID> or <sectionend> */ 2414 if (n == SN_END) 2415 break; 2416 c = getc(fd); /* <sectionflags> */ 2417 len = (getc(fd) << 24) + (getc(fd) << 16) + (getc(fd) << 8) + getc(fd); 2418 /* <sectionlen> */ 2419 if (len < 0) 2420 goto truncerr; 2421 2422 res = 0; 2423 switch (n) 2424 { 2425 case SN_REGION: 2426 res = read_region_section(fd, lp, len); 2427 break; 2428 2429 case SN_CHARFLAGS: 2430 res = read_charflags_section(fd); 2431 break; 2432 2433 case SN_MIDWORD: 2434 lp->sl_midword = read_string(fd, len); /* <midword> */ 2435 if (lp->sl_midword == NULL) 2436 goto endFAIL; 2437 break; 2438 2439 case SN_PREFCOND: 2440 res = read_prefcond_section(fd, lp); 2441 break; 2442 2443 case SN_REP: 2444 res = read_rep_section(fd, lp); 2445 break; 2446 2447 case SN_SAL: 2448 res = read_sal_section(fd, lp); 2449 break; 2450 2451 case SN_SOFO: 2452 res = read_sofo_section(fd, lp); 2453 break; 2454 2455 case SN_MAP: 2456 p = read_string(fd, len); /* <mapstr> */ 2457 if (p == NULL) 2458 goto endFAIL; 2459 set_map_str(lp, p); 2460 vim_free(p); 2461 break; 2462 2463 case SN_COMPOUND: 2464 res = read_compound(fd, lp, len); 2465 break; 2466 2467 case SN_NOBREAK: 2468 lp->sl_nobreak = 
TRUE; 2469 break; 2470 2471 case SN_SYLLABLE: 2472 lp->sl_syllable = read_string(fd, len); /* <syllable> */ 2473 if (lp->sl_syllable == NULL) 2474 goto endFAIL; 2475 if (init_syl_tab(lp) == FAIL) 2476 goto endFAIL; 2477 break; 2478 2479 default: 2480 /* Unsupported section. When it's required give an error 2481 * message. When it's not required skip the contents. */ 2482 if (c & SNF_REQUIRED) 2483 { 2484 EMSG(_("E770: Unsupported section in spell file")); 2485 goto endFAIL; 2486 } 2487 while (--len >= 0) 2488 if (getc(fd) < 0) 2489 goto truncerr; 2490 break; 2491 } 2492 if (res == SP_FORMERROR) 2493 { 2494 formerr: 2495 EMSG(_(e_format)); 2496 goto endFAIL; 2497 } 2498 if (res == SP_TRUNCERROR) 2499 { 2500 truncerr: 2501 EMSG(_(e_spell_trunc)); 2502 goto endFAIL; 2503 } 2504 if (res == SP_OTHERERROR) 2505 goto endFAIL; 2506 } 2507 2508 /* round 1: <LWORDTREE> 2509 * round 2: <KWORDTREE> 2510 * round 3: <PREFIXTREE> */ 2511 for (round = 1; round <= 3; ++round) 2512 { 2513 /* The tree size was computed when writing the file, so that we can 2514 * allocate it as one long block. <nodecount> */ 2515 len = (getc(fd) << 24) + (getc(fd) << 16) + (getc(fd) << 8) + getc(fd); 2516 if (len < 0) 2517 goto truncerr; 2518 if (len > 0) 2519 { 2520 /* Allocate the byte array. */ 2521 bp = lalloc((long_u)len, TRUE); 2522 if (bp == NULL) 2523 goto endFAIL; 2524 if (round == 1) 2525 lp->sl_fbyts = bp; 2526 else if (round == 2) 2527 lp->sl_kbyts = bp; 2528 else 2529 lp->sl_pbyts = bp; 2530 2531 /* Allocate the index array. */ 2532 ip = (idx_T *)lalloc_clear((long_u)(len * sizeof(int)), TRUE); 2533 if (ip == NULL) 2534 goto endFAIL; 2535 if (round == 1) 2536 lp->sl_fidxs = ip; 2537 else if (round == 2) 2538 lp->sl_kidxs = ip; 2539 else 2540 lp->sl_pidxs = ip; 2541 2542 /* Read the tree and store it in the array. 
*/ 2543 idx = read_tree(fd, bp, ip, len, 0, round == 3, lp->sl_prefixcnt); 2544 if (idx == -1) 2545 goto truncerr; 2546 if (idx < 0) 2547 goto formerr; 2548 } 2549 } 2550 2551 /* For a new file link it in the list of spell files. */ 2552 if (old_lp == NULL) 2553 { 2554 lp->sl_next = first_lang; 2555 first_lang = lp; 2556 } 2557 2558 goto endOK; 2559 2560 endFAIL: 2561 if (lang != NULL) 2562 /* truncating the name signals the error to spell_load_lang() */ 2563 *lang = NUL; 2564 if (lp != NULL && old_lp == NULL) 2565 slang_free(lp); 2566 lp = NULL; 2567 2568 endOK: 2569 if (fd != NULL) 2570 fclose(fd); 2571 sourcing_name = save_sourcing_name; 2572 sourcing_lnum = save_sourcing_lnum; 2573 2574 return lp; 2575 } 2576 2577 /* 2578 * Read a length field from "fd" in "cnt_bytes" bytes. 2579 * Allocate memory, read the string into it and add a NUL at the end. 2580 * Returns NULL when the count is zero. 2581 * Sets "*cntp" to SP_*ERROR when there is an error, length of the result 2582 * otherwise. 2583 */ 2584 static char_u * 2585 read_cnt_string(fd, cnt_bytes, cntp) 2586 FILE *fd; 2587 int cnt_bytes; 2588 int *cntp; 2589 { 2590 int cnt = 0; 2591 int i; 2592 char_u *str; 2593 2594 /* read the length bytes, MSB first */ 2595 for (i = 0; i < cnt_bytes; ++i) 2596 cnt = (cnt << 8) + getc(fd); 2597 if (cnt < 0) 2598 { 2599 *cntp = SP_TRUNCERROR; 2600 return NULL; 2601 } 2602 *cntp = cnt; 2603 if (cnt == 0) 2604 return NULL; /* nothing to read, return NULL */ 2605 2606 str = read_string(fd, cnt); 2607 if (str == NULL) 2608 *cntp = SP_OTHERERROR; 2609 return str; 2610 } 2611 2612 /* 2613 * Read a string of length "cnt" from "fd" into allocated memory. 2614 * Returns NULL when out of memory. 2615 */ 2616 static char_u * 2617 read_string(fd, cnt) 2618 FILE *fd; 2619 int cnt; 2620 { 2621 char_u *str; 2622 int i; 2623 2624 /* allocate memory */ 2625 str = alloc((unsigned)cnt + 1); 2626 if (str != NULL) 2627 { 2628 /* Read the string. Doesn't check for truncated file. 
*/ 2629 for (i = 0; i < cnt; ++i) 2630 str[i] = getc(fd); 2631 str[i] = NUL; 2632 } 2633 return str; 2634 } 2635 2636 /* 2637 * Read SN_REGION: <regionname> ... 2638 * Return SP_*ERROR flags. 2639 */ 2640 static int 2641 read_region_section(fd, lp, len) 2642 FILE *fd; 2643 slang_T *lp; 2644 int len; 2645 { 2646 int i; 2647 2648 if (len > 16) 2649 return SP_FORMERROR; 2650 for (i = 0; i < len; ++i) 2651 lp->sl_regions[i] = getc(fd); /* <regionname> */ 2652 lp->sl_regions[len] = NUL; 2653 return 0; 2654 } 2655 2656 /* 2657 * Read SN_CHARFLAGS section: <charflagslen> <charflags> 2658 * <folcharslen> <folchars> 2659 * Return SP_*ERROR flags. 2660 */ 2661 static int 2662 read_charflags_section(fd) 2663 FILE *fd; 2664 { 2665 char_u *flags; 2666 char_u *fol; 2667 int flagslen, follen; 2668 2669 /* <charflagslen> <charflags> */ 2670 flags = read_cnt_string(fd, 1, &flagslen); 2671 if (flagslen < 0) 2672 return flagslen; 2673 2674 /* <folcharslen> <folchars> */ 2675 fol = read_cnt_string(fd, 2, &follen); 2676 if (follen < 0) 2677 { 2678 vim_free(flags); 2679 return follen; 2680 } 2681 2682 /* Set the word-char flags and fill SPELL_ISUPPER() table. */ 2683 if (flags != NULL && fol != NULL) 2684 set_spell_charflags(flags, flagslen, fol); 2685 2686 vim_free(flags); 2687 vim_free(fol); 2688 2689 /* When <charflagslen> is zero then <fcharlen> must also be zero. */ 2690 if ((flags == NULL) != (fol == NULL)) 2691 return SP_FORMERROR; 2692 return 0; 2693 } 2694 2695 /* 2696 * Read SN_PREFCOND section. 2697 * Return SP_*ERROR flags. 2698 */ 2699 static int 2700 read_prefcond_section(fd, lp) 2701 FILE *fd; 2702 slang_T *lp; 2703 { 2704 int cnt; 2705 int i; 2706 int n; 2707 char_u *p; 2708 char_u buf[MAXWLEN + 1]; 2709 2710 /* <prefcondcnt> <prefcond> ... 
*/ 2711 cnt = (getc(fd) << 8) + getc(fd); /* <prefcondcnt> */ 2712 if (cnt <= 0) 2713 return SP_FORMERROR; 2714 2715 lp->sl_prefprog = (regprog_T **)alloc_clear( 2716 (unsigned)sizeof(regprog_T *) * cnt); 2717 if (lp->sl_prefprog == NULL) 2718 return SP_OTHERERROR; 2719 lp->sl_prefixcnt = cnt; 2720 2721 for (i = 0; i < cnt; ++i) 2722 { 2723 /* <prefcond> : <condlen> <condstr> */ 2724 n = getc(fd); /* <condlen> */ 2725 if (n < 0 || n >= MAXWLEN) 2726 return SP_FORMERROR; 2727 2728 /* When <condlen> is zero we have an empty condition. Otherwise 2729 * compile the regexp program used to check for the condition. */ 2730 if (n > 0) 2731 { 2732 buf[0] = '^'; /* always match at one position only */ 2733 p = buf + 1; 2734 while (n-- > 0) 2735 *p++ = getc(fd); /* <condstr> */ 2736 *p = NUL; 2737 lp->sl_prefprog[i] = vim_regcomp(buf, RE_MAGIC + RE_STRING); 2738 } 2739 } 2740 return 0; 2741 } 2742 2743 /* 2744 * Read REP items section from "fd": <repcount> <rep> ... 2745 * Return SP_*ERROR flags. 2746 */ 2747 static int 2748 read_rep_section(fd, slang) 2749 FILE *fd; 2750 slang_T *slang; 2751 { 2752 int cnt; 2753 garray_T *gap; 2754 fromto_T *ftp; 2755 short *first; 2756 int i; 2757 2758 cnt = (getc(fd) << 8) + getc(fd); /* <repcount> */ 2759 if (cnt < 0) 2760 return SP_TRUNCERROR; 2761 2762 gap = &slang->sl_rep; 2763 if (ga_grow(gap, cnt) == FAIL) 2764 return SP_OTHERERROR; 2765 2766 /* <rep> : <repfromlen> <repfrom> <reptolen> <repto> */ 2767 for (; gap->ga_len < cnt; ++gap->ga_len) 2768 { 2769 ftp = &((fromto_T *)gap->ga_data)[gap->ga_len]; 2770 ftp->ft_from = read_cnt_string(fd, 1, &i); 2771 if (i < 0) 2772 return i; 2773 if (i == 0) 2774 return SP_FORMERROR; 2775 ftp->ft_to = read_cnt_string(fd, 1, &i); 2776 if (i <= 0) 2777 { 2778 vim_free(ftp->ft_from); 2779 if (i < 0) 2780 return i; 2781 return SP_FORMERROR; 2782 } 2783 } 2784 2785 /* Fill the first-index table. 
*/ 2786 first = slang->sl_rep_first; 2787 for (i = 0; i < 256; ++i) 2788 first[i] = -1; 2789 for (i = 0; i < gap->ga_len; ++i) 2790 { 2791 ftp = &((fromto_T *)gap->ga_data)[i]; 2792 if (first[*ftp->ft_from] == -1) 2793 first[*ftp->ft_from] = i; 2794 } 2795 return 0; 2796 } 2797 2798 /* 2799 * Read SN_SAL section: <salflags> <salcount> <sal> ... 2800 * Return SP_*ERROR flags. 2801 */ 2802 static int 2803 read_sal_section(fd, slang) 2804 FILE *fd; 2805 slang_T *slang; 2806 { 2807 int i; 2808 int cnt; 2809 garray_T *gap; 2810 salitem_T *smp; 2811 int ccnt; 2812 char_u *p; 2813 int c = NUL; 2814 2815 slang->sl_sofo = FALSE; 2816 2817 i = getc(fd); /* <salflags> */ 2818 if (i & SAL_F0LLOWUP) 2819 slang->sl_followup = TRUE; 2820 if (i & SAL_COLLAPSE) 2821 slang->sl_collapse = TRUE; 2822 if (i & SAL_REM_ACCENTS) 2823 slang->sl_rem_accents = TRUE; 2824 2825 cnt = (getc(fd) << 8) + getc(fd); /* <salcount> */ 2826 if (cnt < 0) 2827 return SP_TRUNCERROR; 2828 2829 gap = &slang->sl_sal; 2830 ga_init2(gap, sizeof(salitem_T), 10); 2831 if (ga_grow(gap, cnt) == FAIL) 2832 return SP_OTHERERROR; 2833 2834 /* <sal> : <salfromlen> <salfrom> <saltolen> <salto> */ 2835 for (; gap->ga_len < cnt; ++gap->ga_len) 2836 { 2837 smp = &((salitem_T *)gap->ga_data)[gap->ga_len]; 2838 ccnt = getc(fd); /* <salfromlen> */ 2839 if (ccnt < 0) 2840 return SP_TRUNCERROR; 2841 if ((p = alloc(ccnt + 2)) == NULL) 2842 return SP_OTHERERROR; 2843 smp->sm_lead = p; 2844 2845 /* Read up to the first special char into sm_lead. */ 2846 for (i = 0; i < ccnt; ++i) 2847 { 2848 c = getc(fd); /* <salfrom> */ 2849 if (vim_strchr((char_u *)"0123456789(-<^$", c) != NULL) 2850 break; 2851 *p++ = c; 2852 } 2853 smp->sm_leadlen = p - smp->sm_lead; 2854 *p++ = NUL; 2855 2856 /* Put (abc) chars in sm_oneof, if any. 
*/ 2857 if (c == '(') 2858 { 2859 smp->sm_oneof = p; 2860 for (++i; i < ccnt; ++i) 2861 { 2862 c = getc(fd); /* <salfrom> */ 2863 if (c == ')') 2864 break; 2865 *p++ = c; 2866 } 2867 *p++ = NUL; 2868 if (++i < ccnt) 2869 c = getc(fd); 2870 } 2871 else 2872 smp->sm_oneof = NULL; 2873 2874 /* Any following chars go in sm_rules. */ 2875 smp->sm_rules = p; 2876 if (i < ccnt) 2877 /* store the char we got while checking for end of sm_lead */ 2878 *p++ = c; 2879 for (++i; i < ccnt; ++i) 2880 *p++ = getc(fd); /* <salfrom> */ 2881 *p++ = NUL; 2882 2883 /* <saltolen> <salto> */ 2884 smp->sm_to = read_cnt_string(fd, 1, &ccnt); 2885 if (ccnt < 0) 2886 { 2887 vim_free(smp->sm_lead); 2888 return ccnt; 2889 } 2890 2891 #ifdef FEAT_MBYTE 2892 if (has_mbyte) 2893 { 2894 /* convert the multi-byte strings to wide char strings */ 2895 smp->sm_lead_w = mb_str2wide(smp->sm_lead); 2896 smp->sm_leadlen = mb_charlen(smp->sm_lead); 2897 if (smp->sm_oneof == NULL) 2898 smp->sm_oneof_w = NULL; 2899 else 2900 smp->sm_oneof_w = mb_str2wide(smp->sm_oneof); 2901 if (smp->sm_to == NULL) 2902 smp->sm_to_w = NULL; 2903 else 2904 smp->sm_to_w = mb_str2wide(smp->sm_to); 2905 if (smp->sm_lead_w == NULL 2906 || (smp->sm_oneof_w == NULL && smp->sm_oneof != NULL) 2907 || (smp->sm_to_w == NULL && smp->sm_to != NULL)) 2908 { 2909 vim_free(smp->sm_lead); 2910 vim_free(smp->sm_to); 2911 vim_free(smp->sm_lead_w); 2912 vim_free(smp->sm_oneof_w); 2913 vim_free(smp->sm_to_w); 2914 return SP_OTHERERROR; 2915 } 2916 } 2917 #endif 2918 } 2919 2920 /* Fill the first-index table. */ 2921 set_sal_first(slang); 2922 2923 return 0; 2924 } 2925 2926 /* 2927 * SN_SOFO: <sofofromlen> <sofofrom> <sofotolen> <sofoto> 2928 * Return SP_*ERROR flags. 
2929 */ 2930 static int 2931 read_sofo_section(fd, slang) 2932 FILE *fd; 2933 slang_T *slang; 2934 { 2935 int cnt; 2936 char_u *from, *to; 2937 int res; 2938 2939 slang->sl_sofo = TRUE; 2940 2941 /* <sofofromlen> <sofofrom> */ 2942 from = read_cnt_string(fd, 2, &cnt); 2943 if (cnt < 0) 2944 return cnt; 2945 2946 /* <sofotolen> <sofoto> */ 2947 to = read_cnt_string(fd, 2, &cnt); 2948 if (cnt < 0) 2949 { 2950 vim_free(from); 2951 return cnt; 2952 } 2953 2954 /* Store the info in slang->sl_sal and/or slang->sl_sal_first. */ 2955 if (from != NULL && to != NULL) 2956 res = set_sofo(slang, from, to); 2957 else if (from != NULL || to != NULL) 2958 res = SP_FORMERROR; /* only one of two strings is an error */ 2959 else 2960 res = 0; 2961 2962 vim_free(from); 2963 vim_free(to); 2964 return res; 2965 } 2966 2967 /* 2968 * Read the compound section from the .spl file: 2969 * <compmax> <compminlen> <compsylmax> <compflags> 2970 * Returns SP_*ERROR flags. 2971 */ 2972 static int 2973 read_compound(fd, slang, len) 2974 FILE *fd; 2975 slang_T *slang; 2976 int len; 2977 { 2978 int todo = len; 2979 int c; 2980 int atstart; 2981 char_u *pat; 2982 char_u *pp; 2983 char_u *cp; 2984 char_u *ap; 2985 2986 if (todo < 2) 2987 return SP_FORMERROR; /* need at least two bytes */ 2988 2989 --todo; 2990 c = getc(fd); /* <compmax> */ 2991 if (c < 2) 2992 c = MAXWLEN; 2993 slang->sl_compmax = c; 2994 2995 --todo; 2996 c = getc(fd); /* <compminlen> */ 2997 if (c < 1) 2998 c = 0; 2999 slang->sl_compminlen = c; 3000 3001 --todo; 3002 c = getc(fd); /* <compsylmax> */ 3003 if (c < 1) 3004 c = MAXWLEN; 3005 slang->sl_compsylmax = c; 3006 3007 /* Turn the COMPOUNDFLAGS items into a regexp pattern: 3008 * "a[bc]/a*b+" -> "^\(a[bc]\|a*b\+\)$". 3009 * Inserting backslashes may double the length, "^\(\)$<Nul>" is 7 bytes. 3010 * Conversion to utf-8 may double the size. 
*/ 3011 c = todo * 2 + 7; 3012 #ifdef FEAT_MBYTE 3013 if (enc_utf8) 3014 c += todo * 2; 3015 #endif 3016 pat = alloc((unsigned)c); 3017 if (pat == NULL) 3018 return SP_OTHERERROR; 3019 3020 /* We also need a list of all flags that can appear at the start and one 3021 * for all flags. */ 3022 cp = alloc(todo + 1); 3023 if (cp == NULL) 3024 { 3025 vim_free(pat); 3026 return SP_OTHERERROR; 3027 } 3028 slang->sl_compstartflags = cp; 3029 *cp = NUL; 3030 3031 ap = alloc(todo + 1); 3032 if (ap == NULL) 3033 { 3034 vim_free(pat); 3035 return SP_OTHERERROR; 3036 } 3037 slang->sl_compallflags = ap; 3038 *ap = NUL; 3039 3040 pp = pat; 3041 *pp++ = '^'; 3042 *pp++ = '\\'; 3043 *pp++ = '('; 3044 3045 atstart = 1; 3046 while (todo-- > 0) 3047 { 3048 c = getc(fd); /* <compflags> */ 3049 3050 /* Add all flags to "sl_compallflags". */ 3051 if (vim_strchr((char_u *)"+*[]/", c) == NULL 3052 && !byte_in_str(slang->sl_compallflags, c)) 3053 { 3054 *ap++ = c; 3055 *ap = NUL; 3056 } 3057 3058 if (atstart != 0) 3059 { 3060 /* At start of item: copy flags to "sl_compstartflags". For a 3061 * [abc] item set "atstart" to 2 and copy up to the ']'. 
*/ 3062 if (c == '[') 3063 atstart = 2; 3064 else if (c == ']') 3065 atstart = 0; 3066 else 3067 { 3068 if (!byte_in_str(slang->sl_compstartflags, c)) 3069 { 3070 *cp++ = c; 3071 *cp = NUL; 3072 } 3073 if (atstart == 1) 3074 atstart = 0; 3075 } 3076 } 3077 if (c == '/') /* slash separates two items */ 3078 { 3079 *pp++ = '\\'; 3080 *pp++ = '|'; 3081 atstart = 1; 3082 } 3083 else /* normal char, "[abc]" and '*' are copied as-is */ 3084 { 3085 if (c == '+' || c == '~') 3086 *pp++ = '\\'; /* "a+" becomes "a\+" */ 3087 #ifdef FEAT_MBYTE 3088 if (enc_utf8) 3089 pp += mb_char2bytes(c, pp); 3090 else 3091 #endif 3092 *pp++ = c; 3093 } 3094 } 3095 3096 *pp++ = '\\'; 3097 *pp++ = ')'; 3098 *pp++ = '$'; 3099 *pp = NUL; 3100 3101 slang->sl_compprog = vim_regcomp(pat, RE_MAGIC + RE_STRING + RE_STRICT); 3102 vim_free(pat); 3103 if (slang->sl_compprog == NULL) 3104 return SP_FORMERROR; 3105 3106 return 0; 3107 } 3108 3109 /* 3110 * Return TRUE if byte "n" appears in "str". 3111 * Like strchr() but independent of locale. 3112 */ 3113 static int 3114 byte_in_str(str, n) 3115 char_u *str; 3116 int n; 3117 { 3118 char_u *p; 3119 3120 for (p = str; *p != NUL; ++p) 3121 if (*p == n) 3122 return TRUE; 3123 return FALSE; 3124 } 3125 3126 #define SY_MAXLEN 30 3127 typedef struct syl_item_S 3128 { 3129 char_u sy_chars[SY_MAXLEN]; /* the sequence of chars */ 3130 int sy_len; 3131 } syl_item_T; 3132 3133 /* 3134 * Truncate "slang->sl_syllable" at the first slash and put the following items 3135 * in "slang->sl_syl_items". 
3136 */ 3137 static int 3138 init_syl_tab(slang) 3139 slang_T *slang; 3140 { 3141 char_u *p; 3142 char_u *s; 3143 int l; 3144 syl_item_T *syl; 3145 3146 ga_init2(&slang->sl_syl_items, sizeof(syl_item_T), 4); 3147 p = vim_strchr(slang->sl_syllable, '/'); 3148 while (p != NULL) 3149 { 3150 *p++ = NUL; 3151 if (*p == NUL) /* trailing slash */ 3152 break; 3153 s = p; 3154 p = vim_strchr(p, '/'); 3155 if (p == NULL) 3156 l = STRLEN(s); 3157 else 3158 l = p - s; 3159 if (l >= SY_MAXLEN) 3160 return SP_FORMERROR; 3161 if (ga_grow(&slang->sl_syl_items, 1) == FAIL) 3162 return SP_OTHERERROR; 3163 syl = ((syl_item_T *)slang->sl_syl_items.ga_data) 3164 + slang->sl_syl_items.ga_len++; 3165 vim_strncpy(syl->sy_chars, s, l); 3166 syl->sy_len = l; 3167 } 3168 return OK; 3169 } 3170 3171 /* 3172 * Count the number of syllables in "word". 3173 * When "word" contains spaces the syllables after the last space are counted. 3174 * Returns zero if syllables are not defines. 3175 */ 3176 static int 3177 count_syllables(slang, word) 3178 slang_T *slang; 3179 char_u *word; 3180 { 3181 int cnt = 0; 3182 int skip = FALSE; 3183 char_u *p; 3184 int len; 3185 int i; 3186 syl_item_T *syl; 3187 int c; 3188 3189 if (slang->sl_syllable == NULL) 3190 return 0; 3191 3192 for (p = word; *p != NUL; p += len) 3193 { 3194 /* When running into a space reset counter. */ 3195 if (*p == ' ') 3196 { 3197 len = 1; 3198 cnt = 0; 3199 continue; 3200 } 3201 3202 /* Find longest match of syllable items. */ 3203 len = 0; 3204 for (i = 0; i < slang->sl_syl_items.ga_len; ++i) 3205 { 3206 syl = ((syl_item_T *)slang->sl_syl_items.ga_data) + i; 3207 if (syl->sy_len > len 3208 && STRNCMP(p, syl->sy_chars, syl->sy_len) == 0) 3209 len = syl->sy_len; 3210 } 3211 if (len != 0) /* found a match, count syllable */ 3212 { 3213 ++cnt; 3214 skip = FALSE; 3215 } 3216 else 3217 { 3218 /* No recognized syllable item, at least a syllable char then? 
*/ 3219 #ifdef FEAT_MBYTE 3220 c = mb_ptr2char(p); 3221 len = (*mb_ptr2len)(p); 3222 #else 3223 c = *p; 3224 len = 1; 3225 #endif 3226 if (vim_strchr(slang->sl_syllable, c) == NULL) 3227 skip = FALSE; /* No, search for next syllable */ 3228 else if (!skip) 3229 { 3230 ++cnt; /* Yes, count it */ 3231 skip = TRUE; /* don't count following syllable chars */ 3232 } 3233 } 3234 } 3235 return cnt; 3236 } 3237 3238 /* 3239 * Set the SOFOFROM and SOFOTO items in language "lp". 3240 * Returns SP_*ERROR flags when there is something wrong. 3241 */ 3242 static int 3243 set_sofo(lp, from, to) 3244 slang_T *lp; 3245 char_u *from; 3246 char_u *to; 3247 { 3248 int i; 3249 3250 #ifdef FEAT_MBYTE 3251 garray_T *gap; 3252 char_u *s; 3253 char_u *p; 3254 int c; 3255 int *inp; 3256 3257 if (has_mbyte) 3258 { 3259 /* Use "sl_sal" as an array with 256 pointers to a list of wide 3260 * characters. The index is the low byte of the character. 3261 * The list contains from-to pairs with a terminating NUL. 3262 * sl_sal_first[] is used for latin1 "from" characters. */ 3263 gap = &lp->sl_sal; 3264 ga_init2(gap, sizeof(int *), 1); 3265 if (ga_grow(gap, 256) == FAIL) 3266 return SP_OTHERERROR; 3267 vim_memset(gap->ga_data, 0, sizeof(int *) * 256); 3268 gap->ga_len = 256; 3269 3270 /* First count the number of items for each list. Temporarily use 3271 * sl_sal_first[] for this. */ 3272 for (p = from, s = to; *p != NUL && *s != NUL; ) 3273 { 3274 c = mb_cptr2char_adv(&p); 3275 mb_cptr_adv(s); 3276 if (c >= 256) 3277 ++lp->sl_sal_first[c & 0xff]; 3278 } 3279 if (*p != NUL || *s != NUL) /* lengths differ */ 3280 return SP_FORMERROR; 3281 3282 /* Allocate the lists. 
*/ 3283 for (i = 0; i < 256; ++i) 3284 if (lp->sl_sal_first[i] > 0) 3285 { 3286 p = alloc(sizeof(int) * (lp->sl_sal_first[i] * 2 + 1)); 3287 if (p == NULL) 3288 return SP_OTHERERROR; 3289 ((int **)gap->ga_data)[i] = (int *)p; 3290 *(int *)p = 0; 3291 } 3292 3293 /* Put the characters up to 255 in sl_sal_first[] the rest in a sl_sal 3294 * list. */ 3295 vim_memset(lp->sl_sal_first, 0, sizeof(salfirst_T) * 256); 3296 for (p = from, s = to; *p != NUL && *s != NUL; ) 3297 { 3298 c = mb_cptr2char_adv(&p); 3299 i = mb_cptr2char_adv(&s); 3300 if (c >= 256) 3301 { 3302 /* Append the from-to chars at the end of the list with 3303 * the low byte. */ 3304 inp = ((int **)gap->ga_data)[c & 0xff]; 3305 while (*inp != 0) 3306 ++inp; 3307 *inp++ = c; /* from char */ 3308 *inp++ = i; /* to char */ 3309 *inp++ = NUL; /* NUL at the end */ 3310 } 3311 else 3312 /* mapping byte to char is done in sl_sal_first[] */ 3313 lp->sl_sal_first[c] = i; 3314 } 3315 } 3316 else 3317 #endif 3318 { 3319 /* mapping bytes to bytes is done in sl_sal_first[] */ 3320 if (STRLEN(from) != STRLEN(to)) 3321 return SP_FORMERROR; 3322 3323 for (i = 0; to[i] != NUL; ++i) 3324 lp->sl_sal_first[from[i]] = to[i]; 3325 lp->sl_sal.ga_len = 1; /* indicates we have soundfolding */ 3326 } 3327 3328 return 0; 3329 } 3330 3331 /* 3332 * Fill the first-index table for "lp". 3333 */ 3334 static void 3335 set_sal_first(lp) 3336 slang_T *lp; 3337 { 3338 salfirst_T *sfirst; 3339 int i; 3340 salitem_T *smp; 3341 int c; 3342 garray_T *gap = &lp->sl_sal; 3343 3344 sfirst = lp->sl_sal_first; 3345 for (i = 0; i < 256; ++i) 3346 sfirst[i] = -1; 3347 smp = (salitem_T *)gap->ga_data; 3348 for (i = 0; i < gap->ga_len; ++i) 3349 { 3350 #ifdef FEAT_MBYTE 3351 if (has_mbyte) 3352 /* Use the lowest byte of the first character. For latin1 it's 3353 * the character, for other encodings it should differ for most 3354 * characters. 
*/ 3355 c = *smp[i].sm_lead_w & 0xff; 3356 else 3357 #endif 3358 c = *smp[i].sm_lead; 3359 if (sfirst[c] == -1) 3360 { 3361 sfirst[c] = i; 3362 #ifdef FEAT_MBYTE 3363 if (has_mbyte) 3364 { 3365 int n; 3366 3367 /* Make sure all entries with this byte are following each 3368 * other. Move the ones that are in the wrong position. Do 3369 * keep the same ordering! */ 3370 while (i + 1 < gap->ga_len 3371 && (*smp[i + 1].sm_lead_w & 0xff) == c) 3372 /* Skip over entry with same index byte. */ 3373 ++i; 3374 3375 for (n = 1; i + n < gap->ga_len; ++n) 3376 if ((*smp[i + n].sm_lead_w & 0xff) == c) 3377 { 3378 salitem_T tsal; 3379 3380 /* Move entry with same index byte after the entries 3381 * we already found. */ 3382 ++i; 3383 --n; 3384 tsal = smp[i + n]; 3385 mch_memmove(smp + i + 1, smp + i, 3386 sizeof(salitem_T) * n); 3387 smp[i] = tsal; 3388 } 3389 } 3390 #endif 3391 } 3392 } 3393 } 3394 3395 #ifdef FEAT_MBYTE 3396 /* 3397 * Turn a multi-byte string into a wide character string. 3398 * Return it in allocated memory (NULL for out-of-memory) 3399 */ 3400 static int * 3401 mb_str2wide(s) 3402 char_u *s; 3403 { 3404 int *res; 3405 char_u *p; 3406 int i = 0; 3407 3408 res = (int *)alloc(sizeof(int) * (mb_charlen(s) + 1)); 3409 if (res != NULL) 3410 { 3411 for (p = s; *p != NUL; ) 3412 res[i++] = mb_ptr2char_adv(&p); 3413 res[i] = NUL; 3414 } 3415 return res; 3416 } 3417 #endif 3418 3419 /* 3420 * Read one row of siblings from the spell file and store it in the byte array 3421 * "byts" and index array "idxs". Recursively read the children. 3422 * 3423 * NOTE: The code here must match put_node(). 3424 * 3425 * Returns the index follosing the siblings. 3426 * Returns -1 if the file is shorter than expected. 3427 * Returns -2 if there is a format error. 
3428 */ 3429 static idx_T 3430 read_tree(fd, byts, idxs, maxidx, startidx, prefixtree, maxprefcondnr) 3431 FILE *fd; 3432 char_u *byts; 3433 idx_T *idxs; 3434 int maxidx; /* size of arrays */ 3435 idx_T startidx; /* current index in "byts" and "idxs" */ 3436 int prefixtree; /* TRUE for reading PREFIXTREE */ 3437 int maxprefcondnr; /* maximum for <prefcondnr> */ 3438 { 3439 int len; 3440 int i; 3441 int n; 3442 idx_T idx = startidx; 3443 int c; 3444 int c2; 3445 #define SHARED_MASK 0x8000000 3446 3447 len = getc(fd); /* <siblingcount> */ 3448 if (len <= 0) 3449 return -1; 3450 3451 if (startidx + len >= maxidx) 3452 return -2; 3453 byts[idx++] = len; 3454 3455 /* Read the byte values, flag/region bytes and shared indexes. */ 3456 for (i = 1; i <= len; ++i) 3457 { 3458 c = getc(fd); /* <byte> */ 3459 if (c < 0) 3460 return -1; 3461 if (c <= BY_SPECIAL) 3462 { 3463 if (c == BY_NOFLAGS && !prefixtree) 3464 { 3465 /* No flags, all regions. */ 3466 idxs[idx] = 0; 3467 c = 0; 3468 } 3469 else if (c != BY_INDEX) 3470 { 3471 if (prefixtree) 3472 { 3473 /* Read the optional pflags byte, the prefix ID and the 3474 * condition nr. In idxs[] store the prefix ID in the low 3475 * byte, the condition index shifted up 8 bits, the flags 3476 * shifted up 24 bits. */ 3477 if (c == BY_FLAGS) 3478 c = getc(fd) << 24; /* <pflags> */ 3479 else 3480 c = 0; 3481 3482 c |= getc(fd); /* <affixID> */ 3483 3484 n = (getc(fd) << 8) + getc(fd); /* <prefcondnr> */ 3485 if (n >= maxprefcondnr) 3486 return -2; 3487 c |= (n << 8); 3488 } 3489 else /* c must be BY_FLAGS or BY_FLAGS2 */ 3490 { 3491 /* Read flags and optional region and prefix ID. In 3492 * idxs[] the flags go in the low two bytes, region above 3493 * that and prefix ID above the region. 
*/ 3494 c2 = c; 3495 c = getc(fd); /* <flags> */ 3496 if (c2 == BY_FLAGS2) 3497 c = (getc(fd) << 8) + c; /* <flags2> */ 3498 if (c & WF_REGION) 3499 c = (getc(fd) << 16) + c; /* <region> */ 3500 if (c & WF_AFX) 3501 c = (getc(fd) << 24) + c; /* <affixID> */ 3502 } 3503 3504 idxs[idx] = c; 3505 c = 0; 3506 } 3507 else /* c == BY_INDEX */ 3508 { 3509 /* <nodeidx> */ 3510 n = (getc(fd) << 16) + (getc(fd) << 8) + getc(fd); 3511 if (n < 0 || n >= maxidx) 3512 return -2; 3513 idxs[idx] = n + SHARED_MASK; 3514 c = getc(fd); /* <xbyte> */ 3515 } 3516 } 3517 byts[idx++] = c; 3518 } 3519 3520 /* Recursively read the children for non-shared siblings. 3521 * Skip the end-of-word ones (zero byte value) and the shared ones (and 3522 * remove SHARED_MASK) */ 3523 for (i = 1; i <= len; ++i) 3524 if (byts[startidx + i] != 0) 3525 { 3526 if (idxs[startidx + i] & SHARED_MASK) 3527 idxs[startidx + i] &= ~SHARED_MASK; 3528 else 3529 { 3530 idxs[startidx + i] = idx; 3531 idx = read_tree(fd, byts, idxs, maxidx, idx, 3532 prefixtree, maxprefcondnr); 3533 if (idx < 0) 3534 break; 3535 } 3536 } 3537 3538 return idx; 3539 } 3540 3541 /* 3542 * Parse 'spelllang' and set buf->b_langp accordingly. 3543 * Returns NULL if it's OK, an error message otherwise. 3544 */ 3545 char_u * 3546 did_set_spelllang(buf) 3547 buf_T *buf; 3548 { 3549 garray_T ga; 3550 char_u *splp; 3551 char_u *region; 3552 char_u region_cp[3]; 3553 int filename; 3554 int region_mask; 3555 slang_T *slang; 3556 int c; 3557 char_u lang[MAXWLEN + 1]; 3558 char_u spf_name[MAXPATHL]; 3559 int len; 3560 char_u *p; 3561 int round; 3562 char_u *spf; 3563 char_u *use_region = NULL; 3564 int dont_use_region = FALSE; 3565 int nobreak = FALSE; 3566 int i, j; 3567 langp_T *lp, *lp2; 3568 3569 ga_init2(&ga, sizeof(langp_T), 2); 3570 clear_midword(buf); 3571 3572 /* loop over comma separated language names. */ 3573 for (splp = buf->b_p_spl; *splp != NUL; ) 3574 { 3575 /* Get one language name. 
*/ 3576 copy_option_part(&splp, lang, MAXWLEN, ","); 3577 3578 region = NULL; 3579 len = STRLEN(lang); 3580 3581 /* If the name ends in ".spl" use it as the name of the spell file. 3582 * If there is a region name let "region" point to it and remove it 3583 * from the name. */ 3584 if (len > 4 && fnamecmp(lang + len - 4, ".spl") == 0) 3585 { 3586 filename = TRUE; 3587 3588 /* Locate a region and remove it from the file name. */ 3589 p = vim_strchr(gettail(lang), '_'); 3590 if (p != NULL && ASCII_ISALPHA(p[1]) && ASCII_ISALPHA(p[2]) 3591 && !ASCII_ISALPHA(p[3])) 3592 { 3593 vim_strncpy(region_cp, p + 1, 2); 3594 mch_memmove(p, p + 3, len - (p - lang) - 2); 3595 len -= 3; 3596 region = region_cp; 3597 } 3598 else 3599 dont_use_region = TRUE; 3600 3601 /* Check if we loaded this language before. */ 3602 for (slang = first_lang; slang != NULL; slang = slang->sl_next) 3603 if (fullpathcmp(lang, slang->sl_fname, FALSE) == FPC_SAME) 3604 break; 3605 } 3606 else 3607 { 3608 filename = FALSE; 3609 if (len > 3 && lang[len - 3] == '_') 3610 { 3611 region = lang + len - 2; 3612 len -= 3; 3613 lang[len] = NUL; 3614 } 3615 else 3616 dont_use_region = TRUE; 3617 3618 /* Check if we loaded this language before. */ 3619 for (slang = first_lang; slang != NULL; slang = slang->sl_next) 3620 if (STRICMP(lang, slang->sl_name) == 0) 3621 break; 3622 } 3623 3624 if (region != NULL) 3625 { 3626 /* If the region differs from what was used before then don't 3627 * use it for 'spellfile'. */ 3628 if (use_region != NULL && STRCMP(region, use_region) != 0) 3629 dont_use_region = TRUE; 3630 use_region = region; 3631 } 3632 3633 /* If not found try loading the language now. */ 3634 if (slang == NULL) 3635 { 3636 if (filename) 3637 (void)spell_load_file(lang, lang, NULL, FALSE); 3638 else 3639 spell_load_lang(lang); 3640 } 3641 3642 /* 3643 * Loop over the languages, there can be several files for "lang". 
3644 */ 3645 for (slang = first_lang; slang != NULL; slang = slang->sl_next) 3646 if (filename ? fullpathcmp(lang, slang->sl_fname, FALSE) == FPC_SAME 3647 : STRICMP(lang, slang->sl_name) == 0) 3648 { 3649 region_mask = REGION_ALL; 3650 if (!filename && region != NULL) 3651 { 3652 /* find region in sl_regions */ 3653 c = find_region(slang->sl_regions, region); 3654 if (c == REGION_ALL) 3655 { 3656 if (slang->sl_add) 3657 { 3658 if (*slang->sl_regions != NUL) 3659 /* This addition file is for other regions. */ 3660 region_mask = 0; 3661 } 3662 else 3663 /* This is probably an error. Give a warning and 3664 * accept the words anyway. */ 3665 smsg((char_u *) 3666 _("Warning: region %s not supported"), 3667 region); 3668 } 3669 else 3670 region_mask = 1 << c; 3671 } 3672 3673 if (region_mask != 0) 3674 { 3675 if (ga_grow(&ga, 1) == FAIL) 3676 { 3677 ga_clear(&ga); 3678 return e_outofmem; 3679 } 3680 LANGP_ENTRY(ga, ga.ga_len)->lp_slang = slang; 3681 LANGP_ENTRY(ga, ga.ga_len)->lp_region = region_mask; 3682 ++ga.ga_len; 3683 use_midword(slang, buf); 3684 if (slang->sl_nobreak) 3685 nobreak = TRUE; 3686 } 3687 } 3688 } 3689 3690 /* round 0: load int_wordlist, if possible. 3691 * round 1: load first name in 'spellfile'. 3692 * round 2: load second name in 'spellfile. 3693 * etc. */ 3694 spf = curbuf->b_p_spf; 3695 for (round = 0; round == 0 || *spf != NUL; ++round) 3696 { 3697 if (round == 0) 3698 { 3699 /* Internal wordlist, if there is one. */ 3700 if (int_wordlist == NULL) 3701 continue; 3702 int_wordlist_spl(spf_name); 3703 } 3704 else 3705 { 3706 /* One entry in 'spellfile'. */ 3707 copy_option_part(&spf, spf_name, MAXPATHL - 5, ","); 3708 STRCAT(spf_name, ".spl"); 3709 3710 /* If it was already found above then skip it. 
*/ 3711 for (c = 0; c < ga.ga_len; ++c) 3712 { 3713 p = LANGP_ENTRY(ga, c)->lp_slang->sl_fname; 3714 if (p != NULL && fullpathcmp(spf_name, p, FALSE) == FPC_SAME) 3715 break; 3716 } 3717 if (c < ga.ga_len) 3718 continue; 3719 } 3720 3721 /* Check if it was loaded already. */ 3722 for (slang = first_lang; slang != NULL; slang = slang->sl_next) 3723 if (fullpathcmp(spf_name, slang->sl_fname, FALSE) == FPC_SAME) 3724 break; 3725 if (slang == NULL) 3726 { 3727 /* Not loaded, try loading it now. The language name includes the 3728 * region name, the region is ignored otherwise. for int_wordlist 3729 * use an arbitrary name. */ 3730 if (round == 0) 3731 STRCPY(lang, "internal wordlist"); 3732 else 3733 { 3734 vim_strncpy(lang, gettail(spf_name), MAXWLEN); 3735 p = vim_strchr(lang, '.'); 3736 if (p != NULL) 3737 *p = NUL; /* truncate at ".encoding.add" */ 3738 } 3739 slang = spell_load_file(spf_name, lang, NULL, TRUE); 3740 3741 /* If one of the languages has NOBREAK we assume the addition 3742 * files also have this. */ 3743 if (slang != NULL && nobreak) 3744 slang->sl_nobreak = TRUE; 3745 } 3746 if (slang != NULL && ga_grow(&ga, 1) == OK) 3747 { 3748 region_mask = REGION_ALL; 3749 if (use_region != NULL && !dont_use_region) 3750 { 3751 /* find region in sl_regions */ 3752 c = find_region(slang->sl_regions, use_region); 3753 if (c != REGION_ALL) 3754 region_mask = 1 << c; 3755 else if (*slang->sl_regions != NUL) 3756 /* This spell file is for other regions. */ 3757 region_mask = 0; 3758 } 3759 3760 if (region_mask != 0) 3761 { 3762 LANGP_ENTRY(ga, ga.ga_len)->lp_slang = slang; 3763 LANGP_ENTRY(ga, ga.ga_len)->lp_sallang = NULL; 3764 LANGP_ENTRY(ga, ga.ga_len)->lp_replang = NULL; 3765 LANGP_ENTRY(ga, ga.ga_len)->lp_region = region_mask; 3766 ++ga.ga_len; 3767 use_midword(slang, buf); 3768 } 3769 } 3770 } 3771 3772 /* Everything is fine, store the new b_langp value. 
*/ 3773 ga_clear(&buf->b_langp); 3774 buf->b_langp = ga; 3775 3776 /* For each language figure out what language to use for sound folding and 3777 * REP items. If the language doesn't support it itself use another one 3778 * with the same name. E.g. for "en-math" use "en". */ 3779 for (i = 0; i < ga.ga_len; ++i) 3780 { 3781 lp = LANGP_ENTRY(ga, i); 3782 3783 /* sound folding */ 3784 if (lp->lp_slang->sl_sal.ga_len > 0) 3785 /* language does sound folding itself */ 3786 lp->lp_sallang = lp->lp_slang; 3787 else 3788 /* find first similar language that does sound folding */ 3789 for (j = 0; j < ga.ga_len; ++j) 3790 { 3791 lp2 = LANGP_ENTRY(ga, j); 3792 if (lp2->lp_slang->sl_sal.ga_len > 0 3793 && STRNCMP(lp->lp_slang->sl_name, 3794 lp2->lp_slang->sl_name, 2) == 0) 3795 { 3796 lp->lp_sallang = lp2->lp_slang; 3797 break; 3798 } 3799 } 3800 3801 /* REP items */ 3802 if (lp->lp_slang->sl_rep.ga_len > 0) 3803 /* language has REP items itself */ 3804 lp->lp_replang = lp->lp_slang; 3805 else 3806 /* find first similar language that does sound folding */ 3807 for (j = 0; j < ga.ga_len; ++j) 3808 { 3809 lp2 = LANGP_ENTRY(ga, j); 3810 if (lp2->lp_slang->sl_rep.ga_len > 0 3811 && STRNCMP(lp->lp_slang->sl_name, 3812 lp2->lp_slang->sl_name, 2) == 0) 3813 { 3814 lp->lp_replang = lp2->lp_slang; 3815 break; 3816 } 3817 } 3818 } 3819 3820 return NULL; 3821 } 3822 3823 /* 3824 * Clear the midword characters for buffer "buf". 3825 */ 3826 static void 3827 clear_midword(buf) 3828 buf_T *buf; 3829 { 3830 vim_memset(buf->b_spell_ismw, 0, 256); 3831 #ifdef FEAT_MBYTE 3832 vim_free(buf->b_spell_ismw_mb); 3833 buf->b_spell_ismw_mb = NULL; 3834 #endif 3835 } 3836 3837 /* 3838 * Use the "sl_midword" field of language "lp" for buffer "buf". 3839 * They add up to any currently used midword characters. 
3840 */ 3841 static void 3842 use_midword(lp, buf) 3843 slang_T *lp; 3844 buf_T *buf; 3845 { 3846 char_u *p; 3847 3848 if (lp->sl_midword == NULL) /* there aren't any */ 3849 return; 3850 3851 for (p = lp->sl_midword; *p != NUL; ) 3852 #ifdef FEAT_MBYTE 3853 if (has_mbyte) 3854 { 3855 int c, l, n; 3856 char_u *bp; 3857 3858 c = mb_ptr2char(p); 3859 l = (*mb_ptr2len)(p); 3860 if (c < 256 && l <= 2) 3861 buf->b_spell_ismw[c] = TRUE; 3862 else if (buf->b_spell_ismw_mb == NULL) 3863 /* First multi-byte char in "b_spell_ismw_mb". */ 3864 buf->b_spell_ismw_mb = vim_strnsave(p, l); 3865 else 3866 { 3867 /* Append multi-byte chars to "b_spell_ismw_mb". */ 3868 n = STRLEN(buf->b_spell_ismw_mb); 3869 bp = vim_strnsave(buf->b_spell_ismw_mb, n + l); 3870 if (bp != NULL) 3871 { 3872 vim_free(buf->b_spell_ismw_mb); 3873 buf->b_spell_ismw_mb = bp; 3874 vim_strncpy(bp + n, p, l); 3875 } 3876 } 3877 p += l; 3878 } 3879 else 3880 #endif 3881 buf->b_spell_ismw[*p++] = TRUE; 3882 } 3883 3884 /* 3885 * Find the region "region[2]" in "rp" (points to "sl_regions"). 3886 * Each region is simply stored as the two characters of it's name. 3887 * Returns the index if found (first is 0), REGION_ALL if not found. 3888 */ 3889 static int 3890 find_region(rp, region) 3891 char_u *rp; 3892 char_u *region; 3893 { 3894 int i; 3895 3896 for (i = 0; ; i += 2) 3897 { 3898 if (rp[i] == NUL) 3899 return REGION_ALL; 3900 if (rp[i] == region[0] && rp[i + 1] == region[1]) 3901 break; 3902 } 3903 return i / 2; 3904 } 3905 3906 /* 3907 * Return case type of word: 3908 * w word 0 3909 * Word WF_ONECAP 3910 * W WORD WF_ALLCAP 3911 * WoRd wOrd WF_KEEPCAP 3912 */ 3913 static int 3914 captype(word, end) 3915 char_u *word; 3916 char_u *end; /* When NULL use up to NUL byte. 
*/ 3917 { 3918 char_u *p; 3919 int c; 3920 int firstcap; 3921 int allcap; 3922 int past_second = FALSE; /* past second word char */ 3923 3924 /* find first letter */ 3925 for (p = word; !spell_iswordp_nmw(p); mb_ptr_adv(p)) 3926 if (end == NULL ? *p == NUL : p >= end) 3927 return 0; /* only non-word characters, illegal word */ 3928 #ifdef FEAT_MBYTE 3929 if (has_mbyte) 3930 c = mb_ptr2char_adv(&p); 3931 else 3932 #endif 3933 c = *p++; 3934 firstcap = allcap = SPELL_ISUPPER(c); 3935 3936 /* 3937 * Need to check all letters to find a word with mixed upper/lower. 3938 * But a word with an upper char only at start is a ONECAP. 3939 */ 3940 for ( ; end == NULL ? *p != NUL : p < end; mb_ptr_adv(p)) 3941 if (spell_iswordp_nmw(p)) 3942 { 3943 c = PTR2CHAR(p); 3944 if (!SPELL_ISUPPER(c)) 3945 { 3946 /* UUl -> KEEPCAP */ 3947 if (past_second && allcap) 3948 return WF_KEEPCAP; 3949 allcap = FALSE; 3950 } 3951 else if (!allcap) 3952 /* UlU -> KEEPCAP */ 3953 return WF_KEEPCAP; 3954 past_second = TRUE; 3955 } 3956 3957 if (allcap) 3958 return WF_ALLCAP; 3959 if (firstcap) 3960 return WF_ONECAP; 3961 return 0; 3962 } 3963 3964 /* 3965 * Like captype() but for a KEEPCAP word add ONECAP if the word starts with a 3966 * capital. So that make_case_word() can turn WOrd into Word. 3967 * Add ALLCAP for "WOrD". 3968 */ 3969 static int 3970 badword_captype(word, end) 3971 char_u *word; 3972 char_u *end; 3973 { 3974 int flags = captype(word, end); 3975 int c; 3976 int l, u; 3977 int first; 3978 char_u *p; 3979 3980 if (flags & WF_KEEPCAP) 3981 { 3982 /* Count the number of UPPER and lower case letters. */ 3983 l = u = 0; 3984 first = FALSE; 3985 for (p = word; p < end; mb_ptr_adv(p)) 3986 { 3987 c = PTR2CHAR(p); 3988 if (SPELL_ISUPPER(c)) 3989 { 3990 ++u; 3991 if (p == word) 3992 first = TRUE; 3993 } 3994 else 3995 ++l; 3996 } 3997 3998 /* If there are more UPPER than lower case letters suggest an 3999 * ALLCAP word. Otherwise, if the first letter is UPPER then 4000 * suggest ONECAP. 
Exception: "ALl" most likely should be "All", 4001 * require three upper case letters. */ 4002 if (u > l && u > 2) 4003 flags |= WF_ALLCAP; 4004 else if (first) 4005 flags |= WF_ONECAP; 4006 } 4007 return flags; 4008 } 4009 4010 # if defined(FEAT_MBYTE) || defined(EXITFREE) || defined(PROTO) 4011 /* 4012 * Free all languages. 4013 */ 4014 void 4015 spell_free_all() 4016 { 4017 slang_T *slang; 4018 buf_T *buf; 4019 char_u fname[MAXPATHL]; 4020 4021 /* Go through all buffers and handle 'spelllang'. */ 4022 for (buf = firstbuf; buf != NULL; buf = buf->b_next) 4023 ga_clear(&buf->b_langp); 4024 4025 while (first_lang != NULL) 4026 { 4027 slang = first_lang; 4028 first_lang = slang->sl_next; 4029 slang_free(slang); 4030 } 4031 4032 if (int_wordlist != NULL) 4033 { 4034 /* Delete the internal wordlist and its .spl file */ 4035 mch_remove(int_wordlist); 4036 int_wordlist_spl(fname); 4037 mch_remove(fname); 4038 vim_free(int_wordlist); 4039 int_wordlist = NULL; 4040 } 4041 4042 init_spell_chartab(); 4043 } 4044 # endif 4045 4046 # if defined(FEAT_MBYTE) || defined(PROTO) 4047 /* 4048 * Clear all spelling tables and reload them. 4049 * Used after 'encoding' is set and when ":mkspell" was used. 4050 */ 4051 void 4052 spell_reload() 4053 { 4054 buf_T *buf; 4055 win_T *wp; 4056 4057 /* Initialize the table for spell_iswordp(). */ 4058 init_spell_chartab(); 4059 4060 /* Unload all allocated memory. */ 4061 spell_free_all(); 4062 4063 /* Go through all buffers and handle 'spelllang'. */ 4064 for (buf = firstbuf; buf != NULL; buf = buf->b_next) 4065 { 4066 /* Only load the wordlists when 'spelllang' is set and there is a 4067 * window for this buffer in which 'spell' is set. 
*/ 4068 if (*buf->b_p_spl != NUL) 4069 { 4070 FOR_ALL_WINDOWS(wp) 4071 if (wp->w_buffer == buf && wp->w_p_spell) 4072 { 4073 (void)did_set_spelllang(buf); 4074 # ifdef FEAT_WINDOWS 4075 break; 4076 # endif 4077 } 4078 } 4079 } 4080 } 4081 # endif 4082 4083 /* 4084 * Reload the spell file "fname" if it's loaded. 4085 */ 4086 static void 4087 spell_reload_one(fname, added_word) 4088 char_u *fname; 4089 int added_word; /* invoked through "zg" */ 4090 { 4091 slang_T *slang; 4092 int didit = FALSE; 4093 4094 for (slang = first_lang; slang != NULL; slang = slang->sl_next) 4095 { 4096 if (fullpathcmp(fname, slang->sl_fname, FALSE) == FPC_SAME) 4097 { 4098 slang_clear(slang); 4099 if (spell_load_file(fname, NULL, slang, FALSE) == NULL) 4100 /* reloading failed, clear the language */ 4101 slang_clear(slang); 4102 redraw_all_later(NOT_VALID); 4103 didit = TRUE; 4104 } 4105 } 4106 4107 /* When "zg" was used and the file wasn't loaded yet, should redo 4108 * 'spelllang' to get it loaded. */ 4109 if (added_word && !didit) 4110 did_set_spelllang(curbuf); 4111 } 4112 4113 4114 /* 4115 * Functions for ":mkspell". 4116 */ 4117 4118 #define MAXLINELEN 500 /* Maximum length in bytes of a line in a .aff 4119 and .dic file. */ 4120 /* 4121 * Main structure to store the contents of a ".aff" file. 
*/
typedef struct afffile_S
{
    char_u      *af_enc;        /* "SET", normalized, alloc'ed string or NULL */
    int         af_flagtype;    /* AFT_CHAR, AFT_LONG, AFT_NUM or AFT_CAPLONG */
    int         af_slash;       /* character used in word for slash */
    unsigned    af_rar;         /* RAR ID for rare word */
    unsigned    af_kep;         /* KEP ID for keep-case word */
    unsigned    af_bad;         /* BAD ID for banned word */
    unsigned    af_needaffix;   /* NEEDAFFIX ID */
    unsigned    af_needcomp;    /* NEEDCOMPOUND ID */
    int         af_pfxpostpone; /* postpone prefixes without chop string */
    hashtab_T   af_pref;        /* hashtable for prefixes, affheader_T */
    hashtab_T   af_suff;        /* hashtable for suffixes, affheader_T */
    hashtab_T   af_comp;        /* hashtable for compound flags, compitem_T */
} afffile_T;

/* Values for "af_flagtype": how affix flags are written. */
#define AFT_CHAR        0       /* flags are one character */
#define AFT_LONG        1       /* flags are two characters */
#define AFT_CAPLONG     2       /* flags are one or two characters */
#define AFT_NUM         3       /* flags are numbers, comma separated */

typedef struct affentry_S affentry_T;
/* Affix entry from ".aff" file.  Used for prefixes and suffixes.
 * Entries with the same name/number are linked through "ae_next". */
struct affentry_S
{
    affentry_T  *ae_next;       /* next affix with same name/number */
    char_u      *ae_chop;       /* text to chop off basic word (can be NULL) */
    char_u      *ae_add;        /* text to add to basic word (can be NULL) */
    char_u      *ae_cond;       /* condition (NULL for ".") */
    regprog_T   *ae_prog;       /* regexp program for ae_cond or NULL */
    char_u      ae_rare;        /* rare affix */
    char_u      ae_nocomp;      /* word with affix not compoundable */
};

/* Size of the hashtable key arrays in affheader_T and compitem_T. */
#ifdef FEAT_MBYTE
# define AH_KEY_LEN 17          /* 2 x 8 bytes + NUL */
#else
# define AH_KEY_LEN 7           /* 6 digits + NUL */
#endif

/* Affix header from ".aff" file.  Used for af_pref and af_suff.
*/
typedef struct affheader_S
{
    char_u      ah_key[AH_KEY_LEN]; /* key for hashtab == name of affix; must
                                       be the first member, see HI2AH() */
    unsigned    ah_flag;        /* affix name as number, uses "af_flagtype" */
    int         ah_newID;       /* prefix ID after renumbering; 0 if not used */
    int         ah_combine;     /* suffix may combine with prefix */
    int         ah_follows;     /* another affix block should be following */
    affentry_T  *ah_first;      /* first affix entry */
} affheader_T;

/* Get the affheader_T from hashitem "hi": the key pointer is used as the
 * pointer to the structure. */
#define HI2AH(hi)   ((affheader_T *)(hi)->hi_key)

/* Flag used in compound items. */
typedef struct compitem_S
{
    char_u      ci_key[AH_KEY_LEN]; /* key for hashtab == name of compound;
                                       must be the first member, see HI2CI() */
    unsigned    ci_flag;        /* affix name as number, uses "af_flagtype" */
    int         ci_newID;       /* affix ID after renumbering. */
} compitem_T;

/* Get the compitem_T from hashitem "hi": the key pointer is used as the
 * pointer to the structure. */
#define HI2CI(hi)   ((compitem_T *)(hi)->hi_key)

/*
 * Structure that is used to store the items in the word tree.  This avoids
 * the need to keep track of each allocated thing, everything is freed all at
 * once after ":mkspell" is done.
 */
#define  SBLOCKSIZE 16000       /* size of sb_data */
typedef struct sblock_S sblock_T;
struct sblock_S
{
    sblock_T    *sb_next;       /* next block in list */
    int         sb_used;        /* nr of bytes already in use */
    char_u      sb_data[1];     /* data, actually longer */
};

/*
 * A node in the tree.
*/
typedef struct wordnode_S wordnode_T;
struct wordnode_S
{
    /* This union must be the first member: HI2WN() uses the pointer to the
     * hash key as the pointer to the node. */
    union   /* shared to save space */
    {
        char_u  hashkey[6];     /* the hash key, only used while compressing */
        int     index;          /* index in written nodes (valid after first
                                   round) */
    } wn_u1;
    union   /* shared to save space */
    {
        wordnode_T *next;       /* next node with same hash key */
        wordnode_T *wnode;      /* parent node that will write this node */
    } wn_u2;
    wordnode_T  *wn_child;      /* child (next byte in word) */
    wordnode_T  *wn_sibling;    /* next sibling (alternate byte in word,
                                   always sorted) */
    int         wn_refs;        /* Nr. of references to this node.  Only
                                   relevant for first node in a list of
                                   siblings, in following siblings it is
                                   always one. */
    char_u      wn_byte;        /* Byte for this node. NUL for word end */
    char_u      wn_affixID;     /* when "wn_byte" is NUL: supported/required
                                   prefix ID or 0 */
    short_u     wn_flags;       /* when "wn_byte" is NUL: WF_ flags */
    short       wn_region;      /* when "wn_byte" is NUL: region mask; for
                                   PREFIXTREE it's the prefcondnr */
#ifdef SPELL_PRINTTREE
    int         wn_nr;          /* sequence nr for printing */
#endif
};

#define WN_MASK  0xffff         /* mask relevant bits of "wn_flags" */

/* Get the wordnode_T from hashitem "hi". */
#define HI2WN(hi)    (wordnode_T *)((hi)->hi_key)

/*
 * Info used while reading the spell files.
*/
typedef struct spellinfo_S
{
    /* The word trees that are being built. */
    wordnode_T  *si_foldroot;   /* tree with case-folded words */
    long        si_foldwcount;  /* nr of words in si_foldroot */

    wordnode_T  *si_keeproot;   /* tree with keep-case words */
    long        si_keepwcount;  /* nr of words in si_keeproot */

    wordnode_T  *si_prefroot;   /* tree with postponed prefixes */

    /* Memory used for the trees and compression bookkeeping. */
    sblock_T    *si_blocks;     /* memory blocks used */
    long        si_blocks_cnt;  /* memory blocks allocated */
    long        si_compress_cnt;    /* words to add before lowering
                                       compression limit */
    wordnode_T  *si_first_free; /* List of nodes that have been freed during
                                   compression, linked by "wn_child" field. */
    long        si_free_count;  /* number of nodes in si_first_free */
#ifdef SPELL_PRINTTREE
    int         si_wordnode_nr; /* sequence nr for nodes */
#endif


    /* Settings and state for the file that is being read. */
    int         si_ascii;       /* handling only ASCII words */
    int         si_add;         /* addition file */
    int         si_clear_chartab;   /* when TRUE clear char tables */
    int         si_region;      /* region mask */
    vimconv_T   si_conv;        /* for conversion to 'encoding' */
    int         si_memtot;      /* runtime memory used */
    int         si_verbose;     /* verbose messages */
    int         si_msg_count;   /* number of words added since last message */
    int         si_region_count; /* number of regions supported (1 when there
                                    are no regions) */
    char_u      si_region_name[16]; /* region names; used only if
                                     * si_region_count > 1) */

    /* Items collected from the .aff file. */
    garray_T    si_rep;         /* list of fromto_T entries from REP lines */
    garray_T    si_sal;         /* list of fromto_T entries from SAL lines */
    char_u      *si_sofofr;     /* SOFOFROM text */
    char_u      *si_sofoto;     /* SOFOTO text */
    int         si_followup;    /* soundsalike: ? */
    int         si_collapse;    /* soundsalike: ? */
    int         si_rem_accents; /* soundsalike: remove accents */
    garray_T    si_map;         /* MAP info concatenated */
    char_u      *si_midword;    /* MIDWORD chars or NULL  */
    int         si_compmax;     /* max nr of words for compounding */
    int         si_compminlen;  /* minimal length for compounding */
    int         si_compsylmax;  /* max nr of syllables for compounding */
    char_u      *si_compflags;  /* flags used for compounding */
    char_u      si_nobreak;     /* NOBREAK */
    char_u      *si_syllable;   /* syllable string */
    garray_T    si_prefcond;    /* table with conditions for postponed
                                 * prefixes, each stored as a string */
    int         si_newprefID;   /* current value for ah_newID */
    int         si_newcompID;   /* current value for compound ID */
} spellinfo_T;

/* Forward declarations for the static functions that follow. */
static afffile_T *spell_read_aff __ARGS((spellinfo_T *spin, char_u *fname));
static unsigned affitem2flag __ARGS((int flagtype, char_u *item, char_u	*fname, int lnum));
static unsigned get_affitem __ARGS((int flagtype, char_u **pp));
static void process_compflags __ARGS((spellinfo_T *spin, afffile_T *aff, char_u *compflags));
static void check_renumber __ARGS((spellinfo_T *spin));
static int flag_in_afflist __ARGS((int flagtype, char_u *afflist, unsigned flag));
static void aff_check_number __ARGS((int spinval, int affval, char *name));
static void aff_check_string __ARGS((char_u *spinval, char_u *affval, char *name));
static int str_equal __ARGS((char_u *s1, char_u *s2));
static void add_fromto __ARGS((spellinfo_T *spin, garray_T *gap, char_u *from, char_u *to));
static int sal_to_bool __ARGS((char_u *s));
static int has_non_ascii __ARGS((char_u *s));
static void spell_free_aff __ARGS((afffile_T *aff));
static int spell_read_dic __ARGS((spellinfo_T *spin, char_u *fname, afffile_T *affile));
static int get_pfxlist __ARGS((afffile_T *affile, char_u *afflist, char_u *store_afflist));
static void get_compflags __ARGS((afffile_T *affile, char_u *afflist, char_u *store_afflist));
static int store_aff_word __ARGS((spellinfo_T *spin, char_u *word, char_u *afflist, afffile_T *affile, hashtab_T *ht, hashtab_T *xht, int comb, int flags, char_u *pfxlist, int pfxlen));
static int spell_read_wordfile __ARGS((spellinfo_T *spin, char_u *fname));
static void *getroom __ARGS((spellinfo_T *spin, size_t len, int align));
static char_u *getroom_save __ARGS((spellinfo_T *spin, char_u *s));
static void free_blocks __ARGS((sblock_T *bl));
static wordnode_T *wordtree_alloc __ARGS((spellinfo_T *spin));
static int store_word __ARGS((spellinfo_T *spin, char_u *word, int flags, int region, char_u *pfxlist, int need_affix));
static int tree_add_word __ARGS((spellinfo_T *spin, char_u *word, wordnode_T *tree, int flags, int region, int affixID));
static wordnode_T *get_wordnode __ARGS((spellinfo_T *spin));
static void deref_wordnode __ARGS((spellinfo_T *spin, wordnode_T *node));
static void free_wordnode __ARGS((spellinfo_T *spin, wordnode_T *n));
static void wordtree_compress __ARGS((spellinfo_T *spin, wordnode_T *root));
static int node_compress __ARGS((spellinfo_T *spin, wordnode_T *node, hashtab_T *ht, int *tot));
static int node_equal __ARGS((wordnode_T *n1, wordnode_T *n2));
static int write_vim_spell __ARGS((spellinfo_T *spin, char_u *fname));
static void clear_node __ARGS((wordnode_T *node));
static int put_node __ARGS((FILE *fd, wordnode_T *node, int index, int regionmask, int prefixtree));
static void mkspell __ARGS((int fcount, char_u **fnames, int ascii, int overwrite, int added_word));
static void init_spellfile __ARGS((void));

/* In the postponed prefixes tree wn_flags is used to store the WFP_ flags,
 * but it must be negative to indicate the prefix tree to tree_add_word().
 * Use a negative number with the lower 8 bits zero.
*/ 4337 #define PFX_FLAGS -256 4338 4339 /* 4340 * Tunable parameters for when the tree is compressed. See 'mkspellmem'. 4341 */ 4342 static long compress_start = 30000; /* memory / SBLOCKSIZE */ 4343 static long compress_inc = 100; /* memory / SBLOCKSIZE */ 4344 static long compress_added = 500000; /* word count */ 4345 4346 #ifdef SPELL_PRINTTREE 4347 /* 4348 * For debugging the tree code: print the current tree in a (more or less) 4349 * readable format, so that we can see what happens when adding a word and/or 4350 * compressing the tree. 4351 * Based on code from Olaf Seibert. 4352 */ 4353 #define PRINTLINESIZE 1000 4354 #define PRINTWIDTH 6 4355 4356 #define PRINTSOME(l, depth, fmt, a1, a2) vim_snprintf(l + depth * PRINTWIDTH, \ 4357 PRINTLINESIZE - PRINTWIDTH * depth, fmt, a1, a2) 4358 4359 static char line1[PRINTLINESIZE]; 4360 static char line2[PRINTLINESIZE]; 4361 static char line3[PRINTLINESIZE]; 4362 4363 static void 4364 spell_clear_flags(wordnode_T *node) 4365 { 4366 wordnode_T *np; 4367 4368 for (np = node; np != NULL; np = np->wn_sibling) 4369 { 4370 np->wn_u1.index = FALSE; 4371 spell_clear_flags(np->wn_child); 4372 } 4373 } 4374 4375 static void 4376 spell_print_node(wordnode_T *node, int depth) 4377 { 4378 if (node->wn_u1.index) 4379 { 4380 /* Done this node before, print the reference. */ 4381 PRINTSOME(line1, depth, "(%d)", node->wn_nr, 0); 4382 PRINTSOME(line2, depth, " ", 0, 0); 4383 PRINTSOME(line3, depth, " ", 0, 0); 4384 msg(line1); 4385 msg(line2); 4386 msg(line3); 4387 } 4388 else 4389 { 4390 node->wn_u1.index = TRUE; 4391 4392 if (node->wn_byte != NUL) 4393 { 4394 if (node->wn_child != NULL) 4395 PRINTSOME(line1, depth, " %c -> ", node->wn_byte, 0); 4396 else 4397 /* Cannot happen? 
*/ 4398 PRINTSOME(line1, depth, " %c ???", node->wn_byte, 0); 4399 } 4400 else 4401 PRINTSOME(line1, depth, " $ ", 0, 0); 4402 4403 PRINTSOME(line2, depth, "%d/%d ", node->wn_nr, node->wn_refs); 4404 4405 if (node->wn_sibling != NULL) 4406 PRINTSOME(line3, depth, " | ", 0, 0); 4407 else 4408 PRINTSOME(line3, depth, " ", 0, 0); 4409 4410 if (node->wn_byte == NUL) 4411 { 4412 msg(line1); 4413 msg(line2); 4414 msg(line3); 4415 } 4416 4417 /* do the children */ 4418 if (node->wn_byte != NUL && node->wn_child != NULL) 4419 spell_print_node(node->wn_child, depth + 1); 4420 4421 /* do the siblings */ 4422 if (node->wn_sibling != NULL) 4423 { 4424 /* get rid of all parent details except | */ 4425 STRCPY(line1, line3); 4426 STRCPY(line2, line3); 4427 spell_print_node(node->wn_sibling, depth); 4428 } 4429 } 4430 } 4431 4432 static void 4433 spell_print_tree(wordnode_T *root) 4434 { 4435 if (root != NULL) 4436 { 4437 /* Clear the "wn_u1.index" fields, used to remember what has been 4438 * done. */ 4439 spell_clear_flags(root); 4440 4441 /* Recursively print the tree. */ 4442 spell_print_node(root, 0); 4443 } 4444 } 4445 #endif /* SPELL_PRINTTREE */ 4446 4447 /* 4448 * Read the affix file "fname". 4449 * Returns an afffile_T, NULL for complete failure. 
4450 */ 4451 static afffile_T * 4452 spell_read_aff(spin, fname) 4453 spellinfo_T *spin; 4454 char_u *fname; 4455 { 4456 FILE *fd; 4457 afffile_T *aff; 4458 char_u rline[MAXLINELEN]; 4459 char_u *line; 4460 char_u *pc = NULL; 4461 #define MAXITEMCNT 7 4462 char_u *(items[MAXITEMCNT]); 4463 int itemcnt; 4464 char_u *p; 4465 int lnum = 0; 4466 affheader_T *cur_aff = NULL; 4467 int did_postpone_prefix = FALSE; 4468 int aff_todo = 0; 4469 hashtab_T *tp; 4470 char_u *low = NULL; 4471 char_u *fol = NULL; 4472 char_u *upp = NULL; 4473 int do_rep; 4474 int do_sal; 4475 int do_map; 4476 int found_map = FALSE; 4477 hashitem_T *hi; 4478 int l; 4479 int compminlen = 0; /* COMPOUNDMIN value */ 4480 int compsylmax = 0; /* COMPOUNDSYLMAX value */ 4481 int compmax = 0; /* COMPOUNDMAX value */ 4482 char_u *compflags = NULL; /* COMPOUNDFLAG and COMPOUNDFLAGS 4483 concatenated */ 4484 char_u *midword = NULL; /* MIDWORD value */ 4485 char_u *syllable = NULL; /* SYLLABLE value */ 4486 char_u *sofofrom = NULL; /* SOFOFROM value */ 4487 char_u *sofoto = NULL; /* SOFOTO value */ 4488 4489 /* 4490 * Open the file. 4491 */ 4492 fd = mch_fopen((char *)fname, "r"); 4493 if (fd == NULL) 4494 { 4495 EMSG2(_(e_notopen), fname); 4496 return NULL; 4497 } 4498 4499 if (spin->si_verbose || p_verbose > 2) 4500 { 4501 if (!spin->si_verbose) 4502 verbose_enter(); 4503 smsg((char_u *)_("Reading affix file %s ..."), fname); 4504 out_flush(); 4505 if (!spin->si_verbose) 4506 verbose_leave(); 4507 } 4508 4509 /* Only do REP lines when not done in another .aff file already. */ 4510 do_rep = spin->si_rep.ga_len == 0; 4511 4512 /* Only do SAL lines when not done in another .aff file already. */ 4513 do_sal = spin->si_sal.ga_len == 0; 4514 4515 /* Only do MAP lines when not done in another .aff file already. */ 4516 do_map = spin->si_map.ga_len == 0; 4517 4518 /* 4519 * Allocate and init the afffile_T structure. 
4520 */ 4521 aff = (afffile_T *)getroom(spin, sizeof(afffile_T), TRUE); 4522 if (aff == NULL) 4523 return NULL; 4524 hash_init(&aff->af_pref); 4525 hash_init(&aff->af_suff); 4526 hash_init(&aff->af_comp); 4527 4528 /* 4529 * Read all the lines in the file one by one. 4530 */ 4531 while (!vim_fgets(rline, MAXLINELEN, fd) && !got_int) 4532 { 4533 line_breakcheck(); 4534 ++lnum; 4535 4536 /* Skip comment lines. */ 4537 if (*rline == '#') 4538 continue; 4539 4540 /* Convert from "SET" to 'encoding' when needed. */ 4541 vim_free(pc); 4542 #ifdef FEAT_MBYTE 4543 if (spin->si_conv.vc_type != CONV_NONE) 4544 { 4545 pc = string_convert(&spin->si_conv, rline, NULL); 4546 if (pc == NULL) 4547 { 4548 smsg((char_u *)_("Conversion failure for word in %s line %d: %s"), 4549 fname, lnum, rline); 4550 continue; 4551 } 4552 line = pc; 4553 } 4554 else 4555 #endif 4556 { 4557 pc = NULL; 4558 line = rline; 4559 } 4560 4561 /* Split the line up in white separated items. Put a NUL after each 4562 * item. */ 4563 itemcnt = 0; 4564 for (p = line; ; ) 4565 { 4566 while (*p != NUL && *p <= ' ') /* skip white space and CR/NL */ 4567 ++p; 4568 if (*p == NUL) 4569 break; 4570 if (itemcnt == MAXITEMCNT) /* too many items */ 4571 break; 4572 items[itemcnt++] = p; 4573 while (*p > ' ') /* skip until white space or CR/NL */ 4574 ++p; 4575 if (*p == NUL) 4576 break; 4577 *p++ = NUL; 4578 } 4579 4580 /* Handle non-empty lines. */ 4581 if (itemcnt > 0) 4582 { 4583 if (STRCMP(items[0], "SET") == 0 && itemcnt == 2 4584 && aff->af_enc == NULL) 4585 { 4586 #ifdef FEAT_MBYTE 4587 /* Setup for conversion from "ENC" to 'encoding'. 
*/ 4588 aff->af_enc = enc_canonize(items[1]); 4589 if (aff->af_enc != NULL && !spin->si_ascii 4590 && convert_setup(&spin->si_conv, aff->af_enc, 4591 p_enc) == FAIL) 4592 smsg((char_u *)_("Conversion in %s not supported: from %s to %s"), 4593 fname, aff->af_enc, p_enc); 4594 spin->si_conv.vc_fail = TRUE; 4595 #else 4596 smsg((char_u *)_("Conversion in %s not supported"), fname); 4597 #endif 4598 } 4599 else if (STRCMP(items[0], "FLAG") == 0 && itemcnt == 2 4600 && aff->af_flagtype == AFT_CHAR) 4601 { 4602 if (STRCMP(items[1], "long") == 0) 4603 aff->af_flagtype = AFT_LONG; 4604 else if (STRCMP(items[1], "num") == 0) 4605 aff->af_flagtype = AFT_NUM; 4606 else if (STRCMP(items[1], "caplong") == 0) 4607 aff->af_flagtype = AFT_CAPLONG; 4608 else 4609 smsg((char_u *)_("Invalid value for FLAG in %s line %d: %s"), 4610 fname, lnum, items[1]); 4611 if (aff->af_rar != 0 || aff->af_kep != 0 || aff->af_bad != 0 4612 || aff->af_needaffix != 0 4613 || aff->af_needcomp != 0 4614 || compflags != NULL 4615 || aff->af_suff.ht_used > 0 4616 || aff->af_pref.ht_used > 0) 4617 smsg((char_u *)_("FLAG after using flags in %s line %d: %s"), 4618 fname, lnum, items[1]); 4619 } 4620 else if (STRCMP(items[0], "MIDWORD") == 0 && itemcnt == 2 4621 && midword == NULL) 4622 { 4623 midword = getroom_save(spin, items[1]); 4624 } 4625 else if (STRCMP(items[0], "NOSPLITSUGS") == 0 && itemcnt == 1) 4626 { 4627 /* ignored, we always split */ 4628 } 4629 else if (STRCMP(items[0], "TRY") == 0 && itemcnt == 2) 4630 { 4631 /* ignored, we look in the tree for what chars may appear */ 4632 } 4633 else if (STRCMP(items[0], "SLASH") == 0 && itemcnt == 2 4634 && aff->af_slash == 0) 4635 { 4636 aff->af_slash = items[1][0]; 4637 if (items[1][1] != NUL) 4638 smsg((char_u *)_("Character used for SLASH must be ASCII; in %s line %d: %s"), 4639 fname, lnum, items[1]); 4640 } 4641 else if (STRCMP(items[0], "RAR") == 0 && itemcnt == 2 4642 && aff->af_rar == 0) 4643 { 4644 aff->af_rar = affitem2flag(aff->af_flagtype, 
items[1], 4645 fname, lnum); 4646 } 4647 else if (STRCMP(items[0], "KEP") == 0 && itemcnt == 2 4648 && aff->af_kep == 0) 4649 { 4650 aff->af_kep = affitem2flag(aff->af_flagtype, items[1], 4651 fname, lnum); 4652 } 4653 else if (STRCMP(items[0], "BAD") == 0 && itemcnt == 2 4654 && aff->af_bad == 0) 4655 { 4656 aff->af_bad = affitem2flag(aff->af_flagtype, items[1], 4657 fname, lnum); 4658 } 4659 else if (STRCMP(items[0], "NEEDAFFIX") == 0 && itemcnt == 2 4660 && aff->af_needaffix == 0) 4661 { 4662 aff->af_needaffix = affitem2flag(aff->af_flagtype, items[1], 4663 fname, lnum); 4664 } 4665 else if (STRCMP(items[0], "NEEDCOMPOUND") == 0 && itemcnt == 2 4666 && aff->af_needcomp == 0) 4667 { 4668 aff->af_needcomp = affitem2flag(aff->af_flagtype, items[1], 4669 fname, lnum); 4670 } 4671 else if (STRCMP(items[0], "COMPOUNDFLAG") == 0 && itemcnt == 2 4672 && compflags == NULL) 4673 { 4674 /* Turn flag "c" into COMPOUNDFLAGS compatible string "c+", 4675 * "Na" into "Na+", "1234" into "1234+". */ 4676 p = getroom(spin, STRLEN(items[1]) + 2, FALSE); 4677 if (p != NULL) 4678 { 4679 STRCPY(p, items[1]); 4680 STRCAT(p, "+"); 4681 compflags = p; 4682 } 4683 } 4684 else if (STRCMP(items[0], "COMPOUNDFLAGS") == 0 && itemcnt == 2) 4685 { 4686 /* Concatenate this string to previously defined ones, using a 4687 * slash to separate them. 
*/ 4688 l = STRLEN(items[1]) + 1; 4689 if (compflags != NULL) 4690 l += STRLEN(compflags) + 1; 4691 p = getroom(spin, l, FALSE); 4692 if (p != NULL) 4693 { 4694 if (compflags != NULL) 4695 { 4696 STRCPY(p, compflags); 4697 STRCAT(p, "/"); 4698 } 4699 STRCAT(p, items[1]); 4700 compflags = p; 4701 } 4702 } 4703 else if (STRCMP(items[0], "COMPOUNDMAX") == 0 && itemcnt == 2 4704 && compmax == 0) 4705 { 4706 compmax = atoi((char *)items[1]); 4707 if (compmax == 0) 4708 smsg((char_u *)_("Wrong COMPOUNDMAX value in %s line %d: %s"), 4709 fname, lnum, items[1]); 4710 } 4711 else if (STRCMP(items[0], "COMPOUNDMIN") == 0 && itemcnt == 2 4712 && compminlen == 0) 4713 { 4714 compminlen = atoi((char *)items[1]); 4715 if (compminlen == 0) 4716 smsg((char_u *)_("Wrong COMPOUNDMIN value in %s line %d: %s"), 4717 fname, lnum, items[1]); 4718 } 4719 else if (STRCMP(items[0], "COMPOUNDSYLMAX") == 0 && itemcnt == 2 4720 && compsylmax == 0) 4721 { 4722 compsylmax = atoi((char *)items[1]); 4723 if (compsylmax == 0) 4724 smsg((char_u *)_("Wrong COMPOUNDSYLMAX value in %s line %d: %s"), 4725 fname, lnum, items[1]); 4726 } 4727 else if (STRCMP(items[0], "SYLLABLE") == 0 && itemcnt == 2 4728 && syllable == NULL) 4729 { 4730 syllable = getroom_save(spin, items[1]); 4731 } 4732 else if (STRCMP(items[0], "NOBREAK") == 0 && itemcnt == 1) 4733 { 4734 spin->si_nobreak = TRUE; 4735 } 4736 else if (STRCMP(items[0], "PFXPOSTPONE") == 0 && itemcnt == 1) 4737 { 4738 aff->af_pfxpostpone = TRUE; 4739 } 4740 else if ((STRCMP(items[0], "PFX") == 0 4741 || STRCMP(items[0], "SFX") == 0) 4742 && aff_todo == 0 4743 && itemcnt >= 4) 4744 { 4745 int lasti = 4; 4746 char_u key[AH_KEY_LEN]; 4747 4748 if (*items[0] == 'P') 4749 tp = &aff->af_pref; 4750 else 4751 tp = &aff->af_suff; 4752 4753 /* Myspell allows the same affix name to be used multiple 4754 * times. The affix files that do this have an undocumented 4755 * "S" flag on all but the last block, thus we check for that 4756 * and store it in ah_follows. 
*/ 4757 vim_strncpy(key, items[1], AH_KEY_LEN - 1); 4758 hi = hash_find(tp, key); 4759 if (!HASHITEM_EMPTY(hi)) 4760 { 4761 cur_aff = HI2AH(hi); 4762 if (cur_aff->ah_combine != (*items[2] == 'Y')) 4763 smsg((char_u *)_("Different combining flag in continued affix block in %s line %d: %s"), 4764 fname, lnum, items[1]); 4765 if (!cur_aff->ah_follows) 4766 smsg((char_u *)_("Duplicate affix in %s line %d: %s"), 4767 fname, lnum, items[1]); 4768 } 4769 else 4770 { 4771 /* New affix letter. */ 4772 cur_aff = (affheader_T *)getroom(spin, 4773 sizeof(affheader_T), TRUE); 4774 if (cur_aff == NULL) 4775 break; 4776 cur_aff->ah_flag = affitem2flag(aff->af_flagtype, items[1], 4777 fname, lnum); 4778 if (cur_aff->ah_flag == 0 || STRLEN(items[1]) >= AH_KEY_LEN) 4779 break; 4780 if (cur_aff->ah_flag == aff->af_bad 4781 || cur_aff->ah_flag == aff->af_rar 4782 || cur_aff->ah_flag == aff->af_kep 4783 || cur_aff->ah_flag == aff->af_needaffix 4784 || cur_aff->ah_flag == aff->af_needcomp) 4785 smsg((char_u *)_("Affix also used for BAD/RAR/KEP/NEEDAFFIX/NEEDCOMPOUND in %s line %d: %s"), 4786 fname, lnum, items[1]); 4787 STRCPY(cur_aff->ah_key, items[1]); 4788 hash_add(tp, cur_aff->ah_key); 4789 4790 cur_aff->ah_combine = (*items[2] == 'Y'); 4791 } 4792 4793 /* Check for the "S" flag, which apparently means that another 4794 * block with the same affix name is following. */ 4795 if (itemcnt > lasti && STRCMP(items[lasti], "S") == 0) 4796 { 4797 ++lasti; 4798 cur_aff->ah_follows = TRUE; 4799 } 4800 else 4801 cur_aff->ah_follows = FALSE; 4802 4803 /* Myspell allows extra text after the item, but that might 4804 * mean mistakes go unnoticed. Require a comment-starter. 
*/ 4805 if (itemcnt > lasti && *items[lasti] != '#') 4806 smsg((char_u *)_("Trailing text in %s line %d: %s"), 4807 fname, lnum, items[4]); 4808 4809 if (STRCMP(items[2], "Y") != 0 && STRCMP(items[2], "N") != 0) 4810 smsg((char_u *)_("Expected Y or N in %s line %d: %s"), 4811 fname, lnum, items[2]); 4812 4813 if (*items[0] == 'P' && aff->af_pfxpostpone) 4814 { 4815 if (cur_aff->ah_newID == 0) 4816 { 4817 /* Use a new number in the .spl file later, to be able 4818 * to handle multiple .aff files. */ 4819 check_renumber(spin); 4820 cur_aff->ah_newID = ++spin->si_newprefID; 4821 4822 /* We only really use ah_newID if the prefix is 4823 * postponed. We know that only after handling all 4824 * the items. */ 4825 did_postpone_prefix = FALSE; 4826 } 4827 else 4828 /* Did use the ID in a previous block. */ 4829 did_postpone_prefix = TRUE; 4830 } 4831 4832 aff_todo = atoi((char *)items[3]); 4833 } 4834 else if ((STRCMP(items[0], "PFX") == 0 4835 || STRCMP(items[0], "SFX") == 0) 4836 && aff_todo > 0 4837 && STRCMP(cur_aff->ah_key, items[1]) == 0 4838 && itemcnt >= 5) 4839 { 4840 affentry_T *aff_entry; 4841 int rare = FALSE; 4842 int nocomp = FALSE; 4843 int upper = FALSE; 4844 int lasti = 5; 4845 4846 /* Check for "rare" and "nocomp" after the other info. */ 4847 while (itemcnt > lasti) 4848 { 4849 if (!rare && STRICMP(items[lasti], "rare") == 0) 4850 { 4851 rare = TRUE; 4852 ++lasti; 4853 } 4854 else if (!nocomp && STRICMP(items[lasti], "nocomp") == 0) 4855 { 4856 nocomp = TRUE; 4857 ++lasti; 4858 } 4859 else 4860 break; 4861 } 4862 4863 /* Myspell allows extra text after the item, but that might 4864 * mean mistakes go unnoticed. Require a comment-starter. */ 4865 if (itemcnt > lasti && *items[lasti] != '#') 4866 smsg((char_u *)_(e_afftrailing), fname, lnum, items[lasti]); 4867 4868 /* New item for an affix letter. 
*/ 4869 --aff_todo; 4870 aff_entry = (affentry_T *)getroom(spin, 4871 sizeof(affentry_T), TRUE); 4872 if (aff_entry == NULL) 4873 break; 4874 aff_entry->ae_rare = rare; 4875 aff_entry->ae_nocomp = nocomp; 4876 4877 if (STRCMP(items[2], "0") != 0) 4878 aff_entry->ae_chop = getroom_save(spin, items[2]); 4879 if (STRCMP(items[3], "0") != 0) 4880 aff_entry->ae_add = getroom_save(spin, items[3]); 4881 4882 /* Don't use an affix entry with non-ASCII characters when 4883 * "spin->si_ascii" is TRUE. */ 4884 if (!spin->si_ascii || !(has_non_ascii(aff_entry->ae_chop) 4885 || has_non_ascii(aff_entry->ae_add))) 4886 { 4887 aff_entry->ae_next = cur_aff->ah_first; 4888 cur_aff->ah_first = aff_entry; 4889 4890 if (STRCMP(items[4], ".") != 0) 4891 { 4892 char_u buf[MAXLINELEN]; 4893 4894 aff_entry->ae_cond = getroom_save(spin, items[4]); 4895 if (*items[0] == 'P') 4896 sprintf((char *)buf, "^%s", items[4]); 4897 else 4898 sprintf((char *)buf, "%s$", items[4]); 4899 aff_entry->ae_prog = vim_regcomp(buf, 4900 RE_MAGIC + RE_STRING + RE_STRICT); 4901 if (aff_entry->ae_prog == NULL) 4902 smsg((char_u *)_("Broken condition in %s line %d: %s"), 4903 fname, lnum, items[4]); 4904 } 4905 4906 /* For postponed prefixes we need an entry in si_prefcond 4907 * for the condition. Use an existing one if possible. */ 4908 if (*items[0] == 'P' && aff->af_pfxpostpone) 4909 { 4910 /* When the chop string is one lower-case letter and 4911 * the add string ends in the upper-case letter we set 4912 * the "upper" flag, clear "ae_chop" and remove the 4913 * letters from "ae_add". The condition must either 4914 * be empty or start with the same letter. 
*/ 4915 if (aff_entry->ae_chop != NULL 4916 && aff_entry->ae_add != NULL 4917 #ifdef FEAT_MBYTE 4918 && aff_entry->ae_chop[(*mb_ptr2len)( 4919 aff_entry->ae_chop)] == NUL 4920 #else 4921 && aff_entry->ae_chop[1] == NUL 4922 #endif 4923 ) 4924 { 4925 int c, c_up; 4926 4927 c = PTR2CHAR(aff_entry->ae_chop); 4928 c_up = SPELL_TOUPPER(c); 4929 if (c_up != c 4930 && (aff_entry->ae_cond == NULL 4931 || PTR2CHAR(aff_entry->ae_cond) == c)) 4932 { 4933 p = aff_entry->ae_add 4934 + STRLEN(aff_entry->ae_add); 4935 mb_ptr_back(aff_entry->ae_add, p); 4936 if (PTR2CHAR(p) == c_up) 4937 { 4938 upper = TRUE; 4939 aff_entry->ae_chop = NULL; 4940 *p = NUL; 4941 4942 /* The condition is matched with the 4943 * actual word, thus must check for the 4944 * upper-case letter. */ 4945 if (aff_entry->ae_cond != NULL) 4946 { 4947 char_u buf[MAXLINELEN]; 4948 #ifdef FEAT_MBYTE 4949 if (has_mbyte) 4950 { 4951 onecap_copy(items[4], buf, TRUE); 4952 aff_entry->ae_cond = getroom_save( 4953 spin, buf); 4954 } 4955 else 4956 #endif 4957 *aff_entry->ae_cond = c_up; 4958 if (aff_entry->ae_cond != NULL) 4959 { 4960 sprintf((char *)buf, "^%s", 4961 aff_entry->ae_cond); 4962 vim_free(aff_entry->ae_prog); 4963 aff_entry->ae_prog = vim_regcomp( 4964 buf, RE_MAGIC + RE_STRING); 4965 } 4966 } 4967 } 4968 } 4969 } 4970 4971 if (aff_entry->ae_chop == NULL) 4972 { 4973 int idx; 4974 char_u **pp; 4975 int n; 4976 4977 /* Find a previously used condition. */ 4978 for (idx = spin->si_prefcond.ga_len - 1; idx >= 0; 4979 --idx) 4980 { 4981 p = ((char_u **)spin->si_prefcond.ga_data)[idx]; 4982 if (str_equal(p, aff_entry->ae_cond)) 4983 break; 4984 } 4985 if (idx < 0 && ga_grow(&spin->si_prefcond, 1) == OK) 4986 { 4987 /* Not found, add a new condition. 
*/ 4988 idx = spin->si_prefcond.ga_len++; 4989 pp = ((char_u **)spin->si_prefcond.ga_data) 4990 + idx; 4991 if (aff_entry->ae_cond == NULL) 4992 *pp = NULL; 4993 else 4994 *pp = getroom_save(spin, 4995 aff_entry->ae_cond); 4996 } 4997 4998 /* Add the prefix to the prefix tree. */ 4999 if (aff_entry->ae_add == NULL) 5000 p = (char_u *)""; 5001 else 5002 p = aff_entry->ae_add; 5003 /* PFX_FLAGS is a negative number, so that 5004 * tree_add_word() knows this is the prefix tree. */ 5005 n = PFX_FLAGS; 5006 if (rare) 5007 n |= WFP_RARE; 5008 if (!cur_aff->ah_combine) 5009 n |= WFP_NC; 5010 if (upper) 5011 n |= WFP_UP; 5012 tree_add_word(spin, p, spin->si_prefroot, n, 5013 idx, cur_aff->ah_newID); 5014 did_postpone_prefix = TRUE; 5015 } 5016 5017 /* Didn't actually use ah_newID, backup si_newprefID. */ 5018 if (aff_todo == 0 && !did_postpone_prefix) 5019 { 5020 --spin->si_newprefID; 5021 cur_aff->ah_newID = 0; 5022 } 5023 } 5024 } 5025 } 5026 else if (STRCMP(items[0], "FOL") == 0 && itemcnt == 2 5027 && fol == NULL) 5028 { 5029 fol = vim_strsave(items[1]); 5030 } 5031 else if (STRCMP(items[0], "LOW") == 0 && itemcnt == 2 5032 && low == NULL) 5033 { 5034 low = vim_strsave(items[1]); 5035 } 5036 else if (STRCMP(items[0], "UPP") == 0 && itemcnt == 2 5037 && upp == NULL) 5038 { 5039 upp = vim_strsave(items[1]); 5040 } 5041 else if (STRCMP(items[0], "REP") == 0 && itemcnt == 2) 5042 { 5043 /* Ignore REP count */; 5044 if (!isdigit(*items[1])) 5045 smsg((char_u *)_("Expected REP count in %s line %d"), 5046 fname, lnum); 5047 } 5048 else if (STRCMP(items[0], "REP") == 0 && itemcnt >= 3) 5049 { 5050 /* REP item */ 5051 /* Myspell ignores extra arguments, we require it starts with 5052 * # to detect mistakes. */ 5053 if (itemcnt > 3 && items[3][0] != '#') 5054 smsg((char_u *)_(e_afftrailing), fname, lnum, items[3]); 5055 if (do_rep) 5056 { 5057 /* Replace underscore with space (can't include a space 5058 * directly). 
*/ 5059 for (p = items[1]; *p != NUL; mb_ptr_adv(p)) 5060 if (*p == '_') 5061 *p = ' '; 5062 for (p = items[2]; *p != NUL; mb_ptr_adv(p)) 5063 if (*p == '_') 5064 *p = ' '; 5065 add_fromto(spin, &spin->si_rep, items[1], items[2]); 5066 } 5067 } 5068 else if (STRCMP(items[0], "MAP") == 0 && itemcnt == 2) 5069 { 5070 /* MAP item or count */ 5071 if (!found_map) 5072 { 5073 /* First line contains the count. */ 5074 found_map = TRUE; 5075 if (!isdigit(*items[1])) 5076 smsg((char_u *)_("Expected MAP count in %s line %d"), 5077 fname, lnum); 5078 } 5079 else if (do_map) 5080 { 5081 int c; 5082 5083 /* Check that every character appears only once. */ 5084 for (p = items[1]; *p != NUL; ) 5085 { 5086 #ifdef FEAT_MBYTE 5087 c = mb_ptr2char_adv(&p); 5088 #else 5089 c = *p++; 5090 #endif 5091 if ((spin->si_map.ga_len > 0 5092 && vim_strchr(spin->si_map.ga_data, c) 5093 != NULL) 5094 || vim_strchr(p, c) != NULL) 5095 smsg((char_u *)_("Duplicate character in MAP in %s line %d"), 5096 fname, lnum); 5097 } 5098 5099 /* We simply concatenate all the MAP strings, separated by 5100 * slashes. */ 5101 ga_concat(&spin->si_map, items[1]); 5102 ga_append(&spin->si_map, '/'); 5103 } 5104 } 5105 /* Accept "SAL from to" and "SAL from to # comment". */ 5106 else if (STRCMP(items[0], "SAL") == 0 5107 && (itemcnt == 3 || (itemcnt > 3 && items[3][0] == '#'))) 5108 { 5109 if (do_sal) 5110 { 5111 /* SAL item (sounds-a-like) 5112 * Either one of the known keys or a from-to pair. */ 5113 if (STRCMP(items[1], "followup") == 0) 5114 spin->si_followup = sal_to_bool(items[2]); 5115 else if (STRCMP(items[1], "collapse_result") == 0) 5116 spin->si_collapse = sal_to_bool(items[2]); 5117 else if (STRCMP(items[1], "remove_accents") == 0) 5118 spin->si_rem_accents = sal_to_bool(items[2]); 5119 else 5120 /* when "to" is "_" it means empty */ 5121 add_fromto(spin, &spin->si_sal, items[1], 5122 STRCMP(items[2], "_") == 0 ? 
(char_u *)"" 5123 : items[2]); 5124 } 5125 } 5126 else if (STRCMP(items[0], "SOFOFROM") == 0 && itemcnt == 2 5127 && sofofrom == NULL) 5128 { 5129 sofofrom = getroom_save(spin, items[1]); 5130 } 5131 else if (STRCMP(items[0], "SOFOTO") == 0 && itemcnt == 2 5132 && sofoto == NULL) 5133 { 5134 sofoto = getroom_save(spin, items[1]); 5135 } 5136 else 5137 smsg((char_u *)_("Unrecognized or duplicate item in %s line %d: %s"), 5138 fname, lnum, items[0]); 5139 } 5140 } 5141 5142 if (fol != NULL || low != NULL || upp != NULL) 5143 { 5144 if (spin->si_clear_chartab) 5145 { 5146 /* Clear the char type tables, don't want to use any of the 5147 * currently used spell properties. */ 5148 init_spell_chartab(); 5149 spin->si_clear_chartab = FALSE; 5150 } 5151 5152 /* 5153 * Don't write a word table for an ASCII file, so that we don't check 5154 * for conflicts with a word table that matches 'encoding'. 5155 * Don't write one for utf-8 either, we use utf_*() and 5156 * mb_get_class(), the list of chars in the file will be incomplete. 5157 */ 5158 if (!spin->si_ascii 5159 #ifdef FEAT_MBYTE 5160 && !enc_utf8 5161 #endif 5162 ) 5163 { 5164 if (fol == NULL || low == NULL || upp == NULL) 5165 smsg((char_u *)_("Missing FOL/LOW/UPP line in %s"), fname); 5166 else 5167 (void)set_spell_chartab(fol, low, upp); 5168 } 5169 5170 vim_free(fol); 5171 vim_free(low); 5172 vim_free(upp); 5173 } 5174 5175 /* Use compound specifications of the .aff file for the spell info. 
*/ 5176 if (compmax != 0) 5177 { 5178 aff_check_number(spin->si_compmax, compmax, "COMPOUNDMAX"); 5179 spin->si_compmax = compmax; 5180 } 5181 5182 if (compminlen != 0) 5183 { 5184 aff_check_number(spin->si_compminlen, compminlen, "COMPOUNDMIN"); 5185 spin->si_compminlen = compminlen; 5186 } 5187 5188 if (compsylmax != 0) 5189 { 5190 if (syllable == NULL) 5191 smsg((char_u *)_("COMPOUNDSYLMAX used without SYLLABLE")); 5192 aff_check_number(spin->si_compsylmax, compsylmax, "COMPOUNDSYLMAX"); 5193 spin->si_compsylmax = compsylmax; 5194 } 5195 5196 if (compflags != NULL) 5197 process_compflags(spin, aff, compflags); 5198 5199 /* Check that we didn't use too many renumbered flags. */ 5200 if (spin->si_newcompID < spin->si_newprefID) 5201 { 5202 if (spin->si_newcompID == 127 || spin->si_newcompID == 255) 5203 MSG(_("Too many postponed prefixes")); 5204 else if (spin->si_newprefID == 0 || spin->si_newprefID == 127) 5205 MSG(_("Too many compound flags")); 5206 else 5207 MSG(_("Too many posponed prefixes and/or compound flags")); 5208 } 5209 5210 if (syllable != NULL) 5211 { 5212 aff_check_string(spin->si_syllable, syllable, "SYLLABLE"); 5213 spin->si_syllable = syllable; 5214 } 5215 5216 if (sofofrom != NULL || sofoto != NULL) 5217 { 5218 if (sofofrom == NULL || sofoto == NULL) 5219 smsg((char_u *)_("Missing SOFO%s line in %s"), 5220 sofofrom == NULL ? "FROM" : "TO", fname); 5221 else if (spin->si_sal.ga_len > 0) 5222 smsg((char_u *)_("Both SAL and SOFO lines in %s"), fname); 5223 else 5224 { 5225 aff_check_string(spin->si_sofofr, sofofrom, "SOFOFROM"); 5226 aff_check_string(spin->si_sofoto, sofoto, "SOFOTO"); 5227 spin->si_sofofr = sofofrom; 5228 spin->si_sofoto = sofoto; 5229 } 5230 } 5231 5232 if (midword != NULL) 5233 { 5234 aff_check_string(spin->si_midword, midword, "MIDWORD"); 5235 spin->si_midword = midword; 5236 } 5237 5238 vim_free(pc); 5239 fclose(fd); 5240 return aff; 5241 } 5242 5243 /* 5244 * Turn an affix flag name into a number, according to the FLAG type. 
5245 * returns zero for failure. 5246 */ 5247 static unsigned 5248 affitem2flag(flagtype, item, fname, lnum) 5249 int flagtype; 5250 char_u *item; 5251 char_u *fname; 5252 int lnum; 5253 { 5254 unsigned res; 5255 char_u *p = item; 5256 5257 res = get_affitem(flagtype, &p); 5258 if (res == 0) 5259 { 5260 if (flagtype == AFT_NUM) 5261 smsg((char_u *)_("Flag is not a number in %s line %d: %s"), 5262 fname, lnum, item); 5263 else 5264 smsg((char_u *)_("Illegal flag in %s line %d: %s"), 5265 fname, lnum, item); 5266 } 5267 if (*p != NUL) 5268 { 5269 smsg((char_u *)_(e_affname), fname, lnum, item); 5270 return 0; 5271 } 5272 5273 return res; 5274 } 5275 5276 /* 5277 * Get one affix name from "*pp" and advance the pointer. 5278 * Returns zero for an error, still advances the pointer then. 5279 */ 5280 static unsigned 5281 get_affitem(flagtype, pp) 5282 int flagtype; 5283 char_u **pp; 5284 { 5285 int res; 5286 5287 if (flagtype == AFT_NUM) 5288 { 5289 if (!VIM_ISDIGIT(**pp)) 5290 { 5291 ++*pp; /* always advance, avoid getting stuck */ 5292 return 0; 5293 } 5294 res = getdigits(pp); 5295 } 5296 else 5297 { 5298 #ifdef FEAT_MBYTE 5299 res = mb_ptr2char_adv(pp); 5300 #else 5301 res = *(*pp)++; 5302 #endif 5303 if (flagtype == AFT_LONG || (flagtype == AFT_CAPLONG 5304 && res >= 'A' && res <= 'Z')) 5305 { 5306 if (**pp == NUL) 5307 return 0; 5308 #ifdef FEAT_MBYTE 5309 res = mb_ptr2char_adv(pp) + (res << 16); 5310 #else 5311 res = *(*pp)++ + (res << 16); 5312 #endif 5313 } 5314 } 5315 return res; 5316 } 5317 5318 /* 5319 * Process the "compflags" string used in an affix file and append it to 5320 * spin->si_compflags. 5321 * The processing involves changing the affix names to ID numbers, so that 5322 * they fit in one byte. 
5323 */ 5324 static void 5325 process_compflags(spin, aff, compflags) 5326 spellinfo_T *spin; 5327 afffile_T *aff; 5328 char_u *compflags; 5329 { 5330 char_u *p; 5331 char_u *prevp; 5332 unsigned flag; 5333 compitem_T *ci; 5334 int id; 5335 int len; 5336 char_u *tp; 5337 char_u key[AH_KEY_LEN]; 5338 hashitem_T *hi; 5339 5340 /* Make room for the old and the new compflags, concatenated with a / in 5341 * between. Processing it makes it shorter, but we don't know by how 5342 * much, thus allocate the maximum. */ 5343 len = STRLEN(compflags) + 1; 5344 if (spin->si_compflags != NULL) 5345 len += STRLEN(spin->si_compflags) + 1; 5346 p = getroom(spin, len, FALSE); 5347 if (p == NULL) 5348 return; 5349 if (spin->si_compflags != NULL) 5350 { 5351 STRCPY(p, spin->si_compflags); 5352 STRCAT(p, "/"); 5353 } 5354 spin->si_compflags = p; 5355 tp = p + STRLEN(p); 5356 5357 for (p = compflags; *p != NUL; ) 5358 { 5359 if (vim_strchr((char_u *)"/*+[]", *p) != NULL) 5360 /* Copy non-flag characters directly. */ 5361 *tp++ = *p++; 5362 else 5363 { 5364 /* First get the flag number, also checks validity. */ 5365 prevp = p; 5366 flag = get_affitem(aff->af_flagtype, &p); 5367 if (flag != 0) 5368 { 5369 /* Find the flag in the hashtable. If it was used before, use 5370 * the existing ID. Otherwise add a new entry. */ 5371 vim_strncpy(key, prevp, p - prevp); 5372 hi = hash_find(&aff->af_comp, key); 5373 if (!HASHITEM_EMPTY(hi)) 5374 id = HI2CI(hi)->ci_newID; 5375 else 5376 { 5377 ci = (compitem_T *)getroom(spin, sizeof(compitem_T), TRUE); 5378 if (ci == NULL) 5379 break; 5380 STRCPY(ci->ci_key, key); 5381 ci->ci_flag = flag; 5382 /* Avoid using a flag ID that has a special meaning in a 5383 * regexp (also inside []). 
*/ 5384 do 5385 { 5386 check_renumber(spin); 5387 id = spin->si_newcompID--; 5388 } while (vim_strchr((char_u *)"/+*[]\\-^", id) != NULL); 5389 ci->ci_newID = id; 5390 hash_add(&aff->af_comp, ci->ci_key); 5391 } 5392 *tp++ = id; 5393 } 5394 if (aff->af_flagtype == AFT_NUM && *p == ',') 5395 ++p; 5396 } 5397 } 5398 5399 *tp = NUL; 5400 } 5401 5402 /* 5403 * Check that the new IDs for postponed affixes and compounding don't overrun 5404 * each other. We have almost 255 available, but start at 0-127 to avoid 5405 * using two bytes for utf-8. When the 0-127 range is used up go to 128-255. 5406 * When that is used up an error message is given. 5407 */ 5408 static void 5409 check_renumber(spin) 5410 spellinfo_T *spin; 5411 { 5412 if (spin->si_newprefID == spin->si_newcompID && spin->si_newcompID < 128) 5413 { 5414 spin->si_newprefID = 127; 5415 spin->si_newcompID = 255; 5416 } 5417 } 5418 5419 /* 5420 * Return TRUE if flag "flag" appears in affix list "afflist". 5421 */ 5422 static int 5423 flag_in_afflist(flagtype, afflist, flag) 5424 int flagtype; 5425 char_u *afflist; 5426 unsigned flag; 5427 { 5428 char_u *p; 5429 unsigned n; 5430 5431 switch (flagtype) 5432 { 5433 case AFT_CHAR: 5434 return vim_strchr(afflist, flag) != NULL; 5435 5436 case AFT_CAPLONG: 5437 case AFT_LONG: 5438 for (p = afflist; *p != NUL; ) 5439 { 5440 #ifdef FEAT_MBYTE 5441 n = mb_ptr2char_adv(&p); 5442 #else 5443 n = *p++; 5444 #endif 5445 if ((flagtype == AFT_LONG || (n >= 'A' && n <= 'Z')) 5446 && *p != NUL) 5447 #ifdef FEAT_MBYTE 5448 n = mb_ptr2char_adv(&p) + (n << 16); 5449 #else 5450 n = *p++ + (n << 16); 5451 #endif 5452 if (n == flag) 5453 return TRUE; 5454 } 5455 break; 5456 5457 case AFT_NUM: 5458 for (p = afflist; *p != NUL; ) 5459 { 5460 n = getdigits(&p); 5461 if (n == flag) 5462 return TRUE; 5463 if (*p != NUL) /* skip over comma */ 5464 ++p; 5465 } 5466 break; 5467 } 5468 return FALSE; 5469 } 5470 5471 /* 5472 * Give a warning when "spinval" and "affval" numbers are set and not the 
same. 5473 */ 5474 static void 5475 aff_check_number(spinval, affval, name) 5476 int spinval; 5477 int affval; 5478 char *name; 5479 { 5480 if (spinval != 0 && spinval != affval) 5481 smsg((char_u *)_("%s value differs from what is used in another .aff file"), name); 5482 } 5483 5484 /* 5485 * Give a warning when "spinval" and "affval" strings are set and not the same. 5486 */ 5487 static void 5488 aff_check_string(spinval, affval, name) 5489 char_u *spinval; 5490 char_u *affval; 5491 char *name; 5492 { 5493 if (spinval != NULL && STRCMP(spinval, affval) != 0) 5494 smsg((char_u *)_("%s value differs from what is used in another .aff file"), name); 5495 } 5496 5497 /* 5498 * Return TRUE if strings "s1" and "s2" are equal. Also consider both being 5499 * NULL as equal. 5500 */ 5501 static int 5502 str_equal(s1, s2) 5503 char_u *s1; 5504 char_u *s2; 5505 { 5506 if (s1 == NULL || s2 == NULL) 5507 return s1 == s2; 5508 return STRCMP(s1, s2) == 0; 5509 } 5510 5511 /* 5512 * Add a from-to item to "gap". Used for REP and SAL items. 5513 * They are stored case-folded. 5514 */ 5515 static void 5516 add_fromto(spin, gap, from, to) 5517 spellinfo_T *spin; 5518 garray_T *gap; 5519 char_u *from; 5520 char_u *to; 5521 { 5522 fromto_T *ftp; 5523 char_u word[MAXWLEN]; 5524 5525 if (ga_grow(gap, 1) == OK) 5526 { 5527 ftp = ((fromto_T *)gap->ga_data) + gap->ga_len; 5528 (void)spell_casefold(from, STRLEN(from), word, MAXWLEN); 5529 ftp->ft_from = getroom_save(spin, word); 5530 (void)spell_casefold(to, STRLEN(to), word, MAXWLEN); 5531 ftp->ft_to = getroom_save(spin, word); 5532 ++gap->ga_len; 5533 } 5534 } 5535 5536 /* 5537 * Convert a boolean argument in a SAL line to TRUE or FALSE; 5538 */ 5539 static int 5540 sal_to_bool(s) 5541 char_u *s; 5542 { 5543 return STRCMP(s, "1") == 0 || STRCMP(s, "true") == 0; 5544 } 5545 5546 /* 5547 * Return TRUE if string "s" contains a non-ASCII character (128 or higher). 5548 * When "s" is NULL FALSE is returned. 
5549 */ 5550 static int 5551 has_non_ascii(s) 5552 char_u *s; 5553 { 5554 char_u *p; 5555 5556 if (s != NULL) 5557 for (p = s; *p != NUL; ++p) 5558 if (*p >= 128) 5559 return TRUE; 5560 return FALSE; 5561 } 5562 5563 /* 5564 * Free the structure filled by spell_read_aff(). 5565 */ 5566 static void 5567 spell_free_aff(aff) 5568 afffile_T *aff; 5569 { 5570 hashtab_T *ht; 5571 hashitem_T *hi; 5572 int todo; 5573 affheader_T *ah; 5574 affentry_T *ae; 5575 5576 vim_free(aff->af_enc); 5577 5578 /* All this trouble to free the "ae_prog" items... */ 5579 for (ht = &aff->af_pref; ; ht = &aff->af_suff) 5580 { 5581 todo = ht->ht_used; 5582 for (hi = ht->ht_array; todo > 0; ++hi) 5583 { 5584 if (!HASHITEM_EMPTY(hi)) 5585 { 5586 --todo; 5587 ah = HI2AH(hi); 5588 for (ae = ah->ah_first; ae != NULL; ae = ae->ae_next) 5589 vim_free(ae->ae_prog); 5590 } 5591 } 5592 if (ht == &aff->af_suff) 5593 break; 5594 } 5595 5596 hash_clear(&aff->af_pref); 5597 hash_clear(&aff->af_suff); 5598 hash_clear(&aff->af_comp); 5599 } 5600 5601 /* 5602 * Read dictionary file "fname". 5603 * Returns OK or FAIL; 5604 */ 5605 static int 5606 spell_read_dic(spin, fname, affile) 5607 spellinfo_T *spin; 5608 char_u *fname; 5609 afffile_T *affile; 5610 { 5611 hashtab_T ht; 5612 char_u line[MAXLINELEN]; 5613 char_u *p; 5614 char_u *afflist; 5615 char_u store_afflist[MAXWLEN]; 5616 int pfxlen; 5617 int need_affix; 5618 char_u *dw; 5619 char_u *pc; 5620 char_u *w; 5621 int l; 5622 hash_T hash; 5623 hashitem_T *hi; 5624 FILE *fd; 5625 int lnum = 1; 5626 int non_ascii = 0; 5627 int retval = OK; 5628 char_u message[MAXLINELEN + MAXWLEN]; 5629 int flags; 5630 int duplicate = 0; 5631 5632 /* 5633 * Open the file. 5634 */ 5635 fd = mch_fopen((char *)fname, "r"); 5636 if (fd == NULL) 5637 { 5638 EMSG2(_(e_notopen), fname); 5639 return FAIL; 5640 } 5641 5642 /* The hashtable is only used to detect duplicated words. 
*/ 5643 hash_init(&ht); 5644 5645 if (spin->si_verbose || p_verbose > 2) 5646 { 5647 if (!spin->si_verbose) 5648 verbose_enter(); 5649 smsg((char_u *)_("Reading dictionary file %s ..."), fname); 5650 out_flush(); 5651 if (!spin->si_verbose) 5652 verbose_leave(); 5653 } 5654 5655 /* start with a message for the first line */ 5656 spin->si_msg_count = 999999; 5657 5658 /* Read and ignore the first line: word count. */ 5659 (void)vim_fgets(line, MAXLINELEN, fd); 5660 if (!vim_isdigit(*skipwhite(line))) 5661 EMSG2(_("E760: No word count in %s"), fname); 5662 5663 /* 5664 * Read all the lines in the file one by one. 5665 * The words are converted to 'encoding' here, before being added to 5666 * the hashtable. 5667 */ 5668 while (!vim_fgets(line, MAXLINELEN, fd) && !got_int) 5669 { 5670 line_breakcheck(); 5671 ++lnum; 5672 if (line[0] == '#' || line[0] == '/') 5673 continue; /* comment line */ 5674 5675 /* Remove CR, LF and white space from the end. White space halfway 5676 * the word is kept to allow e.g., "et al.". */ 5677 l = STRLEN(line); 5678 while (l > 0 && line[l - 1] <= ' ') 5679 --l; 5680 if (l == 0) 5681 continue; /* empty line */ 5682 line[l] = NUL; 5683 5684 /* Find the optional affix names. Replace the SLASH character by a 5685 * slash. */ 5686 afflist = NULL; 5687 for (p = line; *p != NUL; mb_ptr_adv(p)) 5688 { 5689 if (*p == affile->af_slash) 5690 *p = '/'; 5691 else if (*p == '/') 5692 { 5693 *p = NUL; 5694 afflist = p + 1; 5695 break; 5696 } 5697 } 5698 5699 /* Skip non-ASCII words when "spin->si_ascii" is TRUE. */ 5700 if (spin->si_ascii && has_non_ascii(line)) 5701 { 5702 ++non_ascii; 5703 continue; 5704 } 5705 5706 #ifdef FEAT_MBYTE 5707 /* Convert from "SET" to 'encoding' when needed. 
*/ 5708 if (spin->si_conv.vc_type != CONV_NONE) 5709 { 5710 pc = string_convert(&spin->si_conv, line, NULL); 5711 if (pc == NULL) 5712 { 5713 smsg((char_u *)_("Conversion failure for word in %s line %d: %s"), 5714 fname, lnum, line); 5715 continue; 5716 } 5717 w = pc; 5718 } 5719 else 5720 #endif 5721 { 5722 pc = NULL; 5723 w = line; 5724 } 5725 5726 /* This takes time, print a message every 10000 words. */ 5727 if (spin->si_verbose && spin->si_msg_count > 10000) 5728 { 5729 spin->si_msg_count = 0; 5730 vim_snprintf((char *)message, sizeof(message), 5731 _("line %6d, word %6d - %s"), 5732 lnum, spin->si_foldwcount + spin->si_keepwcount, w); 5733 msg_start(); 5734 msg_puts_long_attr(message, 0); 5735 msg_clr_eos(); 5736 msg_didout = FALSE; 5737 msg_col = 0; 5738 out_flush(); 5739 } 5740 5741 /* Store the word in the hashtable to be able to find duplicates. */ 5742 dw = (char_u *)getroom_save(spin, w); 5743 if (dw == NULL) 5744 retval = FAIL; 5745 vim_free(pc); 5746 if (retval == FAIL) 5747 break; 5748 5749 hash = hash_hash(dw); 5750 hi = hash_lookup(&ht, dw, hash); 5751 if (!HASHITEM_EMPTY(hi)) 5752 { 5753 if (p_verbose > 0) 5754 smsg((char_u *)_("Duplicate word in %s line %d: %s"), 5755 fname, lnum, dw); 5756 else if (duplicate == 0) 5757 smsg((char_u *)_("First duplicate word in %s line %d: %s"), 5758 fname, lnum, dw); 5759 ++duplicate; 5760 } 5761 else 5762 hash_add_item(&ht, hi, dw, hash); 5763 5764 flags = 0; 5765 store_afflist[0] = NUL; 5766 pfxlen = 0; 5767 need_affix = FALSE; 5768 if (afflist != NULL) 5769 { 5770 /* Check for affix name that stands for keep-case word and stands 5771 * for rare word (if defined). 
*/ 5772 if (affile->af_kep != 0 && flag_in_afflist( 5773 affile->af_flagtype, afflist, affile->af_kep)) 5774 flags |= WF_KEEPCAP | WF_FIXCAP; 5775 if (affile->af_rar != 0 && flag_in_afflist( 5776 affile->af_flagtype, afflist, affile->af_rar)) 5777 flags |= WF_RARE; 5778 if (affile->af_bad != 0 && flag_in_afflist( 5779 affile->af_flagtype, afflist, affile->af_bad)) 5780 flags |= WF_BANNED; 5781 if (affile->af_needaffix != 0 && flag_in_afflist( 5782 affile->af_flagtype, afflist, affile->af_needaffix)) 5783 need_affix = TRUE; 5784 if (affile->af_needcomp != 0 && flag_in_afflist( 5785 affile->af_flagtype, afflist, affile->af_needcomp)) 5786 flags |= WF_NEEDCOMP; 5787 5788 if (affile->af_pfxpostpone) 5789 /* Need to store the list of prefix IDs with the word. */ 5790 pfxlen = get_pfxlist(affile, afflist, store_afflist); 5791 5792 if (spin->si_compflags != NULL) 5793 /* Need to store the list of compound flags with the word. 5794 * Concatenate them to the list of prefix IDs. */ 5795 get_compflags(affile, afflist, store_afflist + pfxlen); 5796 } 5797 5798 /* Add the word to the word tree(s). */ 5799 if (store_word(spin, dw, flags, spin->si_region, 5800 store_afflist, need_affix) == FAIL) 5801 retval = FAIL; 5802 5803 if (afflist != NULL) 5804 { 5805 /* Find all matching suffixes and add the resulting words. 5806 * Additionally do matching prefixes that combine. */ 5807 if (store_aff_word(spin, dw, afflist, affile, 5808 &affile->af_suff, &affile->af_pref, 5809 FALSE, flags, store_afflist, pfxlen) == FAIL) 5810 retval = FAIL; 5811 5812 /* Find all matching prefixes and add the resulting words. 
*/ 5813 if (store_aff_word(spin, dw, afflist, affile, 5814 &affile->af_pref, NULL, 5815 FALSE, flags, store_afflist, pfxlen) == FAIL) 5816 retval = FAIL; 5817 } 5818 } 5819 5820 if (duplicate > 0) 5821 smsg((char_u *)_("%d duplicate word(s) in %s"), duplicate, fname); 5822 if (spin->si_ascii && non_ascii > 0) 5823 smsg((char_u *)_("Ignored %d word(s) with non-ASCII characters in %s"), 5824 non_ascii, fname); 5825 hash_clear(&ht); 5826 5827 fclose(fd); 5828 return retval; 5829 } 5830 5831 /* 5832 * Get the list of prefix IDs from the affix list "afflist". 5833 * Used for PFXPOSTPONE. 5834 * Put the resulting flags in "store_afflist[MAXWLEN]" with a terminating NUL 5835 * and return the number of affixes. 5836 */ 5837 static int 5838 get_pfxlist(affile, afflist, store_afflist) 5839 afffile_T *affile; 5840 char_u *afflist; 5841 char_u *store_afflist; 5842 { 5843 char_u *p; 5844 char_u *prevp; 5845 int cnt = 0; 5846 int id; 5847 char_u key[AH_KEY_LEN]; 5848 hashitem_T *hi; 5849 5850 for (p = afflist; *p != NUL; ) 5851 { 5852 prevp = p; 5853 if (get_affitem(affile->af_flagtype, &p) != 0) 5854 { 5855 /* A flag is a postponed prefix flag if it appears in "af_pref" 5856 * and it's ID is not zero. */ 5857 vim_strncpy(key, prevp, p - prevp); 5858 hi = hash_find(&affile->af_pref, key); 5859 if (!HASHITEM_EMPTY(hi)) 5860 { 5861 id = HI2AH(hi)->ah_newID; 5862 if (id != 0) 5863 store_afflist[cnt++] = id; 5864 } 5865 } 5866 if (affile->af_flagtype == AFT_NUM && *p == ',') 5867 ++p; 5868 } 5869 5870 store_afflist[cnt] = NUL; 5871 return cnt; 5872 } 5873 5874 /* 5875 * Get the list of compound IDs from the affix list "afflist" that are used 5876 * for compound words. 5877 * Puts the flags in "store_afflist[]". 
5878 */ 5879 static void 5880 get_compflags(affile, afflist, store_afflist) 5881 afffile_T *affile; 5882 char_u *afflist; 5883 char_u *store_afflist; 5884 { 5885 char_u *p; 5886 char_u *prevp; 5887 int cnt = 0; 5888 char_u key[AH_KEY_LEN]; 5889 hashitem_T *hi; 5890 5891 for (p = afflist; *p != NUL; ) 5892 { 5893 prevp = p; 5894 if (get_affitem(affile->af_flagtype, &p) != 0) 5895 { 5896 /* A flag is a compound flag if it appears in "af_comp". */ 5897 vim_strncpy(key, prevp, p - prevp); 5898 hi = hash_find(&affile->af_comp, key); 5899 if (!HASHITEM_EMPTY(hi)) 5900 store_afflist[cnt++] = HI2CI(hi)->ci_newID; 5901 } 5902 if (affile->af_flagtype == AFT_NUM && *p == ',') 5903 ++p; 5904 } 5905 5906 store_afflist[cnt] = NUL; 5907 } 5908 5909 /* 5910 * Apply affixes to a word and store the resulting words. 5911 * "ht" is the hashtable with affentry_T that need to be applied, either 5912 * prefixes or suffixes. 5913 * "xht", when not NULL, is the prefix hashtable, to be used additionally on 5914 * the resulting words for combining affixes. 5915 * 5916 * Returns FAIL when out of memory. 
5917 */ 5918 static int 5919 store_aff_word(spin, word, afflist, affile, ht, xht, comb, flags, 5920 pfxlist, pfxlen) 5921 spellinfo_T *spin; /* spell info */ 5922 char_u *word; /* basic word start */ 5923 char_u *afflist; /* list of names of supported affixes */ 5924 afffile_T *affile; 5925 hashtab_T *ht; 5926 hashtab_T *xht; 5927 int comb; /* only use affixes that combine */ 5928 int flags; /* flags for the word */ 5929 char_u *pfxlist; /* list of prefix IDs */ 5930 int pfxlen; /* nr of flags in "pfxlist" for prefixes, rest 5931 * is compound flags */ 5932 { 5933 int todo; 5934 hashitem_T *hi; 5935 affheader_T *ah; 5936 affentry_T *ae; 5937 regmatch_T regmatch; 5938 char_u newword[MAXWLEN]; 5939 int retval = OK; 5940 int i; 5941 char_u *p; 5942 int use_flags; 5943 char_u *use_pfxlist; 5944 char_u pfx_pfxlist[MAXWLEN]; 5945 size_t wordlen = STRLEN(word); 5946 5947 todo = ht->ht_used; 5948 for (hi = ht->ht_array; todo > 0 && retval == OK; ++hi) 5949 { 5950 if (!HASHITEM_EMPTY(hi)) 5951 { 5952 --todo; 5953 ah = HI2AH(hi); 5954 5955 /* Check that the affix combines, if required, and that the word 5956 * supports this affix. */ 5957 if ((!comb || ah->ah_combine) && flag_in_afflist( 5958 affile->af_flagtype, afflist, ah->ah_flag)) 5959 { 5960 /* Loop over all affix entries with this name. */ 5961 for (ae = ah->ah_first; ae != NULL; ae = ae->ae_next) 5962 { 5963 /* Check the condition. It's not logical to match case 5964 * here, but it is required for compatibility with 5965 * Myspell. 5966 * Another requirement from Myspell is that the chop 5967 * string is shorter than the word itself. 5968 * For prefixes, when "PFXPOSTPONE" was used, only do 5969 * prefixes with a chop string. 
*/ 5970 regmatch.regprog = ae->ae_prog; 5971 regmatch.rm_ic = FALSE; 5972 if ((xht != NULL || !affile->af_pfxpostpone 5973 || ae->ae_chop != NULL) 5974 && (ae->ae_chop == NULL 5975 || STRLEN(ae->ae_chop) < wordlen) 5976 && (ae->ae_prog == NULL 5977 || vim_regexec(®match, word, (colnr_T)0))) 5978 { 5979 /* Match. Remove the chop and add the affix. */ 5980 if (xht == NULL) 5981 { 5982 /* prefix: chop/add at the start of the word */ 5983 if (ae->ae_add == NULL) 5984 *newword = NUL; 5985 else 5986 STRCPY(newword, ae->ae_add); 5987 p = word; 5988 if (ae->ae_chop != NULL) 5989 { 5990 /* Skip chop string. */ 5991 #ifdef FEAT_MBYTE 5992 if (has_mbyte) 5993 { 5994 i = mb_charlen(ae->ae_chop); 5995 for ( ; i > 0; --i) 5996 mb_ptr_adv(p); 5997 } 5998 else 5999 #endif 6000 p += STRLEN(ae->ae_chop); 6001 } 6002 STRCAT(newword, p); 6003 } 6004 else 6005 { 6006 /* suffix: chop/add at the end of the word */ 6007 STRCPY(newword, word); 6008 if (ae->ae_chop != NULL) 6009 { 6010 /* Remove chop string. */ 6011 p = newword + STRLEN(newword); 6012 i = MB_CHARLEN(ae->ae_chop); 6013 for ( ; i > 0; --i) 6014 mb_ptr_back(newword, p); 6015 *p = NUL; 6016 } 6017 if (ae->ae_add != NULL) 6018 STRCAT(newword, ae->ae_add); 6019 } 6020 6021 /* Obey the "rare" flag of the affix. */ 6022 if (ae->ae_rare) 6023 use_flags = flags | WF_RARE; 6024 else 6025 use_flags = flags; 6026 6027 /* Obey the "nocomp" flag of the affix: don't use the 6028 * compound flags. */ 6029 use_pfxlist = pfxlist; 6030 if (ae->ae_nocomp && pfxlist != NULL) 6031 { 6032 vim_strncpy(pfx_pfxlist, pfxlist, pfxlen); 6033 use_pfxlist = pfx_pfxlist; 6034 } 6035 6036 /* When there are postponed prefixes... */ 6037 if (spin->si_prefroot != NULL 6038 && spin->si_prefroot->wn_sibling != NULL) 6039 { 6040 /* ... add a flag to indicate an affix was used. */ 6041 use_flags |= WF_HAS_AFF; 6042 6043 /* ... don't use a prefix list if combining 6044 * affixes is not allowed. But do use the 6045 * compound flags after them. 
*/ 6046 if ((!ah->ah_combine || comb) && pfxlist != NULL) 6047 use_pfxlist += pfxlen; 6048 } 6049 6050 /* Store the modified word. */ 6051 if (store_word(spin, newword, use_flags, 6052 spin->si_region, use_pfxlist, FALSE) == FAIL) 6053 retval = FAIL; 6054 6055 /* When added a suffix and combining is allowed also 6056 * try adding prefixes additionally. */ 6057 if (xht != NULL && ah->ah_combine) 6058 if (store_aff_word(spin, newword, afflist, affile, 6059 xht, NULL, TRUE, 6060 use_flags, use_pfxlist, pfxlen) == FAIL) 6061 retval = FAIL; 6062 } 6063 } 6064 } 6065 } 6066 } 6067 6068 return retval; 6069 } 6070 6071 /* 6072 * Read a file with a list of words. 6073 */ 6074 static int 6075 spell_read_wordfile(spin, fname) 6076 spellinfo_T *spin; 6077 char_u *fname; 6078 { 6079 FILE *fd; 6080 long lnum = 0; 6081 char_u rline[MAXLINELEN]; 6082 char_u *line; 6083 char_u *pc = NULL; 6084 char_u *p; 6085 int l; 6086 int retval = OK; 6087 int did_word = FALSE; 6088 int non_ascii = 0; 6089 int flags; 6090 int regionmask; 6091 6092 /* 6093 * Open the file. 6094 */ 6095 fd = mch_fopen((char *)fname, "r"); 6096 if (fd == NULL) 6097 { 6098 EMSG2(_(e_notopen), fname); 6099 return FAIL; 6100 } 6101 6102 if (spin->si_verbose || p_verbose > 2) 6103 { 6104 if (!spin->si_verbose) 6105 verbose_enter(); 6106 smsg((char_u *)_("Reading word file %s ..."), fname); 6107 out_flush(); 6108 if (!spin->si_verbose) 6109 verbose_leave(); 6110 } 6111 6112 /* 6113 * Read all the lines in the file one by one. 6114 */ 6115 while (!vim_fgets(rline, MAXLINELEN, fd) && !got_int) 6116 { 6117 line_breakcheck(); 6118 ++lnum; 6119 6120 /* Skip comment lines. */ 6121 if (*rline == '#') 6122 continue; 6123 6124 /* Remove CR, LF and white space from the end. */ 6125 l = STRLEN(rline); 6126 while (l > 0 && rline[l - 1] <= ' ') 6127 --l; 6128 if (l == 0) 6129 continue; /* empty or blank line */ 6130 rline[l] = NUL; 6131 6132 /* Convert from "=encoding={encoding}" to 'encoding' when needed. 
*/ 6133 vim_free(pc); 6134 #ifdef FEAT_MBYTE 6135 if (spin->si_conv.vc_type != CONV_NONE) 6136 { 6137 pc = string_convert(&spin->si_conv, rline, NULL); 6138 if (pc == NULL) 6139 { 6140 smsg((char_u *)_("Conversion failure for word in %s line %d: %s"), 6141 fname, lnum, rline); 6142 continue; 6143 } 6144 line = pc; 6145 } 6146 else 6147 #endif 6148 { 6149 pc = NULL; 6150 line = rline; 6151 } 6152 6153 if (*line == '/') 6154 { 6155 ++line; 6156 if (STRNCMP(line, "encoding=", 9) == 0) 6157 { 6158 if (spin->si_conv.vc_type != CONV_NONE) 6159 smsg((char_u *)_("Duplicate /encoding= line ignored in %s line %d: %s"), 6160 fname, lnum, line - 1); 6161 else if (did_word) 6162 smsg((char_u *)_("/encoding= line after word ignored in %s line %d: %s"), 6163 fname, lnum, line - 1); 6164 else 6165 { 6166 #ifdef FEAT_MBYTE 6167 char_u *enc; 6168 6169 /* Setup for conversion to 'encoding'. */ 6170 line += 10; 6171 enc = enc_canonize(line); 6172 if (enc != NULL && !spin->si_ascii 6173 && convert_setup(&spin->si_conv, enc, 6174 p_enc) == FAIL) 6175 smsg((char_u *)_("Conversion in %s not supported: from %s to %s"), 6176 fname, line, p_enc); 6177 vim_free(enc); 6178 spin->si_conv.vc_fail = TRUE; 6179 #else 6180 smsg((char_u *)_("Conversion in %s not supported"), fname); 6181 #endif 6182 } 6183 continue; 6184 } 6185 6186 if (STRNCMP(line, "regions=", 8) == 0) 6187 { 6188 if (spin->si_region_count > 1) 6189 smsg((char_u *)_("Duplicate /regions= line ignored in %s line %d: %s"), 6190 fname, lnum, line); 6191 else 6192 { 6193 line += 8; 6194 if (STRLEN(line) > 16) 6195 smsg((char_u *)_("Too many regions in %s line %d: %s"), 6196 fname, lnum, line); 6197 else 6198 { 6199 spin->si_region_count = STRLEN(line) / 2; 6200 STRCPY(spin->si_region_name, line); 6201 6202 /* Adjust the mask for a word valid in all regions. 
*/ 6203 spin->si_region = (1 << spin->si_region_count) - 1; 6204 } 6205 } 6206 continue; 6207 } 6208 6209 smsg((char_u *)_("/ line ignored in %s line %d: %s"), 6210 fname, lnum, line - 1); 6211 continue; 6212 } 6213 6214 flags = 0; 6215 regionmask = spin->si_region; 6216 6217 /* Check for flags and region after a slash. */ 6218 p = vim_strchr(line, '/'); 6219 if (p != NULL) 6220 { 6221 *p++ = NUL; 6222 while (*p != NUL) 6223 { 6224 if (*p == '=') /* keep-case word */ 6225 flags |= WF_KEEPCAP | WF_FIXCAP; 6226 else if (*p == '!') /* Bad, bad, wicked word. */ 6227 flags |= WF_BANNED; 6228 else if (*p == '?') /* Rare word. */ 6229 flags |= WF_RARE; 6230 else if (VIM_ISDIGIT(*p)) /* region number(s) */ 6231 { 6232 if ((flags & WF_REGION) == 0) /* first one */ 6233 regionmask = 0; 6234 flags |= WF_REGION; 6235 6236 l = *p - '0'; 6237 if (l > spin->si_region_count) 6238 { 6239 smsg((char_u *)_("Invalid region nr in %s line %d: %s"), 6240 fname, lnum, p); 6241 break; 6242 } 6243 regionmask |= 1 << (l - 1); 6244 } 6245 else 6246 { 6247 smsg((char_u *)_("Unrecognized flags in %s line %d: %s"), 6248 fname, lnum, p); 6249 break; 6250 } 6251 ++p; 6252 } 6253 } 6254 6255 /* Skip non-ASCII words when "spin->si_ascii" is TRUE. */ 6256 if (spin->si_ascii && has_non_ascii(line)) 6257 { 6258 ++non_ascii; 6259 continue; 6260 } 6261 6262 /* Normal word: store it. */ 6263 if (store_word(spin, line, flags, regionmask, NULL, FALSE) == FAIL) 6264 { 6265 retval = FAIL; 6266 break; 6267 } 6268 did_word = TRUE; 6269 } 6270 6271 vim_free(pc); 6272 fclose(fd); 6273 6274 if (spin->si_ascii && non_ascii > 0 && (spin->si_verbose || p_verbose > 2)) 6275 { 6276 if (p_verbose > 2) 6277 verbose_enter(); 6278 smsg((char_u *)_("Ignored %d words with non-ASCII characters"), 6279 non_ascii); 6280 if (p_verbose > 2) 6281 verbose_leave(); 6282 } 6283 return retval; 6284 } 6285 6286 /* 6287 * Get part of an sblock_T, "len" bytes long. 
6288 * This avoids calling free() for every little struct we use (and keeping 6289 * track of them). 6290 * The memory is cleared to all zeros. 6291 * Returns NULL when out of memory. 6292 */ 6293 static void * 6294 getroom(spin, len, align) 6295 spellinfo_T *spin; 6296 size_t len; /* length needed */ 6297 int align; /* align for pointer */ 6298 { 6299 char_u *p; 6300 sblock_T *bl = spin->si_blocks; 6301 6302 if (align && bl != NULL) 6303 /* Round size up for alignment. On some systems structures need to be 6304 * aligned to the size of a pointer (e.g., SPARC). */ 6305 bl->sb_used = (bl->sb_used + sizeof(char *) - 1) 6306 & ~(sizeof(char *) - 1); 6307 6308 if (bl == NULL || bl->sb_used + len > SBLOCKSIZE) 6309 { 6310 /* Allocate a block of memory. This is not freed until much later. */ 6311 bl = (sblock_T *)alloc_clear((unsigned)(sizeof(sblock_T) + SBLOCKSIZE)); 6312 if (bl == NULL) 6313 return NULL; 6314 bl->sb_next = spin->si_blocks; 6315 spin->si_blocks = bl; 6316 bl->sb_used = 0; 6317 ++spin->si_blocks_cnt; 6318 } 6319 6320 p = bl->sb_data + bl->sb_used; 6321 bl->sb_used += len; 6322 6323 return p; 6324 } 6325 6326 /* 6327 * Make a copy of a string into memory allocated with getroom(). 6328 */ 6329 static char_u * 6330 getroom_save(spin, s) 6331 spellinfo_T *spin; 6332 char_u *s; 6333 { 6334 char_u *sc; 6335 6336 sc = (char_u *)getroom(spin, STRLEN(s) + 1, FALSE); 6337 if (sc != NULL) 6338 STRCPY(sc, s); 6339 return sc; 6340 } 6341 6342 6343 /* 6344 * Free the list of allocated sblock_T. 6345 */ 6346 static void 6347 free_blocks(bl) 6348 sblock_T *bl; 6349 { 6350 sblock_T *next; 6351 6352 while (bl != NULL) 6353 { 6354 next = bl->sb_next; 6355 vim_free(bl); 6356 bl = next; 6357 } 6358 } 6359 6360 /* 6361 * Allocate the root of a word tree. 6362 */ 6363 static wordnode_T * 6364 wordtree_alloc(spin) 6365 spellinfo_T *spin; 6366 { 6367 return (wordnode_T *)getroom(spin, sizeof(wordnode_T), TRUE); 6368 } 6369 6370 /* 6371 * Store a word in the tree(s). 
6372 * Always store it in the case-folded tree. For a keep-case word this is 6373 * useful when the word can also be used with all caps (no WF_FIXCAP flag) and 6374 * used to find suggestions. 6375 * For a keep-case word also store it in the keep-case tree. 6376 * When "pfxlist" is not NULL store the word for each postponed prefix ID and 6377 * compound flag. 6378 */ 6379 static int 6380 store_word(spin, word, flags, region, pfxlist, need_affix) 6381 spellinfo_T *spin; 6382 char_u *word; 6383 int flags; /* extra flags, WF_BANNED */ 6384 int region; /* supported region(s) */ 6385 char_u *pfxlist; /* list of prefix IDs or NULL */ 6386 int need_affix; /* only store word with affix ID */ 6387 { 6388 int len = STRLEN(word); 6389 int ct = captype(word, word + len); 6390 char_u foldword[MAXWLEN]; 6391 int res = OK; 6392 char_u *p; 6393 6394 (void)spell_casefold(word, len, foldword, MAXWLEN); 6395 for (p = pfxlist; res == OK; ++p) 6396 { 6397 if (!need_affix || (p != NULL && *p != NUL)) 6398 res = tree_add_word(spin, foldword, spin->si_foldroot, ct | flags, 6399 region, p == NULL ? 0 : *p); 6400 if (p == NULL || *p == NUL) 6401 break; 6402 } 6403 ++spin->si_foldwcount; 6404 6405 if (res == OK && (ct == WF_KEEPCAP || (flags & WF_KEEPCAP))) 6406 { 6407 for (p = pfxlist; res == OK; ++p) 6408 { 6409 if (!need_affix || (p != NULL && *p != NUL)) 6410 res = tree_add_word(spin, word, spin->si_keeproot, flags, 6411 region, p == NULL ? 0 : *p); 6412 if (p == NULL || *p == NUL) 6413 break; 6414 } 6415 ++spin->si_keepwcount; 6416 } 6417 return res; 6418 } 6419 6420 /* 6421 * Add word "word" to a word tree at "root". 6422 * When "flags" < 0 we are adding to the prefix tree where flags is used for 6423 * "rare" and "region" is the condition nr. 6424 * Returns FAIL when out of memory. 
6425 */ 6426 static int 6427 tree_add_word(spin, word, root, flags, region, affixID) 6428 spellinfo_T *spin; 6429 char_u *word; 6430 wordnode_T *root; 6431 int flags; 6432 int region; 6433 int affixID; 6434 { 6435 wordnode_T *node = root; 6436 wordnode_T *np; 6437 wordnode_T *copyp, **copyprev; 6438 wordnode_T **prev = NULL; 6439 int i; 6440 6441 /* Add each byte of the word to the tree, including the NUL at the end. */ 6442 for (i = 0; ; ++i) 6443 { 6444 /* When there is more than one reference to this node we need to make 6445 * a copy, so that we can modify it. Copy the whole list of siblings 6446 * (we don't optimize for a partly shared list of siblings). */ 6447 if (node != NULL && node->wn_refs > 1) 6448 { 6449 --node->wn_refs; 6450 copyprev = prev; 6451 for (copyp = node; copyp != NULL; copyp = copyp->wn_sibling) 6452 { 6453 /* Allocate a new node and copy the info. */ 6454 np = get_wordnode(spin); 6455 if (np == NULL) 6456 return FAIL; 6457 np->wn_child = copyp->wn_child; 6458 if (np->wn_child != NULL) 6459 ++np->wn_child->wn_refs; /* child gets extra ref */ 6460 np->wn_byte = copyp->wn_byte; 6461 if (np->wn_byte == NUL) 6462 { 6463 np->wn_flags = copyp->wn_flags; 6464 np->wn_region = copyp->wn_region; 6465 np->wn_affixID = copyp->wn_affixID; 6466 } 6467 6468 /* Link the new node in the list, there will be one ref. */ 6469 np->wn_refs = 1; 6470 *copyprev = np; 6471 copyprev = &np->wn_sibling; 6472 6473 /* Let "node" point to the head of the copied list. */ 6474 if (copyp == node) 6475 node = np; 6476 } 6477 } 6478 6479 /* Look for the sibling that has the same character. They are sorted 6480 * on byte value, thus stop searching when a sibling is found with a 6481 * higher byte value. For zero bytes (end of word) the sorting is 6482 * done on flags and then on affixID. */ 6483 while (node != NULL 6484 && (node->wn_byte < word[i] 6485 || (node->wn_byte == NUL 6486 && (flags < 0 6487 ? 
node->wn_affixID < affixID 6488 : node->wn_flags < (flags & WN_MASK) 6489 || (node->wn_flags == (flags & WN_MASK) 6490 && node->wn_affixID < affixID))))) 6491 { 6492 prev = &node->wn_sibling; 6493 node = *prev; 6494 } 6495 if (node == NULL 6496 || node->wn_byte != word[i] 6497 || (word[i] == NUL 6498 && (flags < 0 6499 || node->wn_flags != (flags & WN_MASK) 6500 || node->wn_affixID != affixID))) 6501 { 6502 /* Allocate a new node. */ 6503 np = get_wordnode(spin); 6504 if (np == NULL) 6505 return FAIL; 6506 np->wn_byte = word[i]; 6507 6508 /* If "node" is NULL this is a new child or the end of the sibling 6509 * list: ref count is one. Otherwise use ref count of sibling and 6510 * make ref count of sibling one (matters when inserting in front 6511 * of the list of siblings). */ 6512 if (node == NULL) 6513 np->wn_refs = 1; 6514 else 6515 { 6516 np->wn_refs = node->wn_refs; 6517 node->wn_refs = 1; 6518 } 6519 *prev = np; 6520 np->wn_sibling = node; 6521 node = np; 6522 } 6523 6524 if (word[i] == NUL) 6525 { 6526 node->wn_flags = flags; 6527 node->wn_region |= region; 6528 node->wn_affixID = affixID; 6529 break; 6530 } 6531 prev = &node->wn_child; 6532 node = *prev; 6533 } 6534 #ifdef SPELL_PRINTTREE 6535 smsg("Added \"%s\"", word); 6536 spell_print_tree(root->wn_sibling); 6537 #endif 6538 6539 /* count nr of words added since last message */ 6540 ++spin->si_msg_count; 6541 6542 if (spin->si_compress_cnt > 1) 6543 { 6544 if (--spin->si_compress_cnt == 1) 6545 /* Did enough words to lower the block count limit. */ 6546 spin->si_blocks_cnt += compress_inc; 6547 } 6548 6549 /* 6550 * When we have allocated lots of memory we need to compress the word tree 6551 * to free up some room. But compression is slow, and we might actually 6552 * need that room, thus only compress in the following situations: 6553 * 1. When not compressed before (si_compress_cnt == 0): when using 6554 * "compress_start" blocks. 6555 * 2. 
When compressed before and used "compress_inc" blocks before 6556 * adding "compress_added" words (si_compress_cnt > 1). 6557 * 3. When compressed before, added "compress_added" words 6558 * (si_compress_cnt == 1) and the number of free nodes drops below the 6559 * maximum word length. 6560 */ 6561 #ifndef SPELL_PRINTTREE 6562 if (spin->si_compress_cnt == 1 6563 ? spin->si_free_count < MAXWLEN 6564 : spin->si_blocks_cnt >= compress_start) 6565 #endif 6566 { 6567 /* Decrement the block counter. The effect is that we compress again 6568 * when the freed up room has been used and another "compress_inc" 6569 * blocks have been allocated. Unless "compress_added" words have 6570 * been added, then the limit is put back again. */ 6571 spin->si_blocks_cnt -= compress_inc; 6572 spin->si_compress_cnt = compress_added; 6573 6574 if (spin->si_verbose) 6575 { 6576 msg_start(); 6577 msg_puts((char_u *)_(msg_compressing)); 6578 msg_clr_eos(); 6579 msg_didout = FALSE; 6580 msg_col = 0; 6581 out_flush(); 6582 } 6583 6584 /* Compress both trees. Either they both have many nodes, which makes 6585 * compression useful, or one of them is small, which means 6586 * compression goes fast. */ 6587 wordtree_compress(spin, spin->si_foldroot); 6588 wordtree_compress(spin, spin->si_keeproot); 6589 } 6590 6591 return OK; 6592 } 6593 6594 /* 6595 * Check the 'mkspellmem' option. Return FAIL if it's wrong. 6596 * Sets "sps_flags". 
6597 */ 6598 int 6599 spell_check_msm() 6600 { 6601 char_u *p = p_msm; 6602 long start = 0; 6603 long inc = 0; 6604 long added = 0; 6605 6606 if (!VIM_ISDIGIT(*p)) 6607 return FAIL; 6608 /* block count = (value * 1024) / SBLOCKSIZE (but avoid overflow)*/ 6609 start = (getdigits(&p) * 10) / (SBLOCKSIZE / 102); 6610 if (*p != ',') 6611 return FAIL; 6612 ++p; 6613 if (!VIM_ISDIGIT(*p)) 6614 return FAIL; 6615 inc = (getdigits(&p) * 102) / (SBLOCKSIZE / 10); 6616 if (*p != ',') 6617 return FAIL; 6618 ++p; 6619 if (!VIM_ISDIGIT(*p)) 6620 return FAIL; 6621 added = getdigits(&p) * 1024; 6622 if (*p != NUL) 6623 return FAIL; 6624 6625 if (start == 0 || inc == 0 || added == 0 || inc > start) 6626 return FAIL; 6627 6628 compress_start = start; 6629 compress_inc = inc; 6630 compress_added = added; 6631 return OK; 6632 } 6633 6634 6635 /* 6636 * Get a wordnode_T, either from the list of previously freed nodes or 6637 * allocate a new one. 6638 */ 6639 static wordnode_T * 6640 get_wordnode(spin) 6641 spellinfo_T *spin; 6642 { 6643 wordnode_T *n; 6644 6645 if (spin->si_first_free == NULL) 6646 n = (wordnode_T *)getroom(spin, sizeof(wordnode_T), TRUE); 6647 else 6648 { 6649 n = spin->si_first_free; 6650 spin->si_first_free = n->wn_child; 6651 vim_memset(n, 0, sizeof(wordnode_T)); 6652 --spin->si_free_count; 6653 } 6654 #ifdef SPELL_PRINTTREE 6655 n->wn_nr = ++spin->si_wordnode_nr; 6656 #endif 6657 return n; 6658 } 6659 6660 /* 6661 * Decrement the reference count on a node (which is the head of a list of 6662 * siblings). If the reference count becomes zero free the node and its 6663 * siblings. 
6664 */ 6665 static void 6666 deref_wordnode(spin, node) 6667 spellinfo_T *spin; 6668 wordnode_T *node; 6669 { 6670 wordnode_T *np; 6671 6672 if (--node->wn_refs == 0) 6673 for (np = node; np != NULL; np = np->wn_sibling) 6674 { 6675 if (np->wn_child != NULL) 6676 deref_wordnode(spin, np->wn_child); 6677 free_wordnode(spin, np); 6678 } 6679 } 6680 6681 /* 6682 * Free a wordnode_T for re-use later. 6683 * Only the "wn_child" field becomes invalid. 6684 */ 6685 static void 6686 free_wordnode(spin, n) 6687 spellinfo_T *spin; 6688 wordnode_T *n; 6689 { 6690 n->wn_child = spin->si_first_free; 6691 spin->si_first_free = n; 6692 ++spin->si_free_count; 6693 } 6694 6695 /* 6696 * Compress a tree: find tails that are identical and can be shared. 6697 */ 6698 static void 6699 wordtree_compress(spin, root) 6700 spellinfo_T *spin; 6701 wordnode_T *root; 6702 { 6703 hashtab_T ht; 6704 int n; 6705 int tot = 0; 6706 int perc; 6707 6708 /* Skip the root itself, it's not actually used. The first sibling is the 6709 * start of the tree. */ 6710 if (root->wn_sibling != NULL) 6711 { 6712 hash_init(&ht); 6713 n = node_compress(spin, root->wn_sibling, &ht, &tot); 6714 6715 #ifndef SPELL_PRINTTREE 6716 if (spin->si_verbose || p_verbose > 2) 6717 #endif 6718 { 6719 if (!spin->si_verbose) 6720 verbose_enter(); 6721 if (tot > 1000000) 6722 perc = (tot - n) / (tot / 100); 6723 else if (tot == 0) 6724 perc = 0; 6725 else 6726 perc = (tot - n) * 100 / tot; 6727 smsg((char_u *)_("Compressed %d of %d nodes; %d%% remaining"), 6728 n, tot, perc); 6729 if (p_verbose > 2) 6730 verbose_leave(); 6731 } 6732 #ifdef SPELL_PRINTTREE 6733 spell_print_tree(root->wn_sibling); 6734 #endif 6735 hash_clear(&ht); 6736 } 6737 } 6738 6739 /* 6740 * Compress a node, its siblings and its children, depth first. 6741 * Returns the number of compressed nodes. 
6742 */ 6743 static int 6744 node_compress(spin, node, ht, tot) 6745 spellinfo_T *spin; 6746 wordnode_T *node; 6747 hashtab_T *ht; 6748 int *tot; /* total count of nodes before compressing, 6749 incremented while going through the tree */ 6750 { 6751 wordnode_T *np; 6752 wordnode_T *tp; 6753 wordnode_T *child; 6754 hash_T hash; 6755 hashitem_T *hi; 6756 int len = 0; 6757 unsigned nr, n; 6758 int compressed = 0; 6759 6760 /* 6761 * Go through the list of siblings. Compress each child and then try 6762 * finding an identical child to replace it. 6763 * Note that with "child" we mean not just the node that is pointed to, 6764 * but the whole list of siblings, of which the node is the first. 6765 */ 6766 for (np = node; np != NULL && !got_int; np = np->wn_sibling) 6767 { 6768 ++len; 6769 if ((child = np->wn_child) != NULL) 6770 { 6771 /* Compress the child. This fills hashkey. */ 6772 compressed += node_compress(spin, child, ht, tot); 6773 6774 /* Try to find an identical child. */ 6775 hash = hash_hash(child->wn_u1.hashkey); 6776 hi = hash_lookup(ht, child->wn_u1.hashkey, hash); 6777 tp = NULL; 6778 if (!HASHITEM_EMPTY(hi)) 6779 { 6780 /* There are children with an identical hash value. Now check 6781 * if there is one that is really identical. */ 6782 for (tp = HI2WN(hi); tp != NULL; tp = tp->wn_u2.next) 6783 if (node_equal(child, tp)) 6784 { 6785 /* Found one! Now use that child in place of the 6786 * current one. This means the current child and all 6787 * its siblings is unlinked from the tree. */ 6788 ++tp->wn_refs; 6789 deref_wordnode(spin, child); 6790 np->wn_child = tp; 6791 ++compressed; 6792 break; 6793 } 6794 if (tp == NULL) 6795 { 6796 /* No other child with this hash value equals the child of 6797 * the node, add it to the linked list after the first 6798 * item. */ 6799 tp = HI2WN(hi); 6800 child->wn_u2.next = tp->wn_u2.next; 6801 tp->wn_u2.next = child; 6802 } 6803 } 6804 else 6805 /* No other child has this hash value, add it to the 6806 * hashtable. 
*/ 6807 hash_add_item(ht, hi, child->wn_u1.hashkey, hash); 6808 } 6809 } 6810 *tot += len; 6811 6812 /* 6813 * Make a hash key for the node and its siblings, so that we can quickly 6814 * find a lookalike node. This must be done after compressing the sibling 6815 * list, otherwise the hash key would become invalid by the compression. 6816 */ 6817 node->wn_u1.hashkey[0] = len; 6818 nr = 0; 6819 for (np = node; np != NULL; np = np->wn_sibling) 6820 { 6821 if (np->wn_byte == NUL) 6822 /* end node: use wn_flags, wn_region and wn_affixID */ 6823 n = np->wn_flags + (np->wn_region << 8) + (np->wn_affixID << 16); 6824 else 6825 /* byte node: use the byte value and the child pointer */ 6826 n = np->wn_byte + ((long_u)np->wn_child << 8); 6827 nr = nr * 101 + n; 6828 } 6829 6830 /* Avoid NUL bytes, it terminates the hash key. */ 6831 n = nr & 0xff; 6832 node->wn_u1.hashkey[1] = n == 0 ? 1 : n; 6833 n = (nr >> 8) & 0xff; 6834 node->wn_u1.hashkey[2] = n == 0 ? 1 : n; 6835 n = (nr >> 16) & 0xff; 6836 node->wn_u1.hashkey[3] = n == 0 ? 1 : n; 6837 n = (nr >> 24) & 0xff; 6838 node->wn_u1.hashkey[4] = n == 0 ? 1 : n; 6839 node->wn_u1.hashkey[5] = NUL; 6840 6841 /* Check for CTRL-C pressed now and then. */ 6842 fast_breakcheck(); 6843 6844 return compressed; 6845 } 6846 6847 /* 6848 * Return TRUE when two nodes have identical siblings and children. 6849 */ 6850 static int 6851 node_equal(n1, n2) 6852 wordnode_T *n1; 6853 wordnode_T *n2; 6854 { 6855 wordnode_T *p1; 6856 wordnode_T *p2; 6857 6858 for (p1 = n1, p2 = n2; p1 != NULL && p2 != NULL; 6859 p1 = p1->wn_sibling, p2 = p2->wn_sibling) 6860 if (p1->wn_byte != p2->wn_byte 6861 || (p1->wn_byte == NUL 6862 ? (p1->wn_flags != p2->wn_flags 6863 || p1->wn_region != p2->wn_region 6864 || p1->wn_affixID != p2->wn_affixID) 6865 : (p1->wn_child != p2->wn_child))) 6866 break; 6867 6868 return p1 == NULL && p2 == NULL; 6869 } 6870 6871 /* 6872 * Write a number to file "fd", MSB first, in "len" bytes. 
6873 */ 6874 void 6875 put_bytes(fd, nr, len) 6876 FILE *fd; 6877 long_u nr; 6878 int len; 6879 { 6880 int i; 6881 6882 for (i = len - 1; i >= 0; --i) 6883 putc((int)(nr >> (i * 8)), fd); 6884 } 6885 6886 static int 6887 #ifdef __BORLANDC__ 6888 _RTLENTRYF 6889 #endif 6890 rep_compare __ARGS((const void *s1, const void *s2)); 6891 6892 /* 6893 * Function given to qsort() to sort the REP items on "from" string. 6894 */ 6895 static int 6896 #ifdef __BORLANDC__ 6897 _RTLENTRYF 6898 #endif 6899 rep_compare(s1, s2) 6900 const void *s1; 6901 const void *s2; 6902 { 6903 fromto_T *p1 = (fromto_T *)s1; 6904 fromto_T *p2 = (fromto_T *)s2; 6905 6906 return STRCMP(p1->ft_from, p2->ft_from); 6907 } 6908 6909 /* 6910 * Write the Vim .spl file "fname". 6911 * Return FAIL or OK; 6912 */ 6913 static int 6914 write_vim_spell(spin, fname) 6915 spellinfo_T *spin; 6916 char_u *fname; 6917 { 6918 FILE *fd; 6919 int regionmask; 6920 int round; 6921 wordnode_T *tree; 6922 int nodecount; 6923 int i; 6924 int l; 6925 garray_T *gap; 6926 fromto_T *ftp; 6927 char_u *p; 6928 int rr; 6929 int retval = OK; 6930 6931 fd = mch_fopen((char *)fname, "w"); 6932 if (fd == NULL) 6933 { 6934 EMSG2(_(e_notopen), fname); 6935 return FAIL; 6936 } 6937 6938 /* <HEADER>: <fileID> <versionnr> */ 6939 /* <fileID> */ 6940 if (fwrite(VIMSPELLMAGIC, VIMSPELLMAGICL, (size_t)1, fd) != 1) 6941 { 6942 EMSG(_(e_write)); 6943 retval = FAIL; 6944 } 6945 putc(VIMSPELLVERSION, fd); /* <versionnr> */ 6946 6947 /* 6948 * <SECTIONS>: <section> ... <sectionend> 6949 */ 6950 6951 /* SN_REGION: <regionname> ... 6952 * Write the region names only if there is more than one. */ 6953 if (spin->si_region_count > 1) 6954 { 6955 putc(SN_REGION, fd); /* <sectionID> */ 6956 putc(SNF_REQUIRED, fd); /* <sectionflags> */ 6957 l = spin->si_region_count * 2; 6958 put_bytes(fd, (long_u)l, 4); /* <sectionlen> */ 6959 fwrite(spin->si_region_name, (size_t)l, (size_t)1, fd); 6960 /* <regionname> ... 
*/ 6961 regionmask = (1 << spin->si_region_count) - 1; 6962 } 6963 else 6964 regionmask = 0; 6965 6966 /* SN_CHARFLAGS: <charflagslen> <charflags> <folcharslen> <folchars> 6967 * 6968 * The table with character flags and the table for case folding. 6969 * This makes sure the same characters are recognized as word characters 6970 * when generating an when using a spell file. 6971 * Skip this for ASCII, the table may conflict with the one used for 6972 * 'encoding'. 6973 * Also skip this for an .add.spl file, the main spell file must contain 6974 * the table (avoids that it conflicts). File is shorter too. 6975 */ 6976 if (!spin->si_ascii && !spin->si_add) 6977 { 6978 char_u folchars[128 * 8]; 6979 int flags; 6980 6981 putc(SN_CHARFLAGS, fd); /* <sectionID> */ 6982 putc(SNF_REQUIRED, fd); /* <sectionflags> */ 6983 6984 /* Form the <folchars> string first, we need to know its length. */ 6985 l = 0; 6986 for (i = 128; i < 256; ++i) 6987 { 6988 #ifdef FEAT_MBYTE 6989 if (has_mbyte) 6990 l += mb_char2bytes(spelltab.st_fold[i], folchars + l); 6991 else 6992 #endif 6993 folchars[l++] = spelltab.st_fold[i]; 6994 } 6995 put_bytes(fd, (long_u)(1 + 128 + 2 + l), 4); /* <sectionlen> */ 6996 6997 fputc(128, fd); /* <charflagslen> */ 6998 for (i = 128; i < 256; ++i) 6999 { 7000 flags = 0; 7001 if (spelltab.st_isw[i]) 7002 flags |= CF_WORD; 7003 if (spelltab.st_isu[i]) 7004 flags |= CF_UPPER; 7005 fputc(flags, fd); /* <charflags> */ 7006 } 7007 7008 put_bytes(fd, (long_u)l, 2); /* <folcharslen> */ 7009 fwrite(folchars, (size_t)l, (size_t)1, fd); /* <folchars> */ 7010 } 7011 7012 /* SN_MIDWORD: <midword> */ 7013 if (spin->si_midword != NULL) 7014 { 7015 putc(SN_MIDWORD, fd); /* <sectionID> */ 7016 putc(SNF_REQUIRED, fd); /* <sectionflags> */ 7017 7018 i = STRLEN(spin->si_midword); 7019 put_bytes(fd, (long_u)i, 4); /* <sectionlen> */ 7020 fwrite(spin->si_midword, (size_t)i, (size_t)1, fd); /* <midword> */ 7021 } 7022 7023 /* SN_PREFCOND: <prefcondcnt> <prefcond> ... 
*/ 7024 if (spin->si_prefcond.ga_len > 0) 7025 { 7026 putc(SN_PREFCOND, fd); /* <sectionID> */ 7027 putc(SNF_REQUIRED, fd); /* <sectionflags> */ 7028 7029 l = write_spell_prefcond(NULL, &spin->si_prefcond); 7030 put_bytes(fd, (long_u)l, 4); /* <sectionlen> */ 7031 7032 write_spell_prefcond(fd, &spin->si_prefcond); 7033 } 7034 7035 /* SN_REP: <repcount> <rep> ... 7036 * SN_SAL: <salflags> <salcount> <sal> ... */ 7037 7038 /* Sort the REP items. */ 7039 qsort(spin->si_rep.ga_data, (size_t)spin->si_rep.ga_len, 7040 sizeof(fromto_T), rep_compare); 7041 7042 /* round 1: SN_REP section 7043 * round 2: SN_SAL section (unless SN_SOFO is used) */ 7044 for (round = 1; round <= 2; ++round) 7045 { 7046 if (round == 1) 7047 { 7048 gap = &spin->si_rep; 7049 putc(SN_REP, fd); /* <sectionID> */ 7050 } 7051 else 7052 { 7053 if (spin->si_sofofr != NULL && spin->si_sofoto != NULL) 7054 /* using SN_SOFO section instead of SN_SAL */ 7055 break; 7056 gap = &spin->si_sal; 7057 putc(SN_SAL, fd); /* <sectionID> */ 7058 } 7059 7060 /* This is for making suggestions, section is not required. */ 7061 putc(0, fd); /* <sectionflags> */ 7062 7063 /* Compute the length of what follows. 
*/ 7064 l = 2; /* count <repcount> or <salcount> */ 7065 for (i = 0; i < gap->ga_len; ++i) 7066 { 7067 ftp = &((fromto_T *)gap->ga_data)[i]; 7068 l += 1 + STRLEN(ftp->ft_from); /* count <*fromlen> and <*from> */ 7069 l += 1 + STRLEN(ftp->ft_to); /* count <*tolen> and <*to> */ 7070 } 7071 if (round == 2) 7072 ++l; /* count <salflags> */ 7073 put_bytes(fd, (long_u)l, 4); /* <sectionlen> */ 7074 7075 if (round == 2) 7076 { 7077 i = 0; 7078 if (spin->si_followup) 7079 i |= SAL_F0LLOWUP; 7080 if (spin->si_collapse) 7081 i |= SAL_COLLAPSE; 7082 if (spin->si_rem_accents) 7083 i |= SAL_REM_ACCENTS; 7084 putc(i, fd); /* <salflags> */ 7085 } 7086 7087 put_bytes(fd, (long_u)gap->ga_len, 2); /* <repcount> or <salcount> */ 7088 for (i = 0; i < gap->ga_len; ++i) 7089 { 7090 /* <rep> : <repfromlen> <repfrom> <reptolen> <repto> */ 7091 /* <sal> : <salfromlen> <salfrom> <saltolen> <salto> */ 7092 ftp = &((fromto_T *)gap->ga_data)[i]; 7093 for (rr = 1; rr <= 2; ++rr) 7094 { 7095 p = rr == 1 ? ftp->ft_from : ftp->ft_to; 7096 l = STRLEN(p); 7097 putc(l, fd); 7098 fwrite(p, l, (size_t)1, fd); 7099 } 7100 } 7101 7102 } 7103 7104 /* SN_SOFO: <sofofromlen> <sofofrom> <sofotolen> <sofoto> 7105 * This is for making suggestions, section is not required. */ 7106 if (spin->si_sofofr != NULL && spin->si_sofoto != NULL) 7107 { 7108 putc(SN_SOFO, fd); /* <sectionID> */ 7109 putc(0, fd); /* <sectionflags> */ 7110 7111 l = STRLEN(spin->si_sofofr); 7112 put_bytes(fd, (long_u)(l + STRLEN(spin->si_sofoto) + 4), 4); 7113 /* <sectionlen> */ 7114 7115 put_bytes(fd, (long_u)l, 2); /* <sofofromlen> */ 7116 fwrite(spin->si_sofofr, l, (size_t)1, fd); /* <sofofrom> */ 7117 7118 l = STRLEN(spin->si_sofoto); 7119 put_bytes(fd, (long_u)l, 2); /* <sofotolen> */ 7120 fwrite(spin->si_sofoto, l, (size_t)1, fd); /* <sofoto> */ 7121 } 7122 7123 /* SN_MAP: <mapstr> 7124 * This is for making suggestions, section is not required. 
*/ 7125 if (spin->si_map.ga_len > 0) 7126 { 7127 putc(SN_MAP, fd); /* <sectionID> */ 7128 putc(0, fd); /* <sectionflags> */ 7129 l = spin->si_map.ga_len; 7130 put_bytes(fd, (long_u)l, 4); /* <sectionlen> */ 7131 fwrite(spin->si_map.ga_data, (size_t)l, (size_t)1, fd); 7132 /* <mapstr> */ 7133 } 7134 7135 /* SN_COMPOUND: compound info. 7136 * We don't mark it required, when not supported all compound words will 7137 * be bad words. */ 7138 if (spin->si_compflags != NULL) 7139 { 7140 putc(SN_COMPOUND, fd); /* <sectionID> */ 7141 putc(0, fd); /* <sectionflags> */ 7142 7143 l = STRLEN(spin->si_compflags); 7144 put_bytes(fd, (long_u)(l + 3), 4); /* <sectionlen> */ 7145 putc(spin->si_compmax, fd); /* <compmax> */ 7146 putc(spin->si_compminlen, fd); /* <compminlen> */ 7147 putc(spin->si_compsylmax, fd); /* <compsylmax> */ 7148 /* <compflags> */ 7149 fwrite(spin->si_compflags, (size_t)l, (size_t)1, fd); 7150 } 7151 7152 /* SN_NOBREAK: NOBREAK flag */ 7153 if (spin->si_nobreak) 7154 { 7155 putc(SN_NOBREAK, fd); /* <sectionID> */ 7156 putc(0, fd); /* <sectionflags> */ 7157 7158 /* It's empty, the precense of the section flags the feature. */ 7159 put_bytes(fd, (long_u)0, 4); /* <sectionlen> */ 7160 } 7161 7162 /* SN_SYLLABLE: syllable info. 7163 * We don't mark it required, when not supported syllables will not be 7164 * counted. 
*/ 7165 if (spin->si_syllable != NULL) 7166 { 7167 putc(SN_SYLLABLE, fd); /* <sectionID> */ 7168 putc(0, fd); /* <sectionflags> */ 7169 7170 l = STRLEN(spin->si_syllable); 7171 put_bytes(fd, (long_u)l, 4); /* <sectionlen> */ 7172 fwrite(spin->si_syllable, (size_t)l, (size_t)1, fd); /* <syllable> */ 7173 } 7174 7175 /* end of <SECTIONS> */ 7176 putc(SN_END, fd); /* <sectionend> */ 7177 7178 7179 /* 7180 * <LWORDTREE> <KWORDTREE> <PREFIXTREE> 7181 */ 7182 spin->si_memtot = 0; 7183 for (round = 1; round <= 3; ++round) 7184 { 7185 if (round == 1) 7186 tree = spin->si_foldroot->wn_sibling; 7187 else if (round == 2) 7188 tree = spin->si_keeproot->wn_sibling; 7189 else 7190 tree = spin->si_prefroot->wn_sibling; 7191 7192 /* Clear the index and wnode fields in the tree. */ 7193 clear_node(tree); 7194 7195 /* Count the number of nodes. Needed to be able to allocate the 7196 * memory when reading the nodes. Also fills in index for shared 7197 * nodes. */ 7198 nodecount = put_node(NULL, tree, 0, regionmask, round == 3); 7199 7200 /* number of nodes in 4 bytes */ 7201 put_bytes(fd, (long_u)nodecount, 4); /* <nodecount> */ 7202 spin->si_memtot += nodecount + nodecount * sizeof(int); 7203 7204 /* Write the nodes. */ 7205 (void)put_node(fd, tree, 0, regionmask, round == 3); 7206 } 7207 7208 /* Write another byte to check for errors. */ 7209 if (putc(0, fd) == EOF) 7210 retval = FAIL; 7211 7212 if (fclose(fd) == EOF) 7213 retval = FAIL; 7214 7215 return retval; 7216 } 7217 7218 /* 7219 * Clear the index and wnode fields of "node", it siblings and its 7220 * children. This is needed because they are a union with other items to save 7221 * space. 
7222 */ 7223 static void 7224 clear_node(node) 7225 wordnode_T *node; 7226 { 7227 wordnode_T *np; 7228 7229 if (node != NULL) 7230 for (np = node; np != NULL; np = np->wn_sibling) 7231 { 7232 np->wn_u1.index = 0; 7233 np->wn_u2.wnode = NULL; 7234 7235 if (np->wn_byte != NUL) 7236 clear_node(np->wn_child); 7237 } 7238 } 7239 7240 7241 /* 7242 * Dump a word tree at node "node". 7243 * 7244 * This first writes the list of possible bytes (siblings). Then for each 7245 * byte recursively write the children. 7246 * 7247 * NOTE: The code here must match the code in read_tree(), since assumptions 7248 * are made about the indexes (so that we don't have to write them in the 7249 * file). 7250 * 7251 * Returns the number of nodes used. 7252 */ 7253 static int 7254 put_node(fd, node, index, regionmask, prefixtree) 7255 FILE *fd; /* NULL when only counting */ 7256 wordnode_T *node; 7257 int index; 7258 int regionmask; 7259 int prefixtree; /* TRUE for PREFIXTREE */ 7260 { 7261 int newindex = index; 7262 int siblingcount = 0; 7263 wordnode_T *np; 7264 int flags; 7265 7266 /* If "node" is zero the tree is empty. */ 7267 if (node == NULL) 7268 return 0; 7269 7270 /* Store the index where this node is written. */ 7271 node->wn_u1.index = index; 7272 7273 /* Count the number of siblings. */ 7274 for (np = node; np != NULL; np = np->wn_sibling) 7275 ++siblingcount; 7276 7277 /* Write the sibling count. */ 7278 if (fd != NULL) 7279 putc(siblingcount, fd); /* <siblingcount> */ 7280 7281 /* Write each sibling byte and optionally extra info. */ 7282 for (np = node; np != NULL; np = np->wn_sibling) 7283 { 7284 if (np->wn_byte == 0) 7285 { 7286 if (fd != NULL) 7287 { 7288 /* For a NUL byte (end of word) write the flags etc. */ 7289 if (prefixtree) 7290 { 7291 /* In PREFIXTREE write the required affixID and the 7292 * associated condition nr (stored in wn_region). 
The 7293 * byte value is misused to store the "rare" and "not 7294 * combining" flags */ 7295 if (np->wn_flags == (short_u)PFX_FLAGS) 7296 putc(BY_NOFLAGS, fd); /* <byte> */ 7297 else 7298 { 7299 putc(BY_FLAGS, fd); /* <byte> */ 7300 putc(np->wn_flags, fd); /* <pflags> */ 7301 } 7302 putc(np->wn_affixID, fd); /* <affixID> */ 7303 put_bytes(fd, (long_u)np->wn_region, 2); /* <prefcondnr> */ 7304 } 7305 else 7306 { 7307 /* For word trees we write the flag/region items. */ 7308 flags = np->wn_flags; 7309 if (regionmask != 0 && np->wn_region != regionmask) 7310 flags |= WF_REGION; 7311 if (np->wn_affixID != 0) 7312 flags |= WF_AFX; 7313 if (flags == 0) 7314 { 7315 /* word without flags or region */ 7316 putc(BY_NOFLAGS, fd); /* <byte> */ 7317 } 7318 else 7319 { 7320 if (np->wn_flags >= 0x100) 7321 { 7322 putc(BY_FLAGS2, fd); /* <byte> */ 7323 putc(flags, fd); /* <flags> */ 7324 putc((unsigned)flags >> 8, fd); /* <flags2> */ 7325 } 7326 else 7327 { 7328 putc(BY_FLAGS, fd); /* <byte> */ 7329 putc(flags, fd); /* <flags> */ 7330 } 7331 if (flags & WF_REGION) 7332 putc(np->wn_region, fd); /* <region> */ 7333 if (flags & WF_AFX) 7334 putc(np->wn_affixID, fd); /* <affixID> */ 7335 } 7336 } 7337 } 7338 } 7339 else 7340 { 7341 if (np->wn_child->wn_u1.index != 0 7342 && np->wn_child->wn_u2.wnode != node) 7343 { 7344 /* The child is written elsewhere, write the reference. */ 7345 if (fd != NULL) 7346 { 7347 putc(BY_INDEX, fd); /* <byte> */ 7348 /* <nodeidx> */ 7349 put_bytes(fd, (long_u)np->wn_child->wn_u1.index, 3); 7350 } 7351 } 7352 else if (np->wn_child->wn_u2.wnode == NULL) 7353 /* We will write the child below and give it an index. */ 7354 np->wn_child->wn_u2.wnode = node; 7355 7356 if (fd != NULL) 7357 if (putc(np->wn_byte, fd) == EOF) /* <byte> or <xbyte> */ 7358 { 7359 EMSG(_(e_write)); 7360 return 0; 7361 } 7362 } 7363 } 7364 7365 /* Space used in the array when reading: one for each sibling and one for 7366 * the count. 
*/ 7367 newindex += siblingcount + 1; 7368 7369 /* Recursively dump the children of each sibling. */ 7370 for (np = node; np != NULL; np = np->wn_sibling) 7371 if (np->wn_byte != 0 && np->wn_child->wn_u2.wnode == node) 7372 newindex = put_node(fd, np->wn_child, newindex, regionmask, 7373 prefixtree); 7374 7375 return newindex; 7376 } 7377 7378 7379 /* 7380 * ":mkspell [-ascii] outfile infile ..." 7381 * ":mkspell [-ascii] addfile" 7382 */ 7383 void 7384 ex_mkspell(eap) 7385 exarg_T *eap; 7386 { 7387 int fcount; 7388 char_u **fnames; 7389 char_u *arg = eap->arg; 7390 int ascii = FALSE; 7391 7392 if (STRNCMP(arg, "-ascii", 6) == 0) 7393 { 7394 ascii = TRUE; 7395 arg = skipwhite(arg + 6); 7396 } 7397 7398 /* Expand all the remaining arguments (e.g., $VIMRUNTIME). */ 7399 if (get_arglist_exp(arg, &fcount, &fnames) == OK) 7400 { 7401 mkspell(fcount, fnames, ascii, eap->forceit, FALSE); 7402 FreeWild(fcount, fnames); 7403 } 7404 } 7405 7406 /* 7407 * Create a Vim spell file from one or more word lists. 7408 * "fnames[0]" is the output file name. 7409 * "fnames[fcount - 1]" is the last input file name. 7410 * Exception: when "fnames[0]" ends in ".add" it's used as the input file name 7411 * and ".spl" is appended to make the output file name. 
7412 */ 7413 static void 7414 mkspell(fcount, fnames, ascii, overwrite, added_word) 7415 int fcount; 7416 char_u **fnames; 7417 int ascii; /* -ascii argument given */ 7418 int overwrite; /* overwrite existing output file */ 7419 int added_word; /* invoked through "zg" */ 7420 { 7421 char_u fname[MAXPATHL]; 7422 char_u wfname[MAXPATHL]; 7423 char_u **innames; 7424 int incount; 7425 afffile_T *(afile[8]); 7426 int i; 7427 int len; 7428 struct stat st; 7429 int error = FALSE; 7430 spellinfo_T spin; 7431 7432 vim_memset(&spin, 0, sizeof(spin)); 7433 spin.si_verbose = !added_word; 7434 spin.si_ascii = ascii; 7435 spin.si_followup = TRUE; 7436 spin.si_rem_accents = TRUE; 7437 ga_init2(&spin.si_rep, (int)sizeof(fromto_T), 20); 7438 ga_init2(&spin.si_sal, (int)sizeof(fromto_T), 20); 7439 ga_init2(&spin.si_map, (int)sizeof(char_u), 100); 7440 ga_init2(&spin.si_prefcond, (int)sizeof(char_u *), 50); 7441 spin.si_newcompID = 127; /* start compound ID at first maximum */ 7442 7443 /* default: fnames[0] is output file, following are input files */ 7444 innames = &fnames[1]; 7445 incount = fcount - 1; 7446 7447 if (fcount >= 1) 7448 { 7449 len = STRLEN(fnames[0]); 7450 if (fcount == 1 && len > 4 && STRCMP(fnames[0] + len - 4, ".add") == 0) 7451 { 7452 /* For ":mkspell path/en.latin1.add" output file is 7453 * "path/en.latin1.add.spl". */ 7454 innames = &fnames[0]; 7455 incount = 1; 7456 vim_snprintf((char *)wfname, sizeof(wfname), "%s.spl", fnames[0]); 7457 } 7458 else if (fcount == 1) 7459 { 7460 /* For ":mkspell path/vim" output file is "path/vim.latin1.spl". */ 7461 innames = &fnames[0]; 7462 incount = 1; 7463 vim_snprintf((char *)wfname, sizeof(wfname), "%s.%s.spl", fnames[0], 7464 spin.si_ascii ? (char_u *)"ascii" : spell_enc()); 7465 } 7466 else if (len > 4 && STRCMP(fnames[0] + len - 4, ".spl") == 0) 7467 { 7468 /* Name ends in ".spl", use as the file name. 
*/ 7469 vim_strncpy(wfname, fnames[0], sizeof(wfname) - 1); 7470 } 7471 else 7472 /* Name should be language, make the file name from it. */ 7473 vim_snprintf((char *)wfname, sizeof(wfname), "%s.%s.spl", fnames[0], 7474 spin.si_ascii ? (char_u *)"ascii" : spell_enc()); 7475 7476 /* Check for .ascii.spl. */ 7477 if (strstr((char *)gettail(wfname), ".ascii.") != NULL) 7478 spin.si_ascii = TRUE; 7479 7480 /* Check for .add.spl. */ 7481 if (strstr((char *)gettail(wfname), ".add.") != NULL) 7482 spin.si_add = TRUE; 7483 } 7484 7485 if (incount <= 0) 7486 EMSG(_(e_invarg)); /* need at least output and input names */ 7487 else if (vim_strchr(gettail(wfname), '_') != NULL) 7488 EMSG(_("E751: Output file name must not have region name")); 7489 else if (incount > 8) 7490 EMSG(_("E754: Only up to 8 regions supported")); 7491 else 7492 { 7493 /* Check for overwriting before doing things that may take a lot of 7494 * time. */ 7495 if (!overwrite && mch_stat((char *)wfname, &st) >= 0) 7496 { 7497 EMSG(_(e_exists)); 7498 return; 7499 } 7500 if (mch_isdir(wfname)) 7501 { 7502 EMSG2(_(e_isadir2), wfname); 7503 return; 7504 } 7505 7506 /* 7507 * Init the aff and dic pointers. 7508 * Get the region names if there are more than 2 arguments. 
7509 */ 7510 for (i = 0; i < incount; ++i) 7511 { 7512 afile[i] = NULL; 7513 7514 if (incount > 1) 7515 { 7516 len = STRLEN(innames[i]); 7517 if (STRLEN(gettail(innames[i])) < 5 7518 || innames[i][len - 3] != '_') 7519 { 7520 EMSG2(_("E755: Invalid region in %s"), innames[i]); 7521 return; 7522 } 7523 spin.si_region_name[i * 2] = TOLOWER_ASC(innames[i][len - 2]); 7524 spin.si_region_name[i * 2 + 1] = 7525 TOLOWER_ASC(innames[i][len - 1]); 7526 } 7527 } 7528 spin.si_region_count = incount; 7529 7530 spin.si_foldroot = wordtree_alloc(&spin); 7531 spin.si_keeproot = wordtree_alloc(&spin); 7532 spin.si_prefroot = wordtree_alloc(&spin); 7533 if (spin.si_foldroot == NULL 7534 || spin.si_keeproot == NULL 7535 || spin.si_prefroot == NULL) 7536 { 7537 free_blocks(spin.si_blocks); 7538 return; 7539 } 7540 7541 /* When not producing a .add.spl file clear the character table when 7542 * we encounter one in the .aff file. This means we dump the current 7543 * one in the .spl file if the .aff file doesn't define one. That's 7544 * better than guessing the contents, the table will match a 7545 * previously loaded spell file. */ 7546 if (!spin.si_add) 7547 spin.si_clear_chartab = TRUE; 7548 7549 /* 7550 * Read all the .aff and .dic files. 7551 * Text is converted to 'encoding'. 7552 * Words are stored in the case-folded and keep-case trees. 7553 */ 7554 for (i = 0; i < incount && !error; ++i) 7555 { 7556 spin.si_conv.vc_type = CONV_NONE; 7557 spin.si_region = 1 << i; 7558 7559 vim_snprintf((char *)fname, sizeof(fname), "%s.aff", innames[i]); 7560 if (mch_stat((char *)fname, &st) >= 0) 7561 { 7562 /* Read the .aff file. Will init "spin->si_conv" based on the 7563 * "SET" line. */ 7564 afile[i] = spell_read_aff(&spin, fname); 7565 if (afile[i] == NULL) 7566 error = TRUE; 7567 else 7568 { 7569 /* Read the .dic file and store the words in the trees. 
*/ 7570 vim_snprintf((char *)fname, sizeof(fname), "%s.dic", 7571 innames[i]); 7572 if (spell_read_dic(&spin, fname, afile[i]) == FAIL) 7573 error = TRUE; 7574 } 7575 } 7576 else 7577 { 7578 /* No .aff file, try reading the file as a word list. Store 7579 * the words in the trees. */ 7580 if (spell_read_wordfile(&spin, innames[i]) == FAIL) 7581 error = TRUE; 7582 } 7583 7584 #ifdef FEAT_MBYTE 7585 /* Free any conversion stuff. */ 7586 convert_setup(&spin.si_conv, NULL, NULL); 7587 #endif 7588 } 7589 7590 if (spin.si_compflags != NULL && spin.si_nobreak) 7591 MSG(_("Warning: both compounding and NOBREAK specified")); 7592 7593 if (!error) 7594 { 7595 /* 7596 * Combine tails in the tree. 7597 */ 7598 if (spin.si_verbose || p_verbose > 2) 7599 { 7600 if (!spin.si_verbose) 7601 verbose_enter(); 7602 MSG(_(msg_compressing)); 7603 out_flush(); 7604 if (!spin.si_verbose) 7605 verbose_leave(); 7606 } 7607 wordtree_compress(&spin, spin.si_foldroot); 7608 wordtree_compress(&spin, spin.si_keeproot); 7609 wordtree_compress(&spin, spin.si_prefroot); 7610 } 7611 7612 if (!error) 7613 { 7614 /* 7615 * Write the info in the spell file. 7616 */ 7617 if (spin.si_verbose || p_verbose > 2) 7618 { 7619 if (!spin.si_verbose) 7620 verbose_enter(); 7621 smsg((char_u *)_("Writing spell file %s ..."), wfname); 7622 out_flush(); 7623 if (!spin.si_verbose) 7624 verbose_leave(); 7625 } 7626 7627 error = write_vim_spell(&spin, wfname) == FAIL; 7628 7629 if (spin.si_verbose || p_verbose > 2) 7630 { 7631 if (!spin.si_verbose) 7632 verbose_enter(); 7633 MSG(_("Done!")); 7634 smsg((char_u *)_("Estimated runtime memory use: %d bytes"), 7635 spin.si_memtot); 7636 out_flush(); 7637 if (!spin.si_verbose) 7638 verbose_leave(); 7639 } 7640 7641 /* If the file is loaded need to reload it. */ 7642 if (!error) 7643 spell_reload_one(wfname, added_word); 7644 } 7645 7646 /* Free the allocated memory. 
*/ 7647 ga_clear(&spin.si_rep); 7648 ga_clear(&spin.si_sal); 7649 ga_clear(&spin.si_map); 7650 ga_clear(&spin.si_prefcond); 7651 7652 /* Free the .aff file structures. */ 7653 for (i = 0; i < incount; ++i) 7654 if (afile[i] != NULL) 7655 spell_free_aff(afile[i]); 7656 7657 /* Free all the bits and pieces at once. */ 7658 free_blocks(spin.si_blocks); 7659 } 7660 } 7661 7662 7663 /* 7664 * ":[count]spellgood {word}" 7665 * ":[count]spellwrong {word}" 7666 */ 7667 void 7668 ex_spell(eap) 7669 exarg_T *eap; 7670 { 7671 spell_add_word(eap->arg, STRLEN(eap->arg), eap->cmdidx == CMD_spellwrong, 7672 eap->forceit ? 0 : (int)eap->line2); 7673 } 7674 7675 /* 7676 * Add "word[len]" to 'spellfile' as a good or bad word. 7677 */ 7678 void 7679 spell_add_word(word, len, bad, index) 7680 char_u *word; 7681 int len; 7682 int bad; 7683 int index; /* "zG" and "zW": zero, otherwise index in 7684 'spellfile' */ 7685 { 7686 FILE *fd; 7687 buf_T *buf = NULL; 7688 int new_spf = FALSE; 7689 struct stat st; 7690 char_u *fname; 7691 char_u fnamebuf[MAXPATHL]; 7692 char_u line[MAXWLEN * 2]; 7693 long fpos, fpos_next = 0; 7694 int i; 7695 char_u *spf; 7696 7697 if (index == 0) /* use internal wordlist */ 7698 { 7699 if (int_wordlist == NULL) 7700 { 7701 int_wordlist = vim_tempname('s'); 7702 if (int_wordlist == NULL) 7703 return; 7704 } 7705 fname = int_wordlist; 7706 } 7707 else 7708 { 7709 /* If 'spellfile' isn't set figure out a good default value. 
*/ 7710 if (*curbuf->b_p_spf == NUL) 7711 { 7712 init_spellfile(); 7713 new_spf = TRUE; 7714 } 7715 7716 if (*curbuf->b_p_spf == NUL) 7717 { 7718 EMSG2(_(e_notset), "spellfile"); 7719 return; 7720 } 7721 7722 for (spf = curbuf->b_p_spf, i = 1; *spf != NUL; ++i) 7723 { 7724 copy_option_part(&spf, fnamebuf, MAXPATHL, ","); 7725 if (i == index) 7726 break; 7727 if (*spf == NUL) 7728 { 7729 EMSGN(_("E765: 'spellfile' does not have %ld entries"), index); 7730 return; 7731 } 7732 } 7733 7734 /* Check that the user isn't editing the .add file somewhere. */ 7735 buf = buflist_findname_exp(fnamebuf); 7736 if (buf != NULL && buf->b_ml.ml_mfp == NULL) 7737 buf = NULL; 7738 if (buf != NULL && bufIsChanged(buf)) 7739 { 7740 EMSG(_(e_bufloaded)); 7741 return; 7742 } 7743 7744 fname = fnamebuf; 7745 } 7746 7747 if (bad) 7748 { 7749 /* When the word also appears as good word we need to remove that one, 7750 * since its flags sort before the one with WF_BANNED. */ 7751 fd = mch_fopen((char *)fname, "r"); 7752 if (fd != NULL) 7753 { 7754 while (!vim_fgets(line, MAXWLEN * 2, fd)) 7755 { 7756 fpos = fpos_next; 7757 fpos_next = ftell(fd); 7758 if (STRNCMP(word, line, len) == 0 7759 && (line[len] == '/' || line[len] < ' ')) 7760 { 7761 /* Found duplicate word. Remove it by writing a '#' at 7762 * the start of the line. Mixing reading and writing 7763 * doesn't work for all systems, close the file first. */ 7764 fclose(fd); 7765 fd = mch_fopen((char *)fname, "r+"); 7766 if (fd == NULL) 7767 break; 7768 if (fseek(fd, fpos, SEEK_SET) == 0) 7769 fputc('#', fd); 7770 fseek(fd, fpos_next, SEEK_SET); 7771 } 7772 } 7773 fclose(fd); 7774 } 7775 } 7776 7777 fd = mch_fopen((char *)fname, "a"); 7778 if (fd == NULL && new_spf) 7779 { 7780 /* We just initialized the 'spellfile' option and can't open the file. 7781 * We may need to create the "spell" directory first. We already 7782 * checked the runtime directory is writable in init_spellfile(). 
*/ 7783 STRCPY(NameBuff, fname); 7784 *gettail_sep(NameBuff) = NUL; 7785 if (mch_stat((char *)NameBuff, &st) < 0) 7786 { 7787 /* The directory doesn't exist. Try creating it and opening the 7788 * file again. */ 7789 vim_mkdir(NameBuff, 0755); 7790 fd = mch_fopen((char *)fname, "a"); 7791 } 7792 } 7793 7794 if (fd == NULL) 7795 EMSG2(_(e_notopen), fname); 7796 else 7797 { 7798 if (bad) 7799 fprintf(fd, "%.*s/!\n", len, word); 7800 else 7801 fprintf(fd, "%.*s\n", len, word); 7802 fclose(fd); 7803 7804 /* Update the .add.spl file. */ 7805 mkspell(1, &fname, FALSE, TRUE, TRUE); 7806 7807 /* If the .add file is edited somewhere, reload it. */ 7808 if (buf != NULL) 7809 buf_reload(buf); 7810 7811 redraw_all_later(NOT_VALID); 7812 } 7813 } 7814 7815 /* 7816 * Initialize 'spellfile' for the current buffer. 7817 */ 7818 static void 7819 init_spellfile() 7820 { 7821 char_u buf[MAXPATHL]; 7822 int l; 7823 char_u *fname; 7824 char_u *rtp; 7825 char_u *lend; 7826 int aspath = FALSE; 7827 char_u *lstart = curbuf->b_p_spl; 7828 7829 if (*curbuf->b_p_spl != NUL && curbuf->b_langp.ga_len > 0) 7830 { 7831 /* Find the end of the language name. Exclude the region. If there 7832 * is a path separator remember the start of the tail. */ 7833 for (lend = curbuf->b_p_spl; *lend != NUL 7834 && vim_strchr((char_u *)",._", *lend) == NULL; ++lend) 7835 if (vim_ispathsep(*lend)) 7836 { 7837 aspath = TRUE; 7838 lstart = lend + 1; 7839 } 7840 7841 /* Loop over all entries in 'runtimepath'. Use the first one where we 7842 * are allowed to write. */ 7843 rtp = p_rtp; 7844 while (*rtp != NUL) 7845 { 7846 if (aspath) 7847 /* Use directory of an entry with path, e.g., for 7848 * "/dir/lg.utf-8.spl" use "/dir". */ 7849 vim_strncpy(buf, curbuf->b_p_spl, lstart - curbuf->b_p_spl - 1); 7850 else 7851 /* Copy the path from 'runtimepath' to buf[]. 
*/ 7852 copy_option_part(&rtp, buf, MAXPATHL, ","); 7853 if (filewritable(buf) == 2) 7854 { 7855 /* Use the first language name from 'spelllang' and the 7856 * encoding used in the first loaded .spl file. */ 7857 if (aspath) 7858 vim_strncpy(buf, curbuf->b_p_spl, lend - curbuf->b_p_spl); 7859 else 7860 { 7861 l = STRLEN(buf); 7862 vim_snprintf((char *)buf + l, MAXPATHL - l, 7863 "/spell/%.*s", (int)(lend - lstart), lstart); 7864 } 7865 l = STRLEN(buf); 7866 fname = LANGP_ENTRY(curbuf->b_langp, 0)->lp_slang->sl_fname; 7867 vim_snprintf((char *)buf + l, MAXPATHL - l, ".%s.add", 7868 fname != NULL 7869 && strstr((char *)gettail(fname), ".ascii.") != NULL 7870 ? (char_u *)"ascii" : spell_enc()); 7871 set_option_value((char_u *)"spellfile", 0L, buf, OPT_LOCAL); 7872 break; 7873 } 7874 aspath = FALSE; 7875 } 7876 } 7877 } 7878 7879 7880 /* 7881 * Init the chartab used for spelling for ASCII. 7882 * EBCDIC is not supported! 7883 */ 7884 static void 7885 clear_spell_chartab(sp) 7886 spelltab_T *sp; 7887 { 7888 int i; 7889 7890 /* Init everything to FALSE. */ 7891 vim_memset(sp->st_isw, FALSE, sizeof(sp->st_isw)); 7892 vim_memset(sp->st_isu, FALSE, sizeof(sp->st_isu)); 7893 for (i = 0; i < 256; ++i) 7894 { 7895 sp->st_fold[i] = i; 7896 sp->st_upper[i] = i; 7897 } 7898 7899 /* We include digits. A word shouldn't start with a digit, but handling 7900 * that is done separately. */ 7901 for (i = '0'; i <= '9'; ++i) 7902 sp->st_isw[i] = TRUE; 7903 for (i = 'A'; i <= 'Z'; ++i) 7904 { 7905 sp->st_isw[i] = TRUE; 7906 sp->st_isu[i] = TRUE; 7907 sp->st_fold[i] = i + 0x20; 7908 } 7909 for (i = 'a'; i <= 'z'; ++i) 7910 { 7911 sp->st_isw[i] = TRUE; 7912 sp->st_upper[i] = i - 0x20; 7913 } 7914 } 7915 7916 /* 7917 * Init the chartab used for spelling. Only depends on 'encoding'. 7918 * Called once while starting up and when 'encoding' changes. 
7919 * The default is to use isalpha(), but the spell file should define the word 7920 * characters to make it possible that 'encoding' differs from the current 7921 * locale. For utf-8 we don't use isalpha() but our own functions. 7922 */ 7923 void 7924 init_spell_chartab() 7925 { 7926 int i; 7927 7928 did_set_spelltab = FALSE; 7929 clear_spell_chartab(&spelltab); 7930 #ifdef FEAT_MBYTE 7931 if (enc_dbcs) 7932 { 7933 /* DBCS: assume double-wide characters are word characters. */ 7934 for (i = 128; i <= 255; ++i) 7935 if (MB_BYTE2LEN(i) == 2) 7936 spelltab.st_isw[i] = TRUE; 7937 } 7938 else if (enc_utf8) 7939 { 7940 for (i = 128; i < 256; ++i) 7941 { 7942 spelltab.st_isu[i] = utf_isupper(i); 7943 spelltab.st_isw[i] = spelltab.st_isu[i] || utf_islower(i); 7944 spelltab.st_fold[i] = utf_fold(i); 7945 spelltab.st_upper[i] = utf_toupper(i); 7946 } 7947 } 7948 else 7949 #endif 7950 { 7951 /* Rough guess: use locale-dependent library functions. */ 7952 for (i = 128; i < 256; ++i) 7953 { 7954 if (MB_ISUPPER(i)) 7955 { 7956 spelltab.st_isw[i] = TRUE; 7957 spelltab.st_isu[i] = TRUE; 7958 spelltab.st_fold[i] = MB_TOLOWER(i); 7959 } 7960 else if (MB_ISLOWER(i)) 7961 { 7962 spelltab.st_isw[i] = TRUE; 7963 spelltab.st_upper[i] = MB_TOUPPER(i); 7964 } 7965 } 7966 } 7967 } 7968 7969 /* 7970 * Set the spell character tables from strings in the affix file. 7971 */ 7972 static int 7973 set_spell_chartab(fol, low, upp) 7974 char_u *fol; 7975 char_u *low; 7976 char_u *upp; 7977 { 7978 /* We build the new tables here first, so that we can compare with the 7979 * previous one. 
*/ 7980 spelltab_T new_st; 7981 char_u *pf = fol, *pl = low, *pu = upp; 7982 int f, l, u; 7983 7984 clear_spell_chartab(&new_st); 7985 7986 while (*pf != NUL) 7987 { 7988 if (*pl == NUL || *pu == NUL) 7989 { 7990 EMSG(_(e_affform)); 7991 return FAIL; 7992 } 7993 #ifdef FEAT_MBYTE 7994 f = mb_ptr2char_adv(&pf); 7995 l = mb_ptr2char_adv(&pl); 7996 u = mb_ptr2char_adv(&pu); 7997 #else 7998 f = *pf++; 7999 l = *pl++; 8000 u = *pu++; 8001 #endif 8002 /* Every character that appears is a word character. */ 8003 if (f < 256) 8004 new_st.st_isw[f] = TRUE; 8005 if (l < 256) 8006 new_st.st_isw[l] = TRUE; 8007 if (u < 256) 8008 new_st.st_isw[u] = TRUE; 8009 8010 /* if "LOW" and "FOL" are not the same the "LOW" char needs 8011 * case-folding */ 8012 if (l < 256 && l != f) 8013 { 8014 if (f >= 256) 8015 { 8016 EMSG(_(e_affrange)); 8017 return FAIL; 8018 } 8019 new_st.st_fold[l] = f; 8020 } 8021 8022 /* if "UPP" and "FOL" are not the same the "UPP" char needs 8023 * case-folding, it's upper case and the "UPP" is the upper case of 8024 * "FOL" . */ 8025 if (u < 256 && u != f) 8026 { 8027 if (f >= 256) 8028 { 8029 EMSG(_(e_affrange)); 8030 return FAIL; 8031 } 8032 new_st.st_fold[u] = f; 8033 new_st.st_isu[u] = TRUE; 8034 new_st.st_upper[f] = u; 8035 } 8036 } 8037 8038 if (*pl != NUL || *pu != NUL) 8039 { 8040 EMSG(_(e_affform)); 8041 return FAIL; 8042 } 8043 8044 return set_spell_finish(&new_st); 8045 } 8046 8047 /* 8048 * Set the spell character tables from strings in the .spl file. 8049 */ 8050 static void 8051 set_spell_charflags(flags, cnt, fol) 8052 char_u *flags; 8053 int cnt; /* length of "flags" */ 8054 char_u *fol; 8055 { 8056 /* We build the new tables here first, so that we can compare with the 8057 * previous one. 
*/ 8058 spelltab_T new_st; 8059 int i; 8060 char_u *p = fol; 8061 int c; 8062 8063 clear_spell_chartab(&new_st); 8064 8065 for (i = 0; i < 128; ++i) 8066 { 8067 if (i < cnt) 8068 { 8069 new_st.st_isw[i + 128] = (flags[i] & CF_WORD) != 0; 8070 new_st.st_isu[i + 128] = (flags[i] & CF_UPPER) != 0; 8071 } 8072 8073 if (*p != NUL) 8074 { 8075 #ifdef FEAT_MBYTE 8076 c = mb_ptr2char_adv(&p); 8077 #else 8078 c = *p++; 8079 #endif 8080 new_st.st_fold[i + 128] = c; 8081 if (i + 128 != c && new_st.st_isu[i + 128] && c < 256) 8082 new_st.st_upper[c] = i + 128; 8083 } 8084 } 8085 8086 (void)set_spell_finish(&new_st); 8087 } 8088 8089 static int 8090 set_spell_finish(new_st) 8091 spelltab_T *new_st; 8092 { 8093 int i; 8094 8095 if (did_set_spelltab) 8096 { 8097 /* check that it's the same table */ 8098 for (i = 0; i < 256; ++i) 8099 { 8100 if (spelltab.st_isw[i] != new_st->st_isw[i] 8101 || spelltab.st_isu[i] != new_st->st_isu[i] 8102 || spelltab.st_fold[i] != new_st->st_fold[i] 8103 || spelltab.st_upper[i] != new_st->st_upper[i]) 8104 { 8105 EMSG(_("E763: Word characters differ between spell files")); 8106 return FAIL; 8107 } 8108 } 8109 } 8110 else 8111 { 8112 /* copy the new spelltab into the one being used */ 8113 spelltab = *new_st; 8114 did_set_spelltab = TRUE; 8115 } 8116 8117 return OK; 8118 } 8119 8120 /* 8121 * Return TRUE if "p" points to a word character. 8122 * As a special case we see "midword" characters as word character when it is 8123 * followed by a word character. This finds they'there but not 'they there'. 8124 * Thus this only works properly when past the first character of the word. 
8125 */ 8126 static int 8127 spell_iswordp(p, buf) 8128 char_u *p; 8129 buf_T *buf; /* buffer used */ 8130 { 8131 #ifdef FEAT_MBYTE 8132 char_u *s; 8133 int l; 8134 int c; 8135 8136 if (has_mbyte) 8137 { 8138 l = MB_BYTE2LEN(*p); 8139 s = p; 8140 if (l == 1) 8141 { 8142 /* be quick for ASCII */ 8143 if (buf->b_spell_ismw[*p]) 8144 { 8145 s = p + 1; /* skip a mid-word character */ 8146 l = MB_BYTE2LEN(*s); 8147 } 8148 } 8149 else 8150 { 8151 c = mb_ptr2char(p); 8152 if (c < 256 ? buf->b_spell_ismw[c] 8153 : (buf->b_spell_ismw_mb != NULL 8154 && vim_strchr(buf->b_spell_ismw_mb, c) != NULL)) 8155 { 8156 s = p + l; 8157 l = MB_BYTE2LEN(*s); 8158 } 8159 } 8160 8161 c = mb_ptr2char(s); 8162 if (c > 255) 8163 return mb_get_class(s) >= 2; 8164 return spelltab.st_isw[c]; 8165 } 8166 #endif 8167 8168 return spelltab.st_isw[buf->b_spell_ismw[*p] ? p[1] : p[0]]; 8169 } 8170 8171 /* 8172 * Return TRUE if "p" points to a word character. 8173 * Unlike spell_iswordp() this doesn't check for "midword" characters. 8174 */ 8175 static int 8176 spell_iswordp_nmw(p) 8177 char_u *p; 8178 { 8179 #ifdef FEAT_MBYTE 8180 int c; 8181 8182 if (has_mbyte) 8183 { 8184 c = mb_ptr2char(p); 8185 if (c > 255) 8186 return mb_get_class(p) >= 2; 8187 return spelltab.st_isw[c]; 8188 } 8189 #endif 8190 return spelltab.st_isw[*p]; 8191 } 8192 8193 #ifdef FEAT_MBYTE 8194 /* 8195 * Return TRUE if "p" points to a word character. 8196 * Wide version of spell_iswordp(). 8197 */ 8198 static int 8199 spell_iswordp_w(p, buf) 8200 int *p; 8201 buf_T *buf; 8202 { 8203 int *s; 8204 8205 if (*p < 256 ? 
buf->b_spell_ismw[*p] 8206 : (buf->b_spell_ismw_mb != NULL 8207 && vim_strchr(buf->b_spell_ismw_mb, *p) != NULL)) 8208 s = p + 1; 8209 else 8210 s = p; 8211 8212 if (*s > 255) 8213 { 8214 if (enc_utf8) 8215 return utf_class(*s) >= 2; 8216 if (enc_dbcs) 8217 return dbcs_class((unsigned)*s >> 8, *s & 0xff) >= 2; 8218 return 0; 8219 } 8220 return spelltab.st_isw[*s]; 8221 } 8222 #endif 8223 8224 /* 8225 * Write the table with prefix conditions to the .spl file. 8226 * When "fd" is NULL only count the length of what is written. 8227 */ 8228 static int 8229 write_spell_prefcond(fd, gap) 8230 FILE *fd; 8231 garray_T *gap; 8232 { 8233 int i; 8234 char_u *p; 8235 int len; 8236 int totlen; 8237 8238 if (fd != NULL) 8239 put_bytes(fd, (long_u)gap->ga_len, 2); /* <prefcondcnt> */ 8240 8241 totlen = 2 + gap->ga_len; /* length of <prefcondcnt> and <condlen> bytes */ 8242 8243 for (i = 0; i < gap->ga_len; ++i) 8244 { 8245 /* <prefcond> : <condlen> <condstr> */ 8246 p = ((char_u **)gap->ga_data)[i]; 8247 if (p != NULL) 8248 { 8249 len = STRLEN(p); 8250 if (fd != NULL) 8251 { 8252 fputc(len, fd); 8253 fwrite(p, (size_t)len, (size_t)1, fd); 8254 } 8255 totlen += len; 8256 } 8257 else if (fd != NULL) 8258 fputc(0, fd); 8259 } 8260 8261 return totlen; 8262 } 8263 8264 /* 8265 * Case-fold "str[len]" into "buf[buflen]". The result is NUL terminated. 8266 * Uses the character definitions from the .spl file. 8267 * When using a multi-byte 'encoding' the length may change! 8268 * Returns FAIL when something wrong. 8269 */ 8270 static int 8271 spell_casefold(str, len, buf, buflen) 8272 char_u *str; 8273 int len; 8274 char_u *buf; 8275 int buflen; 8276 { 8277 int i; 8278 8279 if (len >= buflen) 8280 { 8281 buf[0] = NUL; 8282 return FAIL; /* result will not fit */ 8283 } 8284 8285 #ifdef FEAT_MBYTE 8286 if (has_mbyte) 8287 { 8288 int outi = 0; 8289 char_u *p; 8290 int c; 8291 8292 /* Fold one character at a time. 
*/ 8293 for (p = str; p < str + len; ) 8294 { 8295 if (outi + MB_MAXBYTES > buflen) 8296 { 8297 buf[outi] = NUL; 8298 return FAIL; 8299 } 8300 c = mb_cptr2char_adv(&p); 8301 outi += mb_char2bytes(SPELL_TOFOLD(c), buf + outi); 8302 } 8303 buf[outi] = NUL; 8304 } 8305 else 8306 #endif 8307 { 8308 /* Be quick for non-multibyte encodings. */ 8309 for (i = 0; i < len; ++i) 8310 buf[i] = spelltab.st_fold[str[i]]; 8311 buf[i] = NUL; 8312 } 8313 8314 return OK; 8315 } 8316 8317 #define SPS_BEST 1 8318 #define SPS_FAST 2 8319 #define SPS_DOUBLE 4 8320 8321 static int sps_flags = SPS_BEST; 8322 static int sps_limit = 9999; 8323 8324 /* 8325 * Check the 'spellsuggest' option. Return FAIL if it's wrong. 8326 * Sets "sps_flags" and "sps_limit". 8327 */ 8328 int 8329 spell_check_sps() 8330 { 8331 char_u *p; 8332 char_u *s; 8333 char_u buf[MAXPATHL]; 8334 int f; 8335 8336 sps_flags = 0; 8337 sps_limit = 9999; 8338 8339 for (p = p_sps; *p != NUL; ) 8340 { 8341 copy_option_part(&p, buf, MAXPATHL, ","); 8342 8343 f = 0; 8344 if (VIM_ISDIGIT(*buf)) 8345 { 8346 s = buf; 8347 sps_limit = getdigits(&s); 8348 if (*s != NUL && !VIM_ISDIGIT(*s)) 8349 f = -1; 8350 } 8351 else if (STRCMP(buf, "best") == 0) 8352 f = SPS_BEST; 8353 else if (STRCMP(buf, "fast") == 0) 8354 f = SPS_FAST; 8355 else if (STRCMP(buf, "double") == 0) 8356 f = SPS_DOUBLE; 8357 else if (STRNCMP(buf, "expr:", 5) != 0 8358 && STRNCMP(buf, "file:", 5) != 0) 8359 f = -1; 8360 8361 if (f == -1 || (sps_flags != 0 && f != 0)) 8362 { 8363 sps_flags = SPS_BEST; 8364 sps_limit = 9999; 8365 return FAIL; 8366 } 8367 if (f != 0) 8368 sps_flags = f; 8369 } 8370 8371 if (sps_flags == 0) 8372 sps_flags = SPS_BEST; 8373 8374 return OK; 8375 } 8376 8377 /* Remember what "z?" replaced. */ 8378 static char_u *repl_from = NULL; 8379 static char_u *repl_to = NULL; 8380 8381 /* 8382 * "z?": Find badly spelled word under or after the cursor. 8383 * Give suggestions for the properly spelled word. 
8384 * When "count" is non-zero use that suggestion. 8385 */ 8386 void 8387 spell_suggest(count) 8388 int count; 8389 { 8390 char_u *line; 8391 pos_T prev_cursor = curwin->w_cursor; 8392 char_u wcopy[MAXWLEN + 2]; 8393 char_u *p; 8394 int i; 8395 int c; 8396 suginfo_T sug; 8397 suggest_T *stp; 8398 int mouse_used; 8399 int need_cap; 8400 int limit; 8401 int selected = count; 8402 8403 /* Find the start of the badly spelled word. */ 8404 if (spell_move_to(curwin, FORWARD, TRUE, TRUE, NULL) == 0 8405 || curwin->w_cursor.col > prev_cursor.col) 8406 { 8407 if (!curwin->w_p_spell || *curbuf->b_p_spl == NUL) 8408 return; 8409 8410 /* No bad word or it starts after the cursor: use the word under the 8411 * cursor. */ 8412 curwin->w_cursor = prev_cursor; 8413 line = ml_get_curline(); 8414 p = line + curwin->w_cursor.col; 8415 /* Backup to before start of word. */ 8416 while (p > line && spell_iswordp_nmw(p)) 8417 mb_ptr_back(line, p); 8418 /* Forward to start of word. */ 8419 while (*p != NUL && !spell_iswordp_nmw(p)) 8420 mb_ptr_adv(p); 8421 8422 if (!spell_iswordp_nmw(p)) /* No word found. */ 8423 { 8424 beep_flush(); 8425 return; 8426 } 8427 curwin->w_cursor.col = p - line; 8428 } 8429 8430 /* Get the word and its length. */ 8431 8432 /* Figure out if the word should be capitalised. */ 8433 need_cap = check_need_cap(curwin->w_cursor.lnum, curwin->w_cursor.col); 8434 8435 line = ml_get_curline(); 8436 8437 /* Get the list of suggestions. Limit to 'lines' - 2 or the number in 8438 * 'spellsuggest', whatever is smaller. 
*/ 8439 if (sps_limit > (int)Rows - 2) 8440 limit = (int)Rows - 2; 8441 else 8442 limit = sps_limit; 8443 spell_find_suggest(line + curwin->w_cursor.col, &sug, limit, 8444 TRUE, need_cap); 8445 8446 if (sug.su_ga.ga_len == 0) 8447 MSG(_("Sorry, no suggestions")); 8448 else if (count > 0) 8449 { 8450 if (count > sug.su_ga.ga_len) 8451 smsg((char_u *)_("Sorry, only %ld suggestions"), 8452 (long)sug.su_ga.ga_len); 8453 } 8454 else 8455 { 8456 vim_free(repl_from); 8457 repl_from = NULL; 8458 vim_free(repl_to); 8459 repl_to = NULL; 8460 8461 #ifdef FEAT_RIGHTLEFT 8462 /* When 'rightleft' is set the list is drawn right-left. */ 8463 cmdmsg_rl = curwin->w_p_rl; 8464 if (cmdmsg_rl) 8465 msg_col = Columns - 1; 8466 #endif 8467 8468 /* List the suggestions. */ 8469 msg_start(); 8470 lines_left = Rows; /* avoid more prompt */ 8471 vim_snprintf((char *)IObuff, IOSIZE, _("Change \"%.*s\" to:"), 8472 sug.su_badlen, sug.su_badptr); 8473 #ifdef FEAT_RIGHTLEFT 8474 if (cmdmsg_rl && STRNCMP(IObuff, "Change", 6) == 0) 8475 { 8476 /* And now the rabbit from the high hat: Avoid showing the 8477 * untranslated message rightleft. */ 8478 vim_snprintf((char *)IObuff, IOSIZE, ":ot \"%.*s\" egnahC", 8479 sug.su_badlen, sug.su_badptr); 8480 } 8481 #endif 8482 msg_puts(IObuff); 8483 msg_clr_eos(); 8484 msg_putchar('\n'); 8485 8486 msg_scroll = TRUE; 8487 for (i = 0; i < sug.su_ga.ga_len; ++i) 8488 { 8489 stp = &SUG(sug.su_ga, i); 8490 8491 /* The suggested word may replace only part of the bad word, add 8492 * the not replaced part. 
*/ 8493 STRCPY(wcopy, stp->st_word); 8494 if (sug.su_badlen > stp->st_orglen) 8495 vim_strncpy(wcopy + STRLEN(wcopy), 8496 sug.su_badptr + stp->st_orglen, 8497 sug.su_badlen - stp->st_orglen); 8498 vim_snprintf((char *)IObuff, IOSIZE, "%2d", i + 1); 8499 #ifdef FEAT_RIGHTLEFT 8500 if (cmdmsg_rl) 8501 rl_mirror(IObuff); 8502 #endif 8503 msg_puts(IObuff); 8504 8505 vim_snprintf((char *)IObuff, IOSIZE, " \"%s\"", wcopy); 8506 msg_puts(IObuff); 8507 8508 /* The word may replace more than "su_badlen". */ 8509 if (sug.su_badlen < stp->st_orglen) 8510 { 8511 vim_snprintf((char *)IObuff, IOSIZE, _(" < \"%.*s\""), 8512 stp->st_orglen, sug.su_badptr); 8513 msg_puts(IObuff); 8514 } 8515 8516 if (p_verbose > 0) 8517 { 8518 /* Add the score. */ 8519 if (sps_flags & (SPS_DOUBLE | SPS_BEST)) 8520 vim_snprintf((char *)IObuff, IOSIZE, " (%s%d - %d)", 8521 stp->st_salscore ? "s " : "", 8522 stp->st_score, stp->st_altscore); 8523 else 8524 vim_snprintf((char *)IObuff, IOSIZE, " (%d)", 8525 stp->st_score); 8526 #ifdef FEAT_RIGHTLEFT 8527 if (cmdmsg_rl) 8528 /* Mirror the numbers, but keep the leading space. */ 8529 rl_mirror(IObuff + 1); 8530 #endif 8531 msg_advance(30); 8532 msg_puts(IObuff); 8533 } 8534 msg_putchar('\n'); 8535 } 8536 8537 #ifdef FEAT_RIGHTLEFT 8538 cmdmsg_rl = FALSE; 8539 msg_col = 0; 8540 #endif 8541 /* Ask for choice. */ 8542 selected = prompt_for_number(&mouse_used); 8543 if (mouse_used) 8544 selected -= lines_left; 8545 } 8546 8547 if (selected > 0 && selected <= sug.su_ga.ga_len && u_save_cursor() == OK) 8548 { 8549 /* Save the from and to text for :spellrepall. */ 8550 stp = &SUG(sug.su_ga, selected - 1); 8551 repl_from = vim_strnsave(sug.su_badptr, stp->st_orglen); 8552 repl_to = vim_strsave(stp->st_word); 8553 8554 /* Replace the word. 
*/ 8555 p = alloc(STRLEN(line) - stp->st_orglen + STRLEN(stp->st_word) + 1); 8556 if (p != NULL) 8557 { 8558 c = sug.su_badptr - line; 8559 mch_memmove(p, line, c); 8560 STRCPY(p + c, stp->st_word); 8561 STRCAT(p, sug.su_badptr + stp->st_orglen); 8562 ml_replace(curwin->w_cursor.lnum, p, FALSE); 8563 curwin->w_cursor.col = c; 8564 changed_bytes(curwin->w_cursor.lnum, c); 8565 8566 /* For redo we use a change-word command. */ 8567 ResetRedobuff(); 8568 AppendToRedobuff((char_u *)"ciw"); 8569 AppendToRedobuff(stp->st_word); 8570 AppendCharToRedobuff(ESC); 8571 } 8572 } 8573 else 8574 curwin->w_cursor = prev_cursor; 8575 8576 spell_find_cleanup(&sug); 8577 } 8578 8579 /* 8580 * Check if the word at line "lnum" column "col" is required to start with a 8581 * capital. This uses 'spellcapcheck' of the current buffer. 8582 */ 8583 static int 8584 check_need_cap(lnum, col) 8585 linenr_T lnum; 8586 colnr_T col; 8587 { 8588 int need_cap = FALSE; 8589 char_u *line; 8590 char_u *line_copy = NULL; 8591 char_u *p; 8592 colnr_T endcol; 8593 regmatch_T regmatch; 8594 8595 if (curbuf->b_cap_prog == NULL) 8596 return FALSE; 8597 8598 line = ml_get_curline(); 8599 endcol = 0; 8600 if ((int)(skipwhite(line) - line) >= (int)col) 8601 { 8602 /* At start of line, check if previous line is empty or sentence 8603 * ends there. */ 8604 if (lnum == 1) 8605 need_cap = TRUE; 8606 else 8607 { 8608 line = ml_get(lnum - 1); 8609 if (*skipwhite(line) == NUL) 8610 need_cap = TRUE; 8611 else 8612 { 8613 /* Append a space in place of the line break. */ 8614 line_copy = concat_str(line, (char_u *)" "); 8615 line = line_copy; 8616 endcol = STRLEN(line); 8617 } 8618 } 8619 } 8620 else 8621 endcol = col; 8622 8623 if (endcol > 0) 8624 { 8625 /* Check if sentence ends before the bad word. 
*/ 8626 regmatch.regprog = curbuf->b_cap_prog; 8627 regmatch.rm_ic = FALSE; 8628 p = line + endcol; 8629 for (;;) 8630 { 8631 mb_ptr_back(line, p); 8632 if (p == line || spell_iswordp_nmw(p)) 8633 break; 8634 if (vim_regexec(®match, p, 0) 8635 && regmatch.endp[0] == line + endcol) 8636 { 8637 need_cap = TRUE; 8638 break; 8639 } 8640 } 8641 } 8642 8643 vim_free(line_copy); 8644 8645 return need_cap; 8646 } 8647 8648 8649 /* 8650 * ":spellrepall" 8651 */ 8652 /*ARGSUSED*/ 8653 void 8654 ex_spellrepall(eap) 8655 exarg_T *eap; 8656 { 8657 pos_T pos = curwin->w_cursor; 8658 char_u *frompat; 8659 int addlen; 8660 char_u *line; 8661 char_u *p; 8662 int save_ws = p_ws; 8663 linenr_T prev_lnum = 0; 8664 8665 if (repl_from == NULL || repl_to == NULL) 8666 { 8667 EMSG(_("E752: No previous spell replacement")); 8668 return; 8669 } 8670 addlen = STRLEN(repl_to) - STRLEN(repl_from); 8671 8672 frompat = alloc(STRLEN(repl_from) + 7); 8673 if (frompat == NULL) 8674 return; 8675 sprintf((char *)frompat, "\\V\\<%s\\>", repl_from); 8676 p_ws = FALSE; 8677 8678 sub_nsubs = 0; 8679 sub_nlines = 0; 8680 curwin->w_cursor.lnum = 0; 8681 while (!got_int) 8682 { 8683 if (do_search(NULL, '/', frompat, 1L, SEARCH_KEEP) == 0 8684 || u_save_cursor() == FAIL) 8685 break; 8686 8687 /* Only replace when the right word isn't there yet. This happens 8688 * when changing "etc" to "etc.". 
*/ 8689 line = ml_get_curline(); 8690 if (addlen <= 0 || STRNCMP(line + curwin->w_cursor.col, 8691 repl_to, STRLEN(repl_to)) != 0) 8692 { 8693 p = alloc(STRLEN(line) + addlen + 1); 8694 if (p == NULL) 8695 break; 8696 mch_memmove(p, line, curwin->w_cursor.col); 8697 STRCPY(p + curwin->w_cursor.col, repl_to); 8698 STRCAT(p, line + curwin->w_cursor.col + STRLEN(repl_from)); 8699 ml_replace(curwin->w_cursor.lnum, p, FALSE); 8700 changed_bytes(curwin->w_cursor.lnum, curwin->w_cursor.col); 8701 8702 if (curwin->w_cursor.lnum != prev_lnum) 8703 { 8704 ++sub_nlines; 8705 prev_lnum = curwin->w_cursor.lnum; 8706 } 8707 ++sub_nsubs; 8708 } 8709 curwin->w_cursor.col += STRLEN(repl_to); 8710 } 8711 8712 p_ws = save_ws; 8713 curwin->w_cursor = pos; 8714 vim_free(frompat); 8715 8716 if (sub_nsubs == 0) 8717 EMSG2(_("E753: Not found: %s"), repl_from); 8718 else 8719 do_sub_msg(FALSE); 8720 } 8721 8722 /* 8723 * Find spell suggestions for "word". Return them in the growarray "*gap" as 8724 * a list of allocated strings. 8725 */ 8726 void 8727 spell_suggest_list(gap, word, maxcount, need_cap) 8728 garray_T *gap; 8729 char_u *word; 8730 int maxcount; /* maximum nr of suggestions */ 8731 int need_cap; /* 'spellcapcheck' matched */ 8732 { 8733 suginfo_T sug; 8734 int i; 8735 suggest_T *stp; 8736 char_u *wcopy; 8737 8738 spell_find_suggest(word, &sug, maxcount, FALSE, need_cap); 8739 8740 /* Make room in "gap". */ 8741 ga_init2(gap, sizeof(char_u *), sug.su_ga.ga_len + 1); 8742 if (ga_grow(gap, sug.su_ga.ga_len) == FAIL) 8743 return; 8744 8745 for (i = 0; i < sug.su_ga.ga_len; ++i) 8746 { 8747 stp = &SUG(sug.su_ga, i); 8748 8749 /* The suggested word may replace only part of "word", add the not 8750 * replaced part. 
*/ 8751 wcopy = alloc(STRLEN(stp->st_word) 8752 + STRLEN(sug.su_badptr + stp->st_orglen) + 1); 8753 if (wcopy == NULL) 8754 break; 8755 STRCPY(wcopy, stp->st_word); 8756 STRCAT(wcopy, sug.su_badptr + stp->st_orglen); 8757 ((char_u **)gap->ga_data)[gap->ga_len++] = wcopy; 8758 } 8759 8760 spell_find_cleanup(&sug); 8761 } 8762 8763 /* 8764 * Find spell suggestions for the word at the start of "badptr". 8765 * Return the suggestions in "su->su_ga". 8766 * The maximum number of suggestions is "maxcount". 8767 * Note: does use info for the current window. 8768 * This is based on the mechanisms of Aspell, but completely reimplemented. 8769 */ 8770 static void 8771 spell_find_suggest(badptr, su, maxcount, banbadword, need_cap) 8772 char_u *badptr; 8773 suginfo_T *su; 8774 int maxcount; 8775 int banbadword; /* don't include badword in suggestions */ 8776 int need_cap; /* word should start with capital */ 8777 { 8778 hlf_T attr = HLF_COUNT; 8779 char_u buf[MAXPATHL]; 8780 char_u *p; 8781 int do_combine = FALSE; 8782 char_u *sps_copy; 8783 #ifdef FEAT_EVAL 8784 static int expr_busy = FALSE; 8785 #endif 8786 int c; 8787 int i; 8788 langp_T *lp; 8789 8790 /* 8791 * Set the info in "*su". 
8792 */ 8793 vim_memset(su, 0, sizeof(suginfo_T)); 8794 ga_init2(&su->su_ga, (int)sizeof(suggest_T), 10); 8795 ga_init2(&su->su_sga, (int)sizeof(suggest_T), 10); 8796 if (*badptr == NUL) 8797 return; 8798 hash_init(&su->su_banned); 8799 8800 su->su_badptr = badptr; 8801 su->su_badlen = spell_check(curwin, su->su_badptr, &attr, NULL); 8802 su->su_maxcount = maxcount; 8803 su->su_maxscore = SCORE_MAXINIT; 8804 8805 if (su->su_badlen >= MAXWLEN) 8806 su->su_badlen = MAXWLEN - 1; /* just in case */ 8807 vim_strncpy(su->su_badword, su->su_badptr, su->su_badlen); 8808 (void)spell_casefold(su->su_badptr, su->su_badlen, 8809 su->su_fbadword, MAXWLEN); 8810 /* get caps flags for bad word */ 8811 su->su_badflags = badword_captype(su->su_badptr, 8812 su->su_badptr + su->su_badlen); 8813 if (need_cap) 8814 su->su_badflags |= WF_ONECAP; 8815 8816 /* Find the default language for sound folding. We simply use the first 8817 * one in 'spelllang' that supports sound folding. That's good for when 8818 * using multiple files for one language, it's not that bad when mixing 8819 * languages (e.g., "pl,en"). */ 8820 for (i = 0; i < curbuf->b_langp.ga_len; ++i) 8821 { 8822 lp = LANGP_ENTRY(curbuf->b_langp, i); 8823 if (lp->lp_sallang != NULL) 8824 { 8825 su->su_sallang = lp->lp_sallang; 8826 break; 8827 } 8828 } 8829 8830 /* Soundfold the bad word with the default sound folding, so that we don't 8831 * have to do this many times. */ 8832 if (su->su_sallang != NULL) 8833 spell_soundfold(su->su_sallang, su->su_fbadword, TRUE, 8834 su->su_sal_badword); 8835 8836 /* If the word is not capitalised and spell_check() doesn't consider the 8837 * word to be bad then it might need to be capitalised. Add a suggestion 8838 * for that. 
*/ 8839 c = PTR2CHAR(su->su_badptr); 8840 if (!SPELL_ISUPPER(c) && attr == HLF_COUNT) 8841 { 8842 make_case_word(su->su_badword, buf, WF_ONECAP); 8843 add_suggestion(su, &su->su_ga, buf, su->su_badlen, SCORE_ICASE, 8844 0, TRUE, su->su_sallang); 8845 } 8846 8847 /* Ban the bad word itself. It may appear in another region. */ 8848 if (banbadword) 8849 add_banned(su, su->su_badword); 8850 8851 /* Make a copy of 'spellsuggest', because the expression may change it. */ 8852 sps_copy = vim_strsave(p_sps); 8853 if (sps_copy == NULL) 8854 return; 8855 8856 /* Loop over the items in 'spellsuggest'. */ 8857 for (p = sps_copy; *p != NUL; ) 8858 { 8859 copy_option_part(&p, buf, MAXPATHL, ","); 8860 8861 if (STRNCMP(buf, "expr:", 5) == 0) 8862 { 8863 #ifdef FEAT_EVAL 8864 /* Evaluate an expression. Skip this when called recursively, 8865 * when using spellsuggest() in the expression. */ 8866 if (!expr_busy) 8867 { 8868 expr_busy = TRUE; 8869 spell_suggest_expr(su, buf + 5); 8870 expr_busy = FALSE; 8871 } 8872 #endif 8873 } 8874 else if (STRNCMP(buf, "file:", 5) == 0) 8875 /* Use list of suggestions in a file. */ 8876 spell_suggest_file(su, buf + 5); 8877 else 8878 { 8879 /* Use internal method. */ 8880 spell_suggest_intern(su); 8881 if (sps_flags & SPS_DOUBLE) 8882 do_combine = TRUE; 8883 } 8884 } 8885 8886 vim_free(sps_copy); 8887 8888 if (do_combine) 8889 /* Combine the two list of suggestions. This must be done last, 8890 * because sorting changes the order again. */ 8891 score_combine(su); 8892 } 8893 8894 #ifdef FEAT_EVAL 8895 /* 8896 * Find suggestions by evaluating expression "expr". 8897 */ 8898 static void 8899 spell_suggest_expr(su, expr) 8900 suginfo_T *su; 8901 char_u *expr; 8902 { 8903 list_T *list; 8904 listitem_T *li; 8905 int score; 8906 char_u *p; 8907 8908 /* The work is split up in a few parts to avoid having to export 8909 * suginfo_T. 8910 * First evaluate the expression and get the resulting list. 
*/ 8911 list = eval_spell_expr(su->su_badword, expr); 8912 if (list != NULL) 8913 { 8914 /* Loop over the items in the list. */ 8915 for (li = list->lv_first; li != NULL; li = li->li_next) 8916 if (li->li_tv.v_type == VAR_LIST) 8917 { 8918 /* Get the word and the score from the items. */ 8919 score = get_spellword(li->li_tv.vval.v_list, &p); 8920 if (score >= 0) 8921 add_suggestion(su, &su->su_ga, p, 8922 su->su_badlen, score, 0, TRUE, su->su_sallang); 8923 } 8924 list_unref(list); 8925 } 8926 8927 /* Sort the suggestions and truncate at "maxcount". */ 8928 (void)cleanup_suggestions(&su->su_ga, su->su_maxscore, su->su_maxcount); 8929 } 8930 #endif 8931 8932 /* 8933 * Find suggestions in file "fname". Used for "file:" in 'spellsuggest'. 8934 */ 8935 static void 8936 spell_suggest_file(su, fname) 8937 suginfo_T *su; 8938 char_u *fname; 8939 { 8940 FILE *fd; 8941 char_u line[MAXWLEN * 2]; 8942 char_u *p; 8943 int len; 8944 char_u cword[MAXWLEN]; 8945 8946 /* Open the file. */ 8947 fd = mch_fopen((char *)fname, "r"); 8948 if (fd == NULL) 8949 { 8950 EMSG2(_(e_notopen), fname); 8951 return; 8952 } 8953 8954 /* Read it line by line. */ 8955 while (!vim_fgets(line, MAXWLEN * 2, fd) && !got_int) 8956 { 8957 line_breakcheck(); 8958 8959 p = vim_strchr(line, '/'); 8960 if (p == NULL) 8961 continue; /* No Tab found, just skip the line. */ 8962 *p++ = NUL; 8963 if (STRICMP(su->su_badword, line) == 0) 8964 { 8965 /* Match! Isolate the good word, until CR or NL. */ 8966 for (len = 0; p[len] >= ' '; ++len) 8967 ; 8968 p[len] = NUL; 8969 8970 /* If the suggestion doesn't have specific case duplicate the case 8971 * of the bad word. */ 8972 if (captype(p, NULL) == 0) 8973 { 8974 make_case_word(p, cword, su->su_badflags); 8975 p = cword; 8976 } 8977 8978 add_suggestion(su, &su->su_ga, p, su->su_badlen, 8979 SCORE_FILE, 0, TRUE, su->su_sallang); 8980 } 8981 } 8982 8983 fclose(fd); 8984 8985 /* Sort the suggestions and truncate at "maxcount". 
*/ 8986 (void)cleanup_suggestions(&su->su_ga, su->su_maxscore, su->su_maxcount); 8987 } 8988 8989 /* 8990 * Find suggestions for the internal method indicated by "sps_flags". 8991 */ 8992 static void 8993 spell_suggest_intern(su) 8994 suginfo_T *su; 8995 { 8996 /* 8997 * 1. Try special cases, such as repeating a word: "the the" -> "the". 8998 * 8999 * Set a maximum score to limit the combination of operations that is 9000 * tried. 9001 */ 9002 suggest_try_special(su); 9003 9004 /* 9005 * 2. Try inserting/deleting/swapping/changing a letter, use REP entries 9006 * from the .aff file and inserting a space (split the word). 9007 */ 9008 suggest_try_change(su); 9009 9010 /* For the resulting top-scorers compute the sound-a-like score. */ 9011 if (sps_flags & SPS_DOUBLE) 9012 score_comp_sal(su); 9013 9014 /* 9015 * 3. Try finding sound-a-like words. 9016 * 9017 * Only do this when we don't have a lot of suggestions yet, because it's 9018 * very slow and often doesn't find new suggestions. 9019 */ 9020 if ((sps_flags & SPS_DOUBLE) 9021 || (!(sps_flags & SPS_FAST) 9022 && su->su_ga.ga_len < SUG_CLEAN_COUNT(su))) 9023 { 9024 /* Allow a higher score now. */ 9025 su->su_maxscore = SCORE_MAXMAX; 9026 suggest_try_soundalike(su); 9027 } 9028 9029 /* When CTRL-C was hit while searching do show the results. */ 9030 ui_breakcheck(); 9031 if (got_int) 9032 { 9033 (void)vgetc(); 9034 got_int = FALSE; 9035 } 9036 9037 if ((sps_flags & SPS_DOUBLE) == 0 && su->su_ga.ga_len != 0) 9038 { 9039 if (sps_flags & SPS_BEST) 9040 /* Adjust the word score for how it sounds like. */ 9041 rescore_suggestions(su); 9042 9043 /* Sort the suggestions and truncate at "maxcount". */ 9044 (void)cleanup_suggestions(&su->su_ga, su->su_maxscore, su->su_maxcount); 9045 } 9046 } 9047 9048 /* 9049 * Free the info put in "*su" by spell_find_suggest(). 9050 */ 9051 static void 9052 spell_find_cleanup(su) 9053 suginfo_T *su; 9054 { 9055 int i; 9056 9057 /* Free the suggestions. 
*/ 9058 for (i = 0; i < su->su_ga.ga_len; ++i) 9059 vim_free(SUG(su->su_ga, i).st_word); 9060 ga_clear(&su->su_ga); 9061 for (i = 0; i < su->su_sga.ga_len; ++i) 9062 vim_free(SUG(su->su_sga, i).st_word); 9063 ga_clear(&su->su_sga); 9064 9065 /* Free the banned words. */ 9066 free_banned(su); 9067 } 9068 9069 /* 9070 * Make a copy of "word", with the first letter upper or lower cased, to 9071 * "wcopy[MAXWLEN]". "word" must not be empty. 9072 * The result is NUL terminated. 9073 */ 9074 static void 9075 onecap_copy(word, wcopy, upper) 9076 char_u *word; 9077 char_u *wcopy; 9078 int upper; /* TRUE: first letter made upper case */ 9079 { 9080 char_u *p; 9081 int c; 9082 int l; 9083 9084 p = word; 9085 #ifdef FEAT_MBYTE 9086 if (has_mbyte) 9087 c = mb_cptr2char_adv(&p); 9088 else 9089 #endif 9090 c = *p++; 9091 if (upper) 9092 c = SPELL_TOUPPER(c); 9093 else 9094 c = SPELL_TOFOLD(c); 9095 #ifdef FEAT_MBYTE 9096 if (has_mbyte) 9097 l = mb_char2bytes(c, wcopy); 9098 else 9099 #endif 9100 { 9101 l = 1; 9102 wcopy[0] = c; 9103 } 9104 vim_strncpy(wcopy + l, p, MAXWLEN - l - 1); 9105 } 9106 9107 /* 9108 * Make a copy of "word" with all the letters upper cased into 9109 * "wcopy[MAXWLEN]". The result is NUL terminated. 9110 */ 9111 static void 9112 allcap_copy(word, wcopy) 9113 char_u *word; 9114 char_u *wcopy; 9115 { 9116 char_u *s; 9117 char_u *d; 9118 int c; 9119 9120 d = wcopy; 9121 for (s = word; *s != NUL; ) 9122 { 9123 #ifdef FEAT_MBYTE 9124 if (has_mbyte) 9125 c = mb_cptr2char_adv(&s); 9126 else 9127 #endif 9128 c = *s++; 9129 9130 #ifdef FEAT_MBYTE 9131 /* We only change � to SS when we are certain latin1 is used. It 9132 * would cause weird errors in other 8-bit encodings. 
*/ 9133 if (enc_latin1like && c == 0xdf) 9134 { 9135 c = 'S'; 9136 if (d - wcopy >= MAXWLEN - 1) 9137 break; 9138 *d++ = c; 9139 } 9140 else 9141 #endif 9142 c = SPELL_TOUPPER(c); 9143 9144 #ifdef FEAT_MBYTE 9145 if (has_mbyte) 9146 { 9147 if (d - wcopy >= MAXWLEN - MB_MAXBYTES) 9148 break; 9149 d += mb_char2bytes(c, d); 9150 } 9151 else 9152 #endif 9153 { 9154 if (d - wcopy >= MAXWLEN - 1) 9155 break; 9156 *d++ = c; 9157 } 9158 } 9159 *d = NUL; 9160 } 9161 9162 /* 9163 * Try finding suggestions by recognizing specific situations. 9164 */ 9165 static void 9166 suggest_try_special(su) 9167 suginfo_T *su; 9168 { 9169 char_u *p; 9170 size_t len; 9171 int c; 9172 char_u word[MAXWLEN]; 9173 9174 /* 9175 * Recognize a word that is repeated: "the the". 9176 */ 9177 p = skiptowhite(su->su_fbadword); 9178 len = p - su->su_fbadword; 9179 p = skipwhite(p); 9180 if (STRLEN(p) == len && STRNCMP(su->su_fbadword, p, len) == 0) 9181 { 9182 /* Include badflags: if the badword is onecap or allcap 9183 * use that for the goodword too: "The the" -> "The". */ 9184 c = su->su_fbadword[len]; 9185 su->su_fbadword[len] = NUL; 9186 make_case_word(su->su_fbadword, word, su->su_badflags); 9187 su->su_fbadword[len] = c; 9188 9189 /* Give a soundalike score of 0, compute the score as if deleting one 9190 * character. */ 9191 add_suggestion(su, &su->su_ga, word, su->su_badlen, 9192 RESCORE(SCORE_REP, 0), 0, TRUE, su->su_sallang); 9193 } 9194 } 9195 9196 /* 9197 * Try finding suggestions by adding/removing/swapping letters. 9198 * 9199 * This uses a state machine. At each node in the tree we try various 9200 * operations. When trying if an operation work "depth" is increased and the 9201 * stack[] is used to store info. This allows combinations, thus insert one 9202 * character, replace one and delete another. The number of changes is 9203 * limited by su->su_maxscore, checked in try_deeper(). 
9204 * 9205 * After implementing this I noticed an article by Kemal Oflazer that 9206 * describes something similar: "Error-tolerant Finite State Recognition with 9207 * Applications to Morphological Analysis and Spelling Correction" (1996). 9208 * The implementation in the article is simplified and requires a stack of 9209 * unknown depth. The implementation here only needs a stack depth of the 9210 * length of the word. 9211 */ 9212 static void 9213 suggest_try_change(su) 9214 suginfo_T *su; 9215 { 9216 char_u fword[MAXWLEN]; /* copy of the bad word, case-folded */ 9217 char_u tword[MAXWLEN]; /* good word collected so far */ 9218 trystate_T stack[MAXWLEN]; 9219 char_u preword[MAXWLEN * 3]; /* word found with proper case; 9220 * concatanation of prefix compound 9221 * words and split word. NUL terminated 9222 * when going deeper but not when coming 9223 * back. */ 9224 char_u compflags[MAXWLEN]; /* compound flags, one for each word */ 9225 trystate_T *sp; 9226 int newscore; 9227 langp_T *lp; 9228 char_u *byts, *fbyts, *pbyts; 9229 idx_T *idxs, *fidxs, *pidxs; 9230 int depth; 9231 int c, c2, c3; 9232 int n; 9233 int flags; 9234 garray_T *gap; 9235 idx_T arridx; 9236 int len; 9237 char_u *p; 9238 fromto_T *ftp; 9239 int fl = 0, tl; 9240 int repextra = 0; /* extra bytes in fword[] from REP item */ 9241 slang_T *slang; 9242 int fword_ends; 9243 int lpi; 9244 int maysplit; 9245 int goodword_ends; 9246 9247 /* We make a copy of the case-folded bad word, so that we can modify it 9248 * to find matches (esp. REP items). Append some more text, changing 9249 * chars after the bad word may help. 
*/ 9250 STRCPY(fword, su->su_fbadword); 9251 n = STRLEN(fword); 9252 p = su->su_badptr + su->su_badlen; 9253 (void)spell_casefold(p, STRLEN(p), fword + n, MAXWLEN - n); 9254 9255 for (lpi = 0; lpi < curbuf->b_langp.ga_len; ++lpi) 9256 { 9257 lp = LANGP_ENTRY(curbuf->b_langp, lpi); 9258 slang = lp->lp_slang; 9259 9260 /* If reloading a spell file fails it's still in the list but 9261 * everything has been cleared. */ 9262 if (slang->sl_fbyts == NULL) 9263 continue; 9264 9265 /* 9266 * Go through the whole case-fold tree, try changes at each node. 9267 * "tword[]" contains the word collected from nodes in the tree. 9268 * "fword[]" the word we are trying to match with (initially the bad 9269 * word). 9270 */ 9271 depth = 0; 9272 sp = &stack[0]; 9273 vim_memset(sp, 0, sizeof(trystate_T)); 9274 sp->ts_curi = 1; 9275 9276 /* 9277 * When there are postponed prefixes we need to use these first. At 9278 * the end of the prefix we continue in the case-fold tree. 9279 */ 9280 fbyts = slang->sl_fbyts; 9281 fidxs = slang->sl_fidxs; 9282 pbyts = slang->sl_pbyts; 9283 pidxs = slang->sl_pidxs; 9284 if (pbyts != NULL) 9285 { 9286 byts = pbyts; 9287 idxs = pidxs; 9288 sp->ts_prefixdepth = PFD_PREFIXTREE; 9289 sp->ts_state = STATE_NOPREFIX; /* try without prefix first */ 9290 } 9291 else 9292 { 9293 byts = fbyts; 9294 idxs = fidxs; 9295 sp->ts_prefixdepth = PFD_NOPREFIX; 9296 sp->ts_state = STATE_START; 9297 } 9298 9299 /* 9300 * Loop to find all suggestions. At each round we either: 9301 * - For the current state try one operation, advance "ts_curi", 9302 * increase "depth". 9303 * - When a state is done go to the next, set "ts_state". 9304 * - When all states are tried decrease "depth". 9305 */ 9306 while (depth >= 0 && !got_int) 9307 { 9308 sp = &stack[depth]; 9309 switch (sp->ts_state) 9310 { 9311 case STATE_START: 9312 case STATE_NOPREFIX: 9313 /* 9314 * Start of node: Deal with NUL bytes, which means 9315 * tword[] may end here. 
9316 */ 9317 arridx = sp->ts_arridx; /* current node in the tree */ 9318 len = byts[arridx]; /* bytes in this node */ 9319 arridx += sp->ts_curi; /* index of current byte */ 9320 9321 if (sp->ts_prefixdepth == PFD_PREFIXTREE) 9322 { 9323 /* Skip over the NUL bytes, we use them later. */ 9324 for (n = 0; n < len && byts[arridx + n] == 0; ++n) 9325 ; 9326 sp->ts_curi += n; 9327 9328 /* Always past NUL bytes now. */ 9329 n = (int)sp->ts_state; 9330 sp->ts_state = STATE_ENDNUL; 9331 sp->ts_save_badflags = su->su_badflags; 9332 9333 /* At end of a prefix or at start of prefixtree: check for 9334 * following word. */ 9335 if (byts[arridx] == 0 || n == (int)STATE_NOPREFIX) 9336 { 9337 /* Set su->su_badflags to the caps type at this 9338 * position. Use the caps type until here for the 9339 * prefix itself. */ 9340 #ifdef FEAT_MBYTE 9341 if (has_mbyte) 9342 n = nofold_len(fword, sp->ts_fidx, su->su_badptr); 9343 else 9344 #endif 9345 n = sp->ts_fidx; 9346 flags = badword_captype(su->su_badptr, 9347 su->su_badptr + n); 9348 su->su_badflags = badword_captype(su->su_badptr + n, 9349 su->su_badptr + su->su_badlen); 9350 ++depth; 9351 stack[depth] = stack[depth - 1]; 9352 sp = &stack[depth]; 9353 sp->ts_prefixdepth = depth - 1; 9354 byts = fbyts; 9355 idxs = fidxs; 9356 sp->ts_state = STATE_START; 9357 sp->ts_curi = 1; /* start just after length byte */ 9358 sp->ts_arridx = 0; 9359 9360 /* Move the prefix to preword[] with the right case 9361 * and make find_keepcap_word() works. */ 9362 tword[sp->ts_twordlen] = NUL; 9363 make_case_word(tword + sp->ts_splitoff, 9364 preword + sp->ts_prewordlen, 9365 flags); 9366 sp->ts_prewordlen = STRLEN(preword); 9367 sp->ts_splitoff = sp->ts_twordlen; 9368 } 9369 break; 9370 } 9371 9372 if (sp->ts_curi > len || byts[arridx] != 0) 9373 { 9374 /* Past bytes in node and/or past NUL bytes. */ 9375 sp->ts_state = STATE_ENDNUL; 9376 sp->ts_save_badflags = su->su_badflags; 9377 break; 9378 } 9379 9380 /* 9381 * End of word in tree. 
9382 */ 9383 ++sp->ts_curi; /* eat one NUL byte */ 9384 9385 flags = (int)idxs[arridx]; 9386 fword_ends = (fword[sp->ts_fidx] == NUL 9387 || !spell_iswordp(fword + sp->ts_fidx, curbuf)); 9388 tword[sp->ts_twordlen] = NUL; 9389 9390 if (sp->ts_prefixdepth <= PFD_NOTSPECIAL 9391 && (sp->ts_flags & TSF_PREFIXOK) == 0) 9392 { 9393 /* There was a prefix before the word. Check that the 9394 * prefix can be used with this word. */ 9395 /* Count the length of the NULs in the prefix. If there 9396 * are none this must be the first try without a prefix. 9397 */ 9398 n = stack[sp->ts_prefixdepth].ts_arridx; 9399 len = pbyts[n++]; 9400 for (c = 0; c < len && pbyts[n + c] == 0; ++c) 9401 ; 9402 if (c > 0) 9403 { 9404 c = valid_word_prefix(c, n, flags, 9405 tword + sp->ts_splitoff, slang, FALSE); 9406 if (c == 0) 9407 break; 9408 9409 /* Use the WF_RARE flag for a rare prefix. */ 9410 if (c & WF_RAREPFX) 9411 flags |= WF_RARE; 9412 9413 /* Tricky: when checking for both prefix and 9414 * compounding we run into the prefix flag first. 9415 * Remember that it's OK, so that we accept the prefix 9416 * when arriving at a compound flag. */ 9417 sp->ts_flags |= TSF_PREFIXOK; 9418 } 9419 } 9420 9421 /* Check NEEDCOMPOUND: can't use word without compounding. Do 9422 * try appending another compound word below. */ 9423 if (sp->ts_complen == sp->ts_compsplit && fword_ends 9424 && (flags & WF_NEEDCOMP)) 9425 goodword_ends = FALSE; 9426 else 9427 goodword_ends = TRUE; 9428 9429 if (sp->ts_complen > sp->ts_compsplit) 9430 { 9431 if (slang->sl_nobreak) 9432 { 9433 /* There was a word before this word. When there was 9434 * no change in this word (it was correct) add the 9435 * first word as a suggestion. If this word was 9436 * corrected too, we need to check if a correct word 9437 * follows. 
*/ 9438 if (sp->ts_fidx - sp->ts_splitfidx 9439 == sp->ts_twordlen - sp->ts_splitoff 9440 && STRNCMP(fword + sp->ts_splitfidx, 9441 tword + sp->ts_splitoff, 9442 sp->ts_fidx - sp->ts_splitfidx) == 0) 9443 { 9444 preword[sp->ts_prewordlen] = NUL; 9445 add_suggestion(su, &su->su_ga, preword, 9446 sp->ts_splitfidx - repextra, 9447 sp->ts_score, 0, FALSE, 9448 lp->lp_sallang); 9449 break; 9450 } 9451 } 9452 else 9453 { 9454 /* There was a compound word before this word. If 9455 * this word does not support compounding then give up 9456 * (splitting is tried for the word without compound 9457 * flag). */ 9458 if (((unsigned)flags >> 24) == 0 9459 || sp->ts_twordlen - sp->ts_splitoff 9460 < slang->sl_compminlen) 9461 break; 9462 #ifdef FEAT_MBYTE 9463 /* For multi-byte chars check character length against 9464 * COMPOUNDMIN. */ 9465 if (has_mbyte 9466 && slang->sl_compminlen > 0 9467 && mb_charlen(tword + sp->ts_splitoff) 9468 < slang->sl_compminlen) 9469 break; 9470 #endif 9471 9472 compflags[sp->ts_complen] = ((unsigned)flags >> 24); 9473 compflags[sp->ts_complen + 1] = NUL; 9474 vim_strncpy(preword + sp->ts_prewordlen, 9475 tword + sp->ts_splitoff, 9476 sp->ts_twordlen - sp->ts_splitoff); 9477 p = preword; 9478 while (*skiptowhite(p) != NUL) 9479 p = skipwhite(skiptowhite(p)); 9480 if (fword_ends && !can_compound(slang, p, 9481 compflags + sp->ts_compsplit)) 9482 break; 9483 9484 /* Get pointer to last char of previous word. */ 9485 p = preword + sp->ts_prewordlen; 9486 mb_ptr_back(preword, p); 9487 } 9488 } 9489 else 9490 p = NULL; 9491 9492 /* 9493 * Form the word with proper case in preword. 9494 * If there is a word from a previous split, append. 9495 */ 9496 if (flags & WF_KEEPCAP) 9497 /* Must find the word in the keep-case tree. */ 9498 find_keepcap_word(slang, tword + sp->ts_splitoff, 9499 preword + sp->ts_prewordlen); 9500 else 9501 { 9502 /* Include badflags: if the badword is onecap or allcap 9503 * use that for the goodword too. 
But if the badword is 9504 * allcap and it's only one char long use onecap. */ 9505 c = su->su_badflags; 9506 if ((c & WF_ALLCAP) 9507 #ifdef FEAT_MBYTE 9508 && su->su_badlen == (*mb_ptr2len)(su->su_badptr) 9509 #else 9510 && su->su_badlen == 1 9511 #endif 9512 ) 9513 c = WF_ONECAP; 9514 c |= flags; 9515 9516 /* When appending a compound word after a word character 9517 * don't use Onecap. */ 9518 if (p != NULL && spell_iswordp_nmw(p)) 9519 c &= ~WF_ONECAP; 9520 make_case_word(tword + sp->ts_splitoff, 9521 preword + sp->ts_prewordlen, c); 9522 } 9523 9524 /* Don't use a banned word. It may appear again as a good 9525 * word, thus remember it. */ 9526 if (flags & WF_BANNED) 9527 { 9528 add_banned(su, preword + sp->ts_prewordlen); 9529 break; 9530 } 9531 if ((sp->ts_complen == sp->ts_compsplit 9532 && was_banned(su, preword + sp->ts_prewordlen)) 9533 || was_banned(su, preword)) 9534 { 9535 if (slang->sl_compprog == NULL) 9536 break; 9537 /* the word so far was banned but we may try compounding */ 9538 goodword_ends = FALSE; 9539 } 9540 9541 newscore = 0; 9542 if ((flags & WF_REGION) 9543 && (((unsigned)flags >> 16) & lp->lp_region) == 0) 9544 newscore += SCORE_REGION; 9545 if (flags & WF_RARE) 9546 newscore += SCORE_RARE; 9547 9548 if (!spell_valid_case(su->su_badflags, 9549 captype(preword + sp->ts_prewordlen, NULL))) 9550 newscore += SCORE_ICASE; 9551 9552 maysplit = TRUE; 9553 if (fword_ends && goodword_ends 9554 && sp->ts_fidx >= sp->ts_fidxtry) 9555 { 9556 /* The badword also ends: add suggestions. Give a penalty 9557 * when changing non-word char to word char, e.g., "thes," 9558 * -> "these". 
*/ 9559 p = fword + sp->ts_fidx; 9560 #ifdef FEAT_MBYTE 9561 if (has_mbyte) 9562 mb_ptr_back(fword, p); 9563 else 9564 #endif 9565 --p; 9566 if (!spell_iswordp(p, curbuf)) 9567 { 9568 p = preword + STRLEN(preword); 9569 #ifdef FEAT_MBYTE 9570 if (has_mbyte) 9571 mb_ptr_back(preword, p); 9572 else 9573 #endif 9574 --p; 9575 if (spell_iswordp(p, curbuf)) 9576 newscore += SCORE_NONWORD; 9577 } 9578 9579 add_suggestion(su, &su->su_ga, preword, 9580 sp->ts_fidx - repextra, 9581 sp->ts_score + newscore, 0, FALSE, 9582 lp->lp_sallang); 9583 9584 /* When the bad word doesn't end yet, try changing the 9585 * next word. E.g., find suggestions for "the the" where 9586 * the second "the" is different. It's done like a split. 9587 */ 9588 if (sp->ts_fidx - repextra >= su->su_badlen) 9589 maysplit = FALSE; 9590 } 9591 9592 if (maysplit 9593 && (sp->ts_fidx >= sp->ts_fidxtry || fword_ends) 9594 #ifdef FEAT_MBYTE 9595 /* Don't split halfway a character. */ 9596 && (!has_mbyte || sp->ts_tcharlen == 0) 9597 #endif 9598 ) 9599 { 9600 int try_compound; 9601 9602 /* Get here in two situations: 9603 * 1. The word in the tree ends but the badword continues: 9604 * If the word allows compounding try that. Otherwise 9605 * try a split by inserting a space. For both check 9606 * that a valid words starts at fword[sp->ts_fidx]. 9607 * For NOBREAK do like compounding to be able to check 9608 * if the next word is valid. 9609 * 2. The badword does end, but it was due to a change 9610 * (e.g., a swap). No need to split, but do check that 9611 * the following word is valid. 
9612 */ 9613 try_compound = FALSE; 9614 if ((!fword_ends || !goodword_ends) 9615 && slang->sl_compprog != NULL 9616 && ((unsigned)flags >> 24) != 0 9617 && sp->ts_twordlen - sp->ts_splitoff 9618 >= slang->sl_compminlen 9619 #ifdef FEAT_MBYTE 9620 && (!has_mbyte 9621 || slang->sl_compminlen == 0 9622 || mb_charlen(tword + sp->ts_splitoff) 9623 >= slang->sl_compminlen) 9624 #endif 9625 && (slang->sl_compsylmax < MAXWLEN 9626 || sp->ts_complen + 1 - sp->ts_compsplit 9627 < slang->sl_compmax) 9628 && (byte_in_str(sp->ts_complen == sp->ts_compsplit 9629 ? slang->sl_compstartflags 9630 : slang->sl_compallflags, 9631 ((unsigned)flags >> 24)))) 9632 { 9633 try_compound = TRUE; 9634 compflags[sp->ts_complen] = ((unsigned)flags >> 24); 9635 compflags[sp->ts_complen + 1] = NUL; 9636 } 9637 9638 /* For NOBREAK we never try splitting, it won't make any 9639 * word valid. */ 9640 if (slang->sl_nobreak) 9641 try_compound = TRUE; 9642 9643 /* If we could add a compound word, and it's also possible 9644 * to split at this point, do the split first and set 9645 * TSF_DIDSPLIT to avoid doing it again. */ 9646 else if (!fword_ends 9647 && try_compound 9648 && (sp->ts_flags & TSF_DIDSPLIT) == 0) 9649 { 9650 try_compound = FALSE; 9651 sp->ts_flags |= TSF_DIDSPLIT; 9652 --sp->ts_curi; /* do the same NUL again */ 9653 compflags[sp->ts_complen] = NUL; 9654 } 9655 else 9656 sp->ts_flags &= ~TSF_DIDSPLIT; 9657 9658 if (!try_compound && (!fword_ends || !goodword_ends)) 9659 { 9660 /* If we're going to split need to check that the 9661 * words so far are valid for compounding. If there 9662 * is only one word it must not have the NEEDCOMPOUND 9663 * flag. 
*/ 9664 if (sp->ts_complen == sp->ts_compsplit 9665 && (flags & WF_NEEDCOMP)) 9666 break; 9667 p = preword; 9668 while (*skiptowhite(p) != NUL) 9669 p = skipwhite(skiptowhite(p)); 9670 if (sp->ts_complen > sp->ts_compsplit 9671 && !can_compound(slang, p, 9672 compflags + sp->ts_compsplit)) 9673 break; 9674 newscore += SCORE_SPLIT; 9675 } 9676 9677 if (try_deeper(su, stack, depth, newscore)) 9678 { 9679 /* Save things to be restored at STATE_SPLITUNDO. */ 9680 sp->ts_save_badflags = su->su_badflags; 9681 sp->ts_state = STATE_SPLITUNDO; 9682 9683 ++depth; 9684 sp = &stack[depth]; 9685 9686 /* Append a space to preword when splitting. */ 9687 if (!try_compound && !fword_ends) 9688 STRCAT(preword, " "); 9689 sp->ts_prewordlen = STRLEN(preword); 9690 sp->ts_splitoff = sp->ts_twordlen; 9691 sp->ts_splitfidx = sp->ts_fidx; 9692 9693 /* If the badword has a non-word character at this 9694 * position skip it. That means replacing the 9695 * non-word character with a space. Always skip a 9696 * character when the word ends. But only when the 9697 * good word can end. */ 9698 if (((!try_compound 9699 && !spell_iswordp_nmw(fword + sp->ts_fidx)) 9700 || fword_ends) 9701 && goodword_ends) 9702 { 9703 int l; 9704 9705 #ifdef FEAT_MBYTE 9706 if (has_mbyte) 9707 l = MB_BYTE2LEN(fword[sp->ts_fidx]); 9708 else 9709 #endif 9710 l = 1; 9711 if (fword_ends) 9712 { 9713 /* Copy the skipped character to preword. */ 9714 mch_memmove(preword + sp->ts_prewordlen, 9715 fword + sp->ts_fidx, l); 9716 sp->ts_prewordlen += l; 9717 preword[sp->ts_prewordlen] = NUL; 9718 } 9719 else 9720 sp->ts_score -= SCORE_SPLIT - SCORE_SUBST; 9721 sp->ts_fidx += l; 9722 } 9723 9724 /* When compounding include compound flag in 9725 * compflags[] (already set above). When splitting we 9726 * may start compounding over again. 
*/ 9727 if (try_compound) 9728 ++sp->ts_complen; 9729 else 9730 sp->ts_compsplit = sp->ts_complen; 9731 sp->ts_prefixdepth = PFD_NOPREFIX; 9732 9733 /* set su->su_badflags to the caps type at this 9734 * position */ 9735 #ifdef FEAT_MBYTE 9736 if (has_mbyte) 9737 n = nofold_len(fword, sp->ts_fidx, su->su_badptr); 9738 else 9739 #endif 9740 n = sp->ts_fidx; 9741 su->su_badflags = badword_captype(su->su_badptr + n, 9742 su->su_badptr + su->su_badlen); 9743 9744 /* Restart at top of the tree. */ 9745 sp->ts_arridx = 0; 9746 9747 /* If there are postponed prefixes, try these too. */ 9748 if (pbyts != NULL) 9749 { 9750 byts = pbyts; 9751 idxs = pidxs; 9752 sp->ts_prefixdepth = PFD_PREFIXTREE; 9753 sp->ts_state = STATE_NOPREFIX; 9754 } 9755 } 9756 } 9757 break; 9758 9759 case STATE_SPLITUNDO: 9760 /* Undo the changes done for word split or compound word. */ 9761 su->su_badflags = sp->ts_save_badflags; 9762 9763 /* Continue looking for NUL bytes. */ 9764 sp->ts_state = STATE_START; 9765 9766 /* In case we went into the prefix tree. */ 9767 byts = fbyts; 9768 idxs = fidxs; 9769 break; 9770 9771 case STATE_ENDNUL: 9772 /* Past the NUL bytes in the node. */ 9773 su->su_badflags = sp->ts_save_badflags; 9774 if (fword[sp->ts_fidx] == NUL 9775 #ifdef FEAT_MBYTE 9776 && sp->ts_tcharlen == 0 9777 #endif 9778 ) 9779 { 9780 /* The badword ends, can't use the bytes in this node. */ 9781 sp->ts_state = STATE_DEL; 9782 break; 9783 } 9784 sp->ts_state = STATE_PLAIN; 9785 /*FALLTHROUGH*/ 9786 9787 case STATE_PLAIN: 9788 /* 9789 * Go over all possible bytes at this node, add each to 9790 * tword[] and use child node. "ts_curi" is the index. 9791 */ 9792 arridx = sp->ts_arridx; 9793 if (sp->ts_curi > byts[arridx]) 9794 { 9795 /* Done all bytes at this node, do next state. When still 9796 * at already changed bytes skip the other tricks. 
*/ 9797 if (sp->ts_fidx >= sp->ts_fidxtry) 9798 sp->ts_state = STATE_DEL; 9799 else 9800 sp->ts_state = STATE_FINAL; 9801 } 9802 else 9803 { 9804 arridx += sp->ts_curi++; 9805 c = byts[arridx]; 9806 9807 /* Normal byte, go one level deeper. If it's not equal to 9808 * the byte in the bad word adjust the score. But don't 9809 * even try when the byte was already changed. */ 9810 if (c == fword[sp->ts_fidx] 9811 #ifdef FEAT_MBYTE 9812 || (sp->ts_tcharlen > 0 9813 && sp->ts_isdiff != DIFF_NONE) 9814 #endif 9815 ) 9816 newscore = 0; 9817 else 9818 newscore = SCORE_SUBST; 9819 if ((newscore == 0 || sp->ts_fidx >= sp->ts_fidxtry) 9820 && try_deeper(su, stack, depth, newscore)) 9821 { 9822 ++depth; 9823 sp = &stack[depth]; 9824 ++sp->ts_fidx; 9825 tword[sp->ts_twordlen++] = c; 9826 sp->ts_arridx = idxs[arridx]; 9827 #ifdef FEAT_MBYTE 9828 if (newscore == SCORE_SUBST) 9829 sp->ts_isdiff = DIFF_YES; 9830 if (has_mbyte) 9831 { 9832 /* Multi-byte characters are a bit complicated to 9833 * handle: They differ when any of the bytes 9834 * differ and then their length may also differ. */ 9835 if (sp->ts_tcharlen == 0) 9836 { 9837 /* First byte. */ 9838 sp->ts_tcharidx = 0; 9839 sp->ts_tcharlen = MB_BYTE2LEN(c); 9840 sp->ts_fcharstart = sp->ts_fidx - 1; 9841 sp->ts_isdiff = (newscore != 0) 9842 ? DIFF_YES : DIFF_NONE; 9843 } 9844 else if (sp->ts_isdiff == DIFF_INSERT) 9845 /* When inserting trail bytes don't advance in 9846 * the bad word. */ 9847 --sp->ts_fidx; 9848 if (++sp->ts_tcharidx == sp->ts_tcharlen) 9849 { 9850 /* Last byte of character. */ 9851 if (sp->ts_isdiff == DIFF_YES) 9852 { 9853 /* Correct ts_fidx for the byte length of 9854 * the character (we didn't check that 9855 * before). */ 9856 sp->ts_fidx = sp->ts_fcharstart 9857 + MB_BYTE2LEN( 9858 fword[sp->ts_fcharstart]); 9859 9860 /* For changing a composing character 9861 * adjust the score from SCORE_SUBST to 9862 * SCORE_SUBCOMP. 
*/ 9863 if (enc_utf8 9864 && utf_iscomposing( 9865 mb_ptr2char(tword 9866 + sp->ts_twordlen 9867 - sp->ts_tcharlen)) 9868 && utf_iscomposing( 9869 mb_ptr2char(fword 9870 + sp->ts_fcharstart))) 9871 sp->ts_score -= 9872 SCORE_SUBST - SCORE_SUBCOMP; 9873 9874 /* For a similar character adjust score 9875 * from SCORE_SUBST to SCORE_SIMILAR. */ 9876 else if (slang->sl_has_map 9877 && similar_chars(slang, 9878 mb_ptr2char(tword 9879 + sp->ts_twordlen 9880 - sp->ts_tcharlen), 9881 mb_ptr2char(fword 9882 + sp->ts_fcharstart))) 9883 sp->ts_score -= 9884 SCORE_SUBST - SCORE_SIMILAR; 9885 } 9886 else if (sp->ts_isdiff == DIFF_INSERT 9887 && sp->ts_twordlen > sp->ts_tcharlen) 9888 { 9889 p = tword + sp->ts_twordlen 9890 - sp->ts_tcharlen; 9891 c = mb_ptr2char(p); 9892 if (enc_utf8 && utf_iscomposing(c)) 9893 { 9894 /* Inserting a composing char doesn't 9895 * count that much. */ 9896 sp->ts_score -= SCORE_INS 9897 - SCORE_INSCOMP; 9898 } 9899 else 9900 { 9901 /* If the previous character was the 9902 * same, thus doubling a character, 9903 * give a bonus to the score. */ 9904 mb_ptr_back(tword, p); 9905 if (c == mb_ptr2char(p)) 9906 sp->ts_score -= SCORE_INS 9907 - SCORE_INSDUP; 9908 } 9909 } 9910 9911 /* Starting a new char, reset the length. */ 9912 sp->ts_tcharlen = 0; 9913 } 9914 } 9915 else 9916 #endif 9917 { 9918 /* If we found a similar char adjust the score. 9919 * We do this after calling try_deeper() because 9920 * it's slow. */ 9921 if (newscore != 0 9922 && slang->sl_has_map 9923 && similar_chars(slang, 9924 c, fword[sp->ts_fidx - 1])) 9925 sp->ts_score -= SCORE_SUBST - SCORE_SIMILAR; 9926 } 9927 } 9928 } 9929 break; 9930 9931 case STATE_DEL: 9932 #ifdef FEAT_MBYTE 9933 /* When past the first byte of a multi-byte char don't try 9934 * delete/insert/swap a character. */ 9935 if (has_mbyte && sp->ts_tcharlen > 0) 9936 { 9937 sp->ts_state = STATE_FINAL; 9938 break; 9939 } 9940 #endif 9941 /* 9942 * Try skipping one character in the bad word (delete it). 
9943 */ 9944 sp->ts_state = STATE_INS; 9945 sp->ts_curi = 1; 9946 if (fword[sp->ts_fidx] != NUL 9947 && try_deeper(su, stack, depth, SCORE_DEL)) 9948 { 9949 ++depth; 9950 9951 /* Advance over the character in fword[]. Give a bonus to 9952 * the score if the same character is following "nn" -> 9953 * "n". */ 9954 #ifdef FEAT_MBYTE 9955 if (has_mbyte) 9956 { 9957 c = mb_ptr2char(fword + sp->ts_fidx); 9958 stack[depth].ts_fidx += MB_BYTE2LEN(fword[sp->ts_fidx]); 9959 if (enc_utf8 && utf_iscomposing(c)) 9960 stack[depth].ts_score -= SCORE_DEL - SCORE_DELCOMP; 9961 else if (c == mb_ptr2char(fword + stack[depth].ts_fidx)) 9962 stack[depth].ts_score -= SCORE_DEL - SCORE_DELDUP; 9963 } 9964 else 9965 #endif 9966 { 9967 ++stack[depth].ts_fidx; 9968 if (fword[sp->ts_fidx] == fword[sp->ts_fidx + 1]) 9969 stack[depth].ts_score -= SCORE_DEL - SCORE_DELDUP; 9970 } 9971 break; 9972 } 9973 /*FALLTHROUGH*/ 9974 9975 case STATE_INS: 9976 /* Insert one byte. Do this for each possible byte at this 9977 * node. */ 9978 n = sp->ts_arridx; 9979 if (sp->ts_curi > byts[n]) 9980 { 9981 /* Done all bytes at this node, do next state. */ 9982 sp->ts_state = STATE_SWAP; 9983 } 9984 else 9985 { 9986 /* Do one more byte at this node. Skip NUL bytes. */ 9987 n += sp->ts_curi++; 9988 c = byts[n]; 9989 if (c != 0 && try_deeper(su, stack, depth, SCORE_INS)) 9990 { 9991 ++depth; 9992 sp = &stack[depth]; 9993 tword[sp->ts_twordlen++] = c; 9994 sp->ts_arridx = idxs[n]; 9995 #ifdef FEAT_MBYTE 9996 if (has_mbyte) 9997 { 9998 fl = MB_BYTE2LEN(c); 9999 if (fl > 1) 10000 { 10001 /* There are following bytes for the same 10002 * character. We must find all bytes before 10003 * trying delete/insert/swap/etc. */ 10004 sp->ts_tcharlen = fl; 10005 sp->ts_tcharidx = 1; 10006 sp->ts_isdiff = DIFF_INSERT; 10007 } 10008 } 10009 else 10010 fl = 1; 10011 if (fl == 1) 10012 #endif 10013 { 10014 /* If the previous character was the same, thus 10015 * doubling a character, give a bonus to the 10016 * score. 
*/ 10017 if (sp->ts_twordlen >= 2 10018 && tword[sp->ts_twordlen - 2] == c) 10019 sp->ts_score -= SCORE_INS - SCORE_INSDUP; 10020 } 10021 } 10022 } 10023 break; 10024 10025 case STATE_SWAP: 10026 /* 10027 * Swap two bytes in the bad word: "12" -> "21". 10028 * We change "fword" here, it's changed back afterwards. 10029 */ 10030 p = fword + sp->ts_fidx; 10031 c = *p; 10032 if (c == NUL) 10033 { 10034 /* End of word, can't swap or replace. */ 10035 sp->ts_state = STATE_FINAL; 10036 break; 10037 } 10038 10039 /* Don't swap if the first character is not a word character. 10040 * SWAP3 etc. also don't make sense then. */ 10041 if (!spell_iswordp(p, curbuf)) 10042 { 10043 sp->ts_state = STATE_REP_INI; 10044 break; 10045 } 10046 10047 #ifdef FEAT_MBYTE 10048 if (has_mbyte) 10049 { 10050 n = mb_cptr2len(p); 10051 c = mb_ptr2char(p); 10052 if (!spell_iswordp(p + n, curbuf)) 10053 c2 = c; /* don't swap non-word char */ 10054 else 10055 c2 = mb_ptr2char(p + n); 10056 } 10057 else 10058 #endif 10059 { 10060 if (!spell_iswordp(p + 1, curbuf)) 10061 c2 = c; /* don't swap non-word char */ 10062 else 10063 c2 = p[1]; 10064 } 10065 10066 /* When characters are identical, swap won't do anything. 10067 * Also get here if the second char is not a word character. */ 10068 if (c == c2) 10069 { 10070 sp->ts_state = STATE_SWAP3; 10071 break; 10072 } 10073 if (c2 != NUL && try_deeper(su, stack, depth, SCORE_SWAP)) 10074 { 10075 sp->ts_state = STATE_UNSWAP; 10076 ++depth; 10077 #ifdef FEAT_MBYTE 10078 if (has_mbyte) 10079 { 10080 fl = mb_char2len(c2); 10081 mch_memmove(p, p + n, fl); 10082 mb_char2bytes(c, p + fl); 10083 stack[depth].ts_fidxtry = sp->ts_fidx + n + fl; 10084 } 10085 else 10086 #endif 10087 { 10088 p[0] = c2; 10089 p[1] = c; 10090 stack[depth].ts_fidxtry = sp->ts_fidx + 2; 10091 } 10092 } 10093 else 10094 /* If this swap doesn't work then SWAP3 won't either. 
*/ 10095 sp->ts_state = STATE_REP_INI; 10096 break; 10097 10098 case STATE_UNSWAP: 10099 /* Undo the STATE_SWAP swap: "21" -> "12". */ 10100 p = fword + sp->ts_fidx; 10101 #ifdef FEAT_MBYTE 10102 if (has_mbyte) 10103 { 10104 n = MB_BYTE2LEN(*p); 10105 c = mb_ptr2char(p + n); 10106 mch_memmove(p + MB_BYTE2LEN(p[n]), p, n); 10107 mb_char2bytes(c, p); 10108 } 10109 else 10110 #endif 10111 { 10112 c = *p; 10113 *p = p[1]; 10114 p[1] = c; 10115 } 10116 /*FALLTHROUGH*/ 10117 10118 case STATE_SWAP3: 10119 /* Swap two bytes, skipping one: "123" -> "321". We change 10120 * "fword" here, it's changed back afterwards. */ 10121 p = fword + sp->ts_fidx; 10122 #ifdef FEAT_MBYTE 10123 if (has_mbyte) 10124 { 10125 n = mb_cptr2len(p); 10126 c = mb_ptr2char(p); 10127 fl = mb_cptr2len(p + n); 10128 c2 = mb_ptr2char(p + n); 10129 if (!spell_iswordp(p + n + fl, curbuf)) 10130 c3 = c; /* don't swap non-word char */ 10131 else 10132 c3 = mb_ptr2char(p + n + fl); 10133 } 10134 else 10135 #endif 10136 { 10137 c = *p; 10138 c2 = p[1]; 10139 if (!spell_iswordp(p + 2, curbuf)) 10140 c3 = c; /* don't swap non-word char */ 10141 else 10142 c3 = p[2]; 10143 } 10144 10145 /* When characters are identical: "121" then SWAP3 result is 10146 * identical, ROT3L result is same as SWAP: "211", ROT3L 10147 * result is same as SWAP on next char: "112". Thus skip all 10148 * swapping. Also skip when c3 is NUL. 10149 * Also get here when the third character is not a word 10150 * character. 
Second character may any char: "a.b" -> "b.a" */ 10151 if (c == c3 || c3 == NUL) 10152 { 10153 sp->ts_state = STATE_REP_INI; 10154 break; 10155 } 10156 if (try_deeper(su, stack, depth, SCORE_SWAP3)) 10157 { 10158 sp->ts_state = STATE_UNSWAP3; 10159 ++depth; 10160 #ifdef FEAT_MBYTE 10161 if (has_mbyte) 10162 { 10163 tl = mb_char2len(c3); 10164 mch_memmove(p, p + n + fl, tl); 10165 mb_char2bytes(c2, p + tl); 10166 mb_char2bytes(c, p + fl + tl); 10167 stack[depth].ts_fidxtry = sp->ts_fidx + n + fl + tl; 10168 } 10169 else 10170 #endif 10171 { 10172 p[0] = p[2]; 10173 p[2] = c; 10174 stack[depth].ts_fidxtry = sp->ts_fidx + 3; 10175 } 10176 } 10177 else 10178 sp->ts_state = STATE_REP_INI; 10179 break; 10180 10181 case STATE_UNSWAP3: 10182 /* Undo STATE_SWAP3: "321" -> "123" */ 10183 p = fword + sp->ts_fidx; 10184 #ifdef FEAT_MBYTE 10185 if (has_mbyte) 10186 { 10187 n = MB_BYTE2LEN(*p); 10188 c2 = mb_ptr2char(p + n); 10189 fl = MB_BYTE2LEN(p[n]); 10190 c = mb_ptr2char(p + n + fl); 10191 tl = MB_BYTE2LEN(p[n + fl]); 10192 mch_memmove(p + fl + tl, p, n); 10193 mb_char2bytes(c, p); 10194 mb_char2bytes(c2, p + tl); 10195 p = p + tl; 10196 } 10197 else 10198 #endif 10199 { 10200 c = *p; 10201 *p = p[2]; 10202 p[2] = c; 10203 ++p; 10204 } 10205 10206 if (!spell_iswordp(p, curbuf)) 10207 { 10208 /* Middle char is not a word char, skip the rotate. 10209 * First and third char were already checked at swap 10210 * and swap3. */ 10211 sp->ts_state = STATE_REP_INI; 10212 break; 10213 } 10214 10215 /* Rotate three characters left: "123" -> "231". We change 10216 * "fword" here, it's changed back afterwards. 
*/ 10217 if (try_deeper(su, stack, depth, SCORE_SWAP3)) 10218 { 10219 sp->ts_state = STATE_UNROT3L; 10220 ++depth; 10221 p = fword + sp->ts_fidx; 10222 #ifdef FEAT_MBYTE 10223 if (has_mbyte) 10224 { 10225 n = mb_cptr2len(p); 10226 c = mb_ptr2char(p); 10227 fl = mb_cptr2len(p + n); 10228 fl += mb_cptr2len(p + n + fl); 10229 mch_memmove(p, p + n, fl); 10230 mb_char2bytes(c, p + fl); 10231 stack[depth].ts_fidxtry = sp->ts_fidx + n + fl; 10232 } 10233 else 10234 #endif 10235 { 10236 c = *p; 10237 *p = p[1]; 10238 p[1] = p[2]; 10239 p[2] = c; 10240 stack[depth].ts_fidxtry = sp->ts_fidx + 3; 10241 } 10242 } 10243 else 10244 sp->ts_state = STATE_REP_INI; 10245 break; 10246 10247 case STATE_UNROT3L: 10248 /* Undo ROT3L: "231" -> "123" */ 10249 p = fword + sp->ts_fidx; 10250 #ifdef FEAT_MBYTE 10251 if (has_mbyte) 10252 { 10253 n = MB_BYTE2LEN(*p); 10254 n += MB_BYTE2LEN(p[n]); 10255 c = mb_ptr2char(p + n); 10256 tl = MB_BYTE2LEN(p[n]); 10257 mch_memmove(p + tl, p, n); 10258 mb_char2bytes(c, p); 10259 } 10260 else 10261 #endif 10262 { 10263 c = p[2]; 10264 p[2] = p[1]; 10265 p[1] = *p; 10266 *p = c; 10267 } 10268 10269 /* Rotate three bytes right: "123" -> "312". We change 10270 * "fword" here, it's changed back afterwards. 
*/ 10271 if (try_deeper(su, stack, depth, SCORE_SWAP3)) 10272 { 10273 sp->ts_state = STATE_UNROT3R; 10274 ++depth; 10275 p = fword + sp->ts_fidx; 10276 #ifdef FEAT_MBYTE 10277 if (has_mbyte) 10278 { 10279 n = mb_cptr2len(p); 10280 n += mb_cptr2len(p + n); 10281 c = mb_ptr2char(p + n); 10282 tl = mb_cptr2len(p + n); 10283 mch_memmove(p + tl, p, n); 10284 mb_char2bytes(c, p); 10285 stack[depth].ts_fidxtry = sp->ts_fidx + n + tl; 10286 } 10287 else 10288 #endif 10289 { 10290 c = p[2]; 10291 p[2] = p[1]; 10292 p[1] = *p; 10293 *p = c; 10294 stack[depth].ts_fidxtry = sp->ts_fidx + 3; 10295 } 10296 } 10297 else 10298 sp->ts_state = STATE_REP_INI; 10299 break; 10300 10301 case STATE_UNROT3R: 10302 /* Undo ROT3R: "312" -> "123" */ 10303 p = fword + sp->ts_fidx; 10304 #ifdef FEAT_MBYTE 10305 if (has_mbyte) 10306 { 10307 c = mb_ptr2char(p); 10308 tl = MB_BYTE2LEN(*p); 10309 n = MB_BYTE2LEN(p[tl]); 10310 n += MB_BYTE2LEN(p[tl + n]); 10311 mch_memmove(p, p + tl, n); 10312 mb_char2bytes(c, p + n); 10313 } 10314 else 10315 #endif 10316 { 10317 c = *p; 10318 *p = p[1]; 10319 p[1] = p[2]; 10320 p[2] = c; 10321 } 10322 /*FALLTHROUGH*/ 10323 10324 case STATE_REP_INI: 10325 /* Check if matching with REP items from the .aff file would 10326 * work. Quickly skip if: 10327 * - there are no REP items 10328 * - the score is going to be too high anyway 10329 * - already applied a REP item or swapped here */ 10330 if (lp->lp_replang == NULL 10331 || sp->ts_score + SCORE_REP >= su->su_maxscore 10332 || sp->ts_fidx < sp->ts_fidxtry) 10333 { 10334 sp->ts_state = STATE_FINAL; 10335 break; 10336 } 10337 gap = &lp->lp_replang->sl_rep; 10338 10339 /* Use the first byte to quickly find the first entry that 10340 * may match. If the index is -1 there is none. 
*/ 10341 sp->ts_curi = lp->lp_replang->sl_rep_first[fword[sp->ts_fidx]]; 10342 if (sp->ts_curi < 0) 10343 { 10344 sp->ts_state = STATE_FINAL; 10345 break; 10346 } 10347 10348 sp->ts_state = STATE_REP; 10349 /*FALLTHROUGH*/ 10350 10351 case STATE_REP: 10352 /* Try matching with REP items from the .aff file. For each 10353 * match replace the characters and check if the resulting 10354 * word is valid. */ 10355 p = fword + sp->ts_fidx; 10356 10357 gap = &lp->lp_replang->sl_rep; 10358 while (sp->ts_curi < gap->ga_len) 10359 { 10360 ftp = (fromto_T *)gap->ga_data + sp->ts_curi++; 10361 if (*ftp->ft_from != *p) 10362 { 10363 /* past possible matching entries */ 10364 sp->ts_curi = gap->ga_len; 10365 break; 10366 } 10367 if (STRNCMP(ftp->ft_from, p, STRLEN(ftp->ft_from)) == 0 10368 && try_deeper(su, stack, depth, SCORE_REP)) 10369 { 10370 /* Need to undo this afterwards. */ 10371 sp->ts_state = STATE_REP_UNDO; 10372 10373 /* Change the "from" to the "to" string. */ 10374 ++depth; 10375 fl = STRLEN(ftp->ft_from); 10376 tl = STRLEN(ftp->ft_to); 10377 if (fl != tl) 10378 { 10379 mch_memmove(p + tl, p + fl, STRLEN(p + fl) + 1); 10380 repextra += tl - fl; 10381 } 10382 mch_memmove(p, ftp->ft_to, tl); 10383 stack[depth].ts_fidxtry = sp->ts_fidx + tl; 10384 #ifdef FEAT_MBYTE 10385 stack[depth].ts_tcharlen = 0; 10386 #endif 10387 break; 10388 } 10389 } 10390 10391 if (sp->ts_curi >= gap->ga_len && sp->ts_state == STATE_REP) 10392 /* No (more) matches. */ 10393 sp->ts_state = STATE_FINAL; 10394 10395 break; 10396 10397 case STATE_REP_UNDO: 10398 /* Undo a REP replacement and continue with the next one. 
*/ 10399 ftp = (fromto_T *)lp->lp_replang->sl_rep.ga_data 10400 + sp->ts_curi - 1; 10401 fl = STRLEN(ftp->ft_from); 10402 tl = STRLEN(ftp->ft_to); 10403 p = fword + sp->ts_fidx; 10404 if (fl != tl) 10405 { 10406 mch_memmove(p + fl, p + tl, STRLEN(p + tl) + 1); 10407 repextra -= tl - fl; 10408 } 10409 mch_memmove(p, ftp->ft_from, fl); 10410 sp->ts_state = STATE_REP; 10411 break; 10412 10413 default: 10414 /* Did all possible states at this level, go up one level. */ 10415 --depth; 10416 10417 if (depth >= 0 && stack[depth].ts_prefixdepth == PFD_PREFIXTREE) 10418 { 10419 /* Continue in or go back to the prefix tree. */ 10420 byts = pbyts; 10421 idxs = pidxs; 10422 } 10423 10424 /* Don't check for CTRL-C too often, it takes time. */ 10425 line_breakcheck(); 10426 } 10427 } 10428 } 10429 } 10430 10431 /* 10432 * Try going one level deeper in the tree. 10433 */ 10434 static int 10435 try_deeper(su, stack, depth, score_add) 10436 suginfo_T *su; 10437 trystate_T *stack; 10438 int depth; 10439 int score_add; 10440 { 10441 int newscore; 10442 10443 /* Refuse to go deeper if the scrore is getting too big. */ 10444 newscore = stack[depth].ts_score + score_add; 10445 if (newscore >= su->su_maxscore) 10446 return FALSE; 10447 10448 stack[depth + 1] = stack[depth]; 10449 stack[depth + 1].ts_state = STATE_START; 10450 stack[depth + 1].ts_score = newscore; 10451 stack[depth + 1].ts_curi = 1; /* start just after length byte */ 10452 stack[depth + 1].ts_flags = 0; 10453 return TRUE; 10454 } 10455 10456 #ifdef FEAT_MBYTE 10457 /* 10458 * Case-folding may change the number of bytes: Count nr of chars in 10459 * fword[flen] and return the byte length of that many chars in "word". 
10460 */ 10461 static int 10462 nofold_len(fword, flen, word) 10463 char_u *fword; 10464 int flen; 10465 char_u *word; 10466 { 10467 char_u *p; 10468 int i = 0; 10469 10470 for (p = fword; p < fword + flen; mb_ptr_adv(p)) 10471 ++i; 10472 for (p = word; i > 0; mb_ptr_adv(p)) 10473 --i; 10474 return (int)(p - word); 10475 } 10476 #endif 10477 10478 /* 10479 * "fword" is a good word with case folded. Find the matching keep-case 10480 * words and put it in "kword". 10481 * Theoretically there could be several keep-case words that result in the 10482 * same case-folded word, but we only find one... 10483 */ 10484 static void 10485 find_keepcap_word(slang, fword, kword) 10486 slang_T *slang; 10487 char_u *fword; 10488 char_u *kword; 10489 { 10490 char_u uword[MAXWLEN]; /* "fword" in upper-case */ 10491 int depth; 10492 idx_T tryidx; 10493 10494 /* The following arrays are used at each depth in the tree. */ 10495 idx_T arridx[MAXWLEN]; 10496 int round[MAXWLEN]; 10497 int fwordidx[MAXWLEN]; 10498 int uwordidx[MAXWLEN]; 10499 int kwordlen[MAXWLEN]; 10500 10501 int flen, ulen; 10502 int l; 10503 int len; 10504 int c; 10505 idx_T lo, hi, m; 10506 char_u *p; 10507 char_u *byts = slang->sl_kbyts; /* array with bytes of the words */ 10508 idx_T *idxs = slang->sl_kidxs; /* array with indexes */ 10509 10510 if (byts == NULL) 10511 { 10512 /* array is empty: "cannot happen" */ 10513 *kword = NUL; 10514 return; 10515 } 10516 10517 /* Make an all-cap version of "fword". */ 10518 allcap_copy(fword, uword); 10519 10520 /* 10521 * Each character needs to be tried both case-folded and upper-case. 10522 * All this gets very complicated if we keep in mind that changing case 10523 * may change the byte length of a multi-byte character... 10524 */ 10525 depth = 0; 10526 arridx[0] = 0; 10527 round[0] = 0; 10528 fwordidx[0] = 0; 10529 uwordidx[0] = 0; 10530 kwordlen[0] = 0; 10531 while (depth >= 0) 10532 { 10533 if (fword[fwordidx[depth]] == NUL) 10534 { 10535 /* We are at the end of "fword". 
If the tree allows a word to end 10536 * here we have found a match. */ 10537 if (byts[arridx[depth] + 1] == 0) 10538 { 10539 kword[kwordlen[depth]] = NUL; 10540 return; 10541 } 10542 10543 /* kword is getting too long, continue one level up */ 10544 --depth; 10545 } 10546 else if (++round[depth] > 2) 10547 { 10548 /* tried both fold-case and upper-case character, continue one 10549 * level up */ 10550 --depth; 10551 } 10552 else 10553 { 10554 /* 10555 * round[depth] == 1: Try using the folded-case character. 10556 * round[depth] == 2: Try using the upper-case character. 10557 */ 10558 #ifdef FEAT_MBYTE 10559 if (has_mbyte) 10560 { 10561 flen = mb_cptr2len(fword + fwordidx[depth]); 10562 ulen = mb_cptr2len(uword + uwordidx[depth]); 10563 } 10564 else 10565 #endif 10566 ulen = flen = 1; 10567 if (round[depth] == 1) 10568 { 10569 p = fword + fwordidx[depth]; 10570 l = flen; 10571 } 10572 else 10573 { 10574 p = uword + uwordidx[depth]; 10575 l = ulen; 10576 } 10577 10578 for (tryidx = arridx[depth]; l > 0; --l) 10579 { 10580 /* Perform a binary search in the list of accepted bytes. */ 10581 len = byts[tryidx++]; 10582 c = *p++; 10583 lo = tryidx; 10584 hi = tryidx + len - 1; 10585 while (lo < hi) 10586 { 10587 m = (lo + hi) / 2; 10588 if (byts[m] > c) 10589 hi = m - 1; 10590 else if (byts[m] < c) 10591 lo = m + 1; 10592 else 10593 { 10594 lo = hi = m; 10595 break; 10596 } 10597 } 10598 10599 /* Stop if there is no matching byte. */ 10600 if (hi < lo || byts[lo] != c) 10601 break; 10602 10603 /* Continue at the child (if there is one). */ 10604 tryidx = idxs[lo]; 10605 } 10606 10607 if (l == 0) 10608 { 10609 /* 10610 * Found the matching char. Copy it to "kword" and go a 10611 * level deeper. 
10612 */ 10613 if (round[depth] == 1) 10614 { 10615 STRNCPY(kword + kwordlen[depth], fword + fwordidx[depth], 10616 flen); 10617 kwordlen[depth + 1] = kwordlen[depth] + flen; 10618 } 10619 else 10620 { 10621 STRNCPY(kword + kwordlen[depth], uword + uwordidx[depth], 10622 ulen); 10623 kwordlen[depth + 1] = kwordlen[depth] + ulen; 10624 } 10625 fwordidx[depth + 1] = fwordidx[depth] + flen; 10626 uwordidx[depth + 1] = uwordidx[depth] + ulen; 10627 10628 ++depth; 10629 arridx[depth] = tryidx; 10630 round[depth] = 0; 10631 } 10632 } 10633 } 10634 10635 /* Didn't find it: "cannot happen". */ 10636 *kword = NUL; 10637 } 10638 10639 /* 10640 * Compute the sound-a-like score for suggestions in su->su_ga and add them to 10641 * su->su_sga. 10642 */ 10643 static void 10644 score_comp_sal(su) 10645 suginfo_T *su; 10646 { 10647 langp_T *lp; 10648 char_u badsound[MAXWLEN]; 10649 int i; 10650 suggest_T *stp; 10651 suggest_T *sstp; 10652 int score; 10653 int lpi; 10654 10655 if (ga_grow(&su->su_sga, su->su_ga.ga_len) == FAIL) 10656 return; 10657 10658 /* Use the sound-folding of the first language that supports it. */ 10659 for (lpi = 0; lpi < curbuf->b_langp.ga_len; ++lpi) 10660 { 10661 lp = LANGP_ENTRY(curbuf->b_langp, lpi); 10662 if (lp->lp_slang->sl_sal.ga_len > 0) 10663 { 10664 /* soundfold the bad word */ 10665 spell_soundfold(lp->lp_slang, su->su_fbadword, TRUE, badsound); 10666 10667 for (i = 0; i < su->su_ga.ga_len; ++i) 10668 { 10669 stp = &SUG(su->su_ga, i); 10670 10671 /* Case-fold the suggested word, sound-fold it and compute the 10672 * sound-a-like score. */ 10673 score = stp_sal_score(stp, su, lp->lp_slang, badsound); 10674 if (score < SCORE_MAXMAX) 10675 { 10676 /* Add the suggestion. 
*/ 10677 sstp = &SUG(su->su_sga, su->su_sga.ga_len); 10678 sstp->st_word = vim_strsave(stp->st_word); 10679 if (sstp->st_word != NULL) 10680 { 10681 sstp->st_score = score; 10682 sstp->st_altscore = 0; 10683 sstp->st_orglen = stp->st_orglen; 10684 ++su->su_sga.ga_len; 10685 } 10686 } 10687 } 10688 break; 10689 } 10690 } 10691 } 10692 10693 /* 10694 * Combine the list of suggestions in su->su_ga and su->su_sga. 10695 * They are intwined. 10696 */ 10697 static void 10698 score_combine(su) 10699 suginfo_T *su; 10700 { 10701 int i; 10702 int j; 10703 garray_T ga; 10704 garray_T *gap; 10705 langp_T *lp; 10706 suggest_T *stp; 10707 char_u *p; 10708 char_u badsound[MAXWLEN]; 10709 int round; 10710 int lpi; 10711 10712 /* Add the alternate score to su_ga. */ 10713 for (lpi = 0; lpi < curbuf->b_langp.ga_len; ++lpi) 10714 { 10715 lp = LANGP_ENTRY(curbuf->b_langp, lpi); 10716 if (lp->lp_slang->sl_sal.ga_len > 0) 10717 { 10718 /* soundfold the bad word */ 10719 spell_soundfold(lp->lp_slang, su->su_fbadword, TRUE, badsound); 10720 10721 for (i = 0; i < su->su_ga.ga_len; ++i) 10722 { 10723 stp = &SUG(su->su_ga, i); 10724 stp->st_altscore = stp_sal_score(stp, su, lp->lp_slang, 10725 badsound); 10726 if (stp->st_altscore == SCORE_MAXMAX) 10727 stp->st_score = (stp->st_score * 3 + SCORE_BIG) / 4; 10728 else 10729 stp->st_score = (stp->st_score * 3 10730 + stp->st_altscore) / 4; 10731 stp->st_salscore = FALSE; 10732 } 10733 break; 10734 } 10735 } 10736 10737 /* Add the alternate score to su_sga. */ 10738 for (i = 0; i < su->su_sga.ga_len; ++i) 10739 { 10740 stp = &SUG(su->su_sga, i); 10741 stp->st_altscore = spell_edit_score(su->su_badword, stp->st_word); 10742 if (stp->st_score == SCORE_MAXMAX) 10743 stp->st_score = (SCORE_BIG * 7 + stp->st_altscore) / 8; 10744 else 10745 stp->st_score = (stp->st_score * 7 + stp->st_altscore) / 8; 10746 stp->st_salscore = TRUE; 10747 } 10748 10749 /* Sort the suggestions and truncate at "maxcount" for both lists. 
*/ 10750 (void)cleanup_suggestions(&su->su_ga, su->su_maxscore, su->su_maxcount); 10751 (void)cleanup_suggestions(&su->su_sga, su->su_maxscore, su->su_maxcount); 10752 10753 ga_init2(&ga, (int)sizeof(suginfo_T), 1); 10754 if (ga_grow(&ga, su->su_ga.ga_len + su->su_sga.ga_len) == FAIL) 10755 return; 10756 10757 stp = &SUG(ga, 0); 10758 for (i = 0; i < su->su_ga.ga_len || i < su->su_sga.ga_len; ++i) 10759 { 10760 /* round 1: get a suggestion from su_ga 10761 * round 2: get a suggestion from su_sga */ 10762 for (round = 1; round <= 2; ++round) 10763 { 10764 gap = round == 1 ? &su->su_ga : &su->su_sga; 10765 if (i < gap->ga_len) 10766 { 10767 /* Don't add a word if it's already there. */ 10768 p = SUG(*gap, i).st_word; 10769 for (j = 0; j < ga.ga_len; ++j) 10770 if (STRCMP(stp[j].st_word, p) == 0) 10771 break; 10772 if (j == ga.ga_len) 10773 stp[ga.ga_len++] = SUG(*gap, i); 10774 else 10775 vim_free(p); 10776 } 10777 } 10778 } 10779 10780 ga_clear(&su->su_ga); 10781 ga_clear(&su->su_sga); 10782 10783 /* Truncate the list to the number of suggestions that will be displayed. */ 10784 if (ga.ga_len > su->su_maxcount) 10785 { 10786 for (i = su->su_maxcount; i < ga.ga_len; ++i) 10787 vim_free(stp[i].st_word); 10788 ga.ga_len = su->su_maxcount; 10789 } 10790 10791 su->su_ga = ga; 10792 } 10793 10794 /* 10795 * For the goodword in "stp" compute the soundalike score compared to the 10796 * badword. 
10797 */ 10798 static int 10799 stp_sal_score(stp, su, slang, badsound) 10800 suggest_T *stp; 10801 suginfo_T *su; 10802 slang_T *slang; 10803 char_u *badsound; /* sound-folded badword */ 10804 { 10805 char_u *p; 10806 char_u *pbad; 10807 char_u *pgood; 10808 char_u badsound2[MAXWLEN]; 10809 char_u fword[MAXWLEN]; 10810 char_u goodsound[MAXWLEN]; 10811 char_u goodword[MAXWLEN]; 10812 int lendiff; 10813 10814 lendiff = (int)(su->su_badlen - stp->st_orglen); 10815 if (lendiff >= 0) 10816 pbad = badsound; 10817 else 10818 { 10819 /* soundfold the bad word with more characters following */ 10820 (void)spell_casefold(su->su_badptr, stp->st_orglen, fword, MAXWLEN); 10821 10822 /* When joining two words the sound often changes a lot. E.g., "t he" 10823 * sounds like "t h" while "the" sounds like "@". Avoid that by 10824 * removing the space. Don't do it when the good word also contains a 10825 * space. */ 10826 if (vim_iswhite(su->su_badptr[su->su_badlen]) 10827 && *skiptowhite(stp->st_word) == NUL) 10828 for (p = fword; *(p = skiptowhite(p)) != NUL; ) 10829 mch_memmove(p, p + 1, STRLEN(p)); 10830 10831 spell_soundfold(slang, fword, TRUE, badsound2); 10832 pbad = badsound2; 10833 } 10834 10835 if (lendiff > 0) 10836 { 10837 /* Add part of the bad word to the good word, so that we soundfold 10838 * what replaces the bad word. */ 10839 STRCPY(goodword, stp->st_word); 10840 STRNCAT(goodword, su->su_badptr + su->su_badlen - lendiff, lendiff); 10841 pgood = goodword; 10842 } 10843 else 10844 pgood = stp->st_word; 10845 10846 /* Sound-fold the word and compute the score for the difference. */ 10847 spell_soundfold(slang, pgood, FALSE, goodsound); 10848 10849 return soundalike_score(goodsound, pbad); 10850 } 10851 10852 /* 10853 * Find suggestions by comparing the word in a sound-a-like form. 10854 * Note: This doesn't support postponed prefixes. 
 * Every word in the word trees of each language with sound folding is
 * sound-folded and compared with the sound-folded bad word.  Matches are
 * passed to add_suggestion().
 */
    static void
suggest_try_soundalike(su)
    suginfo_T   *su;
{
    char_u      salword[MAXWLEN];       /* sound-folded bad word */
    char_u      tword[MAXWLEN];         /* word being built from the tree */
    char_u      tsalword[MAXWLEN];      /* sound-folded "tword" */
    idx_T       arridx[MAXWLEN];        /* node index at each depth */
    int         curi[MAXWLEN];          /* next byte to try at each depth */
    langp_T     *lp;
    char_u      *byts;
    idx_T       *idxs;
    int         depth;
    int         c;
    idx_T       n;
    int         round;
    int         flags;
    int         sound_score;
    int         local_score;
    int         lpi;
    slang_T     *slang;

    /* Do this for all languages that support sound folding. */
    for (lpi = 0; lpi < curbuf->b_langp.ga_len; ++lpi)
    {
        lp = LANGP_ENTRY(curbuf->b_langp, lpi);
        slang = lp->lp_slang;
        if (slang->sl_sal.ga_len > 0)
        {
            /* soundfold the bad word */
            spell_soundfold(slang, su->su_fbadword, TRUE, salword);

            /*
             * Go through the whole tree, soundfold each word and compare.
             * round 1: use the case-folded tree.
             * round 2: use the keep-case tree.
             */
            for (round = 1; round <= 2; ++round)
            {
                if (round == 1)
                {
                    byts = slang->sl_fbyts;
                    idxs = slang->sl_fidxs;
                }
                else
                {
                    byts = slang->sl_kbyts;
                    idxs = slang->sl_kidxs;
                    if (byts == NULL)       /* no keep-case words */
                        continue;
                }

                /* Depth-first walk; stops early when interrupted
                 * ("got_int" is set). */
                depth = 0;
                arridx[0] = 0;
                curi[0] = 1;
                while (depth >= 0 && !got_int)
                {
                    if (curi[depth] > byts[arridx[depth]])
                    {
                        /* Done all bytes at this node, go up one level. */
                        --depth;
                        line_breakcheck();
                    }
                    else
                    {
                        /* Do one more byte at this node. */
                        n = arridx[depth] + curi[depth];
                        ++curi[depth];
                        c = byts[n];
                        if (c == 0)
                        {
                            /* End of word, deal with the word. */
                            flags = (int)idxs[n];
                            /* In the case-folded tree skip words flagged
                             * WF_KEEPCAP. */
                            if (round == 2 || (flags & WF_KEEPCAP) == 0)
                            {
                                tword[depth] = NUL;
                                /* Sound-fold.  Only in keep-case tree need to
                                 * case-fold the word. */
                                spell_soundfold(slang, tword,
                                                        round == 1, tsalword);

                                /* Compute the edit distance between the
                                 * sound-a-like words. */
                                sound_score = soundalike_score(salword,
                                                                    tsalword);

                                /* Add a penalty for words in another region. */
                                if ((flags & WF_REGION) && (((unsigned)flags
                                                 >> 16) & lp->lp_region) == 0)
                                    local_score = SCORE_REGION;
                                else
                                    local_score = 0;
                                sound_score += local_score;

                                if (sound_score < SCORE_MAXMAX)
                                {
                                    char_u      cword[MAXWLEN];
                                    char_u      *p;
                                    int         score;

                                    flags |= su->su_badflags;
                                    if (round == 1 && (flags & WF_CAPMASK) != 0)
                                    {
                                        /* Need to fix case according to
                                         * "flags". */
                                        make_case_word(tword, cword, flags);
                                        p = cword;
                                    }
                                    else
                                        p = tword;

                                    /* With SPS_DOUBLE the sound score itself
                                     * is used and the word goes into the
                                     * separate su_sga list. */
                                    if (sps_flags & SPS_DOUBLE)
                                        add_suggestion(su, &su->su_sga, p,
                                                su->su_badlen,
                                                sound_score, 0, FALSE,
                                                lp->lp_sallang);
                                    else
                                    {
                                        /* Compute the score. */
                                        score = spell_edit_score(
                                                su->su_badword, p)
                                                                + local_score;
                                        if (sps_flags & SPS_BEST)
                                            /* give a bonus for the good word
                                             * sounding the same as the bad
                                             * word */
                                            add_suggestion(su, &su->su_ga, p,
                                                    su->su_badlen,
                                                  RESCORE(score, sound_score),
                                                    sound_score, TRUE,
                                                    lp->lp_sallang);
                                        else
                                            add_suggestion(su, &su->su_ga, p,
                                                    su->su_badlen,
                                                    score + sound_score,
                                                    0, FALSE,
                                                    lp->lp_sallang);
                                    }
                                }
                            }

                            /* Skip over other NUL bytes. */
                            while (byts[n + 1] == 0)
                            {
                                ++n;
                                ++curi[depth];
                            }
                        }
                        else
                        {
                            /* Normal char, go one level deeper. */
                            tword[depth++] = c;
                            arridx[depth] = idxs[n];
                            curi[depth] = 1;
                        }
                    }
                }
            }
        }
    }
}

/*
 * Copy "fword" to "cword", fixing case according to "flags".
11020 */ 11021 static void 11022 make_case_word(fword, cword, flags) 11023 char_u *fword; 11024 char_u *cword; 11025 int flags; 11026 { 11027 if (flags & WF_ALLCAP) 11028 /* Make it all upper-case */ 11029 allcap_copy(fword, cword); 11030 else if (flags & WF_ONECAP) 11031 /* Make the first letter upper-case */ 11032 onecap_copy(fword, cword, TRUE); 11033 else 11034 /* Use goodword as-is. */ 11035 STRCPY(cword, fword); 11036 } 11037 11038 /* 11039 * Use map string "map" for languages "lp". 11040 */ 11041 static void 11042 set_map_str(lp, map) 11043 slang_T *lp; 11044 char_u *map; 11045 { 11046 char_u *p; 11047 int headc = 0; 11048 int c; 11049 int i; 11050 11051 if (*map == NUL) 11052 { 11053 lp->sl_has_map = FALSE; 11054 return; 11055 } 11056 lp->sl_has_map = TRUE; 11057 11058 /* Init the array and hash table empty. */ 11059 for (i = 0; i < 256; ++i) 11060 lp->sl_map_array[i] = 0; 11061 #ifdef FEAT_MBYTE 11062 hash_init(&lp->sl_map_hash); 11063 #endif 11064 11065 /* 11066 * The similar characters are stored separated with slashes: 11067 * "aaa/bbb/ccc/". Fill sl_map_array[c] with the character before c and 11068 * before the same slash. For characters above 255 sl_map_hash is used. 11069 */ 11070 for (p = map; *p != NUL; ) 11071 { 11072 #ifdef FEAT_MBYTE 11073 c = mb_cptr2char_adv(&p); 11074 #else 11075 c = *p++; 11076 #endif 11077 if (c == '/') 11078 headc = 0; 11079 else 11080 { 11081 if (headc == 0) 11082 headc = c; 11083 11084 #ifdef FEAT_MBYTE 11085 /* Characters above 255 don't fit in sl_map_array[], put them in 11086 * the hash table. Each entry is the char, a NUL the headchar and 11087 * a NUL. 
*/ 11088 if (c >= 256) 11089 { 11090 int cl = mb_char2len(c); 11091 int headcl = mb_char2len(headc); 11092 char_u *b; 11093 hash_T hash; 11094 hashitem_T *hi; 11095 11096 b = alloc((unsigned)(cl + headcl + 2)); 11097 if (b == NULL) 11098 return; 11099 mb_char2bytes(c, b); 11100 b[cl] = NUL; 11101 mb_char2bytes(headc, b + cl + 1); 11102 b[cl + 1 + headcl] = NUL; 11103 hash = hash_hash(b); 11104 hi = hash_lookup(&lp->sl_map_hash, b, hash); 11105 if (HASHITEM_EMPTY(hi)) 11106 hash_add_item(&lp->sl_map_hash, hi, b, hash); 11107 else 11108 { 11109 /* This should have been checked when generating the .spl 11110 * file. */ 11111 EMSG(_("E999: duplicate char in MAP entry")); 11112 vim_free(b); 11113 } 11114 } 11115 else 11116 #endif 11117 lp->sl_map_array[c] = headc; 11118 } 11119 } 11120 } 11121 11122 /* 11123 * Return TRUE if "c1" and "c2" are similar characters according to the MAP 11124 * lines in the .aff file. 11125 */ 11126 static int 11127 similar_chars(slang, c1, c2) 11128 slang_T *slang; 11129 int c1; 11130 int c2; 11131 { 11132 int m1, m2; 11133 #ifdef FEAT_MBYTE 11134 char_u buf[MB_MAXBYTES]; 11135 hashitem_T *hi; 11136 11137 if (c1 >= 256) 11138 { 11139 buf[mb_char2bytes(c1, buf)] = 0; 11140 hi = hash_find(&slang->sl_map_hash, buf); 11141 if (HASHITEM_EMPTY(hi)) 11142 m1 = 0; 11143 else 11144 m1 = mb_ptr2char(hi->hi_key + STRLEN(hi->hi_key) + 1); 11145 } 11146 else 11147 #endif 11148 m1 = slang->sl_map_array[c1]; 11149 if (m1 == 0) 11150 return FALSE; 11151 11152 11153 #ifdef FEAT_MBYTE 11154 if (c2 >= 256) 11155 { 11156 buf[mb_char2bytes(c2, buf)] = 0; 11157 hi = hash_find(&slang->sl_map_hash, buf); 11158 if (HASHITEM_EMPTY(hi)) 11159 m2 = 0; 11160 else 11161 m2 = mb_ptr2char(hi->hi_key + STRLEN(hi->hi_key) + 1); 11162 } 11163 else 11164 #endif 11165 m2 = slang->sl_map_array[c2]; 11166 11167 return m1 == m2; 11168 } 11169 11170 /* 11171 * Add a suggestion to the list of suggestions. 11172 * Do not add a duplicate suggestion or suggestions with a bad score. 
 * "score" is used as given; a suggestion with a score above su->su_maxscore
 * is not added.  When the word is already in the list the lowest score is
 * kept.
 */
    static void
add_suggestion(su, gap, goodword, badlenarg, score, altscore, had_bonus, slang)
    suginfo_T   *su;
    garray_T    *gap;
    char_u      *goodword;
    int         badlenarg;      /* len of bad word replaced with "goodword" */
    int         score;
    int         altscore;
    int         had_bonus;      /* value for st_had_bonus */
    slang_T     *slang;         /* language for sound folding */
{
    int         goodlen = STRLEN(goodword); /* len of goodword changed */
    int         badlen = badlenarg;         /* len of bad word changed */
    suggest_T   *stp;
    suggest_T   new_sug;
    int         i;
    hlf_T       attr = HLF_COUNT;
    char_u      longword[MAXWLEN + 1];
    char_u      *pgood, *pbad;

    /* Check that the word really is valid.  Esp. for banned words and for
     * split words, such as "the the".  Need to append what follows to check
     * for that. */
    STRCPY(longword, goodword);
    vim_strncpy(longword + goodlen, su->su_badptr + badlen, MAXWLEN - goodlen);
    (void)spell_check(curwin, longword, &attr, NULL);
    /* "attr" was changed by spell_check(): reject the word. */
    if (attr != HLF_COUNT)
        return;

    /* Minimize "badlen" for consistency.  Avoids that changing "the the" to
     * "thee the" is added next to changing the first "the" to "thee". */
    pgood = goodword + STRLEN(goodword);
    pbad = su->su_badptr + badlen;
    /* Walk backwards over the characters that are equal at the end of both
     * words. */
    while (pgood > goodword && pbad > su->su_badptr)
    {
        mb_ptr_back(goodword, pgood);
        mb_ptr_back(su->su_badptr, pbad);
#ifdef FEAT_MBYTE
        if (has_mbyte)
        {
            if (mb_ptr2char(pgood) != mb_ptr2char(pbad))
                break;
        }
        else
#endif
            if (*pgood != *pbad)
                break;
        badlen = pbad - su->su_badptr;
        goodlen = pgood - goodword;
    }
    if (badlen == 0 && goodlen == 0)
        /* goodword doesn't change anything; may happen for "the the" changing
         * the first "the" to itself. */
        return;

    if (score <= su->su_maxscore)
    {
        /* Check if the word is already there.  Also check the length that is
         * being replaced "thes," -> "these" is a different suggestion from
         * "thes" -> "these". */
        stp = &SUG(*gap, 0);
        for (i = gap->ga_len - 1; i >= 0; --i)
            if ((int)STRLEN(stp[i].st_word) == goodlen
                    && STRNCMP(stp[i].st_word, goodword, goodlen) == 0
                    && stp[i].st_orglen == badlen)
            {
                /*
                 * Found it.  Remember the word with the lowest score.
                 */
                if (stp[i].st_slang == NULL)
                    stp[i].st_slang = slang;

                /* "new_sug" holds the candidate's scores, so that it can be
                 * rescored and compared with the existing entry. */
                new_sug.st_score = score;
                new_sug.st_altscore = altscore;
                new_sug.st_had_bonus = had_bonus;

                if (stp[i].st_had_bonus != had_bonus)
                {
                    /* Only one of the two had the soundalike score computed.
                     * Need to do that for the other one now, otherwise the
                     * scores can't be compared.  This happens because
                     * suggest_try_change() doesn't compute the soundalike
                     * word to keep it fast, while some special methods set
                     * the soundalike score to zero. */
                    if (had_bonus)
                        rescore_one(su, &stp[i]);
                    else
                    {
                        new_sug.st_word = goodword;
                        new_sug.st_slang = stp[i].st_slang;
                        new_sug.st_orglen = badlen;
                        rescore_one(su, &new_sug);
                    }
                }

                if (stp[i].st_score > new_sug.st_score)
                {
                    stp[i].st_score = new_sug.st_score;
                    stp[i].st_altscore = new_sug.st_altscore;
                    stp[i].st_had_bonus = new_sug.st_had_bonus;
                }
                break;
            }

        /* "i" is negative when the loop above found no matching entry. */
        if (i < 0 && ga_grow(gap, 1) == OK)
        {
            /* Add a suggestion. */
            stp = &SUG(*gap, gap->ga_len);
            stp->st_word = vim_strnsave(goodword, goodlen);
            if (stp->st_word != NULL)
            {
                stp->st_score = score;
                stp->st_altscore = altscore;
                stp->st_had_bonus = had_bonus;
                stp->st_orglen = badlen;
                stp->st_slang = slang;
                ++gap->ga_len;

                /* If we have too many suggestions now, sort the list and keep
                 * the best suggestions. */
                if (gap->ga_len > SUG_MAX_COUNT(su))
                    su->su_maxscore = cleanup_suggestions(gap, su->su_maxscore,
                                                         SUG_CLEAN_COUNT(su));
            }
        }
    }
}

/*
 * Add a word to be banned.
 * A copy of "word" is stored in su->su_banned, unless it is already there.
 */
    static void
add_banned(su, word)
    suginfo_T   *su;
    char_u      *word;
{
    char_u      *s = vim_strsave(word);
    hash_T      hash;
    hashitem_T  *hi;

    if (s != NULL)
    {
        hash = hash_hash(s);
        hi = hash_lookup(&su->su_banned, s, hash);
        if (HASHITEM_EMPTY(hi))
            /* the hash table takes over the allocated copy */
            hash_add_item(&su->su_banned, hi, s, hash);
        else
            vim_free(s);
    }
}

/*
 * Return TRUE if a word appears in the list of banned words.
 */
    static int
was_banned(su, word)
    suginfo_T   *su;
    char_u      *word;
{
    hashitem_T  *hi = hash_find(&su->su_banned, word);

    return !HASHITEM_EMPTY(hi);
}

/*
 * Free the banned words in "su".
 */
    static void
free_banned(su)
    suginfo_T   *su;
{
    int         todo;
    hashitem_T  *hi;

    /* "todo" counts the used items that still need to be freed, the loop
     * stops once all of them were seen. */
    todo = su->su_banned.ht_used;
    for (hi = su->su_banned.ht_array; todo > 0; ++hi)
    {
        if (!HASHITEM_EMPTY(hi))
        {
            vim_free(hi->hi_key);
            --todo;
        }
    }
    hash_clear(&su->su_banned);
}

/*
 * Recompute the score for all suggestions if sound-folding is possible.  This
 * is slow, thus only done for the final results.
11365 */ 11366 static void 11367 rescore_suggestions(su) 11368 suginfo_T *su; 11369 { 11370 int i; 11371 11372 if (su->su_sallang != NULL) 11373 for (i = 0; i < su->su_ga.ga_len; ++i) 11374 rescore_one(su, &SUG(su->su_ga, i)); 11375 } 11376 11377 /* 11378 * Recompute the score for one suggestion if sound-folding is possible. 11379 */ 11380 static void 11381 rescore_one(su, stp) 11382 suginfo_T *su; 11383 suggest_T *stp; 11384 { 11385 slang_T *slang = stp->st_slang; 11386 char_u sal_badword[MAXWLEN]; 11387 char_u *p; 11388 11389 /* Only rescore suggestions that have no sal score yet and do have a 11390 * language. */ 11391 if (slang != NULL && slang->sl_sal.ga_len > 0 && !stp->st_had_bonus) 11392 { 11393 if (slang == su->su_sallang) 11394 p = su->su_sal_badword; 11395 else 11396 { 11397 spell_soundfold(slang, su->su_fbadword, TRUE, sal_badword); 11398 p = sal_badword; 11399 } 11400 11401 stp->st_altscore = stp_sal_score(stp, su, slang, p); 11402 if (stp->st_altscore == SCORE_MAXMAX) 11403 stp->st_altscore = SCORE_BIG; 11404 stp->st_score = RESCORE(stp->st_score, stp->st_altscore); 11405 stp->st_had_bonus = TRUE; 11406 } 11407 } 11408 11409 static int 11410 #ifdef __BORLANDC__ 11411 _RTLENTRYF 11412 #endif 11413 sug_compare __ARGS((const void *s1, const void *s2)); 11414 11415 /* 11416 * Function given to qsort() to sort the suggestions on st_score. 11417 * First on "st_score", then "st_altscore" then alphabetically. 11418 */ 11419 static int 11420 #ifdef __BORLANDC__ 11421 _RTLENTRYF 11422 #endif 11423 sug_compare(s1, s2) 11424 const void *s1; 11425 const void *s2; 11426 { 11427 suggest_T *p1 = (suggest_T *)s1; 11428 suggest_T *p2 = (suggest_T *)s2; 11429 int n = p1->st_score - p2->st_score; 11430 11431 if (n == 0) 11432 { 11433 n = p1->st_altscore - p2->st_altscore; 11434 if (n == 0) 11435 n = STRICMP(p1->st_word, p2->st_word); 11436 } 11437 return n; 11438 } 11439 11440 /* 11441 * Cleanup the suggestions: 11442 * - Sort on score. 
11443 * - Remove words that won't be displayed. 11444 * Returns the maximum score in the list or "maxscore" unmodified. 11445 */ 11446 static int 11447 cleanup_suggestions(gap, maxscore, keep) 11448 garray_T *gap; 11449 int maxscore; 11450 int keep; /* nr of suggestions to keep */ 11451 { 11452 suggest_T *stp = &SUG(*gap, 0); 11453 int i; 11454 11455 /* Sort the list. */ 11456 qsort(gap->ga_data, (size_t)gap->ga_len, sizeof(suggest_T), sug_compare); 11457 11458 /* Truncate the list to the number of suggestions that will be displayed. */ 11459 if (gap->ga_len > keep) 11460 { 11461 for (i = keep; i < gap->ga_len; ++i) 11462 vim_free(stp[i].st_word); 11463 gap->ga_len = keep; 11464 return stp[keep - 1].st_score; 11465 } 11466 return maxscore; 11467 } 11468 11469 #if defined(FEAT_EVAL) || defined(PROTO) 11470 /* 11471 * Soundfold a string, for soundfold(). 11472 * Result is in allocated memory, NULL for an error. 11473 */ 11474 char_u * 11475 eval_soundfold(word) 11476 char_u *word; 11477 { 11478 langp_T *lp; 11479 char_u sound[MAXWLEN]; 11480 int lpi; 11481 11482 if (curwin->w_p_spell && *curbuf->b_p_spl != NUL) 11483 /* Use the sound-folding of the first language that supports it. */ 11484 for (lpi = 0; lpi < curbuf->b_langp.ga_len; ++lpi) 11485 { 11486 lp = LANGP_ENTRY(curbuf->b_langp, lpi); 11487 if (lp->lp_slang->sl_sal.ga_len > 0) 11488 { 11489 /* soundfold the word */ 11490 spell_soundfold(lp->lp_slang, word, FALSE, sound); 11491 return vim_strsave(sound); 11492 } 11493 } 11494 11495 /* No language with sound folding, return word as-is. */ 11496 return vim_strsave(word); 11497 } 11498 #endif 11499 11500 /* 11501 * Turn "inword" into its sound-a-like equivalent in "res[MAXWLEN]". 11502 * 11503 * There are many ways to turn a word into a sound-a-like representation. The 11504 * oldest is Soundex (1918!). A nice overview can be found in "Approximate 11505 * swedish name matching - survey and test of different algorithms" by Klas 11506 * Erikson. 
11507 * 11508 * We support two methods: 11509 * 1. SOFOFROM/SOFOTO do a simple character mapping. 11510 * 2. SAL items define a more advanced sound-folding (and much slower). 11511 */ 11512 static void 11513 spell_soundfold(slang, inword, folded, res) 11514 slang_T *slang; 11515 char_u *inword; 11516 int folded; /* "inword" is already case-folded */ 11517 char_u *res; 11518 { 11519 char_u fword[MAXWLEN]; 11520 char_u *word; 11521 11522 if (slang->sl_sofo) 11523 /* SOFOFROM and SOFOTO used */ 11524 spell_soundfold_sofo(slang, inword, res); 11525 else 11526 { 11527 /* SAL items used. Requires the word to be case-folded. */ 11528 if (folded) 11529 word = inword; 11530 else 11531 { 11532 (void)spell_casefold(inword, STRLEN(inword), fword, MAXWLEN); 11533 word = fword; 11534 } 11535 11536 #ifdef FEAT_MBYTE 11537 if (has_mbyte) 11538 spell_soundfold_wsal(slang, word, res); 11539 else 11540 #endif 11541 spell_soundfold_sal(slang, word, res); 11542 } 11543 } 11544 11545 /* 11546 * Perform sound folding of "inword" into "res" according to SOFOFROM and 11547 * SOFOTO lines. 11548 */ 11549 static void 11550 spell_soundfold_sofo(slang, inword, res) 11551 slang_T *slang; 11552 char_u *inword; 11553 char_u *res; 11554 { 11555 char_u *s; 11556 int ri = 0; 11557 int c; 11558 11559 #ifdef FEAT_MBYTE 11560 if (has_mbyte) 11561 { 11562 int prevc = 0; 11563 int *ip; 11564 11565 /* The sl_sal_first[] table contains the translation for chars up to 11566 * 255, sl_sal the rest. */ 11567 for (s = inword; *s != NUL; ) 11568 { 11569 c = mb_cptr2char_adv(&s); 11570 if (enc_utf8 ? utf_class(c) == 0 : vim_iswhite(c)) 11571 c = ' '; 11572 else if (c < 256) 11573 c = slang->sl_sal_first[c]; 11574 else 11575 { 11576 ip = ((int **)slang->sl_sal.ga_data)[c & 0xff]; 11577 if (ip == NULL) /* empty list, can't match */ 11578 c = NUL; 11579 else 11580 for (;;) /* find "c" in the list */ 11581 { 11582 if (*ip == 0) /* not found */ 11583 { 11584 c = NUL; 11585 break; 11586 } 11587 if (*ip == c) /* match! 
*/ 11588 { 11589 c = ip[1]; 11590 break; 11591 } 11592 ip += 2; 11593 } 11594 } 11595 11596 if (c != NUL && c != prevc) 11597 { 11598 ri += mb_char2bytes(c, res + ri); 11599 if (ri + MB_MAXBYTES > MAXWLEN) 11600 break; 11601 prevc = c; 11602 } 11603 } 11604 } 11605 else 11606 #endif 11607 { 11608 /* The sl_sal_first[] table contains the translation. */ 11609 for (s = inword; (c = *s) != NUL; ++s) 11610 { 11611 if (vim_iswhite(c)) 11612 c = ' '; 11613 else 11614 c = slang->sl_sal_first[c]; 11615 if (c != NUL && (ri == 0 || res[ri - 1] != c)) 11616 res[ri++] = c; 11617 } 11618 } 11619 11620 res[ri] = NUL; 11621 } 11622 11623 static void 11624 spell_soundfold_sal(slang, inword, res) 11625 slang_T *slang; 11626 char_u *inword; 11627 char_u *res; 11628 { 11629 salitem_T *smp; 11630 char_u word[MAXWLEN]; 11631 char_u *s = inword; 11632 char_u *t; 11633 char_u *pf; 11634 int i, j, z; 11635 int reslen; 11636 int n, k = 0; 11637 int z0; 11638 int k0; 11639 int n0; 11640 int c; 11641 int pri; 11642 int p0 = -333; 11643 int c0; 11644 11645 /* Remove accents, if wanted. We actually remove all non-word characters. 11646 * But keep white space. We need a copy, the word may be changed here. */ 11647 if (slang->sl_rem_accents) 11648 { 11649 t = word; 11650 while (*s != NUL) 11651 { 11652 if (vim_iswhite(*s)) 11653 { 11654 *t++ = ' '; 11655 s = skipwhite(s); 11656 } 11657 else 11658 { 11659 if (spell_iswordp_nmw(s)) 11660 *t++ = *s; 11661 ++s; 11662 } 11663 } 11664 *t = NUL; 11665 } 11666 else 11667 STRCPY(word, s); 11668 11669 smp = (salitem_T *)slang->sl_sal.ga_data; 11670 11671 /* 11672 * This comes from Aspell phonet.cpp. Converted from C++ to C. 11673 * Changed to keep spaces. 11674 */ 11675 i = reslen = z = 0; 11676 while ((c = word[i]) != NUL) 11677 { 11678 /* Start with the first rule that has the character in the word. 
*/ 11679 n = slang->sl_sal_first[c]; 11680 z0 = 0; 11681 11682 if (n >= 0) 11683 { 11684 /* check all rules for the same letter */ 11685 for (; (s = smp[n].sm_lead)[0] == c; ++n) 11686 { 11687 /* Quickly skip entries that don't match the word. Most 11688 * entries are less then three chars, optimize for that. */ 11689 k = smp[n].sm_leadlen; 11690 if (k > 1) 11691 { 11692 if (word[i + 1] != s[1]) 11693 continue; 11694 if (k > 2) 11695 { 11696 for (j = 2; j < k; ++j) 11697 if (word[i + j] != s[j]) 11698 break; 11699 if (j < k) 11700 continue; 11701 } 11702 } 11703 11704 if ((pf = smp[n].sm_oneof) != NULL) 11705 { 11706 /* Check for match with one of the chars in "sm_oneof". */ 11707 while (*pf != NUL && *pf != word[i + k]) 11708 ++pf; 11709 if (*pf == NUL) 11710 continue; 11711 ++k; 11712 } 11713 s = smp[n].sm_rules; 11714 pri = 5; /* default priority */ 11715 11716 p0 = *s; 11717 k0 = k; 11718 while (*s == '-' && k > 1) 11719 { 11720 k--; 11721 s++; 11722 } 11723 if (*s == '<') 11724 s++; 11725 if (VIM_ISDIGIT(*s)) 11726 { 11727 /* determine priority */ 11728 pri = *s - '0'; 11729 s++; 11730 } 11731 if (*s == '^' && *(s + 1) == '^') 11732 s++; 11733 11734 if (*s == NUL 11735 || (*s == '^' 11736 && (i == 0 || !(word[i - 1] == ' ' 11737 || spell_iswordp(word + i - 1, curbuf))) 11738 && (*(s + 1) != '$' 11739 || (!spell_iswordp(word + i + k0, curbuf)))) 11740 || (*s == '$' && i > 0 11741 && spell_iswordp(word + i - 1, curbuf) 11742 && (!spell_iswordp(word + i + k0, curbuf)))) 11743 { 11744 /* search for followup rules, if: */ 11745 /* followup and k > 1 and NO '-' in searchstring */ 11746 c0 = word[i + k - 1]; 11747 n0 = slang->sl_sal_first[c0]; 11748 11749 if (slang->sl_followup && k > 1 && n0 >= 0 11750 && p0 != '-' && word[i + k] != NUL) 11751 { 11752 /* test follow-up rule for "word[i + k]" */ 11753 for ( ; (s = smp[n0].sm_lead)[0] == c0; ++n0) 11754 { 11755 /* Quickly skip entries that don't match the word. 
11756 * */ 11757 k0 = smp[n0].sm_leadlen; 11758 if (k0 > 1) 11759 { 11760 if (word[i + k] != s[1]) 11761 continue; 11762 if (k0 > 2) 11763 { 11764 pf = word + i + k + 1; 11765 for (j = 2; j < k0; ++j) 11766 if (*pf++ != s[j]) 11767 break; 11768 if (j < k0) 11769 continue; 11770 } 11771 } 11772 k0 += k - 1; 11773 11774 if ((pf = smp[n0].sm_oneof) != NULL) 11775 { 11776 /* Check for match with one of the chars in 11777 * "sm_oneof". */ 11778 while (*pf != NUL && *pf != word[i + k0]) 11779 ++pf; 11780 if (*pf == NUL) 11781 continue; 11782 ++k0; 11783 } 11784 11785 p0 = 5; 11786 s = smp[n0].sm_rules; 11787 while (*s == '-') 11788 { 11789 /* "k0" gets NOT reduced because 11790 * "if (k0 == k)" */ 11791 s++; 11792 } 11793 if (*s == '<') 11794 s++; 11795 if (VIM_ISDIGIT(*s)) 11796 { 11797 p0 = *s - '0'; 11798 s++; 11799 } 11800 11801 if (*s == NUL 11802 /* *s == '^' cuts */ 11803 || (*s == '$' 11804 && !spell_iswordp(word + i + k0, 11805 curbuf))) 11806 { 11807 if (k0 == k) 11808 /* this is just a piece of the string */ 11809 continue; 11810 11811 if (p0 < pri) 11812 /* priority too low */ 11813 continue; 11814 /* rule fits; stop search */ 11815 break; 11816 } 11817 } 11818 11819 if (p0 >= pri && smp[n0].sm_lead[0] == c0) 11820 continue; 11821 } 11822 11823 /* replace string */ 11824 s = smp[n].sm_to; 11825 if (s == NULL) 11826 s = (char_u *)""; 11827 pf = smp[n].sm_rules; 11828 p0 = (vim_strchr(pf, '<') != NULL) ? 
1 : 0; 11829 if (p0 == 1 && z == 0) 11830 { 11831 /* rule with '<' is used */ 11832 if (reslen > 0 && *s != NUL && (res[reslen - 1] == c 11833 || res[reslen - 1] == *s)) 11834 reslen--; 11835 z0 = 1; 11836 z = 1; 11837 k0 = 0; 11838 while (*s != NUL && word[i + k0] != NUL) 11839 { 11840 word[i + k0] = *s; 11841 k0++; 11842 s++; 11843 } 11844 if (k > k0) 11845 mch_memmove(word + i + k0, word + i + k, 11846 STRLEN(word + i + k) + 1); 11847 11848 /* new "actual letter" */ 11849 c = word[i]; 11850 } 11851 else 11852 { 11853 /* no '<' rule used */ 11854 i += k - 1; 11855 z = 0; 11856 while (*s != NUL && s[1] != NUL && reslen < MAXWLEN) 11857 { 11858 if (reslen == 0 || res[reslen - 1] != *s) 11859 res[reslen++] = *s; 11860 s++; 11861 } 11862 /* new "actual letter" */ 11863 c = *s; 11864 if (strstr((char *)pf, "^^") != NULL) 11865 { 11866 if (c != NUL) 11867 res[reslen++] = c; 11868 mch_memmove(word, word + i + 1, 11869 STRLEN(word + i + 1) + 1); 11870 i = 0; 11871 z0 = 1; 11872 } 11873 } 11874 break; 11875 } 11876 } 11877 } 11878 else if (vim_iswhite(c)) 11879 { 11880 c = ' '; 11881 k = 1; 11882 } 11883 11884 if (z0 == 0) 11885 { 11886 if (k && !p0 && reslen < MAXWLEN && c != NUL 11887 && (!slang->sl_collapse || reslen == 0 11888 || res[reslen - 1] != c)) 11889 /* condense only double letters */ 11890 res[reslen++] = c; 11891 11892 i++; 11893 z = 0; 11894 k = 0; 11895 } 11896 } 11897 11898 res[reslen] = NUL; 11899 } 11900 11901 #ifdef FEAT_MBYTE 11902 /* 11903 * Turn "inword" into its sound-a-like equivalent in "res[MAXWLEN]". 11904 * Multi-byte version of spell_soundfold(). 
11905 */ 11906 static void 11907 spell_soundfold_wsal(slang, inword, res) 11908 slang_T *slang; 11909 char_u *inword; 11910 char_u *res; 11911 { 11912 salitem_T *smp = (salitem_T *)slang->sl_sal.ga_data; 11913 int word[MAXWLEN]; 11914 int wres[MAXWLEN]; 11915 int l; 11916 char_u *s; 11917 int *ws; 11918 char_u *t; 11919 int *pf; 11920 int i, j, z; 11921 int reslen; 11922 int n, k = 0; 11923 int z0; 11924 int k0; 11925 int n0; 11926 int c; 11927 int pri; 11928 int p0 = -333; 11929 int c0; 11930 int did_white = FALSE; 11931 11932 /* 11933 * Convert the multi-byte string to a wide-character string. 11934 * Remove accents, if wanted. We actually remove all non-word characters. 11935 * But keep white space. 11936 */ 11937 n = 0; 11938 for (s = inword; *s != NUL; ) 11939 { 11940 t = s; 11941 c = mb_cptr2char_adv(&s); 11942 if (slang->sl_rem_accents) 11943 { 11944 if (enc_utf8 ? utf_class(c) == 0 : vim_iswhite(c)) 11945 { 11946 if (did_white) 11947 continue; 11948 c = ' '; 11949 did_white = TRUE; 11950 } 11951 else 11952 { 11953 did_white = FALSE; 11954 if (!spell_iswordp_nmw(t)) 11955 continue; 11956 } 11957 } 11958 word[n++] = c; 11959 } 11960 word[n] = NUL; 11961 11962 /* 11963 * This comes from Aspell phonet.cpp. 11964 * Converted from C++ to C. Added support for multi-byte chars. 11965 * Changed to keep spaces. 11966 */ 11967 i = reslen = z = 0; 11968 while ((c = word[i]) != NUL) 11969 { 11970 /* Start with the first rule that has the character in the word. */ 11971 n = slang->sl_sal_first[c & 0xff]; 11972 z0 = 0; 11973 11974 if (n >= 0) 11975 { 11976 /* check all rules for the same index byte */ 11977 for (; ((ws = smp[n].sm_lead_w)[0] & 0xff) == (c & 0xff); ++n) 11978 { 11979 /* Quickly skip entries that don't match the word. Most 11980 * entries are less then three chars, optimize for that. 
*/ 11981 if (c != ws[0]) 11982 continue; 11983 k = smp[n].sm_leadlen; 11984 if (k > 1) 11985 { 11986 if (word[i + 1] != ws[1]) 11987 continue; 11988 if (k > 2) 11989 { 11990 for (j = 2; j < k; ++j) 11991 if (word[i + j] != ws[j]) 11992 break; 11993 if (j < k) 11994 continue; 11995 } 11996 } 11997 11998 if ((pf = smp[n].sm_oneof_w) != NULL) 11999 { 12000 /* Check for match with one of the chars in "sm_oneof". */ 12001 while (*pf != NUL && *pf != word[i + k]) 12002 ++pf; 12003 if (*pf == NUL) 12004 continue; 12005 ++k; 12006 } 12007 s = smp[n].sm_rules; 12008 pri = 5; /* default priority */ 12009 12010 p0 = *s; 12011 k0 = k; 12012 while (*s == '-' && k > 1) 12013 { 12014 k--; 12015 s++; 12016 } 12017 if (*s == '<') 12018 s++; 12019 if (VIM_ISDIGIT(*s)) 12020 { 12021 /* determine priority */ 12022 pri = *s - '0'; 12023 s++; 12024 } 12025 if (*s == '^' && *(s + 1) == '^') 12026 s++; 12027 12028 if (*s == NUL 12029 || (*s == '^' 12030 && (i == 0 || !(word[i - 1] == ' ' 12031 || spell_iswordp_w(word + i - 1, curbuf))) 12032 && (*(s + 1) != '$' 12033 || (!spell_iswordp_w(word + i + k0, curbuf)))) 12034 || (*s == '$' && i > 0 12035 && spell_iswordp_w(word + i - 1, curbuf) 12036 && (!spell_iswordp_w(word + i + k0, curbuf)))) 12037 { 12038 /* search for followup rules, if: */ 12039 /* followup and k > 1 and NO '-' in searchstring */ 12040 c0 = word[i + k - 1]; 12041 n0 = slang->sl_sal_first[c0 & 0xff]; 12042 12043 if (slang->sl_followup && k > 1 && n0 >= 0 12044 && p0 != '-' && word[i + k] != NUL) 12045 { 12046 /* Test follow-up rule for "word[i + k]"; loop over 12047 * all entries with the same index byte. */ 12048 for ( ; ((ws = smp[n0].sm_lead_w)[0] & 0xff) 12049 == (c0 & 0xff); ++n0) 12050 { 12051 /* Quickly skip entries that don't match the word. 
12052 */ 12053 if (c0 != ws[0]) 12054 continue; 12055 k0 = smp[n0].sm_leadlen; 12056 if (k0 > 1) 12057 { 12058 if (word[i + k] != ws[1]) 12059 continue; 12060 if (k0 > 2) 12061 { 12062 pf = word + i + k + 1; 12063 for (j = 2; j < k0; ++j) 12064 if (*pf++ != ws[j]) 12065 break; 12066 if (j < k0) 12067 continue; 12068 } 12069 } 12070 k0 += k - 1; 12071 12072 if ((pf = smp[n0].sm_oneof_w) != NULL) 12073 { 12074 /* Check for match with one of the chars in 12075 * "sm_oneof". */ 12076 while (*pf != NUL && *pf != word[i + k0]) 12077 ++pf; 12078 if (*pf == NUL) 12079 continue; 12080 ++k0; 12081 } 12082 12083 p0 = 5; 12084 s = smp[n0].sm_rules; 12085 while (*s == '-') 12086 { 12087 /* "k0" gets NOT reduced because 12088 * "if (k0 == k)" */ 12089 s++; 12090 } 12091 if (*s == '<') 12092 s++; 12093 if (VIM_ISDIGIT(*s)) 12094 { 12095 p0 = *s - '0'; 12096 s++; 12097 } 12098 12099 if (*s == NUL 12100 /* *s == '^' cuts */ 12101 || (*s == '$' 12102 && !spell_iswordp_w(word + i + k0, 12103 curbuf))) 12104 { 12105 if (k0 == k) 12106 /* this is just a piece of the string */ 12107 continue; 12108 12109 if (p0 < pri) 12110 /* priority too low */ 12111 continue; 12112 /* rule fits; stop search */ 12113 break; 12114 } 12115 } 12116 12117 if (p0 >= pri && (smp[n0].sm_lead_w[0] & 0xff) 12118 == (c0 & 0xff)) 12119 continue; 12120 } 12121 12122 /* replace string */ 12123 ws = smp[n].sm_to_w; 12124 s = smp[n].sm_rules; 12125 p0 = (vim_strchr(s, '<') != NULL) ? 
1 : 0; 12126 if (p0 == 1 && z == 0) 12127 { 12128 /* rule with '<' is used */ 12129 if (reslen > 0 && ws != NULL && *ws != NUL 12130 && (wres[reslen - 1] == c 12131 || wres[reslen - 1] == *ws)) 12132 reslen--; 12133 z0 = 1; 12134 z = 1; 12135 k0 = 0; 12136 if (ws != NULL) 12137 while (*ws != NUL && word[i + k0] != NUL) 12138 { 12139 word[i + k0] = *ws; 12140 k0++; 12141 ws++; 12142 } 12143 if (k > k0) 12144 mch_memmove(word + i + k0, word + i + k, 12145 sizeof(int) * (STRLEN(word + i + k) + 1)); 12146 12147 /* new "actual letter" */ 12148 c = word[i]; 12149 } 12150 else 12151 { 12152 /* no '<' rule used */ 12153 i += k - 1; 12154 z = 0; 12155 if (ws != NULL) 12156 while (*ws != NUL && ws[1] != NUL 12157 && reslen < MAXWLEN) 12158 { 12159 if (reslen == 0 || wres[reslen - 1] != *ws) 12160 wres[reslen++] = *ws; 12161 ws++; 12162 } 12163 /* new "actual letter" */ 12164 if (ws == NULL) 12165 c = NUL; 12166 else 12167 c = *ws; 12168 if (strstr((char *)s, "^^") != NULL) 12169 { 12170 if (c != NUL) 12171 wres[reslen++] = c; 12172 mch_memmove(word, word + i + 1, 12173 sizeof(int) * (STRLEN(word + i + 1) + 1)); 12174 i = 0; 12175 z0 = 1; 12176 } 12177 } 12178 break; 12179 } 12180 } 12181 } 12182 else if (vim_iswhite(c)) 12183 { 12184 c = ' '; 12185 k = 1; 12186 } 12187 12188 if (z0 == 0) 12189 { 12190 if (k && !p0 && reslen < MAXWLEN && c != NUL 12191 && (!slang->sl_collapse || reslen == 0 12192 || wres[reslen - 1] != c)) 12193 /* condense only double letters */ 12194 wres[reslen++] = c; 12195 12196 i++; 12197 z = 0; 12198 k = 0; 12199 } 12200 } 12201 12202 /* Convert wide characters in "wres" to a multi-byte string in "res". */ 12203 l = 0; 12204 for (n = 0; n < reslen; ++n) 12205 { 12206 l += mb_char2bytes(wres[n], res + l); 12207 if (l + MB_MAXBYTES > MAXWLEN) 12208 break; 12209 } 12210 res[l] = NUL; 12211 } 12212 #endif 12213 12214 /* 12215 * Compute a score for two sound-a-like words. 12216 * This permits up to two inserts/deletes/swaps/etc. to keep things fast. 
12217 * Instead of a generic loop we write out the code. That keeps it fast by 12218 * avoiding checks that will not be possible. 12219 */ 12220 static int 12221 soundalike_score(goodstart, badstart) 12222 char_u *goodstart; /* sound-folded good word */ 12223 char_u *badstart; /* sound-folded bad word */ 12224 { 12225 char_u *goodsound = goodstart; 12226 char_u *badsound = badstart; 12227 int goodlen; 12228 int badlen; 12229 int n; 12230 char_u *pl, *ps; 12231 char_u *pl2, *ps2; 12232 int score = 0; 12233 12234 /* adding/inserting "*" at the start (word starts with vowel) shouldn't be 12235 * counted so much, vowels halfway the word aren't counted at all. */ 12236 if ((*badsound == '*' || *goodsound == '*') && *badsound != *goodsound) 12237 { 12238 score = SCORE_DEL / 2; 12239 if (*badsound == '*') 12240 ++badsound; 12241 else 12242 ++goodsound; 12243 } 12244 12245 goodlen = STRLEN(goodsound); 12246 badlen = STRLEN(badsound); 12247 12248 /* Return quickly if the lenghts are too different to be fixed by two 12249 * changes. */ 12250 n = goodlen - badlen; 12251 if (n < -2 || n > 2) 12252 return SCORE_MAXMAX; 12253 12254 if (n > 0) 12255 { 12256 pl = goodsound; /* goodsound is longest */ 12257 ps = badsound; 12258 } 12259 else 12260 { 12261 pl = badsound; /* badsound is longest */ 12262 ps = goodsound; 12263 } 12264 12265 /* Skip over the identical part. */ 12266 while (*pl == *ps && *pl != NUL) 12267 { 12268 ++pl; 12269 ++ps; 12270 } 12271 12272 switch (n) 12273 { 12274 case -2: 12275 case 2: 12276 /* 12277 * Must delete two characters from "pl". 12278 */ 12279 ++pl; /* first delete */ 12280 while (*pl == *ps) 12281 { 12282 ++pl; 12283 ++ps; 12284 } 12285 /* strings must be equal after second delete */ 12286 if (STRCMP(pl + 1, ps) == 0) 12287 return score + SCORE_DEL * 2; 12288 12289 /* Failed to compare. */ 12290 break; 12291 12292 case -1: 12293 case 1: 12294 /* 12295 * Minimal one delete from "pl" required. 
12296 */ 12297 12298 /* 1: delete */ 12299 pl2 = pl + 1; 12300 ps2 = ps; 12301 while (*pl2 == *ps2) 12302 { 12303 if (*pl2 == NUL) /* reached the end */ 12304 return score + SCORE_DEL; 12305 ++pl2; 12306 ++ps2; 12307 } 12308 12309 /* 2: delete then swap, then rest must be equal */ 12310 if (pl2[0] == ps2[1] && pl2[1] == ps2[0] 12311 && STRCMP(pl2 + 2, ps2 + 2) == 0) 12312 return score + SCORE_DEL + SCORE_SWAP; 12313 12314 /* 3: delete then substitute, then the rest must be equal */ 12315 if (STRCMP(pl2 + 1, ps2 + 1) == 0) 12316 return score + SCORE_DEL + SCORE_SUBST; 12317 12318 /* 4: first swap then delete */ 12319 if (pl[0] == ps[1] && pl[1] == ps[0]) 12320 { 12321 pl2 = pl + 2; /* swap, skip two chars */ 12322 ps2 = ps + 2; 12323 while (*pl2 == *ps2) 12324 { 12325 ++pl2; 12326 ++ps2; 12327 } 12328 /* delete a char and then strings must be equal */ 12329 if (STRCMP(pl2 + 1, ps2) == 0) 12330 return score + SCORE_SWAP + SCORE_DEL; 12331 } 12332 12333 /* 5: first substitute then delete */ 12334 pl2 = pl + 1; /* substitute, skip one char */ 12335 ps2 = ps + 1; 12336 while (*pl2 == *ps2) 12337 { 12338 ++pl2; 12339 ++ps2; 12340 } 12341 /* delete a char and then strings must be equal */ 12342 if (STRCMP(pl2 + 1, ps2) == 0) 12343 return score + SCORE_SUBST + SCORE_DEL; 12344 12345 /* Failed to compare. */ 12346 break; 12347 12348 case 0: 12349 /* 12350 * Lenghts are equal, thus changes must result in same length: An 12351 * insert is only possible in combination with a delete. 
12352 * 1: check if for identical strings 12353 */ 12354 if (*pl == NUL) 12355 return score; 12356 12357 /* 2: swap */ 12358 if (pl[0] == ps[1] && pl[1] == ps[0]) 12359 { 12360 pl2 = pl + 2; /* swap, skip two chars */ 12361 ps2 = ps + 2; 12362 while (*pl2 == *ps2) 12363 { 12364 if (*pl2 == NUL) /* reached the end */ 12365 return score + SCORE_SWAP; 12366 ++pl2; 12367 ++ps2; 12368 } 12369 /* 3: swap and swap again */ 12370 if (pl2[0] == ps2[1] && pl2[1] == ps2[0] 12371 && STRCMP(pl2 + 2, ps2 + 2) == 0) 12372 return score + SCORE_SWAP + SCORE_SWAP; 12373 12374 /* 4: swap and substitute */ 12375 if (STRCMP(pl2 + 1, ps2 + 1) == 0) 12376 return score + SCORE_SWAP + SCORE_SUBST; 12377 } 12378 12379 /* 5: substitute */ 12380 pl2 = pl + 1; 12381 ps2 = ps + 1; 12382 while (*pl2 == *ps2) 12383 { 12384 if (*pl2 == NUL) /* reached the end */ 12385 return score + SCORE_SUBST; 12386 ++pl2; 12387 ++ps2; 12388 } 12389 12390 /* 6: substitute and swap */ 12391 if (pl2[0] == ps2[1] && pl2[1] == ps2[0] 12392 && STRCMP(pl2 + 2, ps2 + 2) == 0) 12393 return score + SCORE_SUBST + SCORE_SWAP; 12394 12395 /* 7: substitute and substitute */ 12396 if (STRCMP(pl2 + 1, ps2 + 1) == 0) 12397 return score + SCORE_SUBST + SCORE_SUBST; 12398 12399 /* 8: insert then delete */ 12400 pl2 = pl; 12401 ps2 = ps + 1; 12402 while (*pl2 == *ps2) 12403 { 12404 ++pl2; 12405 ++ps2; 12406 } 12407 if (STRCMP(pl2 + 1, ps2) == 0) 12408 return score + SCORE_INS + SCORE_DEL; 12409 12410 /* 9: delete then insert */ 12411 pl2 = pl + 1; 12412 ps2 = ps; 12413 while (*pl2 == *ps2) 12414 { 12415 ++pl2; 12416 ++ps2; 12417 } 12418 if (STRCMP(pl2, ps2 + 1) == 0) 12419 return score + SCORE_INS + SCORE_DEL; 12420 12421 /* Failed to compare. */ 12422 break; 12423 } 12424 12425 return SCORE_MAXMAX; 12426 } 12427 12428 /* 12429 * Compute the "edit distance" to turn "badword" into "goodword". The less 12430 * deletes/inserts/substitutes/swaps are required the lower the score. 
12431 * 12432 * The algorithm is described by Du and Chang, 1992. 12433 * The implementation of the algorithm comes from Aspell editdist.cpp, 12434 * edit_distance(). It has been converted from C++ to C and modified to 12435 * support multi-byte characters. 12436 */ 12437 static int 12438 spell_edit_score(badword, goodword) 12439 char_u *badword; 12440 char_u *goodword; 12441 { 12442 int *cnt; 12443 int badlen, goodlen; /* lenghts including NUL */ 12444 int j, i; 12445 int t; 12446 int bc, gc; 12447 int pbc, pgc; 12448 #ifdef FEAT_MBYTE 12449 char_u *p; 12450 int wbadword[MAXWLEN]; 12451 int wgoodword[MAXWLEN]; 12452 12453 if (has_mbyte) 12454 { 12455 /* Get the characters from the multi-byte strings and put them in an 12456 * int array for easy access. */ 12457 for (p = badword, badlen = 0; *p != NUL; ) 12458 wbadword[badlen++] = mb_cptr2char_adv(&p); 12459 wbadword[badlen++] = 0; 12460 for (p = goodword, goodlen = 0; *p != NUL; ) 12461 wgoodword[goodlen++] = mb_cptr2char_adv(&p); 12462 wgoodword[goodlen++] = 0; 12463 } 12464 else 12465 #endif 12466 { 12467 badlen = STRLEN(badword) + 1; 12468 goodlen = STRLEN(goodword) + 1; 12469 } 12470 12471 /* We use "cnt" as an array: CNT(badword_idx, goodword_idx). 
*/ 12472 #define CNT(a, b) cnt[(a) + (b) * (badlen + 1)] 12473 cnt = (int *)lalloc((long_u)(sizeof(int) * (badlen + 1) * (goodlen + 1)), 12474 TRUE); 12475 if (cnt == NULL) 12476 return 0; /* out of memory */ 12477 12478 CNT(0, 0) = 0; 12479 for (j = 1; j <= goodlen; ++j) 12480 CNT(0, j) = CNT(0, j - 1) + SCORE_DEL; 12481 12482 for (i = 1; i <= badlen; ++i) 12483 { 12484 CNT(i, 0) = CNT(i - 1, 0) + SCORE_INS; 12485 for (j = 1; j <= goodlen; ++j) 12486 { 12487 #ifdef FEAT_MBYTE 12488 if (has_mbyte) 12489 { 12490 bc = wbadword[i - 1]; 12491 gc = wgoodword[j - 1]; 12492 } 12493 else 12494 #endif 12495 { 12496 bc = badword[i - 1]; 12497 gc = goodword[j - 1]; 12498 } 12499 if (bc == gc) 12500 CNT(i, j) = CNT(i - 1, j - 1); 12501 else 12502 { 12503 /* Use a better score when there is only a case difference. */ 12504 if (SPELL_TOFOLD(bc) == SPELL_TOFOLD(gc)) 12505 CNT(i, j) = SCORE_ICASE + CNT(i - 1, j - 1); 12506 else 12507 CNT(i, j) = SCORE_SUBST + CNT(i - 1, j - 1); 12508 12509 if (i > 1 && j > 1) 12510 { 12511 #ifdef FEAT_MBYTE 12512 if (has_mbyte) 12513 { 12514 pbc = wbadword[i - 2]; 12515 pgc = wgoodword[j - 2]; 12516 } 12517 else 12518 #endif 12519 { 12520 pbc = badword[i - 2]; 12521 pgc = goodword[j - 2]; 12522 } 12523 if (bc == pgc && pbc == gc) 12524 { 12525 t = SCORE_SWAP + CNT(i - 2, j - 2); 12526 if (t < CNT(i, j)) 12527 CNT(i, j) = t; 12528 } 12529 } 12530 t = SCORE_DEL + CNT(i - 1, j); 12531 if (t < CNT(i, j)) 12532 CNT(i, j) = t; 12533 t = SCORE_INS + CNT(i, j - 1); 12534 if (t < CNT(i, j)) 12535 CNT(i, j) = t; 12536 } 12537 } 12538 } 12539 12540 i = CNT(badlen - 1, goodlen - 1); 12541 vim_free(cnt); 12542 return i; 12543 } 12544 12545 /* 12546 * ":spelldump" 12547 */ 12548 /*ARGSUSED*/ 12549 void 12550 ex_spelldump(eap) 12551 exarg_T *eap; 12552 { 12553 buf_T *buf = curbuf; 12554 langp_T *lp; 12555 slang_T *slang; 12556 idx_T arridx[MAXWLEN]; 12557 int curi[MAXWLEN]; 12558 char_u word[MAXWLEN]; 12559 int c; 12560 char_u *byts; 12561 idx_T *idxs; 12562 
linenr_T lnum = 0; 12563 int round; 12564 int depth; 12565 int n; 12566 int flags; 12567 char_u *region_names = NULL; /* region names being used */ 12568 int do_region = TRUE; /* dump region names and numbers */ 12569 char_u *p; 12570 int lpi; 12571 12572 if (no_spell_checking(curwin)) 12573 return; 12574 12575 /* Create a new empty buffer by splitting the window. */ 12576 do_cmdline_cmd((char_u *)"new"); 12577 if (!bufempty() || !buf_valid(buf)) 12578 return; 12579 12580 /* Find out if we can support regions: All languages must support the same 12581 * regions or none at all. */ 12582 for (lpi = 0; lpi < buf->b_langp.ga_len; ++lpi) 12583 { 12584 lp = LANGP_ENTRY(buf->b_langp, lpi); 12585 p = lp->lp_slang->sl_regions; 12586 if (p[0] != 0) 12587 { 12588 if (region_names == NULL) /* first language with regions */ 12589 region_names = p; 12590 else if (STRCMP(region_names, p) != 0) 12591 { 12592 do_region = FALSE; /* region names are different */ 12593 break; 12594 } 12595 } 12596 } 12597 12598 if (do_region && region_names != NULL) 12599 { 12600 vim_snprintf((char *)IObuff, IOSIZE, "/regions=%s", region_names); 12601 ml_append(lnum++, IObuff, (colnr_T)0, FALSE); 12602 } 12603 else 12604 do_region = FALSE; 12605 12606 /* 12607 * Loop over all files loaded for the entries in 'spelllang'. 
12608 */ 12609 for (lpi = 0; lpi < buf->b_langp.ga_len; ++lpi) 12610 { 12611 lp = LANGP_ENTRY(buf->b_langp, lpi); 12612 slang = lp->lp_slang; 12613 if (slang->sl_fbyts == NULL) /* reloading failed */ 12614 continue; 12615 12616 vim_snprintf((char *)IObuff, IOSIZE, "# file: %s", slang->sl_fname); 12617 ml_append(lnum++, IObuff, (colnr_T)0, FALSE); 12618 12619 /* round 1: case-folded tree 12620 * round 2: keep-case tree */ 12621 for (round = 1; round <= 2; ++round) 12622 { 12623 if (round == 1) 12624 { 12625 byts = slang->sl_fbyts; 12626 idxs = slang->sl_fidxs; 12627 } 12628 else 12629 { 12630 byts = slang->sl_kbyts; 12631 idxs = slang->sl_kidxs; 12632 } 12633 if (byts == NULL) 12634 continue; /* array is empty */ 12635 12636 depth = 0; 12637 arridx[0] = 0; 12638 curi[0] = 1; 12639 while (depth >= 0 && !got_int) 12640 { 12641 if (curi[depth] > byts[arridx[depth]]) 12642 { 12643 /* Done all bytes at this node, go up one level. */ 12644 --depth; 12645 line_breakcheck(); 12646 } 12647 else 12648 { 12649 /* Do one more byte at this node. */ 12650 n = arridx[depth] + curi[depth]; 12651 ++curi[depth]; 12652 c = byts[n]; 12653 if (c == 0) 12654 { 12655 /* End of word, deal with the word. 12656 * Don't use keep-case words in the fold-case tree, 12657 * they will appear in the keep-case tree. 12658 * Only use the word when the region matches. */ 12659 flags = (int)idxs[n]; 12660 if ((round == 2 || (flags & WF_KEEPCAP) == 0) 12661 && (flags & WF_NEEDCOMP) == 0 12662 && (do_region 12663 || (flags & WF_REGION) == 0 12664 || (((unsigned)flags >> 16) 12665 & lp->lp_region) != 0)) 12666 { 12667 word[depth] = NUL; 12668 if (!do_region) 12669 flags &= ~WF_REGION; 12670 12671 /* Dump the basic word if there is no prefix or 12672 * when it's the first one. */ 12673 c = (unsigned)flags >> 24; 12674 if (c == 0 || curi[depth] == 2) 12675 dump_word(word, round, flags, lnum++); 12676 12677 /* Apply the prefix, if there is one. 
*/ 12678 if (c != 0) 12679 lnum = dump_prefixes(slang, word, round, 12680 flags, lnum); 12681 } 12682 } 12683 else 12684 { 12685 /* Normal char, go one level deeper. */ 12686 word[depth++] = c; 12687 arridx[depth] = idxs[n]; 12688 curi[depth] = 1; 12689 } 12690 } 12691 } 12692 } 12693 } 12694 12695 /* Delete the empty line that we started with. */ 12696 if (curbuf->b_ml.ml_line_count > 1) 12697 ml_delete(curbuf->b_ml.ml_line_count, FALSE); 12698 12699 redraw_later(NOT_VALID); 12700 } 12701 12702 /* 12703 * Dump one word: apply case modifications and append a line to the buffer. 12704 */ 12705 static void 12706 dump_word(word, round, flags, lnum) 12707 char_u *word; 12708 int round; 12709 int flags; 12710 linenr_T lnum; 12711 { 12712 int keepcap = FALSE; 12713 char_u *p; 12714 char_u cword[MAXWLEN]; 12715 char_u badword[MAXWLEN + 10]; 12716 int i; 12717 12718 if (round == 1 && (flags & WF_CAPMASK) != 0) 12719 { 12720 /* Need to fix case according to "flags". */ 12721 make_case_word(word, cword, flags); 12722 p = cword; 12723 } 12724 else 12725 { 12726 p = word; 12727 if (round == 2 && ((captype(word, NULL) & WF_KEEPCAP) == 0 12728 || (flags & WF_FIXCAP) != 0)) 12729 keepcap = TRUE; 12730 } 12731 12732 /* Add flags and regions after a slash. */ 12733 if ((flags & (WF_BANNED | WF_RARE | WF_REGION)) || keepcap) 12734 { 12735 STRCPY(badword, p); 12736 STRCAT(badword, "/"); 12737 if (keepcap) 12738 STRCAT(badword, "="); 12739 if (flags & WF_BANNED) 12740 STRCAT(badword, "!"); 12741 else if (flags & WF_RARE) 12742 STRCAT(badword, "?"); 12743 if (flags & WF_REGION) 12744 for (i = 0; i < 7; ++i) 12745 if (flags & (0x10000 << i)) 12746 sprintf((char *)badword + STRLEN(badword), "%d", i + 1); 12747 p = badword; 12748 } 12749 12750 ml_append(lnum, p, (colnr_T)0, FALSE); 12751 } 12752 12753 /* 12754 * For ":spelldump": Find matching prefixes for "word". Prepend each to 12755 * "word" and append a line to the buffer. 12756 * Return the updated line number. 
12757 */ 12758 static linenr_T 12759 dump_prefixes(slang, word, round, flags, startlnum) 12760 slang_T *slang; 12761 char_u *word; /* case-folded word */ 12762 int round; 12763 int flags; /* flags with prefix ID */ 12764 linenr_T startlnum; 12765 { 12766 idx_T arridx[MAXWLEN]; 12767 int curi[MAXWLEN]; 12768 char_u prefix[MAXWLEN]; 12769 char_u word_up[MAXWLEN]; 12770 int has_word_up = FALSE; 12771 int c; 12772 char_u *byts; 12773 idx_T *idxs; 12774 linenr_T lnum = startlnum; 12775 int depth; 12776 int n; 12777 int len; 12778 int i; 12779 12780 /* if the word starts with a lower-case letter make the word with an 12781 * upper-case letter in word_up[]. */ 12782 c = PTR2CHAR(word); 12783 if (SPELL_TOUPPER(c) != c) 12784 { 12785 onecap_copy(word, word_up, TRUE); 12786 has_word_up = TRUE; 12787 } 12788 12789 byts = slang->sl_pbyts; 12790 idxs = slang->sl_pidxs; 12791 if (byts != NULL) /* array not is empty */ 12792 { 12793 /* 12794 * Loop over all prefixes, building them byte-by-byte in prefix[]. 12795 * When at the end of a prefix check that it supports "flags". 12796 */ 12797 depth = 0; 12798 arridx[0] = 0; 12799 curi[0] = 1; 12800 while (depth >= 0 && !got_int) 12801 { 12802 n = arridx[depth]; 12803 len = byts[n]; 12804 if (curi[depth] > len) 12805 { 12806 /* Done all bytes at this node, go up one level. */ 12807 --depth; 12808 line_breakcheck(); 12809 } 12810 else 12811 { 12812 /* Do one more byte at this node. */ 12813 n += curi[depth]; 12814 ++curi[depth]; 12815 c = byts[n]; 12816 if (c == 0) 12817 { 12818 /* End of prefix, find out how many IDs there are. */ 12819 for (i = 1; i < len; ++i) 12820 if (byts[n + i] != 0) 12821 break; 12822 curi[depth] += i - 1; 12823 12824 c = valid_word_prefix(i, n, flags, word, slang, FALSE); 12825 if (c != 0) 12826 { 12827 vim_strncpy(prefix + depth, word, MAXWLEN - depth - 1); 12828 dump_word(prefix, round, 12829 (c & WF_RAREPFX) ? 
(flags | WF_RARE) 12830 : flags, lnum++); 12831 } 12832 12833 /* Check for prefix that matches the word when the 12834 * first letter is upper-case, but only if the prefix has 12835 * a condition. */ 12836 if (has_word_up) 12837 { 12838 c = valid_word_prefix(i, n, flags, word_up, slang, 12839 TRUE); 12840 if (c != 0) 12841 { 12842 vim_strncpy(prefix + depth, word_up, 12843 MAXWLEN - depth - 1); 12844 dump_word(prefix, round, 12845 (c & WF_RAREPFX) ? (flags | WF_RARE) 12846 : flags, lnum++); 12847 } 12848 } 12849 } 12850 else 12851 { 12852 /* Normal char, go one level deeper. */ 12853 prefix[depth++] = c; 12854 arridx[depth] = idxs[n]; 12855 curi[depth] = 1; 12856 } 12857 } 12858 } 12859 } 12860 12861 return lnum; 12862 } 12863 12864 /* 12865 * Move "p" to end of word. 12866 */ 12867 char_u * 12868 spell_to_word_end(start, buf) 12869 char_u *start; 12870 buf_T *buf; 12871 { 12872 char_u *p = start; 12873 12874 while (*p != NUL && spell_iswordp(p, buf)) 12875 mb_ptr_adv(p); 12876 return p; 12877 } 12878 12879 #if defined(FEAT_INS_EXPAND) || defined(PROTO) 12880 /* 12881 * Find start of the word in front of the cursor. We don't check if it is 12882 * badly spelled, with completion we can only change the word in front of the 12883 * cursor. 12884 * Used for Insert mode completion CTRL-X ?. 12885 * Returns the column number of the word. 12886 */ 12887 int 12888 spell_word_start(startcol) 12889 int startcol; 12890 { 12891 char_u *line; 12892 char_u *p; 12893 int col = 0; 12894 12895 if (no_spell_checking(curwin)) 12896 return startcol; 12897 12898 /* Find a word character before "startcol". */ 12899 line = ml_get_curline(); 12900 for (p = line + startcol; p > line; ) 12901 { 12902 mb_ptr_back(line, p); 12903 if (spell_iswordp_nmw(p)) 12904 break; 12905 } 12906 12907 /* Go back to start of the word. 
*/ 12908 while (p > line) 12909 { 12910 col = p - line; 12911 mb_ptr_back(line, p); 12912 if (!spell_iswordp(p, curbuf)) 12913 break; 12914 col = 0; 12915 } 12916 12917 return col; 12918 } 12919 12920 /* 12921 * Need to check for 'spellcapcheck' now, the word is removed before 12922 * expand_spelling() is called. Therefore the ugly global variable. 12923 */ 12924 static int spell_expand_need_cap; 12925 12926 void 12927 spell_expand_check_cap(col) 12928 colnr_T col; 12929 { 12930 spell_expand_need_cap = check_need_cap(curwin->w_cursor.lnum, col); 12931 } 12932 12933 /* 12934 * Get list of spelling suggestions. 12935 * Used for Insert mode completion CTRL-X ?. 12936 * Returns the number of matches. The matches are in "matchp[]", array of 12937 * allocated strings. 12938 */ 12939 /*ARGSUSED*/ 12940 int 12941 expand_spelling(lnum, col, pat, matchp) 12942 linenr_T lnum; 12943 int col; 12944 char_u *pat; 12945 char_u ***matchp; 12946 { 12947 garray_T ga; 12948 12949 spell_suggest_list(&ga, pat, 100, spell_expand_need_cap); 12950 *matchp = ga.ga_data; 12951 return ga.ga_len; 12952 } 12953 #endif 12954 12955 #endif /* FEAT_SYN_HL */ 12956