1 /* vi:set ts=8 sts=4 sw=4: 2 * 3 * VIM - Vi IMproved by Bram Moolenaar 4 * 5 * Do ":help uganda" in Vim to read copying and usage conditions. 6 * Do ":help credits" in Vim to see a list of people who contributed. 7 * See README.txt for an overview of the Vim source code. 8 */ 9 10 /* 11 * spell.c: code for spell checking 12 * 13 * The spell checking mechanism uses a tree (aka trie). Each node in the tree 14 * has a list of bytes that can appear (siblings). For each byte there is a 15 * pointer to the node with the byte that follows in the word (child). 16 * 17 * A NUL byte is used where the word may end. The bytes are sorted, so that 18 * binary searching can be used and the NUL bytes are at the start. The 19 * number of possible bytes is stored before the list of bytes. 20 * 21 * The tree uses two arrays: "byts" stores the characters, "idxs" stores 22 * either the next index or flags. The tree starts at index 0. For example, 23 * to lookup "vi" this sequence is followed: 24 * i = 0 25 * len = byts[i] 26 * n = where "v" appears in byts[i + 1] to byts[i + len] 27 * i = idxs[n] 28 * len = byts[i] 29 * n = where "i" appears in byts[i + 1] to byts[i + len] 30 * i = idxs[n] 31 * len = byts[i] 32 * find that byts[i + 1] is 0, idxs[i + 1] has flags for "vi". 33 * 34 * There are two word trees: one with case-folded words and one with words in 35 * original case. The second one is only used for keep-case words and is 36 * usually small. 37 * 38 * There is one additional tree for when not all prefixes are applied when 39 * generating the .spl file. This tree stores all the possible prefixes, as 40 * if they were words. At each word (prefix) end the prefix nr is stored, the 41 * following word must support this prefix nr. And the condition nr is 42 * stored, used to lookup the condition that the word must match with. 43 * 44 * Thanks to Olaf Seibert for providing an example implementation of this tree 45 * and the compression mechanism. 
46 * 47 * Matching involves checking the caps type: Onecap ALLCAP KeepCap. 48 * 49 * Why doesn't Vim use aspell/ispell/myspell/etc.? 50 * See ":help develop-spell". 51 */ 52 53 /* Use SPELL_PRINTTREE for debugging: dump the word tree after adding a word. 54 * Only use it for small word lists! */ 55 #if 0 56 # define SPELL_PRINTTREE 57 #endif 58 59 /* 60 * Use this to adjust the score after finding suggestions, based on the 61 * suggested word sounding like the bad word. This is much faster than doing 62 * it for every possible suggestion. 63 * Disadvantage: When "the" is typed as "hte" it sounds different and goes 64 * down in the list. 65 * Used when 'spellsuggest' is set to "best". 66 */ 67 #define RESCORE(word_score, sound_score) ((3 * word_score + sound_score) / 4) 68 69 /* 70 * Vim spell file format: <HEADER> 71 * <SECTIONS> 72 * <LWORDTREE> 73 * <KWORDTREE> 74 * <PREFIXTREE> 75 * 76 * <HEADER>: <fileID> <versionnr> 77 * 78 * <fileID> 8 bytes "VIMspell" 79 * <versionnr> 1 byte VIMSPELLVERSION 80 * 81 * 82 * Sections make it possible to add information to the .spl file without 83 * making it incompatible with previous versions. There are two kinds of 84 * sections: 85 * 1. Not essential for correct spell checking. E.g. for making suggestions. 86 * These are skipped when not supported. 87 * 2. Optional information, but essential for spell checking when present. 88 * E.g. conditions for affixes. When this section is present but not 89 * supported an error message is given. 90 * 91 * <SECTIONS>: <section> ... <sectionend> 92 * 93 * <section>: <sectionID> <sectionflags> <sectionlen> (section contents) 94 * 95 * <sectionID> 1 byte number from 0 to 254 identifying the section 96 * 97 * <sectionflags> 1 byte SNF_REQUIRED: this section is required for correct 98 * spell checking 99 * 100 * <sectionlen> 4 bytes length of section contents, MSB first 101 * 102 * <sectionend> 1 byte SN_END 103 * 104 * 105 * sectionID == SN_REGION: <regionname> ... 
106 * <regionname> 2 bytes Up to 8 region names: ca, au, etc. Lower case. 107 * First <regionname> is region 1. 108 * 109 * sectionID == SN_CHARFLAGS: <charflagslen> <charflags> 110 * <folcharslen> <folchars> 111 * <charflagslen> 1 byte Number of bytes in <charflags> (should be 128). 112 * <charflags> N bytes List of flags (first one is for character 128): 113 * 0x01 word character CF_WORD 114 * 0x02 upper-case character CF_UPPER 115 * <folcharslen> 2 bytes Number of bytes in <folchars>. 116 * <folchars> N bytes Folded characters, first one is for character 128. 117 * 118 * sectionID == SN_MIDWORD: <midword> 119 * <midword> N bytes Characters that are word characters only when used 120 * in the middle of a word. 121 * 122 * sectionID == SN_PREFCOND: <prefcondcnt> <prefcond> ... 123 * <prefcondcnt> 2 bytes Number of <prefcond> items following. 124 * <prefcond> : <condlen> <condstr> 125 * <condlen> 1 byte Length of <condstr>. 126 * <condstr> N bytes Condition for the prefix. 127 * 128 * sectionID == SN_REP: <repcount> <rep> ... 129 * <repcount> 2 bytes number of <rep> items, MSB first. 130 * <rep> : <repfromlen> <repfrom> <reptolen> <repto> 131 * <repfromlen> 1 byte length of <repfrom> 132 * <repfrom> N bytes "from" part of replacement 133 * <reptolen> 1 byte length of <repto> 134 * <repto> N bytes "to" part of replacement 135 * 136 * sectionID == SN_SAL: <salflags> <salcount> <sal> ... 
137 * <salflags> 1 byte flags for soundsalike conversion: 138 * SAL_F0LLOWUP 139 * SAL_COLLAPSE 140 * SAL_REM_ACCENTS 141 * <salcount> 2 bytes number of <sal> items following 142 * <sal> : <salfromlen> <salfrom> <saltolen> <salto> 143 * <salfromlen> 1 byte length of <salfrom> 144 * <salfrom> N bytes "from" part of soundsalike 145 * <saltolen> 1 byte length of <salto> 146 * <salto> N bytes "to" part of soundsalike 147 * 148 * sectionID == SN_SOFO: <sofofromlen> <sofofrom> <sofotolen> <sofoto> 149 * <sofofromlen> 2 bytes length of <sofofrom> 150 * <sofofrom> N bytes "from" part of soundfold 151 * <sofotolen> 2 bytes length of <sofoto> 152 * <sofoto> N bytes "to" part of soundfold 153 * 154 * sectionID == SN_MAP: <mapstr> 155 * <mapstr> N bytes String with sequences of similar characters, 156 * separated by slashes. 157 * 158 * sectionID == SN_COMPOUND: <compmax> <compminlen> <compsylmax> <compflags> 159 * <compmax> 1 byte Maximum nr of words in compound word. 160 * <compminlen> 1 byte Minimal word length for compounding. 161 * <compsylmax> 1 byte Maximum nr of syllables in compound word. 162 * <compflags> N bytes Flags from COMPOUNDFLAGS items, separated by 163 * slashes. 164 * 165 * sectionID == SN_NOBREAK: (empty, its presence is enough) 166 * 167 * sectionID == SN_SYLLABLE: <syllable> 168 * <syllable> N bytes String from SYLLABLE item. 169 * 170 * <LWORDTREE>: <wordtree> 171 * 172 * <KWORDTREE>: <wordtree> 173 * 174 * <PREFIXTREE>: <wordtree> 175 * 176 * 177 * <wordtree>: <nodecount> <nodedata> ... 178 * 179 * <nodecount> 4 bytes Number of nodes following. MSB first. 180 * 181 * <nodedata>: <siblingcount> <sibling> ... 182 * 183 * <siblingcount> 1 byte Number of siblings in this node. The siblings 184 * follow in sorted order. 185 * 186 * <sibling>: <byte> [ <nodeidx> <xbyte> 187 * | <flags> [<flags2>] [<region>] [<affixID>] 188 * | [<pflags>] <affixID> <prefcondnr> ] 189 * 190 * <byte> 1 byte Byte value of the sibling. 
Special cases: 191 * BY_NOFLAGS: End of word without flags and for all 192 * regions. 193 * For PREFIXTREE <affixID> and 194 * <prefcondnr> follow. 195 * BY_FLAGS: End of word, <flags> follow. 196 * For PREFIXTREE <pflags>, <affixID> 197 * and <prefcondnr> follow. 198 * BY_FLAGS2: End of word, <flags> and <flags2> 199 * follow. Not used in PREFIXTREE. 200 * BY_INDEX: Child of sibling is shared, <nodeidx> 201 * and <xbyte> follow. 202 * 203 * <nodeidx> 3 bytes Index of child for this sibling, MSB first. 204 * 205 * <xbyte> 1 byte byte value of the sibling. 206 * 207 * <flags> 1 byte bitmask of: 208 * WF_ALLCAP word must have only capitals 209 * WF_ONECAP first char of word must be capital 210 * WF_KEEPCAP keep-case word 211 * WF_FIXCAP keep-case word, all caps not allowed 212 * WF_RARE rare word 213 * WF_BANNED bad word 214 * WF_REGION <region> follows 215 * WF_AFX <affixID> follows 216 * 217 * <flags2> 1 byte Bitmask of: 218 * WF_HAS_AFF >> 8 word includes affix 219 * WF_NEEDCOMP >> 8 word only valid in compound 220 * 221 * <pflags> 1 byte bitmask of: 222 * WFP_RARE rare prefix 223 * WFP_NC non-combining prefix 224 * WFP_UP letter after prefix made upper case 225 * 226 * <region> 1 byte Bitmask for regions in which word is valid. When 227 * omitted it's valid in all regions. 228 * Lowest bit is for region 1. 229 * 230 * <affixID> 1 byte ID of affix that can be used with this word. In 231 * PREFIXTREE used for the required prefix ID. 232 * 233 * <prefcondnr> 2 bytes Prefix condition number, index in <prefcond> list 234 * from HEADER. 235 * 236 * All text characters are in 'encoding', but stored as single bytes. 237 */ 238 239 #if defined(MSDOS) || defined(WIN16) || defined(WIN32) || defined(_WIN64) 240 # include <io.h> /* for lseek(), must be before vim.h */ 241 #endif 242 243 #include "vim.h" 244 245 #if defined(FEAT_SYN_HL) || defined(PROTO) 246 247 #ifdef HAVE_FCNTL_H 248 # include <fcntl.h> 249 #endif 250 251 #define MAXWLEN 250 /* Assume max. 
word len is this many bytes. 252 Some places assume a word length fits in a 253 byte, thus it can't be above 255. */ 254 255 /* Type used for indexes in the word tree need to be at least 4 bytes. If int 256 * is 8 bytes we could use something smaller, but what? */ 257 #if SIZEOF_INT > 3 258 typedef int idx_T; 259 #else 260 typedef long idx_T; 261 #endif 262 263 /* Flags used for a word. Only the lowest byte can be used, the region byte 264 * comes above it. */ 265 #define WF_REGION 0x01 /* region byte follows */ 266 #define WF_ONECAP 0x02 /* word with one capital (or all capitals) */ 267 #define WF_ALLCAP 0x04 /* word must be all capitals */ 268 #define WF_RARE 0x08 /* rare word */ 269 #define WF_BANNED 0x10 /* bad word */ 270 #define WF_AFX 0x20 /* affix ID follows */ 271 #define WF_FIXCAP 0x40 /* keep-case word, allcap not allowed */ 272 #define WF_KEEPCAP 0x80 /* keep-case word */ 273 274 /* for <flags2>, shifted up one byte to be used in wn_flags */ 275 #define WF_HAS_AFF 0x0100 /* word includes affix */ 276 #define WF_NEEDCOMP 0x0200 /* word only valid in compound */ 277 278 #define WF_CAPMASK (WF_ONECAP | WF_ALLCAP | WF_KEEPCAP | WF_FIXCAP) 279 280 /* flags for <pflags> */ 281 #define WFP_RARE 0x01 /* rare prefix */ 282 #define WFP_NC 0x02 /* prefix is not combining */ 283 #define WFP_UP 0x04 /* to-upper prefix */ 284 285 /* Flags for postponed prefixes. Must be above affixID (one byte) 286 * and prefcondnr (two bytes). */ 287 #define WF_RAREPFX (WFP_RARE << 24) /* in sl_pidxs: flag for rare 288 * postponed prefix */ 289 #define WF_PFX_NC (WFP_NC << 24) /* in sl_pidxs: flag for non-combining 290 * postponed prefix */ 291 #define WF_PFX_UP (WFP_UP << 24) /* in sl_pidxs: flag for to-upper 292 * postponed prefix */ 293 294 /* Special byte values for <byte>. Some are only used in the tree for 295 * postponed prefixes, some only in the other trees. This is a bit messy... 
*/ 296 #define BY_NOFLAGS 0 /* end of word without flags or region; for 297 * postponed prefix: no <pflags> */ 298 #define BY_INDEX 1 /* child is shared, index follows */ 299 #define BY_FLAGS 2 /* end of word, <flags> byte follows; for 300 * postponed prefix: <pflags> follows */ 301 #define BY_FLAGS2 3 /* end of word, <flags> and <flags2> bytes 302 * follow; never used in prefix tree */ 303 #define BY_SPECIAL BY_FLAGS2 /* highest special byte value */ 304 305 /* Info from "REP" and "SAL" entries in ".aff" file used in si_rep, sl_rep, 306 * and si_sal. Not for sl_sal! 307 * One replacement: from "ft_from" to "ft_to". */ 308 typedef struct fromto_S 309 { 310 char_u *ft_from; 311 char_u *ft_to; 312 } fromto_T; 313 314 /* Info from "SAL" entries in ".aff" file used in sl_sal. 315 * The info is split for quick processing by spell_soundfold(). 316 * Note that "sm_oneof" and "sm_rules" point into sm_lead. */ 317 typedef struct salitem_S 318 { 319 char_u *sm_lead; /* leading letters */ 320 int sm_leadlen; /* length of "sm_lead" */ 321 char_u *sm_oneof; /* letters from () or NULL */ 322 char_u *sm_rules; /* rules like ^, $, priority */ 323 char_u *sm_to; /* replacement. */ 324 #ifdef FEAT_MBYTE 325 int *sm_lead_w; /* wide character copy of "sm_lead" */ 326 int *sm_oneof_w; /* wide character copy of "sm_oneof" */ 327 int *sm_to_w; /* wide character copy of "sm_to" */ 328 #endif 329 } salitem_T; 330 331 #ifdef FEAT_MBYTE 332 typedef int salfirst_T; 333 #else 334 typedef short salfirst_T; 335 #endif 336 337 /* Values for SP_*ERROR are negative, positive values are used by 338 * read_cnt_string(). */ 339 #define SP_TRUNCERROR -1 /* spell file truncated error */ 340 #define SP_FORMERROR -2 /* format error in spell file */ 341 #define SP_OTHERERROR -3 /* other error while reading spell file */ 342 343 /* 344 * Structure used to store words and other info for one language, loaded from 345 * a .spl file. 
346 * The main access is through the tree in "sl_fbyts/sl_fidxs", storing the 347 * case-folded words. "sl_kbyts/sl_kidxs" is for keep-case words. 348 * 349 * The "byts" array stores the possible bytes in each tree node, preceded by 350 * the number of possible bytes, sorted on byte value: 351 * <len> <byte1> <byte2> ... 352 * The "idxs" array stores the index of the child node corresponding to the 353 * byte in "byts". 354 * Exception: when the byte is zero, the word may end here and "idxs" holds 355 * the flags, region mask and affixID for the word. There may be several 356 * zeros in sequence for alternative flag/region/affixID combinations. 357 */ 358 typedef struct slang_S slang_T; 359 struct slang_S 360 { 361 slang_T *sl_next; /* next language */ 362 char_u *sl_name; /* language name "en", "en.rare", "nl", etc. */ 363 char_u *sl_fname; /* name of .spl file */ 364 int sl_add; /* TRUE if it's a .add file. */ 365 366 char_u *sl_fbyts; /* case-folded word bytes */ 367 idx_T *sl_fidxs; /* case-folded word indexes */ 368 char_u *sl_kbyts; /* keep-case word bytes */ 369 idx_T *sl_kidxs; /* keep-case word indexes */ 370 char_u *sl_pbyts; /* prefix tree word bytes */ 371 idx_T *sl_pidxs; /* prefix tree word indexes */ 372 373 char_u sl_regions[17]; /* table with up to 8 region names plus NUL */ 374 375 char_u *sl_midword; /* MIDWORD string or NULL */ 376 377 int sl_compmax; /* COMPOUNDMAX (default: MAXWLEN) */ 378 int sl_compminlen; /* COMPOUNDMIN (default: 0) */ 379 int sl_compsylmax; /* COMPOUNDSYLMAX (default: MAXWLEN) */ 380 regprog_T *sl_compprog; /* COMPOUNDFLAGS turned into a regexp progrm 381 * (NULL when no compounding) */ 382 char_u *sl_compstartflags; /* flags for first compound word */ 383 char_u *sl_compallflags; /* all flags for compound words */ 384 char_u sl_nobreak; /* When TRUE: no spaces between words */ 385 char_u *sl_syllable; /* SYLLABLE repeatable chars or NULL */ 386 garray_T sl_syl_items; /* syllable items */ 387 388 int sl_prefixcnt; /* 
number of items in "sl_prefprog" */ 389 regprog_T **sl_prefprog; /* table with regprogs for prefixes */ 390 391 garray_T sl_rep; /* list of fromto_T entries from REP lines */ 392 short sl_rep_first[256]; /* indexes where byte first appears, -1 if 393 there is none */ 394 garray_T sl_sal; /* list of salitem_T entries from SAL lines */ 395 salfirst_T sl_sal_first[256]; /* indexes where byte first appears, -1 if 396 there is none */ 397 int sl_sofo; /* SOFOFROM and SOFOTO instead of SAL items: 398 * "sl_sal_first" maps chars, when has_mbyte 399 * "sl_sal" is a list of wide char lists. */ 400 int sl_followup; /* SAL followup */ 401 int sl_collapse; /* SAL collapse_result */ 402 int sl_rem_accents; /* SAL remove_accents */ 403 int sl_has_map; /* TRUE if there is a MAP line */ 404 #ifdef FEAT_MBYTE 405 hashtab_T sl_map_hash; /* MAP for multi-byte chars */ 406 int sl_map_array[256]; /* MAP for first 256 chars */ 407 #else 408 char_u sl_map_array[256]; /* MAP for first 256 chars */ 409 #endif 410 }; 411 412 /* First language that is loaded, start of the linked list of loaded 413 * languages. */ 414 static slang_T *first_lang = NULL; 415 416 /* Flags used in .spl file for soundsalike flags. */ 417 #define SAL_F0LLOWUP 1 418 #define SAL_COLLAPSE 2 419 #define SAL_REM_ACCENTS 4 420 421 /* 422 * Structure used in "b_langp", filled from 'spelllang'. 423 */ 424 typedef struct langp_S 425 { 426 slang_T *lp_slang; /* info for this language */ 427 slang_T *lp_sallang; /* language used for sound folding or NULL */ 428 slang_T *lp_replang; /* language used for REP items or NULL */ 429 int lp_region; /* bitmask for region or REGION_ALL */ 430 } langp_T; 431 432 #define LANGP_ENTRY(ga, i) (((langp_T *)(ga).ga_data) + (i)) 433 434 #define REGION_ALL 0xff /* word valid in all regions */ 435 436 #define VIMSPELLMAGIC "VIMspell" /* string at start of Vim spell file */ 437 #define VIMSPELLMAGICL 8 438 #define VIMSPELLVERSION 50 439 440 /* Section IDs. 
Only renumber them when VIMSPELLVERSION changes! */ 441 #define SN_REGION 0 /* <regionname> section */ 442 #define SN_CHARFLAGS 1 /* charflags section */ 443 #define SN_MIDWORD 2 /* <midword> section */ 444 #define SN_PREFCOND 3 /* <prefcond> section */ 445 #define SN_REP 4 /* REP items section */ 446 #define SN_SAL 5 /* SAL items section */ 447 #define SN_SOFO 6 /* soundfolding section */ 448 #define SN_MAP 7 /* MAP items section */ 449 #define SN_COMPOUND 8 /* compound words section */ 450 #define SN_SYLLABLE 9 /* syllable section */ 451 #define SN_NOBREAK 10 /* NOBREAK section */ 452 #define SN_END 255 /* end of sections */ 453 454 #define SNF_REQUIRED 1 /* <sectionflags>: required section */ 455 456 /* Result values. Lower number is accepted over higher one. */ 457 #define SP_BANNED -1 458 #define SP_OK 0 459 #define SP_RARE 1 460 #define SP_LOCAL 2 461 #define SP_BAD 3 462 463 /* file used for "zG" and "zW" */ 464 static char_u *int_wordlist = NULL; 465 466 /* 467 * Information used when looking for suggestions. 468 */ 469 typedef struct suginfo_S 470 { 471 garray_T su_ga; /* suggestions, contains "suggest_T" */ 472 int su_maxcount; /* max. number of suggestions displayed */ 473 int su_maxscore; /* maximum score for adding to su_ga */ 474 garray_T su_sga; /* like su_ga, sound-folded scoring */ 475 char_u *su_badptr; /* start of bad word in line */ 476 int su_badlen; /* length of detected bad word in line */ 477 int su_badflags; /* caps flags for bad word */ 478 char_u su_badword[MAXWLEN]; /* bad word truncated at su_badlen */ 479 char_u su_fbadword[MAXWLEN]; /* su_badword case-folded */ 480 char_u su_sal_badword[MAXWLEN]; /* su_badword soundfolded */ 481 slang_T *su_slang_first; /* slang_T used for su_sal_badword */ 482 hashtab_T su_banned; /* table with banned words */ 483 slang_T *su_sallang; /* default language for sound folding */ 484 } suginfo_T; 485 486 /* One word suggestion. Used in "si_ga". 
*/
typedef struct suggest_S
{
    char_u      *st_word;       /* suggested word, allocated string */
    int         st_orglen;      /* length of replaced text */
    int         st_score;       /* lower is better */
    int         st_altscore;    /* used when st_score compares equal */
    int         st_salscore;    /* st_score is for soundalike */
    int         st_had_bonus;   /* bonus already included in score */
    slang_T     *st_slang;      /* language used for sound folding */
} suggest_T;

/* Entry "i" of growarray "ga", which holds suggest_T items. */
#define SUG(ga, i) (((suggest_T *)(ga).ga_data)[i])

/* Number of suggestions kept when cleaning up.  When rescore_suggestions() is
 * called the score may change, thus we need to keep more than what is
 * displayed.  This is su_maxcount, but never less than 50. */
#define SUG_CLEAN_COUNT(su)    ((su)->su_maxcount < 50 ? 50 : (su)->su_maxcount)

/* Threshold for sorting and cleaning up suggestions.  Don't want to keep lots
 * of suggestions that are not going to be displayed. */
#define SUG_MAX_COUNT(su)       ((su)->su_maxcount + 50)

/* score for various changes */
#define SCORE_SPLIT     149     /* split bad word */
#define SCORE_ICASE     52      /* slightly different case */
#define SCORE_REGION    200     /* word is for different region */
#define SCORE_RARE      180     /* rare word */
#define SCORE_SWAP      90      /* swap two characters */
#define SCORE_SWAP3     110     /* swap two characters in three */
#define SCORE_REP       65      /* REP replacement */
#define SCORE_SUBST     93      /* substitute a character */
#define SCORE_SIMILAR   33      /* substitute a similar character */
#define SCORE_SUBCOMP   33      /* substitute a composing character */
#define SCORE_DEL       94      /* delete a character */
#define SCORE_DELDUP    66      /* delete a duplicated character */
#define SCORE_DELCOMP   28      /* delete a composing character */
#define SCORE_INS       96      /* insert a character */
#define SCORE_INSDUP    67      /* insert a duplicate character */
#define SCORE_INSCOMP   30      /* insert a composing character */
#define SCORE_NONWORD   103     /* change non-word to word char */

#define SCORE_FILE 30 /* suggestion from a file */ 529 #define SCORE_MAXINIT 350 /* Initial maximum score: higher == slower. 530 * 350 allows for about three changes. */ 531 532 #define SCORE_BIG SCORE_INS * 3 /* big difference */ 533 #define SCORE_MAXMAX 999999 /* accept any score */ 534 535 /* 536 * Structure to store info for word matching. 537 */ 538 typedef struct matchinf_S 539 { 540 langp_T *mi_lp; /* info for language and region */ 541 542 /* pointers to original text to be checked */ 543 char_u *mi_word; /* start of word being checked */ 544 char_u *mi_end; /* end of matching word so far */ 545 char_u *mi_fend; /* next char to be added to mi_fword */ 546 char_u *mi_cend; /* char after what was used for 547 mi_capflags */ 548 549 /* case-folded text */ 550 char_u mi_fword[MAXWLEN + 1]; /* mi_word case-folded */ 551 int mi_fwordlen; /* nr of valid bytes in mi_fword */ 552 553 /* for when checking word after a prefix */ 554 int mi_prefarridx; /* index in sl_pidxs with list of 555 affixID/condition */ 556 int mi_prefcnt; /* number of entries at mi_prefarridx */ 557 int mi_prefixlen; /* byte length of prefix */ 558 #ifdef FEAT_MBYTE 559 int mi_cprefixlen; /* byte length of prefix in original 560 case */ 561 #else 562 # define mi_cprefixlen mi_prefixlen /* it's the same value */ 563 #endif 564 565 /* for when checking a compound word */ 566 int mi_compoff; /* start of following word offset */ 567 char_u mi_compflags[MAXWLEN]; /* flags for compound words used */ 568 int mi_complen; /* nr of compound words used */ 569 570 /* others */ 571 int mi_result; /* result so far: SP_BAD, SP_OK, etc. */ 572 int mi_capflags; /* WF_ONECAP WF_ALLCAP WF_KEEPCAP */ 573 buf_T *mi_buf; /* buffer being checked */ 574 575 /* for NOBREAK */ 576 int mi_result2; /* "mi_resul" without following word */ 577 char_u *mi_end2; /* "mi_end" without following word */ 578 } matchinf_T; 579 580 /* 581 * The tables used for recognizing word characters according to spelling. 
582 * These are only used for the first 256 characters of 'encoding'. 583 */ 584 typedef struct spelltab_S 585 { 586 char_u st_isw[256]; /* flags: is word char */ 587 char_u st_isu[256]; /* flags: is uppercase char */ 588 char_u st_fold[256]; /* chars: folded case */ 589 char_u st_upper[256]; /* chars: upper case */ 590 } spelltab_T; 591 592 static spelltab_T spelltab; 593 static int did_set_spelltab; 594 595 #define CF_WORD 0x01 596 #define CF_UPPER 0x02 597 598 static void clear_spell_chartab __ARGS((spelltab_T *sp)); 599 static int set_spell_finish __ARGS((spelltab_T *new_st)); 600 static int spell_iswordp __ARGS((char_u *p, buf_T *buf)); 601 static int spell_iswordp_nmw __ARGS((char_u *p)); 602 #ifdef FEAT_MBYTE 603 static int spell_iswordp_w __ARGS((int *p, buf_T *buf)); 604 #endif 605 static int write_spell_prefcond __ARGS((FILE *fd, garray_T *gap)); 606 607 /* 608 * For finding suggestions: At each node in the tree these states are tried: 609 */ 610 typedef enum 611 { 612 STATE_START = 0, /* At start of node check for NUL bytes (goodword 613 * ends); if badword ends there is a match, otherwise 614 * try splitting word. */ 615 STATE_NOPREFIX, /* try without prefix */ 616 STATE_SPLITUNDO, /* Undo splitting. */ 617 STATE_ENDNUL, /* Past NUL bytes at start of the node. */ 618 STATE_PLAIN, /* Use each byte of the node. */ 619 STATE_DEL, /* Delete a byte from the bad word. */ 620 STATE_INS, /* Insert a byte in the bad word. */ 621 STATE_SWAP, /* Swap two bytes. */ 622 STATE_UNSWAP, /* Undo swap two characters. */ 623 STATE_SWAP3, /* Swap two characters over three. */ 624 STATE_UNSWAP3, /* Undo Swap two characters over three. */ 625 STATE_UNROT3L, /* Undo rotate three characters left */ 626 STATE_UNROT3R, /* Undo rotate three characters right */ 627 STATE_REP_INI, /* Prepare for using REP items. */ 628 STATE_REP, /* Use matching REP items from the .aff file. */ 629 STATE_REP_UNDO, /* Undo a REP item replacement. */ 630 STATE_FINAL /* End of this node. 
*/ 631 } state_T; 632 633 /* 634 * Struct to keep the state at each level in suggest_try_change(). 635 */ 636 typedef struct trystate_S 637 { 638 state_T ts_state; /* state at this level, STATE_ */ 639 int ts_score; /* score */ 640 idx_T ts_arridx; /* index in tree array, start of node */ 641 short ts_curi; /* index in list of child nodes */ 642 char_u ts_fidx; /* index in fword[], case-folded bad word */ 643 char_u ts_fidxtry; /* ts_fidx at which bytes may be changed */ 644 char_u ts_twordlen; /* valid length of tword[] */ 645 char_u ts_prefixdepth; /* stack depth for end of prefix or 646 * PFD_PREFIXTREE or PFD_NOPREFIX */ 647 char_u ts_flags; /* TSF_ flags */ 648 #ifdef FEAT_MBYTE 649 char_u ts_tcharlen; /* number of bytes in tword character */ 650 char_u ts_tcharidx; /* current byte index in tword character */ 651 char_u ts_isdiff; /* DIFF_ values */ 652 char_u ts_fcharstart; /* index in fword where badword char started */ 653 #endif 654 char_u ts_prewordlen; /* length of word in "preword[]" */ 655 char_u ts_splitoff; /* index in "tword" after last split */ 656 char_u ts_splitfidx; /* "ts_fidx" at word split */ 657 char_u ts_complen; /* nr of compound words used */ 658 char_u ts_compsplit; /* index for "compflags" where word was spit */ 659 char_u ts_save_badflags; /* su_badflags saved here */ 660 } trystate_T; 661 662 /* values for ts_isdiff */ 663 #define DIFF_NONE 0 /* no different byte (yet) */ 664 #define DIFF_YES 1 /* different byte found */ 665 #define DIFF_INSERT 2 /* inserting character */ 666 667 /* values for ts_flags */ 668 #define TSF_PREFIXOK 1 /* already checked that prefix is OK */ 669 #define TSF_DIDSPLIT 2 /* tried split at this point */ 670 671 /* special values ts_prefixdepth */ 672 #define PFD_NOPREFIX 0xff /* not using prefixes */ 673 #define PFD_PREFIXTREE 0xfe /* walking through the prefix tree */ 674 #define PFD_NOTSPECIAL 0xfd /* first value that's not special */ 675 676 /* mode values for find_word */ 677 #define FIND_FOLDWORD 0 /* 
find word case-folded */ 678 #define FIND_KEEPWORD 1 /* find keep-case word */ 679 #define FIND_PREFIX 2 /* find word after prefix */ 680 #define FIND_COMPOUND 3 /* find case-folded compound word */ 681 #define FIND_KEEPCOMPOUND 4 /* find keep-case compound word */ 682 683 static slang_T *slang_alloc __ARGS((char_u *lang)); 684 static void slang_free __ARGS((slang_T *lp)); 685 static void slang_clear __ARGS((slang_T *lp)); 686 static void find_word __ARGS((matchinf_T *mip, int mode)); 687 static int can_compound __ARGS((slang_T *slang, char_u *word, char_u *flags)); 688 static int valid_word_prefix __ARGS((int totprefcnt, int arridx, int flags, char_u *word, slang_T *slang, int cond_req)); 689 static void find_prefix __ARGS((matchinf_T *mip, int mode)); 690 static int fold_more __ARGS((matchinf_T *mip)); 691 static int spell_valid_case __ARGS((int wordflags, int treeflags)); 692 static int no_spell_checking __ARGS((win_T *wp)); 693 static void spell_load_lang __ARGS((char_u *lang)); 694 static char_u *spell_enc __ARGS((void)); 695 static void int_wordlist_spl __ARGS((char_u *fname)); 696 static void spell_load_cb __ARGS((char_u *fname, void *cookie)); 697 static slang_T *spell_load_file __ARGS((char_u *fname, char_u *lang, slang_T *old_lp, int silent)); 698 static char_u *read_cnt_string __ARGS((FILE *fd, int cnt_bytes, int *lenp)); 699 static char_u *read_string __ARGS((FILE *fd, int cnt)); 700 static int read_region_section __ARGS((FILE *fd, slang_T *slang, int len)); 701 static int read_charflags_section __ARGS((FILE *fd)); 702 static int read_prefcond_section __ARGS((FILE *fd, slang_T *lp)); 703 static int read_rep_section __ARGS((FILE *fd, slang_T *slang)); 704 static int read_sal_section __ARGS((FILE *fd, slang_T *slang)); 705 static int read_sofo_section __ARGS((FILE *fd, slang_T *slang)); 706 static int read_compound __ARGS((FILE *fd, slang_T *slang, int len)); 707 static int byte_in_str __ARGS((char_u *str, int byte)); 708 static int init_syl_tab 
__ARGS((slang_T *slang)); 709 static int count_syllables __ARGS((slang_T *slang, char_u *word)); 710 static int set_sofo __ARGS((slang_T *lp, char_u *from, char_u *to)); 711 static void set_sal_first __ARGS((slang_T *lp)); 712 #ifdef FEAT_MBYTE 713 static int *mb_str2wide __ARGS((char_u *s)); 714 #endif 715 static idx_T read_tree __ARGS((FILE *fd, char_u *byts, idx_T *idxs, int maxidx, int startidx, int prefixtree, int maxprefcondnr)); 716 static void clear_midword __ARGS((buf_T *buf)); 717 static void use_midword __ARGS((slang_T *lp, buf_T *buf)); 718 static int find_region __ARGS((char_u *rp, char_u *region)); 719 static int captype __ARGS((char_u *word, char_u *end)); 720 static int badword_captype __ARGS((char_u *word, char_u *end)); 721 static void spell_reload_one __ARGS((char_u *fname, int added_word)); 722 static void set_spell_charflags __ARGS((char_u *flags, int cnt, char_u *upp)); 723 static int set_spell_chartab __ARGS((char_u *fol, char_u *low, char_u *upp)); 724 static int spell_casefold __ARGS((char_u *p, int len, char_u *buf, int buflen)); 725 static int check_need_cap __ARGS((linenr_T lnum, colnr_T col)); 726 static void spell_find_suggest __ARGS((char_u *badptr, suginfo_T *su, int maxcount, int banbadword, int need_cap)); 727 #ifdef FEAT_EVAL 728 static void spell_suggest_expr __ARGS((suginfo_T *su, char_u *expr)); 729 #endif 730 static void spell_suggest_file __ARGS((suginfo_T *su, char_u *fname)); 731 static void spell_suggest_intern __ARGS((suginfo_T *su)); 732 static void spell_find_cleanup __ARGS((suginfo_T *su)); 733 static void onecap_copy __ARGS((char_u *word, char_u *wcopy, int upper)); 734 static void allcap_copy __ARGS((char_u *word, char_u *wcopy)); 735 static void suggest_try_special __ARGS((suginfo_T *su)); 736 static void suggest_try_change __ARGS((suginfo_T *su)); 737 static int try_deeper __ARGS((suginfo_T *su, trystate_T *stack, int depth, int score_add)); 738 #ifdef FEAT_MBYTE 739 static int nofold_len __ARGS((char_u *fword, int 
flen, char_u *word)); 740 #endif 741 static void find_keepcap_word __ARGS((slang_T *slang, char_u *fword, char_u *kword)); 742 static void score_comp_sal __ARGS((suginfo_T *su)); 743 static void score_combine __ARGS((suginfo_T *su)); 744 static int stp_sal_score __ARGS((suggest_T *stp, suginfo_T *su, slang_T *slang, char_u *badsound)); 745 static void suggest_try_soundalike __ARGS((suginfo_T *su)); 746 static void make_case_word __ARGS((char_u *fword, char_u *cword, int flags)); 747 static void set_map_str __ARGS((slang_T *lp, char_u *map)); 748 static int similar_chars __ARGS((slang_T *slang, int c1, int c2)); 749 static void add_suggestion __ARGS((suginfo_T *su, garray_T *gap, char_u *goodword, int badlen, int score, int altscore, int had_bonus, slang_T *slang)); 750 static void add_banned __ARGS((suginfo_T *su, char_u *word)); 751 static int was_banned __ARGS((suginfo_T *su, char_u *word)); 752 static void free_banned __ARGS((suginfo_T *su)); 753 static void rescore_suggestions __ARGS((suginfo_T *su)); 754 static void rescore_one __ARGS((suginfo_T *su, suggest_T *stp)); 755 static int cleanup_suggestions __ARGS((garray_T *gap, int maxscore, int keep)); 756 static void spell_soundfold __ARGS((slang_T *slang, char_u *inword, int folded, char_u *res)); 757 static void spell_soundfold_sofo __ARGS((slang_T *slang, char_u *inword, char_u *res)); 758 static void spell_soundfold_sal __ARGS((slang_T *slang, char_u *inword, char_u *res)); 759 #ifdef FEAT_MBYTE 760 static void spell_soundfold_wsal __ARGS((slang_T *slang, char_u *inword, char_u *res)); 761 #endif 762 static int soundalike_score __ARGS((char_u *goodsound, char_u *badsound)); 763 static int spell_edit_score __ARGS((char_u *badword, char_u *goodword)); 764 static void dump_word __ARGS((char_u *word, int round, int flags, linenr_T lnum)); 765 static linenr_T dump_prefixes __ARGS((slang_T *slang, char_u *word, int round, int flags, linenr_T startlnum)); 766 767 /* 768 * Use our own character-case definitions, 
because the current locale may 769 * differ from what the .spl file uses. 770 * These must not be called with negative number! 771 */ 772 #ifndef FEAT_MBYTE 773 /* Non-multi-byte implementation. */ 774 # define SPELL_TOFOLD(c) ((c) < 256 ? spelltab.st_fold[c] : (c)) 775 # define SPELL_TOUPPER(c) ((c) < 256 ? spelltab.st_upper[c] : (c)) 776 # define SPELL_ISUPPER(c) ((c) < 256 ? spelltab.st_isu[c] : FALSE) 777 #else 778 # if defined(HAVE_WCHAR_H) 779 # include <wchar.h> /* for towupper() and towlower() */ 780 # endif 781 /* Multi-byte implementation. For Unicode we can call utf_*(), but don't do 782 * that for ASCII, because we don't want to use 'casemap' here. Otherwise use 783 * the "w" library function for characters above 255 if available. */ 784 # ifdef HAVE_TOWLOWER 785 # define SPELL_TOFOLD(c) (enc_utf8 && (c) >= 128 ? utf_fold(c) \ 786 : (c) < 256 ? spelltab.st_fold[c] : towlower(c)) 787 # else 788 # define SPELL_TOFOLD(c) (enc_utf8 && (c) >= 128 ? utf_fold(c) \ 789 : (c) < 256 ? spelltab.st_fold[c] : (c)) 790 # endif 791 792 # ifdef HAVE_TOWUPPER 793 # define SPELL_TOUPPER(c) (enc_utf8 && (c) >= 128 ? utf_toupper(c) \ 794 : (c) < 256 ? spelltab.st_upper[c] : towupper(c)) 795 # else 796 # define SPELL_TOUPPER(c) (enc_utf8 && (c) >= 128 ? utf_toupper(c) \ 797 : (c) < 256 ? spelltab.st_upper[c] : (c)) 798 # endif 799 800 # ifdef HAVE_ISWUPPER 801 # define SPELL_ISUPPER(c) (enc_utf8 && (c) >= 128 ? utf_isupper(c) \ 802 : (c) < 256 ? spelltab.st_isu[c] : iswupper(c)) 803 # else 804 # define SPELL_ISUPPER(c) (enc_utf8 && (c) >= 128 ? utf_isupper(c) \ 805 : (c) < 256 ? 
spelltab.st_isu[c] : (FALSE)) 806 # endif 807 #endif 808 809 810 static char *e_format = N_("E759: Format error in spell file"); 811 static char *e_spell_trunc = N_("E758: Truncated spell file"); 812 static char *e_afftrailing = N_("Trailing text in %s line %d: %s"); 813 static char *e_affname = N_("Affix name too long in %s line %d: %s"); 814 static char *e_affform = N_("E761: Format error in affix file FOL, LOW or UPP"); 815 static char *e_affrange = N_("E762: Character in FOL, LOW or UPP is out of range"); 816 static char *msg_compressing = N_("Compressing word tree..."); 817 818 /* 819 * Main spell-checking function. 820 * "ptr" points to a character that could be the start of a word. 821 * "*attrp" is set to the highlight index for a badly spelled word. For a 822 * non-word or when it's OK it remains unchanged. 823 * This must only be called when 'spelllang' is not empty. 824 * 825 * "capcol" is used to check for a Capitalised word after the end of a 826 * sentence. If it's zero then perform the check. Return the column where to 827 * check next, or -1 when no sentence end was found. If it's NULL then don't 828 * worry. 829 * 830 * Returns the length of the word in bytes, also when it's OK, so that the 831 * caller can skip over the word. 832 */ 833 int 834 spell_check(wp, ptr, attrp, capcol) 835 win_T *wp; /* current window */ 836 char_u *ptr; 837 hlf_T *attrp; 838 int *capcol; /* column to check for Capital */ 839 { 840 matchinf_T mi; /* Most things are put in "mi" so that it can 841 be passed to functions quickly. */ 842 int nrlen = 0; /* found a number first */ 843 int c; 844 int wrongcaplen = 0; 845 int lpi; 846 847 /* A word never starts at a space or a control character. Return quickly 848 * then, skipping over the character. */ 849 if (*ptr <= ' ') 850 return 1; 851 vim_memset(&mi, 0, sizeof(matchinf_T)); 852 853 /* A number is always OK. Also skip hexadecimal numbers 0xFF99 and 854 * 0X99FF. 
But when a word character follows do check spelling to find 855 * "3GPP". */ 856 if (*ptr >= '0' && *ptr <= '9') 857 { 858 if (*ptr == '0' && (ptr[1] == 'x' || ptr[1] == 'X')) 859 mi.mi_end = skiphex(ptr + 2); 860 else 861 { 862 mi.mi_end = skipdigits(ptr); 863 nrlen = mi.mi_end - ptr; 864 } 865 if (!spell_iswordp(mi.mi_end, wp->w_buffer)) 866 return (int)(mi.mi_end - ptr); 867 868 /* Try including the digits in the word. */ 869 mi.mi_fend = ptr + nrlen; 870 } 871 else 872 mi.mi_fend = ptr; 873 874 /* Find the normal end of the word (until the next non-word character). */ 875 mi.mi_word = ptr; 876 if (spell_iswordp(mi.mi_fend, wp->w_buffer)) 877 { 878 do 879 { 880 mb_ptr_adv(mi.mi_fend); 881 } while (*mi.mi_fend != NUL && spell_iswordp(mi.mi_fend, wp->w_buffer)); 882 883 if (capcol != NULL && *capcol == 0 && wp->w_buffer->b_cap_prog != NULL) 884 { 885 /* Check word starting with capital letter. */ 886 c = PTR2CHAR(ptr); 887 if (!SPELL_ISUPPER(c)) 888 wrongcaplen = (int)(mi.mi_fend - ptr); 889 } 890 } 891 if (capcol != NULL) 892 *capcol = -1; 893 894 /* We always use the characters up to the next non-word character, 895 * also for bad words. */ 896 mi.mi_end = mi.mi_fend; 897 898 /* Check caps type later. */ 899 mi.mi_buf = wp->w_buffer; 900 901 /* case-fold the word with one non-word character, so that we can check 902 * for the word end. */ 903 if (*mi.mi_fend != NUL) 904 mb_ptr_adv(mi.mi_fend); 905 906 (void)spell_casefold(ptr, (int)(mi.mi_fend - ptr), mi.mi_fword, 907 MAXWLEN + 1); 908 mi.mi_fwordlen = STRLEN(mi.mi_fword); 909 910 /* The word is bad unless we recognize it. */ 911 mi.mi_result = SP_BAD; 912 mi.mi_result2 = SP_BAD; 913 914 /* 915 * Loop over the languages specified in 'spelllang'. 916 * We check them all, because a matching word may be longer than an 917 * already found matching word. 
918 */ 919 for (lpi = 0; lpi < wp->w_buffer->b_langp.ga_len; ++lpi) 920 { 921 mi.mi_lp = LANGP_ENTRY(wp->w_buffer->b_langp, lpi); 922 923 /* If reloading fails the language is still in the list but everything 924 * has been cleared. */ 925 if (mi.mi_lp->lp_slang->sl_fidxs == NULL) 926 continue; 927 928 /* Check for a matching word in case-folded words. */ 929 find_word(&mi, FIND_FOLDWORD); 930 931 /* Check for a matching word in keep-case words. */ 932 find_word(&mi, FIND_KEEPWORD); 933 934 /* Check for matching prefixes. */ 935 find_prefix(&mi, FIND_FOLDWORD); 936 937 /* For a NOBREAK language, may want to use a word without a following 938 * word as a backup. */ 939 if (mi.mi_lp->lp_slang->sl_nobreak && mi.mi_result == SP_BAD 940 && mi.mi_result2 != SP_BAD) 941 { 942 mi.mi_result = mi.mi_result2; 943 mi.mi_end = mi.mi_end2; 944 } 945 } 946 947 if (mi.mi_result != SP_OK) 948 { 949 /* If we found a number skip over it. Allows for "42nd". Do flag 950 * rare and local words, e.g., "3GPP". */ 951 if (nrlen > 0) 952 { 953 if (mi.mi_result == SP_BAD || mi.mi_result == SP_BANNED) 954 return nrlen; 955 } 956 957 /* When we are at a non-word character there is no error, just 958 * skip over the character (try looking for a word after it). */ 959 else if (!spell_iswordp_nmw(ptr)) 960 { 961 if (capcol != NULL && wp->w_buffer->b_cap_prog != NULL) 962 { 963 regmatch_T regmatch; 964 965 /* Check for end of sentence. */ 966 regmatch.regprog = wp->w_buffer->b_cap_prog; 967 regmatch.rm_ic = FALSE; 968 if (vim_regexec(®match, ptr, 0)) 969 *capcol = (int)(regmatch.endp[0] - ptr); 970 } 971 972 #ifdef FEAT_MBYTE 973 if (has_mbyte) 974 return (*mb_ptr2len)(ptr); 975 #endif 976 return 1; 977 } 978 else if (mi.mi_end == ptr) 979 /* Always include at least one character. Required for when there 980 * is a mixup in "midword". 
*/ 981 mb_ptr_adv(mi.mi_end); 982 else if (mi.mi_result == SP_BAD 983 && LANGP_ENTRY(wp->w_buffer->b_langp, 0)->lp_slang->sl_nobreak) 984 { 985 char_u *p, *fp; 986 int save_result = mi.mi_result; 987 988 /* First language in 'spelllang' is NOBREAK. Find first position 989 * at which any word would be valid. */ 990 mi.mi_lp = LANGP_ENTRY(wp->w_buffer->b_langp, 0); 991 if (mi.mi_lp->lp_slang->sl_fidxs != NULL) 992 { 993 p = mi.mi_word; 994 fp = mi.mi_fword; 995 for (;;) 996 { 997 mb_ptr_adv(p); 998 mb_ptr_adv(fp); 999 if (p >= mi.mi_end) 1000 break; 1001 mi.mi_compoff = fp - mi.mi_fword; 1002 find_word(&mi, FIND_COMPOUND); 1003 if (mi.mi_result != SP_BAD) 1004 { 1005 mi.mi_end = p; 1006 break; 1007 } 1008 } 1009 mi.mi_result = save_result; 1010 } 1011 } 1012 1013 if (mi.mi_result == SP_BAD || mi.mi_result == SP_BANNED) 1014 *attrp = HLF_SPB; 1015 else if (mi.mi_result == SP_RARE) 1016 *attrp = HLF_SPR; 1017 else 1018 *attrp = HLF_SPL; 1019 } 1020 1021 if (wrongcaplen > 0 && (mi.mi_result == SP_OK || mi.mi_result == SP_RARE)) 1022 { 1023 /* Report SpellCap only when the word isn't badly spelled. */ 1024 *attrp = HLF_SPC; 1025 return wrongcaplen; 1026 } 1027 1028 return (int)(mi.mi_end - ptr); 1029 } 1030 1031 /* 1032 * Check if the word at "mip->mi_word" is in the tree. 1033 * When "mode" is FIND_FOLDWORD check in fold-case word tree. 1034 * When "mode" is FIND_KEEPWORD check in keep-case word tree. 1035 * When "mode" is FIND_PREFIX check for word after prefix in fold-case word 1036 * tree. 1037 * 1038 * For a match mip->mi_result is updated. 
1039 */ 1040 static void 1041 find_word(mip, mode) 1042 matchinf_T *mip; 1043 int mode; 1044 { 1045 idx_T arridx = 0; 1046 int endlen[MAXWLEN]; /* length at possible word endings */ 1047 idx_T endidx[MAXWLEN]; /* possible word endings */ 1048 int endidxcnt = 0; 1049 int len; 1050 int wlen = 0; 1051 int flen; 1052 int c; 1053 char_u *ptr; 1054 idx_T lo, hi, m; 1055 #ifdef FEAT_MBYTE 1056 char_u *s; 1057 #endif 1058 char_u *p; 1059 int res = SP_BAD; 1060 slang_T *slang = mip->mi_lp->lp_slang; 1061 unsigned flags; 1062 char_u *byts; 1063 idx_T *idxs; 1064 int word_ends; 1065 int prefix_found; 1066 int nobreak_result; 1067 1068 if (mode == FIND_KEEPWORD || mode == FIND_KEEPCOMPOUND) 1069 { 1070 /* Check for word with matching case in keep-case tree. */ 1071 ptr = mip->mi_word; 1072 flen = 9999; /* no case folding, always enough bytes */ 1073 byts = slang->sl_kbyts; 1074 idxs = slang->sl_kidxs; 1075 1076 if (mode == FIND_KEEPCOMPOUND) 1077 /* Skip over the previously found word(s). */ 1078 wlen += mip->mi_compoff; 1079 } 1080 else 1081 { 1082 /* Check for case-folded in case-folded tree. */ 1083 ptr = mip->mi_fword; 1084 flen = mip->mi_fwordlen; /* available case-folded bytes */ 1085 byts = slang->sl_fbyts; 1086 idxs = slang->sl_fidxs; 1087 1088 if (mode == FIND_PREFIX) 1089 { 1090 /* Skip over the prefix. */ 1091 wlen = mip->mi_prefixlen; 1092 flen -= mip->mi_prefixlen; 1093 } 1094 else if (mode == FIND_COMPOUND) 1095 { 1096 /* Skip over the previously found word(s). */ 1097 wlen = mip->mi_compoff; 1098 flen -= mip->mi_compoff; 1099 } 1100 1101 } 1102 1103 if (byts == NULL) 1104 return; /* array is empty */ 1105 1106 /* 1107 * Repeat advancing in the tree until: 1108 * - there is a byte that doesn't match, 1109 * - we reach the end of the tree, 1110 * - or we reach the end of the line. 
1111 */ 1112 for (;;) 1113 { 1114 if (flen <= 0 && *mip->mi_fend != NUL) 1115 flen = fold_more(mip); 1116 1117 len = byts[arridx++]; 1118 1119 /* If the first possible byte is a zero the word could end here. 1120 * Remember this index, we first check for the longest word. */ 1121 if (byts[arridx] == 0) 1122 { 1123 if (endidxcnt == MAXWLEN) 1124 { 1125 /* Must be a corrupted spell file. */ 1126 EMSG(_(e_format)); 1127 return; 1128 } 1129 endlen[endidxcnt] = wlen; 1130 endidx[endidxcnt++] = arridx++; 1131 --len; 1132 1133 /* Skip over the zeros, there can be several flag/region 1134 * combinations. */ 1135 while (len > 0 && byts[arridx] == 0) 1136 { 1137 ++arridx; 1138 --len; 1139 } 1140 if (len == 0) 1141 break; /* no children, word must end here */ 1142 } 1143 1144 /* Stop looking at end of the line. */ 1145 if (ptr[wlen] == NUL) 1146 break; 1147 1148 /* Perform a binary search in the list of accepted bytes. */ 1149 c = ptr[wlen]; 1150 if (c == TAB) /* <Tab> is handled like <Space> */ 1151 c = ' '; 1152 lo = arridx; 1153 hi = arridx + len - 1; 1154 while (lo < hi) 1155 { 1156 m = (lo + hi) / 2; 1157 if (byts[m] > c) 1158 hi = m - 1; 1159 else if (byts[m] < c) 1160 lo = m + 1; 1161 else 1162 { 1163 lo = hi = m; 1164 break; 1165 } 1166 } 1167 1168 /* Stop if there is no matching byte. */ 1169 if (hi < lo || byts[lo] != c) 1170 break; 1171 1172 /* Continue at the child (if there is one). */ 1173 arridx = idxs[lo]; 1174 ++wlen; 1175 --flen; 1176 1177 /* One space in the good word may stand for several spaces in the 1178 * checked word. */ 1179 if (c == ' ') 1180 { 1181 for (;;) 1182 { 1183 if (flen <= 0 && *mip->mi_fend != NUL) 1184 flen = fold_more(mip); 1185 if (ptr[wlen] != ' ' && ptr[wlen] != TAB) 1186 break; 1187 ++wlen; 1188 --flen; 1189 } 1190 } 1191 } 1192 1193 /* 1194 * Verify that one of the possible endings is valid. Try the longest 1195 * first. 
1196 */ 1197 while (endidxcnt > 0) 1198 { 1199 --endidxcnt; 1200 arridx = endidx[endidxcnt]; 1201 wlen = endlen[endidxcnt]; 1202 1203 #ifdef FEAT_MBYTE 1204 if ((*mb_head_off)(ptr, ptr + wlen) > 0) 1205 continue; /* not at first byte of character */ 1206 #endif 1207 if (spell_iswordp(ptr + wlen, mip->mi_buf)) 1208 { 1209 if (slang->sl_compprog == NULL && !slang->sl_nobreak) 1210 continue; /* next char is a word character */ 1211 word_ends = FALSE; 1212 } 1213 else 1214 word_ends = TRUE; 1215 /* The prefix flag is before compound flags. Once a valid prefix flag 1216 * has been found we try compound flags. */ 1217 prefix_found = FALSE; 1218 1219 #ifdef FEAT_MBYTE 1220 if (mode != FIND_KEEPWORD && has_mbyte) 1221 { 1222 /* Compute byte length in original word, length may change 1223 * when folding case. This can be slow, take a shortcut when the 1224 * case-folded word is equal to the keep-case word. */ 1225 p = mip->mi_word; 1226 if (STRNCMP(ptr, p, wlen) != 0) 1227 { 1228 for (s = ptr; s < ptr + wlen; mb_ptr_adv(s)) 1229 mb_ptr_adv(p); 1230 wlen = p - mip->mi_word; 1231 } 1232 } 1233 #endif 1234 1235 /* Check flags and region. For FIND_PREFIX check the condition and 1236 * prefix ID. 1237 * Repeat this if there are more flags/region alternatives until there 1238 * is a match. */ 1239 res = SP_BAD; 1240 for (len = byts[arridx - 1]; len > 0 && byts[arridx] == 0; 1241 --len, ++arridx) 1242 { 1243 flags = idxs[arridx]; 1244 1245 /* For the fold-case tree check that the case of the checked word 1246 * matches with what the word in the tree requires. 1247 * For keep-case tree the case is always right. For prefixes we 1248 * don't bother to check. */ 1249 if (mode == FIND_FOLDWORD) 1250 { 1251 if (mip->mi_cend != mip->mi_word + wlen) 1252 { 1253 /* mi_capflags was set for a different word length, need 1254 * to do it again. 
*/ 1255 mip->mi_cend = mip->mi_word + wlen; 1256 mip->mi_capflags = captype(mip->mi_word, mip->mi_cend); 1257 } 1258 1259 if (mip->mi_capflags == WF_KEEPCAP 1260 || !spell_valid_case(mip->mi_capflags, flags)) 1261 continue; 1262 } 1263 1264 /* When mode is FIND_PREFIX the word must support the prefix: 1265 * check the prefix ID and the condition. Do that for the list at 1266 * mip->mi_prefarridx that find_prefix() filled. */ 1267 else if (mode == FIND_PREFIX && !prefix_found) 1268 { 1269 c = valid_word_prefix(mip->mi_prefcnt, mip->mi_prefarridx, 1270 flags, 1271 mip->mi_word + mip->mi_cprefixlen, slang, 1272 FALSE); 1273 if (c == 0) 1274 continue; 1275 1276 /* Use the WF_RARE flag for a rare prefix. */ 1277 if (c & WF_RAREPFX) 1278 flags |= WF_RARE; 1279 prefix_found = TRUE; 1280 } 1281 1282 if (slang->sl_nobreak) 1283 { 1284 if ((mode == FIND_COMPOUND || mode == FIND_KEEPCOMPOUND) 1285 && (flags & WF_BANNED) == 0) 1286 { 1287 /* NOBREAK: found a valid following word. That's all we 1288 * need to know, so return. */ 1289 mip->mi_result = SP_OK; 1290 break; 1291 } 1292 } 1293 1294 else if ((mode == FIND_COMPOUND || mode == FIND_KEEPCOMPOUND 1295 || !word_ends)) 1296 { 1297 /* If there is no flag or the word is shorter than 1298 * COMPOUNDMIN reject it quickly. 1299 * Makes you wonder why someone puts a compound flag on a word 1300 * that's too short... Myspell compatibility requires this 1301 * anyway. */ 1302 if (((unsigned)flags >> 24) == 0 1303 || wlen - mip->mi_compoff < slang->sl_compminlen) 1304 continue; 1305 #ifdef FEAT_MBYTE 1306 /* For multi-byte chars check character length against 1307 * COMPOUNDMIN. */ 1308 if (has_mbyte 1309 && slang->sl_compminlen > 0 1310 && mb_charlen_len(mip->mi_word + mip->mi_compoff, 1311 wlen - mip->mi_compoff) < slang->sl_compminlen) 1312 continue; 1313 #endif 1314 1315 /* Limit the number of compound words to COMPOUNDMAX if no 1316 * maximum for syllables is specified. 
*/ 1317 if (!word_ends && mip->mi_complen + 2 > slang->sl_compmax 1318 && slang->sl_compsylmax == MAXWLEN) 1319 continue; 1320 1321 /* Quickly check if compounding is possible with this flag. */ 1322 if (!byte_in_str(mip->mi_complen == 0 1323 ? slang->sl_compstartflags 1324 : slang->sl_compallflags, 1325 ((unsigned)flags >> 24))) 1326 continue; 1327 1328 if (mode == FIND_COMPOUND) 1329 { 1330 int capflags; 1331 1332 /* Need to check the caps type of the appended compound 1333 * word. */ 1334 #ifdef FEAT_MBYTE 1335 if (has_mbyte && STRNCMP(ptr, mip->mi_word, 1336 mip->mi_compoff) != 0) 1337 { 1338 /* case folding may have changed the length */ 1339 p = mip->mi_word; 1340 for (s = ptr; s < ptr + mip->mi_compoff; mb_ptr_adv(s)) 1341 mb_ptr_adv(p); 1342 } 1343 else 1344 #endif 1345 p = mip->mi_word + mip->mi_compoff; 1346 capflags = captype(p, mip->mi_word + wlen); 1347 if (capflags == WF_KEEPCAP || (capflags == WF_ALLCAP 1348 && (flags & WF_FIXCAP) != 0)) 1349 continue; 1350 1351 if (capflags != WF_ALLCAP) 1352 { 1353 /* When the character before the word is a word 1354 * character we do not accept a Onecap word. We do 1355 * accept a no-caps word, even when the dictionary 1356 * word specifies ONECAP. */ 1357 mb_ptr_back(mip->mi_word, p); 1358 if (spell_iswordp_nmw(p) 1359 ? capflags == WF_ONECAP 1360 : (flags & WF_ONECAP) != 0 1361 && capflags != WF_ONECAP) 1362 continue; 1363 } 1364 } 1365 1366 /* If the word ends the sequence of compound flags of the 1367 * words must match with one of the COMPOUNDFLAGS items and 1368 * the number of syllables must not be too large. */ 1369 mip->mi_compflags[mip->mi_complen] = ((unsigned)flags >> 24); 1370 mip->mi_compflags[mip->mi_complen + 1] = NUL; 1371 if (word_ends) 1372 { 1373 char_u fword[MAXWLEN]; 1374 1375 if (slang->sl_compsylmax < MAXWLEN) 1376 { 1377 /* "fword" is only needed for checking syllables. 
*/ 1378 if (ptr == mip->mi_word) 1379 (void)spell_casefold(ptr, wlen, fword, MAXWLEN); 1380 else 1381 vim_strncpy(fword, ptr, endlen[endidxcnt]); 1382 } 1383 if (!can_compound(slang, fword, mip->mi_compflags)) 1384 continue; 1385 } 1386 } 1387 1388 /* Check NEEDCOMPOUND: can't use word without compounding. */ 1389 else if (flags & WF_NEEDCOMP) 1390 continue; 1391 1392 nobreak_result = SP_OK; 1393 1394 if (!word_ends) 1395 { 1396 int save_result = mip->mi_result; 1397 char_u *save_end = mip->mi_end; 1398 langp_T *save_lp = mip->mi_lp; 1399 int lpi; 1400 1401 /* Check that a valid word follows. If there is one and we 1402 * are compounding, it will set "mi_result", thus we are 1403 * always finished here. For NOBREAK we only check that a 1404 * valid word follows. 1405 * Recursive! */ 1406 if (slang->sl_nobreak) 1407 mip->mi_result = SP_BAD; 1408 1409 /* Find following word in case-folded tree. */ 1410 mip->mi_compoff = endlen[endidxcnt]; 1411 #ifdef FEAT_MBYTE 1412 if (has_mbyte && mode == FIND_KEEPWORD) 1413 { 1414 /* Compute byte length in case-folded word from "wlen": 1415 * byte length in keep-case word. Length may change when 1416 * folding case. This can be slow, take a shortcut when 1417 * the case-folded word is equal to the keep-case word. */ 1418 p = mip->mi_fword; 1419 if (STRNCMP(ptr, p, wlen) != 0) 1420 { 1421 for (s = ptr; s < ptr + wlen; mb_ptr_adv(s)) 1422 mb_ptr_adv(p); 1423 mip->mi_compoff = p - mip->mi_fword; 1424 } 1425 } 1426 #endif 1427 c = mip->mi_compoff; 1428 ++mip->mi_complen; 1429 1430 /* For NOBREAK we need to try all NOBREAK languages, at least 1431 * to find the ".add" file(s). 
*/ 1432 for (lpi = 0; lpi < mip->mi_buf->b_langp.ga_len; ++lpi) 1433 { 1434 if (slang->sl_nobreak) 1435 { 1436 mip->mi_lp = LANGP_ENTRY(mip->mi_buf->b_langp, lpi); 1437 if (mip->mi_lp->lp_slang->sl_fidxs == NULL 1438 || !mip->mi_lp->lp_slang->sl_nobreak) 1439 continue; 1440 } 1441 1442 find_word(mip, FIND_COMPOUND); 1443 1444 /* When NOBREAK any word that matches is OK. Otherwise we 1445 * need to find the longest match, thus try with keep-case 1446 * and prefix too. */ 1447 if (!slang->sl_nobreak || mip->mi_result == SP_BAD) 1448 { 1449 /* Find following word in keep-case tree. */ 1450 mip->mi_compoff = wlen; 1451 find_word(mip, FIND_KEEPCOMPOUND); 1452 1453 if (!slang->sl_nobreak || mip->mi_result == SP_BAD) 1454 { 1455 /* Check for following word with prefix. */ 1456 mip->mi_compoff = c; 1457 find_prefix(mip, FIND_COMPOUND); 1458 } 1459 } 1460 1461 if (!slang->sl_nobreak) 1462 break; 1463 } 1464 --mip->mi_complen; 1465 mip->mi_lp = save_lp; 1466 1467 if (slang->sl_nobreak) 1468 { 1469 nobreak_result = mip->mi_result; 1470 mip->mi_result = save_result; 1471 mip->mi_end = save_end; 1472 } 1473 else 1474 { 1475 if (mip->mi_result == SP_OK) 1476 break; 1477 continue; 1478 } 1479 } 1480 1481 if (flags & WF_BANNED) 1482 res = SP_BANNED; 1483 else if (flags & WF_REGION) 1484 { 1485 /* Check region. */ 1486 if ((mip->mi_lp->lp_region & (flags >> 16)) != 0) 1487 res = SP_OK; 1488 else 1489 res = SP_LOCAL; 1490 } 1491 else if (flags & WF_RARE) 1492 res = SP_RARE; 1493 else 1494 res = SP_OK; 1495 1496 /* Always use the longest match and the best result. For NOBREAK 1497 * we separately keep the longest match without a following good 1498 * word as a fall-back. 
*/ 1499 if (nobreak_result == SP_BAD) 1500 { 1501 if (mip->mi_result2 > res) 1502 { 1503 mip->mi_result2 = res; 1504 mip->mi_end2 = mip->mi_word + wlen; 1505 } 1506 else if (mip->mi_result2 == res 1507 && mip->mi_end2 < mip->mi_word + wlen) 1508 mip->mi_end2 = mip->mi_word + wlen; 1509 } 1510 else if (mip->mi_result > res) 1511 { 1512 mip->mi_result = res; 1513 mip->mi_end = mip->mi_word + wlen; 1514 } 1515 else if (mip->mi_result == res && mip->mi_end < mip->mi_word + wlen) 1516 mip->mi_end = mip->mi_word + wlen; 1517 1518 if (mip->mi_result == SP_OK) 1519 break; 1520 } 1521 1522 if (mip->mi_result == SP_OK) 1523 break; 1524 } 1525 } 1526 1527 /* 1528 * Return TRUE if "flags" is a valid sequence of compound flags and 1529 * "word[len]" does not have too many syllables. 1530 */ 1531 static int 1532 can_compound(slang, word, flags) 1533 slang_T *slang; 1534 char_u *word; 1535 char_u *flags; 1536 { 1537 regmatch_T regmatch; 1538 #ifdef FEAT_MBYTE 1539 char_u uflags[MAXWLEN * 2]; 1540 int i; 1541 #endif 1542 char_u *p; 1543 1544 if (slang->sl_compprog == NULL) 1545 return FALSE; 1546 #ifdef FEAT_MBYTE 1547 if (enc_utf8) 1548 { 1549 /* Need to convert the single byte flags to utf8 characters. */ 1550 p = uflags; 1551 for (i = 0; flags[i] != NUL; ++i) 1552 p += mb_char2bytes(flags[i], p); 1553 *p = NUL; 1554 p = uflags; 1555 } 1556 else 1557 #endif 1558 p = flags; 1559 regmatch.regprog = slang->sl_compprog; 1560 regmatch.rm_ic = FALSE; 1561 if (!vim_regexec(®match, p, 0)) 1562 return FALSE; 1563 1564 /* Count the number of syllables. This may be slow, do it last. If there 1565 * are too many syllables AND the number of compound words is above 1566 * COMPOUNDMAX then compounding is not allowed. 
*/ 1567 if (slang->sl_compsylmax < MAXWLEN 1568 && count_syllables(slang, word) > slang->sl_compsylmax) 1569 return (int)STRLEN(flags) < slang->sl_compmax; 1570 return TRUE; 1571 } 1572 1573 /* 1574 * Return non-zero if the prefix indicated by "arridx" matches with the prefix 1575 * ID in "flags" for the word "word". 1576 * The WF_RAREPFX flag is included in the return value for a rare prefix. 1577 */ 1578 static int 1579 valid_word_prefix(totprefcnt, arridx, flags, word, slang, cond_req) 1580 int totprefcnt; /* nr of prefix IDs */ 1581 int arridx; /* idx in sl_pidxs[] */ 1582 int flags; 1583 char_u *word; 1584 slang_T *slang; 1585 int cond_req; /* only use prefixes with a condition */ 1586 { 1587 int prefcnt; 1588 int pidx; 1589 regprog_T *rp; 1590 regmatch_T regmatch; 1591 int prefid; 1592 1593 prefid = (unsigned)flags >> 24; 1594 for (prefcnt = totprefcnt - 1; prefcnt >= 0; --prefcnt) 1595 { 1596 pidx = slang->sl_pidxs[arridx + prefcnt]; 1597 1598 /* Check the prefix ID. */ 1599 if (prefid != (pidx & 0xff)) 1600 continue; 1601 1602 /* Check if the prefix doesn't combine and the word already has a 1603 * suffix. */ 1604 if ((flags & WF_HAS_AFF) && (pidx & WF_PFX_NC)) 1605 continue; 1606 1607 /* Check the condition, if there is one. The condition index is 1608 * stored in the two bytes above the prefix ID byte. */ 1609 rp = slang->sl_prefprog[((unsigned)pidx >> 8) & 0xffff]; 1610 if (rp != NULL) 1611 { 1612 regmatch.regprog = rp; 1613 regmatch.rm_ic = FALSE; 1614 if (!vim_regexec(®match, word, 0)) 1615 continue; 1616 } 1617 else if (cond_req) 1618 continue; 1619 1620 /* It's a match! Return the WF_ flags. */ 1621 return pidx; 1622 } 1623 return 0; 1624 } 1625 1626 /* 1627 * Check if the word at "mip->mi_word" has a matching prefix. 1628 * If it does, then check the following word. 1629 * 1630 * If "mode" is "FIND_COMPOUND" then do the same after another word, find a 1631 * prefix in a compound word. 1632 * 1633 * For a match mip->mi_result is updated. 
1634 */ 1635 static void 1636 find_prefix(mip, mode) 1637 matchinf_T *mip; 1638 int mode; 1639 { 1640 idx_T arridx = 0; 1641 int len; 1642 int wlen = 0; 1643 int flen; 1644 int c; 1645 char_u *ptr; 1646 idx_T lo, hi, m; 1647 slang_T *slang = mip->mi_lp->lp_slang; 1648 char_u *byts; 1649 idx_T *idxs; 1650 1651 byts = slang->sl_pbyts; 1652 if (byts == NULL) 1653 return; /* array is empty */ 1654 1655 /* We use the case-folded word here, since prefixes are always 1656 * case-folded. */ 1657 ptr = mip->mi_fword; 1658 flen = mip->mi_fwordlen; /* available case-folded bytes */ 1659 if (mode == FIND_COMPOUND) 1660 { 1661 /* Skip over the previously found word(s). */ 1662 ptr += mip->mi_compoff; 1663 flen -= mip->mi_compoff; 1664 } 1665 idxs = slang->sl_pidxs; 1666 1667 /* 1668 * Repeat advancing in the tree until: 1669 * - there is a byte that doesn't match, 1670 * - we reach the end of the tree, 1671 * - or we reach the end of the line. 1672 */ 1673 for (;;) 1674 { 1675 if (flen == 0 && *mip->mi_fend != NUL) 1676 flen = fold_more(mip); 1677 1678 len = byts[arridx++]; 1679 1680 /* If the first possible byte is a zero the prefix could end here. 1681 * Check if the following word matches and supports the prefix. */ 1682 if (byts[arridx] == 0) 1683 { 1684 /* There can be several prefixes with different conditions. We 1685 * try them all, since we don't know which one will give the 1686 * longest match. The word is the same each time, pass the list 1687 * of possible prefixes to find_word(). */ 1688 mip->mi_prefarridx = arridx; 1689 mip->mi_prefcnt = len; 1690 while (len > 0 && byts[arridx] == 0) 1691 { 1692 ++arridx; 1693 --len; 1694 } 1695 mip->mi_prefcnt -= len; 1696 1697 /* Find the word that comes after the prefix. */ 1698 mip->mi_prefixlen = wlen; 1699 if (mode == FIND_COMPOUND) 1700 /* Skip over the previously found word(s). 
*/ 1701 mip->mi_prefixlen += mip->mi_compoff; 1702 1703 #ifdef FEAT_MBYTE 1704 if (has_mbyte) 1705 { 1706 /* Case-folded length may differ from original length. */ 1707 mip->mi_cprefixlen = nofold_len(mip->mi_fword, 1708 mip->mi_prefixlen, mip->mi_word); 1709 } 1710 else 1711 mip->mi_cprefixlen = mip->mi_prefixlen; 1712 #endif 1713 find_word(mip, FIND_PREFIX); 1714 1715 1716 if (len == 0) 1717 break; /* no children, word must end here */ 1718 } 1719 1720 /* Stop looking at end of the line. */ 1721 if (ptr[wlen] == NUL) 1722 break; 1723 1724 /* Perform a binary search in the list of accepted bytes. */ 1725 c = ptr[wlen]; 1726 lo = arridx; 1727 hi = arridx + len - 1; 1728 while (lo < hi) 1729 { 1730 m = (lo + hi) / 2; 1731 if (byts[m] > c) 1732 hi = m - 1; 1733 else if (byts[m] < c) 1734 lo = m + 1; 1735 else 1736 { 1737 lo = hi = m; 1738 break; 1739 } 1740 } 1741 1742 /* Stop if there is no matching byte. */ 1743 if (hi < lo || byts[lo] != c) 1744 break; 1745 1746 /* Continue at the child (if there is one). */ 1747 arridx = idxs[lo]; 1748 ++wlen; 1749 --flen; 1750 } 1751 } 1752 1753 /* 1754 * Need to fold at least one more character. Do until next non-word character 1755 * for efficiency. 1756 * Return the length of the folded chars in bytes. 1757 */ 1758 static int 1759 fold_more(mip) 1760 matchinf_T *mip; 1761 { 1762 int flen; 1763 char_u *p; 1764 1765 p = mip->mi_fend; 1766 do 1767 { 1768 mb_ptr_adv(mip->mi_fend); 1769 } while (*mip->mi_fend != NUL && spell_iswordp(mip->mi_fend, mip->mi_buf)); 1770 1771 /* Include the non-word character so that we can check for the 1772 * word end. */ 1773 if (*mip->mi_fend != NUL) 1774 mb_ptr_adv(mip->mi_fend); 1775 1776 (void)spell_casefold(p, (int)(mip->mi_fend - p), 1777 mip->mi_fword + mip->mi_fwordlen, 1778 MAXWLEN - mip->mi_fwordlen); 1779 flen = STRLEN(mip->mi_fword + mip->mi_fwordlen); 1780 mip->mi_fwordlen += flen; 1781 return flen; 1782 } 1783 1784 /* 1785 * Check case flags for a word. 
Return TRUE if the word has the requested 1786 * case. 1787 */ 1788 static int 1789 spell_valid_case(wordflags, treeflags) 1790 int wordflags; /* flags for the checked word. */ 1791 int treeflags; /* flags for the word in the spell tree */ 1792 { 1793 return ((wordflags == WF_ALLCAP && (treeflags & WF_FIXCAP) == 0) 1794 || ((treeflags & (WF_ALLCAP | WF_KEEPCAP)) == 0 1795 && ((treeflags & WF_ONECAP) == 0 1796 || (wordflags & WF_ONECAP) != 0))); 1797 } 1798 1799 /* 1800 * Return TRUE if spell checking is not enabled. 1801 */ 1802 static int 1803 no_spell_checking(wp) 1804 win_T *wp; 1805 { 1806 if (!wp->w_p_spell || *wp->w_buffer->b_p_spl == NUL) 1807 { 1808 EMSG(_("E756: Spell checking is not enabled")); 1809 return TRUE; 1810 } 1811 return FALSE; 1812 } 1813 1814 /* 1815 * Move to next spell error. 1816 * "curline" is FALSE for "[s", "]s", "[S" and "]S". 1817 * "curline" is TRUE to find word under/after cursor in the same line. 1818 * For Insert mode completion "dir" is BACKWARD and "curline" is TRUE: move 1819 * to after badly spelled word before the cursor. 1820 * Return 0 if not found, length of the badly spelled word otherwise. 
1821 */ 1822 int 1823 spell_move_to(wp, dir, allwords, curline, attrp) 1824 win_T *wp; 1825 int dir; /* FORWARD or BACKWARD */ 1826 int allwords; /* TRUE for "[s"/"]s", FALSE for "[S"/"]S" */ 1827 int curline; 1828 hlf_T *attrp; /* return: attributes of bad word or NULL 1829 (only when "dir" is FORWARD) */ 1830 { 1831 linenr_T lnum; 1832 pos_T found_pos; 1833 int found_len = 0; 1834 char_u *line; 1835 char_u *p; 1836 char_u *endp; 1837 hlf_T attr; 1838 int len; 1839 int has_syntax = syntax_present(wp->w_buffer); 1840 int col; 1841 int can_spell; 1842 char_u *buf = NULL; 1843 int buflen = 0; 1844 int skip = 0; 1845 int capcol = -1; 1846 int found_one = FALSE; 1847 int wrapped = FALSE; 1848 1849 if (no_spell_checking(wp)) 1850 return 0; 1851 1852 /* 1853 * Start looking for bad word at the start of the line, because we can't 1854 * start halfway a word, we don't know where the it starts or ends. 1855 * 1856 * When searching backwards, we continue in the line to find the last 1857 * bad word (in the cursor line: before the cursor). 1858 * 1859 * We concatenate the start of the next line, so that wrapped words work 1860 * (e.g. "et<line-break>cetera"). Doesn't work when searching backwards 1861 * though... 1862 */ 1863 lnum = wp->w_cursor.lnum; 1864 found_pos.lnum = 0; 1865 1866 while (!got_int) 1867 { 1868 line = ml_get_buf(wp->w_buffer, lnum, FALSE); 1869 1870 len = STRLEN(line); 1871 if (buflen < len + MAXWLEN + 2) 1872 { 1873 vim_free(buf); 1874 buflen = len + MAXWLEN + 2; 1875 buf = alloc(buflen); 1876 if (buf == NULL) 1877 break; 1878 } 1879 1880 /* In first line check first word for Capital. */ 1881 if (lnum == 1) 1882 capcol = 0; 1883 1884 /* For checking first word with a capital skip white space. */ 1885 if (capcol == 0) 1886 capcol = skipwhite(line) - line; 1887 1888 /* Copy the line into "buf" and append the start of the next line if 1889 * possible. 
*/ 1890 STRCPY(buf, line); 1891 if (lnum < wp->w_buffer->b_ml.ml_line_count) 1892 spell_cat_line(buf + STRLEN(buf), ml_get(lnum + 1), MAXWLEN); 1893 1894 p = buf + skip; 1895 endp = buf + len; 1896 while (p < endp) 1897 { 1898 /* When searching backward don't search after the cursor. Unless 1899 * we wrapped around the end of the buffer. */ 1900 if (dir == BACKWARD 1901 && lnum == wp->w_cursor.lnum 1902 && !wrapped 1903 && (colnr_T)(p - buf) >= wp->w_cursor.col) 1904 break; 1905 1906 /* start of word */ 1907 attr = HLF_COUNT; 1908 len = spell_check(wp, p, &attr, &capcol); 1909 1910 if (attr != HLF_COUNT) 1911 { 1912 /* We found a bad word. Check the attribute. */ 1913 if (allwords || attr == HLF_SPB) 1914 { 1915 found_one = TRUE; 1916 1917 /* When searching forward only accept a bad word after 1918 * the cursor. */ 1919 if (dir == BACKWARD 1920 || lnum != wp->w_cursor.lnum 1921 || (lnum == wp->w_cursor.lnum 1922 && (wrapped 1923 || (colnr_T)(curline ? p - buf + len 1924 : p - buf) 1925 > wp->w_cursor.col))) 1926 { 1927 if (has_syntax) 1928 { 1929 col = p - buf; 1930 (void)syn_get_id(wp, lnum, (colnr_T)col, 1931 FALSE, &can_spell); 1932 } 1933 else 1934 can_spell = TRUE; 1935 1936 if (can_spell) 1937 { 1938 found_pos.lnum = lnum; 1939 found_pos.col = p - buf; 1940 #ifdef FEAT_VIRTUALEDIT 1941 found_pos.coladd = 0; 1942 #endif 1943 if (dir == FORWARD) 1944 { 1945 /* No need to search further. */ 1946 wp->w_cursor = found_pos; 1947 vim_free(buf); 1948 if (attrp != NULL) 1949 *attrp = attr; 1950 return len; 1951 } 1952 else if (curline) 1953 /* Insert mode completion: put cursor after 1954 * the bad word. */ 1955 found_pos.col += len; 1956 found_len = len; 1957 } 1958 } 1959 } 1960 } 1961 1962 /* advance to character after the word */ 1963 p += len; 1964 capcol -= len; 1965 } 1966 1967 if (dir == BACKWARD && found_pos.lnum != 0) 1968 { 1969 /* Use the last match in the line (before the cursor). 
*/ 1970 wp->w_cursor = found_pos; 1971 vim_free(buf); 1972 return found_len; 1973 } 1974 1975 if (curline) 1976 break; /* only check cursor line */ 1977 1978 /* Advance to next line. */ 1979 if (dir == BACKWARD) 1980 { 1981 /* If we are back at the starting line and searched it again there 1982 * is no match, give up. */ 1983 if (lnum == wp->w_cursor.lnum && wrapped) 1984 break; 1985 1986 if (lnum > 1) 1987 --lnum; 1988 else if (!p_ws) 1989 break; /* at first line and 'nowrapscan' */ 1990 else 1991 { 1992 /* Wrap around to the end of the buffer. May search the 1993 * starting line again and accept the last match. */ 1994 lnum = wp->w_buffer->b_ml.ml_line_count; 1995 wrapped = TRUE; 1996 if (!shortmess(SHM_SEARCH)) 1997 give_warning((char_u *)_(top_bot_msg), TRUE); 1998 } 1999 capcol = -1; 2000 } 2001 else 2002 { 2003 if (lnum < wp->w_buffer->b_ml.ml_line_count) 2004 ++lnum; 2005 else if (!p_ws) 2006 break; /* at first line and 'nowrapscan' */ 2007 else 2008 { 2009 /* Wrap around to the start of the buffer. May search the 2010 * starting line again and accept the first match. */ 2011 lnum = 1; 2012 wrapped = TRUE; 2013 if (!shortmess(SHM_SEARCH)) 2014 give_warning((char_u *)_(bot_top_msg), TRUE); 2015 } 2016 2017 /* If we are back at the starting line and there is no match then 2018 * give up. */ 2019 if (lnum == wp->w_cursor.lnum && !found_one) 2020 break; 2021 2022 /* Skip the characters at the start of the next line that were 2023 * included in a match crossing line boundaries. */ 2024 if (attr == HLF_COUNT) 2025 skip = p - endp; 2026 else 2027 skip = 0; 2028 2029 /* Capscol skips over the inserted space. 
*/ 2030 --capcol; 2031 2032 /* But after empty line check first word in next line */ 2033 if (*skipwhite(line) == NUL) 2034 capcol = 0; 2035 } 2036 2037 line_breakcheck(); 2038 } 2039 2040 vim_free(buf); 2041 return 0; 2042 } 2043 2044 /* 2045 * For spell checking: concatenate the start of the following line "line" into 2046 * "buf", blanking-out special characters. Copy less then "maxlen" bytes. 2047 */ 2048 void 2049 spell_cat_line(buf, line, maxlen) 2050 char_u *buf; 2051 char_u *line; 2052 int maxlen; 2053 { 2054 char_u *p; 2055 int n; 2056 2057 p = skipwhite(line); 2058 while (vim_strchr((char_u *)"*#/\"\t", *p) != NULL) 2059 p = skipwhite(p + 1); 2060 2061 if (*p != NUL) 2062 { 2063 *buf = ' '; 2064 vim_strncpy(buf + 1, line, maxlen - 2); 2065 n = p - line; 2066 if (n >= maxlen) 2067 n = maxlen - 1; 2068 vim_memset(buf + 1, ' ', n); 2069 } 2070 } 2071 2072 typedef struct spelload_S 2073 { 2074 char_u sl_lang[MAXWLEN + 1]; /* language name */ 2075 slang_T *sl_slang; /* resulting slang_T struct */ 2076 int sl_nobreak; /* NOBREAK language found */ 2077 } spelload_T; 2078 2079 /* 2080 * Load word list(s) for "lang" from Vim spell file(s). 2081 * "lang" must be the language without the region: e.g., "en". 2082 */ 2083 static void 2084 spell_load_lang(lang) 2085 char_u *lang; 2086 { 2087 char_u fname_enc[85]; 2088 int r; 2089 spelload_T sl; 2090 2091 /* Copy the language name to pass it to spell_load_cb() as a cookie. 2092 * It's truncated when an error is detected. */ 2093 STRCPY(sl.sl_lang, lang); 2094 sl.sl_slang = NULL; 2095 sl.sl_nobreak = FALSE; 2096 2097 /* 2098 * Find the first spell file for "lang" in 'runtimepath' and load it. 2099 */ 2100 vim_snprintf((char *)fname_enc, sizeof(fname_enc) - 5, 2101 "spell/%s.%s.spl", lang, spell_enc()); 2102 r = do_in_runtimepath(fname_enc, FALSE, spell_load_cb, &sl); 2103 2104 if (r == FAIL && *sl.sl_lang != NUL) 2105 { 2106 /* Try loading the ASCII version. 
*/ 2107 vim_snprintf((char *)fname_enc, sizeof(fname_enc) - 5, 2108 "spell/%s.ascii.spl", lang); 2109 r = do_in_runtimepath(fname_enc, FALSE, spell_load_cb, &sl); 2110 } 2111 2112 if (r == FAIL) 2113 smsg((char_u *)_("Warning: Cannot find word list \"%s.%s.spl\" or \"%s.ascii.spl\""), 2114 lang, spell_enc(), lang); 2115 else if (sl.sl_slang != NULL) 2116 { 2117 /* At least one file was loaded, now load all the additions. */ 2118 STRCPY(fname_enc + STRLEN(fname_enc) - 3, "add.spl"); 2119 do_in_runtimepath(fname_enc, TRUE, spell_load_cb, &sl); 2120 } 2121 } 2122 2123 /* 2124 * Return the encoding used for spell checking: Use 'encoding', except that we 2125 * use "latin1" for "latin9". And limit to 60 characters (just in case). 2126 */ 2127 static char_u * 2128 spell_enc() 2129 { 2130 2131 #ifdef FEAT_MBYTE 2132 if (STRLEN(p_enc) < 60 && STRCMP(p_enc, "iso-8859-15") != 0) 2133 return p_enc; 2134 #endif 2135 return (char_u *)"latin1"; 2136 } 2137 2138 /* 2139 * Get the name of the .spl file for the internal wordlist into 2140 * "fname[MAXPATHL]". 2141 */ 2142 static void 2143 int_wordlist_spl(fname) 2144 char_u *fname; 2145 { 2146 vim_snprintf((char *)fname, MAXPATHL, "%s.%s.spl", 2147 int_wordlist, spell_enc()); 2148 } 2149 2150 /* 2151 * Allocate a new slang_T. 2152 * Caller must fill "sl_next". 2153 */ 2154 static slang_T * 2155 slang_alloc(lang) 2156 char_u *lang; 2157 { 2158 slang_T *lp; 2159 2160 lp = (slang_T *)alloc_clear(sizeof(slang_T)); 2161 if (lp != NULL) 2162 { 2163 lp->sl_name = vim_strsave(lang); 2164 ga_init2(&lp->sl_rep, sizeof(fromto_T), 10); 2165 lp->sl_compmax = MAXWLEN; 2166 lp->sl_compsylmax = MAXWLEN; 2167 } 2168 return lp; 2169 } 2170 2171 /* 2172 * Free the contents of an slang_T and the structure itself. 
2173 */ 2174 static void 2175 slang_free(lp) 2176 slang_T *lp; 2177 { 2178 vim_free(lp->sl_name); 2179 vim_free(lp->sl_fname); 2180 slang_clear(lp); 2181 vim_free(lp); 2182 } 2183 2184 /* 2185 * Clear an slang_T so that the file can be reloaded. 2186 */ 2187 static void 2188 slang_clear(lp) 2189 slang_T *lp; 2190 { 2191 garray_T *gap; 2192 fromto_T *ftp; 2193 salitem_T *smp; 2194 int i; 2195 2196 vim_free(lp->sl_fbyts); 2197 lp->sl_fbyts = NULL; 2198 vim_free(lp->sl_kbyts); 2199 lp->sl_kbyts = NULL; 2200 vim_free(lp->sl_pbyts); 2201 lp->sl_pbyts = NULL; 2202 2203 vim_free(lp->sl_fidxs); 2204 lp->sl_fidxs = NULL; 2205 vim_free(lp->sl_kidxs); 2206 lp->sl_kidxs = NULL; 2207 vim_free(lp->sl_pidxs); 2208 lp->sl_pidxs = NULL; 2209 2210 gap = &lp->sl_rep; 2211 while (gap->ga_len > 0) 2212 { 2213 ftp = &((fromto_T *)gap->ga_data)[--gap->ga_len]; 2214 vim_free(ftp->ft_from); 2215 vim_free(ftp->ft_to); 2216 } 2217 ga_clear(gap); 2218 2219 gap = &lp->sl_sal; 2220 if (lp->sl_sofo) 2221 { 2222 /* "ga_len" is set to 1 without adding an item for latin1 */ 2223 if (gap->ga_data != NULL) 2224 /* SOFOFROM and SOFOTO items: free lists of wide characters. */ 2225 for (i = 0; i < gap->ga_len; ++i) 2226 vim_free(((int **)gap->ga_data)[i]); 2227 } 2228 else 2229 /* SAL items: free salitem_T items */ 2230 while (gap->ga_len > 0) 2231 { 2232 smp = &((salitem_T *)gap->ga_data)[--gap->ga_len]; 2233 vim_free(smp->sm_lead); 2234 /* Don't free sm_oneof and sm_rules, they point into sm_lead. 
*/ 2235 vim_free(smp->sm_to); 2236 #ifdef FEAT_MBYTE 2237 vim_free(smp->sm_lead_w); 2238 vim_free(smp->sm_oneof_w); 2239 vim_free(smp->sm_to_w); 2240 #endif 2241 } 2242 ga_clear(gap); 2243 2244 for (i = 0; i < lp->sl_prefixcnt; ++i) 2245 vim_free(lp->sl_prefprog[i]); 2246 lp->sl_prefixcnt = 0; 2247 vim_free(lp->sl_prefprog); 2248 lp->sl_prefprog = NULL; 2249 2250 vim_free(lp->sl_midword); 2251 lp->sl_midword = NULL; 2252 2253 vim_free(lp->sl_compprog); 2254 vim_free(lp->sl_compstartflags); 2255 vim_free(lp->sl_compallflags); 2256 lp->sl_compprog = NULL; 2257 lp->sl_compstartflags = NULL; 2258 lp->sl_compallflags = NULL; 2259 2260 vim_free(lp->sl_syllable); 2261 lp->sl_syllable = NULL; 2262 ga_clear(&lp->sl_syl_items); 2263 2264 #ifdef FEAT_MBYTE 2265 { 2266 int todo = lp->sl_map_hash.ht_used; 2267 hashitem_T *hi; 2268 2269 for (hi = lp->sl_map_hash.ht_array; todo > 0; ++hi) 2270 if (!HASHITEM_EMPTY(hi)) 2271 { 2272 --todo; 2273 vim_free(hi->hi_key); 2274 } 2275 } 2276 hash_clear(&lp->sl_map_hash); 2277 #endif 2278 2279 lp->sl_compmax = MAXWLEN; 2280 lp->sl_compminlen = 0; 2281 lp->sl_compsylmax = MAXWLEN; 2282 lp->sl_regions[0] = NUL; 2283 } 2284 2285 /* 2286 * Load one spell file and store the info into a slang_T. 2287 * Invoked through do_in_runtimepath(). 2288 */ 2289 static void 2290 spell_load_cb(fname, cookie) 2291 char_u *fname; 2292 void *cookie; 2293 { 2294 spelload_T *slp = (spelload_T *)cookie; 2295 slang_T *slang; 2296 2297 slang = spell_load_file(fname, slp->sl_lang, NULL, FALSE); 2298 if (slang != NULL) 2299 { 2300 /* When a previously loaded file has NOBREAK also use it for the 2301 * ".add" files. */ 2302 if (slp->sl_nobreak && slang->sl_add) 2303 slang->sl_nobreak = TRUE; 2304 else if (slang->sl_nobreak) 2305 slp->sl_nobreak = TRUE; 2306 2307 slp->sl_slang = slang; 2308 } 2309 } 2310 2311 /* 2312 * Load one spell file and store the info into a slang_T. 
2313 * 2314 * This is invoked in two ways: 2315 * - From spell_load_cb() to load a spell file for the first time. "lang" is 2316 * the language name, "old_lp" is NULL. Will allocate an slang_T. 2317 * - To reload a spell file that was changed. "lang" is NULL and "old_lp" 2318 * points to the existing slang_T. 2319 * Returns the slang_T the spell file was loaded into. NULL for error. 2320 */ 2321 static slang_T * 2322 spell_load_file(fname, lang, old_lp, silent) 2323 char_u *fname; 2324 char_u *lang; 2325 slang_T *old_lp; 2326 int silent; /* no error if file doesn't exist */ 2327 { 2328 FILE *fd; 2329 char_u buf[VIMSPELLMAGICL]; 2330 char_u *p; 2331 char_u *bp; 2332 idx_T *ip; 2333 int i; 2334 int n; 2335 int len; 2336 int round; 2337 char_u *save_sourcing_name = sourcing_name; 2338 linenr_T save_sourcing_lnum = sourcing_lnum; 2339 slang_T *lp = NULL; 2340 idx_T idx; 2341 int c = 0; 2342 int res; 2343 2344 fd = mch_fopen((char *)fname, "r"); 2345 if (fd == NULL) 2346 { 2347 if (!silent) 2348 EMSG2(_(e_notopen), fname); 2349 else if (p_verbose > 2) 2350 { 2351 verbose_enter(); 2352 smsg((char_u *)e_notopen, fname); 2353 verbose_leave(); 2354 } 2355 goto endFAIL; 2356 } 2357 if (p_verbose > 2) 2358 { 2359 verbose_enter(); 2360 smsg((char_u *)_("Reading spell file \"%s\""), fname); 2361 verbose_leave(); 2362 } 2363 2364 if (old_lp == NULL) 2365 { 2366 lp = slang_alloc(lang); 2367 if (lp == NULL) 2368 goto endFAIL; 2369 2370 /* Remember the file name, used to reload the file when it's updated. */ 2371 lp->sl_fname = vim_strsave(fname); 2372 if (lp->sl_fname == NULL) 2373 goto endFAIL; 2374 2375 /* Check for .add.spl. */ 2376 lp->sl_add = strstr((char *)gettail(fname), ".add.") != NULL; 2377 } 2378 else 2379 lp = old_lp; 2380 2381 /* Set sourcing_name, so that error messages mention the file name. 
*/ 2382 sourcing_name = fname; 2383 sourcing_lnum = 0; 2384 2385 /* <HEADER>: <fileID> 2386 */ 2387 for (i = 0; i < VIMSPELLMAGICL; ++i) 2388 buf[i] = getc(fd); /* <fileID> */ 2389 if (STRNCMP(buf, VIMSPELLMAGIC, VIMSPELLMAGICL) != 0) 2390 { 2391 EMSG(_("E757: This does not look like a spell file")); 2392 goto endFAIL; 2393 } 2394 c = getc(fd); /* <versionnr> */ 2395 if (c < VIMSPELLVERSION) 2396 { 2397 EMSG(_("E771: Old spell file, needs to be updated")); 2398 goto endFAIL; 2399 } 2400 else if (c > VIMSPELLVERSION) 2401 { 2402 EMSG(_("E772: Spell file is for newer version of Vim")); 2403 goto endFAIL; 2404 } 2405 2406 2407 /* 2408 * <SECTIONS>: <section> ... <sectionend> 2409 * <section>: <sectionID> <sectionflags> <sectionlen> (section contents) 2410 */ 2411 for (;;) 2412 { 2413 n = getc(fd); /* <sectionID> or <sectionend> */ 2414 if (n == SN_END) 2415 break; 2416 c = getc(fd); /* <sectionflags> */ 2417 len = (getc(fd) << 24) + (getc(fd) << 16) + (getc(fd) << 8) + getc(fd); 2418 /* <sectionlen> */ 2419 if (len < 0) 2420 goto truncerr; 2421 2422 res = 0; 2423 switch (n) 2424 { 2425 case SN_REGION: 2426 res = read_region_section(fd, lp, len); 2427 break; 2428 2429 case SN_CHARFLAGS: 2430 res = read_charflags_section(fd); 2431 break; 2432 2433 case SN_MIDWORD: 2434 lp->sl_midword = read_string(fd, len); /* <midword> */ 2435 if (lp->sl_midword == NULL) 2436 goto endFAIL; 2437 break; 2438 2439 case SN_PREFCOND: 2440 res = read_prefcond_section(fd, lp); 2441 break; 2442 2443 case SN_REP: 2444 res = read_rep_section(fd, lp); 2445 break; 2446 2447 case SN_SAL: 2448 res = read_sal_section(fd, lp); 2449 break; 2450 2451 case SN_SOFO: 2452 res = read_sofo_section(fd, lp); 2453 break; 2454 2455 case SN_MAP: 2456 p = read_string(fd, len); /* <mapstr> */ 2457 if (p == NULL) 2458 goto endFAIL; 2459 set_map_str(lp, p); 2460 vim_free(p); 2461 break; 2462 2463 case SN_COMPOUND: 2464 res = read_compound(fd, lp, len); 2465 break; 2466 2467 case SN_NOBREAK: 2468 lp->sl_nobreak = 
TRUE; 2469 break; 2470 2471 case SN_SYLLABLE: 2472 lp->sl_syllable = read_string(fd, len); /* <syllable> */ 2473 if (lp->sl_syllable == NULL) 2474 goto endFAIL; 2475 if (init_syl_tab(lp) == FAIL) 2476 goto endFAIL; 2477 break; 2478 2479 default: 2480 /* Unsupported section. When it's required give an error 2481 * message. When it's not required skip the contents. */ 2482 if (c & SNF_REQUIRED) 2483 { 2484 EMSG(_("E770: Unsupported section in spell file")); 2485 goto endFAIL; 2486 } 2487 while (--len >= 0) 2488 if (getc(fd) < 0) 2489 goto truncerr; 2490 break; 2491 } 2492 if (res == SP_FORMERROR) 2493 { 2494 formerr: 2495 EMSG(_(e_format)); 2496 goto endFAIL; 2497 } 2498 if (res == SP_TRUNCERROR) 2499 { 2500 truncerr: 2501 EMSG(_(e_spell_trunc)); 2502 goto endFAIL; 2503 } 2504 if (res == SP_OTHERERROR) 2505 goto endFAIL; 2506 } 2507 2508 /* round 1: <LWORDTREE> 2509 * round 2: <KWORDTREE> 2510 * round 3: <PREFIXTREE> */ 2511 for (round = 1; round <= 3; ++round) 2512 { 2513 /* The tree size was computed when writing the file, so that we can 2514 * allocate it as one long block. <nodecount> */ 2515 len = (getc(fd) << 24) + (getc(fd) << 16) + (getc(fd) << 8) + getc(fd); 2516 if (len < 0) 2517 goto truncerr; 2518 if (len > 0) 2519 { 2520 /* Allocate the byte array. */ 2521 bp = lalloc((long_u)len, TRUE); 2522 if (bp == NULL) 2523 goto endFAIL; 2524 if (round == 1) 2525 lp->sl_fbyts = bp; 2526 else if (round == 2) 2527 lp->sl_kbyts = bp; 2528 else 2529 lp->sl_pbyts = bp; 2530 2531 /* Allocate the index array. */ 2532 ip = (idx_T *)lalloc_clear((long_u)(len * sizeof(int)), TRUE); 2533 if (ip == NULL) 2534 goto endFAIL; 2535 if (round == 1) 2536 lp->sl_fidxs = ip; 2537 else if (round == 2) 2538 lp->sl_kidxs = ip; 2539 else 2540 lp->sl_pidxs = ip; 2541 2542 /* Read the tree and store it in the array. 
*/ 2543 idx = read_tree(fd, bp, ip, len, 0, round == 3, lp->sl_prefixcnt); 2544 if (idx == -1) 2545 goto truncerr; 2546 if (idx < 0) 2547 goto formerr; 2548 } 2549 } 2550 2551 /* For a new file link it in the list of spell files. */ 2552 if (old_lp == NULL) 2553 { 2554 lp->sl_next = first_lang; 2555 first_lang = lp; 2556 } 2557 2558 goto endOK; 2559 2560 endFAIL: 2561 if (lang != NULL) 2562 /* truncating the name signals the error to spell_load_lang() */ 2563 *lang = NUL; 2564 if (lp != NULL && old_lp == NULL) 2565 slang_free(lp); 2566 lp = NULL; 2567 2568 endOK: 2569 if (fd != NULL) 2570 fclose(fd); 2571 sourcing_name = save_sourcing_name; 2572 sourcing_lnum = save_sourcing_lnum; 2573 2574 return lp; 2575 } 2576 2577 /* 2578 * Read a length field from "fd" in "cnt_bytes" bytes. 2579 * Allocate memory, read the string into it and add a NUL at the end. 2580 * Returns NULL when the count is zero. 2581 * Sets "*cntp" to SP_*ERROR when there is an error, length of the result 2582 * otherwise. 2583 */ 2584 static char_u * 2585 read_cnt_string(fd, cnt_bytes, cntp) 2586 FILE *fd; 2587 int cnt_bytes; 2588 int *cntp; 2589 { 2590 int cnt = 0; 2591 int i; 2592 char_u *str; 2593 2594 /* read the length bytes, MSB first */ 2595 for (i = 0; i < cnt_bytes; ++i) 2596 cnt = (cnt << 8) + getc(fd); 2597 if (cnt < 0) 2598 { 2599 *cntp = SP_TRUNCERROR; 2600 return NULL; 2601 } 2602 *cntp = cnt; 2603 if (cnt == 0) 2604 return NULL; /* nothing to read, return NULL */ 2605 2606 str = read_string(fd, cnt); 2607 if (str == NULL) 2608 *cntp = SP_OTHERERROR; 2609 return str; 2610 } 2611 2612 /* 2613 * Read a string of length "cnt" from "fd" into allocated memory. 2614 * Returns NULL when out of memory. 2615 */ 2616 static char_u * 2617 read_string(fd, cnt) 2618 FILE *fd; 2619 int cnt; 2620 { 2621 char_u *str; 2622 int i; 2623 2624 /* allocate memory */ 2625 str = alloc((unsigned)cnt + 1); 2626 if (str != NULL) 2627 { 2628 /* Read the string. Doesn't check for truncated file. 
*/ 2629 for (i = 0; i < cnt; ++i) 2630 str[i] = getc(fd); 2631 str[i] = NUL; 2632 } 2633 return str; 2634 } 2635 2636 /* 2637 * Read SN_REGION: <regionname> ... 2638 * Return SP_*ERROR flags. 2639 */ 2640 static int 2641 read_region_section(fd, lp, len) 2642 FILE *fd; 2643 slang_T *lp; 2644 int len; 2645 { 2646 int i; 2647 2648 if (len > 16) 2649 return SP_FORMERROR; 2650 for (i = 0; i < len; ++i) 2651 lp->sl_regions[i] = getc(fd); /* <regionname> */ 2652 lp->sl_regions[len] = NUL; 2653 return 0; 2654 } 2655 2656 /* 2657 * Read SN_CHARFLAGS section: <charflagslen> <charflags> 2658 * <folcharslen> <folchars> 2659 * Return SP_*ERROR flags. 2660 */ 2661 static int 2662 read_charflags_section(fd) 2663 FILE *fd; 2664 { 2665 char_u *flags; 2666 char_u *fol; 2667 int flagslen, follen; 2668 2669 /* <charflagslen> <charflags> */ 2670 flags = read_cnt_string(fd, 1, &flagslen); 2671 if (flagslen < 0) 2672 return flagslen; 2673 2674 /* <folcharslen> <folchars> */ 2675 fol = read_cnt_string(fd, 2, &follen); 2676 if (follen < 0) 2677 { 2678 vim_free(flags); 2679 return follen; 2680 } 2681 2682 /* Set the word-char flags and fill SPELL_ISUPPER() table. */ 2683 if (flags != NULL && fol != NULL) 2684 set_spell_charflags(flags, flagslen, fol); 2685 2686 vim_free(flags); 2687 vim_free(fol); 2688 2689 /* When <charflagslen> is zero then <fcharlen> must also be zero. */ 2690 if ((flags == NULL) != (fol == NULL)) 2691 return SP_FORMERROR; 2692 return 0; 2693 } 2694 2695 /* 2696 * Read SN_PREFCOND section. 2697 * Return SP_*ERROR flags. 2698 */ 2699 static int 2700 read_prefcond_section(fd, lp) 2701 FILE *fd; 2702 slang_T *lp; 2703 { 2704 int cnt; 2705 int i; 2706 int n; 2707 char_u *p; 2708 char_u buf[MAXWLEN + 1]; 2709 2710 /* <prefcondcnt> <prefcond> ... 
*/ 2711 cnt = (getc(fd) << 8) + getc(fd); /* <prefcondcnt> */ 2712 if (cnt <= 0) 2713 return SP_FORMERROR; 2714 2715 lp->sl_prefprog = (regprog_T **)alloc_clear( 2716 (unsigned)sizeof(regprog_T *) * cnt); 2717 if (lp->sl_prefprog == NULL) 2718 return SP_OTHERERROR; 2719 lp->sl_prefixcnt = cnt; 2720 2721 for (i = 0; i < cnt; ++i) 2722 { 2723 /* <prefcond> : <condlen> <condstr> */ 2724 n = getc(fd); /* <condlen> */ 2725 if (n < 0 || n >= MAXWLEN) 2726 return SP_FORMERROR; 2727 2728 /* When <condlen> is zero we have an empty condition. Otherwise 2729 * compile the regexp program used to check for the condition. */ 2730 if (n > 0) 2731 { 2732 buf[0] = '^'; /* always match at one position only */ 2733 p = buf + 1; 2734 while (n-- > 0) 2735 *p++ = getc(fd); /* <condstr> */ 2736 *p = NUL; 2737 lp->sl_prefprog[i] = vim_regcomp(buf, RE_MAGIC + RE_STRING); 2738 } 2739 } 2740 return 0; 2741 } 2742 2743 /* 2744 * Read REP items section from "fd": <repcount> <rep> ... 2745 * Return SP_*ERROR flags. 2746 */ 2747 static int 2748 read_rep_section(fd, slang) 2749 FILE *fd; 2750 slang_T *slang; 2751 { 2752 int cnt; 2753 garray_T *gap; 2754 fromto_T *ftp; 2755 short *first; 2756 int i; 2757 2758 cnt = (getc(fd) << 8) + getc(fd); /* <repcount> */ 2759 if (cnt < 0) 2760 return SP_TRUNCERROR; 2761 2762 gap = &slang->sl_rep; 2763 if (ga_grow(gap, cnt) == FAIL) 2764 return SP_OTHERERROR; 2765 2766 /* <rep> : <repfromlen> <repfrom> <reptolen> <repto> */ 2767 for (; gap->ga_len < cnt; ++gap->ga_len) 2768 { 2769 ftp = &((fromto_T *)gap->ga_data)[gap->ga_len]; 2770 ftp->ft_from = read_cnt_string(fd, 1, &i); 2771 if (i < 0) 2772 return i; 2773 if (i == 0) 2774 return SP_FORMERROR; 2775 ftp->ft_to = read_cnt_string(fd, 1, &i); 2776 if (i <= 0) 2777 { 2778 vim_free(ftp->ft_from); 2779 if (i < 0) 2780 return i; 2781 return SP_FORMERROR; 2782 } 2783 } 2784 2785 /* Fill the first-index table. 
*/ 2786 first = slang->sl_rep_first; 2787 for (i = 0; i < 256; ++i) 2788 first[i] = -1; 2789 for (i = 0; i < gap->ga_len; ++i) 2790 { 2791 ftp = &((fromto_T *)gap->ga_data)[i]; 2792 if (first[*ftp->ft_from] == -1) 2793 first[*ftp->ft_from] = i; 2794 } 2795 return 0; 2796 } 2797 2798 /* 2799 * Read SN_SAL section: <salflags> <salcount> <sal> ... 2800 * Return SP_*ERROR flags. 2801 */ 2802 static int 2803 read_sal_section(fd, slang) 2804 FILE *fd; 2805 slang_T *slang; 2806 { 2807 int i; 2808 int cnt; 2809 garray_T *gap; 2810 salitem_T *smp; 2811 int ccnt; 2812 char_u *p; 2813 int c = NUL; 2814 2815 slang->sl_sofo = FALSE; 2816 2817 i = getc(fd); /* <salflags> */ 2818 if (i & SAL_F0LLOWUP) 2819 slang->sl_followup = TRUE; 2820 if (i & SAL_COLLAPSE) 2821 slang->sl_collapse = TRUE; 2822 if (i & SAL_REM_ACCENTS) 2823 slang->sl_rem_accents = TRUE; 2824 2825 cnt = (getc(fd) << 8) + getc(fd); /* <salcount> */ 2826 if (cnt < 0) 2827 return SP_TRUNCERROR; 2828 2829 gap = &slang->sl_sal; 2830 ga_init2(gap, sizeof(salitem_T), 10); 2831 if (ga_grow(gap, cnt + 1) == FAIL) 2832 return SP_OTHERERROR; 2833 2834 /* <sal> : <salfromlen> <salfrom> <saltolen> <salto> */ 2835 for (; gap->ga_len < cnt; ++gap->ga_len) 2836 { 2837 smp = &((salitem_T *)gap->ga_data)[gap->ga_len]; 2838 ccnt = getc(fd); /* <salfromlen> */ 2839 if (ccnt < 0) 2840 return SP_TRUNCERROR; 2841 if ((p = alloc(ccnt + 2)) == NULL) 2842 return SP_OTHERERROR; 2843 smp->sm_lead = p; 2844 2845 /* Read up to the first special char into sm_lead. */ 2846 for (i = 0; i < ccnt; ++i) 2847 { 2848 c = getc(fd); /* <salfrom> */ 2849 if (vim_strchr((char_u *)"0123456789(-<^$", c) != NULL) 2850 break; 2851 *p++ = c; 2852 } 2853 smp->sm_leadlen = p - smp->sm_lead; 2854 *p++ = NUL; 2855 2856 /* Put (abc) chars in sm_oneof, if any. 
*/ 2857 if (c == '(') 2858 { 2859 smp->sm_oneof = p; 2860 for (++i; i < ccnt; ++i) 2861 { 2862 c = getc(fd); /* <salfrom> */ 2863 if (c == ')') 2864 break; 2865 *p++ = c; 2866 } 2867 *p++ = NUL; 2868 if (++i < ccnt) 2869 c = getc(fd); 2870 } 2871 else 2872 smp->sm_oneof = NULL; 2873 2874 /* Any following chars go in sm_rules. */ 2875 smp->sm_rules = p; 2876 if (i < ccnt) 2877 /* store the char we got while checking for end of sm_lead */ 2878 *p++ = c; 2879 for (++i; i < ccnt; ++i) 2880 *p++ = getc(fd); /* <salfrom> */ 2881 *p++ = NUL; 2882 2883 /* <saltolen> <salto> */ 2884 smp->sm_to = read_cnt_string(fd, 1, &ccnt); 2885 if (ccnt < 0) 2886 { 2887 vim_free(smp->sm_lead); 2888 return ccnt; 2889 } 2890 2891 #ifdef FEAT_MBYTE 2892 if (has_mbyte) 2893 { 2894 /* convert the multi-byte strings to wide char strings */ 2895 smp->sm_lead_w = mb_str2wide(smp->sm_lead); 2896 smp->sm_leadlen = mb_charlen(smp->sm_lead); 2897 if (smp->sm_oneof == NULL) 2898 smp->sm_oneof_w = NULL; 2899 else 2900 smp->sm_oneof_w = mb_str2wide(smp->sm_oneof); 2901 if (smp->sm_to == NULL) 2902 smp->sm_to_w = NULL; 2903 else 2904 smp->sm_to_w = mb_str2wide(smp->sm_to); 2905 if (smp->sm_lead_w == NULL 2906 || (smp->sm_oneof_w == NULL && smp->sm_oneof != NULL) 2907 || (smp->sm_to_w == NULL && smp->sm_to != NULL)) 2908 { 2909 vim_free(smp->sm_lead); 2910 vim_free(smp->sm_to); 2911 vim_free(smp->sm_lead_w); 2912 vim_free(smp->sm_oneof_w); 2913 vim_free(smp->sm_to_w); 2914 return SP_OTHERERROR; 2915 } 2916 } 2917 #endif 2918 } 2919 2920 if (gap->ga_len > 0) 2921 { 2922 /* Add one extra entry to mark the end with an empty sm_lead. Avoids 2923 * that we need to check the index every time. 
*/ 2924 smp = &((salitem_T *)gap->ga_data)[gap->ga_len]; 2925 if ((p = alloc(1)) == NULL) 2926 return SP_OTHERERROR; 2927 p[0] = NUL; 2928 smp->sm_lead = p; 2929 smp->sm_leadlen = 0; 2930 smp->sm_oneof = NULL; 2931 smp->sm_rules = p; 2932 smp->sm_to = NULL; 2933 #ifdef FEAT_MBYTE 2934 if (has_mbyte) 2935 { 2936 smp->sm_lead_w = mb_str2wide(smp->sm_lead); 2937 smp->sm_leadlen = 0; 2938 smp->sm_oneof_w = NULL; 2939 smp->sm_to_w = NULL; 2940 } 2941 #endif 2942 ++gap->ga_len; 2943 } 2944 2945 /* Fill the first-index table. */ 2946 set_sal_first(slang); 2947 2948 return 0; 2949 } 2950 2951 /* 2952 * SN_SOFO: <sofofromlen> <sofofrom> <sofotolen> <sofoto> 2953 * Return SP_*ERROR flags. 2954 */ 2955 static int 2956 read_sofo_section(fd, slang) 2957 FILE *fd; 2958 slang_T *slang; 2959 { 2960 int cnt; 2961 char_u *from, *to; 2962 int res; 2963 2964 slang->sl_sofo = TRUE; 2965 2966 /* <sofofromlen> <sofofrom> */ 2967 from = read_cnt_string(fd, 2, &cnt); 2968 if (cnt < 0) 2969 return cnt; 2970 2971 /* <sofotolen> <sofoto> */ 2972 to = read_cnt_string(fd, 2, &cnt); 2973 if (cnt < 0) 2974 { 2975 vim_free(from); 2976 return cnt; 2977 } 2978 2979 /* Store the info in slang->sl_sal and/or slang->sl_sal_first. */ 2980 if (from != NULL && to != NULL) 2981 res = set_sofo(slang, from, to); 2982 else if (from != NULL || to != NULL) 2983 res = SP_FORMERROR; /* only one of two strings is an error */ 2984 else 2985 res = 0; 2986 2987 vim_free(from); 2988 vim_free(to); 2989 return res; 2990 } 2991 2992 /* 2993 * Read the compound section from the .spl file: 2994 * <compmax> <compminlen> <compsylmax> <compflags> 2995 * Returns SP_*ERROR flags. 
2996 */ 2997 static int 2998 read_compound(fd, slang, len) 2999 FILE *fd; 3000 slang_T *slang; 3001 int len; 3002 { 3003 int todo = len; 3004 int c; 3005 int atstart; 3006 char_u *pat; 3007 char_u *pp; 3008 char_u *cp; 3009 char_u *ap; 3010 3011 if (todo < 2) 3012 return SP_FORMERROR; /* need at least two bytes */ 3013 3014 --todo; 3015 c = getc(fd); /* <compmax> */ 3016 if (c < 2) 3017 c = MAXWLEN; 3018 slang->sl_compmax = c; 3019 3020 --todo; 3021 c = getc(fd); /* <compminlen> */ 3022 if (c < 1) 3023 c = 0; 3024 slang->sl_compminlen = c; 3025 3026 --todo; 3027 c = getc(fd); /* <compsylmax> */ 3028 if (c < 1) 3029 c = MAXWLEN; 3030 slang->sl_compsylmax = c; 3031 3032 /* Turn the COMPOUNDFLAGS items into a regexp pattern: 3033 * "a[bc]/a*b+" -> "^\(a[bc]\|a*b\+\)$". 3034 * Inserting backslashes may double the length, "^\(\)$<Nul>" is 7 bytes. 3035 * Conversion to utf-8 may double the size. */ 3036 c = todo * 2 + 7; 3037 #ifdef FEAT_MBYTE 3038 if (enc_utf8) 3039 c += todo * 2; 3040 #endif 3041 pat = alloc((unsigned)c); 3042 if (pat == NULL) 3043 return SP_OTHERERROR; 3044 3045 /* We also need a list of all flags that can appear at the start and one 3046 * for all flags. */ 3047 cp = alloc(todo + 1); 3048 if (cp == NULL) 3049 { 3050 vim_free(pat); 3051 return SP_OTHERERROR; 3052 } 3053 slang->sl_compstartflags = cp; 3054 *cp = NUL; 3055 3056 ap = alloc(todo + 1); 3057 if (ap == NULL) 3058 { 3059 vim_free(pat); 3060 return SP_OTHERERROR; 3061 } 3062 slang->sl_compallflags = ap; 3063 *ap = NUL; 3064 3065 pp = pat; 3066 *pp++ = '^'; 3067 *pp++ = '\\'; 3068 *pp++ = '('; 3069 3070 atstart = 1; 3071 while (todo-- > 0) 3072 { 3073 c = getc(fd); /* <compflags> */ 3074 3075 /* Add all flags to "sl_compallflags". */ 3076 if (vim_strchr((char_u *)"+*[]/", c) == NULL 3077 && !byte_in_str(slang->sl_compallflags, c)) 3078 { 3079 *ap++ = c; 3080 *ap = NUL; 3081 } 3082 3083 if (atstart != 0) 3084 { 3085 /* At start of item: copy flags to "sl_compstartflags". 
For a 3086 * [abc] item set "atstart" to 2 and copy up to the ']'. */ 3087 if (c == '[') 3088 atstart = 2; 3089 else if (c == ']') 3090 atstart = 0; 3091 else 3092 { 3093 if (!byte_in_str(slang->sl_compstartflags, c)) 3094 { 3095 *cp++ = c; 3096 *cp = NUL; 3097 } 3098 if (atstart == 1) 3099 atstart = 0; 3100 } 3101 } 3102 if (c == '/') /* slash separates two items */ 3103 { 3104 *pp++ = '\\'; 3105 *pp++ = '|'; 3106 atstart = 1; 3107 } 3108 else /* normal char, "[abc]" and '*' are copied as-is */ 3109 { 3110 if (c == '+' || c == '~') 3111 *pp++ = '\\'; /* "a+" becomes "a\+" */ 3112 #ifdef FEAT_MBYTE 3113 if (enc_utf8) 3114 pp += mb_char2bytes(c, pp); 3115 else 3116 #endif 3117 *pp++ = c; 3118 } 3119 } 3120 3121 *pp++ = '\\'; 3122 *pp++ = ')'; 3123 *pp++ = '$'; 3124 *pp = NUL; 3125 3126 slang->sl_compprog = vim_regcomp(pat, RE_MAGIC + RE_STRING + RE_STRICT); 3127 vim_free(pat); 3128 if (slang->sl_compprog == NULL) 3129 return SP_FORMERROR; 3130 3131 return 0; 3132 } 3133 3134 /* 3135 * Return TRUE if byte "n" appears in "str". 3136 * Like strchr() but independent of locale. 3137 */ 3138 static int 3139 byte_in_str(str, n) 3140 char_u *str; 3141 int n; 3142 { 3143 char_u *p; 3144 3145 for (p = str; *p != NUL; ++p) 3146 if (*p == n) 3147 return TRUE; 3148 return FALSE; 3149 } 3150 3151 #define SY_MAXLEN 30 3152 typedef struct syl_item_S 3153 { 3154 char_u sy_chars[SY_MAXLEN]; /* the sequence of chars */ 3155 int sy_len; 3156 } syl_item_T; 3157 3158 /* 3159 * Truncate "slang->sl_syllable" at the first slash and put the following items 3160 * in "slang->sl_syl_items". 
3161 */ 3162 static int 3163 init_syl_tab(slang) 3164 slang_T *slang; 3165 { 3166 char_u *p; 3167 char_u *s; 3168 int l; 3169 syl_item_T *syl; 3170 3171 ga_init2(&slang->sl_syl_items, sizeof(syl_item_T), 4); 3172 p = vim_strchr(slang->sl_syllable, '/'); 3173 while (p != NULL) 3174 { 3175 *p++ = NUL; 3176 if (*p == NUL) /* trailing slash */ 3177 break; 3178 s = p; 3179 p = vim_strchr(p, '/'); 3180 if (p == NULL) 3181 l = STRLEN(s); 3182 else 3183 l = p - s; 3184 if (l >= SY_MAXLEN) 3185 return SP_FORMERROR; 3186 if (ga_grow(&slang->sl_syl_items, 1) == FAIL) 3187 return SP_OTHERERROR; 3188 syl = ((syl_item_T *)slang->sl_syl_items.ga_data) 3189 + slang->sl_syl_items.ga_len++; 3190 vim_strncpy(syl->sy_chars, s, l); 3191 syl->sy_len = l; 3192 } 3193 return OK; 3194 } 3195 3196 /* 3197 * Count the number of syllables in "word". 3198 * When "word" contains spaces the syllables after the last space are counted. 3199 * Returns zero if syllables are not defines. 3200 */ 3201 static int 3202 count_syllables(slang, word) 3203 slang_T *slang; 3204 char_u *word; 3205 { 3206 int cnt = 0; 3207 int skip = FALSE; 3208 char_u *p; 3209 int len; 3210 int i; 3211 syl_item_T *syl; 3212 int c; 3213 3214 if (slang->sl_syllable == NULL) 3215 return 0; 3216 3217 for (p = word; *p != NUL; p += len) 3218 { 3219 /* When running into a space reset counter. */ 3220 if (*p == ' ') 3221 { 3222 len = 1; 3223 cnt = 0; 3224 continue; 3225 } 3226 3227 /* Find longest match of syllable items. */ 3228 len = 0; 3229 for (i = 0; i < slang->sl_syl_items.ga_len; ++i) 3230 { 3231 syl = ((syl_item_T *)slang->sl_syl_items.ga_data) + i; 3232 if (syl->sy_len > len 3233 && STRNCMP(p, syl->sy_chars, syl->sy_len) == 0) 3234 len = syl->sy_len; 3235 } 3236 if (len != 0) /* found a match, count syllable */ 3237 { 3238 ++cnt; 3239 skip = FALSE; 3240 } 3241 else 3242 { 3243 /* No recognized syllable item, at least a syllable char then? 
*/ 3244 #ifdef FEAT_MBYTE 3245 c = mb_ptr2char(p); 3246 len = (*mb_ptr2len)(p); 3247 #else 3248 c = *p; 3249 len = 1; 3250 #endif 3251 if (vim_strchr(slang->sl_syllable, c) == NULL) 3252 skip = FALSE; /* No, search for next syllable */ 3253 else if (!skip) 3254 { 3255 ++cnt; /* Yes, count it */ 3256 skip = TRUE; /* don't count following syllable chars */ 3257 } 3258 } 3259 } 3260 return cnt; 3261 } 3262 3263 /* 3264 * Set the SOFOFROM and SOFOTO items in language "lp". 3265 * Returns SP_*ERROR flags when there is something wrong. 3266 */ 3267 static int 3268 set_sofo(lp, from, to) 3269 slang_T *lp; 3270 char_u *from; 3271 char_u *to; 3272 { 3273 int i; 3274 3275 #ifdef FEAT_MBYTE 3276 garray_T *gap; 3277 char_u *s; 3278 char_u *p; 3279 int c; 3280 int *inp; 3281 3282 if (has_mbyte) 3283 { 3284 /* Use "sl_sal" as an array with 256 pointers to a list of wide 3285 * characters. The index is the low byte of the character. 3286 * The list contains from-to pairs with a terminating NUL. 3287 * sl_sal_first[] is used for latin1 "from" characters. */ 3288 gap = &lp->sl_sal; 3289 ga_init2(gap, sizeof(int *), 1); 3290 if (ga_grow(gap, 256) == FAIL) 3291 return SP_OTHERERROR; 3292 vim_memset(gap->ga_data, 0, sizeof(int *) * 256); 3293 gap->ga_len = 256; 3294 3295 /* First count the number of items for each list. Temporarily use 3296 * sl_sal_first[] for this. */ 3297 for (p = from, s = to; *p != NUL && *s != NUL; ) 3298 { 3299 c = mb_cptr2char_adv(&p); 3300 mb_cptr_adv(s); 3301 if (c >= 256) 3302 ++lp->sl_sal_first[c & 0xff]; 3303 } 3304 if (*p != NUL || *s != NUL) /* lengths differ */ 3305 return SP_FORMERROR; 3306 3307 /* Allocate the lists. 
*/ 3308 for (i = 0; i < 256; ++i) 3309 if (lp->sl_sal_first[i] > 0) 3310 { 3311 p = alloc(sizeof(int) * (lp->sl_sal_first[i] * 2 + 1)); 3312 if (p == NULL) 3313 return SP_OTHERERROR; 3314 ((int **)gap->ga_data)[i] = (int *)p; 3315 *(int *)p = 0; 3316 } 3317 3318 /* Put the characters up to 255 in sl_sal_first[] the rest in a sl_sal 3319 * list. */ 3320 vim_memset(lp->sl_sal_first, 0, sizeof(salfirst_T) * 256); 3321 for (p = from, s = to; *p != NUL && *s != NUL; ) 3322 { 3323 c = mb_cptr2char_adv(&p); 3324 i = mb_cptr2char_adv(&s); 3325 if (c >= 256) 3326 { 3327 /* Append the from-to chars at the end of the list with 3328 * the low byte. */ 3329 inp = ((int **)gap->ga_data)[c & 0xff]; 3330 while (*inp != 0) 3331 ++inp; 3332 *inp++ = c; /* from char */ 3333 *inp++ = i; /* to char */ 3334 *inp++ = NUL; /* NUL at the end */ 3335 } 3336 else 3337 /* mapping byte to char is done in sl_sal_first[] */ 3338 lp->sl_sal_first[c] = i; 3339 } 3340 } 3341 else 3342 #endif 3343 { 3344 /* mapping bytes to bytes is done in sl_sal_first[] */ 3345 if (STRLEN(from) != STRLEN(to)) 3346 return SP_FORMERROR; 3347 3348 for (i = 0; to[i] != NUL; ++i) 3349 lp->sl_sal_first[from[i]] = to[i]; 3350 lp->sl_sal.ga_len = 1; /* indicates we have soundfolding */ 3351 } 3352 3353 return 0; 3354 } 3355 3356 /* 3357 * Fill the first-index table for "lp". 3358 */ 3359 static void 3360 set_sal_first(lp) 3361 slang_T *lp; 3362 { 3363 salfirst_T *sfirst; 3364 int i; 3365 salitem_T *smp; 3366 int c; 3367 garray_T *gap = &lp->sl_sal; 3368 3369 sfirst = lp->sl_sal_first; 3370 for (i = 0; i < 256; ++i) 3371 sfirst[i] = -1; 3372 smp = (salitem_T *)gap->ga_data; 3373 for (i = 0; i < gap->ga_len; ++i) 3374 { 3375 #ifdef FEAT_MBYTE 3376 if (has_mbyte) 3377 /* Use the lowest byte of the first character. For latin1 it's 3378 * the character, for other encodings it should differ for most 3379 * characters. 
*/ 3380 c = *smp[i].sm_lead_w & 0xff; 3381 else 3382 #endif 3383 c = *smp[i].sm_lead; 3384 if (sfirst[c] == -1) 3385 { 3386 sfirst[c] = i; 3387 #ifdef FEAT_MBYTE 3388 if (has_mbyte) 3389 { 3390 int n; 3391 3392 /* Make sure all entries with this byte are following each 3393 * other. Move the ones that are in the wrong position. Do 3394 * keep the same ordering! */ 3395 while (i + 1 < gap->ga_len 3396 && (*smp[i + 1].sm_lead_w & 0xff) == c) 3397 /* Skip over entry with same index byte. */ 3398 ++i; 3399 3400 for (n = 1; i + n < gap->ga_len; ++n) 3401 if ((*smp[i + n].sm_lead_w & 0xff) == c) 3402 { 3403 salitem_T tsal; 3404 3405 /* Move entry with same index byte after the entries 3406 * we already found. */ 3407 ++i; 3408 --n; 3409 tsal = smp[i + n]; 3410 mch_memmove(smp + i + 1, smp + i, 3411 sizeof(salitem_T) * n); 3412 smp[i] = tsal; 3413 } 3414 } 3415 #endif 3416 } 3417 } 3418 } 3419 3420 #ifdef FEAT_MBYTE 3421 /* 3422 * Turn a multi-byte string into a wide character string. 3423 * Return it in allocated memory (NULL for out-of-memory) 3424 */ 3425 static int * 3426 mb_str2wide(s) 3427 char_u *s; 3428 { 3429 int *res; 3430 char_u *p; 3431 int i = 0; 3432 3433 res = (int *)alloc(sizeof(int) * (mb_charlen(s) + 1)); 3434 if (res != NULL) 3435 { 3436 for (p = s; *p != NUL; ) 3437 res[i++] = mb_ptr2char_adv(&p); 3438 res[i] = NUL; 3439 } 3440 return res; 3441 } 3442 #endif 3443 3444 /* 3445 * Read one row of siblings from the spell file and store it in the byte array 3446 * "byts" and index array "idxs". Recursively read the children. 3447 * 3448 * NOTE: The code here must match put_node(). 3449 * 3450 * Returns the index follosing the siblings. 3451 * Returns -1 if the file is shorter than expected. 3452 * Returns -2 if there is a format error. 
3453 */ 3454 static idx_T 3455 read_tree(fd, byts, idxs, maxidx, startidx, prefixtree, maxprefcondnr) 3456 FILE *fd; 3457 char_u *byts; 3458 idx_T *idxs; 3459 int maxidx; /* size of arrays */ 3460 idx_T startidx; /* current index in "byts" and "idxs" */ 3461 int prefixtree; /* TRUE for reading PREFIXTREE */ 3462 int maxprefcondnr; /* maximum for <prefcondnr> */ 3463 { 3464 int len; 3465 int i; 3466 int n; 3467 idx_T idx = startidx; 3468 int c; 3469 int c2; 3470 #define SHARED_MASK 0x8000000 3471 3472 len = getc(fd); /* <siblingcount> */ 3473 if (len <= 0) 3474 return -1; 3475 3476 if (startidx + len >= maxidx) 3477 return -2; 3478 byts[idx++] = len; 3479 3480 /* Read the byte values, flag/region bytes and shared indexes. */ 3481 for (i = 1; i <= len; ++i) 3482 { 3483 c = getc(fd); /* <byte> */ 3484 if (c < 0) 3485 return -1; 3486 if (c <= BY_SPECIAL) 3487 { 3488 if (c == BY_NOFLAGS && !prefixtree) 3489 { 3490 /* No flags, all regions. */ 3491 idxs[idx] = 0; 3492 c = 0; 3493 } 3494 else if (c != BY_INDEX) 3495 { 3496 if (prefixtree) 3497 { 3498 /* Read the optional pflags byte, the prefix ID and the 3499 * condition nr. In idxs[] store the prefix ID in the low 3500 * byte, the condition index shifted up 8 bits, the flags 3501 * shifted up 24 bits. */ 3502 if (c == BY_FLAGS) 3503 c = getc(fd) << 24; /* <pflags> */ 3504 else 3505 c = 0; 3506 3507 c |= getc(fd); /* <affixID> */ 3508 3509 n = (getc(fd) << 8) + getc(fd); /* <prefcondnr> */ 3510 if (n >= maxprefcondnr) 3511 return -2; 3512 c |= (n << 8); 3513 } 3514 else /* c must be BY_FLAGS or BY_FLAGS2 */ 3515 { 3516 /* Read flags and optional region and prefix ID. In 3517 * idxs[] the flags go in the low two bytes, region above 3518 * that and prefix ID above the region. 
*/ 3519 c2 = c; 3520 c = getc(fd); /* <flags> */ 3521 if (c2 == BY_FLAGS2) 3522 c = (getc(fd) << 8) + c; /* <flags2> */ 3523 if (c & WF_REGION) 3524 c = (getc(fd) << 16) + c; /* <region> */ 3525 if (c & WF_AFX) 3526 c = (getc(fd) << 24) + c; /* <affixID> */ 3527 } 3528 3529 idxs[idx] = c; 3530 c = 0; 3531 } 3532 else /* c == BY_INDEX */ 3533 { 3534 /* <nodeidx> */ 3535 n = (getc(fd) << 16) + (getc(fd) << 8) + getc(fd); 3536 if (n < 0 || n >= maxidx) 3537 return -2; 3538 idxs[idx] = n + SHARED_MASK; 3539 c = getc(fd); /* <xbyte> */ 3540 } 3541 } 3542 byts[idx++] = c; 3543 } 3544 3545 /* Recursively read the children for non-shared siblings. 3546 * Skip the end-of-word ones (zero byte value) and the shared ones (and 3547 * remove SHARED_MASK) */ 3548 for (i = 1; i <= len; ++i) 3549 if (byts[startidx + i] != 0) 3550 { 3551 if (idxs[startidx + i] & SHARED_MASK) 3552 idxs[startidx + i] &= ~SHARED_MASK; 3553 else 3554 { 3555 idxs[startidx + i] = idx; 3556 idx = read_tree(fd, byts, idxs, maxidx, idx, 3557 prefixtree, maxprefcondnr); 3558 if (idx < 0) 3559 break; 3560 } 3561 } 3562 3563 return idx; 3564 } 3565 3566 /* 3567 * Parse 'spelllang' and set buf->b_langp accordingly. 3568 * Returns NULL if it's OK, an error message otherwise. 3569 */ 3570 char_u * 3571 did_set_spelllang(buf) 3572 buf_T *buf; 3573 { 3574 garray_T ga; 3575 char_u *splp; 3576 char_u *region; 3577 char_u region_cp[3]; 3578 int filename; 3579 int region_mask; 3580 slang_T *slang; 3581 int c; 3582 char_u lang[MAXWLEN + 1]; 3583 char_u spf_name[MAXPATHL]; 3584 int len; 3585 char_u *p; 3586 int round; 3587 char_u *spf; 3588 char_u *use_region = NULL; 3589 int dont_use_region = FALSE; 3590 int nobreak = FALSE; 3591 int i, j; 3592 langp_T *lp, *lp2; 3593 3594 ga_init2(&ga, sizeof(langp_T), 2); 3595 clear_midword(buf); 3596 3597 /* loop over comma separated language names. */ 3598 for (splp = buf->b_p_spl; *splp != NUL; ) 3599 { 3600 /* Get one language name. 
*/ 3601 copy_option_part(&splp, lang, MAXWLEN, ","); 3602 3603 region = NULL; 3604 len = STRLEN(lang); 3605 3606 /* If the name ends in ".spl" use it as the name of the spell file. 3607 * If there is a region name let "region" point to it and remove it 3608 * from the name. */ 3609 if (len > 4 && fnamecmp(lang + len - 4, ".spl") == 0) 3610 { 3611 filename = TRUE; 3612 3613 /* Locate a region and remove it from the file name. */ 3614 p = vim_strchr(gettail(lang), '_'); 3615 if (p != NULL && ASCII_ISALPHA(p[1]) && ASCII_ISALPHA(p[2]) 3616 && !ASCII_ISALPHA(p[3])) 3617 { 3618 vim_strncpy(region_cp, p + 1, 2); 3619 mch_memmove(p, p + 3, len - (p - lang) - 2); 3620 len -= 3; 3621 region = region_cp; 3622 } 3623 else 3624 dont_use_region = TRUE; 3625 3626 /* Check if we loaded this language before. */ 3627 for (slang = first_lang; slang != NULL; slang = slang->sl_next) 3628 if (fullpathcmp(lang, slang->sl_fname, FALSE) == FPC_SAME) 3629 break; 3630 } 3631 else 3632 { 3633 filename = FALSE; 3634 if (len > 3 && lang[len - 3] == '_') 3635 { 3636 region = lang + len - 2; 3637 len -= 3; 3638 lang[len] = NUL; 3639 } 3640 else 3641 dont_use_region = TRUE; 3642 3643 /* Check if we loaded this language before. */ 3644 for (slang = first_lang; slang != NULL; slang = slang->sl_next) 3645 if (STRICMP(lang, slang->sl_name) == 0) 3646 break; 3647 } 3648 3649 if (region != NULL) 3650 { 3651 /* If the region differs from what was used before then don't 3652 * use it for 'spellfile'. */ 3653 if (use_region != NULL && STRCMP(region, use_region) != 0) 3654 dont_use_region = TRUE; 3655 use_region = region; 3656 } 3657 3658 /* If not found try loading the language now. */ 3659 if (slang == NULL) 3660 { 3661 if (filename) 3662 (void)spell_load_file(lang, lang, NULL, FALSE); 3663 else 3664 spell_load_lang(lang); 3665 } 3666 3667 /* 3668 * Loop over the languages, there can be several files for "lang". 
3669 */ 3670 for (slang = first_lang; slang != NULL; slang = slang->sl_next) 3671 if (filename ? fullpathcmp(lang, slang->sl_fname, FALSE) == FPC_SAME 3672 : STRICMP(lang, slang->sl_name) == 0) 3673 { 3674 region_mask = REGION_ALL; 3675 if (!filename && region != NULL) 3676 { 3677 /* find region in sl_regions */ 3678 c = find_region(slang->sl_regions, region); 3679 if (c == REGION_ALL) 3680 { 3681 if (slang->sl_add) 3682 { 3683 if (*slang->sl_regions != NUL) 3684 /* This addition file is for other regions. */ 3685 region_mask = 0; 3686 } 3687 else 3688 /* This is probably an error. Give a warning and 3689 * accept the words anyway. */ 3690 smsg((char_u *) 3691 _("Warning: region %s not supported"), 3692 region); 3693 } 3694 else 3695 region_mask = 1 << c; 3696 } 3697 3698 if (region_mask != 0) 3699 { 3700 if (ga_grow(&ga, 1) == FAIL) 3701 { 3702 ga_clear(&ga); 3703 return e_outofmem; 3704 } 3705 LANGP_ENTRY(ga, ga.ga_len)->lp_slang = slang; 3706 LANGP_ENTRY(ga, ga.ga_len)->lp_region = region_mask; 3707 ++ga.ga_len; 3708 use_midword(slang, buf); 3709 if (slang->sl_nobreak) 3710 nobreak = TRUE; 3711 } 3712 } 3713 } 3714 3715 /* round 0: load int_wordlist, if possible. 3716 * round 1: load first name in 'spellfile'. 3717 * round 2: load second name in 'spellfile. 3718 * etc. */ 3719 spf = curbuf->b_p_spf; 3720 for (round = 0; round == 0 || *spf != NUL; ++round) 3721 { 3722 if (round == 0) 3723 { 3724 /* Internal wordlist, if there is one. */ 3725 if (int_wordlist == NULL) 3726 continue; 3727 int_wordlist_spl(spf_name); 3728 } 3729 else 3730 { 3731 /* One entry in 'spellfile'. */ 3732 copy_option_part(&spf, spf_name, MAXPATHL - 5, ","); 3733 STRCAT(spf_name, ".spl"); 3734 3735 /* If it was already found above then skip it. 
*/ 3736 for (c = 0; c < ga.ga_len; ++c) 3737 { 3738 p = LANGP_ENTRY(ga, c)->lp_slang->sl_fname; 3739 if (p != NULL && fullpathcmp(spf_name, p, FALSE) == FPC_SAME) 3740 break; 3741 } 3742 if (c < ga.ga_len) 3743 continue; 3744 } 3745 3746 /* Check if it was loaded already. */ 3747 for (slang = first_lang; slang != NULL; slang = slang->sl_next) 3748 if (fullpathcmp(spf_name, slang->sl_fname, FALSE) == FPC_SAME) 3749 break; 3750 if (slang == NULL) 3751 { 3752 /* Not loaded, try loading it now. The language name includes the 3753 * region name, the region is ignored otherwise. for int_wordlist 3754 * use an arbitrary name. */ 3755 if (round == 0) 3756 STRCPY(lang, "internal wordlist"); 3757 else 3758 { 3759 vim_strncpy(lang, gettail(spf_name), MAXWLEN); 3760 p = vim_strchr(lang, '.'); 3761 if (p != NULL) 3762 *p = NUL; /* truncate at ".encoding.add" */ 3763 } 3764 slang = spell_load_file(spf_name, lang, NULL, TRUE); 3765 3766 /* If one of the languages has NOBREAK we assume the addition 3767 * files also have this. */ 3768 if (slang != NULL && nobreak) 3769 slang->sl_nobreak = TRUE; 3770 } 3771 if (slang != NULL && ga_grow(&ga, 1) == OK) 3772 { 3773 region_mask = REGION_ALL; 3774 if (use_region != NULL && !dont_use_region) 3775 { 3776 /* find region in sl_regions */ 3777 c = find_region(slang->sl_regions, use_region); 3778 if (c != REGION_ALL) 3779 region_mask = 1 << c; 3780 else if (*slang->sl_regions != NUL) 3781 /* This spell file is for other regions. */ 3782 region_mask = 0; 3783 } 3784 3785 if (region_mask != 0) 3786 { 3787 LANGP_ENTRY(ga, ga.ga_len)->lp_slang = slang; 3788 LANGP_ENTRY(ga, ga.ga_len)->lp_sallang = NULL; 3789 LANGP_ENTRY(ga, ga.ga_len)->lp_replang = NULL; 3790 LANGP_ENTRY(ga, ga.ga_len)->lp_region = region_mask; 3791 ++ga.ga_len; 3792 use_midword(slang, buf); 3793 } 3794 } 3795 } 3796 3797 /* Everything is fine, store the new b_langp value. 
*/ 3798 ga_clear(&buf->b_langp); 3799 buf->b_langp = ga; 3800 3801 /* For each language figure out what language to use for sound folding and 3802 * REP items. If the language doesn't support it itself use another one 3803 * with the same name. E.g. for "en-math" use "en". */ 3804 for (i = 0; i < ga.ga_len; ++i) 3805 { 3806 lp = LANGP_ENTRY(ga, i); 3807 3808 /* sound folding */ 3809 if (lp->lp_slang->sl_sal.ga_len > 0) 3810 /* language does sound folding itself */ 3811 lp->lp_sallang = lp->lp_slang; 3812 else 3813 /* find first similar language that does sound folding */ 3814 for (j = 0; j < ga.ga_len; ++j) 3815 { 3816 lp2 = LANGP_ENTRY(ga, j); 3817 if (lp2->lp_slang->sl_sal.ga_len > 0 3818 && STRNCMP(lp->lp_slang->sl_name, 3819 lp2->lp_slang->sl_name, 2) == 0) 3820 { 3821 lp->lp_sallang = lp2->lp_slang; 3822 break; 3823 } 3824 } 3825 3826 /* REP items */ 3827 if (lp->lp_slang->sl_rep.ga_len > 0) 3828 /* language has REP items itself */ 3829 lp->lp_replang = lp->lp_slang; 3830 else 3831 /* find first similar language that does sound folding */ 3832 for (j = 0; j < ga.ga_len; ++j) 3833 { 3834 lp2 = LANGP_ENTRY(ga, j); 3835 if (lp2->lp_slang->sl_rep.ga_len > 0 3836 && STRNCMP(lp->lp_slang->sl_name, 3837 lp2->lp_slang->sl_name, 2) == 0) 3838 { 3839 lp->lp_replang = lp2->lp_slang; 3840 break; 3841 } 3842 } 3843 } 3844 3845 return NULL; 3846 } 3847 3848 /* 3849 * Clear the midword characters for buffer "buf". 3850 */ 3851 static void 3852 clear_midword(buf) 3853 buf_T *buf; 3854 { 3855 vim_memset(buf->b_spell_ismw, 0, 256); 3856 #ifdef FEAT_MBYTE 3857 vim_free(buf->b_spell_ismw_mb); 3858 buf->b_spell_ismw_mb = NULL; 3859 #endif 3860 } 3861 3862 /* 3863 * Use the "sl_midword" field of language "lp" for buffer "buf". 3864 * They add up to any currently used midword characters. 
3865 */ 3866 static void 3867 use_midword(lp, buf) 3868 slang_T *lp; 3869 buf_T *buf; 3870 { 3871 char_u *p; 3872 3873 if (lp->sl_midword == NULL) /* there aren't any */ 3874 return; 3875 3876 for (p = lp->sl_midword; *p != NUL; ) 3877 #ifdef FEAT_MBYTE 3878 if (has_mbyte) 3879 { 3880 int c, l, n; 3881 char_u *bp; 3882 3883 c = mb_ptr2char(p); 3884 l = (*mb_ptr2len)(p); 3885 if (c < 256 && l <= 2) 3886 buf->b_spell_ismw[c] = TRUE; 3887 else if (buf->b_spell_ismw_mb == NULL) 3888 /* First multi-byte char in "b_spell_ismw_mb". */ 3889 buf->b_spell_ismw_mb = vim_strnsave(p, l); 3890 else 3891 { 3892 /* Append multi-byte chars to "b_spell_ismw_mb". */ 3893 n = STRLEN(buf->b_spell_ismw_mb); 3894 bp = vim_strnsave(buf->b_spell_ismw_mb, n + l); 3895 if (bp != NULL) 3896 { 3897 vim_free(buf->b_spell_ismw_mb); 3898 buf->b_spell_ismw_mb = bp; 3899 vim_strncpy(bp + n, p, l); 3900 } 3901 } 3902 p += l; 3903 } 3904 else 3905 #endif 3906 buf->b_spell_ismw[*p++] = TRUE; 3907 } 3908 3909 /* 3910 * Find the region "region[2]" in "rp" (points to "sl_regions"). 3911 * Each region is simply stored as the two characters of it's name. 3912 * Returns the index if found (first is 0), REGION_ALL if not found. 3913 */ 3914 static int 3915 find_region(rp, region) 3916 char_u *rp; 3917 char_u *region; 3918 { 3919 int i; 3920 3921 for (i = 0; ; i += 2) 3922 { 3923 if (rp[i] == NUL) 3924 return REGION_ALL; 3925 if (rp[i] == region[0] && rp[i + 1] == region[1]) 3926 break; 3927 } 3928 return i / 2; 3929 } 3930 3931 /* 3932 * Return case type of word: 3933 * w word 0 3934 * Word WF_ONECAP 3935 * W WORD WF_ALLCAP 3936 * WoRd wOrd WF_KEEPCAP 3937 */ 3938 static int 3939 captype(word, end) 3940 char_u *word; 3941 char_u *end; /* When NULL use up to NUL byte. 
*/ 3942 { 3943 char_u *p; 3944 int c; 3945 int firstcap; 3946 int allcap; 3947 int past_second = FALSE; /* past second word char */ 3948 3949 /* find first letter */ 3950 for (p = word; !spell_iswordp_nmw(p); mb_ptr_adv(p)) 3951 if (end == NULL ? *p == NUL : p >= end) 3952 return 0; /* only non-word characters, illegal word */ 3953 #ifdef FEAT_MBYTE 3954 if (has_mbyte) 3955 c = mb_ptr2char_adv(&p); 3956 else 3957 #endif 3958 c = *p++; 3959 firstcap = allcap = SPELL_ISUPPER(c); 3960 3961 /* 3962 * Need to check all letters to find a word with mixed upper/lower. 3963 * But a word with an upper char only at start is a ONECAP. 3964 */ 3965 for ( ; end == NULL ? *p != NUL : p < end; mb_ptr_adv(p)) 3966 if (spell_iswordp_nmw(p)) 3967 { 3968 c = PTR2CHAR(p); 3969 if (!SPELL_ISUPPER(c)) 3970 { 3971 /* UUl -> KEEPCAP */ 3972 if (past_second && allcap) 3973 return WF_KEEPCAP; 3974 allcap = FALSE; 3975 } 3976 else if (!allcap) 3977 /* UlU -> KEEPCAP */ 3978 return WF_KEEPCAP; 3979 past_second = TRUE; 3980 } 3981 3982 if (allcap) 3983 return WF_ALLCAP; 3984 if (firstcap) 3985 return WF_ONECAP; 3986 return 0; 3987 } 3988 3989 /* 3990 * Like captype() but for a KEEPCAP word add ONECAP if the word starts with a 3991 * capital. So that make_case_word() can turn WOrd into Word. 3992 * Add ALLCAP for "WOrD". 3993 */ 3994 static int 3995 badword_captype(word, end) 3996 char_u *word; 3997 char_u *end; 3998 { 3999 int flags = captype(word, end); 4000 int c; 4001 int l, u; 4002 int first; 4003 char_u *p; 4004 4005 if (flags & WF_KEEPCAP) 4006 { 4007 /* Count the number of UPPER and lower case letters. */ 4008 l = u = 0; 4009 first = FALSE; 4010 for (p = word; p < end; mb_ptr_adv(p)) 4011 { 4012 c = PTR2CHAR(p); 4013 if (SPELL_ISUPPER(c)) 4014 { 4015 ++u; 4016 if (p == word) 4017 first = TRUE; 4018 } 4019 else 4020 ++l; 4021 } 4022 4023 /* If there are more UPPER than lower case letters suggest an 4024 * ALLCAP word. Otherwise, if the first letter is UPPER then 4025 * suggest ONECAP. 
Exception: "ALl" most likely should be "All", 4026 * require three upper case letters. */ 4027 if (u > l && u > 2) 4028 flags |= WF_ALLCAP; 4029 else if (first) 4030 flags |= WF_ONECAP; 4031 } 4032 return flags; 4033 } 4034 4035 # if defined(FEAT_MBYTE) || defined(EXITFREE) || defined(PROTO) 4036 /* 4037 * Free all languages. 4038 */ 4039 void 4040 spell_free_all() 4041 { 4042 slang_T *slang; 4043 buf_T *buf; 4044 char_u fname[MAXPATHL]; 4045 4046 /* Go through all buffers and handle 'spelllang'. */ 4047 for (buf = firstbuf; buf != NULL; buf = buf->b_next) 4048 ga_clear(&buf->b_langp); 4049 4050 while (first_lang != NULL) 4051 { 4052 slang = first_lang; 4053 first_lang = slang->sl_next; 4054 slang_free(slang); 4055 } 4056 4057 if (int_wordlist != NULL) 4058 { 4059 /* Delete the internal wordlist and its .spl file */ 4060 mch_remove(int_wordlist); 4061 int_wordlist_spl(fname); 4062 mch_remove(fname); 4063 vim_free(int_wordlist); 4064 int_wordlist = NULL; 4065 } 4066 4067 init_spell_chartab(); 4068 } 4069 # endif 4070 4071 # if defined(FEAT_MBYTE) || defined(PROTO) 4072 /* 4073 * Clear all spelling tables and reload them. 4074 * Used after 'encoding' is set and when ":mkspell" was used. 4075 */ 4076 void 4077 spell_reload() 4078 { 4079 buf_T *buf; 4080 win_T *wp; 4081 4082 /* Initialize the table for spell_iswordp(). */ 4083 init_spell_chartab(); 4084 4085 /* Unload all allocated memory. */ 4086 spell_free_all(); 4087 4088 /* Go through all buffers and handle 'spelllang'. */ 4089 for (buf = firstbuf; buf != NULL; buf = buf->b_next) 4090 { 4091 /* Only load the wordlists when 'spelllang' is set and there is a 4092 * window for this buffer in which 'spell' is set. 
*/ 4093 if (*buf->b_p_spl != NUL) 4094 { 4095 FOR_ALL_WINDOWS(wp) 4096 if (wp->w_buffer == buf && wp->w_p_spell) 4097 { 4098 (void)did_set_spelllang(buf); 4099 # ifdef FEAT_WINDOWS 4100 break; 4101 # endif 4102 } 4103 } 4104 } 4105 } 4106 # endif 4107 4108 /* 4109 * Reload the spell file "fname" if it's loaded. 4110 */ 4111 static void 4112 spell_reload_one(fname, added_word) 4113 char_u *fname; 4114 int added_word; /* invoked through "zg" */ 4115 { 4116 slang_T *slang; 4117 int didit = FALSE; 4118 4119 for (slang = first_lang; slang != NULL; slang = slang->sl_next) 4120 { 4121 if (fullpathcmp(fname, slang->sl_fname, FALSE) == FPC_SAME) 4122 { 4123 slang_clear(slang); 4124 if (spell_load_file(fname, NULL, slang, FALSE) == NULL) 4125 /* reloading failed, clear the language */ 4126 slang_clear(slang); 4127 redraw_all_later(NOT_VALID); 4128 didit = TRUE; 4129 } 4130 } 4131 4132 /* When "zg" was used and the file wasn't loaded yet, should redo 4133 * 'spelllang' to get it loaded. */ 4134 if (added_word && !didit) 4135 did_set_spelllang(curbuf); 4136 } 4137 4138 4139 /* 4140 * Functions for ":mkspell". 4141 */ 4142 4143 #define MAXLINELEN 500 /* Maximum length in bytes of a line in a .aff 4144 and .dic file. */ 4145 /* 4146 * Main structure to store the contents of a ".aff" file. 
4147 */ 4148 typedef struct afffile_S 4149 { 4150 char_u *af_enc; /* "SET", normalized, alloc'ed string or NULL */ 4151 int af_flagtype; /* AFT_CHAR, AFT_LONG, AFT_NUM or AFT_CAPLONG */ 4152 int af_slash; /* character used in word for slash */ 4153 unsigned af_rar; /* RAR ID for rare word */ 4154 unsigned af_kep; /* KEP ID for keep-case word */ 4155 unsigned af_bad; /* BAD ID for banned word */ 4156 unsigned af_needaffix; /* NEEDAFFIX ID */ 4157 unsigned af_needcomp; /* NEEDCOMPOUND ID */ 4158 int af_pfxpostpone; /* postpone prefixes without chop string */ 4159 hashtab_T af_pref; /* hashtable for prefixes, affheader_T */ 4160 hashtab_T af_suff; /* hashtable for suffixes, affheader_T */ 4161 hashtab_T af_comp; /* hashtable for compound flags, compitem_T */ 4162 } afffile_T; 4163 4164 #define AFT_CHAR 0 /* flags are one character */ 4165 #define AFT_LONG 1 /* flags are two characters */ 4166 #define AFT_CAPLONG 2 /* flags are one or two characters */ 4167 #define AFT_NUM 3 /* flags are numbers, comma separated */ 4168 4169 typedef struct affentry_S affentry_T; 4170 /* Affix entry from ".aff" file. Used for prefixes and suffixes. */ 4171 struct affentry_S 4172 { 4173 affentry_T *ae_next; /* next affix with same name/number */ 4174 char_u *ae_chop; /* text to chop off basic word (can be NULL) */ 4175 char_u *ae_add; /* text to add to basic word (can be NULL) */ 4176 char_u *ae_cond; /* condition (NULL for ".") */ 4177 regprog_T *ae_prog; /* regexp program for ae_cond or NULL */ 4178 char_u ae_rare; /* rare affix */ 4179 char_u ae_nocomp; /* word with affix not compoundable */ 4180 }; 4181 4182 #ifdef FEAT_MBYTE 4183 # define AH_KEY_LEN 17 /* 2 x 8 bytes + NUL */ 4184 #else 4185 # define AH_KEY_LEN 7 /* 6 digits + NUL */ 4186 #endif 4187 4188 /* Affix header from ".aff" file. Used for af_pref and af_suff. 
*/ 4189 typedef struct affheader_S 4190 { 4191 char_u ah_key[AH_KEY_LEN]; /* key for hashtab == name of affix */ 4192 unsigned ah_flag; /* affix name as number, uses "af_flagtype" */ 4193 int ah_newID; /* prefix ID after renumbering; 0 if not used */ 4194 int ah_combine; /* suffix may combine with prefix */ 4195 int ah_follows; /* another affix block should be following */ 4196 affentry_T *ah_first; /* first affix entry */ 4197 } affheader_T; 4198 4199 #define HI2AH(hi) ((affheader_T *)(hi)->hi_key) 4200 4201 /* Flag used in compound items. */ 4202 typedef struct compitem_S 4203 { 4204 char_u ci_key[AH_KEY_LEN]; /* key for hashtab == name of compound */ 4205 unsigned ci_flag; /* affix name as number, uses "af_flagtype" */ 4206 int ci_newID; /* affix ID after renumbering. */ 4207 } compitem_T; 4208 4209 #define HI2CI(hi) ((compitem_T *)(hi)->hi_key) 4210 4211 /* 4212 * Structure that is used to store the items in the word tree. This avoids 4213 * the need to keep track of each allocated thing, everything is freed all at 4214 * once after ":mkspell" is done. 4215 */ 4216 #define SBLOCKSIZE 16000 /* size of sb_data */ 4217 typedef struct sblock_S sblock_T; 4218 struct sblock_S 4219 { 4220 sblock_T *sb_next; /* next block in list */ 4221 int sb_used; /* nr of bytes already in use */ 4222 char_u sb_data[1]; /* data, actually longer */ 4223 }; 4224 4225 /* 4226 * A node in the tree. 
4227 */ 4228 typedef struct wordnode_S wordnode_T; 4229 struct wordnode_S 4230 { 4231 union /* shared to save space */ 4232 { 4233 char_u hashkey[6]; /* the hash key, only used while compressing */ 4234 int index; /* index in written nodes (valid after first 4235 round) */ 4236 } wn_u1; 4237 union /* shared to save space */ 4238 { 4239 wordnode_T *next; /* next node with same hash key */ 4240 wordnode_T *wnode; /* parent node that will write this node */ 4241 } wn_u2; 4242 wordnode_T *wn_child; /* child (next byte in word) */ 4243 wordnode_T *wn_sibling; /* next sibling (alternate byte in word, 4244 always sorted) */ 4245 int wn_refs; /* Nr. of references to this node. Only 4246 relevant for first node in a list of 4247 siblings, in following siblings it is 4248 always one. */ 4249 char_u wn_byte; /* Byte for this node. NUL for word end */ 4250 char_u wn_affixID; /* when "wn_byte" is NUL: supported/required 4251 prefix ID or 0 */ 4252 short_u wn_flags; /* when "wn_byte" is NUL: WF_ flags */ 4253 short wn_region; /* when "wn_byte" is NUL: region mask; for 4254 PREFIXTREE it's the prefcondnr */ 4255 #ifdef SPELL_PRINTTREE 4256 int wn_nr; /* sequence nr for printing */ 4257 #endif 4258 }; 4259 4260 #define WN_MASK 0xffff /* mask relevant bits of "wn_flags" */ 4261 4262 #define HI2WN(hi) (wordnode_T *)((hi)->hi_key) 4263 4264 /* 4265 * Info used while reading the spell files. 
4266 */ 4267 typedef struct spellinfo_S 4268 { 4269 wordnode_T *si_foldroot; /* tree with case-folded words */ 4270 long si_foldwcount; /* nr of words in si_foldroot */ 4271 4272 wordnode_T *si_keeproot; /* tree with keep-case words */ 4273 long si_keepwcount; /* nr of words in si_keeproot */ 4274 4275 wordnode_T *si_prefroot; /* tree with postponed prefixes */ 4276 4277 sblock_T *si_blocks; /* memory blocks used */ 4278 long si_blocks_cnt; /* memory blocks allocated */ 4279 long si_compress_cnt; /* words to add before lowering 4280 compression limit */ 4281 wordnode_T *si_first_free; /* List of nodes that have been freed during 4282 compression, linked by "wn_child" field. */ 4283 long si_free_count; /* number of nodes in si_first_free */ 4284 #ifdef SPELL_PRINTTREE 4285 int si_wordnode_nr; /* sequence nr for nodes */ 4286 #endif 4287 4288 4289 int si_ascii; /* handling only ASCII words */ 4290 int si_add; /* addition file */ 4291 int si_clear_chartab; /* when TRUE clear char tables */ 4292 int si_region; /* region mask */ 4293 vimconv_T si_conv; /* for conversion to 'encoding' */ 4294 int si_memtot; /* runtime memory used */ 4295 int si_verbose; /* verbose messages */ 4296 int si_msg_count; /* number of words added since last message */ 4297 int si_region_count; /* number of regions supported (1 when there 4298 are no regions) */ 4299 char_u si_region_name[16]; /* region names; used only if 4300 * si_region_count > 1) */ 4301 4302 garray_T si_rep; /* list of fromto_T entries from REP lines */ 4303 garray_T si_sal; /* list of fromto_T entries from SAL lines */ 4304 char_u *si_sofofr; /* SOFOFROM text */ 4305 char_u *si_sofoto; /* SOFOTO text */ 4306 int si_followup; /* soundsalike: ? */ 4307 int si_collapse; /* soundsalike: ? 
*/ 4308 int si_rem_accents; /* soundsalike: remove accents */ 4309 garray_T si_map; /* MAP info concatenated */ 4310 char_u *si_midword; /* MIDWORD chars or NULL */ 4311 int si_compmax; /* max nr of words for compounding */ 4312 int si_compminlen; /* minimal length for compounding */ 4313 int si_compsylmax; /* max nr of syllables for compounding */ 4314 char_u *si_compflags; /* flags used for compounding */ 4315 char_u si_nobreak; /* NOBREAK */ 4316 char_u *si_syllable; /* syllable string */ 4317 garray_T si_prefcond; /* table with conditions for postponed 4318 * prefixes, each stored as a string */ 4319 int si_newprefID; /* current value for ah_newID */ 4320 int si_newcompID; /* current value for compound ID */ 4321 } spellinfo_T; 4322 4323 static afffile_T *spell_read_aff __ARGS((spellinfo_T *spin, char_u *fname)); 4324 static unsigned affitem2flag __ARGS((int flagtype, char_u *item, char_u *fname, int lnum)); 4325 static unsigned get_affitem __ARGS((int flagtype, char_u **pp)); 4326 static void process_compflags __ARGS((spellinfo_T *spin, afffile_T *aff, char_u *compflags)); 4327 static void check_renumber __ARGS((spellinfo_T *spin)); 4328 static int flag_in_afflist __ARGS((int flagtype, char_u *afflist, unsigned flag)); 4329 static void aff_check_number __ARGS((int spinval, int affval, char *name)); 4330 static void aff_check_string __ARGS((char_u *spinval, char_u *affval, char *name)); 4331 static int str_equal __ARGS((char_u *s1, char_u *s2)); 4332 static void add_fromto __ARGS((spellinfo_T *spin, garray_T *gap, char_u *from, char_u *to)); 4333 static int sal_to_bool __ARGS((char_u *s)); 4334 static int has_non_ascii __ARGS((char_u *s)); 4335 static void spell_free_aff __ARGS((afffile_T *aff)); 4336 static int spell_read_dic __ARGS((spellinfo_T *spin, char_u *fname, afffile_T *affile)); 4337 static int get_pfxlist __ARGS((afffile_T *affile, char_u *afflist, char_u *store_afflist)); 4338 static void get_compflags __ARGS((afffile_T *affile, char_u *afflist, 
char_u *store_afflist)); 4339 static int store_aff_word __ARGS((spellinfo_T *spin, char_u *word, char_u *afflist, afffile_T *affile, hashtab_T *ht, hashtab_T *xht, int comb, int flags, char_u *pfxlist, int pfxlen)); 4340 static int spell_read_wordfile __ARGS((spellinfo_T *spin, char_u *fname)); 4341 static void *getroom __ARGS((spellinfo_T *spin, size_t len, int align)); 4342 static char_u *getroom_save __ARGS((spellinfo_T *spin, char_u *s)); 4343 static void free_blocks __ARGS((sblock_T *bl)); 4344 static wordnode_T *wordtree_alloc __ARGS((spellinfo_T *spin)); 4345 static int store_word __ARGS((spellinfo_T *spin, char_u *word, int flags, int region, char_u *pfxlist, int need_affix)); 4346 static int tree_add_word __ARGS((spellinfo_T *spin, char_u *word, wordnode_T *tree, int flags, int region, int affixID)); 4347 static wordnode_T *get_wordnode __ARGS((spellinfo_T *spin)); 4348 static void deref_wordnode __ARGS((spellinfo_T *spin, wordnode_T *node)); 4349 static void free_wordnode __ARGS((spellinfo_T *spin, wordnode_T *n)); 4350 static void wordtree_compress __ARGS((spellinfo_T *spin, wordnode_T *root)); 4351 static int node_compress __ARGS((spellinfo_T *spin, wordnode_T *node, hashtab_T *ht, int *tot)); 4352 static int node_equal __ARGS((wordnode_T *n1, wordnode_T *n2)); 4353 static int write_vim_spell __ARGS((spellinfo_T *spin, char_u *fname)); 4354 static void clear_node __ARGS((wordnode_T *node)); 4355 static int put_node __ARGS((FILE *fd, wordnode_T *node, int index, int regionmask, int prefixtree)); 4356 static void mkspell __ARGS((int fcount, char_u **fnames, int ascii, int overwrite, int added_word)); 4357 static void init_spellfile __ARGS((void)); 4358 4359 /* In the postponed prefixes tree wn_flags is used to store the WFP_ flags, 4360 * but it must be negative to indicate the prefix tree to tree_add_word(). 4361 * Use a negative number with the lower 8 bits zero. 
*/ 4362 #define PFX_FLAGS -256 4363 4364 /* 4365 * Tunable parameters for when the tree is compressed. See 'mkspellmem'. 4366 */ 4367 static long compress_start = 30000; /* memory / SBLOCKSIZE */ 4368 static long compress_inc = 100; /* memory / SBLOCKSIZE */ 4369 static long compress_added = 500000; /* word count */ 4370 4371 #ifdef SPELL_PRINTTREE 4372 /* 4373 * For debugging the tree code: print the current tree in a (more or less) 4374 * readable format, so that we can see what happens when adding a word and/or 4375 * compressing the tree. 4376 * Based on code from Olaf Seibert. 4377 */ 4378 #define PRINTLINESIZE 1000 4379 #define PRINTWIDTH 6 4380 4381 #define PRINTSOME(l, depth, fmt, a1, a2) vim_snprintf(l + depth * PRINTWIDTH, \ 4382 PRINTLINESIZE - PRINTWIDTH * depth, fmt, a1, a2) 4383 4384 static char line1[PRINTLINESIZE]; 4385 static char line2[PRINTLINESIZE]; 4386 static char line3[PRINTLINESIZE]; 4387 4388 static void 4389 spell_clear_flags(wordnode_T *node) 4390 { 4391 wordnode_T *np; 4392 4393 for (np = node; np != NULL; np = np->wn_sibling) 4394 { 4395 np->wn_u1.index = FALSE; 4396 spell_clear_flags(np->wn_child); 4397 } 4398 } 4399 4400 static void 4401 spell_print_node(wordnode_T *node, int depth) 4402 { 4403 if (node->wn_u1.index) 4404 { 4405 /* Done this node before, print the reference. */ 4406 PRINTSOME(line1, depth, "(%d)", node->wn_nr, 0); 4407 PRINTSOME(line2, depth, " ", 0, 0); 4408 PRINTSOME(line3, depth, " ", 0, 0); 4409 msg(line1); 4410 msg(line2); 4411 msg(line3); 4412 } 4413 else 4414 { 4415 node->wn_u1.index = TRUE; 4416 4417 if (node->wn_byte != NUL) 4418 { 4419 if (node->wn_child != NULL) 4420 PRINTSOME(line1, depth, " %c -> ", node->wn_byte, 0); 4421 else 4422 /* Cannot happen? 
*/ 4423 PRINTSOME(line1, depth, " %c ???", node->wn_byte, 0); 4424 } 4425 else 4426 PRINTSOME(line1, depth, " $ ", 0, 0); 4427 4428 PRINTSOME(line2, depth, "%d/%d ", node->wn_nr, node->wn_refs); 4429 4430 if (node->wn_sibling != NULL) 4431 PRINTSOME(line3, depth, " | ", 0, 0); 4432 else 4433 PRINTSOME(line3, depth, " ", 0, 0); 4434 4435 if (node->wn_byte == NUL) 4436 { 4437 msg(line1); 4438 msg(line2); 4439 msg(line3); 4440 } 4441 4442 /* do the children */ 4443 if (node->wn_byte != NUL && node->wn_child != NULL) 4444 spell_print_node(node->wn_child, depth + 1); 4445 4446 /* do the siblings */ 4447 if (node->wn_sibling != NULL) 4448 { 4449 /* get rid of all parent details except | */ 4450 STRCPY(line1, line3); 4451 STRCPY(line2, line3); 4452 spell_print_node(node->wn_sibling, depth); 4453 } 4454 } 4455 } 4456 4457 static void 4458 spell_print_tree(wordnode_T *root) 4459 { 4460 if (root != NULL) 4461 { 4462 /* Clear the "wn_u1.index" fields, used to remember what has been 4463 * done. */ 4464 spell_clear_flags(root); 4465 4466 /* Recursively print the tree. */ 4467 spell_print_node(root, 0); 4468 } 4469 } 4470 #endif /* SPELL_PRINTTREE */ 4471 4472 /* 4473 * Read the affix file "fname". 4474 * Returns an afffile_T, NULL for complete failure. 
4475 */ 4476 static afffile_T * 4477 spell_read_aff(spin, fname) 4478 spellinfo_T *spin; 4479 char_u *fname; 4480 { 4481 FILE *fd; 4482 afffile_T *aff; 4483 char_u rline[MAXLINELEN]; 4484 char_u *line; 4485 char_u *pc = NULL; 4486 #define MAXITEMCNT 7 4487 char_u *(items[MAXITEMCNT]); 4488 int itemcnt; 4489 char_u *p; 4490 int lnum = 0; 4491 affheader_T *cur_aff = NULL; 4492 int did_postpone_prefix = FALSE; 4493 int aff_todo = 0; 4494 hashtab_T *tp; 4495 char_u *low = NULL; 4496 char_u *fol = NULL; 4497 char_u *upp = NULL; 4498 int do_rep; 4499 int do_sal; 4500 int do_map; 4501 int found_map = FALSE; 4502 hashitem_T *hi; 4503 int l; 4504 int compminlen = 0; /* COMPOUNDMIN value */ 4505 int compsylmax = 0; /* COMPOUNDSYLMAX value */ 4506 int compmax = 0; /* COMPOUNDMAX value */ 4507 char_u *compflags = NULL; /* COMPOUNDFLAG and COMPOUNDFLAGS 4508 concatenated */ 4509 char_u *midword = NULL; /* MIDWORD value */ 4510 char_u *syllable = NULL; /* SYLLABLE value */ 4511 char_u *sofofrom = NULL; /* SOFOFROM value */ 4512 char_u *sofoto = NULL; /* SOFOTO value */ 4513 4514 /* 4515 * Open the file. 4516 */ 4517 fd = mch_fopen((char *)fname, "r"); 4518 if (fd == NULL) 4519 { 4520 EMSG2(_(e_notopen), fname); 4521 return NULL; 4522 } 4523 4524 if (spin->si_verbose || p_verbose > 2) 4525 { 4526 if (!spin->si_verbose) 4527 verbose_enter(); 4528 smsg((char_u *)_("Reading affix file %s ..."), fname); 4529 out_flush(); 4530 if (!spin->si_verbose) 4531 verbose_leave(); 4532 } 4533 4534 /* Only do REP lines when not done in another .aff file already. */ 4535 do_rep = spin->si_rep.ga_len == 0; 4536 4537 /* Only do SAL lines when not done in another .aff file already. */ 4538 do_sal = spin->si_sal.ga_len == 0; 4539 4540 /* Only do MAP lines when not done in another .aff file already. */ 4541 do_map = spin->si_map.ga_len == 0; 4542 4543 /* 4544 * Allocate and init the afffile_T structure. 
4545 */ 4546 aff = (afffile_T *)getroom(spin, sizeof(afffile_T), TRUE); 4547 if (aff == NULL) 4548 return NULL; 4549 hash_init(&aff->af_pref); 4550 hash_init(&aff->af_suff); 4551 hash_init(&aff->af_comp); 4552 4553 /* 4554 * Read all the lines in the file one by one. 4555 */ 4556 while (!vim_fgets(rline, MAXLINELEN, fd) && !got_int) 4557 { 4558 line_breakcheck(); 4559 ++lnum; 4560 4561 /* Skip comment lines. */ 4562 if (*rline == '#') 4563 continue; 4564 4565 /* Convert from "SET" to 'encoding' when needed. */ 4566 vim_free(pc); 4567 #ifdef FEAT_MBYTE 4568 if (spin->si_conv.vc_type != CONV_NONE) 4569 { 4570 pc = string_convert(&spin->si_conv, rline, NULL); 4571 if (pc == NULL) 4572 { 4573 smsg((char_u *)_("Conversion failure for word in %s line %d: %s"), 4574 fname, lnum, rline); 4575 continue; 4576 } 4577 line = pc; 4578 } 4579 else 4580 #endif 4581 { 4582 pc = NULL; 4583 line = rline; 4584 } 4585 4586 /* Split the line up in white separated items. Put a NUL after each 4587 * item. */ 4588 itemcnt = 0; 4589 for (p = line; ; ) 4590 { 4591 while (*p != NUL && *p <= ' ') /* skip white space and CR/NL */ 4592 ++p; 4593 if (*p == NUL) 4594 break; 4595 if (itemcnt == MAXITEMCNT) /* too many items */ 4596 break; 4597 items[itemcnt++] = p; 4598 while (*p > ' ') /* skip until white space or CR/NL */ 4599 ++p; 4600 if (*p == NUL) 4601 break; 4602 *p++ = NUL; 4603 } 4604 4605 /* Handle non-empty lines. */ 4606 if (itemcnt > 0) 4607 { 4608 if (STRCMP(items[0], "SET") == 0 && itemcnt == 2 4609 && aff->af_enc == NULL) 4610 { 4611 #ifdef FEAT_MBYTE 4612 /* Setup for conversion from "ENC" to 'encoding'. 
*/ 4613 aff->af_enc = enc_canonize(items[1]); 4614 if (aff->af_enc != NULL && !spin->si_ascii 4615 && convert_setup(&spin->si_conv, aff->af_enc, 4616 p_enc) == FAIL) 4617 smsg((char_u *)_("Conversion in %s not supported: from %s to %s"), 4618 fname, aff->af_enc, p_enc); 4619 spin->si_conv.vc_fail = TRUE; 4620 #else 4621 smsg((char_u *)_("Conversion in %s not supported"), fname); 4622 #endif 4623 } 4624 else if (STRCMP(items[0], "FLAG") == 0 && itemcnt == 2 4625 && aff->af_flagtype == AFT_CHAR) 4626 { 4627 if (STRCMP(items[1], "long") == 0) 4628 aff->af_flagtype = AFT_LONG; 4629 else if (STRCMP(items[1], "num") == 0) 4630 aff->af_flagtype = AFT_NUM; 4631 else if (STRCMP(items[1], "caplong") == 0) 4632 aff->af_flagtype = AFT_CAPLONG; 4633 else 4634 smsg((char_u *)_("Invalid value for FLAG in %s line %d: %s"), 4635 fname, lnum, items[1]); 4636 if (aff->af_rar != 0 || aff->af_kep != 0 || aff->af_bad != 0 4637 || aff->af_needaffix != 0 4638 || aff->af_needcomp != 0 4639 || compflags != NULL 4640 || aff->af_suff.ht_used > 0 4641 || aff->af_pref.ht_used > 0) 4642 smsg((char_u *)_("FLAG after using flags in %s line %d: %s"), 4643 fname, lnum, items[1]); 4644 } 4645 else if (STRCMP(items[0], "MIDWORD") == 0 && itemcnt == 2 4646 && midword == NULL) 4647 { 4648 midword = getroom_save(spin, items[1]); 4649 } 4650 else if (STRCMP(items[0], "NOSPLITSUGS") == 0 && itemcnt == 1) 4651 { 4652 /* ignored, we always split */ 4653 } 4654 else if (STRCMP(items[0], "TRY") == 0 && itemcnt == 2) 4655 { 4656 /* ignored, we look in the tree for what chars may appear */ 4657 } 4658 else if (STRCMP(items[0], "SLASH") == 0 && itemcnt == 2 4659 && aff->af_slash == 0) 4660 { 4661 aff->af_slash = items[1][0]; 4662 if (items[1][1] != NUL) 4663 smsg((char_u *)_("Character used for SLASH must be ASCII; in %s line %d: %s"), 4664 fname, lnum, items[1]); 4665 } 4666 else if (STRCMP(items[0], "RAR") == 0 && itemcnt == 2 4667 && aff->af_rar == 0) 4668 { 4669 aff->af_rar = affitem2flag(aff->af_flagtype, 
items[1], 4670 fname, lnum); 4671 } 4672 else if (STRCMP(items[0], "KEP") == 0 && itemcnt == 2 4673 && aff->af_kep == 0) 4674 { 4675 aff->af_kep = affitem2flag(aff->af_flagtype, items[1], 4676 fname, lnum); 4677 } 4678 else if (STRCMP(items[0], "BAD") == 0 && itemcnt == 2 4679 && aff->af_bad == 0) 4680 { 4681 aff->af_bad = affitem2flag(aff->af_flagtype, items[1], 4682 fname, lnum); 4683 } 4684 else if (STRCMP(items[0], "NEEDAFFIX") == 0 && itemcnt == 2 4685 && aff->af_needaffix == 0) 4686 { 4687 aff->af_needaffix = affitem2flag(aff->af_flagtype, items[1], 4688 fname, lnum); 4689 } 4690 else if (STRCMP(items[0], "NEEDCOMPOUND") == 0 && itemcnt == 2 4691 && aff->af_needcomp == 0) 4692 { 4693 aff->af_needcomp = affitem2flag(aff->af_flagtype, items[1], 4694 fname, lnum); 4695 } 4696 else if (STRCMP(items[0], "COMPOUNDFLAG") == 0 && itemcnt == 2 4697 && compflags == NULL) 4698 { 4699 /* Turn flag "c" into COMPOUNDFLAGS compatible string "c+", 4700 * "Na" into "Na+", "1234" into "1234+". */ 4701 p = getroom(spin, STRLEN(items[1]) + 2, FALSE); 4702 if (p != NULL) 4703 { 4704 STRCPY(p, items[1]); 4705 STRCAT(p, "+"); 4706 compflags = p; 4707 } 4708 } 4709 else if (STRCMP(items[0], "COMPOUNDFLAGS") == 0 && itemcnt == 2) 4710 { 4711 /* Concatenate this string to previously defined ones, using a 4712 * slash to separate them. 
*/ 4713 l = STRLEN(items[1]) + 1; 4714 if (compflags != NULL) 4715 l += STRLEN(compflags) + 1; 4716 p = getroom(spin, l, FALSE); 4717 if (p != NULL) 4718 { 4719 if (compflags != NULL) 4720 { 4721 STRCPY(p, compflags); 4722 STRCAT(p, "/"); 4723 } 4724 STRCAT(p, items[1]); 4725 compflags = p; 4726 } 4727 } 4728 else if (STRCMP(items[0], "COMPOUNDMAX") == 0 && itemcnt == 2 4729 && compmax == 0) 4730 { 4731 compmax = atoi((char *)items[1]); 4732 if (compmax == 0) 4733 smsg((char_u *)_("Wrong COMPOUNDMAX value in %s line %d: %s"), 4734 fname, lnum, items[1]); 4735 } 4736 else if (STRCMP(items[0], "COMPOUNDMIN") == 0 && itemcnt == 2 4737 && compminlen == 0) 4738 { 4739 compminlen = atoi((char *)items[1]); 4740 if (compminlen == 0) 4741 smsg((char_u *)_("Wrong COMPOUNDMIN value in %s line %d: %s"), 4742 fname, lnum, items[1]); 4743 } 4744 else if (STRCMP(items[0], "COMPOUNDSYLMAX") == 0 && itemcnt == 2 4745 && compsylmax == 0) 4746 { 4747 compsylmax = atoi((char *)items[1]); 4748 if (compsylmax == 0) 4749 smsg((char_u *)_("Wrong COMPOUNDSYLMAX value in %s line %d: %s"), 4750 fname, lnum, items[1]); 4751 } 4752 else if (STRCMP(items[0], "SYLLABLE") == 0 && itemcnt == 2 4753 && syllable == NULL) 4754 { 4755 syllable = getroom_save(spin, items[1]); 4756 } 4757 else if (STRCMP(items[0], "NOBREAK") == 0 && itemcnt == 1) 4758 { 4759 spin->si_nobreak = TRUE; 4760 } 4761 else if (STRCMP(items[0], "PFXPOSTPONE") == 0 && itemcnt == 1) 4762 { 4763 aff->af_pfxpostpone = TRUE; 4764 } 4765 else if ((STRCMP(items[0], "PFX") == 0 4766 || STRCMP(items[0], "SFX") == 0) 4767 && aff_todo == 0 4768 && itemcnt >= 4) 4769 { 4770 int lasti = 4; 4771 char_u key[AH_KEY_LEN]; 4772 4773 if (*items[0] == 'P') 4774 tp = &aff->af_pref; 4775 else 4776 tp = &aff->af_suff; 4777 4778 /* Myspell allows the same affix name to be used multiple 4779 * times. The affix files that do this have an undocumented 4780 * "S" flag on all but the last block, thus we check for that 4781 * and store it in ah_follows. 
*/ 4782 vim_strncpy(key, items[1], AH_KEY_LEN - 1); 4783 hi = hash_find(tp, key); 4784 if (!HASHITEM_EMPTY(hi)) 4785 { 4786 cur_aff = HI2AH(hi); 4787 if (cur_aff->ah_combine != (*items[2] == 'Y')) 4788 smsg((char_u *)_("Different combining flag in continued affix block in %s line %d: %s"), 4789 fname, lnum, items[1]); 4790 if (!cur_aff->ah_follows) 4791 smsg((char_u *)_("Duplicate affix in %s line %d: %s"), 4792 fname, lnum, items[1]); 4793 } 4794 else 4795 { 4796 /* New affix letter. */ 4797 cur_aff = (affheader_T *)getroom(spin, 4798 sizeof(affheader_T), TRUE); 4799 if (cur_aff == NULL) 4800 break; 4801 cur_aff->ah_flag = affitem2flag(aff->af_flagtype, items[1], 4802 fname, lnum); 4803 if (cur_aff->ah_flag == 0 || STRLEN(items[1]) >= AH_KEY_LEN) 4804 break; 4805 if (cur_aff->ah_flag == aff->af_bad 4806 || cur_aff->ah_flag == aff->af_rar 4807 || cur_aff->ah_flag == aff->af_kep 4808 || cur_aff->ah_flag == aff->af_needaffix 4809 || cur_aff->ah_flag == aff->af_needcomp) 4810 smsg((char_u *)_("Affix also used for BAD/RAR/KEP/NEEDAFFIX/NEEDCOMPOUND in %s line %d: %s"), 4811 fname, lnum, items[1]); 4812 STRCPY(cur_aff->ah_key, items[1]); 4813 hash_add(tp, cur_aff->ah_key); 4814 4815 cur_aff->ah_combine = (*items[2] == 'Y'); 4816 } 4817 4818 /* Check for the "S" flag, which apparently means that another 4819 * block with the same affix name is following. */ 4820 if (itemcnt > lasti && STRCMP(items[lasti], "S") == 0) 4821 { 4822 ++lasti; 4823 cur_aff->ah_follows = TRUE; 4824 } 4825 else 4826 cur_aff->ah_follows = FALSE; 4827 4828 /* Myspell allows extra text after the item, but that might 4829 * mean mistakes go unnoticed. Require a comment-starter. 
*/ 4830 if (itemcnt > lasti && *items[lasti] != '#') 4831 smsg((char_u *)_("Trailing text in %s line %d: %s"), 4832 fname, lnum, items[4]); 4833 4834 if (STRCMP(items[2], "Y") != 0 && STRCMP(items[2], "N") != 0) 4835 smsg((char_u *)_("Expected Y or N in %s line %d: %s"), 4836 fname, lnum, items[2]); 4837 4838 if (*items[0] == 'P' && aff->af_pfxpostpone) 4839 { 4840 if (cur_aff->ah_newID == 0) 4841 { 4842 /* Use a new number in the .spl file later, to be able 4843 * to handle multiple .aff files. */ 4844 check_renumber(spin); 4845 cur_aff->ah_newID = ++spin->si_newprefID; 4846 4847 /* We only really use ah_newID if the prefix is 4848 * postponed. We know that only after handling all 4849 * the items. */ 4850 did_postpone_prefix = FALSE; 4851 } 4852 else 4853 /* Did use the ID in a previous block. */ 4854 did_postpone_prefix = TRUE; 4855 } 4856 4857 aff_todo = atoi((char *)items[3]); 4858 } 4859 else if ((STRCMP(items[0], "PFX") == 0 4860 || STRCMP(items[0], "SFX") == 0) 4861 && aff_todo > 0 4862 && STRCMP(cur_aff->ah_key, items[1]) == 0 4863 && itemcnt >= 5) 4864 { 4865 affentry_T *aff_entry; 4866 int rare = FALSE; 4867 int nocomp = FALSE; 4868 int upper = FALSE; 4869 int lasti = 5; 4870 4871 /* Check for "rare" and "nocomp" after the other info. */ 4872 while (itemcnt > lasti) 4873 { 4874 if (!rare && STRICMP(items[lasti], "rare") == 0) 4875 { 4876 rare = TRUE; 4877 ++lasti; 4878 } 4879 else if (!nocomp && STRICMP(items[lasti], "nocomp") == 0) 4880 { 4881 nocomp = TRUE; 4882 ++lasti; 4883 } 4884 else 4885 break; 4886 } 4887 4888 /* Myspell allows extra text after the item, but that might 4889 * mean mistakes go unnoticed. Require a comment-starter. */ 4890 if (itemcnt > lasti && *items[lasti] != '#') 4891 smsg((char_u *)_(e_afftrailing), fname, lnum, items[lasti]); 4892 4893 /* New item for an affix letter. 
*/ 4894 --aff_todo; 4895 aff_entry = (affentry_T *)getroom(spin, 4896 sizeof(affentry_T), TRUE); 4897 if (aff_entry == NULL) 4898 break; 4899 aff_entry->ae_rare = rare; 4900 aff_entry->ae_nocomp = nocomp; 4901 4902 if (STRCMP(items[2], "0") != 0) 4903 aff_entry->ae_chop = getroom_save(spin, items[2]); 4904 if (STRCMP(items[3], "0") != 0) 4905 aff_entry->ae_add = getroom_save(spin, items[3]); 4906 4907 /* Don't use an affix entry with non-ASCII characters when 4908 * "spin->si_ascii" is TRUE. */ 4909 if (!spin->si_ascii || !(has_non_ascii(aff_entry->ae_chop) 4910 || has_non_ascii(aff_entry->ae_add))) 4911 { 4912 aff_entry->ae_next = cur_aff->ah_first; 4913 cur_aff->ah_first = aff_entry; 4914 4915 if (STRCMP(items[4], ".") != 0) 4916 { 4917 char_u buf[MAXLINELEN]; 4918 4919 aff_entry->ae_cond = getroom_save(spin, items[4]); 4920 if (*items[0] == 'P') 4921 sprintf((char *)buf, "^%s", items[4]); 4922 else 4923 sprintf((char *)buf, "%s$", items[4]); 4924 aff_entry->ae_prog = vim_regcomp(buf, 4925 RE_MAGIC + RE_STRING + RE_STRICT); 4926 if (aff_entry->ae_prog == NULL) 4927 smsg((char_u *)_("Broken condition in %s line %d: %s"), 4928 fname, lnum, items[4]); 4929 } 4930 4931 /* For postponed prefixes we need an entry in si_prefcond 4932 * for the condition. Use an existing one if possible. */ 4933 if (*items[0] == 'P' && aff->af_pfxpostpone) 4934 { 4935 /* When the chop string is one lower-case letter and 4936 * the add string ends in the upper-case letter we set 4937 * the "upper" flag, clear "ae_chop" and remove the 4938 * letters from "ae_add". The condition must either 4939 * be empty or start with the same letter. 
*/ 4940 if (aff_entry->ae_chop != NULL 4941 && aff_entry->ae_add != NULL 4942 #ifdef FEAT_MBYTE 4943 && aff_entry->ae_chop[(*mb_ptr2len)( 4944 aff_entry->ae_chop)] == NUL 4945 #else 4946 && aff_entry->ae_chop[1] == NUL 4947 #endif 4948 ) 4949 { 4950 int c, c_up; 4951 4952 c = PTR2CHAR(aff_entry->ae_chop); 4953 c_up = SPELL_TOUPPER(c); 4954 if (c_up != c 4955 && (aff_entry->ae_cond == NULL 4956 || PTR2CHAR(aff_entry->ae_cond) == c)) 4957 { 4958 p = aff_entry->ae_add 4959 + STRLEN(aff_entry->ae_add); 4960 mb_ptr_back(aff_entry->ae_add, p); 4961 if (PTR2CHAR(p) == c_up) 4962 { 4963 upper = TRUE; 4964 aff_entry->ae_chop = NULL; 4965 *p = NUL; 4966 4967 /* The condition is matched with the 4968 * actual word, thus must check for the 4969 * upper-case letter. */ 4970 if (aff_entry->ae_cond != NULL) 4971 { 4972 char_u buf[MAXLINELEN]; 4973 #ifdef FEAT_MBYTE 4974 if (has_mbyte) 4975 { 4976 onecap_copy(items[4], buf, TRUE); 4977 aff_entry->ae_cond = getroom_save( 4978 spin, buf); 4979 } 4980 else 4981 #endif 4982 *aff_entry->ae_cond = c_up; 4983 if (aff_entry->ae_cond != NULL) 4984 { 4985 sprintf((char *)buf, "^%s", 4986 aff_entry->ae_cond); 4987 vim_free(aff_entry->ae_prog); 4988 aff_entry->ae_prog = vim_regcomp( 4989 buf, RE_MAGIC + RE_STRING); 4990 } 4991 } 4992 } 4993 } 4994 } 4995 4996 if (aff_entry->ae_chop == NULL) 4997 { 4998 int idx; 4999 char_u **pp; 5000 int n; 5001 5002 /* Find a previously used condition. */ 5003 for (idx = spin->si_prefcond.ga_len - 1; idx >= 0; 5004 --idx) 5005 { 5006 p = ((char_u **)spin->si_prefcond.ga_data)[idx]; 5007 if (str_equal(p, aff_entry->ae_cond)) 5008 break; 5009 } 5010 if (idx < 0 && ga_grow(&spin->si_prefcond, 1) == OK) 5011 { 5012 /* Not found, add a new condition. 
*/ 5013 idx = spin->si_prefcond.ga_len++; 5014 pp = ((char_u **)spin->si_prefcond.ga_data) 5015 + idx; 5016 if (aff_entry->ae_cond == NULL) 5017 *pp = NULL; 5018 else 5019 *pp = getroom_save(spin, 5020 aff_entry->ae_cond); 5021 } 5022 5023 /* Add the prefix to the prefix tree. */ 5024 if (aff_entry->ae_add == NULL) 5025 p = (char_u *)""; 5026 else 5027 p = aff_entry->ae_add; 5028 /* PFX_FLAGS is a negative number, so that 5029 * tree_add_word() knows this is the prefix tree. */ 5030 n = PFX_FLAGS; 5031 if (rare) 5032 n |= WFP_RARE; 5033 if (!cur_aff->ah_combine) 5034 n |= WFP_NC; 5035 if (upper) 5036 n |= WFP_UP; 5037 tree_add_word(spin, p, spin->si_prefroot, n, 5038 idx, cur_aff->ah_newID); 5039 did_postpone_prefix = TRUE; 5040 } 5041 5042 /* Didn't actually use ah_newID, backup si_newprefID. */ 5043 if (aff_todo == 0 && !did_postpone_prefix) 5044 { 5045 --spin->si_newprefID; 5046 cur_aff->ah_newID = 0; 5047 } 5048 } 5049 } 5050 } 5051 else if (STRCMP(items[0], "FOL") == 0 && itemcnt == 2 5052 && fol == NULL) 5053 { 5054 fol = vim_strsave(items[1]); 5055 } 5056 else if (STRCMP(items[0], "LOW") == 0 && itemcnt == 2 5057 && low == NULL) 5058 { 5059 low = vim_strsave(items[1]); 5060 } 5061 else if (STRCMP(items[0], "UPP") == 0 && itemcnt == 2 5062 && upp == NULL) 5063 { 5064 upp = vim_strsave(items[1]); 5065 } 5066 else if (STRCMP(items[0], "REP") == 0 && itemcnt == 2) 5067 { 5068 /* Ignore REP count */; 5069 if (!isdigit(*items[1])) 5070 smsg((char_u *)_("Expected REP count in %s line %d"), 5071 fname, lnum); 5072 } 5073 else if (STRCMP(items[0], "REP") == 0 && itemcnt >= 3) 5074 { 5075 /* REP item */ 5076 /* Myspell ignores extra arguments, we require it starts with 5077 * # to detect mistakes. */ 5078 if (itemcnt > 3 && items[3][0] != '#') 5079 smsg((char_u *)_(e_afftrailing), fname, lnum, items[3]); 5080 if (do_rep) 5081 { 5082 /* Replace underscore with space (can't include a space 5083 * directly). 
*/ 5084 for (p = items[1]; *p != NUL; mb_ptr_adv(p)) 5085 if (*p == '_') 5086 *p = ' '; 5087 for (p = items[2]; *p != NUL; mb_ptr_adv(p)) 5088 if (*p == '_') 5089 *p = ' '; 5090 add_fromto(spin, &spin->si_rep, items[1], items[2]); 5091 } 5092 } 5093 else if (STRCMP(items[0], "MAP") == 0 && itemcnt == 2) 5094 { 5095 /* MAP item or count */ 5096 if (!found_map) 5097 { 5098 /* First line contains the count. */ 5099 found_map = TRUE; 5100 if (!isdigit(*items[1])) 5101 smsg((char_u *)_("Expected MAP count in %s line %d"), 5102 fname, lnum); 5103 } 5104 else if (do_map) 5105 { 5106 int c; 5107 5108 /* Check that every character appears only once. */ 5109 for (p = items[1]; *p != NUL; ) 5110 { 5111 #ifdef FEAT_MBYTE 5112 c = mb_ptr2char_adv(&p); 5113 #else 5114 c = *p++; 5115 #endif 5116 if ((spin->si_map.ga_len > 0 5117 && vim_strchr(spin->si_map.ga_data, c) 5118 != NULL) 5119 || vim_strchr(p, c) != NULL) 5120 smsg((char_u *)_("Duplicate character in MAP in %s line %d"), 5121 fname, lnum); 5122 } 5123 5124 /* We simply concatenate all the MAP strings, separated by 5125 * slashes. */ 5126 ga_concat(&spin->si_map, items[1]); 5127 ga_append(&spin->si_map, '/'); 5128 } 5129 } 5130 /* Accept "SAL from to" and "SAL from to # comment". */ 5131 else if (STRCMP(items[0], "SAL") == 0 5132 && (itemcnt == 3 || (itemcnt > 3 && items[3][0] == '#'))) 5133 { 5134 if (do_sal) 5135 { 5136 /* SAL item (sounds-a-like) 5137 * Either one of the known keys or a from-to pair. */ 5138 if (STRCMP(items[1], "followup") == 0) 5139 spin->si_followup = sal_to_bool(items[2]); 5140 else if (STRCMP(items[1], "collapse_result") == 0) 5141 spin->si_collapse = sal_to_bool(items[2]); 5142 else if (STRCMP(items[1], "remove_accents") == 0) 5143 spin->si_rem_accents = sal_to_bool(items[2]); 5144 else 5145 /* when "to" is "_" it means empty */ 5146 add_fromto(spin, &spin->si_sal, items[1], 5147 STRCMP(items[2], "_") == 0 ? 
(char_u *)"" 5148 : items[2]); 5149 } 5150 } 5151 else if (STRCMP(items[0], "SOFOFROM") == 0 && itemcnt == 2 5152 && sofofrom == NULL) 5153 { 5154 sofofrom = getroom_save(spin, items[1]); 5155 } 5156 else if (STRCMP(items[0], "SOFOTO") == 0 && itemcnt == 2 5157 && sofoto == NULL) 5158 { 5159 sofoto = getroom_save(spin, items[1]); 5160 } 5161 else 5162 smsg((char_u *)_("Unrecognized or duplicate item in %s line %d: %s"), 5163 fname, lnum, items[0]); 5164 } 5165 } 5166 5167 if (fol != NULL || low != NULL || upp != NULL) 5168 { 5169 if (spin->si_clear_chartab) 5170 { 5171 /* Clear the char type tables, don't want to use any of the 5172 * currently used spell properties. */ 5173 init_spell_chartab(); 5174 spin->si_clear_chartab = FALSE; 5175 } 5176 5177 /* 5178 * Don't write a word table for an ASCII file, so that we don't check 5179 * for conflicts with a word table that matches 'encoding'. 5180 * Don't write one for utf-8 either, we use utf_*() and 5181 * mb_get_class(), the list of chars in the file will be incomplete. 5182 */ 5183 if (!spin->si_ascii 5184 #ifdef FEAT_MBYTE 5185 && !enc_utf8 5186 #endif 5187 ) 5188 { 5189 if (fol == NULL || low == NULL || upp == NULL) 5190 smsg((char_u *)_("Missing FOL/LOW/UPP line in %s"), fname); 5191 else 5192 (void)set_spell_chartab(fol, low, upp); 5193 } 5194 5195 vim_free(fol); 5196 vim_free(low); 5197 vim_free(upp); 5198 } 5199 5200 /* Use compound specifications of the .aff file for the spell info. 
*/ 5201 if (compmax != 0) 5202 { 5203 aff_check_number(spin->si_compmax, compmax, "COMPOUNDMAX"); 5204 spin->si_compmax = compmax; 5205 } 5206 5207 if (compminlen != 0) 5208 { 5209 aff_check_number(spin->si_compminlen, compminlen, "COMPOUNDMIN"); 5210 spin->si_compminlen = compminlen; 5211 } 5212 5213 if (compsylmax != 0) 5214 { 5215 if (syllable == NULL) 5216 smsg((char_u *)_("COMPOUNDSYLMAX used without SYLLABLE")); 5217 aff_check_number(spin->si_compsylmax, compsylmax, "COMPOUNDSYLMAX"); 5218 spin->si_compsylmax = compsylmax; 5219 } 5220 5221 if (compflags != NULL) 5222 process_compflags(spin, aff, compflags); 5223 5224 /* Check that we didn't use too many renumbered flags. */ 5225 if (spin->si_newcompID < spin->si_newprefID) 5226 { 5227 if (spin->si_newcompID == 127 || spin->si_newcompID == 255) 5228 MSG(_("Too many postponed prefixes")); 5229 else if (spin->si_newprefID == 0 || spin->si_newprefID == 127) 5230 MSG(_("Too many compound flags")); 5231 else 5232 MSG(_("Too many posponed prefixes and/or compound flags")); 5233 } 5234 5235 if (syllable != NULL) 5236 { 5237 aff_check_string(spin->si_syllable, syllable, "SYLLABLE"); 5238 spin->si_syllable = syllable; 5239 } 5240 5241 if (sofofrom != NULL || sofoto != NULL) 5242 { 5243 if (sofofrom == NULL || sofoto == NULL) 5244 smsg((char_u *)_("Missing SOFO%s line in %s"), 5245 sofofrom == NULL ? "FROM" : "TO", fname); 5246 else if (spin->si_sal.ga_len > 0) 5247 smsg((char_u *)_("Both SAL and SOFO lines in %s"), fname); 5248 else 5249 { 5250 aff_check_string(spin->si_sofofr, sofofrom, "SOFOFROM"); 5251 aff_check_string(spin->si_sofoto, sofoto, "SOFOTO"); 5252 spin->si_sofofr = sofofrom; 5253 spin->si_sofoto = sofoto; 5254 } 5255 } 5256 5257 if (midword != NULL) 5258 { 5259 aff_check_string(spin->si_midword, midword, "MIDWORD"); 5260 spin->si_midword = midword; 5261 } 5262 5263 vim_free(pc); 5264 fclose(fd); 5265 return aff; 5266 } 5267 5268 /* 5269 * Turn an affix flag name into a number, according to the FLAG type. 
5270 * returns zero for failure. 5271 */ 5272 static unsigned 5273 affitem2flag(flagtype, item, fname, lnum) 5274 int flagtype; 5275 char_u *item; 5276 char_u *fname; 5277 int lnum; 5278 { 5279 unsigned res; 5280 char_u *p = item; 5281 5282 res = get_affitem(flagtype, &p); 5283 if (res == 0) 5284 { 5285 if (flagtype == AFT_NUM) 5286 smsg((char_u *)_("Flag is not a number in %s line %d: %s"), 5287 fname, lnum, item); 5288 else 5289 smsg((char_u *)_("Illegal flag in %s line %d: %s"), 5290 fname, lnum, item); 5291 } 5292 if (*p != NUL) 5293 { 5294 smsg((char_u *)_(e_affname), fname, lnum, item); 5295 return 0; 5296 } 5297 5298 return res; 5299 } 5300 5301 /* 5302 * Get one affix name from "*pp" and advance the pointer. 5303 * Returns zero for an error, still advances the pointer then. 5304 */ 5305 static unsigned 5306 get_affitem(flagtype, pp) 5307 int flagtype; 5308 char_u **pp; 5309 { 5310 int res; 5311 5312 if (flagtype == AFT_NUM) 5313 { 5314 if (!VIM_ISDIGIT(**pp)) 5315 { 5316 ++*pp; /* always advance, avoid getting stuck */ 5317 return 0; 5318 } 5319 res = getdigits(pp); 5320 } 5321 else 5322 { 5323 #ifdef FEAT_MBYTE 5324 res = mb_ptr2char_adv(pp); 5325 #else 5326 res = *(*pp)++; 5327 #endif 5328 if (flagtype == AFT_LONG || (flagtype == AFT_CAPLONG 5329 && res >= 'A' && res <= 'Z')) 5330 { 5331 if (**pp == NUL) 5332 return 0; 5333 #ifdef FEAT_MBYTE 5334 res = mb_ptr2char_adv(pp) + (res << 16); 5335 #else 5336 res = *(*pp)++ + (res << 16); 5337 #endif 5338 } 5339 } 5340 return res; 5341 } 5342 5343 /* 5344 * Process the "compflags" string used in an affix file and append it to 5345 * spin->si_compflags. 5346 * The processing involves changing the affix names to ID numbers, so that 5347 * they fit in one byte. 
5348 */ 5349 static void 5350 process_compflags(spin, aff, compflags) 5351 spellinfo_T *spin; 5352 afffile_T *aff; 5353 char_u *compflags; 5354 { 5355 char_u *p; 5356 char_u *prevp; 5357 unsigned flag; 5358 compitem_T *ci; 5359 int id; 5360 int len; 5361 char_u *tp; 5362 char_u key[AH_KEY_LEN]; 5363 hashitem_T *hi; 5364 5365 /* Make room for the old and the new compflags, concatenated with a / in 5366 * between. Processing it makes it shorter, but we don't know by how 5367 * much, thus allocate the maximum. */ 5368 len = STRLEN(compflags) + 1; 5369 if (spin->si_compflags != NULL) 5370 len += STRLEN(spin->si_compflags) + 1; 5371 p = getroom(spin, len, FALSE); 5372 if (p == NULL) 5373 return; 5374 if (spin->si_compflags != NULL) 5375 { 5376 STRCPY(p, spin->si_compflags); 5377 STRCAT(p, "/"); 5378 } 5379 spin->si_compflags = p; 5380 tp = p + STRLEN(p); 5381 5382 for (p = compflags; *p != NUL; ) 5383 { 5384 if (vim_strchr((char_u *)"/*+[]", *p) != NULL) 5385 /* Copy non-flag characters directly. */ 5386 *tp++ = *p++; 5387 else 5388 { 5389 /* First get the flag number, also checks validity. */ 5390 prevp = p; 5391 flag = get_affitem(aff->af_flagtype, &p); 5392 if (flag != 0) 5393 { 5394 /* Find the flag in the hashtable. If it was used before, use 5395 * the existing ID. Otherwise add a new entry. */ 5396 vim_strncpy(key, prevp, p - prevp); 5397 hi = hash_find(&aff->af_comp, key); 5398 if (!HASHITEM_EMPTY(hi)) 5399 id = HI2CI(hi)->ci_newID; 5400 else 5401 { 5402 ci = (compitem_T *)getroom(spin, sizeof(compitem_T), TRUE); 5403 if (ci == NULL) 5404 break; 5405 STRCPY(ci->ci_key, key); 5406 ci->ci_flag = flag; 5407 /* Avoid using a flag ID that has a special meaning in a 5408 * regexp (also inside []). 
*/ 5409 do 5410 { 5411 check_renumber(spin); 5412 id = spin->si_newcompID--; 5413 } while (vim_strchr((char_u *)"/+*[]\\-^", id) != NULL); 5414 ci->ci_newID = id; 5415 hash_add(&aff->af_comp, ci->ci_key); 5416 } 5417 *tp++ = id; 5418 } 5419 if (aff->af_flagtype == AFT_NUM && *p == ',') 5420 ++p; 5421 } 5422 } 5423 5424 *tp = NUL; 5425 } 5426 5427 /* 5428 * Check that the new IDs for postponed affixes and compounding don't overrun 5429 * each other. We have almost 255 available, but start at 0-127 to avoid 5430 * using two bytes for utf-8. When the 0-127 range is used up go to 128-255. 5431 * When that is used up an error message is given. 5432 */ 5433 static void 5434 check_renumber(spin) 5435 spellinfo_T *spin; 5436 { 5437 if (spin->si_newprefID == spin->si_newcompID && spin->si_newcompID < 128) 5438 { 5439 spin->si_newprefID = 127; 5440 spin->si_newcompID = 255; 5441 } 5442 } 5443 5444 /* 5445 * Return TRUE if flag "flag" appears in affix list "afflist". 5446 */ 5447 static int 5448 flag_in_afflist(flagtype, afflist, flag) 5449 int flagtype; 5450 char_u *afflist; 5451 unsigned flag; 5452 { 5453 char_u *p; 5454 unsigned n; 5455 5456 switch (flagtype) 5457 { 5458 case AFT_CHAR: 5459 return vim_strchr(afflist, flag) != NULL; 5460 5461 case AFT_CAPLONG: 5462 case AFT_LONG: 5463 for (p = afflist; *p != NUL; ) 5464 { 5465 #ifdef FEAT_MBYTE 5466 n = mb_ptr2char_adv(&p); 5467 #else 5468 n = *p++; 5469 #endif 5470 if ((flagtype == AFT_LONG || (n >= 'A' && n <= 'Z')) 5471 && *p != NUL) 5472 #ifdef FEAT_MBYTE 5473 n = mb_ptr2char_adv(&p) + (n << 16); 5474 #else 5475 n = *p++ + (n << 16); 5476 #endif 5477 if (n == flag) 5478 return TRUE; 5479 } 5480 break; 5481 5482 case AFT_NUM: 5483 for (p = afflist; *p != NUL; ) 5484 { 5485 n = getdigits(&p); 5486 if (n == flag) 5487 return TRUE; 5488 if (*p != NUL) /* skip over comma */ 5489 ++p; 5490 } 5491 break; 5492 } 5493 return FALSE; 5494 } 5495 5496 /* 5497 * Give a warning when "spinval" and "affval" numbers are set and not the 
same. 5498 */ 5499 static void 5500 aff_check_number(spinval, affval, name) 5501 int spinval; 5502 int affval; 5503 char *name; 5504 { 5505 if (spinval != 0 && spinval != affval) 5506 smsg((char_u *)_("%s value differs from what is used in another .aff file"), name); 5507 } 5508 5509 /* 5510 * Give a warning when "spinval" and "affval" strings are set and not the same. 5511 */ 5512 static void 5513 aff_check_string(spinval, affval, name) 5514 char_u *spinval; 5515 char_u *affval; 5516 char *name; 5517 { 5518 if (spinval != NULL && STRCMP(spinval, affval) != 0) 5519 smsg((char_u *)_("%s value differs from what is used in another .aff file"), name); 5520 } 5521 5522 /* 5523 * Return TRUE if strings "s1" and "s2" are equal. Also consider both being 5524 * NULL as equal. 5525 */ 5526 static int 5527 str_equal(s1, s2) 5528 char_u *s1; 5529 char_u *s2; 5530 { 5531 if (s1 == NULL || s2 == NULL) 5532 return s1 == s2; 5533 return STRCMP(s1, s2) == 0; 5534 } 5535 5536 /* 5537 * Add a from-to item to "gap". Used for REP and SAL items. 5538 * They are stored case-folded. 5539 */ 5540 static void 5541 add_fromto(spin, gap, from, to) 5542 spellinfo_T *spin; 5543 garray_T *gap; 5544 char_u *from; 5545 char_u *to; 5546 { 5547 fromto_T *ftp; 5548 char_u word[MAXWLEN]; 5549 5550 if (ga_grow(gap, 1) == OK) 5551 { 5552 ftp = ((fromto_T *)gap->ga_data) + gap->ga_len; 5553 (void)spell_casefold(from, STRLEN(from), word, MAXWLEN); 5554 ftp->ft_from = getroom_save(spin, word); 5555 (void)spell_casefold(to, STRLEN(to), word, MAXWLEN); 5556 ftp->ft_to = getroom_save(spin, word); 5557 ++gap->ga_len; 5558 } 5559 } 5560 5561 /* 5562 * Convert a boolean argument in a SAL line to TRUE or FALSE; 5563 */ 5564 static int 5565 sal_to_bool(s) 5566 char_u *s; 5567 { 5568 return STRCMP(s, "1") == 0 || STRCMP(s, "true") == 0; 5569 } 5570 5571 /* 5572 * Return TRUE if string "s" contains a non-ASCII character (128 or higher). 5573 * When "s" is NULL FALSE is returned. 
5574 */ 5575 static int 5576 has_non_ascii(s) 5577 char_u *s; 5578 { 5579 char_u *p; 5580 5581 if (s != NULL) 5582 for (p = s; *p != NUL; ++p) 5583 if (*p >= 128) 5584 return TRUE; 5585 return FALSE; 5586 } 5587 5588 /* 5589 * Free the structure filled by spell_read_aff(). 5590 */ 5591 static void 5592 spell_free_aff(aff) 5593 afffile_T *aff; 5594 { 5595 hashtab_T *ht; 5596 hashitem_T *hi; 5597 int todo; 5598 affheader_T *ah; 5599 affentry_T *ae; 5600 5601 vim_free(aff->af_enc); 5602 5603 /* All this trouble to free the "ae_prog" items... */ 5604 for (ht = &aff->af_pref; ; ht = &aff->af_suff) 5605 { 5606 todo = ht->ht_used; 5607 for (hi = ht->ht_array; todo > 0; ++hi) 5608 { 5609 if (!HASHITEM_EMPTY(hi)) 5610 { 5611 --todo; 5612 ah = HI2AH(hi); 5613 for (ae = ah->ah_first; ae != NULL; ae = ae->ae_next) 5614 vim_free(ae->ae_prog); 5615 } 5616 } 5617 if (ht == &aff->af_suff) 5618 break; 5619 } 5620 5621 hash_clear(&aff->af_pref); 5622 hash_clear(&aff->af_suff); 5623 hash_clear(&aff->af_comp); 5624 } 5625 5626 /* 5627 * Read dictionary file "fname". 5628 * Returns OK or FAIL; 5629 */ 5630 static int 5631 spell_read_dic(spin, fname, affile) 5632 spellinfo_T *spin; 5633 char_u *fname; 5634 afffile_T *affile; 5635 { 5636 hashtab_T ht; 5637 char_u line[MAXLINELEN]; 5638 char_u *p; 5639 char_u *afflist; 5640 char_u store_afflist[MAXWLEN]; 5641 int pfxlen; 5642 int need_affix; 5643 char_u *dw; 5644 char_u *pc; 5645 char_u *w; 5646 int l; 5647 hash_T hash; 5648 hashitem_T *hi; 5649 FILE *fd; 5650 int lnum = 1; 5651 int non_ascii = 0; 5652 int retval = OK; 5653 char_u message[MAXLINELEN + MAXWLEN]; 5654 int flags; 5655 int duplicate = 0; 5656 5657 /* 5658 * Open the file. 5659 */ 5660 fd = mch_fopen((char *)fname, "r"); 5661 if (fd == NULL) 5662 { 5663 EMSG2(_(e_notopen), fname); 5664 return FAIL; 5665 } 5666 5667 /* The hashtable is only used to detect duplicated words. 
*/ 5668 hash_init(&ht); 5669 5670 if (spin->si_verbose || p_verbose > 2) 5671 { 5672 if (!spin->si_verbose) 5673 verbose_enter(); 5674 smsg((char_u *)_("Reading dictionary file %s ..."), fname); 5675 out_flush(); 5676 if (!spin->si_verbose) 5677 verbose_leave(); 5678 } 5679 5680 /* start with a message for the first line */ 5681 spin->si_msg_count = 999999; 5682 5683 /* Read and ignore the first line: word count. */ 5684 (void)vim_fgets(line, MAXLINELEN, fd); 5685 if (!vim_isdigit(*skipwhite(line))) 5686 EMSG2(_("E760: No word count in %s"), fname); 5687 5688 /* 5689 * Read all the lines in the file one by one. 5690 * The words are converted to 'encoding' here, before being added to 5691 * the hashtable. 5692 */ 5693 while (!vim_fgets(line, MAXLINELEN, fd) && !got_int) 5694 { 5695 line_breakcheck(); 5696 ++lnum; 5697 if (line[0] == '#' || line[0] == '/') 5698 continue; /* comment line */ 5699 5700 /* Remove CR, LF and white space from the end. White space halfway 5701 * the word is kept to allow e.g., "et al.". */ 5702 l = STRLEN(line); 5703 while (l > 0 && line[l - 1] <= ' ') 5704 --l; 5705 if (l == 0) 5706 continue; /* empty line */ 5707 line[l] = NUL; 5708 5709 /* Find the optional affix names. Replace the SLASH character by a 5710 * slash. */ 5711 afflist = NULL; 5712 for (p = line; *p != NUL; mb_ptr_adv(p)) 5713 { 5714 if (*p == affile->af_slash) 5715 *p = '/'; 5716 else if (*p == '/') 5717 { 5718 *p = NUL; 5719 afflist = p + 1; 5720 break; 5721 } 5722 } 5723 5724 /* Skip non-ASCII words when "spin->si_ascii" is TRUE. */ 5725 if (spin->si_ascii && has_non_ascii(line)) 5726 { 5727 ++non_ascii; 5728 continue; 5729 } 5730 5731 #ifdef FEAT_MBYTE 5732 /* Convert from "SET" to 'encoding' when needed. 
*/ 5733 if (spin->si_conv.vc_type != CONV_NONE) 5734 { 5735 pc = string_convert(&spin->si_conv, line, NULL); 5736 if (pc == NULL) 5737 { 5738 smsg((char_u *)_("Conversion failure for word in %s line %d: %s"), 5739 fname, lnum, line); 5740 continue; 5741 } 5742 w = pc; 5743 } 5744 else 5745 #endif 5746 { 5747 pc = NULL; 5748 w = line; 5749 } 5750 5751 /* This takes time, print a message every 10000 words. */ 5752 if (spin->si_verbose && spin->si_msg_count > 10000) 5753 { 5754 spin->si_msg_count = 0; 5755 vim_snprintf((char *)message, sizeof(message), 5756 _("line %6d, word %6d - %s"), 5757 lnum, spin->si_foldwcount + spin->si_keepwcount, w); 5758 msg_start(); 5759 msg_puts_long_attr(message, 0); 5760 msg_clr_eos(); 5761 msg_didout = FALSE; 5762 msg_col = 0; 5763 out_flush(); 5764 } 5765 5766 /* Store the word in the hashtable to be able to find duplicates. */ 5767 dw = (char_u *)getroom_save(spin, w); 5768 if (dw == NULL) 5769 retval = FAIL; 5770 vim_free(pc); 5771 if (retval == FAIL) 5772 break; 5773 5774 hash = hash_hash(dw); 5775 hi = hash_lookup(&ht, dw, hash); 5776 if (!HASHITEM_EMPTY(hi)) 5777 { 5778 if (p_verbose > 0) 5779 smsg((char_u *)_("Duplicate word in %s line %d: %s"), 5780 fname, lnum, dw); 5781 else if (duplicate == 0) 5782 smsg((char_u *)_("First duplicate word in %s line %d: %s"), 5783 fname, lnum, dw); 5784 ++duplicate; 5785 } 5786 else 5787 hash_add_item(&ht, hi, dw, hash); 5788 5789 flags = 0; 5790 store_afflist[0] = NUL; 5791 pfxlen = 0; 5792 need_affix = FALSE; 5793 if (afflist != NULL) 5794 { 5795 /* Check for affix name that stands for keep-case word and stands 5796 * for rare word (if defined). 
*/ 5797 if (affile->af_kep != 0 && flag_in_afflist( 5798 affile->af_flagtype, afflist, affile->af_kep)) 5799 flags |= WF_KEEPCAP | WF_FIXCAP; 5800 if (affile->af_rar != 0 && flag_in_afflist( 5801 affile->af_flagtype, afflist, affile->af_rar)) 5802 flags |= WF_RARE; 5803 if (affile->af_bad != 0 && flag_in_afflist( 5804 affile->af_flagtype, afflist, affile->af_bad)) 5805 flags |= WF_BANNED; 5806 if (affile->af_needaffix != 0 && flag_in_afflist( 5807 affile->af_flagtype, afflist, affile->af_needaffix)) 5808 need_affix = TRUE; 5809 if (affile->af_needcomp != 0 && flag_in_afflist( 5810 affile->af_flagtype, afflist, affile->af_needcomp)) 5811 flags |= WF_NEEDCOMP; 5812 5813 if (affile->af_pfxpostpone) 5814 /* Need to store the list of prefix IDs with the word. */ 5815 pfxlen = get_pfxlist(affile, afflist, store_afflist); 5816 5817 if (spin->si_compflags != NULL) 5818 /* Need to store the list of compound flags with the word. 5819 * Concatenate them to the list of prefix IDs. */ 5820 get_compflags(affile, afflist, store_afflist + pfxlen); 5821 } 5822 5823 /* Add the word to the word tree(s). */ 5824 if (store_word(spin, dw, flags, spin->si_region, 5825 store_afflist, need_affix) == FAIL) 5826 retval = FAIL; 5827 5828 if (afflist != NULL) 5829 { 5830 /* Find all matching suffixes and add the resulting words. 5831 * Additionally do matching prefixes that combine. */ 5832 if (store_aff_word(spin, dw, afflist, affile, 5833 &affile->af_suff, &affile->af_pref, 5834 FALSE, flags, store_afflist, pfxlen) == FAIL) 5835 retval = FAIL; 5836 5837 /* Find all matching prefixes and add the resulting words. 
*/ 5838 if (store_aff_word(spin, dw, afflist, affile, 5839 &affile->af_pref, NULL, 5840 FALSE, flags, store_afflist, pfxlen) == FAIL) 5841 retval = FAIL; 5842 } 5843 } 5844 5845 if (duplicate > 0) 5846 smsg((char_u *)_("%d duplicate word(s) in %s"), duplicate, fname); 5847 if (spin->si_ascii && non_ascii > 0) 5848 smsg((char_u *)_("Ignored %d word(s) with non-ASCII characters in %s"), 5849 non_ascii, fname); 5850 hash_clear(&ht); 5851 5852 fclose(fd); 5853 return retval; 5854 } 5855 5856 /* 5857 * Get the list of prefix IDs from the affix list "afflist". 5858 * Used for PFXPOSTPONE. 5859 * Put the resulting flags in "store_afflist[MAXWLEN]" with a terminating NUL 5860 * and return the number of affixes. 5861 */ 5862 static int 5863 get_pfxlist(affile, afflist, store_afflist) 5864 afffile_T *affile; 5865 char_u *afflist; 5866 char_u *store_afflist; 5867 { 5868 char_u *p; 5869 char_u *prevp; 5870 int cnt = 0; 5871 int id; 5872 char_u key[AH_KEY_LEN]; 5873 hashitem_T *hi; 5874 5875 for (p = afflist; *p != NUL; ) 5876 { 5877 prevp = p; 5878 if (get_affitem(affile->af_flagtype, &p) != 0) 5879 { 5880 /* A flag is a postponed prefix flag if it appears in "af_pref" 5881 * and it's ID is not zero. */ 5882 vim_strncpy(key, prevp, p - prevp); 5883 hi = hash_find(&affile->af_pref, key); 5884 if (!HASHITEM_EMPTY(hi)) 5885 { 5886 id = HI2AH(hi)->ah_newID; 5887 if (id != 0) 5888 store_afflist[cnt++] = id; 5889 } 5890 } 5891 if (affile->af_flagtype == AFT_NUM && *p == ',') 5892 ++p; 5893 } 5894 5895 store_afflist[cnt] = NUL; 5896 return cnt; 5897 } 5898 5899 /* 5900 * Get the list of compound IDs from the affix list "afflist" that are used 5901 * for compound words. 5902 * Puts the flags in "store_afflist[]". 
5903 */ 5904 static void 5905 get_compflags(affile, afflist, store_afflist) 5906 afffile_T *affile; 5907 char_u *afflist; 5908 char_u *store_afflist; 5909 { 5910 char_u *p; 5911 char_u *prevp; 5912 int cnt = 0; 5913 char_u key[AH_KEY_LEN]; 5914 hashitem_T *hi; 5915 5916 for (p = afflist; *p != NUL; ) 5917 { 5918 prevp = p; 5919 if (get_affitem(affile->af_flagtype, &p) != 0) 5920 { 5921 /* A flag is a compound flag if it appears in "af_comp". */ 5922 vim_strncpy(key, prevp, p - prevp); 5923 hi = hash_find(&affile->af_comp, key); 5924 if (!HASHITEM_EMPTY(hi)) 5925 store_afflist[cnt++] = HI2CI(hi)->ci_newID; 5926 } 5927 if (affile->af_flagtype == AFT_NUM && *p == ',') 5928 ++p; 5929 } 5930 5931 store_afflist[cnt] = NUL; 5932 } 5933 5934 /* 5935 * Apply affixes to a word and store the resulting words. 5936 * "ht" is the hashtable with affentry_T that need to be applied, either 5937 * prefixes or suffixes. 5938 * "xht", when not NULL, is the prefix hashtable, to be used additionally on 5939 * the resulting words for combining affixes. 5940 * 5941 * Returns FAIL when out of memory. 
5942 */ 5943 static int 5944 store_aff_word(spin, word, afflist, affile, ht, xht, comb, flags, 5945 pfxlist, pfxlen) 5946 spellinfo_T *spin; /* spell info */ 5947 char_u *word; /* basic word start */ 5948 char_u *afflist; /* list of names of supported affixes */ 5949 afffile_T *affile; 5950 hashtab_T *ht; 5951 hashtab_T *xht; 5952 int comb; /* only use affixes that combine */ 5953 int flags; /* flags for the word */ 5954 char_u *pfxlist; /* list of prefix IDs */ 5955 int pfxlen; /* nr of flags in "pfxlist" for prefixes, rest 5956 * is compound flags */ 5957 { 5958 int todo; 5959 hashitem_T *hi; 5960 affheader_T *ah; 5961 affentry_T *ae; 5962 regmatch_T regmatch; 5963 char_u newword[MAXWLEN]; 5964 int retval = OK; 5965 int i; 5966 char_u *p; 5967 int use_flags; 5968 char_u *use_pfxlist; 5969 char_u pfx_pfxlist[MAXWLEN]; 5970 size_t wordlen = STRLEN(word); 5971 5972 todo = ht->ht_used; 5973 for (hi = ht->ht_array; todo > 0 && retval == OK; ++hi) 5974 { 5975 if (!HASHITEM_EMPTY(hi)) 5976 { 5977 --todo; 5978 ah = HI2AH(hi); 5979 5980 /* Check that the affix combines, if required, and that the word 5981 * supports this affix. */ 5982 if ((!comb || ah->ah_combine) && flag_in_afflist( 5983 affile->af_flagtype, afflist, ah->ah_flag)) 5984 { 5985 /* Loop over all affix entries with this name. */ 5986 for (ae = ah->ah_first; ae != NULL; ae = ae->ae_next) 5987 { 5988 /* Check the condition. It's not logical to match case 5989 * here, but it is required for compatibility with 5990 * Myspell. 5991 * Another requirement from Myspell is that the chop 5992 * string is shorter than the word itself. 5993 * For prefixes, when "PFXPOSTPONE" was used, only do 5994 * prefixes with a chop string. 
*/ 5995 regmatch.regprog = ae->ae_prog; 5996 regmatch.rm_ic = FALSE; 5997 if ((xht != NULL || !affile->af_pfxpostpone 5998 || ae->ae_chop != NULL) 5999 && (ae->ae_chop == NULL 6000 || STRLEN(ae->ae_chop) < wordlen) 6001 && (ae->ae_prog == NULL 6002 || vim_regexec(®match, word, (colnr_T)0))) 6003 { 6004 /* Match. Remove the chop and add the affix. */ 6005 if (xht == NULL) 6006 { 6007 /* prefix: chop/add at the start of the word */ 6008 if (ae->ae_add == NULL) 6009 *newword = NUL; 6010 else 6011 STRCPY(newword, ae->ae_add); 6012 p = word; 6013 if (ae->ae_chop != NULL) 6014 { 6015 /* Skip chop string. */ 6016 #ifdef FEAT_MBYTE 6017 if (has_mbyte) 6018 { 6019 i = mb_charlen(ae->ae_chop); 6020 for ( ; i > 0; --i) 6021 mb_ptr_adv(p); 6022 } 6023 else 6024 #endif 6025 p += STRLEN(ae->ae_chop); 6026 } 6027 STRCAT(newword, p); 6028 } 6029 else 6030 { 6031 /* suffix: chop/add at the end of the word */ 6032 STRCPY(newword, word); 6033 if (ae->ae_chop != NULL) 6034 { 6035 /* Remove chop string. */ 6036 p = newword + STRLEN(newword); 6037 i = MB_CHARLEN(ae->ae_chop); 6038 for ( ; i > 0; --i) 6039 mb_ptr_back(newword, p); 6040 *p = NUL; 6041 } 6042 if (ae->ae_add != NULL) 6043 STRCAT(newword, ae->ae_add); 6044 } 6045 6046 /* Obey the "rare" flag of the affix. */ 6047 if (ae->ae_rare) 6048 use_flags = flags | WF_RARE; 6049 else 6050 use_flags = flags; 6051 6052 /* Obey the "nocomp" flag of the affix: don't use the 6053 * compound flags. */ 6054 use_pfxlist = pfxlist; 6055 if (ae->ae_nocomp && pfxlist != NULL) 6056 { 6057 vim_strncpy(pfx_pfxlist, pfxlist, pfxlen); 6058 use_pfxlist = pfx_pfxlist; 6059 } 6060 6061 /* When there are postponed prefixes... */ 6062 if (spin->si_prefroot != NULL 6063 && spin->si_prefroot->wn_sibling != NULL) 6064 { 6065 /* ... add a flag to indicate an affix was used. */ 6066 use_flags |= WF_HAS_AFF; 6067 6068 /* ... don't use a prefix list if combining 6069 * affixes is not allowed. But do use the 6070 * compound flags after them. 
*/ 6071 if ((!ah->ah_combine || comb) && pfxlist != NULL) 6072 use_pfxlist += pfxlen; 6073 } 6074 6075 /* Store the modified word. */ 6076 if (store_word(spin, newword, use_flags, 6077 spin->si_region, use_pfxlist, FALSE) == FAIL) 6078 retval = FAIL; 6079 6080 /* When added a suffix and combining is allowed also 6081 * try adding prefixes additionally. */ 6082 if (xht != NULL && ah->ah_combine) 6083 if (store_aff_word(spin, newword, afflist, affile, 6084 xht, NULL, TRUE, 6085 use_flags, use_pfxlist, pfxlen) == FAIL) 6086 retval = FAIL; 6087 } 6088 } 6089 } 6090 } 6091 } 6092 6093 return retval; 6094 } 6095 6096 /* 6097 * Read a file with a list of words. 6098 */ 6099 static int 6100 spell_read_wordfile(spin, fname) 6101 spellinfo_T *spin; 6102 char_u *fname; 6103 { 6104 FILE *fd; 6105 long lnum = 0; 6106 char_u rline[MAXLINELEN]; 6107 char_u *line; 6108 char_u *pc = NULL; 6109 char_u *p; 6110 int l; 6111 int retval = OK; 6112 int did_word = FALSE; 6113 int non_ascii = 0; 6114 int flags; 6115 int regionmask; 6116 6117 /* 6118 * Open the file. 6119 */ 6120 fd = mch_fopen((char *)fname, "r"); 6121 if (fd == NULL) 6122 { 6123 EMSG2(_(e_notopen), fname); 6124 return FAIL; 6125 } 6126 6127 if (spin->si_verbose || p_verbose > 2) 6128 { 6129 if (!spin->si_verbose) 6130 verbose_enter(); 6131 smsg((char_u *)_("Reading word file %s ..."), fname); 6132 out_flush(); 6133 if (!spin->si_verbose) 6134 verbose_leave(); 6135 } 6136 6137 /* 6138 * Read all the lines in the file one by one. 6139 */ 6140 while (!vim_fgets(rline, MAXLINELEN, fd) && !got_int) 6141 { 6142 line_breakcheck(); 6143 ++lnum; 6144 6145 /* Skip comment lines. */ 6146 if (*rline == '#') 6147 continue; 6148 6149 /* Remove CR, LF and white space from the end. */ 6150 l = STRLEN(rline); 6151 while (l > 0 && rline[l - 1] <= ' ') 6152 --l; 6153 if (l == 0) 6154 continue; /* empty or blank line */ 6155 rline[l] = NUL; 6156 6157 /* Convert from "=encoding={encoding}" to 'encoding' when needed. 
*/ 6158 vim_free(pc); 6159 #ifdef FEAT_MBYTE 6160 if (spin->si_conv.vc_type != CONV_NONE) 6161 { 6162 pc = string_convert(&spin->si_conv, rline, NULL); 6163 if (pc == NULL) 6164 { 6165 smsg((char_u *)_("Conversion failure for word in %s line %d: %s"), 6166 fname, lnum, rline); 6167 continue; 6168 } 6169 line = pc; 6170 } 6171 else 6172 #endif 6173 { 6174 pc = NULL; 6175 line = rline; 6176 } 6177 6178 if (*line == '/') 6179 { 6180 ++line; 6181 if (STRNCMP(line, "encoding=", 9) == 0) 6182 { 6183 if (spin->si_conv.vc_type != CONV_NONE) 6184 smsg((char_u *)_("Duplicate /encoding= line ignored in %s line %d: %s"), 6185 fname, lnum, line - 1); 6186 else if (did_word) 6187 smsg((char_u *)_("/encoding= line after word ignored in %s line %d: %s"), 6188 fname, lnum, line - 1); 6189 else 6190 { 6191 #ifdef FEAT_MBYTE 6192 char_u *enc; 6193 6194 /* Setup for conversion to 'encoding'. */ 6195 line += 10; 6196 enc = enc_canonize(line); 6197 if (enc != NULL && !spin->si_ascii 6198 && convert_setup(&spin->si_conv, enc, 6199 p_enc) == FAIL) 6200 smsg((char_u *)_("Conversion in %s not supported: from %s to %s"), 6201 fname, line, p_enc); 6202 vim_free(enc); 6203 spin->si_conv.vc_fail = TRUE; 6204 #else 6205 smsg((char_u *)_("Conversion in %s not supported"), fname); 6206 #endif 6207 } 6208 continue; 6209 } 6210 6211 if (STRNCMP(line, "regions=", 8) == 0) 6212 { 6213 if (spin->si_region_count > 1) 6214 smsg((char_u *)_("Duplicate /regions= line ignored in %s line %d: %s"), 6215 fname, lnum, line); 6216 else 6217 { 6218 line += 8; 6219 if (STRLEN(line) > 16) 6220 smsg((char_u *)_("Too many regions in %s line %d: %s"), 6221 fname, lnum, line); 6222 else 6223 { 6224 spin->si_region_count = STRLEN(line) / 2; 6225 STRCPY(spin->si_region_name, line); 6226 6227 /* Adjust the mask for a word valid in all regions. 
*/ 6228 spin->si_region = (1 << spin->si_region_count) - 1; 6229 } 6230 } 6231 continue; 6232 } 6233 6234 smsg((char_u *)_("/ line ignored in %s line %d: %s"), 6235 fname, lnum, line - 1); 6236 continue; 6237 } 6238 6239 flags = 0; 6240 regionmask = spin->si_region; 6241 6242 /* Check for flags and region after a slash. */ 6243 p = vim_strchr(line, '/'); 6244 if (p != NULL) 6245 { 6246 *p++ = NUL; 6247 while (*p != NUL) 6248 { 6249 if (*p == '=') /* keep-case word */ 6250 flags |= WF_KEEPCAP | WF_FIXCAP; 6251 else if (*p == '!') /* Bad, bad, wicked word. */ 6252 flags |= WF_BANNED; 6253 else if (*p == '?') /* Rare word. */ 6254 flags |= WF_RARE; 6255 else if (VIM_ISDIGIT(*p)) /* region number(s) */ 6256 { 6257 if ((flags & WF_REGION) == 0) /* first one */ 6258 regionmask = 0; 6259 flags |= WF_REGION; 6260 6261 l = *p - '0'; 6262 if (l > spin->si_region_count) 6263 { 6264 smsg((char_u *)_("Invalid region nr in %s line %d: %s"), 6265 fname, lnum, p); 6266 break; 6267 } 6268 regionmask |= 1 << (l - 1); 6269 } 6270 else 6271 { 6272 smsg((char_u *)_("Unrecognized flags in %s line %d: %s"), 6273 fname, lnum, p); 6274 break; 6275 } 6276 ++p; 6277 } 6278 } 6279 6280 /* Skip non-ASCII words when "spin->si_ascii" is TRUE. */ 6281 if (spin->si_ascii && has_non_ascii(line)) 6282 { 6283 ++non_ascii; 6284 continue; 6285 } 6286 6287 /* Normal word: store it. */ 6288 if (store_word(spin, line, flags, regionmask, NULL, FALSE) == FAIL) 6289 { 6290 retval = FAIL; 6291 break; 6292 } 6293 did_word = TRUE; 6294 } 6295 6296 vim_free(pc); 6297 fclose(fd); 6298 6299 if (spin->si_ascii && non_ascii > 0 && (spin->si_verbose || p_verbose > 2)) 6300 { 6301 if (p_verbose > 2) 6302 verbose_enter(); 6303 smsg((char_u *)_("Ignored %d words with non-ASCII characters"), 6304 non_ascii); 6305 if (p_verbose > 2) 6306 verbose_leave(); 6307 } 6308 return retval; 6309 } 6310 6311 /* 6312 * Get part of an sblock_T, "len" bytes long. 
6313 * This avoids calling free() for every little struct we use (and keeping 6314 * track of them). 6315 * The memory is cleared to all zeros. 6316 * Returns NULL when out of memory. 6317 */ 6318 static void * 6319 getroom(spin, len, align) 6320 spellinfo_T *spin; 6321 size_t len; /* length needed */ 6322 int align; /* align for pointer */ 6323 { 6324 char_u *p; 6325 sblock_T *bl = spin->si_blocks; 6326 6327 if (align && bl != NULL) 6328 /* Round size up for alignment. On some systems structures need to be 6329 * aligned to the size of a pointer (e.g., SPARC). */ 6330 bl->sb_used = (bl->sb_used + sizeof(char *) - 1) 6331 & ~(sizeof(char *) - 1); 6332 6333 if (bl == NULL || bl->sb_used + len > SBLOCKSIZE) 6334 { 6335 /* Allocate a block of memory. This is not freed until much later. */ 6336 bl = (sblock_T *)alloc_clear((unsigned)(sizeof(sblock_T) + SBLOCKSIZE)); 6337 if (bl == NULL) 6338 return NULL; 6339 bl->sb_next = spin->si_blocks; 6340 spin->si_blocks = bl; 6341 bl->sb_used = 0; 6342 ++spin->si_blocks_cnt; 6343 } 6344 6345 p = bl->sb_data + bl->sb_used; 6346 bl->sb_used += len; 6347 6348 return p; 6349 } 6350 6351 /* 6352 * Make a copy of a string into memory allocated with getroom(). 6353 */ 6354 static char_u * 6355 getroom_save(spin, s) 6356 spellinfo_T *spin; 6357 char_u *s; 6358 { 6359 char_u *sc; 6360 6361 sc = (char_u *)getroom(spin, STRLEN(s) + 1, FALSE); 6362 if (sc != NULL) 6363 STRCPY(sc, s); 6364 return sc; 6365 } 6366 6367 6368 /* 6369 * Free the list of allocated sblock_T. 6370 */ 6371 static void 6372 free_blocks(bl) 6373 sblock_T *bl; 6374 { 6375 sblock_T *next; 6376 6377 while (bl != NULL) 6378 { 6379 next = bl->sb_next; 6380 vim_free(bl); 6381 bl = next; 6382 } 6383 } 6384 6385 /* 6386 * Allocate the root of a word tree. 6387 */ 6388 static wordnode_T * 6389 wordtree_alloc(spin) 6390 spellinfo_T *spin; 6391 { 6392 return (wordnode_T *)getroom(spin, sizeof(wordnode_T), TRUE); 6393 } 6394 6395 /* 6396 * Store a word in the tree(s). 
6397 * Always store it in the case-folded tree. For a keep-case word this is 6398 * useful when the word can also be used with all caps (no WF_FIXCAP flag) and 6399 * used to find suggestions. 6400 * For a keep-case word also store it in the keep-case tree. 6401 * When "pfxlist" is not NULL store the word for each postponed prefix ID and 6402 * compound flag. 6403 */ 6404 static int 6405 store_word(spin, word, flags, region, pfxlist, need_affix) 6406 spellinfo_T *spin; 6407 char_u *word; 6408 int flags; /* extra flags, WF_BANNED */ 6409 int region; /* supported region(s) */ 6410 char_u *pfxlist; /* list of prefix IDs or NULL */ 6411 int need_affix; /* only store word with affix ID */ 6412 { 6413 int len = STRLEN(word); 6414 int ct = captype(word, word + len); 6415 char_u foldword[MAXWLEN]; 6416 int res = OK; 6417 char_u *p; 6418 6419 (void)spell_casefold(word, len, foldword, MAXWLEN); 6420 for (p = pfxlist; res == OK; ++p) 6421 { 6422 if (!need_affix || (p != NULL && *p != NUL)) 6423 res = tree_add_word(spin, foldword, spin->si_foldroot, ct | flags, 6424 region, p == NULL ? 0 : *p); 6425 if (p == NULL || *p == NUL) 6426 break; 6427 } 6428 ++spin->si_foldwcount; 6429 6430 if (res == OK && (ct == WF_KEEPCAP || (flags & WF_KEEPCAP))) 6431 { 6432 for (p = pfxlist; res == OK; ++p) 6433 { 6434 if (!need_affix || (p != NULL && *p != NUL)) 6435 res = tree_add_word(spin, word, spin->si_keeproot, flags, 6436 region, p == NULL ? 0 : *p); 6437 if (p == NULL || *p == NUL) 6438 break; 6439 } 6440 ++spin->si_keepwcount; 6441 } 6442 return res; 6443 } 6444 6445 /* 6446 * Add word "word" to a word tree at "root". 6447 * When "flags" < 0 we are adding to the prefix tree where flags is used for 6448 * "rare" and "region" is the condition nr. 6449 * Returns FAIL when out of memory. 
6450 */ 6451 static int 6452 tree_add_word(spin, word, root, flags, region, affixID) 6453 spellinfo_T *spin; 6454 char_u *word; 6455 wordnode_T *root; 6456 int flags; 6457 int region; 6458 int affixID; 6459 { 6460 wordnode_T *node = root; 6461 wordnode_T *np; 6462 wordnode_T *copyp, **copyprev; 6463 wordnode_T **prev = NULL; 6464 int i; 6465 6466 /* Add each byte of the word to the tree, including the NUL at the end. */ 6467 for (i = 0; ; ++i) 6468 { 6469 /* When there is more than one reference to this node we need to make 6470 * a copy, so that we can modify it. Copy the whole list of siblings 6471 * (we don't optimize for a partly shared list of siblings). */ 6472 if (node != NULL && node->wn_refs > 1) 6473 { 6474 --node->wn_refs; 6475 copyprev = prev; 6476 for (copyp = node; copyp != NULL; copyp = copyp->wn_sibling) 6477 { 6478 /* Allocate a new node and copy the info. */ 6479 np = get_wordnode(spin); 6480 if (np == NULL) 6481 return FAIL; 6482 np->wn_child = copyp->wn_child; 6483 if (np->wn_child != NULL) 6484 ++np->wn_child->wn_refs; /* child gets extra ref */ 6485 np->wn_byte = copyp->wn_byte; 6486 if (np->wn_byte == NUL) 6487 { 6488 np->wn_flags = copyp->wn_flags; 6489 np->wn_region = copyp->wn_region; 6490 np->wn_affixID = copyp->wn_affixID; 6491 } 6492 6493 /* Link the new node in the list, there will be one ref. */ 6494 np->wn_refs = 1; 6495 *copyprev = np; 6496 copyprev = &np->wn_sibling; 6497 6498 /* Let "node" point to the head of the copied list. */ 6499 if (copyp == node) 6500 node = np; 6501 } 6502 } 6503 6504 /* Look for the sibling that has the same character. They are sorted 6505 * on byte value, thus stop searching when a sibling is found with a 6506 * higher byte value. For zero bytes (end of word) the sorting is 6507 * done on flags and then on affixID. */ 6508 while (node != NULL 6509 && (node->wn_byte < word[i] 6510 || (node->wn_byte == NUL 6511 && (flags < 0 6512 ? 
node->wn_affixID < affixID 6513 : node->wn_flags < (flags & WN_MASK) 6514 || (node->wn_flags == (flags & WN_MASK) 6515 && node->wn_affixID < affixID))))) 6516 { 6517 prev = &node->wn_sibling; 6518 node = *prev; 6519 } 6520 if (node == NULL 6521 || node->wn_byte != word[i] 6522 || (word[i] == NUL 6523 && (flags < 0 6524 || node->wn_flags != (flags & WN_MASK) 6525 || node->wn_affixID != affixID))) 6526 { 6527 /* Allocate a new node. */ 6528 np = get_wordnode(spin); 6529 if (np == NULL) 6530 return FAIL; 6531 np->wn_byte = word[i]; 6532 6533 /* If "node" is NULL this is a new child or the end of the sibling 6534 * list: ref count is one. Otherwise use ref count of sibling and 6535 * make ref count of sibling one (matters when inserting in front 6536 * of the list of siblings). */ 6537 if (node == NULL) 6538 np->wn_refs = 1; 6539 else 6540 { 6541 np->wn_refs = node->wn_refs; 6542 node->wn_refs = 1; 6543 } 6544 *prev = np; 6545 np->wn_sibling = node; 6546 node = np; 6547 } 6548 6549 if (word[i] == NUL) 6550 { 6551 node->wn_flags = flags; 6552 node->wn_region |= region; 6553 node->wn_affixID = affixID; 6554 break; 6555 } 6556 prev = &node->wn_child; 6557 node = *prev; 6558 } 6559 #ifdef SPELL_PRINTTREE 6560 smsg("Added \"%s\"", word); 6561 spell_print_tree(root->wn_sibling); 6562 #endif 6563 6564 /* count nr of words added since last message */ 6565 ++spin->si_msg_count; 6566 6567 if (spin->si_compress_cnt > 1) 6568 { 6569 if (--spin->si_compress_cnt == 1) 6570 /* Did enough words to lower the block count limit. */ 6571 spin->si_blocks_cnt += compress_inc; 6572 } 6573 6574 /* 6575 * When we have allocated lots of memory we need to compress the word tree 6576 * to free up some room. But compression is slow, and we might actually 6577 * need that room, thus only compress in the following situations: 6578 * 1. When not compressed before (si_compress_cnt == 0): when using 6579 * "compress_start" blocks. 6580 * 2. 
When compressed before and used "compress_inc" blocks before 6581 * adding "compress_added" words (si_compress_cnt > 1). 6582 * 3. When compressed before, added "compress_added" words 6583 * (si_compress_cnt == 1) and the number of free nodes drops below the 6584 * maximum word length. 6585 */ 6586 #ifndef SPELL_PRINTTREE 6587 if (spin->si_compress_cnt == 1 6588 ? spin->si_free_count < MAXWLEN 6589 : spin->si_blocks_cnt >= compress_start) 6590 #endif 6591 { 6592 /* Decrement the block counter. The effect is that we compress again 6593 * when the freed up room has been used and another "compress_inc" 6594 * blocks have been allocated. Unless "compress_added" words have 6595 * been added, then the limit is put back again. */ 6596 spin->si_blocks_cnt -= compress_inc; 6597 spin->si_compress_cnt = compress_added; 6598 6599 if (spin->si_verbose) 6600 { 6601 msg_start(); 6602 msg_puts((char_u *)_(msg_compressing)); 6603 msg_clr_eos(); 6604 msg_didout = FALSE; 6605 msg_col = 0; 6606 out_flush(); 6607 } 6608 6609 /* Compress both trees. Either they both have many nodes, which makes 6610 * compression useful, or one of them is small, which means 6611 * compression goes fast. */ 6612 wordtree_compress(spin, spin->si_foldroot); 6613 wordtree_compress(spin, spin->si_keeproot); 6614 } 6615 6616 return OK; 6617 } 6618 6619 /* 6620 * Check the 'mkspellmem' option. Return FAIL if it's wrong. 6621 * Sets "sps_flags". 
6622 */ 6623 int 6624 spell_check_msm() 6625 { 6626 char_u *p = p_msm; 6627 long start = 0; 6628 long inc = 0; 6629 long added = 0; 6630 6631 if (!VIM_ISDIGIT(*p)) 6632 return FAIL; 6633 /* block count = (value * 1024) / SBLOCKSIZE (but avoid overflow)*/ 6634 start = (getdigits(&p) * 10) / (SBLOCKSIZE / 102); 6635 if (*p != ',') 6636 return FAIL; 6637 ++p; 6638 if (!VIM_ISDIGIT(*p)) 6639 return FAIL; 6640 inc = (getdigits(&p) * 102) / (SBLOCKSIZE / 10); 6641 if (*p != ',') 6642 return FAIL; 6643 ++p; 6644 if (!VIM_ISDIGIT(*p)) 6645 return FAIL; 6646 added = getdigits(&p) * 1024; 6647 if (*p != NUL) 6648 return FAIL; 6649 6650 if (start == 0 || inc == 0 || added == 0 || inc > start) 6651 return FAIL; 6652 6653 compress_start = start; 6654 compress_inc = inc; 6655 compress_added = added; 6656 return OK; 6657 } 6658 6659 6660 /* 6661 * Get a wordnode_T, either from the list of previously freed nodes or 6662 * allocate a new one. 6663 */ 6664 static wordnode_T * 6665 get_wordnode(spin) 6666 spellinfo_T *spin; 6667 { 6668 wordnode_T *n; 6669 6670 if (spin->si_first_free == NULL) 6671 n = (wordnode_T *)getroom(spin, sizeof(wordnode_T), TRUE); 6672 else 6673 { 6674 n = spin->si_first_free; 6675 spin->si_first_free = n->wn_child; 6676 vim_memset(n, 0, sizeof(wordnode_T)); 6677 --spin->si_free_count; 6678 } 6679 #ifdef SPELL_PRINTTREE 6680 n->wn_nr = ++spin->si_wordnode_nr; 6681 #endif 6682 return n; 6683 } 6684 6685 /* 6686 * Decrement the reference count on a node (which is the head of a list of 6687 * siblings). If the reference count becomes zero free the node and its 6688 * siblings. 
6689 */ 6690 static void 6691 deref_wordnode(spin, node) 6692 spellinfo_T *spin; 6693 wordnode_T *node; 6694 { 6695 wordnode_T *np; 6696 6697 if (--node->wn_refs == 0) 6698 for (np = node; np != NULL; np = np->wn_sibling) 6699 { 6700 if (np->wn_child != NULL) 6701 deref_wordnode(spin, np->wn_child); 6702 free_wordnode(spin, np); 6703 } 6704 } 6705 6706 /* 6707 * Free a wordnode_T for re-use later. 6708 * Only the "wn_child" field becomes invalid. 6709 */ 6710 static void 6711 free_wordnode(spin, n) 6712 spellinfo_T *spin; 6713 wordnode_T *n; 6714 { 6715 n->wn_child = spin->si_first_free; 6716 spin->si_first_free = n; 6717 ++spin->si_free_count; 6718 } 6719 6720 /* 6721 * Compress a tree: find tails that are identical and can be shared. 6722 */ 6723 static void 6724 wordtree_compress(spin, root) 6725 spellinfo_T *spin; 6726 wordnode_T *root; 6727 { 6728 hashtab_T ht; 6729 int n; 6730 int tot = 0; 6731 int perc; 6732 6733 /* Skip the root itself, it's not actually used. The first sibling is the 6734 * start of the tree. */ 6735 if (root->wn_sibling != NULL) 6736 { 6737 hash_init(&ht); 6738 n = node_compress(spin, root->wn_sibling, &ht, &tot); 6739 6740 #ifndef SPELL_PRINTTREE 6741 if (spin->si_verbose || p_verbose > 2) 6742 #endif 6743 { 6744 if (!spin->si_verbose) 6745 verbose_enter(); 6746 if (tot > 1000000) 6747 perc = (tot - n) / (tot / 100); 6748 else if (tot == 0) 6749 perc = 0; 6750 else 6751 perc = (tot - n) * 100 / tot; 6752 smsg((char_u *)_("Compressed %d of %d nodes; %d%% remaining"), 6753 n, tot, perc); 6754 if (p_verbose > 2) 6755 verbose_leave(); 6756 } 6757 #ifdef SPELL_PRINTTREE 6758 spell_print_tree(root->wn_sibling); 6759 #endif 6760 hash_clear(&ht); 6761 } 6762 } 6763 6764 /* 6765 * Compress a node, its siblings and its children, depth first. 6766 * Returns the number of compressed nodes. 
6767 */ 6768 static int 6769 node_compress(spin, node, ht, tot) 6770 spellinfo_T *spin; 6771 wordnode_T *node; 6772 hashtab_T *ht; 6773 int *tot; /* total count of nodes before compressing, 6774 incremented while going through the tree */ 6775 { 6776 wordnode_T *np; 6777 wordnode_T *tp; 6778 wordnode_T *child; 6779 hash_T hash; 6780 hashitem_T *hi; 6781 int len = 0; 6782 unsigned nr, n; 6783 int compressed = 0; 6784 6785 /* 6786 * Go through the list of siblings. Compress each child and then try 6787 * finding an identical child to replace it. 6788 * Note that with "child" we mean not just the node that is pointed to, 6789 * but the whole list of siblings, of which the node is the first. 6790 */ 6791 for (np = node; np != NULL && !got_int; np = np->wn_sibling) 6792 { 6793 ++len; 6794 if ((child = np->wn_child) != NULL) 6795 { 6796 /* Compress the child. This fills hashkey. */ 6797 compressed += node_compress(spin, child, ht, tot); 6798 6799 /* Try to find an identical child. */ 6800 hash = hash_hash(child->wn_u1.hashkey); 6801 hi = hash_lookup(ht, child->wn_u1.hashkey, hash); 6802 tp = NULL; 6803 if (!HASHITEM_EMPTY(hi)) 6804 { 6805 /* There are children with an identical hash value. Now check 6806 * if there is one that is really identical. */ 6807 for (tp = HI2WN(hi); tp != NULL; tp = tp->wn_u2.next) 6808 if (node_equal(child, tp)) 6809 { 6810 /* Found one! Now use that child in place of the 6811 * current one. This means the current child and all 6812 * its siblings is unlinked from the tree. */ 6813 ++tp->wn_refs; 6814 deref_wordnode(spin, child); 6815 np->wn_child = tp; 6816 ++compressed; 6817 break; 6818 } 6819 if (tp == NULL) 6820 { 6821 /* No other child with this hash value equals the child of 6822 * the node, add it to the linked list after the first 6823 * item. */ 6824 tp = HI2WN(hi); 6825 child->wn_u2.next = tp->wn_u2.next; 6826 tp->wn_u2.next = child; 6827 } 6828 } 6829 else 6830 /* No other child has this hash value, add it to the 6831 * hashtable. 
*/ 6832 hash_add_item(ht, hi, child->wn_u1.hashkey, hash); 6833 } 6834 } 6835 *tot += len; 6836 6837 /* 6838 * Make a hash key for the node and its siblings, so that we can quickly 6839 * find a lookalike node. This must be done after compressing the sibling 6840 * list, otherwise the hash key would become invalid by the compression. 6841 */ 6842 node->wn_u1.hashkey[0] = len; 6843 nr = 0; 6844 for (np = node; np != NULL; np = np->wn_sibling) 6845 { 6846 if (np->wn_byte == NUL) 6847 /* end node: use wn_flags, wn_region and wn_affixID */ 6848 n = np->wn_flags + (np->wn_region << 8) + (np->wn_affixID << 16); 6849 else 6850 /* byte node: use the byte value and the child pointer */ 6851 n = np->wn_byte + ((long_u)np->wn_child << 8); 6852 nr = nr * 101 + n; 6853 } 6854 6855 /* Avoid NUL bytes, it terminates the hash key. */ 6856 n = nr & 0xff; 6857 node->wn_u1.hashkey[1] = n == 0 ? 1 : n; 6858 n = (nr >> 8) & 0xff; 6859 node->wn_u1.hashkey[2] = n == 0 ? 1 : n; 6860 n = (nr >> 16) & 0xff; 6861 node->wn_u1.hashkey[3] = n == 0 ? 1 : n; 6862 n = (nr >> 24) & 0xff; 6863 node->wn_u1.hashkey[4] = n == 0 ? 1 : n; 6864 node->wn_u1.hashkey[5] = NUL; 6865 6866 /* Check for CTRL-C pressed now and then. */ 6867 fast_breakcheck(); 6868 6869 return compressed; 6870 } 6871 6872 /* 6873 * Return TRUE when two nodes have identical siblings and children. 6874 */ 6875 static int 6876 node_equal(n1, n2) 6877 wordnode_T *n1; 6878 wordnode_T *n2; 6879 { 6880 wordnode_T *p1; 6881 wordnode_T *p2; 6882 6883 for (p1 = n1, p2 = n2; p1 != NULL && p2 != NULL; 6884 p1 = p1->wn_sibling, p2 = p2->wn_sibling) 6885 if (p1->wn_byte != p2->wn_byte 6886 || (p1->wn_byte == NUL 6887 ? (p1->wn_flags != p2->wn_flags 6888 || p1->wn_region != p2->wn_region 6889 || p1->wn_affixID != p2->wn_affixID) 6890 : (p1->wn_child != p2->wn_child))) 6891 break; 6892 6893 return p1 == NULL && p2 == NULL; 6894 } 6895 6896 /* 6897 * Write a number to file "fd", MSB first, in "len" bytes. 
6898 */ 6899 void 6900 put_bytes(fd, nr, len) 6901 FILE *fd; 6902 long_u nr; 6903 int len; 6904 { 6905 int i; 6906 6907 for (i = len - 1; i >= 0; --i) 6908 putc((int)(nr >> (i * 8)), fd); 6909 } 6910 6911 static int 6912 #ifdef __BORLANDC__ 6913 _RTLENTRYF 6914 #endif 6915 rep_compare __ARGS((const void *s1, const void *s2)); 6916 6917 /* 6918 * Function given to qsort() to sort the REP items on "from" string. 6919 */ 6920 static int 6921 #ifdef __BORLANDC__ 6922 _RTLENTRYF 6923 #endif 6924 rep_compare(s1, s2) 6925 const void *s1; 6926 const void *s2; 6927 { 6928 fromto_T *p1 = (fromto_T *)s1; 6929 fromto_T *p2 = (fromto_T *)s2; 6930 6931 return STRCMP(p1->ft_from, p2->ft_from); 6932 } 6933 6934 /* 6935 * Write the Vim .spl file "fname". 6936 * Return FAIL or OK; 6937 */ 6938 static int 6939 write_vim_spell(spin, fname) 6940 spellinfo_T *spin; 6941 char_u *fname; 6942 { 6943 FILE *fd; 6944 int regionmask; 6945 int round; 6946 wordnode_T *tree; 6947 int nodecount; 6948 int i; 6949 int l; 6950 garray_T *gap; 6951 fromto_T *ftp; 6952 char_u *p; 6953 int rr; 6954 int retval = OK; 6955 6956 fd = mch_fopen((char *)fname, "w"); 6957 if (fd == NULL) 6958 { 6959 EMSG2(_(e_notopen), fname); 6960 return FAIL; 6961 } 6962 6963 /* <HEADER>: <fileID> <versionnr> */ 6964 /* <fileID> */ 6965 if (fwrite(VIMSPELLMAGIC, VIMSPELLMAGICL, (size_t)1, fd) != 1) 6966 { 6967 EMSG(_(e_write)); 6968 retval = FAIL; 6969 } 6970 putc(VIMSPELLVERSION, fd); /* <versionnr> */ 6971 6972 /* 6973 * <SECTIONS>: <section> ... <sectionend> 6974 */ 6975 6976 /* SN_REGION: <regionname> ... 6977 * Write the region names only if there is more than one. */ 6978 if (spin->si_region_count > 1) 6979 { 6980 putc(SN_REGION, fd); /* <sectionID> */ 6981 putc(SNF_REQUIRED, fd); /* <sectionflags> */ 6982 l = spin->si_region_count * 2; 6983 put_bytes(fd, (long_u)l, 4); /* <sectionlen> */ 6984 fwrite(spin->si_region_name, (size_t)l, (size_t)1, fd); 6985 /* <regionname> ... 
*/ 6986 regionmask = (1 << spin->si_region_count) - 1; 6987 } 6988 else 6989 regionmask = 0; 6990 6991 /* SN_CHARFLAGS: <charflagslen> <charflags> <folcharslen> <folchars> 6992 * 6993 * The table with character flags and the table for case folding. 6994 * This makes sure the same characters are recognized as word characters 6995 * when generating an when using a spell file. 6996 * Skip this for ASCII, the table may conflict with the one used for 6997 * 'encoding'. 6998 * Also skip this for an .add.spl file, the main spell file must contain 6999 * the table (avoids that it conflicts). File is shorter too. 7000 */ 7001 if (!spin->si_ascii && !spin->si_add) 7002 { 7003 char_u folchars[128 * 8]; 7004 int flags; 7005 7006 putc(SN_CHARFLAGS, fd); /* <sectionID> */ 7007 putc(SNF_REQUIRED, fd); /* <sectionflags> */ 7008 7009 /* Form the <folchars> string first, we need to know its length. */ 7010 l = 0; 7011 for (i = 128; i < 256; ++i) 7012 { 7013 #ifdef FEAT_MBYTE 7014 if (has_mbyte) 7015 l += mb_char2bytes(spelltab.st_fold[i], folchars + l); 7016 else 7017 #endif 7018 folchars[l++] = spelltab.st_fold[i]; 7019 } 7020 put_bytes(fd, (long_u)(1 + 128 + 2 + l), 4); /* <sectionlen> */ 7021 7022 fputc(128, fd); /* <charflagslen> */ 7023 for (i = 128; i < 256; ++i) 7024 { 7025 flags = 0; 7026 if (spelltab.st_isw[i]) 7027 flags |= CF_WORD; 7028 if (spelltab.st_isu[i]) 7029 flags |= CF_UPPER; 7030 fputc(flags, fd); /* <charflags> */ 7031 } 7032 7033 put_bytes(fd, (long_u)l, 2); /* <folcharslen> */ 7034 fwrite(folchars, (size_t)l, (size_t)1, fd); /* <folchars> */ 7035 } 7036 7037 /* SN_MIDWORD: <midword> */ 7038 if (spin->si_midword != NULL) 7039 { 7040 putc(SN_MIDWORD, fd); /* <sectionID> */ 7041 putc(SNF_REQUIRED, fd); /* <sectionflags> */ 7042 7043 i = STRLEN(spin->si_midword); 7044 put_bytes(fd, (long_u)i, 4); /* <sectionlen> */ 7045 fwrite(spin->si_midword, (size_t)i, (size_t)1, fd); /* <midword> */ 7046 } 7047 7048 /* SN_PREFCOND: <prefcondcnt> <prefcond> ... 
*/ 7049 if (spin->si_prefcond.ga_len > 0) 7050 { 7051 putc(SN_PREFCOND, fd); /* <sectionID> */ 7052 putc(SNF_REQUIRED, fd); /* <sectionflags> */ 7053 7054 l = write_spell_prefcond(NULL, &spin->si_prefcond); 7055 put_bytes(fd, (long_u)l, 4); /* <sectionlen> */ 7056 7057 write_spell_prefcond(fd, &spin->si_prefcond); 7058 } 7059 7060 /* SN_REP: <repcount> <rep> ... 7061 * SN_SAL: <salflags> <salcount> <sal> ... */ 7062 7063 /* Sort the REP items. */ 7064 qsort(spin->si_rep.ga_data, (size_t)spin->si_rep.ga_len, 7065 sizeof(fromto_T), rep_compare); 7066 7067 /* round 1: SN_REP section 7068 * round 2: SN_SAL section (unless SN_SOFO is used) */ 7069 for (round = 1; round <= 2; ++round) 7070 { 7071 if (round == 1) 7072 { 7073 gap = &spin->si_rep; 7074 putc(SN_REP, fd); /* <sectionID> */ 7075 } 7076 else 7077 { 7078 if (spin->si_sofofr != NULL && spin->si_sofoto != NULL) 7079 /* using SN_SOFO section instead of SN_SAL */ 7080 break; 7081 gap = &spin->si_sal; 7082 putc(SN_SAL, fd); /* <sectionID> */ 7083 } 7084 7085 /* This is for making suggestions, section is not required. */ 7086 putc(0, fd); /* <sectionflags> */ 7087 7088 /* Compute the length of what follows. 
*/ 7089 l = 2; /* count <repcount> or <salcount> */ 7090 for (i = 0; i < gap->ga_len; ++i) 7091 { 7092 ftp = &((fromto_T *)gap->ga_data)[i]; 7093 l += 1 + STRLEN(ftp->ft_from); /* count <*fromlen> and <*from> */ 7094 l += 1 + STRLEN(ftp->ft_to); /* count <*tolen> and <*to> */ 7095 } 7096 if (round == 2) 7097 ++l; /* count <salflags> */ 7098 put_bytes(fd, (long_u)l, 4); /* <sectionlen> */ 7099 7100 if (round == 2) 7101 { 7102 i = 0; 7103 if (spin->si_followup) 7104 i |= SAL_F0LLOWUP; 7105 if (spin->si_collapse) 7106 i |= SAL_COLLAPSE; 7107 if (spin->si_rem_accents) 7108 i |= SAL_REM_ACCENTS; 7109 putc(i, fd); /* <salflags> */ 7110 } 7111 7112 put_bytes(fd, (long_u)gap->ga_len, 2); /* <repcount> or <salcount> */ 7113 for (i = 0; i < gap->ga_len; ++i) 7114 { 7115 /* <rep> : <repfromlen> <repfrom> <reptolen> <repto> */ 7116 /* <sal> : <salfromlen> <salfrom> <saltolen> <salto> */ 7117 ftp = &((fromto_T *)gap->ga_data)[i]; 7118 for (rr = 1; rr <= 2; ++rr) 7119 { 7120 p = rr == 1 ? ftp->ft_from : ftp->ft_to; 7121 l = STRLEN(p); 7122 putc(l, fd); 7123 fwrite(p, l, (size_t)1, fd); 7124 } 7125 } 7126 7127 } 7128 7129 /* SN_SOFO: <sofofromlen> <sofofrom> <sofotolen> <sofoto> 7130 * This is for making suggestions, section is not required. */ 7131 if (spin->si_sofofr != NULL && spin->si_sofoto != NULL) 7132 { 7133 putc(SN_SOFO, fd); /* <sectionID> */ 7134 putc(0, fd); /* <sectionflags> */ 7135 7136 l = STRLEN(spin->si_sofofr); 7137 put_bytes(fd, (long_u)(l + STRLEN(spin->si_sofoto) + 4), 4); 7138 /* <sectionlen> */ 7139 7140 put_bytes(fd, (long_u)l, 2); /* <sofofromlen> */ 7141 fwrite(spin->si_sofofr, l, (size_t)1, fd); /* <sofofrom> */ 7142 7143 l = STRLEN(spin->si_sofoto); 7144 put_bytes(fd, (long_u)l, 2); /* <sofotolen> */ 7145 fwrite(spin->si_sofoto, l, (size_t)1, fd); /* <sofoto> */ 7146 } 7147 7148 /* SN_MAP: <mapstr> 7149 * This is for making suggestions, section is not required. 
*/ 7150 if (spin->si_map.ga_len > 0) 7151 { 7152 putc(SN_MAP, fd); /* <sectionID> */ 7153 putc(0, fd); /* <sectionflags> */ 7154 l = spin->si_map.ga_len; 7155 put_bytes(fd, (long_u)l, 4); /* <sectionlen> */ 7156 fwrite(spin->si_map.ga_data, (size_t)l, (size_t)1, fd); 7157 /* <mapstr> */ 7158 } 7159 7160 /* SN_COMPOUND: compound info. 7161 * We don't mark it required, when not supported all compound words will 7162 * be bad words. */ 7163 if (spin->si_compflags != NULL) 7164 { 7165 putc(SN_COMPOUND, fd); /* <sectionID> */ 7166 putc(0, fd); /* <sectionflags> */ 7167 7168 l = STRLEN(spin->si_compflags); 7169 put_bytes(fd, (long_u)(l + 3), 4); /* <sectionlen> */ 7170 putc(spin->si_compmax, fd); /* <compmax> */ 7171 putc(spin->si_compminlen, fd); /* <compminlen> */ 7172 putc(spin->si_compsylmax, fd); /* <compsylmax> */ 7173 /* <compflags> */ 7174 fwrite(spin->si_compflags, (size_t)l, (size_t)1, fd); 7175 } 7176 7177 /* SN_NOBREAK: NOBREAK flag */ 7178 if (spin->si_nobreak) 7179 { 7180 putc(SN_NOBREAK, fd); /* <sectionID> */ 7181 putc(0, fd); /* <sectionflags> */ 7182 7183 /* It's empty, the precense of the section flags the feature. */ 7184 put_bytes(fd, (long_u)0, 4); /* <sectionlen> */ 7185 } 7186 7187 /* SN_SYLLABLE: syllable info. 7188 * We don't mark it required, when not supported syllables will not be 7189 * counted. 
*/ 7190 if (spin->si_syllable != NULL) 7191 { 7192 putc(SN_SYLLABLE, fd); /* <sectionID> */ 7193 putc(0, fd); /* <sectionflags> */ 7194 7195 l = STRLEN(spin->si_syllable); 7196 put_bytes(fd, (long_u)l, 4); /* <sectionlen> */ 7197 fwrite(spin->si_syllable, (size_t)l, (size_t)1, fd); /* <syllable> */ 7198 } 7199 7200 /* end of <SECTIONS> */ 7201 putc(SN_END, fd); /* <sectionend> */ 7202 7203 7204 /* 7205 * <LWORDTREE> <KWORDTREE> <PREFIXTREE> 7206 */ 7207 spin->si_memtot = 0; 7208 for (round = 1; round <= 3; ++round) 7209 { 7210 if (round == 1) 7211 tree = spin->si_foldroot->wn_sibling; 7212 else if (round == 2) 7213 tree = spin->si_keeproot->wn_sibling; 7214 else 7215 tree = spin->si_prefroot->wn_sibling; 7216 7217 /* Clear the index and wnode fields in the tree. */ 7218 clear_node(tree); 7219 7220 /* Count the number of nodes. Needed to be able to allocate the 7221 * memory when reading the nodes. Also fills in index for shared 7222 * nodes. */ 7223 nodecount = put_node(NULL, tree, 0, regionmask, round == 3); 7224 7225 /* number of nodes in 4 bytes */ 7226 put_bytes(fd, (long_u)nodecount, 4); /* <nodecount> */ 7227 spin->si_memtot += nodecount + nodecount * sizeof(int); 7228 7229 /* Write the nodes. */ 7230 (void)put_node(fd, tree, 0, regionmask, round == 3); 7231 } 7232 7233 /* Write another byte to check for errors. */ 7234 if (putc(0, fd) == EOF) 7235 retval = FAIL; 7236 7237 if (fclose(fd) == EOF) 7238 retval = FAIL; 7239 7240 return retval; 7241 } 7242 7243 /* 7244 * Clear the index and wnode fields of "node", it siblings and its 7245 * children. This is needed because they are a union with other items to save 7246 * space. 
7247 */ 7248 static void 7249 clear_node(node) 7250 wordnode_T *node; 7251 { 7252 wordnode_T *np; 7253 7254 if (node != NULL) 7255 for (np = node; np != NULL; np = np->wn_sibling) 7256 { 7257 np->wn_u1.index = 0; 7258 np->wn_u2.wnode = NULL; 7259 7260 if (np->wn_byte != NUL) 7261 clear_node(np->wn_child); 7262 } 7263 } 7264 7265 7266 /* 7267 * Dump a word tree at node "node". 7268 * 7269 * This first writes the list of possible bytes (siblings). Then for each 7270 * byte recursively write the children. 7271 * 7272 * NOTE: The code here must match the code in read_tree(), since assumptions 7273 * are made about the indexes (so that we don't have to write them in the 7274 * file). 7275 * 7276 * Returns the number of nodes used. 7277 */ 7278 static int 7279 put_node(fd, node, index, regionmask, prefixtree) 7280 FILE *fd; /* NULL when only counting */ 7281 wordnode_T *node; 7282 int index; 7283 int regionmask; 7284 int prefixtree; /* TRUE for PREFIXTREE */ 7285 { 7286 int newindex = index; 7287 int siblingcount = 0; 7288 wordnode_T *np; 7289 int flags; 7290 7291 /* If "node" is zero the tree is empty. */ 7292 if (node == NULL) 7293 return 0; 7294 7295 /* Store the index where this node is written. */ 7296 node->wn_u1.index = index; 7297 7298 /* Count the number of siblings. */ 7299 for (np = node; np != NULL; np = np->wn_sibling) 7300 ++siblingcount; 7301 7302 /* Write the sibling count. */ 7303 if (fd != NULL) 7304 putc(siblingcount, fd); /* <siblingcount> */ 7305 7306 /* Write each sibling byte and optionally extra info. */ 7307 for (np = node; np != NULL; np = np->wn_sibling) 7308 { 7309 if (np->wn_byte == 0) 7310 { 7311 if (fd != NULL) 7312 { 7313 /* For a NUL byte (end of word) write the flags etc. */ 7314 if (prefixtree) 7315 { 7316 /* In PREFIXTREE write the required affixID and the 7317 * associated condition nr (stored in wn_region). 
The 7318 * byte value is misused to store the "rare" and "not 7319 * combining" flags */ 7320 if (np->wn_flags == (short_u)PFX_FLAGS) 7321 putc(BY_NOFLAGS, fd); /* <byte> */ 7322 else 7323 { 7324 putc(BY_FLAGS, fd); /* <byte> */ 7325 putc(np->wn_flags, fd); /* <pflags> */ 7326 } 7327 putc(np->wn_affixID, fd); /* <affixID> */ 7328 put_bytes(fd, (long_u)np->wn_region, 2); /* <prefcondnr> */ 7329 } 7330 else 7331 { 7332 /* For word trees we write the flag/region items. */ 7333 flags = np->wn_flags; 7334 if (regionmask != 0 && np->wn_region != regionmask) 7335 flags |= WF_REGION; 7336 if (np->wn_affixID != 0) 7337 flags |= WF_AFX; 7338 if (flags == 0) 7339 { 7340 /* word without flags or region */ 7341 putc(BY_NOFLAGS, fd); /* <byte> */ 7342 } 7343 else 7344 { 7345 if (np->wn_flags >= 0x100) 7346 { 7347 putc(BY_FLAGS2, fd); /* <byte> */ 7348 putc(flags, fd); /* <flags> */ 7349 putc((unsigned)flags >> 8, fd); /* <flags2> */ 7350 } 7351 else 7352 { 7353 putc(BY_FLAGS, fd); /* <byte> */ 7354 putc(flags, fd); /* <flags> */ 7355 } 7356 if (flags & WF_REGION) 7357 putc(np->wn_region, fd); /* <region> */ 7358 if (flags & WF_AFX) 7359 putc(np->wn_affixID, fd); /* <affixID> */ 7360 } 7361 } 7362 } 7363 } 7364 else 7365 { 7366 if (np->wn_child->wn_u1.index != 0 7367 && np->wn_child->wn_u2.wnode != node) 7368 { 7369 /* The child is written elsewhere, write the reference. */ 7370 if (fd != NULL) 7371 { 7372 putc(BY_INDEX, fd); /* <byte> */ 7373 /* <nodeidx> */ 7374 put_bytes(fd, (long_u)np->wn_child->wn_u1.index, 3); 7375 } 7376 } 7377 else if (np->wn_child->wn_u2.wnode == NULL) 7378 /* We will write the child below and give it an index. */ 7379 np->wn_child->wn_u2.wnode = node; 7380 7381 if (fd != NULL) 7382 if (putc(np->wn_byte, fd) == EOF) /* <byte> or <xbyte> */ 7383 { 7384 EMSG(_(e_write)); 7385 return 0; 7386 } 7387 } 7388 } 7389 7390 /* Space used in the array when reading: one for each sibling and one for 7391 * the count. 
*/ 7392 newindex += siblingcount + 1; 7393 7394 /* Recursively dump the children of each sibling. */ 7395 for (np = node; np != NULL; np = np->wn_sibling) 7396 if (np->wn_byte != 0 && np->wn_child->wn_u2.wnode == node) 7397 newindex = put_node(fd, np->wn_child, newindex, regionmask, 7398 prefixtree); 7399 7400 return newindex; 7401 } 7402 7403 7404 /* 7405 * ":mkspell [-ascii] outfile infile ..." 7406 * ":mkspell [-ascii] addfile" 7407 */ 7408 void 7409 ex_mkspell(eap) 7410 exarg_T *eap; 7411 { 7412 int fcount; 7413 char_u **fnames; 7414 char_u *arg = eap->arg; 7415 int ascii = FALSE; 7416 7417 if (STRNCMP(arg, "-ascii", 6) == 0) 7418 { 7419 ascii = TRUE; 7420 arg = skipwhite(arg + 6); 7421 } 7422 7423 /* Expand all the remaining arguments (e.g., $VIMRUNTIME). */ 7424 if (get_arglist_exp(arg, &fcount, &fnames) == OK) 7425 { 7426 mkspell(fcount, fnames, ascii, eap->forceit, FALSE); 7427 FreeWild(fcount, fnames); 7428 } 7429 } 7430 7431 /* 7432 * Create a Vim spell file from one or more word lists. 7433 * "fnames[0]" is the output file name. 7434 * "fnames[fcount - 1]" is the last input file name. 7435 * Exception: when "fnames[0]" ends in ".add" it's used as the input file name 7436 * and ".spl" is appended to make the output file name. 
7437 */ 7438 static void 7439 mkspell(fcount, fnames, ascii, overwrite, added_word) 7440 int fcount; 7441 char_u **fnames; 7442 int ascii; /* -ascii argument given */ 7443 int overwrite; /* overwrite existing output file */ 7444 int added_word; /* invoked through "zg" */ 7445 { 7446 char_u fname[MAXPATHL]; 7447 char_u wfname[MAXPATHL]; 7448 char_u **innames; 7449 int incount; 7450 afffile_T *(afile[8]); 7451 int i; 7452 int len; 7453 struct stat st; 7454 int error = FALSE; 7455 spellinfo_T spin; 7456 7457 vim_memset(&spin, 0, sizeof(spin)); 7458 spin.si_verbose = !added_word; 7459 spin.si_ascii = ascii; 7460 spin.si_followup = TRUE; 7461 spin.si_rem_accents = TRUE; 7462 ga_init2(&spin.si_rep, (int)sizeof(fromto_T), 20); 7463 ga_init2(&spin.si_sal, (int)sizeof(fromto_T), 20); 7464 ga_init2(&spin.si_map, (int)sizeof(char_u), 100); 7465 ga_init2(&spin.si_prefcond, (int)sizeof(char_u *), 50); 7466 spin.si_newcompID = 127; /* start compound ID at first maximum */ 7467 7468 /* default: fnames[0] is output file, following are input files */ 7469 innames = &fnames[1]; 7470 incount = fcount - 1; 7471 7472 if (fcount >= 1) 7473 { 7474 len = STRLEN(fnames[0]); 7475 if (fcount == 1 && len > 4 && STRCMP(fnames[0] + len - 4, ".add") == 0) 7476 { 7477 /* For ":mkspell path/en.latin1.add" output file is 7478 * "path/en.latin1.add.spl". */ 7479 innames = &fnames[0]; 7480 incount = 1; 7481 vim_snprintf((char *)wfname, sizeof(wfname), "%s.spl", fnames[0]); 7482 } 7483 else if (fcount == 1) 7484 { 7485 /* For ":mkspell path/vim" output file is "path/vim.latin1.spl". */ 7486 innames = &fnames[0]; 7487 incount = 1; 7488 vim_snprintf((char *)wfname, sizeof(wfname), "%s.%s.spl", fnames[0], 7489 spin.si_ascii ? (char_u *)"ascii" : spell_enc()); 7490 } 7491 else if (len > 4 && STRCMP(fnames[0] + len - 4, ".spl") == 0) 7492 { 7493 /* Name ends in ".spl", use as the file name. 
*/ 7494 vim_strncpy(wfname, fnames[0], sizeof(wfname) - 1); 7495 } 7496 else 7497 /* Name should be language, make the file name from it. */ 7498 vim_snprintf((char *)wfname, sizeof(wfname), "%s.%s.spl", fnames[0], 7499 spin.si_ascii ? (char_u *)"ascii" : spell_enc()); 7500 7501 /* Check for .ascii.spl. */ 7502 if (strstr((char *)gettail(wfname), ".ascii.") != NULL) 7503 spin.si_ascii = TRUE; 7504 7505 /* Check for .add.spl. */ 7506 if (strstr((char *)gettail(wfname), ".add.") != NULL) 7507 spin.si_add = TRUE; 7508 } 7509 7510 if (incount <= 0) 7511 EMSG(_(e_invarg)); /* need at least output and input names */ 7512 else if (vim_strchr(gettail(wfname), '_') != NULL) 7513 EMSG(_("E751: Output file name must not have region name")); 7514 else if (incount > 8) 7515 EMSG(_("E754: Only up to 8 regions supported")); 7516 else 7517 { 7518 /* Check for overwriting before doing things that may take a lot of 7519 * time. */ 7520 if (!overwrite && mch_stat((char *)wfname, &st) >= 0) 7521 { 7522 EMSG(_(e_exists)); 7523 return; 7524 } 7525 if (mch_isdir(wfname)) 7526 { 7527 EMSG2(_(e_isadir2), wfname); 7528 return; 7529 } 7530 7531 /* 7532 * Init the aff and dic pointers. 7533 * Get the region names if there are more than 2 arguments. 
7534 */ 7535 for (i = 0; i < incount; ++i) 7536 { 7537 afile[i] = NULL; 7538 7539 if (incount > 1) 7540 { 7541 len = STRLEN(innames[i]); 7542 if (STRLEN(gettail(innames[i])) < 5 7543 || innames[i][len - 3] != '_') 7544 { 7545 EMSG2(_("E755: Invalid region in %s"), innames[i]); 7546 return; 7547 } 7548 spin.si_region_name[i * 2] = TOLOWER_ASC(innames[i][len - 2]); 7549 spin.si_region_name[i * 2 + 1] = 7550 TOLOWER_ASC(innames[i][len - 1]); 7551 } 7552 } 7553 spin.si_region_count = incount; 7554 7555 spin.si_foldroot = wordtree_alloc(&spin); 7556 spin.si_keeproot = wordtree_alloc(&spin); 7557 spin.si_prefroot = wordtree_alloc(&spin); 7558 if (spin.si_foldroot == NULL 7559 || spin.si_keeproot == NULL 7560 || spin.si_prefroot == NULL) 7561 { 7562 free_blocks(spin.si_blocks); 7563 return; 7564 } 7565 7566 /* When not producing a .add.spl file clear the character table when 7567 * we encounter one in the .aff file. This means we dump the current 7568 * one in the .spl file if the .aff file doesn't define one. That's 7569 * better than guessing the contents, the table will match a 7570 * previously loaded spell file. */ 7571 if (!spin.si_add) 7572 spin.si_clear_chartab = TRUE; 7573 7574 /* 7575 * Read all the .aff and .dic files. 7576 * Text is converted to 'encoding'. 7577 * Words are stored in the case-folded and keep-case trees. 7578 */ 7579 for (i = 0; i < incount && !error; ++i) 7580 { 7581 spin.si_conv.vc_type = CONV_NONE; 7582 spin.si_region = 1 << i; 7583 7584 vim_snprintf((char *)fname, sizeof(fname), "%s.aff", innames[i]); 7585 if (mch_stat((char *)fname, &st) >= 0) 7586 { 7587 /* Read the .aff file. Will init "spin->si_conv" based on the 7588 * "SET" line. */ 7589 afile[i] = spell_read_aff(&spin, fname); 7590 if (afile[i] == NULL) 7591 error = TRUE; 7592 else 7593 { 7594 /* Read the .dic file and store the words in the trees. 
*/ 7595 vim_snprintf((char *)fname, sizeof(fname), "%s.dic", 7596 innames[i]); 7597 if (spell_read_dic(&spin, fname, afile[i]) == FAIL) 7598 error = TRUE; 7599 } 7600 } 7601 else 7602 { 7603 /* No .aff file, try reading the file as a word list. Store 7604 * the words in the trees. */ 7605 if (spell_read_wordfile(&spin, innames[i]) == FAIL) 7606 error = TRUE; 7607 } 7608 7609 #ifdef FEAT_MBYTE 7610 /* Free any conversion stuff. */ 7611 convert_setup(&spin.si_conv, NULL, NULL); 7612 #endif 7613 } 7614 7615 if (spin.si_compflags != NULL && spin.si_nobreak) 7616 MSG(_("Warning: both compounding and NOBREAK specified")); 7617 7618 if (!error) 7619 { 7620 /* 7621 * Combine tails in the tree. 7622 */ 7623 if (spin.si_verbose || p_verbose > 2) 7624 { 7625 if (!spin.si_verbose) 7626 verbose_enter(); 7627 MSG(_(msg_compressing)); 7628 out_flush(); 7629 if (!spin.si_verbose) 7630 verbose_leave(); 7631 } 7632 wordtree_compress(&spin, spin.si_foldroot); 7633 wordtree_compress(&spin, spin.si_keeproot); 7634 wordtree_compress(&spin, spin.si_prefroot); 7635 } 7636 7637 if (!error) 7638 { 7639 /* 7640 * Write the info in the spell file. 7641 */ 7642 if (spin.si_verbose || p_verbose > 2) 7643 { 7644 if (!spin.si_verbose) 7645 verbose_enter(); 7646 smsg((char_u *)_("Writing spell file %s ..."), wfname); 7647 out_flush(); 7648 if (!spin.si_verbose) 7649 verbose_leave(); 7650 } 7651 7652 error = write_vim_spell(&spin, wfname) == FAIL; 7653 7654 if (spin.si_verbose || p_verbose > 2) 7655 { 7656 if (!spin.si_verbose) 7657 verbose_enter(); 7658 MSG(_("Done!")); 7659 smsg((char_u *)_("Estimated runtime memory use: %d bytes"), 7660 spin.si_memtot); 7661 out_flush(); 7662 if (!spin.si_verbose) 7663 verbose_leave(); 7664 } 7665 7666 /* If the file is loaded need to reload it. */ 7667 if (!error) 7668 spell_reload_one(wfname, added_word); 7669 } 7670 7671 /* Free the allocated memory. 
*/ 7672 ga_clear(&spin.si_rep); 7673 ga_clear(&spin.si_sal); 7674 ga_clear(&spin.si_map); 7675 ga_clear(&spin.si_prefcond); 7676 7677 /* Free the .aff file structures. */ 7678 for (i = 0; i < incount; ++i) 7679 if (afile[i] != NULL) 7680 spell_free_aff(afile[i]); 7681 7682 /* Free all the bits and pieces at once. */ 7683 free_blocks(spin.si_blocks); 7684 } 7685 } 7686 7687 7688 /* 7689 * ":[count]spellgood {word}" 7690 * ":[count]spellwrong {word}" 7691 */ 7692 void 7693 ex_spell(eap) 7694 exarg_T *eap; 7695 { 7696 spell_add_word(eap->arg, STRLEN(eap->arg), eap->cmdidx == CMD_spellwrong, 7697 eap->forceit ? 0 : (int)eap->line2); 7698 } 7699 7700 /* 7701 * Add "word[len]" to 'spellfile' as a good or bad word. 7702 */ 7703 void 7704 spell_add_word(word, len, bad, index) 7705 char_u *word; 7706 int len; 7707 int bad; 7708 int index; /* "zG" and "zW": zero, otherwise index in 7709 'spellfile' */ 7710 { 7711 FILE *fd; 7712 buf_T *buf = NULL; 7713 int new_spf = FALSE; 7714 struct stat st; 7715 char_u *fname; 7716 char_u fnamebuf[MAXPATHL]; 7717 char_u line[MAXWLEN * 2]; 7718 long fpos, fpos_next = 0; 7719 int i; 7720 char_u *spf; 7721 7722 if (index == 0) /* use internal wordlist */ 7723 { 7724 if (int_wordlist == NULL) 7725 { 7726 int_wordlist = vim_tempname('s'); 7727 if (int_wordlist == NULL) 7728 return; 7729 } 7730 fname = int_wordlist; 7731 } 7732 else 7733 { 7734 /* If 'spellfile' isn't set figure out a good default value. 
*/ 7735 if (*curbuf->b_p_spf == NUL) 7736 { 7737 init_spellfile(); 7738 new_spf = TRUE; 7739 } 7740 7741 if (*curbuf->b_p_spf == NUL) 7742 { 7743 EMSG2(_(e_notset), "spellfile"); 7744 return; 7745 } 7746 7747 for (spf = curbuf->b_p_spf, i = 1; *spf != NUL; ++i) 7748 { 7749 copy_option_part(&spf, fnamebuf, MAXPATHL, ","); 7750 if (i == index) 7751 break; 7752 if (*spf == NUL) 7753 { 7754 EMSGN(_("E765: 'spellfile' does not have %ld entries"), index); 7755 return; 7756 } 7757 } 7758 7759 /* Check that the user isn't editing the .add file somewhere. */ 7760 buf = buflist_findname_exp(fnamebuf); 7761 if (buf != NULL && buf->b_ml.ml_mfp == NULL) 7762 buf = NULL; 7763 if (buf != NULL && bufIsChanged(buf)) 7764 { 7765 EMSG(_(e_bufloaded)); 7766 return; 7767 } 7768 7769 fname = fnamebuf; 7770 } 7771 7772 if (bad) 7773 { 7774 /* When the word also appears as good word we need to remove that one, 7775 * since its flags sort before the one with WF_BANNED. */ 7776 fd = mch_fopen((char *)fname, "r"); 7777 if (fd != NULL) 7778 { 7779 while (!vim_fgets(line, MAXWLEN * 2, fd)) 7780 { 7781 fpos = fpos_next; 7782 fpos_next = ftell(fd); 7783 if (STRNCMP(word, line, len) == 0 7784 && (line[len] == '/' || line[len] < ' ')) 7785 { 7786 /* Found duplicate word. Remove it by writing a '#' at 7787 * the start of the line. Mixing reading and writing 7788 * doesn't work for all systems, close the file first. */ 7789 fclose(fd); 7790 fd = mch_fopen((char *)fname, "r+"); 7791 if (fd == NULL) 7792 break; 7793 if (fseek(fd, fpos, SEEK_SET) == 0) 7794 fputc('#', fd); 7795 fseek(fd, fpos_next, SEEK_SET); 7796 } 7797 } 7798 fclose(fd); 7799 } 7800 } 7801 7802 fd = mch_fopen((char *)fname, "a"); 7803 if (fd == NULL && new_spf) 7804 { 7805 /* We just initialized the 'spellfile' option and can't open the file. 7806 * We may need to create the "spell" directory first. We already 7807 * checked the runtime directory is writable in init_spellfile(). 
*/ 7808 STRCPY(NameBuff, fname); 7809 *gettail_sep(NameBuff) = NUL; 7810 if (mch_stat((char *)NameBuff, &st) < 0) 7811 { 7812 /* The directory doesn't exist. Try creating it and opening the 7813 * file again. */ 7814 vim_mkdir(NameBuff, 0755); 7815 fd = mch_fopen((char *)fname, "a"); 7816 } 7817 } 7818 7819 if (fd == NULL) 7820 EMSG2(_(e_notopen), fname); 7821 else 7822 { 7823 if (bad) 7824 fprintf(fd, "%.*s/!\n", len, word); 7825 else 7826 fprintf(fd, "%.*s\n", len, word); 7827 fclose(fd); 7828 7829 /* Update the .add.spl file. */ 7830 mkspell(1, &fname, FALSE, TRUE, TRUE); 7831 7832 /* If the .add file is edited somewhere, reload it. */ 7833 if (buf != NULL) 7834 buf_reload(buf); 7835 7836 redraw_all_later(NOT_VALID); 7837 } 7838 } 7839 7840 /* 7841 * Initialize 'spellfile' for the current buffer. 7842 */ 7843 static void 7844 init_spellfile() 7845 { 7846 char_u buf[MAXPATHL]; 7847 int l; 7848 char_u *fname; 7849 char_u *rtp; 7850 char_u *lend; 7851 int aspath = FALSE; 7852 char_u *lstart = curbuf->b_p_spl; 7853 7854 if (*curbuf->b_p_spl != NUL && curbuf->b_langp.ga_len > 0) 7855 { 7856 /* Find the end of the language name. Exclude the region. If there 7857 * is a path separator remember the start of the tail. */ 7858 for (lend = curbuf->b_p_spl; *lend != NUL 7859 && vim_strchr((char_u *)",._", *lend) == NULL; ++lend) 7860 if (vim_ispathsep(*lend)) 7861 { 7862 aspath = TRUE; 7863 lstart = lend + 1; 7864 } 7865 7866 /* Loop over all entries in 'runtimepath'. Use the first one where we 7867 * are allowed to write. */ 7868 rtp = p_rtp; 7869 while (*rtp != NUL) 7870 { 7871 if (aspath) 7872 /* Use directory of an entry with path, e.g., for 7873 * "/dir/lg.utf-8.spl" use "/dir". */ 7874 vim_strncpy(buf, curbuf->b_p_spl, lstart - curbuf->b_p_spl - 1); 7875 else 7876 /* Copy the path from 'runtimepath' to buf[]. 
*/ 7877 copy_option_part(&rtp, buf, MAXPATHL, ","); 7878 if (filewritable(buf) == 2) 7879 { 7880 /* Use the first language name from 'spelllang' and the 7881 * encoding used in the first loaded .spl file. */ 7882 if (aspath) 7883 vim_strncpy(buf, curbuf->b_p_spl, lend - curbuf->b_p_spl); 7884 else 7885 { 7886 l = STRLEN(buf); 7887 vim_snprintf((char *)buf + l, MAXPATHL - l, 7888 "/spell/%.*s", (int)(lend - lstart), lstart); 7889 } 7890 l = STRLEN(buf); 7891 fname = LANGP_ENTRY(curbuf->b_langp, 0)->lp_slang->sl_fname; 7892 vim_snprintf((char *)buf + l, MAXPATHL - l, ".%s.add", 7893 fname != NULL 7894 && strstr((char *)gettail(fname), ".ascii.") != NULL 7895 ? (char_u *)"ascii" : spell_enc()); 7896 set_option_value((char_u *)"spellfile", 0L, buf, OPT_LOCAL); 7897 break; 7898 } 7899 aspath = FALSE; 7900 } 7901 } 7902 } 7903 7904 7905 /* 7906 * Init the chartab used for spelling for ASCII. 7907 * EBCDIC is not supported! 7908 */ 7909 static void 7910 clear_spell_chartab(sp) 7911 spelltab_T *sp; 7912 { 7913 int i; 7914 7915 /* Init everything to FALSE. */ 7916 vim_memset(sp->st_isw, FALSE, sizeof(sp->st_isw)); 7917 vim_memset(sp->st_isu, FALSE, sizeof(sp->st_isu)); 7918 for (i = 0; i < 256; ++i) 7919 { 7920 sp->st_fold[i] = i; 7921 sp->st_upper[i] = i; 7922 } 7923 7924 /* We include digits. A word shouldn't start with a digit, but handling 7925 * that is done separately. */ 7926 for (i = '0'; i <= '9'; ++i) 7927 sp->st_isw[i] = TRUE; 7928 for (i = 'A'; i <= 'Z'; ++i) 7929 { 7930 sp->st_isw[i] = TRUE; 7931 sp->st_isu[i] = TRUE; 7932 sp->st_fold[i] = i + 0x20; 7933 } 7934 for (i = 'a'; i <= 'z'; ++i) 7935 { 7936 sp->st_isw[i] = TRUE; 7937 sp->st_upper[i] = i - 0x20; 7938 } 7939 } 7940 7941 /* 7942 * Init the chartab used for spelling. Only depends on 'encoding'. 7943 * Called once while starting up and when 'encoding' changes. 
7944 * The default is to use isalpha(), but the spell file should define the word 7945 * characters to make it possible that 'encoding' differs from the current 7946 * locale. For utf-8 we don't use isalpha() but our own functions. 7947 */ 7948 void 7949 init_spell_chartab() 7950 { 7951 int i; 7952 7953 did_set_spelltab = FALSE; 7954 clear_spell_chartab(&spelltab); 7955 #ifdef FEAT_MBYTE 7956 if (enc_dbcs) 7957 { 7958 /* DBCS: assume double-wide characters are word characters. */ 7959 for (i = 128; i <= 255; ++i) 7960 if (MB_BYTE2LEN(i) == 2) 7961 spelltab.st_isw[i] = TRUE; 7962 } 7963 else if (enc_utf8) 7964 { 7965 for (i = 128; i < 256; ++i) 7966 { 7967 spelltab.st_isu[i] = utf_isupper(i); 7968 spelltab.st_isw[i] = spelltab.st_isu[i] || utf_islower(i); 7969 spelltab.st_fold[i] = utf_fold(i); 7970 spelltab.st_upper[i] = utf_toupper(i); 7971 } 7972 } 7973 else 7974 #endif 7975 { 7976 /* Rough guess: use locale-dependent library functions. */ 7977 for (i = 128; i < 256; ++i) 7978 { 7979 if (MB_ISUPPER(i)) 7980 { 7981 spelltab.st_isw[i] = TRUE; 7982 spelltab.st_isu[i] = TRUE; 7983 spelltab.st_fold[i] = MB_TOLOWER(i); 7984 } 7985 else if (MB_ISLOWER(i)) 7986 { 7987 spelltab.st_isw[i] = TRUE; 7988 spelltab.st_upper[i] = MB_TOUPPER(i); 7989 } 7990 } 7991 } 7992 } 7993 7994 /* 7995 * Set the spell character tables from strings in the affix file. 7996 */ 7997 static int 7998 set_spell_chartab(fol, low, upp) 7999 char_u *fol; 8000 char_u *low; 8001 char_u *upp; 8002 { 8003 /* We build the new tables here first, so that we can compare with the 8004 * previous one. 
*/ 8005 spelltab_T new_st; 8006 char_u *pf = fol, *pl = low, *pu = upp; 8007 int f, l, u; 8008 8009 clear_spell_chartab(&new_st); 8010 8011 while (*pf != NUL) 8012 { 8013 if (*pl == NUL || *pu == NUL) 8014 { 8015 EMSG(_(e_affform)); 8016 return FAIL; 8017 } 8018 #ifdef FEAT_MBYTE 8019 f = mb_ptr2char_adv(&pf); 8020 l = mb_ptr2char_adv(&pl); 8021 u = mb_ptr2char_adv(&pu); 8022 #else 8023 f = *pf++; 8024 l = *pl++; 8025 u = *pu++; 8026 #endif 8027 /* Every character that appears is a word character. */ 8028 if (f < 256) 8029 new_st.st_isw[f] = TRUE; 8030 if (l < 256) 8031 new_st.st_isw[l] = TRUE; 8032 if (u < 256) 8033 new_st.st_isw[u] = TRUE; 8034 8035 /* if "LOW" and "FOL" are not the same the "LOW" char needs 8036 * case-folding */ 8037 if (l < 256 && l != f) 8038 { 8039 if (f >= 256) 8040 { 8041 EMSG(_(e_affrange)); 8042 return FAIL; 8043 } 8044 new_st.st_fold[l] = f; 8045 } 8046 8047 /* if "UPP" and "FOL" are not the same the "UPP" char needs 8048 * case-folding, it's upper case and the "UPP" is the upper case of 8049 * "FOL" . */ 8050 if (u < 256 && u != f) 8051 { 8052 if (f >= 256) 8053 { 8054 EMSG(_(e_affrange)); 8055 return FAIL; 8056 } 8057 new_st.st_fold[u] = f; 8058 new_st.st_isu[u] = TRUE; 8059 new_st.st_upper[f] = u; 8060 } 8061 } 8062 8063 if (*pl != NUL || *pu != NUL) 8064 { 8065 EMSG(_(e_affform)); 8066 return FAIL; 8067 } 8068 8069 return set_spell_finish(&new_st); 8070 } 8071 8072 /* 8073 * Set the spell character tables from strings in the .spl file. 8074 */ 8075 static void 8076 set_spell_charflags(flags, cnt, fol) 8077 char_u *flags; 8078 int cnt; /* length of "flags" */ 8079 char_u *fol; 8080 { 8081 /* We build the new tables here first, so that we can compare with the 8082 * previous one. 
*/ 8083 spelltab_T new_st; 8084 int i; 8085 char_u *p = fol; 8086 int c; 8087 8088 clear_spell_chartab(&new_st); 8089 8090 for (i = 0; i < 128; ++i) 8091 { 8092 if (i < cnt) 8093 { 8094 new_st.st_isw[i + 128] = (flags[i] & CF_WORD) != 0; 8095 new_st.st_isu[i + 128] = (flags[i] & CF_UPPER) != 0; 8096 } 8097 8098 if (*p != NUL) 8099 { 8100 #ifdef FEAT_MBYTE 8101 c = mb_ptr2char_adv(&p); 8102 #else 8103 c = *p++; 8104 #endif 8105 new_st.st_fold[i + 128] = c; 8106 if (i + 128 != c && new_st.st_isu[i + 128] && c < 256) 8107 new_st.st_upper[c] = i + 128; 8108 } 8109 } 8110 8111 (void)set_spell_finish(&new_st); 8112 } 8113 8114 static int 8115 set_spell_finish(new_st) 8116 spelltab_T *new_st; 8117 { 8118 int i; 8119 8120 if (did_set_spelltab) 8121 { 8122 /* check that it's the same table */ 8123 for (i = 0; i < 256; ++i) 8124 { 8125 if (spelltab.st_isw[i] != new_st->st_isw[i] 8126 || spelltab.st_isu[i] != new_st->st_isu[i] 8127 || spelltab.st_fold[i] != new_st->st_fold[i] 8128 || spelltab.st_upper[i] != new_st->st_upper[i]) 8129 { 8130 EMSG(_("E763: Word characters differ between spell files")); 8131 return FAIL; 8132 } 8133 } 8134 } 8135 else 8136 { 8137 /* copy the new spelltab into the one being used */ 8138 spelltab = *new_st; 8139 did_set_spelltab = TRUE; 8140 } 8141 8142 return OK; 8143 } 8144 8145 /* 8146 * Return TRUE if "p" points to a word character. 8147 * As a special case we see "midword" characters as word character when it is 8148 * followed by a word character. This finds they'there but not 'they there'. 8149 * Thus this only works properly when past the first character of the word. 
8150 */ 8151 static int 8152 spell_iswordp(p, buf) 8153 char_u *p; 8154 buf_T *buf; /* buffer used */ 8155 { 8156 #ifdef FEAT_MBYTE 8157 char_u *s; 8158 int l; 8159 int c; 8160 8161 if (has_mbyte) 8162 { 8163 l = MB_BYTE2LEN(*p); 8164 s = p; 8165 if (l == 1) 8166 { 8167 /* be quick for ASCII */ 8168 if (buf->b_spell_ismw[*p]) 8169 { 8170 s = p + 1; /* skip a mid-word character */ 8171 l = MB_BYTE2LEN(*s); 8172 } 8173 } 8174 else 8175 { 8176 c = mb_ptr2char(p); 8177 if (c < 256 ? buf->b_spell_ismw[c] 8178 : (buf->b_spell_ismw_mb != NULL 8179 && vim_strchr(buf->b_spell_ismw_mb, c) != NULL)) 8180 { 8181 s = p + l; 8182 l = MB_BYTE2LEN(*s); 8183 } 8184 } 8185 8186 c = mb_ptr2char(s); 8187 if (c > 255) 8188 return mb_get_class(s) >= 2; 8189 return spelltab.st_isw[c]; 8190 } 8191 #endif 8192 8193 return spelltab.st_isw[buf->b_spell_ismw[*p] ? p[1] : p[0]]; 8194 } 8195 8196 /* 8197 * Return TRUE if "p" points to a word character. 8198 * Unlike spell_iswordp() this doesn't check for "midword" characters. 8199 */ 8200 static int 8201 spell_iswordp_nmw(p) 8202 char_u *p; 8203 { 8204 #ifdef FEAT_MBYTE 8205 int c; 8206 8207 if (has_mbyte) 8208 { 8209 c = mb_ptr2char(p); 8210 if (c > 255) 8211 return mb_get_class(p) >= 2; 8212 return spelltab.st_isw[c]; 8213 } 8214 #endif 8215 return spelltab.st_isw[*p]; 8216 } 8217 8218 #ifdef FEAT_MBYTE 8219 /* 8220 * Return TRUE if "p" points to a word character. 8221 * Wide version of spell_iswordp(). 8222 */ 8223 static int 8224 spell_iswordp_w(p, buf) 8225 int *p; 8226 buf_T *buf; 8227 { 8228 int *s; 8229 8230 if (*p < 256 ? 
buf->b_spell_ismw[*p] 8231 : (buf->b_spell_ismw_mb != NULL 8232 && vim_strchr(buf->b_spell_ismw_mb, *p) != NULL)) 8233 s = p + 1; 8234 else 8235 s = p; 8236 8237 if (*s > 255) 8238 { 8239 if (enc_utf8) 8240 return utf_class(*s) >= 2; 8241 if (enc_dbcs) 8242 return dbcs_class((unsigned)*s >> 8, *s & 0xff) >= 2; 8243 return 0; 8244 } 8245 return spelltab.st_isw[*s]; 8246 } 8247 #endif 8248 8249 /* 8250 * Write the table with prefix conditions to the .spl file. 8251 * When "fd" is NULL only count the length of what is written. 8252 */ 8253 static int 8254 write_spell_prefcond(fd, gap) 8255 FILE *fd; 8256 garray_T *gap; 8257 { 8258 int i; 8259 char_u *p; 8260 int len; 8261 int totlen; 8262 8263 if (fd != NULL) 8264 put_bytes(fd, (long_u)gap->ga_len, 2); /* <prefcondcnt> */ 8265 8266 totlen = 2 + gap->ga_len; /* length of <prefcondcnt> and <condlen> bytes */ 8267 8268 for (i = 0; i < gap->ga_len; ++i) 8269 { 8270 /* <prefcond> : <condlen> <condstr> */ 8271 p = ((char_u **)gap->ga_data)[i]; 8272 if (p != NULL) 8273 { 8274 len = STRLEN(p); 8275 if (fd != NULL) 8276 { 8277 fputc(len, fd); 8278 fwrite(p, (size_t)len, (size_t)1, fd); 8279 } 8280 totlen += len; 8281 } 8282 else if (fd != NULL) 8283 fputc(0, fd); 8284 } 8285 8286 return totlen; 8287 } 8288 8289 /* 8290 * Case-fold "str[len]" into "buf[buflen]". The result is NUL terminated. 8291 * Uses the character definitions from the .spl file. 8292 * When using a multi-byte 'encoding' the length may change! 8293 * Returns FAIL when something wrong. 8294 */ 8295 static int 8296 spell_casefold(str, len, buf, buflen) 8297 char_u *str; 8298 int len; 8299 char_u *buf; 8300 int buflen; 8301 { 8302 int i; 8303 8304 if (len >= buflen) 8305 { 8306 buf[0] = NUL; 8307 return FAIL; /* result will not fit */ 8308 } 8309 8310 #ifdef FEAT_MBYTE 8311 if (has_mbyte) 8312 { 8313 int outi = 0; 8314 char_u *p; 8315 int c; 8316 8317 /* Fold one character at a time. 
*/ 8318 for (p = str; p < str + len; ) 8319 { 8320 if (outi + MB_MAXBYTES > buflen) 8321 { 8322 buf[outi] = NUL; 8323 return FAIL; 8324 } 8325 c = mb_cptr2char_adv(&p); 8326 outi += mb_char2bytes(SPELL_TOFOLD(c), buf + outi); 8327 } 8328 buf[outi] = NUL; 8329 } 8330 else 8331 #endif 8332 { 8333 /* Be quick for non-multibyte encodings. */ 8334 for (i = 0; i < len; ++i) 8335 buf[i] = spelltab.st_fold[str[i]]; 8336 buf[i] = NUL; 8337 } 8338 8339 return OK; 8340 } 8341 8342 #define SPS_BEST 1 8343 #define SPS_FAST 2 8344 #define SPS_DOUBLE 4 8345 8346 static int sps_flags = SPS_BEST; 8347 static int sps_limit = 9999; 8348 8349 /* 8350 * Check the 'spellsuggest' option. Return FAIL if it's wrong. 8351 * Sets "sps_flags" and "sps_limit". 8352 */ 8353 int 8354 spell_check_sps() 8355 { 8356 char_u *p; 8357 char_u *s; 8358 char_u buf[MAXPATHL]; 8359 int f; 8360 8361 sps_flags = 0; 8362 sps_limit = 9999; 8363 8364 for (p = p_sps; *p != NUL; ) 8365 { 8366 copy_option_part(&p, buf, MAXPATHL, ","); 8367 8368 f = 0; 8369 if (VIM_ISDIGIT(*buf)) 8370 { 8371 s = buf; 8372 sps_limit = getdigits(&s); 8373 if (*s != NUL && !VIM_ISDIGIT(*s)) 8374 f = -1; 8375 } 8376 else if (STRCMP(buf, "best") == 0) 8377 f = SPS_BEST; 8378 else if (STRCMP(buf, "fast") == 0) 8379 f = SPS_FAST; 8380 else if (STRCMP(buf, "double") == 0) 8381 f = SPS_DOUBLE; 8382 else if (STRNCMP(buf, "expr:", 5) != 0 8383 && STRNCMP(buf, "file:", 5) != 0) 8384 f = -1; 8385 8386 if (f == -1 || (sps_flags != 0 && f != 0)) 8387 { 8388 sps_flags = SPS_BEST; 8389 sps_limit = 9999; 8390 return FAIL; 8391 } 8392 if (f != 0) 8393 sps_flags = f; 8394 } 8395 8396 if (sps_flags == 0) 8397 sps_flags = SPS_BEST; 8398 8399 return OK; 8400 } 8401 8402 /* Remember what "z?" replaced. */ 8403 static char_u *repl_from = NULL; 8404 static char_u *repl_to = NULL; 8405 8406 /* 8407 * "z?": Find badly spelled word under or after the cursor. 8408 * Give suggestions for the properly spelled word. 
8409 * When "count" is non-zero use that suggestion. 8410 */ 8411 void 8412 spell_suggest(count) 8413 int count; 8414 { 8415 char_u *line; 8416 pos_T prev_cursor = curwin->w_cursor; 8417 char_u wcopy[MAXWLEN + 2]; 8418 char_u *p; 8419 int i; 8420 int c; 8421 suginfo_T sug; 8422 suggest_T *stp; 8423 int mouse_used; 8424 int need_cap; 8425 int limit; 8426 int selected = count; 8427 8428 /* Find the start of the badly spelled word. */ 8429 if (spell_move_to(curwin, FORWARD, TRUE, TRUE, NULL) == 0 8430 || curwin->w_cursor.col > prev_cursor.col) 8431 { 8432 if (!curwin->w_p_spell || *curbuf->b_p_spl == NUL) 8433 return; 8434 8435 /* No bad word or it starts after the cursor: use the word under the 8436 * cursor. */ 8437 curwin->w_cursor = prev_cursor; 8438 line = ml_get_curline(); 8439 p = line + curwin->w_cursor.col; 8440 /* Backup to before start of word. */ 8441 while (p > line && spell_iswordp_nmw(p)) 8442 mb_ptr_back(line, p); 8443 /* Forward to start of word. */ 8444 while (*p != NUL && !spell_iswordp_nmw(p)) 8445 mb_ptr_adv(p); 8446 8447 if (!spell_iswordp_nmw(p)) /* No word found. */ 8448 { 8449 beep_flush(); 8450 return; 8451 } 8452 curwin->w_cursor.col = p - line; 8453 } 8454 8455 /* Get the word and its length. */ 8456 8457 /* Figure out if the word should be capitalised. */ 8458 need_cap = check_need_cap(curwin->w_cursor.lnum, curwin->w_cursor.col); 8459 8460 line = ml_get_curline(); 8461 8462 /* Get the list of suggestions. Limit to 'lines' - 2 or the number in 8463 * 'spellsuggest', whatever is smaller. 
*/ 8464 if (sps_limit > (int)Rows - 2) 8465 limit = (int)Rows - 2; 8466 else 8467 limit = sps_limit; 8468 spell_find_suggest(line + curwin->w_cursor.col, &sug, limit, 8469 TRUE, need_cap); 8470 8471 if (sug.su_ga.ga_len == 0) 8472 MSG(_("Sorry, no suggestions")); 8473 else if (count > 0) 8474 { 8475 if (count > sug.su_ga.ga_len) 8476 smsg((char_u *)_("Sorry, only %ld suggestions"), 8477 (long)sug.su_ga.ga_len); 8478 } 8479 else 8480 { 8481 vim_free(repl_from); 8482 repl_from = NULL; 8483 vim_free(repl_to); 8484 repl_to = NULL; 8485 8486 #ifdef FEAT_RIGHTLEFT 8487 /* When 'rightleft' is set the list is drawn right-left. */ 8488 cmdmsg_rl = curwin->w_p_rl; 8489 if (cmdmsg_rl) 8490 msg_col = Columns - 1; 8491 #endif 8492 8493 /* List the suggestions. */ 8494 msg_start(); 8495 lines_left = Rows; /* avoid more prompt */ 8496 vim_snprintf((char *)IObuff, IOSIZE, _("Change \"%.*s\" to:"), 8497 sug.su_badlen, sug.su_badptr); 8498 #ifdef FEAT_RIGHTLEFT 8499 if (cmdmsg_rl && STRNCMP(IObuff, "Change", 6) == 0) 8500 { 8501 /* And now the rabbit from the high hat: Avoid showing the 8502 * untranslated message rightleft. */ 8503 vim_snprintf((char *)IObuff, IOSIZE, ":ot \"%.*s\" egnahC", 8504 sug.su_badlen, sug.su_badptr); 8505 } 8506 #endif 8507 msg_puts(IObuff); 8508 msg_clr_eos(); 8509 msg_putchar('\n'); 8510 8511 msg_scroll = TRUE; 8512 for (i = 0; i < sug.su_ga.ga_len; ++i) 8513 { 8514 stp = &SUG(sug.su_ga, i); 8515 8516 /* The suggested word may replace only part of the bad word, add 8517 * the not replaced part. 
*/ 8518 STRCPY(wcopy, stp->st_word); 8519 if (sug.su_badlen > stp->st_orglen) 8520 vim_strncpy(wcopy + STRLEN(wcopy), 8521 sug.su_badptr + stp->st_orglen, 8522 sug.su_badlen - stp->st_orglen); 8523 vim_snprintf((char *)IObuff, IOSIZE, "%2d", i + 1); 8524 #ifdef FEAT_RIGHTLEFT 8525 if (cmdmsg_rl) 8526 rl_mirror(IObuff); 8527 #endif 8528 msg_puts(IObuff); 8529 8530 vim_snprintf((char *)IObuff, IOSIZE, " \"%s\"", wcopy); 8531 msg_puts(IObuff); 8532 8533 /* The word may replace more than "su_badlen". */ 8534 if (sug.su_badlen < stp->st_orglen) 8535 { 8536 vim_snprintf((char *)IObuff, IOSIZE, _(" < \"%.*s\""), 8537 stp->st_orglen, sug.su_badptr); 8538 msg_puts(IObuff); 8539 } 8540 8541 if (p_verbose > 0) 8542 { 8543 /* Add the score. */ 8544 if (sps_flags & (SPS_DOUBLE | SPS_BEST)) 8545 vim_snprintf((char *)IObuff, IOSIZE, " (%s%d - %d)", 8546 stp->st_salscore ? "s " : "", 8547 stp->st_score, stp->st_altscore); 8548 else 8549 vim_snprintf((char *)IObuff, IOSIZE, " (%d)", 8550 stp->st_score); 8551 #ifdef FEAT_RIGHTLEFT 8552 if (cmdmsg_rl) 8553 /* Mirror the numbers, but keep the leading space. */ 8554 rl_mirror(IObuff + 1); 8555 #endif 8556 msg_advance(30); 8557 msg_puts(IObuff); 8558 } 8559 msg_putchar('\n'); 8560 } 8561 8562 #ifdef FEAT_RIGHTLEFT 8563 cmdmsg_rl = FALSE; 8564 msg_col = 0; 8565 #endif 8566 /* Ask for choice. */ 8567 selected = prompt_for_number(&mouse_used); 8568 if (mouse_used) 8569 selected -= lines_left; 8570 } 8571 8572 if (selected > 0 && selected <= sug.su_ga.ga_len && u_save_cursor() == OK) 8573 { 8574 /* Save the from and to text for :spellrepall. */ 8575 stp = &SUG(sug.su_ga, selected - 1); 8576 if (sug.su_badlen > stp->st_orglen) 8577 { 8578 /* Replacing less than "su_badlen", append the remainder to 8579 * repl_to. 
*/ 8580 repl_from = vim_strnsave(sug.su_badptr, sug.su_badlen); 8581 vim_snprintf((char *)IObuff, IOSIZE, "%s%.*s", stp->st_word, 8582 sug.su_badlen - stp->st_orglen, 8583 sug.su_badptr + stp->st_orglen); 8584 repl_to = vim_strsave(IObuff); 8585 } 8586 else 8587 { 8588 /* Replacing su_badlen or more, use the whole word. */ 8589 repl_from = vim_strnsave(sug.su_badptr, stp->st_orglen); 8590 repl_to = vim_strsave(stp->st_word); 8591 } 8592 8593 /* Replace the word. */ 8594 p = alloc(STRLEN(line) - stp->st_orglen + STRLEN(stp->st_word) + 1); 8595 if (p != NULL) 8596 { 8597 c = sug.su_badptr - line; 8598 mch_memmove(p, line, c); 8599 STRCPY(p + c, stp->st_word); 8600 STRCAT(p, sug.su_badptr + stp->st_orglen); 8601 ml_replace(curwin->w_cursor.lnum, p, FALSE); 8602 curwin->w_cursor.col = c; 8603 changed_bytes(curwin->w_cursor.lnum, c); 8604 8605 /* For redo we use a change-word command. */ 8606 ResetRedobuff(); 8607 AppendToRedobuff((char_u *)"ciw"); 8608 AppendToRedobuff(stp->st_word); 8609 AppendCharToRedobuff(ESC); 8610 } 8611 } 8612 else 8613 curwin->w_cursor = prev_cursor; 8614 8615 spell_find_cleanup(&sug); 8616 } 8617 8618 /* 8619 * Check if the word at line "lnum" column "col" is required to start with a 8620 * capital. This uses 'spellcapcheck' of the current buffer. 8621 */ 8622 static int 8623 check_need_cap(lnum, col) 8624 linenr_T lnum; 8625 colnr_T col; 8626 { 8627 int need_cap = FALSE; 8628 char_u *line; 8629 char_u *line_copy = NULL; 8630 char_u *p; 8631 colnr_T endcol; 8632 regmatch_T regmatch; 8633 8634 if (curbuf->b_cap_prog == NULL) 8635 return FALSE; 8636 8637 line = ml_get_curline(); 8638 endcol = 0; 8639 if ((int)(skipwhite(line) - line) >= (int)col) 8640 { 8641 /* At start of line, check if previous line is empty or sentence 8642 * ends there. 
*/ 8643 if (lnum == 1) 8644 need_cap = TRUE; 8645 else 8646 { 8647 line = ml_get(lnum - 1); 8648 if (*skipwhite(line) == NUL) 8649 need_cap = TRUE; 8650 else 8651 { 8652 /* Append a space in place of the line break. */ 8653 line_copy = concat_str(line, (char_u *)" "); 8654 line = line_copy; 8655 endcol = STRLEN(line); 8656 } 8657 } 8658 } 8659 else 8660 endcol = col; 8661 8662 if (endcol > 0) 8663 { 8664 /* Check if sentence ends before the bad word. */ 8665 regmatch.regprog = curbuf->b_cap_prog; 8666 regmatch.rm_ic = FALSE; 8667 p = line + endcol; 8668 for (;;) 8669 { 8670 mb_ptr_back(line, p); 8671 if (p == line || spell_iswordp_nmw(p)) 8672 break; 8673 if (vim_regexec(®match, p, 0) 8674 && regmatch.endp[0] == line + endcol) 8675 { 8676 need_cap = TRUE; 8677 break; 8678 } 8679 } 8680 } 8681 8682 vim_free(line_copy); 8683 8684 return need_cap; 8685 } 8686 8687 8688 /* 8689 * ":spellrepall" 8690 */ 8691 /*ARGSUSED*/ 8692 void 8693 ex_spellrepall(eap) 8694 exarg_T *eap; 8695 { 8696 pos_T pos = curwin->w_cursor; 8697 char_u *frompat; 8698 int addlen; 8699 char_u *line; 8700 char_u *p; 8701 int save_ws = p_ws; 8702 linenr_T prev_lnum = 0; 8703 8704 if (repl_from == NULL || repl_to == NULL) 8705 { 8706 EMSG(_("E752: No previous spell replacement")); 8707 return; 8708 } 8709 addlen = STRLEN(repl_to) - STRLEN(repl_from); 8710 8711 frompat = alloc(STRLEN(repl_from) + 7); 8712 if (frompat == NULL) 8713 return; 8714 sprintf((char *)frompat, "\\V\\<%s\\>", repl_from); 8715 p_ws = FALSE; 8716 8717 sub_nsubs = 0; 8718 sub_nlines = 0; 8719 curwin->w_cursor.lnum = 0; 8720 while (!got_int) 8721 { 8722 if (do_search(NULL, '/', frompat, 1L, SEARCH_KEEP) == 0 8723 || u_save_cursor() == FAIL) 8724 break; 8725 8726 /* Only replace when the right word isn't there yet. This happens 8727 * when changing "etc" to "etc.". 
*/ 8728 line = ml_get_curline(); 8729 if (addlen <= 0 || STRNCMP(line + curwin->w_cursor.col, 8730 repl_to, STRLEN(repl_to)) != 0) 8731 { 8732 p = alloc(STRLEN(line) + addlen + 1); 8733 if (p == NULL) 8734 break; 8735 mch_memmove(p, line, curwin->w_cursor.col); 8736 STRCPY(p + curwin->w_cursor.col, repl_to); 8737 STRCAT(p, line + curwin->w_cursor.col + STRLEN(repl_from)); 8738 ml_replace(curwin->w_cursor.lnum, p, FALSE); 8739 changed_bytes(curwin->w_cursor.lnum, curwin->w_cursor.col); 8740 8741 if (curwin->w_cursor.lnum != prev_lnum) 8742 { 8743 ++sub_nlines; 8744 prev_lnum = curwin->w_cursor.lnum; 8745 } 8746 ++sub_nsubs; 8747 } 8748 curwin->w_cursor.col += STRLEN(repl_to); 8749 } 8750 8751 p_ws = save_ws; 8752 curwin->w_cursor = pos; 8753 vim_free(frompat); 8754 8755 if (sub_nsubs == 0) 8756 EMSG2(_("E753: Not found: %s"), repl_from); 8757 else 8758 do_sub_msg(FALSE); 8759 } 8760 8761 /* 8762 * Find spell suggestions for "word". Return them in the growarray "*gap" as 8763 * a list of allocated strings. 8764 */ 8765 void 8766 spell_suggest_list(gap, word, maxcount, need_cap) 8767 garray_T *gap; 8768 char_u *word; 8769 int maxcount; /* maximum nr of suggestions */ 8770 int need_cap; /* 'spellcapcheck' matched */ 8771 { 8772 suginfo_T sug; 8773 int i; 8774 suggest_T *stp; 8775 char_u *wcopy; 8776 8777 spell_find_suggest(word, &sug, maxcount, FALSE, need_cap); 8778 8779 /* Make room in "gap". */ 8780 ga_init2(gap, sizeof(char_u *), sug.su_ga.ga_len + 1); 8781 if (ga_grow(gap, sug.su_ga.ga_len) == FAIL) 8782 return; 8783 8784 for (i = 0; i < sug.su_ga.ga_len; ++i) 8785 { 8786 stp = &SUG(sug.su_ga, i); 8787 8788 /* The suggested word may replace only part of "word", add the not 8789 * replaced part. 
*/ 8790 wcopy = alloc(STRLEN(stp->st_word) 8791 + STRLEN(sug.su_badptr + stp->st_orglen) + 1); 8792 if (wcopy == NULL) 8793 break; 8794 STRCPY(wcopy, stp->st_word); 8795 STRCAT(wcopy, sug.su_badptr + stp->st_orglen); 8796 ((char_u **)gap->ga_data)[gap->ga_len++] = wcopy; 8797 } 8798 8799 spell_find_cleanup(&sug); 8800 } 8801 8802 /* 8803 * Find spell suggestions for the word at the start of "badptr". 8804 * Return the suggestions in "su->su_ga". 8805 * The maximum number of suggestions is "maxcount". 8806 * Note: does use info for the current window. 8807 * This is based on the mechanisms of Aspell, but completely reimplemented. 8808 */ 8809 static void 8810 spell_find_suggest(badptr, su, maxcount, banbadword, need_cap) 8811 char_u *badptr; 8812 suginfo_T *su; 8813 int maxcount; 8814 int banbadword; /* don't include badword in suggestions */ 8815 int need_cap; /* word should start with capital */ 8816 { 8817 hlf_T attr = HLF_COUNT; 8818 char_u buf[MAXPATHL]; 8819 char_u *p; 8820 int do_combine = FALSE; 8821 char_u *sps_copy; 8822 #ifdef FEAT_EVAL 8823 static int expr_busy = FALSE; 8824 #endif 8825 int c; 8826 int i; 8827 langp_T *lp; 8828 8829 /* 8830 * Set the info in "*su". 
8831 */ 8832 vim_memset(su, 0, sizeof(suginfo_T)); 8833 ga_init2(&su->su_ga, (int)sizeof(suggest_T), 10); 8834 ga_init2(&su->su_sga, (int)sizeof(suggest_T), 10); 8835 if (*badptr == NUL) 8836 return; 8837 hash_init(&su->su_banned); 8838 8839 su->su_badptr = badptr; 8840 su->su_badlen = spell_check(curwin, su->su_badptr, &attr, NULL); 8841 su->su_maxcount = maxcount; 8842 su->su_maxscore = SCORE_MAXINIT; 8843 8844 if (su->su_badlen >= MAXWLEN) 8845 su->su_badlen = MAXWLEN - 1; /* just in case */ 8846 vim_strncpy(su->su_badword, su->su_badptr, su->su_badlen); 8847 (void)spell_casefold(su->su_badptr, su->su_badlen, 8848 su->su_fbadword, MAXWLEN); 8849 /* get caps flags for bad word */ 8850 su->su_badflags = badword_captype(su->su_badptr, 8851 su->su_badptr + su->su_badlen); 8852 if (need_cap) 8853 su->su_badflags |= WF_ONECAP; 8854 8855 /* Find the default language for sound folding. We simply use the first 8856 * one in 'spelllang' that supports sound folding. That's good for when 8857 * using multiple files for one language, it's not that bad when mixing 8858 * languages (e.g., "pl,en"). */ 8859 for (i = 0; i < curbuf->b_langp.ga_len; ++i) 8860 { 8861 lp = LANGP_ENTRY(curbuf->b_langp, i); 8862 if (lp->lp_sallang != NULL) 8863 { 8864 su->su_sallang = lp->lp_sallang; 8865 break; 8866 } 8867 } 8868 8869 /* Soundfold the bad word with the default sound folding, so that we don't 8870 * have to do this many times. */ 8871 if (su->su_sallang != NULL) 8872 spell_soundfold(su->su_sallang, su->su_fbadword, TRUE, 8873 su->su_sal_badword); 8874 8875 /* If the word is not capitalised and spell_check() doesn't consider the 8876 * word to be bad then it might need to be capitalised. Add a suggestion 8877 * for that. 
*/ 8878 c = PTR2CHAR(su->su_badptr); 8879 if (!SPELL_ISUPPER(c) && attr == HLF_COUNT) 8880 { 8881 make_case_word(su->su_badword, buf, WF_ONECAP); 8882 add_suggestion(su, &su->su_ga, buf, su->su_badlen, SCORE_ICASE, 8883 0, TRUE, su->su_sallang); 8884 } 8885 8886 /* Ban the bad word itself. It may appear in another region. */ 8887 if (banbadword) 8888 add_banned(su, su->su_badword); 8889 8890 /* Make a copy of 'spellsuggest', because the expression may change it. */ 8891 sps_copy = vim_strsave(p_sps); 8892 if (sps_copy == NULL) 8893 return; 8894 8895 /* Loop over the items in 'spellsuggest'. */ 8896 for (p = sps_copy; *p != NUL; ) 8897 { 8898 copy_option_part(&p, buf, MAXPATHL, ","); 8899 8900 if (STRNCMP(buf, "expr:", 5) == 0) 8901 { 8902 #ifdef FEAT_EVAL 8903 /* Evaluate an expression. Skip this when called recursively, 8904 * when using spellsuggest() in the expression. */ 8905 if (!expr_busy) 8906 { 8907 expr_busy = TRUE; 8908 spell_suggest_expr(su, buf + 5); 8909 expr_busy = FALSE; 8910 } 8911 #endif 8912 } 8913 else if (STRNCMP(buf, "file:", 5) == 0) 8914 /* Use list of suggestions in a file. */ 8915 spell_suggest_file(su, buf + 5); 8916 else 8917 { 8918 /* Use internal method. */ 8919 spell_suggest_intern(su); 8920 if (sps_flags & SPS_DOUBLE) 8921 do_combine = TRUE; 8922 } 8923 } 8924 8925 vim_free(sps_copy); 8926 8927 if (do_combine) 8928 /* Combine the two list of suggestions. This must be done last, 8929 * because sorting changes the order again. */ 8930 score_combine(su); 8931 } 8932 8933 #ifdef FEAT_EVAL 8934 /* 8935 * Find suggestions by evaluating expression "expr". 8936 */ 8937 static void 8938 spell_suggest_expr(su, expr) 8939 suginfo_T *su; 8940 char_u *expr; 8941 { 8942 list_T *list; 8943 listitem_T *li; 8944 int score; 8945 char_u *p; 8946 8947 /* The work is split up in a few parts to avoid having to export 8948 * suginfo_T. 8949 * First evaluate the expression and get the resulting list. 
*/ 8950 list = eval_spell_expr(su->su_badword, expr); 8951 if (list != NULL) 8952 { 8953 /* Loop over the items in the list. */ 8954 for (li = list->lv_first; li != NULL; li = li->li_next) 8955 if (li->li_tv.v_type == VAR_LIST) 8956 { 8957 /* Get the word and the score from the items. */ 8958 score = get_spellword(li->li_tv.vval.v_list, &p); 8959 if (score >= 0) 8960 add_suggestion(su, &su->su_ga, p, 8961 su->su_badlen, score, 0, TRUE, su->su_sallang); 8962 } 8963 list_unref(list); 8964 } 8965 8966 /* Sort the suggestions and truncate at "maxcount". */ 8967 (void)cleanup_suggestions(&su->su_ga, su->su_maxscore, su->su_maxcount); 8968 } 8969 #endif 8970 8971 /* 8972 * Find suggestions in file "fname". Used for "file:" in 'spellsuggest'. 8973 */ 8974 static void 8975 spell_suggest_file(su, fname) 8976 suginfo_T *su; 8977 char_u *fname; 8978 { 8979 FILE *fd; 8980 char_u line[MAXWLEN * 2]; 8981 char_u *p; 8982 int len; 8983 char_u cword[MAXWLEN]; 8984 8985 /* Open the file. */ 8986 fd = mch_fopen((char *)fname, "r"); 8987 if (fd == NULL) 8988 { 8989 EMSG2(_(e_notopen), fname); 8990 return; 8991 } 8992 8993 /* Read it line by line. */ 8994 while (!vim_fgets(line, MAXWLEN * 2, fd) && !got_int) 8995 { 8996 line_breakcheck(); 8997 8998 p = vim_strchr(line, '/'); 8999 if (p == NULL) 9000 continue; /* No Tab found, just skip the line. */ 9001 *p++ = NUL; 9002 if (STRICMP(su->su_badword, line) == 0) 9003 { 9004 /* Match! Isolate the good word, until CR or NL. */ 9005 for (len = 0; p[len] >= ' '; ++len) 9006 ; 9007 p[len] = NUL; 9008 9009 /* If the suggestion doesn't have specific case duplicate the case 9010 * of the bad word. */ 9011 if (captype(p, NULL) == 0) 9012 { 9013 make_case_word(p, cword, su->su_badflags); 9014 p = cword; 9015 } 9016 9017 add_suggestion(su, &su->su_ga, p, su->su_badlen, 9018 SCORE_FILE, 0, TRUE, su->su_sallang); 9019 } 9020 } 9021 9022 fclose(fd); 9023 9024 /* Sort the suggestions and truncate at "maxcount". 
*/ 9025 (void)cleanup_suggestions(&su->su_ga, su->su_maxscore, su->su_maxcount); 9026 } 9027 9028 /* 9029 * Find suggestions for the internal method indicated by "sps_flags". 9030 */ 9031 static void 9032 spell_suggest_intern(su) 9033 suginfo_T *su; 9034 { 9035 /* 9036 * 1. Try special cases, such as repeating a word: "the the" -> "the". 9037 * 9038 * Set a maximum score to limit the combination of operations that is 9039 * tried. 9040 */ 9041 suggest_try_special(su); 9042 9043 /* 9044 * 2. Try inserting/deleting/swapping/changing a letter, use REP entries 9045 * from the .aff file and inserting a space (split the word). 9046 */ 9047 suggest_try_change(su); 9048 9049 /* For the resulting top-scorers compute the sound-a-like score. */ 9050 if (sps_flags & SPS_DOUBLE) 9051 score_comp_sal(su); 9052 9053 /* 9054 * 3. Try finding sound-a-like words. 9055 * 9056 * Only do this when we don't have a lot of suggestions yet, because it's 9057 * very slow and often doesn't find new suggestions. 9058 */ 9059 if ((sps_flags & SPS_DOUBLE) 9060 || (!(sps_flags & SPS_FAST) 9061 && su->su_ga.ga_len < SUG_CLEAN_COUNT(su))) 9062 { 9063 /* Allow a higher score now. */ 9064 su->su_maxscore = SCORE_MAXMAX; 9065 suggest_try_soundalike(su); 9066 } 9067 9068 /* When CTRL-C was hit while searching do show the results. */ 9069 ui_breakcheck(); 9070 if (got_int) 9071 { 9072 (void)vgetc(); 9073 got_int = FALSE; 9074 } 9075 9076 if ((sps_flags & SPS_DOUBLE) == 0 && su->su_ga.ga_len != 0) 9077 { 9078 if (sps_flags & SPS_BEST) 9079 /* Adjust the word score for how it sounds like. */ 9080 rescore_suggestions(su); 9081 9082 /* Sort the suggestions and truncate at "maxcount". */ 9083 (void)cleanup_suggestions(&su->su_ga, su->su_maxscore, su->su_maxcount); 9084 } 9085 } 9086 9087 /* 9088 * Free the info put in "*su" by spell_find_suggest(). 9089 */ 9090 static void 9091 spell_find_cleanup(su) 9092 suginfo_T *su; 9093 { 9094 int i; 9095 9096 /* Free the suggestions. 
*/ 9097 for (i = 0; i < su->su_ga.ga_len; ++i) 9098 vim_free(SUG(su->su_ga, i).st_word); 9099 ga_clear(&su->su_ga); 9100 for (i = 0; i < su->su_sga.ga_len; ++i) 9101 vim_free(SUG(su->su_sga, i).st_word); 9102 ga_clear(&su->su_sga); 9103 9104 /* Free the banned words. */ 9105 free_banned(su); 9106 } 9107 9108 /* 9109 * Make a copy of "word", with the first letter upper or lower cased, to 9110 * "wcopy[MAXWLEN]". "word" must not be empty. 9111 * The result is NUL terminated. 9112 */ 9113 static void 9114 onecap_copy(word, wcopy, upper) 9115 char_u *word; 9116 char_u *wcopy; 9117 int upper; /* TRUE: first letter made upper case */ 9118 { 9119 char_u *p; 9120 int c; 9121 int l; 9122 9123 p = word; 9124 #ifdef FEAT_MBYTE 9125 if (has_mbyte) 9126 c = mb_cptr2char_adv(&p); 9127 else 9128 #endif 9129 c = *p++; 9130 if (upper) 9131 c = SPELL_TOUPPER(c); 9132 else 9133 c = SPELL_TOFOLD(c); 9134 #ifdef FEAT_MBYTE 9135 if (has_mbyte) 9136 l = mb_char2bytes(c, wcopy); 9137 else 9138 #endif 9139 { 9140 l = 1; 9141 wcopy[0] = c; 9142 } 9143 vim_strncpy(wcopy + l, p, MAXWLEN - l - 1); 9144 } 9145 9146 /* 9147 * Make a copy of "word" with all the letters upper cased into 9148 * "wcopy[MAXWLEN]". The result is NUL terminated. 9149 */ 9150 static void 9151 allcap_copy(word, wcopy) 9152 char_u *word; 9153 char_u *wcopy; 9154 { 9155 char_u *s; 9156 char_u *d; 9157 int c; 9158 9159 d = wcopy; 9160 for (s = word; *s != NUL; ) 9161 { 9162 #ifdef FEAT_MBYTE 9163 if (has_mbyte) 9164 c = mb_cptr2char_adv(&s); 9165 else 9166 #endif 9167 c = *s++; 9168 9169 #ifdef FEAT_MBYTE 9170 /* We only change � to SS when we are certain latin1 is used. It 9171 * would cause weird errors in other 8-bit encodings. 
*/ 9172 if (enc_latin1like && c == 0xdf) 9173 { 9174 c = 'S'; 9175 if (d - wcopy >= MAXWLEN - 1) 9176 break; 9177 *d++ = c; 9178 } 9179 else 9180 #endif 9181 c = SPELL_TOUPPER(c); 9182 9183 #ifdef FEAT_MBYTE 9184 if (has_mbyte) 9185 { 9186 if (d - wcopy >= MAXWLEN - MB_MAXBYTES) 9187 break; 9188 d += mb_char2bytes(c, d); 9189 } 9190 else 9191 #endif 9192 { 9193 if (d - wcopy >= MAXWLEN - 1) 9194 break; 9195 *d++ = c; 9196 } 9197 } 9198 *d = NUL; 9199 } 9200 9201 /* 9202 * Try finding suggestions by recognizing specific situations. 9203 */ 9204 static void 9205 suggest_try_special(su) 9206 suginfo_T *su; 9207 { 9208 char_u *p; 9209 size_t len; 9210 int c; 9211 char_u word[MAXWLEN]; 9212 9213 /* 9214 * Recognize a word that is repeated: "the the". 9215 */ 9216 p = skiptowhite(su->su_fbadword); 9217 len = p - su->su_fbadword; 9218 p = skipwhite(p); 9219 if (STRLEN(p) == len && STRNCMP(su->su_fbadword, p, len) == 0) 9220 { 9221 /* Include badflags: if the badword is onecap or allcap 9222 * use that for the goodword too: "The the" -> "The". */ 9223 c = su->su_fbadword[len]; 9224 su->su_fbadword[len] = NUL; 9225 make_case_word(su->su_fbadword, word, su->su_badflags); 9226 su->su_fbadword[len] = c; 9227 9228 /* Give a soundalike score of 0, compute the score as if deleting one 9229 * character. */ 9230 add_suggestion(su, &su->su_ga, word, su->su_badlen, 9231 RESCORE(SCORE_REP, 0), 0, TRUE, su->su_sallang); 9232 } 9233 } 9234 9235 /* 9236 * Try finding suggestions by adding/removing/swapping letters. 9237 * 9238 * This uses a state machine. At each node in the tree we try various 9239 * operations. When trying if an operation work "depth" is increased and the 9240 * stack[] is used to store info. This allows combinations, thus insert one 9241 * character, replace one and delete another. The number of changes is 9242 * limited by su->su_maxscore, checked in try_deeper(). 
9243 * 9244 * After implementing this I noticed an article by Kemal Oflazer that 9245 * describes something similar: "Error-tolerant Finite State Recognition with 9246 * Applications to Morphological Analysis and Spelling Correction" (1996). 9247 * The implementation in the article is simplified and requires a stack of 9248 * unknown depth. The implementation here only needs a stack depth of the 9249 * length of the word. 9250 */ 9251 static void 9252 suggest_try_change(su) 9253 suginfo_T *su; 9254 { 9255 char_u fword[MAXWLEN]; /* copy of the bad word, case-folded */ 9256 char_u tword[MAXWLEN]; /* good word collected so far */ 9257 trystate_T stack[MAXWLEN]; 9258 char_u preword[MAXWLEN * 3]; /* word found with proper case; 9259 * concatanation of prefix compound 9260 * words and split word. NUL terminated 9261 * when going deeper but not when coming 9262 * back. */ 9263 char_u compflags[MAXWLEN]; /* compound flags, one for each word */ 9264 trystate_T *sp; 9265 int newscore; 9266 langp_T *lp; 9267 char_u *byts, *fbyts, *pbyts; 9268 idx_T *idxs, *fidxs, *pidxs; 9269 int depth; 9270 int c, c2, c3; 9271 int n; 9272 int flags; 9273 garray_T *gap; 9274 idx_T arridx; 9275 int len; 9276 char_u *p; 9277 fromto_T *ftp; 9278 int fl = 0, tl; 9279 int repextra = 0; /* extra bytes in fword[] from REP item */ 9280 slang_T *slang; 9281 int fword_ends; 9282 int lpi; 9283 int maysplit; 9284 int goodword_ends; 9285 9286 /* We make a copy of the case-folded bad word, so that we can modify it 9287 * to find matches (esp. REP items). Append some more text, changing 9288 * chars after the bad word may help. 
*/ 9289 STRCPY(fword, su->su_fbadword); 9290 n = STRLEN(fword); 9291 p = su->su_badptr + su->su_badlen; 9292 (void)spell_casefold(p, STRLEN(p), fword + n, MAXWLEN - n); 9293 9294 for (lpi = 0; lpi < curbuf->b_langp.ga_len; ++lpi) 9295 { 9296 lp = LANGP_ENTRY(curbuf->b_langp, lpi); 9297 slang = lp->lp_slang; 9298 9299 /* If reloading a spell file fails it's still in the list but 9300 * everything has been cleared. */ 9301 if (slang->sl_fbyts == NULL) 9302 continue; 9303 9304 /* 9305 * Go through the whole case-fold tree, try changes at each node. 9306 * "tword[]" contains the word collected from nodes in the tree. 9307 * "fword[]" the word we are trying to match with (initially the bad 9308 * word). 9309 */ 9310 depth = 0; 9311 sp = &stack[0]; 9312 vim_memset(sp, 0, sizeof(trystate_T)); 9313 sp->ts_curi = 1; 9314 9315 /* 9316 * When there are postponed prefixes we need to use these first. At 9317 * the end of the prefix we continue in the case-fold tree. 9318 */ 9319 fbyts = slang->sl_fbyts; 9320 fidxs = slang->sl_fidxs; 9321 pbyts = slang->sl_pbyts; 9322 pidxs = slang->sl_pidxs; 9323 if (pbyts != NULL) 9324 { 9325 byts = pbyts; 9326 idxs = pidxs; 9327 sp->ts_prefixdepth = PFD_PREFIXTREE; 9328 sp->ts_state = STATE_NOPREFIX; /* try without prefix first */ 9329 } 9330 else 9331 { 9332 byts = fbyts; 9333 idxs = fidxs; 9334 sp->ts_prefixdepth = PFD_NOPREFIX; 9335 sp->ts_state = STATE_START; 9336 } 9337 9338 /* 9339 * Loop to find all suggestions. At each round we either: 9340 * - For the current state try one operation, advance "ts_curi", 9341 * increase "depth". 9342 * - When a state is done go to the next, set "ts_state". 9343 * - When all states are tried decrease "depth". 9344 */ 9345 while (depth >= 0 && !got_int) 9346 { 9347 sp = &stack[depth]; 9348 switch (sp->ts_state) 9349 { 9350 case STATE_START: 9351 case STATE_NOPREFIX: 9352 /* 9353 * Start of node: Deal with NUL bytes, which means 9354 * tword[] may end here. 
9355 */ 9356 arridx = sp->ts_arridx; /* current node in the tree */ 9357 len = byts[arridx]; /* bytes in this node */ 9358 arridx += sp->ts_curi; /* index of current byte */ 9359 9360 if (sp->ts_prefixdepth == PFD_PREFIXTREE) 9361 { 9362 /* Skip over the NUL bytes, we use them later. */ 9363 for (n = 0; n < len && byts[arridx + n] == 0; ++n) 9364 ; 9365 sp->ts_curi += n; 9366 9367 /* Always past NUL bytes now. */ 9368 n = (int)sp->ts_state; 9369 sp->ts_state = STATE_ENDNUL; 9370 sp->ts_save_badflags = su->su_badflags; 9371 9372 /* At end of a prefix or at start of prefixtree: check for 9373 * following word. */ 9374 if (byts[arridx] == 0 || n == (int)STATE_NOPREFIX) 9375 { 9376 /* Set su->su_badflags to the caps type at this 9377 * position. Use the caps type until here for the 9378 * prefix itself. */ 9379 #ifdef FEAT_MBYTE 9380 if (has_mbyte) 9381 n = nofold_len(fword, sp->ts_fidx, su->su_badptr); 9382 else 9383 #endif 9384 n = sp->ts_fidx; 9385 flags = badword_captype(su->su_badptr, 9386 su->su_badptr + n); 9387 su->su_badflags = badword_captype(su->su_badptr + n, 9388 su->su_badptr + su->su_badlen); 9389 ++depth; 9390 stack[depth] = stack[depth - 1]; 9391 sp = &stack[depth]; 9392 sp->ts_prefixdepth = depth - 1; 9393 byts = fbyts; 9394 idxs = fidxs; 9395 sp->ts_state = STATE_START; 9396 sp->ts_curi = 1; /* start just after length byte */ 9397 sp->ts_arridx = 0; 9398 9399 /* Move the prefix to preword[] with the right case 9400 * and make find_keepcap_word() works. */ 9401 tword[sp->ts_twordlen] = NUL; 9402 make_case_word(tword + sp->ts_splitoff, 9403 preword + sp->ts_prewordlen, 9404 flags); 9405 sp->ts_prewordlen = STRLEN(preword); 9406 sp->ts_splitoff = sp->ts_twordlen; 9407 } 9408 break; 9409 } 9410 9411 if (sp->ts_curi > len || byts[arridx] != 0) 9412 { 9413 /* Past bytes in node and/or past NUL bytes. */ 9414 sp->ts_state = STATE_ENDNUL; 9415 sp->ts_save_badflags = su->su_badflags; 9416 break; 9417 } 9418 9419 /* 9420 * End of word in tree. 
9421 */ 9422 ++sp->ts_curi; /* eat one NUL byte */ 9423 9424 flags = (int)idxs[arridx]; 9425 fword_ends = (fword[sp->ts_fidx] == NUL 9426 || !spell_iswordp(fword + sp->ts_fidx, curbuf)); 9427 tword[sp->ts_twordlen] = NUL; 9428 9429 if (sp->ts_prefixdepth <= PFD_NOTSPECIAL 9430 && (sp->ts_flags & TSF_PREFIXOK) == 0) 9431 { 9432 /* There was a prefix before the word. Check that the 9433 * prefix can be used with this word. */ 9434 /* Count the length of the NULs in the prefix. If there 9435 * are none this must be the first try without a prefix. 9436 */ 9437 n = stack[sp->ts_prefixdepth].ts_arridx; 9438 len = pbyts[n++]; 9439 for (c = 0; c < len && pbyts[n + c] == 0; ++c) 9440 ; 9441 if (c > 0) 9442 { 9443 c = valid_word_prefix(c, n, flags, 9444 tword + sp->ts_splitoff, slang, FALSE); 9445 if (c == 0) 9446 break; 9447 9448 /* Use the WF_RARE flag for a rare prefix. */ 9449 if (c & WF_RAREPFX) 9450 flags |= WF_RARE; 9451 9452 /* Tricky: when checking for both prefix and 9453 * compounding we run into the prefix flag first. 9454 * Remember that it's OK, so that we accept the prefix 9455 * when arriving at a compound flag. */ 9456 sp->ts_flags |= TSF_PREFIXOK; 9457 } 9458 } 9459 9460 /* Check NEEDCOMPOUND: can't use word without compounding. Do 9461 * try appending another compound word below. */ 9462 if (sp->ts_complen == sp->ts_compsplit && fword_ends 9463 && (flags & WF_NEEDCOMP)) 9464 goodword_ends = FALSE; 9465 else 9466 goodword_ends = TRUE; 9467 9468 if (sp->ts_complen > sp->ts_compsplit) 9469 { 9470 if (slang->sl_nobreak) 9471 { 9472 /* There was a word before this word. When there was 9473 * no change in this word (it was correct) add the 9474 * first word as a suggestion. If this word was 9475 * corrected too, we need to check if a correct word 9476 * follows. 
*/ 9477 if (sp->ts_fidx - sp->ts_splitfidx 9478 == sp->ts_twordlen - sp->ts_splitoff 9479 && STRNCMP(fword + sp->ts_splitfidx, 9480 tword + sp->ts_splitoff, 9481 sp->ts_fidx - sp->ts_splitfidx) == 0) 9482 { 9483 preword[sp->ts_prewordlen] = NUL; 9484 add_suggestion(su, &su->su_ga, preword, 9485 sp->ts_splitfidx - repextra, 9486 sp->ts_score, 0, FALSE, 9487 lp->lp_sallang); 9488 break; 9489 } 9490 } 9491 else 9492 { 9493 /* There was a compound word before this word. If 9494 * this word does not support compounding then give up 9495 * (splitting is tried for the word without compound 9496 * flag). */ 9497 if (((unsigned)flags >> 24) == 0 9498 || sp->ts_twordlen - sp->ts_splitoff 9499 < slang->sl_compminlen) 9500 break; 9501 #ifdef FEAT_MBYTE 9502 /* For multi-byte chars check character length against 9503 * COMPOUNDMIN. */ 9504 if (has_mbyte 9505 && slang->sl_compminlen > 0 9506 && mb_charlen(tword + sp->ts_splitoff) 9507 < slang->sl_compminlen) 9508 break; 9509 #endif 9510 9511 compflags[sp->ts_complen] = ((unsigned)flags >> 24); 9512 compflags[sp->ts_complen + 1] = NUL; 9513 vim_strncpy(preword + sp->ts_prewordlen, 9514 tword + sp->ts_splitoff, 9515 sp->ts_twordlen - sp->ts_splitoff); 9516 p = preword; 9517 while (*skiptowhite(p) != NUL) 9518 p = skipwhite(skiptowhite(p)); 9519 if (fword_ends && !can_compound(slang, p, 9520 compflags + sp->ts_compsplit)) 9521 break; 9522 9523 /* Get pointer to last char of previous word. */ 9524 p = preword + sp->ts_prewordlen; 9525 mb_ptr_back(preword, p); 9526 } 9527 } 9528 else 9529 p = NULL; 9530 9531 /* 9532 * Form the word with proper case in preword. 9533 * If there is a word from a previous split, append. 9534 */ 9535 if (flags & WF_KEEPCAP) 9536 /* Must find the word in the keep-case tree. */ 9537 find_keepcap_word(slang, tword + sp->ts_splitoff, 9538 preword + sp->ts_prewordlen); 9539 else 9540 { 9541 /* Include badflags: if the badword is onecap or allcap 9542 * use that for the goodword too. 
But if the badword is 9543 * allcap and it's only one char long use onecap. */ 9544 c = su->su_badflags; 9545 if ((c & WF_ALLCAP) 9546 #ifdef FEAT_MBYTE 9547 && su->su_badlen == (*mb_ptr2len)(su->su_badptr) 9548 #else 9549 && su->su_badlen == 1 9550 #endif 9551 ) 9552 c = WF_ONECAP; 9553 c |= flags; 9554 9555 /* When appending a compound word after a word character 9556 * don't use Onecap. */ 9557 if (p != NULL && spell_iswordp_nmw(p)) 9558 c &= ~WF_ONECAP; 9559 make_case_word(tword + sp->ts_splitoff, 9560 preword + sp->ts_prewordlen, c); 9561 } 9562 9563 /* Don't use a banned word. It may appear again as a good 9564 * word, thus remember it. */ 9565 if (flags & WF_BANNED) 9566 { 9567 add_banned(su, preword + sp->ts_prewordlen); 9568 break; 9569 } 9570 if ((sp->ts_complen == sp->ts_compsplit 9571 && was_banned(su, preword + sp->ts_prewordlen)) 9572 || was_banned(su, preword)) 9573 { 9574 if (slang->sl_compprog == NULL) 9575 break; 9576 /* the word so far was banned but we may try compounding */ 9577 goodword_ends = FALSE; 9578 } 9579 9580 newscore = 0; 9581 if ((flags & WF_REGION) 9582 && (((unsigned)flags >> 16) & lp->lp_region) == 0) 9583 newscore += SCORE_REGION; 9584 if (flags & WF_RARE) 9585 newscore += SCORE_RARE; 9586 9587 if (!spell_valid_case(su->su_badflags, 9588 captype(preword + sp->ts_prewordlen, NULL))) 9589 newscore += SCORE_ICASE; 9590 9591 maysplit = TRUE; 9592 if (fword_ends && goodword_ends 9593 && sp->ts_fidx >= sp->ts_fidxtry) 9594 { 9595 /* The badword also ends: add suggestions. Give a penalty 9596 * when changing non-word char to word char, e.g., "thes," 9597 * -> "these". 
*/ 9598 p = fword + sp->ts_fidx; 9599 #ifdef FEAT_MBYTE 9600 if (has_mbyte) 9601 mb_ptr_back(fword, p); 9602 else 9603 #endif 9604 --p; 9605 if (!spell_iswordp(p, curbuf)) 9606 { 9607 p = preword + STRLEN(preword); 9608 #ifdef FEAT_MBYTE 9609 if (has_mbyte) 9610 mb_ptr_back(preword, p); 9611 else 9612 #endif 9613 --p; 9614 if (spell_iswordp(p, curbuf)) 9615 newscore += SCORE_NONWORD; 9616 } 9617 9618 add_suggestion(su, &su->su_ga, preword, 9619 sp->ts_fidx - repextra, 9620 sp->ts_score + newscore, 0, FALSE, 9621 lp->lp_sallang); 9622 9623 /* When the bad word doesn't end yet, try changing the 9624 * next word. E.g., find suggestions for "the the" where 9625 * the second "the" is different. It's done like a split. 9626 */ 9627 if (sp->ts_fidx - repextra >= su->su_badlen) 9628 maysplit = FALSE; 9629 } 9630 9631 if (maysplit 9632 && (sp->ts_fidx >= sp->ts_fidxtry || fword_ends) 9633 #ifdef FEAT_MBYTE 9634 /* Don't split halfway a character. */ 9635 && (!has_mbyte || sp->ts_tcharlen == 0) 9636 #endif 9637 ) 9638 { 9639 int try_compound; 9640 9641 /* Get here in two situations: 9642 * 1. The word in the tree ends but the badword continues: 9643 * If the word allows compounding try that. Otherwise 9644 * try a split by inserting a space. For both check 9645 * that a valid words starts at fword[sp->ts_fidx]. 9646 * For NOBREAK do like compounding to be able to check 9647 * if the next word is valid. 9648 * 2. The badword does end, but it was due to a change 9649 * (e.g., a swap). No need to split, but do check that 9650 * the following word is valid. 
9651 */ 9652 try_compound = FALSE; 9653 if ((!fword_ends || !goodword_ends) 9654 && slang->sl_compprog != NULL 9655 && ((unsigned)flags >> 24) != 0 9656 && sp->ts_twordlen - sp->ts_splitoff 9657 >= slang->sl_compminlen 9658 #ifdef FEAT_MBYTE 9659 && (!has_mbyte 9660 || slang->sl_compminlen == 0 9661 || mb_charlen(tword + sp->ts_splitoff) 9662 >= slang->sl_compminlen) 9663 #endif 9664 && (slang->sl_compsylmax < MAXWLEN 9665 || sp->ts_complen + 1 - sp->ts_compsplit 9666 < slang->sl_compmax) 9667 && (byte_in_str(sp->ts_complen == sp->ts_compsplit 9668 ? slang->sl_compstartflags 9669 : slang->sl_compallflags, 9670 ((unsigned)flags >> 24)))) 9671 { 9672 try_compound = TRUE; 9673 compflags[sp->ts_complen] = ((unsigned)flags >> 24); 9674 compflags[sp->ts_complen + 1] = NUL; 9675 } 9676 9677 /* For NOBREAK we never try splitting, it won't make any 9678 * word valid. */ 9679 if (slang->sl_nobreak) 9680 try_compound = TRUE; 9681 9682 /* If we could add a compound word, and it's also possible 9683 * to split at this point, do the split first and set 9684 * TSF_DIDSPLIT to avoid doing it again. */ 9685 else if (!fword_ends 9686 && try_compound 9687 && (sp->ts_flags & TSF_DIDSPLIT) == 0) 9688 { 9689 try_compound = FALSE; 9690 sp->ts_flags |= TSF_DIDSPLIT; 9691 --sp->ts_curi; /* do the same NUL again */ 9692 compflags[sp->ts_complen] = NUL; 9693 } 9694 else 9695 sp->ts_flags &= ~TSF_DIDSPLIT; 9696 9697 if (!try_compound && (!fword_ends || !goodword_ends)) 9698 { 9699 /* If we're going to split need to check that the 9700 * words so far are valid for compounding. If there 9701 * is only one word it must not have the NEEDCOMPOUND 9702 * flag. 
*/ 9703 if (sp->ts_complen == sp->ts_compsplit 9704 && (flags & WF_NEEDCOMP)) 9705 break; 9706 p = preword; 9707 while (*skiptowhite(p) != NUL) 9708 p = skipwhite(skiptowhite(p)); 9709 if (sp->ts_complen > sp->ts_compsplit 9710 && !can_compound(slang, p, 9711 compflags + sp->ts_compsplit)) 9712 break; 9713 newscore += SCORE_SPLIT; 9714 } 9715 9716 if (try_deeper(su, stack, depth, newscore)) 9717 { 9718 /* Save things to be restored at STATE_SPLITUNDO. */ 9719 sp->ts_save_badflags = su->su_badflags; 9720 sp->ts_state = STATE_SPLITUNDO; 9721 9722 ++depth; 9723 sp = &stack[depth]; 9724 9725 /* Append a space to preword when splitting. */ 9726 if (!try_compound && !fword_ends) 9727 STRCAT(preword, " "); 9728 sp->ts_prewordlen = STRLEN(preword); 9729 sp->ts_splitoff = sp->ts_twordlen; 9730 sp->ts_splitfidx = sp->ts_fidx; 9731 9732 /* If the badword has a non-word character at this 9733 * position skip it. That means replacing the 9734 * non-word character with a space. Always skip a 9735 * character when the word ends. But only when the 9736 * good word can end. */ 9737 if (((!try_compound 9738 && !spell_iswordp_nmw(fword + sp->ts_fidx)) 9739 || fword_ends) 9740 && goodword_ends) 9741 { 9742 int l; 9743 9744 #ifdef FEAT_MBYTE 9745 if (has_mbyte) 9746 l = MB_BYTE2LEN(fword[sp->ts_fidx]); 9747 else 9748 #endif 9749 l = 1; 9750 if (fword_ends) 9751 { 9752 /* Copy the skipped character to preword. */ 9753 mch_memmove(preword + sp->ts_prewordlen, 9754 fword + sp->ts_fidx, l); 9755 sp->ts_prewordlen += l; 9756 preword[sp->ts_prewordlen] = NUL; 9757 } 9758 else 9759 sp->ts_score -= SCORE_SPLIT - SCORE_SUBST; 9760 sp->ts_fidx += l; 9761 } 9762 9763 /* When compounding include compound flag in 9764 * compflags[] (already set above). When splitting we 9765 * may start compounding over again. 
*/ 9766 if (try_compound) 9767 ++sp->ts_complen; 9768 else 9769 sp->ts_compsplit = sp->ts_complen; 9770 sp->ts_prefixdepth = PFD_NOPREFIX; 9771 9772 /* set su->su_badflags to the caps type at this 9773 * position */ 9774 #ifdef FEAT_MBYTE 9775 if (has_mbyte) 9776 n = nofold_len(fword, sp->ts_fidx, su->su_badptr); 9777 else 9778 #endif 9779 n = sp->ts_fidx; 9780 su->su_badflags = badword_captype(su->su_badptr + n, 9781 su->su_badptr + su->su_badlen); 9782 9783 /* Restart at top of the tree. */ 9784 sp->ts_arridx = 0; 9785 9786 /* If there are postponed prefixes, try these too. */ 9787 if (pbyts != NULL) 9788 { 9789 byts = pbyts; 9790 idxs = pidxs; 9791 sp->ts_prefixdepth = PFD_PREFIXTREE; 9792 sp->ts_state = STATE_NOPREFIX; 9793 } 9794 } 9795 } 9796 break; 9797 9798 case STATE_SPLITUNDO: 9799 /* Undo the changes done for word split or compound word. */ 9800 su->su_badflags = sp->ts_save_badflags; 9801 9802 /* Continue looking for NUL bytes. */ 9803 sp->ts_state = STATE_START; 9804 9805 /* In case we went into the prefix tree. */ 9806 byts = fbyts; 9807 idxs = fidxs; 9808 break; 9809 9810 case STATE_ENDNUL: 9811 /* Past the NUL bytes in the node. */ 9812 su->su_badflags = sp->ts_save_badflags; 9813 if (fword[sp->ts_fidx] == NUL 9814 #ifdef FEAT_MBYTE 9815 && sp->ts_tcharlen == 0 9816 #endif 9817 ) 9818 { 9819 /* The badword ends, can't use the bytes in this node. */ 9820 sp->ts_state = STATE_DEL; 9821 break; 9822 } 9823 sp->ts_state = STATE_PLAIN; 9824 /*FALLTHROUGH*/ 9825 9826 case STATE_PLAIN: 9827 /* 9828 * Go over all possible bytes at this node, add each to 9829 * tword[] and use child node. "ts_curi" is the index. 9830 */ 9831 arridx = sp->ts_arridx; 9832 if (sp->ts_curi > byts[arridx]) 9833 { 9834 /* Done all bytes at this node, do next state. When still 9835 * at already changed bytes skip the other tricks. 
*/ 9836 if (sp->ts_fidx >= sp->ts_fidxtry) 9837 sp->ts_state = STATE_DEL; 9838 else 9839 sp->ts_state = STATE_FINAL; 9840 } 9841 else 9842 { 9843 arridx += sp->ts_curi++; 9844 c = byts[arridx]; 9845 9846 /* Normal byte, go one level deeper. If it's not equal to 9847 * the byte in the bad word adjust the score. But don't 9848 * even try when the byte was already changed. */ 9849 if (c == fword[sp->ts_fidx] 9850 #ifdef FEAT_MBYTE 9851 || (sp->ts_tcharlen > 0 9852 && sp->ts_isdiff != DIFF_NONE) 9853 #endif 9854 ) 9855 newscore = 0; 9856 else 9857 newscore = SCORE_SUBST; 9858 if ((newscore == 0 || sp->ts_fidx >= sp->ts_fidxtry) 9859 && try_deeper(su, stack, depth, newscore)) 9860 { 9861 ++depth; 9862 sp = &stack[depth]; 9863 ++sp->ts_fidx; 9864 tword[sp->ts_twordlen++] = c; 9865 sp->ts_arridx = idxs[arridx]; 9866 #ifdef FEAT_MBYTE 9867 if (newscore == SCORE_SUBST) 9868 sp->ts_isdiff = DIFF_YES; 9869 if (has_mbyte) 9870 { 9871 /* Multi-byte characters are a bit complicated to 9872 * handle: They differ when any of the bytes 9873 * differ and then their length may also differ. */ 9874 if (sp->ts_tcharlen == 0) 9875 { 9876 /* First byte. */ 9877 sp->ts_tcharidx = 0; 9878 sp->ts_tcharlen = MB_BYTE2LEN(c); 9879 sp->ts_fcharstart = sp->ts_fidx - 1; 9880 sp->ts_isdiff = (newscore != 0) 9881 ? DIFF_YES : DIFF_NONE; 9882 } 9883 else if (sp->ts_isdiff == DIFF_INSERT) 9884 /* When inserting trail bytes don't advance in 9885 * the bad word. */ 9886 --sp->ts_fidx; 9887 if (++sp->ts_tcharidx == sp->ts_tcharlen) 9888 { 9889 /* Last byte of character. */ 9890 if (sp->ts_isdiff == DIFF_YES) 9891 { 9892 /* Correct ts_fidx for the byte length of 9893 * the character (we didn't check that 9894 * before). */ 9895 sp->ts_fidx = sp->ts_fcharstart 9896 + MB_BYTE2LEN( 9897 fword[sp->ts_fcharstart]); 9898 9899 /* For changing a composing character 9900 * adjust the score from SCORE_SUBST to 9901 * SCORE_SUBCOMP. 
*/ 9902 if (enc_utf8 9903 && utf_iscomposing( 9904 mb_ptr2char(tword 9905 + sp->ts_twordlen 9906 - sp->ts_tcharlen)) 9907 && utf_iscomposing( 9908 mb_ptr2char(fword 9909 + sp->ts_fcharstart))) 9910 sp->ts_score -= 9911 SCORE_SUBST - SCORE_SUBCOMP; 9912 9913 /* For a similar character adjust score 9914 * from SCORE_SUBST to SCORE_SIMILAR. */ 9915 else if (slang->sl_has_map 9916 && similar_chars(slang, 9917 mb_ptr2char(tword 9918 + sp->ts_twordlen 9919 - sp->ts_tcharlen), 9920 mb_ptr2char(fword 9921 + sp->ts_fcharstart))) 9922 sp->ts_score -= 9923 SCORE_SUBST - SCORE_SIMILAR; 9924 } 9925 else if (sp->ts_isdiff == DIFF_INSERT 9926 && sp->ts_twordlen > sp->ts_tcharlen) 9927 { 9928 p = tword + sp->ts_twordlen 9929 - sp->ts_tcharlen; 9930 c = mb_ptr2char(p); 9931 if (enc_utf8 && utf_iscomposing(c)) 9932 { 9933 /* Inserting a composing char doesn't 9934 * count that much. */ 9935 sp->ts_score -= SCORE_INS 9936 - SCORE_INSCOMP; 9937 } 9938 else 9939 { 9940 /* If the previous character was the 9941 * same, thus doubling a character, 9942 * give a bonus to the score. */ 9943 mb_ptr_back(tword, p); 9944 if (c == mb_ptr2char(p)) 9945 sp->ts_score -= SCORE_INS 9946 - SCORE_INSDUP; 9947 } 9948 } 9949 9950 /* Starting a new char, reset the length. */ 9951 sp->ts_tcharlen = 0; 9952 } 9953 } 9954 else 9955 #endif 9956 { 9957 /* If we found a similar char adjust the score. 9958 * We do this after calling try_deeper() because 9959 * it's slow. */ 9960 if (newscore != 0 9961 && slang->sl_has_map 9962 && similar_chars(slang, 9963 c, fword[sp->ts_fidx - 1])) 9964 sp->ts_score -= SCORE_SUBST - SCORE_SIMILAR; 9965 } 9966 } 9967 } 9968 break; 9969 9970 case STATE_DEL: 9971 #ifdef FEAT_MBYTE 9972 /* When past the first byte of a multi-byte char don't try 9973 * delete/insert/swap a character. */ 9974 if (has_mbyte && sp->ts_tcharlen > 0) 9975 { 9976 sp->ts_state = STATE_FINAL; 9977 break; 9978 } 9979 #endif 9980 /* 9981 * Try skipping one character in the bad word (delete it). 
9982 */ 9983 sp->ts_state = STATE_INS; 9984 sp->ts_curi = 1; 9985 if (fword[sp->ts_fidx] != NUL 9986 && try_deeper(su, stack, depth, SCORE_DEL)) 9987 { 9988 ++depth; 9989 9990 /* Advance over the character in fword[]. Give a bonus to 9991 * the score if the same character is following "nn" -> 9992 * "n". */ 9993 #ifdef FEAT_MBYTE 9994 if (has_mbyte) 9995 { 9996 c = mb_ptr2char(fword + sp->ts_fidx); 9997 stack[depth].ts_fidx += MB_BYTE2LEN(fword[sp->ts_fidx]); 9998 if (enc_utf8 && utf_iscomposing(c)) 9999 stack[depth].ts_score -= SCORE_DEL - SCORE_DELCOMP; 10000 else if (c == mb_ptr2char(fword + stack[depth].ts_fidx)) 10001 stack[depth].ts_score -= SCORE_DEL - SCORE_DELDUP; 10002 } 10003 else 10004 #endif 10005 { 10006 ++stack[depth].ts_fidx; 10007 if (fword[sp->ts_fidx] == fword[sp->ts_fidx + 1]) 10008 stack[depth].ts_score -= SCORE_DEL - SCORE_DELDUP; 10009 } 10010 break; 10011 } 10012 /*FALLTHROUGH*/ 10013 10014 case STATE_INS: 10015 /* Insert one byte. Do this for each possible byte at this 10016 * node. */ 10017 n = sp->ts_arridx; 10018 if (sp->ts_curi > byts[n]) 10019 { 10020 /* Done all bytes at this node, do next state. */ 10021 sp->ts_state = STATE_SWAP; 10022 } 10023 else 10024 { 10025 /* Do one more byte at this node. Skip NUL bytes. */ 10026 n += sp->ts_curi++; 10027 c = byts[n]; 10028 if (c != 0 && try_deeper(su, stack, depth, SCORE_INS)) 10029 { 10030 ++depth; 10031 sp = &stack[depth]; 10032 tword[sp->ts_twordlen++] = c; 10033 sp->ts_arridx = idxs[n]; 10034 #ifdef FEAT_MBYTE 10035 if (has_mbyte) 10036 { 10037 fl = MB_BYTE2LEN(c); 10038 if (fl > 1) 10039 { 10040 /* There are following bytes for the same 10041 * character. We must find all bytes before 10042 * trying delete/insert/swap/etc. 
*/ 10043 sp->ts_tcharlen = fl; 10044 sp->ts_tcharidx = 1; 10045 sp->ts_isdiff = DIFF_INSERT; 10046 } 10047 } 10048 else 10049 fl = 1; 10050 if (fl == 1) 10051 #endif 10052 { 10053 /* If the previous character was the same, thus 10054 * doubling a character, give a bonus to the 10055 * score. */ 10056 if (sp->ts_twordlen >= 2 10057 && tword[sp->ts_twordlen - 2] == c) 10058 sp->ts_score -= SCORE_INS - SCORE_INSDUP; 10059 } 10060 } 10061 } 10062 break; 10063 10064 case STATE_SWAP: 10065 /* 10066 * Swap two bytes in the bad word: "12" -> "21". 10067 * We change "fword" here, it's changed back afterwards. 10068 */ 10069 p = fword + sp->ts_fidx; 10070 c = *p; 10071 if (c == NUL) 10072 { 10073 /* End of word, can't swap or replace. */ 10074 sp->ts_state = STATE_FINAL; 10075 break; 10076 } 10077 10078 /* Don't swap if the first character is not a word character. 10079 * SWAP3 etc. also don't make sense then. */ 10080 if (!spell_iswordp(p, curbuf)) 10081 { 10082 sp->ts_state = STATE_REP_INI; 10083 break; 10084 } 10085 10086 #ifdef FEAT_MBYTE 10087 if (has_mbyte) 10088 { 10089 n = mb_cptr2len(p); 10090 c = mb_ptr2char(p); 10091 if (!spell_iswordp(p + n, curbuf)) 10092 c2 = c; /* don't swap non-word char */ 10093 else 10094 c2 = mb_ptr2char(p + n); 10095 } 10096 else 10097 #endif 10098 { 10099 if (!spell_iswordp(p + 1, curbuf)) 10100 c2 = c; /* don't swap non-word char */ 10101 else 10102 c2 = p[1]; 10103 } 10104 10105 /* When characters are identical, swap won't do anything. 10106 * Also get here if the second char is not a word character. 
*/ 10107 if (c == c2) 10108 { 10109 sp->ts_state = STATE_SWAP3; 10110 break; 10111 } 10112 if (c2 != NUL && try_deeper(su, stack, depth, SCORE_SWAP)) 10113 { 10114 sp->ts_state = STATE_UNSWAP; 10115 ++depth; 10116 #ifdef FEAT_MBYTE 10117 if (has_mbyte) 10118 { 10119 fl = mb_char2len(c2); 10120 mch_memmove(p, p + n, fl); 10121 mb_char2bytes(c, p + fl); 10122 stack[depth].ts_fidxtry = sp->ts_fidx + n + fl; 10123 } 10124 else 10125 #endif 10126 { 10127 p[0] = c2; 10128 p[1] = c; 10129 stack[depth].ts_fidxtry = sp->ts_fidx + 2; 10130 } 10131 } 10132 else 10133 /* If this swap doesn't work then SWAP3 won't either. */ 10134 sp->ts_state = STATE_REP_INI; 10135 break; 10136 10137 case STATE_UNSWAP: 10138 /* Undo the STATE_SWAP swap: "21" -> "12". */ 10139 p = fword + sp->ts_fidx; 10140 #ifdef FEAT_MBYTE 10141 if (has_mbyte) 10142 { 10143 n = MB_BYTE2LEN(*p); 10144 c = mb_ptr2char(p + n); 10145 mch_memmove(p + MB_BYTE2LEN(p[n]), p, n); 10146 mb_char2bytes(c, p); 10147 } 10148 else 10149 #endif 10150 { 10151 c = *p; 10152 *p = p[1]; 10153 p[1] = c; 10154 } 10155 /*FALLTHROUGH*/ 10156 10157 case STATE_SWAP3: 10158 /* Swap two bytes, skipping one: "123" -> "321". We change 10159 * "fword" here, it's changed back afterwards. */ 10160 p = fword + sp->ts_fidx; 10161 #ifdef FEAT_MBYTE 10162 if (has_mbyte) 10163 { 10164 n = mb_cptr2len(p); 10165 c = mb_ptr2char(p); 10166 fl = mb_cptr2len(p + n); 10167 c2 = mb_ptr2char(p + n); 10168 if (!spell_iswordp(p + n + fl, curbuf)) 10169 c3 = c; /* don't swap non-word char */ 10170 else 10171 c3 = mb_ptr2char(p + n + fl); 10172 } 10173 else 10174 #endif 10175 { 10176 c = *p; 10177 c2 = p[1]; 10178 if (!spell_iswordp(p + 2, curbuf)) 10179 c3 = c; /* don't swap non-word char */ 10180 else 10181 c3 = p[2]; 10182 } 10183 10184 /* When characters are identical: "121" then SWAP3 result is 10185 * identical, ROT3L result is same as SWAP: "211", ROT3L 10186 * result is same as SWAP on next char: "112". Thus skip all 10187 * swapping. 
Also skip when c3 is NUL. 10188 * Also get here when the third character is not a word 10189 * character. Second character may any char: "a.b" -> "b.a" */ 10190 if (c == c3 || c3 == NUL) 10191 { 10192 sp->ts_state = STATE_REP_INI; 10193 break; 10194 } 10195 if (try_deeper(su, stack, depth, SCORE_SWAP3)) 10196 { 10197 sp->ts_state = STATE_UNSWAP3; 10198 ++depth; 10199 #ifdef FEAT_MBYTE 10200 if (has_mbyte) 10201 { 10202 tl = mb_char2len(c3); 10203 mch_memmove(p, p + n + fl, tl); 10204 mb_char2bytes(c2, p + tl); 10205 mb_char2bytes(c, p + fl + tl); 10206 stack[depth].ts_fidxtry = sp->ts_fidx + n + fl + tl; 10207 } 10208 else 10209 #endif 10210 { 10211 p[0] = p[2]; 10212 p[2] = c; 10213 stack[depth].ts_fidxtry = sp->ts_fidx + 3; 10214 } 10215 } 10216 else 10217 sp->ts_state = STATE_REP_INI; 10218 break; 10219 10220 case STATE_UNSWAP3: 10221 /* Undo STATE_SWAP3: "321" -> "123" */ 10222 p = fword + sp->ts_fidx; 10223 #ifdef FEAT_MBYTE 10224 if (has_mbyte) 10225 { 10226 n = MB_BYTE2LEN(*p); 10227 c2 = mb_ptr2char(p + n); 10228 fl = MB_BYTE2LEN(p[n]); 10229 c = mb_ptr2char(p + n + fl); 10230 tl = MB_BYTE2LEN(p[n + fl]); 10231 mch_memmove(p + fl + tl, p, n); 10232 mb_char2bytes(c, p); 10233 mb_char2bytes(c2, p + tl); 10234 p = p + tl; 10235 } 10236 else 10237 #endif 10238 { 10239 c = *p; 10240 *p = p[2]; 10241 p[2] = c; 10242 ++p; 10243 } 10244 10245 if (!spell_iswordp(p, curbuf)) 10246 { 10247 /* Middle char is not a word char, skip the rotate. 10248 * First and third char were already checked at swap 10249 * and swap3. */ 10250 sp->ts_state = STATE_REP_INI; 10251 break; 10252 } 10253 10254 /* Rotate three characters left: "123" -> "231". We change 10255 * "fword" here, it's changed back afterwards. 
*/ 10256 if (try_deeper(su, stack, depth, SCORE_SWAP3)) 10257 { 10258 sp->ts_state = STATE_UNROT3L; 10259 ++depth; 10260 p = fword + sp->ts_fidx; 10261 #ifdef FEAT_MBYTE 10262 if (has_mbyte) 10263 { 10264 n = mb_cptr2len(p); 10265 c = mb_ptr2char(p); 10266 fl = mb_cptr2len(p + n); 10267 fl += mb_cptr2len(p + n + fl); 10268 mch_memmove(p, p + n, fl); 10269 mb_char2bytes(c, p + fl); 10270 stack[depth].ts_fidxtry = sp->ts_fidx + n + fl; 10271 } 10272 else 10273 #endif 10274 { 10275 c = *p; 10276 *p = p[1]; 10277 p[1] = p[2]; 10278 p[2] = c; 10279 stack[depth].ts_fidxtry = sp->ts_fidx + 3; 10280 } 10281 } 10282 else 10283 sp->ts_state = STATE_REP_INI; 10284 break; 10285 10286 case STATE_UNROT3L: 10287 /* Undo ROT3L: "231" -> "123" */ 10288 p = fword + sp->ts_fidx; 10289 #ifdef FEAT_MBYTE 10290 if (has_mbyte) 10291 { 10292 n = MB_BYTE2LEN(*p); 10293 n += MB_BYTE2LEN(p[n]); 10294 c = mb_ptr2char(p + n); 10295 tl = MB_BYTE2LEN(p[n]); 10296 mch_memmove(p + tl, p, n); 10297 mb_char2bytes(c, p); 10298 } 10299 else 10300 #endif 10301 { 10302 c = p[2]; 10303 p[2] = p[1]; 10304 p[1] = *p; 10305 *p = c; 10306 } 10307 10308 /* Rotate three bytes right: "123" -> "312". We change 10309 * "fword" here, it's changed back afterwards. 
*/ 10310 if (try_deeper(su, stack, depth, SCORE_SWAP3)) 10311 { 10312 sp->ts_state = STATE_UNROT3R; 10313 ++depth; 10314 p = fword + sp->ts_fidx; 10315 #ifdef FEAT_MBYTE 10316 if (has_mbyte) 10317 { 10318 n = mb_cptr2len(p); 10319 n += mb_cptr2len(p + n); 10320 c = mb_ptr2char(p + n); 10321 tl = mb_cptr2len(p + n); 10322 mch_memmove(p + tl, p, n); 10323 mb_char2bytes(c, p); 10324 stack[depth].ts_fidxtry = sp->ts_fidx + n + tl; 10325 } 10326 else 10327 #endif 10328 { 10329 c = p[2]; 10330 p[2] = p[1]; 10331 p[1] = *p; 10332 *p = c; 10333 stack[depth].ts_fidxtry = sp->ts_fidx + 3; 10334 } 10335 } 10336 else 10337 sp->ts_state = STATE_REP_INI; 10338 break; 10339 10340 case STATE_UNROT3R: 10341 /* Undo ROT3R: "312" -> "123" */ 10342 p = fword + sp->ts_fidx; 10343 #ifdef FEAT_MBYTE 10344 if (has_mbyte) 10345 { 10346 c = mb_ptr2char(p); 10347 tl = MB_BYTE2LEN(*p); 10348 n = MB_BYTE2LEN(p[tl]); 10349 n += MB_BYTE2LEN(p[tl + n]); 10350 mch_memmove(p, p + tl, n); 10351 mb_char2bytes(c, p + n); 10352 } 10353 else 10354 #endif 10355 { 10356 c = *p; 10357 *p = p[1]; 10358 p[1] = p[2]; 10359 p[2] = c; 10360 } 10361 /*FALLTHROUGH*/ 10362 10363 case STATE_REP_INI: 10364 /* Check if matching with REP items from the .aff file would 10365 * work. Quickly skip if: 10366 * - there are no REP items 10367 * - the score is going to be too high anyway 10368 * - already applied a REP item or swapped here */ 10369 if (lp->lp_replang == NULL 10370 || sp->ts_score + SCORE_REP >= su->su_maxscore 10371 || sp->ts_fidx < sp->ts_fidxtry) 10372 { 10373 sp->ts_state = STATE_FINAL; 10374 break; 10375 } 10376 gap = &lp->lp_replang->sl_rep; 10377 10378 /* Use the first byte to quickly find the first entry that 10379 * may match. If the index is -1 there is none. 
*/ 10380 sp->ts_curi = lp->lp_replang->sl_rep_first[fword[sp->ts_fidx]]; 10381 if (sp->ts_curi < 0) 10382 { 10383 sp->ts_state = STATE_FINAL; 10384 break; 10385 } 10386 10387 sp->ts_state = STATE_REP; 10388 /*FALLTHROUGH*/ 10389 10390 case STATE_REP: 10391 /* Try matching with REP items from the .aff file. For each 10392 * match replace the characters and check if the resulting 10393 * word is valid. */ 10394 p = fword + sp->ts_fidx; 10395 10396 gap = &lp->lp_replang->sl_rep; 10397 while (sp->ts_curi < gap->ga_len) 10398 { 10399 ftp = (fromto_T *)gap->ga_data + sp->ts_curi++; 10400 if (*ftp->ft_from != *p) 10401 { 10402 /* past possible matching entries */ 10403 sp->ts_curi = gap->ga_len; 10404 break; 10405 } 10406 if (STRNCMP(ftp->ft_from, p, STRLEN(ftp->ft_from)) == 0 10407 && try_deeper(su, stack, depth, SCORE_REP)) 10408 { 10409 /* Need to undo this afterwards. */ 10410 sp->ts_state = STATE_REP_UNDO; 10411 10412 /* Change the "from" to the "to" string. */ 10413 ++depth; 10414 fl = STRLEN(ftp->ft_from); 10415 tl = STRLEN(ftp->ft_to); 10416 if (fl != tl) 10417 { 10418 mch_memmove(p + tl, p + fl, STRLEN(p + fl) + 1); 10419 repextra += tl - fl; 10420 } 10421 mch_memmove(p, ftp->ft_to, tl); 10422 stack[depth].ts_fidxtry = sp->ts_fidx + tl; 10423 #ifdef FEAT_MBYTE 10424 stack[depth].ts_tcharlen = 0; 10425 #endif 10426 break; 10427 } 10428 } 10429 10430 if (sp->ts_curi >= gap->ga_len && sp->ts_state == STATE_REP) 10431 /* No (more) matches. */ 10432 sp->ts_state = STATE_FINAL; 10433 10434 break; 10435 10436 case STATE_REP_UNDO: 10437 /* Undo a REP replacement and continue with the next one. 
*/ 10438 ftp = (fromto_T *)lp->lp_replang->sl_rep.ga_data 10439 + sp->ts_curi - 1; 10440 fl = STRLEN(ftp->ft_from); 10441 tl = STRLEN(ftp->ft_to); 10442 p = fword + sp->ts_fidx; 10443 if (fl != tl) 10444 { 10445 mch_memmove(p + fl, p + tl, STRLEN(p + tl) + 1); 10446 repextra -= tl - fl; 10447 } 10448 mch_memmove(p, ftp->ft_from, fl); 10449 sp->ts_state = STATE_REP; 10450 break; 10451 10452 default: 10453 /* Did all possible states at this level, go up one level. */ 10454 --depth; 10455 10456 if (depth >= 0 && stack[depth].ts_prefixdepth == PFD_PREFIXTREE) 10457 { 10458 /* Continue in or go back to the prefix tree. */ 10459 byts = pbyts; 10460 idxs = pidxs; 10461 } 10462 10463 /* Don't check for CTRL-C too often, it takes time. */ 10464 line_breakcheck(); 10465 } 10466 } 10467 } 10468 } 10469 10470 /* 10471 * Try going one level deeper in the tree. 10472 */ 10473 static int 10474 try_deeper(su, stack, depth, score_add) 10475 suginfo_T *su; 10476 trystate_T *stack; 10477 int depth; 10478 int score_add; 10479 { 10480 int newscore; 10481 10482 /* Refuse to go deeper if the scrore is getting too big. */ 10483 newscore = stack[depth].ts_score + score_add; 10484 if (newscore >= su->su_maxscore) 10485 return FALSE; 10486 10487 stack[depth + 1] = stack[depth]; 10488 stack[depth + 1].ts_state = STATE_START; 10489 stack[depth + 1].ts_score = newscore; 10490 stack[depth + 1].ts_curi = 1; /* start just after length byte */ 10491 stack[depth + 1].ts_flags = 0; 10492 return TRUE; 10493 } 10494 10495 #ifdef FEAT_MBYTE 10496 /* 10497 * Case-folding may change the number of bytes: Count nr of chars in 10498 * fword[flen] and return the byte length of that many chars in "word". 
10499 */ 10500 static int 10501 nofold_len(fword, flen, word) 10502 char_u *fword; 10503 int flen; 10504 char_u *word; 10505 { 10506 char_u *p; 10507 int i = 0; 10508 10509 for (p = fword; p < fword + flen; mb_ptr_adv(p)) 10510 ++i; 10511 for (p = word; i > 0; mb_ptr_adv(p)) 10512 --i; 10513 return (int)(p - word); 10514 } 10515 #endif 10516 10517 /* 10518 * "fword" is a good word with case folded. Find the matching keep-case 10519 * words and put it in "kword". 10520 * Theoretically there could be several keep-case words that result in the 10521 * same case-folded word, but we only find one... 10522 */ 10523 static void 10524 find_keepcap_word(slang, fword, kword) 10525 slang_T *slang; 10526 char_u *fword; 10527 char_u *kword; 10528 { 10529 char_u uword[MAXWLEN]; /* "fword" in upper-case */ 10530 int depth; 10531 idx_T tryidx; 10532 10533 /* The following arrays are used at each depth in the tree. */ 10534 idx_T arridx[MAXWLEN]; 10535 int round[MAXWLEN]; 10536 int fwordidx[MAXWLEN]; 10537 int uwordidx[MAXWLEN]; 10538 int kwordlen[MAXWLEN]; 10539 10540 int flen, ulen; 10541 int l; 10542 int len; 10543 int c; 10544 idx_T lo, hi, m; 10545 char_u *p; 10546 char_u *byts = slang->sl_kbyts; /* array with bytes of the words */ 10547 idx_T *idxs = slang->sl_kidxs; /* array with indexes */ 10548 10549 if (byts == NULL) 10550 { 10551 /* array is empty: "cannot happen" */ 10552 *kword = NUL; 10553 return; 10554 } 10555 10556 /* Make an all-cap version of "fword". */ 10557 allcap_copy(fword, uword); 10558 10559 /* 10560 * Each character needs to be tried both case-folded and upper-case. 10561 * All this gets very complicated if we keep in mind that changing case 10562 * may change the byte length of a multi-byte character... 10563 */ 10564 depth = 0; 10565 arridx[0] = 0; 10566 round[0] = 0; 10567 fwordidx[0] = 0; 10568 uwordidx[0] = 0; 10569 kwordlen[0] = 0; 10570 while (depth >= 0) 10571 { 10572 if (fword[fwordidx[depth]] == NUL) 10573 { 10574 /* We are at the end of "fword". 
If the tree allows a word to end 10575 * here we have found a match. */ 10576 if (byts[arridx[depth] + 1] == 0) 10577 { 10578 kword[kwordlen[depth]] = NUL; 10579 return; 10580 } 10581 10582 /* kword is getting too long, continue one level up */ 10583 --depth; 10584 } 10585 else if (++round[depth] > 2) 10586 { 10587 /* tried both fold-case and upper-case character, continue one 10588 * level up */ 10589 --depth; 10590 } 10591 else 10592 { 10593 /* 10594 * round[depth] == 1: Try using the folded-case character. 10595 * round[depth] == 2: Try using the upper-case character. 10596 */ 10597 #ifdef FEAT_MBYTE 10598 if (has_mbyte) 10599 { 10600 flen = mb_cptr2len(fword + fwordidx[depth]); 10601 ulen = mb_cptr2len(uword + uwordidx[depth]); 10602 } 10603 else 10604 #endif 10605 ulen = flen = 1; 10606 if (round[depth] == 1) 10607 { 10608 p = fword + fwordidx[depth]; 10609 l = flen; 10610 } 10611 else 10612 { 10613 p = uword + uwordidx[depth]; 10614 l = ulen; 10615 } 10616 10617 for (tryidx = arridx[depth]; l > 0; --l) 10618 { 10619 /* Perform a binary search in the list of accepted bytes. */ 10620 len = byts[tryidx++]; 10621 c = *p++; 10622 lo = tryidx; 10623 hi = tryidx + len - 1; 10624 while (lo < hi) 10625 { 10626 m = (lo + hi) / 2; 10627 if (byts[m] > c) 10628 hi = m - 1; 10629 else if (byts[m] < c) 10630 lo = m + 1; 10631 else 10632 { 10633 lo = hi = m; 10634 break; 10635 } 10636 } 10637 10638 /* Stop if there is no matching byte. */ 10639 if (hi < lo || byts[lo] != c) 10640 break; 10641 10642 /* Continue at the child (if there is one). */ 10643 tryidx = idxs[lo]; 10644 } 10645 10646 if (l == 0) 10647 { 10648 /* 10649 * Found the matching char. Copy it to "kword" and go a 10650 * level deeper. 
10651 */ 10652 if (round[depth] == 1) 10653 { 10654 STRNCPY(kword + kwordlen[depth], fword + fwordidx[depth], 10655 flen); 10656 kwordlen[depth + 1] = kwordlen[depth] + flen; 10657 } 10658 else 10659 { 10660 STRNCPY(kword + kwordlen[depth], uword + uwordidx[depth], 10661 ulen); 10662 kwordlen[depth + 1] = kwordlen[depth] + ulen; 10663 } 10664 fwordidx[depth + 1] = fwordidx[depth] + flen; 10665 uwordidx[depth + 1] = uwordidx[depth] + ulen; 10666 10667 ++depth; 10668 arridx[depth] = tryidx; 10669 round[depth] = 0; 10670 } 10671 } 10672 } 10673 10674 /* Didn't find it: "cannot happen". */ 10675 *kword = NUL; 10676 } 10677 10678 /* 10679 * Compute the sound-a-like score for suggestions in su->su_ga and add them to 10680 * su->su_sga. 10681 */ 10682 static void 10683 score_comp_sal(su) 10684 suginfo_T *su; 10685 { 10686 langp_T *lp; 10687 char_u badsound[MAXWLEN]; 10688 int i; 10689 suggest_T *stp; 10690 suggest_T *sstp; 10691 int score; 10692 int lpi; 10693 10694 if (ga_grow(&su->su_sga, su->su_ga.ga_len) == FAIL) 10695 return; 10696 10697 /* Use the sound-folding of the first language that supports it. */ 10698 for (lpi = 0; lpi < curbuf->b_langp.ga_len; ++lpi) 10699 { 10700 lp = LANGP_ENTRY(curbuf->b_langp, lpi); 10701 if (lp->lp_slang->sl_sal.ga_len > 0) 10702 { 10703 /* soundfold the bad word */ 10704 spell_soundfold(lp->lp_slang, su->su_fbadword, TRUE, badsound); 10705 10706 for (i = 0; i < su->su_ga.ga_len; ++i) 10707 { 10708 stp = &SUG(su->su_ga, i); 10709 10710 /* Case-fold the suggested word, sound-fold it and compute the 10711 * sound-a-like score. */ 10712 score = stp_sal_score(stp, su, lp->lp_slang, badsound); 10713 if (score < SCORE_MAXMAX) 10714 { 10715 /* Add the suggestion. 
*/ 10716 sstp = &SUG(su->su_sga, su->su_sga.ga_len); 10717 sstp->st_word = vim_strsave(stp->st_word); 10718 if (sstp->st_word != NULL) 10719 { 10720 sstp->st_score = score; 10721 sstp->st_altscore = 0; 10722 sstp->st_orglen = stp->st_orglen; 10723 ++su->su_sga.ga_len; 10724 } 10725 } 10726 } 10727 break; 10728 } 10729 } 10730 } 10731 10732 /* 10733 * Combine the list of suggestions in su->su_ga and su->su_sga. 10734 * They are intwined. 10735 */ 10736 static void 10737 score_combine(su) 10738 suginfo_T *su; 10739 { 10740 int i; 10741 int j; 10742 garray_T ga; 10743 garray_T *gap; 10744 langp_T *lp; 10745 suggest_T *stp; 10746 char_u *p; 10747 char_u badsound[MAXWLEN]; 10748 int round; 10749 int lpi; 10750 10751 /* Add the alternate score to su_ga. */ 10752 for (lpi = 0; lpi < curbuf->b_langp.ga_len; ++lpi) 10753 { 10754 lp = LANGP_ENTRY(curbuf->b_langp, lpi); 10755 if (lp->lp_slang->sl_sal.ga_len > 0) 10756 { 10757 /* soundfold the bad word */ 10758 spell_soundfold(lp->lp_slang, su->su_fbadword, TRUE, badsound); 10759 10760 for (i = 0; i < su->su_ga.ga_len; ++i) 10761 { 10762 stp = &SUG(su->su_ga, i); 10763 stp->st_altscore = stp_sal_score(stp, su, lp->lp_slang, 10764 badsound); 10765 if (stp->st_altscore == SCORE_MAXMAX) 10766 stp->st_score = (stp->st_score * 3 + SCORE_BIG) / 4; 10767 else 10768 stp->st_score = (stp->st_score * 3 10769 + stp->st_altscore) / 4; 10770 stp->st_salscore = FALSE; 10771 } 10772 break; 10773 } 10774 } 10775 10776 /* Add the alternate score to su_sga. */ 10777 for (i = 0; i < su->su_sga.ga_len; ++i) 10778 { 10779 stp = &SUG(su->su_sga, i); 10780 stp->st_altscore = spell_edit_score(su->su_badword, stp->st_word); 10781 if (stp->st_score == SCORE_MAXMAX) 10782 stp->st_score = (SCORE_BIG * 7 + stp->st_altscore) / 8; 10783 else 10784 stp->st_score = (stp->st_score * 7 + stp->st_altscore) / 8; 10785 stp->st_salscore = TRUE; 10786 } 10787 10788 /* Sort the suggestions and truncate at "maxcount" for both lists. 
*/ 10789 (void)cleanup_suggestions(&su->su_ga, su->su_maxscore, su->su_maxcount); 10790 (void)cleanup_suggestions(&su->su_sga, su->su_maxscore, su->su_maxcount); 10791 10792 ga_init2(&ga, (int)sizeof(suginfo_T), 1); 10793 if (ga_grow(&ga, su->su_ga.ga_len + su->su_sga.ga_len) == FAIL) 10794 return; 10795 10796 stp = &SUG(ga, 0); 10797 for (i = 0; i < su->su_ga.ga_len || i < su->su_sga.ga_len; ++i) 10798 { 10799 /* round 1: get a suggestion from su_ga 10800 * round 2: get a suggestion from su_sga */ 10801 for (round = 1; round <= 2; ++round) 10802 { 10803 gap = round == 1 ? &su->su_ga : &su->su_sga; 10804 if (i < gap->ga_len) 10805 { 10806 /* Don't add a word if it's already there. */ 10807 p = SUG(*gap, i).st_word; 10808 for (j = 0; j < ga.ga_len; ++j) 10809 if (STRCMP(stp[j].st_word, p) == 0) 10810 break; 10811 if (j == ga.ga_len) 10812 stp[ga.ga_len++] = SUG(*gap, i); 10813 else 10814 vim_free(p); 10815 } 10816 } 10817 } 10818 10819 ga_clear(&su->su_ga); 10820 ga_clear(&su->su_sga); 10821 10822 /* Truncate the list to the number of suggestions that will be displayed. */ 10823 if (ga.ga_len > su->su_maxcount) 10824 { 10825 for (i = su->su_maxcount; i < ga.ga_len; ++i) 10826 vim_free(stp[i].st_word); 10827 ga.ga_len = su->su_maxcount; 10828 } 10829 10830 su->su_ga = ga; 10831 } 10832 10833 /* 10834 * For the goodword in "stp" compute the soundalike score compared to the 10835 * badword. 
10836 */ 10837 static int 10838 stp_sal_score(stp, su, slang, badsound) 10839 suggest_T *stp; 10840 suginfo_T *su; 10841 slang_T *slang; 10842 char_u *badsound; /* sound-folded badword */ 10843 { 10844 char_u *p; 10845 char_u *pbad; 10846 char_u *pgood; 10847 char_u badsound2[MAXWLEN]; 10848 char_u fword[MAXWLEN]; 10849 char_u goodsound[MAXWLEN]; 10850 char_u goodword[MAXWLEN]; 10851 int lendiff; 10852 10853 lendiff = (int)(su->su_badlen - stp->st_orglen); 10854 if (lendiff >= 0) 10855 pbad = badsound; 10856 else 10857 { 10858 /* soundfold the bad word with more characters following */ 10859 (void)spell_casefold(su->su_badptr, stp->st_orglen, fword, MAXWLEN); 10860 10861 /* When joining two words the sound often changes a lot. E.g., "t he" 10862 * sounds like "t h" while "the" sounds like "@". Avoid that by 10863 * removing the space. Don't do it when the good word also contains a 10864 * space. */ 10865 if (vim_iswhite(su->su_badptr[su->su_badlen]) 10866 && *skiptowhite(stp->st_word) == NUL) 10867 for (p = fword; *(p = skiptowhite(p)) != NUL; ) 10868 mch_memmove(p, p + 1, STRLEN(p)); 10869 10870 spell_soundfold(slang, fword, TRUE, badsound2); 10871 pbad = badsound2; 10872 } 10873 10874 if (lendiff > 0) 10875 { 10876 /* Add part of the bad word to the good word, so that we soundfold 10877 * what replaces the bad word. */ 10878 STRCPY(goodword, stp->st_word); 10879 STRNCAT(goodword, su->su_badptr + su->su_badlen - lendiff, lendiff); 10880 pgood = goodword; 10881 } 10882 else 10883 pgood = stp->st_word; 10884 10885 /* Sound-fold the word and compute the score for the difference. */ 10886 spell_soundfold(slang, pgood, FALSE, goodsound); 10887 10888 return soundalike_score(goodsound, pbad); 10889 } 10890 10891 /* 10892 * Find suggestions by comparing the word in a sound-a-like form. 10893 * Note: This doesn't support postponed prefixes. 
10894 */ 10895 static void 10896 suggest_try_soundalike(su) 10897 suginfo_T *su; 10898 { 10899 char_u salword[MAXWLEN]; 10900 char_u tword[MAXWLEN]; 10901 char_u tsalword[MAXWLEN]; 10902 idx_T arridx[MAXWLEN]; 10903 int curi[MAXWLEN]; 10904 langp_T *lp; 10905 char_u *byts; 10906 idx_T *idxs; 10907 int depth; 10908 int c; 10909 idx_T n; 10910 int round; 10911 int flags; 10912 int sound_score; 10913 int local_score; 10914 int lpi; 10915 slang_T *slang; 10916 10917 /* Do this for all languages that support sound folding. */ 10918 for (lpi = 0; lpi < curbuf->b_langp.ga_len; ++lpi) 10919 { 10920 lp = LANGP_ENTRY(curbuf->b_langp, lpi); 10921 slang = lp->lp_slang; 10922 if (slang->sl_sal.ga_len > 0) 10923 { 10924 /* soundfold the bad word */ 10925 spell_soundfold(slang, su->su_fbadword, TRUE, salword); 10926 10927 /* 10928 * Go through the whole tree, soundfold each word and compare. 10929 * round 1: use the case-folded tree. 10930 * round 2: use the keep-case tree. 10931 */ 10932 for (round = 1; round <= 2; ++round) 10933 { 10934 if (round == 1) 10935 { 10936 byts = slang->sl_fbyts; 10937 idxs = slang->sl_fidxs; 10938 } 10939 else 10940 { 10941 byts = slang->sl_kbyts; 10942 idxs = slang->sl_kidxs; 10943 if (byts == NULL) /* no keep-case words */ 10944 continue; 10945 } 10946 10947 depth = 0; 10948 arridx[0] = 0; 10949 curi[0] = 1; 10950 while (depth >= 0 && !got_int) 10951 { 10952 if (curi[depth] > byts[arridx[depth]]) 10953 { 10954 /* Done all bytes at this node, go up one level. */ 10955 --depth; 10956 line_breakcheck(); 10957 } 10958 else 10959 { 10960 /* Do one more byte at this node. */ 10961 n = arridx[depth] + curi[depth]; 10962 ++curi[depth]; 10963 c = byts[n]; 10964 if (c == 0) 10965 { 10966 /* End of word, deal with the word. */ 10967 flags = (int)idxs[n]; 10968 if (round == 2 || (flags & WF_KEEPCAP) == 0) 10969 { 10970 tword[depth] = NUL; 10971 /* Sound-fold. Only in keep-case tree need to 10972 * case-fold the word. 
*/ 10973 spell_soundfold(slang, tword, 10974 round == 1, tsalword); 10975 10976 /* Compute the edit distance between the 10977 * sound-a-like words. */ 10978 sound_score = soundalike_score(salword, 10979 tsalword); 10980 10981 /* Add a penalty for words in another region. */ 10982 if ((flags & WF_REGION) && (((unsigned)flags 10983 >> 16) & lp->lp_region) == 0) 10984 local_score = SCORE_REGION; 10985 else 10986 local_score = 0; 10987 sound_score += local_score; 10988 10989 if (sound_score < SCORE_MAXMAX) 10990 { 10991 char_u cword[MAXWLEN]; 10992 char_u *p; 10993 int score; 10994 10995 flags |= su->su_badflags; 10996 if (round == 1 && (flags & WF_CAPMASK) != 0) 10997 { 10998 /* Need to fix case according to 10999 * "flags". */ 11000 make_case_word(tword, cword, flags); 11001 p = cword; 11002 } 11003 else 11004 p = tword; 11005 11006 if (sps_flags & SPS_DOUBLE) 11007 add_suggestion(su, &su->su_sga, p, 11008 su->su_badlen, 11009 sound_score, 0, FALSE, 11010 lp->lp_sallang); 11011 else 11012 { 11013 /* Compute the score. */ 11014 score = spell_edit_score( 11015 su->su_badword, p) 11016 + local_score; 11017 if (sps_flags & SPS_BEST) 11018 /* give a bonus for the good word 11019 * sounding the same as the bad 11020 * word */ 11021 add_suggestion(su, &su->su_ga, p, 11022 su->su_badlen, 11023 RESCORE(score, sound_score), 11024 sound_score, TRUE, 11025 lp->lp_sallang); 11026 else 11027 add_suggestion(su, &su->su_ga, p, 11028 su->su_badlen, 11029 score + sound_score, 11030 0, FALSE, 11031 lp->lp_sallang); 11032 } 11033 } 11034 } 11035 11036 /* Skip over other NUL bytes. */ 11037 while (byts[n + 1] == 0) 11038 { 11039 ++n; 11040 ++curi[depth]; 11041 } 11042 } 11043 else 11044 { 11045 /* Normal char, go one level deeper. */ 11046 tword[depth++] = c; 11047 arridx[depth] = idxs[n]; 11048 curi[depth] = 1; 11049 } 11050 } 11051 } 11052 } 11053 } 11054 } 11055 } 11056 11057 /* 11058 * Copy "fword" to "cword", fixing case according to "flags". 
11059 */ 11060 static void 11061 make_case_word(fword, cword, flags) 11062 char_u *fword; 11063 char_u *cword; 11064 int flags; 11065 { 11066 if (flags & WF_ALLCAP) 11067 /* Make it all upper-case */ 11068 allcap_copy(fword, cword); 11069 else if (flags & WF_ONECAP) 11070 /* Make the first letter upper-case */ 11071 onecap_copy(fword, cword, TRUE); 11072 else 11073 /* Use goodword as-is. */ 11074 STRCPY(cword, fword); 11075 } 11076 11077 /* 11078 * Use map string "map" for languages "lp". 11079 */ 11080 static void 11081 set_map_str(lp, map) 11082 slang_T *lp; 11083 char_u *map; 11084 { 11085 char_u *p; 11086 int headc = 0; 11087 int c; 11088 int i; 11089 11090 if (*map == NUL) 11091 { 11092 lp->sl_has_map = FALSE; 11093 return; 11094 } 11095 lp->sl_has_map = TRUE; 11096 11097 /* Init the array and hash table empty. */ 11098 for (i = 0; i < 256; ++i) 11099 lp->sl_map_array[i] = 0; 11100 #ifdef FEAT_MBYTE 11101 hash_init(&lp->sl_map_hash); 11102 #endif 11103 11104 /* 11105 * The similar characters are stored separated with slashes: 11106 * "aaa/bbb/ccc/". Fill sl_map_array[c] with the character before c and 11107 * before the same slash. For characters above 255 sl_map_hash is used. 11108 */ 11109 for (p = map; *p != NUL; ) 11110 { 11111 #ifdef FEAT_MBYTE 11112 c = mb_cptr2char_adv(&p); 11113 #else 11114 c = *p++; 11115 #endif 11116 if (c == '/') 11117 headc = 0; 11118 else 11119 { 11120 if (headc == 0) 11121 headc = c; 11122 11123 #ifdef FEAT_MBYTE 11124 /* Characters above 255 don't fit in sl_map_array[], put them in 11125 * the hash table. Each entry is the char, a NUL the headchar and 11126 * a NUL. 
*/ 11127 if (c >= 256) 11128 { 11129 int cl = mb_char2len(c); 11130 int headcl = mb_char2len(headc); 11131 char_u *b; 11132 hash_T hash; 11133 hashitem_T *hi; 11134 11135 b = alloc((unsigned)(cl + headcl + 2)); 11136 if (b == NULL) 11137 return; 11138 mb_char2bytes(c, b); 11139 b[cl] = NUL; 11140 mb_char2bytes(headc, b + cl + 1); 11141 b[cl + 1 + headcl] = NUL; 11142 hash = hash_hash(b); 11143 hi = hash_lookup(&lp->sl_map_hash, b, hash); 11144 if (HASHITEM_EMPTY(hi)) 11145 hash_add_item(&lp->sl_map_hash, hi, b, hash); 11146 else 11147 { 11148 /* This should have been checked when generating the .spl 11149 * file. */ 11150 EMSG(_("E999: duplicate char in MAP entry")); 11151 vim_free(b); 11152 } 11153 } 11154 else 11155 #endif 11156 lp->sl_map_array[c] = headc; 11157 } 11158 } 11159 } 11160 11161 /* 11162 * Return TRUE if "c1" and "c2" are similar characters according to the MAP 11163 * lines in the .aff file. 11164 */ 11165 static int 11166 similar_chars(slang, c1, c2) 11167 slang_T *slang; 11168 int c1; 11169 int c2; 11170 { 11171 int m1, m2; 11172 #ifdef FEAT_MBYTE 11173 char_u buf[MB_MAXBYTES]; 11174 hashitem_T *hi; 11175 11176 if (c1 >= 256) 11177 { 11178 buf[mb_char2bytes(c1, buf)] = 0; 11179 hi = hash_find(&slang->sl_map_hash, buf); 11180 if (HASHITEM_EMPTY(hi)) 11181 m1 = 0; 11182 else 11183 m1 = mb_ptr2char(hi->hi_key + STRLEN(hi->hi_key) + 1); 11184 } 11185 else 11186 #endif 11187 m1 = slang->sl_map_array[c1]; 11188 if (m1 == 0) 11189 return FALSE; 11190 11191 11192 #ifdef FEAT_MBYTE 11193 if (c2 >= 256) 11194 { 11195 buf[mb_char2bytes(c2, buf)] = 0; 11196 hi = hash_find(&slang->sl_map_hash, buf); 11197 if (HASHITEM_EMPTY(hi)) 11198 m2 = 0; 11199 else 11200 m2 = mb_ptr2char(hi->hi_key + STRLEN(hi->hi_key) + 1); 11201 } 11202 else 11203 #endif 11204 m2 = slang->sl_map_array[c2]; 11205 11206 return m1 == m2; 11207 } 11208 11209 /* 11210 * Add a suggestion to the list of suggestions. 11211 * Do not add a duplicate suggestion or suggestions with a bad score. 
11212 * When "use_score" is not zero it's used, otherwise the score is computed 11213 * with spell_edit_score(). 11214 */ 11215 static void 11216 add_suggestion(su, gap, goodword, badlenarg, score, altscore, had_bonus, slang) 11217 suginfo_T *su; 11218 garray_T *gap; 11219 char_u *goodword; 11220 int badlenarg; /* len of bad word replaced with "goodword" */ 11221 int score; 11222 int altscore; 11223 int had_bonus; /* value for st_had_bonus */ 11224 slang_T *slang; /* language for sound folding */ 11225 { 11226 int goodlen = STRLEN(goodword); /* len of goodword changed */ 11227 int badlen = badlenarg; /* len of bad word changed */ 11228 suggest_T *stp; 11229 suggest_T new_sug; 11230 int i; 11231 hlf_T attr = HLF_COUNT; 11232 char_u longword[MAXWLEN + 1]; 11233 char_u *pgood, *pbad; 11234 11235 /* Check that the word really is valid. Esp. for banned words and for 11236 * split words, such as "the the". Need to append what follows to check 11237 * for that. */ 11238 STRCPY(longword, goodword); 11239 vim_strncpy(longword + goodlen, su->su_badptr + badlen, MAXWLEN - goodlen); 11240 (void)spell_check(curwin, longword, &attr, NULL); 11241 if (attr != HLF_COUNT) 11242 return; 11243 11244 /* Minimize "badlen" for consistency. Avoids that changing "the the" to 11245 * "thee the" is added next to changing the first "the" the "thee". */ 11246 pgood = goodword + STRLEN(goodword); 11247 pbad = su->su_badptr + badlen; 11248 while (pgood > goodword && pbad > su->su_badptr) 11249 { 11250 mb_ptr_back(goodword, pgood); 11251 mb_ptr_back(su->su_badptr, pbad); 11252 #ifdef FEAT_MBYTE 11253 if (has_mbyte) 11254 { 11255 if (mb_ptr2char(pgood) != mb_ptr2char(pbad)) 11256 break; 11257 } 11258 else 11259 #endif 11260 if (*pgood != *pbad) 11261 break; 11262 badlen = pbad - su->su_badptr; 11263 goodlen = pgood - goodword; 11264 } 11265 if (badlen == 0 && goodlen == 0) 11266 /* goodword doesn't change anything; may happen for "the the" changing 11267 * the first "the" to itself. 
*/ 11268 return; 11269 11270 if (score <= su->su_maxscore) 11271 { 11272 /* Check if the word is already there. Also check the length that is 11273 * being replaced "thes," -> "these" is a different suggestion from 11274 * "thes" -> "these". */ 11275 stp = &SUG(*gap, 0); 11276 for (i = gap->ga_len - 1; i >= 0; --i) 11277 if ((int)STRLEN(stp[i].st_word) == goodlen 11278 && STRNCMP(stp[i].st_word, goodword, goodlen) == 0 11279 && stp[i].st_orglen == badlen) 11280 { 11281 /* 11282 * Found it. Remember the word with the lowest score. 11283 */ 11284 if (stp[i].st_slang == NULL) 11285 stp[i].st_slang = slang; 11286 11287 new_sug.st_score = score; 11288 new_sug.st_altscore = altscore; 11289 new_sug.st_had_bonus = had_bonus; 11290 11291 if (stp[i].st_had_bonus != had_bonus) 11292 { 11293 /* Only one of the two had the soundalike score computed. 11294 * Need to do that for the other one now, otherwise the 11295 * scores can't be compared. This happens because 11296 * suggest_try_change() doesn't compute the soundalike 11297 * word to keep it fast, while some special methods set 11298 * the soundalike score to zero. */ 11299 if (had_bonus) 11300 rescore_one(su, &stp[i]); 11301 else 11302 { 11303 new_sug.st_word = goodword; 11304 new_sug.st_slang = stp[i].st_slang; 11305 new_sug.st_orglen = badlen; 11306 rescore_one(su, &new_sug); 11307 } 11308 } 11309 11310 if (stp[i].st_score > new_sug.st_score) 11311 { 11312 stp[i].st_score = new_sug.st_score; 11313 stp[i].st_altscore = new_sug.st_altscore; 11314 stp[i].st_had_bonus = new_sug.st_had_bonus; 11315 } 11316 break; 11317 } 11318 11319 if (i < 0 && ga_grow(gap, 1) == OK) 11320 { 11321 /* Add a suggestion. 
*/ 11322 stp = &SUG(*gap, gap->ga_len); 11323 stp->st_word = vim_strnsave(goodword, goodlen); 11324 if (stp->st_word != NULL) 11325 { 11326 stp->st_score = score; 11327 stp->st_altscore = altscore; 11328 stp->st_had_bonus = had_bonus; 11329 stp->st_orglen = badlen; 11330 stp->st_slang = slang; 11331 ++gap->ga_len; 11332 11333 /* If we have too many suggestions now, sort the list and keep 11334 * the best suggestions. */ 11335 if (gap->ga_len > SUG_MAX_COUNT(su)) 11336 su->su_maxscore = cleanup_suggestions(gap, su->su_maxscore, 11337 SUG_CLEAN_COUNT(su)); 11338 } 11339 } 11340 } 11341 } 11342 11343 /* 11344 * Add a word to be banned. 11345 */ 11346 static void 11347 add_banned(su, word) 11348 suginfo_T *su; 11349 char_u *word; 11350 { 11351 char_u *s = vim_strsave(word); 11352 hash_T hash; 11353 hashitem_T *hi; 11354 11355 if (s != NULL) 11356 { 11357 hash = hash_hash(s); 11358 hi = hash_lookup(&su->su_banned, s, hash); 11359 if (HASHITEM_EMPTY(hi)) 11360 hash_add_item(&su->su_banned, hi, s, hash); 11361 else 11362 vim_free(s); 11363 } 11364 } 11365 11366 /* 11367 * Return TRUE if a word appears in the list of banned words. 11368 */ 11369 static int 11370 was_banned(su, word) 11371 suginfo_T *su; 11372 char_u *word; 11373 { 11374 hashitem_T *hi = hash_find(&su->su_banned, word); 11375 11376 return !HASHITEM_EMPTY(hi); 11377 } 11378 11379 /* 11380 * Free the banned words in "su". 11381 */ 11382 static void 11383 free_banned(su) 11384 suginfo_T *su; 11385 { 11386 int todo; 11387 hashitem_T *hi; 11388 11389 todo = su->su_banned.ht_used; 11390 for (hi = su->su_banned.ht_array; todo > 0; ++hi) 11391 { 11392 if (!HASHITEM_EMPTY(hi)) 11393 { 11394 vim_free(hi->hi_key); 11395 --todo; 11396 } 11397 } 11398 hash_clear(&su->su_banned); 11399 } 11400 11401 /* 11402 * Recompute the score for all suggestions if sound-folding is possible. This 11403 * is slow, thus only done for the final results. 
11404 */ 11405 static void 11406 rescore_suggestions(su) 11407 suginfo_T *su; 11408 { 11409 int i; 11410 11411 if (su->su_sallang != NULL) 11412 for (i = 0; i < su->su_ga.ga_len; ++i) 11413 rescore_one(su, &SUG(su->su_ga, i)); 11414 } 11415 11416 /* 11417 * Recompute the score for one suggestion if sound-folding is possible. 11418 */ 11419 static void 11420 rescore_one(su, stp) 11421 suginfo_T *su; 11422 suggest_T *stp; 11423 { 11424 slang_T *slang = stp->st_slang; 11425 char_u sal_badword[MAXWLEN]; 11426 char_u *p; 11427 11428 /* Only rescore suggestions that have no sal score yet and do have a 11429 * language. */ 11430 if (slang != NULL && slang->sl_sal.ga_len > 0 && !stp->st_had_bonus) 11431 { 11432 if (slang == su->su_sallang) 11433 p = su->su_sal_badword; 11434 else 11435 { 11436 spell_soundfold(slang, su->su_fbadword, TRUE, sal_badword); 11437 p = sal_badword; 11438 } 11439 11440 stp->st_altscore = stp_sal_score(stp, su, slang, p); 11441 if (stp->st_altscore == SCORE_MAXMAX) 11442 stp->st_altscore = SCORE_BIG; 11443 stp->st_score = RESCORE(stp->st_score, stp->st_altscore); 11444 stp->st_had_bonus = TRUE; 11445 } 11446 } 11447 11448 static int 11449 #ifdef __BORLANDC__ 11450 _RTLENTRYF 11451 #endif 11452 sug_compare __ARGS((const void *s1, const void *s2)); 11453 11454 /* 11455 * Function given to qsort() to sort the suggestions on st_score. 11456 * First on "st_score", then "st_altscore" then alphabetically. 11457 */ 11458 static int 11459 #ifdef __BORLANDC__ 11460 _RTLENTRYF 11461 #endif 11462 sug_compare(s1, s2) 11463 const void *s1; 11464 const void *s2; 11465 { 11466 suggest_T *p1 = (suggest_T *)s1; 11467 suggest_T *p2 = (suggest_T *)s2; 11468 int n = p1->st_score - p2->st_score; 11469 11470 if (n == 0) 11471 { 11472 n = p1->st_altscore - p2->st_altscore; 11473 if (n == 0) 11474 n = STRICMP(p1->st_word, p2->st_word); 11475 } 11476 return n; 11477 } 11478 11479 /* 11480 * Cleanup the suggestions: 11481 * - Sort on score. 
11482 * - Remove words that won't be displayed. 11483 * Returns the maximum score in the list or "maxscore" unmodified. 11484 */ 11485 static int 11486 cleanup_suggestions(gap, maxscore, keep) 11487 garray_T *gap; 11488 int maxscore; 11489 int keep; /* nr of suggestions to keep */ 11490 { 11491 suggest_T *stp = &SUG(*gap, 0); 11492 int i; 11493 11494 /* Sort the list. */ 11495 qsort(gap->ga_data, (size_t)gap->ga_len, sizeof(suggest_T), sug_compare); 11496 11497 /* Truncate the list to the number of suggestions that will be displayed. */ 11498 if (gap->ga_len > keep) 11499 { 11500 for (i = keep; i < gap->ga_len; ++i) 11501 vim_free(stp[i].st_word); 11502 gap->ga_len = keep; 11503 return stp[keep - 1].st_score; 11504 } 11505 return maxscore; 11506 } 11507 11508 #if defined(FEAT_EVAL) || defined(PROTO) 11509 /* 11510 * Soundfold a string, for soundfold(). 11511 * Result is in allocated memory, NULL for an error. 11512 */ 11513 char_u * 11514 eval_soundfold(word) 11515 char_u *word; 11516 { 11517 langp_T *lp; 11518 char_u sound[MAXWLEN]; 11519 int lpi; 11520 11521 if (curwin->w_p_spell && *curbuf->b_p_spl != NUL) 11522 /* Use the sound-folding of the first language that supports it. */ 11523 for (lpi = 0; lpi < curbuf->b_langp.ga_len; ++lpi) 11524 { 11525 lp = LANGP_ENTRY(curbuf->b_langp, lpi); 11526 if (lp->lp_slang->sl_sal.ga_len > 0) 11527 { 11528 /* soundfold the word */ 11529 spell_soundfold(lp->lp_slang, word, FALSE, sound); 11530 return vim_strsave(sound); 11531 } 11532 } 11533 11534 /* No language with sound folding, return word as-is. */ 11535 return vim_strsave(word); 11536 } 11537 #endif 11538 11539 /* 11540 * Turn "inword" into its sound-a-like equivalent in "res[MAXWLEN]". 11541 * 11542 * There are many ways to turn a word into a sound-a-like representation. The 11543 * oldest is Soundex (1918!). A nice overview can be found in "Approximate 11544 * swedish name matching - survey and test of different algorithms" by Klas 11545 * Erikson. 
11546 * 11547 * We support two methods: 11548 * 1. SOFOFROM/SOFOTO do a simple character mapping. 11549 * 2. SAL items define a more advanced sound-folding (and much slower). 11550 */ 11551 static void 11552 spell_soundfold(slang, inword, folded, res) 11553 slang_T *slang; 11554 char_u *inword; 11555 int folded; /* "inword" is already case-folded */ 11556 char_u *res; 11557 { 11558 char_u fword[MAXWLEN]; 11559 char_u *word; 11560 11561 if (slang->sl_sofo) 11562 /* SOFOFROM and SOFOTO used */ 11563 spell_soundfold_sofo(slang, inword, res); 11564 else 11565 { 11566 /* SAL items used. Requires the word to be case-folded. */ 11567 if (folded) 11568 word = inword; 11569 else 11570 { 11571 (void)spell_casefold(inword, STRLEN(inword), fword, MAXWLEN); 11572 word = fword; 11573 } 11574 11575 #ifdef FEAT_MBYTE 11576 if (has_mbyte) 11577 spell_soundfold_wsal(slang, word, res); 11578 else 11579 #endif 11580 spell_soundfold_sal(slang, word, res); 11581 } 11582 } 11583 11584 /* 11585 * Perform sound folding of "inword" into "res" according to SOFOFROM and 11586 * SOFOTO lines. 11587 */ 11588 static void 11589 spell_soundfold_sofo(slang, inword, res) 11590 slang_T *slang; 11591 char_u *inword; 11592 char_u *res; 11593 { 11594 char_u *s; 11595 int ri = 0; 11596 int c; 11597 11598 #ifdef FEAT_MBYTE 11599 if (has_mbyte) 11600 { 11601 int prevc = 0; 11602 int *ip; 11603 11604 /* The sl_sal_first[] table contains the translation for chars up to 11605 * 255, sl_sal the rest. */ 11606 for (s = inword; *s != NUL; ) 11607 { 11608 c = mb_cptr2char_adv(&s); 11609 if (enc_utf8 ? utf_class(c) == 0 : vim_iswhite(c)) 11610 c = ' '; 11611 else if (c < 256) 11612 c = slang->sl_sal_first[c]; 11613 else 11614 { 11615 ip = ((int **)slang->sl_sal.ga_data)[c & 0xff]; 11616 if (ip == NULL) /* empty list, can't match */ 11617 c = NUL; 11618 else 11619 for (;;) /* find "c" in the list */ 11620 { 11621 if (*ip == 0) /* not found */ 11622 { 11623 c = NUL; 11624 break; 11625 } 11626 if (*ip == c) /* match! 
*/ 11627 { 11628 c = ip[1]; 11629 break; 11630 } 11631 ip += 2; 11632 } 11633 } 11634 11635 if (c != NUL && c != prevc) 11636 { 11637 ri += mb_char2bytes(c, res + ri); 11638 if (ri + MB_MAXBYTES > MAXWLEN) 11639 break; 11640 prevc = c; 11641 } 11642 } 11643 } 11644 else 11645 #endif 11646 { 11647 /* The sl_sal_first[] table contains the translation. */ 11648 for (s = inword; (c = *s) != NUL; ++s) 11649 { 11650 if (vim_iswhite(c)) 11651 c = ' '; 11652 else 11653 c = slang->sl_sal_first[c]; 11654 if (c != NUL && (ri == 0 || res[ri - 1] != c)) 11655 res[ri++] = c; 11656 } 11657 } 11658 11659 res[ri] = NUL; 11660 } 11661 11662 static void 11663 spell_soundfold_sal(slang, inword, res) 11664 slang_T *slang; 11665 char_u *inword; 11666 char_u *res; 11667 { 11668 salitem_T *smp; 11669 char_u word[MAXWLEN]; 11670 char_u *s = inword; 11671 char_u *t; 11672 char_u *pf; 11673 int i, j, z; 11674 int reslen; 11675 int n, k = 0; 11676 int z0; 11677 int k0; 11678 int n0; 11679 int c; 11680 int pri; 11681 int p0 = -333; 11682 int c0; 11683 11684 /* Remove accents, if wanted. We actually remove all non-word characters. 11685 * But keep white space. We need a copy, the word may be changed here. */ 11686 if (slang->sl_rem_accents) 11687 { 11688 t = word; 11689 while (*s != NUL) 11690 { 11691 if (vim_iswhite(*s)) 11692 { 11693 *t++ = ' '; 11694 s = skipwhite(s); 11695 } 11696 else 11697 { 11698 if (spell_iswordp_nmw(s)) 11699 *t++ = *s; 11700 ++s; 11701 } 11702 } 11703 *t = NUL; 11704 } 11705 else 11706 STRCPY(word, s); 11707 11708 smp = (salitem_T *)slang->sl_sal.ga_data; 11709 11710 /* 11711 * This comes from Aspell phonet.cpp. Converted from C++ to C. 11712 * Changed to keep spaces. 11713 */ 11714 i = reslen = z = 0; 11715 while ((c = word[i]) != NUL) 11716 { 11717 /* Start with the first rule that has the character in the word. 
*/ 11718 n = slang->sl_sal_first[c]; 11719 z0 = 0; 11720 11721 if (n >= 0) 11722 { 11723 /* check all rules for the same letter */ 11724 for (; (s = smp[n].sm_lead)[0] == c; ++n) 11725 { 11726 /* Quickly skip entries that don't match the word. Most 11727 * entries are less then three chars, optimize for that. */ 11728 k = smp[n].sm_leadlen; 11729 if (k > 1) 11730 { 11731 if (word[i + 1] != s[1]) 11732 continue; 11733 if (k > 2) 11734 { 11735 for (j = 2; j < k; ++j) 11736 if (word[i + j] != s[j]) 11737 break; 11738 if (j < k) 11739 continue; 11740 } 11741 } 11742 11743 if ((pf = smp[n].sm_oneof) != NULL) 11744 { 11745 /* Check for match with one of the chars in "sm_oneof". */ 11746 while (*pf != NUL && *pf != word[i + k]) 11747 ++pf; 11748 if (*pf == NUL) 11749 continue; 11750 ++k; 11751 } 11752 s = smp[n].sm_rules; 11753 pri = 5; /* default priority */ 11754 11755 p0 = *s; 11756 k0 = k; 11757 while (*s == '-' && k > 1) 11758 { 11759 k--; 11760 s++; 11761 } 11762 if (*s == '<') 11763 s++; 11764 if (VIM_ISDIGIT(*s)) 11765 { 11766 /* determine priority */ 11767 pri = *s - '0'; 11768 s++; 11769 } 11770 if (*s == '^' && *(s + 1) == '^') 11771 s++; 11772 11773 if (*s == NUL 11774 || (*s == '^' 11775 && (i == 0 || !(word[i - 1] == ' ' 11776 || spell_iswordp(word + i - 1, curbuf))) 11777 && (*(s + 1) != '$' 11778 || (!spell_iswordp(word + i + k0, curbuf)))) 11779 || (*s == '$' && i > 0 11780 && spell_iswordp(word + i - 1, curbuf) 11781 && (!spell_iswordp(word + i + k0, curbuf)))) 11782 { 11783 /* search for followup rules, if: */ 11784 /* followup and k > 1 and NO '-' in searchstring */ 11785 c0 = word[i + k - 1]; 11786 n0 = slang->sl_sal_first[c0]; 11787 11788 if (slang->sl_followup && k > 1 && n0 >= 0 11789 && p0 != '-' && word[i + k] != NUL) 11790 { 11791 /* test follow-up rule for "word[i + k]" */ 11792 for ( ; (s = smp[n0].sm_lead)[0] == c0; ++n0) 11793 { 11794 /* Quickly skip entries that don't match the word. 
11795 * */ 11796 k0 = smp[n0].sm_leadlen; 11797 if (k0 > 1) 11798 { 11799 if (word[i + k] != s[1]) 11800 continue; 11801 if (k0 > 2) 11802 { 11803 pf = word + i + k + 1; 11804 for (j = 2; j < k0; ++j) 11805 if (*pf++ != s[j]) 11806 break; 11807 if (j < k0) 11808 continue; 11809 } 11810 } 11811 k0 += k - 1; 11812 11813 if ((pf = smp[n0].sm_oneof) != NULL) 11814 { 11815 /* Check for match with one of the chars in 11816 * "sm_oneof". */ 11817 while (*pf != NUL && *pf != word[i + k0]) 11818 ++pf; 11819 if (*pf == NUL) 11820 continue; 11821 ++k0; 11822 } 11823 11824 p0 = 5; 11825 s = smp[n0].sm_rules; 11826 while (*s == '-') 11827 { 11828 /* "k0" gets NOT reduced because 11829 * "if (k0 == k)" */ 11830 s++; 11831 } 11832 if (*s == '<') 11833 s++; 11834 if (VIM_ISDIGIT(*s)) 11835 { 11836 p0 = *s - '0'; 11837 s++; 11838 } 11839 11840 if (*s == NUL 11841 /* *s == '^' cuts */ 11842 || (*s == '$' 11843 && !spell_iswordp(word + i + k0, 11844 curbuf))) 11845 { 11846 if (k0 == k) 11847 /* this is just a piece of the string */ 11848 continue; 11849 11850 if (p0 < pri) 11851 /* priority too low */ 11852 continue; 11853 /* rule fits; stop search */ 11854 break; 11855 } 11856 } 11857 11858 if (p0 >= pri && smp[n0].sm_lead[0] == c0) 11859 continue; 11860 } 11861 11862 /* replace string */ 11863 s = smp[n].sm_to; 11864 if (s == NULL) 11865 s = (char_u *)""; 11866 pf = smp[n].sm_rules; 11867 p0 = (vim_strchr(pf, '<') != NULL) ? 
1 : 0; 11868 if (p0 == 1 && z == 0) 11869 { 11870 /* rule with '<' is used */ 11871 if (reslen > 0 && *s != NUL && (res[reslen - 1] == c 11872 || res[reslen - 1] == *s)) 11873 reslen--; 11874 z0 = 1; 11875 z = 1; 11876 k0 = 0; 11877 while (*s != NUL && word[i + k0] != NUL) 11878 { 11879 word[i + k0] = *s; 11880 k0++; 11881 s++; 11882 } 11883 if (k > k0) 11884 mch_memmove(word + i + k0, word + i + k, 11885 STRLEN(word + i + k) + 1); 11886 11887 /* new "actual letter" */ 11888 c = word[i]; 11889 } 11890 else 11891 { 11892 /* no '<' rule used */ 11893 i += k - 1; 11894 z = 0; 11895 while (*s != NUL && s[1] != NUL && reslen < MAXWLEN) 11896 { 11897 if (reslen == 0 || res[reslen - 1] != *s) 11898 res[reslen++] = *s; 11899 s++; 11900 } 11901 /* new "actual letter" */ 11902 c = *s; 11903 if (strstr((char *)pf, "^^") != NULL) 11904 { 11905 if (c != NUL) 11906 res[reslen++] = c; 11907 mch_memmove(word, word + i + 1, 11908 STRLEN(word + i + 1) + 1); 11909 i = 0; 11910 z0 = 1; 11911 } 11912 } 11913 break; 11914 } 11915 } 11916 } 11917 else if (vim_iswhite(c)) 11918 { 11919 c = ' '; 11920 k = 1; 11921 } 11922 11923 if (z0 == 0) 11924 { 11925 if (k && !p0 && reslen < MAXWLEN && c != NUL 11926 && (!slang->sl_collapse || reslen == 0 11927 || res[reslen - 1] != c)) 11928 /* condense only double letters */ 11929 res[reslen++] = c; 11930 11931 i++; 11932 z = 0; 11933 k = 0; 11934 } 11935 } 11936 11937 res[reslen] = NUL; 11938 } 11939 11940 #ifdef FEAT_MBYTE 11941 /* 11942 * Turn "inword" into its sound-a-like equivalent in "res[MAXWLEN]". 11943 * Multi-byte version of spell_soundfold(). 
11944 */ 11945 static void 11946 spell_soundfold_wsal(slang, inword, res) 11947 slang_T *slang; 11948 char_u *inword; 11949 char_u *res; 11950 { 11951 salitem_T *smp = (salitem_T *)slang->sl_sal.ga_data; 11952 int word[MAXWLEN]; 11953 int wres[MAXWLEN]; 11954 int l; 11955 char_u *s; 11956 int *ws; 11957 char_u *t; 11958 int *pf; 11959 int i, j, z; 11960 int reslen; 11961 int n, k = 0; 11962 int z0; 11963 int k0; 11964 int n0; 11965 int c; 11966 int pri; 11967 int p0 = -333; 11968 int c0; 11969 int did_white = FALSE; 11970 11971 /* 11972 * Convert the multi-byte string to a wide-character string. 11973 * Remove accents, if wanted. We actually remove all non-word characters. 11974 * But keep white space. 11975 */ 11976 n = 0; 11977 for (s = inword; *s != NUL; ) 11978 { 11979 t = s; 11980 c = mb_cptr2char_adv(&s); 11981 if (slang->sl_rem_accents) 11982 { 11983 if (enc_utf8 ? utf_class(c) == 0 : vim_iswhite(c)) 11984 { 11985 if (did_white) 11986 continue; 11987 c = ' '; 11988 did_white = TRUE; 11989 } 11990 else 11991 { 11992 did_white = FALSE; 11993 if (!spell_iswordp_nmw(t)) 11994 continue; 11995 } 11996 } 11997 word[n++] = c; 11998 } 11999 word[n] = NUL; 12000 12001 /* 12002 * This comes from Aspell phonet.cpp. 12003 * Converted from C++ to C. Added support for multi-byte chars. 12004 * Changed to keep spaces. 12005 */ 12006 i = reslen = z = 0; 12007 while ((c = word[i]) != NUL) 12008 { 12009 /* Start with the first rule that has the character in the word. */ 12010 n = slang->sl_sal_first[c & 0xff]; 12011 z0 = 0; 12012 12013 if (n >= 0) 12014 { 12015 /* check all rules for the same index byte */ 12016 for (; ((ws = smp[n].sm_lead_w)[0] & 0xff) == (c & 0xff); ++n) 12017 { 12018 /* Quickly skip entries that don't match the word. Most 12019 * entries are less then three chars, optimize for that. 
*/ 12020 if (c != ws[0]) 12021 continue; 12022 k = smp[n].sm_leadlen; 12023 if (k > 1) 12024 { 12025 if (word[i + 1] != ws[1]) 12026 continue; 12027 if (k > 2) 12028 { 12029 for (j = 2; j < k; ++j) 12030 if (word[i + j] != ws[j]) 12031 break; 12032 if (j < k) 12033 continue; 12034 } 12035 } 12036 12037 if ((pf = smp[n].sm_oneof_w) != NULL) 12038 { 12039 /* Check for match with one of the chars in "sm_oneof". */ 12040 while (*pf != NUL && *pf != word[i + k]) 12041 ++pf; 12042 if (*pf == NUL) 12043 continue; 12044 ++k; 12045 } 12046 s = smp[n].sm_rules; 12047 pri = 5; /* default priority */ 12048 12049 p0 = *s; 12050 k0 = k; 12051 while (*s == '-' && k > 1) 12052 { 12053 k--; 12054 s++; 12055 } 12056 if (*s == '<') 12057 s++; 12058 if (VIM_ISDIGIT(*s)) 12059 { 12060 /* determine priority */ 12061 pri = *s - '0'; 12062 s++; 12063 } 12064 if (*s == '^' && *(s + 1) == '^') 12065 s++; 12066 12067 if (*s == NUL 12068 || (*s == '^' 12069 && (i == 0 || !(word[i - 1] == ' ' 12070 || spell_iswordp_w(word + i - 1, curbuf))) 12071 && (*(s + 1) != '$' 12072 || (!spell_iswordp_w(word + i + k0, curbuf)))) 12073 || (*s == '$' && i > 0 12074 && spell_iswordp_w(word + i - 1, curbuf) 12075 && (!spell_iswordp_w(word + i + k0, curbuf)))) 12076 { 12077 /* search for followup rules, if: */ 12078 /* followup and k > 1 and NO '-' in searchstring */ 12079 c0 = word[i + k - 1]; 12080 n0 = slang->sl_sal_first[c0 & 0xff]; 12081 12082 if (slang->sl_followup && k > 1 && n0 >= 0 12083 && p0 != '-' && word[i + k] != NUL) 12084 { 12085 /* Test follow-up rule for "word[i + k]"; loop over 12086 * all entries with the same index byte. */ 12087 for ( ; ((ws = smp[n0].sm_lead_w)[0] & 0xff) 12088 == (c0 & 0xff); ++n0) 12089 { 12090 /* Quickly skip entries that don't match the word. 
12091 */ 12092 if (c0 != ws[0]) 12093 continue; 12094 k0 = smp[n0].sm_leadlen; 12095 if (k0 > 1) 12096 { 12097 if (word[i + k] != ws[1]) 12098 continue; 12099 if (k0 > 2) 12100 { 12101 pf = word + i + k + 1; 12102 for (j = 2; j < k0; ++j) 12103 if (*pf++ != ws[j]) 12104 break; 12105 if (j < k0) 12106 continue; 12107 } 12108 } 12109 k0 += k - 1; 12110 12111 if ((pf = smp[n0].sm_oneof_w) != NULL) 12112 { 12113 /* Check for match with one of the chars in 12114 * "sm_oneof". */ 12115 while (*pf != NUL && *pf != word[i + k0]) 12116 ++pf; 12117 if (*pf == NUL) 12118 continue; 12119 ++k0; 12120 } 12121 12122 p0 = 5; 12123 s = smp[n0].sm_rules; 12124 while (*s == '-') 12125 { 12126 /* "k0" gets NOT reduced because 12127 * "if (k0 == k)" */ 12128 s++; 12129 } 12130 if (*s == '<') 12131 s++; 12132 if (VIM_ISDIGIT(*s)) 12133 { 12134 p0 = *s - '0'; 12135 s++; 12136 } 12137 12138 if (*s == NUL 12139 /* *s == '^' cuts */ 12140 || (*s == '$' 12141 && !spell_iswordp_w(word + i + k0, 12142 curbuf))) 12143 { 12144 if (k0 == k) 12145 /* this is just a piece of the string */ 12146 continue; 12147 12148 if (p0 < pri) 12149 /* priority too low */ 12150 continue; 12151 /* rule fits; stop search */ 12152 break; 12153 } 12154 } 12155 12156 if (p0 >= pri && (smp[n0].sm_lead_w[0] & 0xff) 12157 == (c0 & 0xff)) 12158 continue; 12159 } 12160 12161 /* replace string */ 12162 ws = smp[n].sm_to_w; 12163 s = smp[n].sm_rules; 12164 p0 = (vim_strchr(s, '<') != NULL) ? 
1 : 0; 12165 if (p0 == 1 && z == 0) 12166 { 12167 /* rule with '<' is used */ 12168 if (reslen > 0 && ws != NULL && *ws != NUL 12169 && (wres[reslen - 1] == c 12170 || wres[reslen - 1] == *ws)) 12171 reslen--; 12172 z0 = 1; 12173 z = 1; 12174 k0 = 0; 12175 if (ws != NULL) 12176 while (*ws != NUL && word[i + k0] != NUL) 12177 { 12178 word[i + k0] = *ws; 12179 k0++; 12180 ws++; 12181 } 12182 if (k > k0) 12183 mch_memmove(word + i + k0, word + i + k, 12184 sizeof(int) * (STRLEN(word + i + k) + 1)); 12185 12186 /* new "actual letter" */ 12187 c = word[i]; 12188 } 12189 else 12190 { 12191 /* no '<' rule used */ 12192 i += k - 1; 12193 z = 0; 12194 if (ws != NULL) 12195 while (*ws != NUL && ws[1] != NUL 12196 && reslen < MAXWLEN) 12197 { 12198 if (reslen == 0 || wres[reslen - 1] != *ws) 12199 wres[reslen++] = *ws; 12200 ws++; 12201 } 12202 /* new "actual letter" */ 12203 if (ws == NULL) 12204 c = NUL; 12205 else 12206 c = *ws; 12207 if (strstr((char *)s, "^^") != NULL) 12208 { 12209 if (c != NUL) 12210 wres[reslen++] = c; 12211 mch_memmove(word, word + i + 1, 12212 sizeof(int) * (STRLEN(word + i + 1) + 1)); 12213 i = 0; 12214 z0 = 1; 12215 } 12216 } 12217 break; 12218 } 12219 } 12220 } 12221 else if (vim_iswhite(c)) 12222 { 12223 c = ' '; 12224 k = 1; 12225 } 12226 12227 if (z0 == 0) 12228 { 12229 if (k && !p0 && reslen < MAXWLEN && c != NUL 12230 && (!slang->sl_collapse || reslen == 0 12231 || wres[reslen - 1] != c)) 12232 /* condense only double letters */ 12233 wres[reslen++] = c; 12234 12235 i++; 12236 z = 0; 12237 k = 0; 12238 } 12239 } 12240 12241 /* Convert wide characters in "wres" to a multi-byte string in "res". */ 12242 l = 0; 12243 for (n = 0; n < reslen; ++n) 12244 { 12245 l += mb_char2bytes(wres[n], res + l); 12246 if (l + MB_MAXBYTES > MAXWLEN) 12247 break; 12248 } 12249 res[l] = NUL; 12250 } 12251 #endif 12252 12253 /* 12254 * Compute a score for two sound-a-like words. 12255 * This permits up to two inserts/deletes/swaps/etc. to keep things fast. 
12256 * Instead of a generic loop we write out the code. That keeps it fast by 12257 * avoiding checks that will not be possible. 12258 */ 12259 static int 12260 soundalike_score(goodstart, badstart) 12261 char_u *goodstart; /* sound-folded good word */ 12262 char_u *badstart; /* sound-folded bad word */ 12263 { 12264 char_u *goodsound = goodstart; 12265 char_u *badsound = badstart; 12266 int goodlen; 12267 int badlen; 12268 int n; 12269 char_u *pl, *ps; 12270 char_u *pl2, *ps2; 12271 int score = 0; 12272 12273 /* adding/inserting "*" at the start (word starts with vowel) shouldn't be 12274 * counted so much, vowels halfway the word aren't counted at all. */ 12275 if ((*badsound == '*' || *goodsound == '*') && *badsound != *goodsound) 12276 { 12277 score = SCORE_DEL / 2; 12278 if (*badsound == '*') 12279 ++badsound; 12280 else 12281 ++goodsound; 12282 } 12283 12284 goodlen = STRLEN(goodsound); 12285 badlen = STRLEN(badsound); 12286 12287 /* Return quickly if the lenghts are too different to be fixed by two 12288 * changes. */ 12289 n = goodlen - badlen; 12290 if (n < -2 || n > 2) 12291 return SCORE_MAXMAX; 12292 12293 if (n > 0) 12294 { 12295 pl = goodsound; /* goodsound is longest */ 12296 ps = badsound; 12297 } 12298 else 12299 { 12300 pl = badsound; /* badsound is longest */ 12301 ps = goodsound; 12302 } 12303 12304 /* Skip over the identical part. */ 12305 while (*pl == *ps && *pl != NUL) 12306 { 12307 ++pl; 12308 ++ps; 12309 } 12310 12311 switch (n) 12312 { 12313 case -2: 12314 case 2: 12315 /* 12316 * Must delete two characters from "pl". 12317 */ 12318 ++pl; /* first delete */ 12319 while (*pl == *ps) 12320 { 12321 ++pl; 12322 ++ps; 12323 } 12324 /* strings must be equal after second delete */ 12325 if (STRCMP(pl + 1, ps) == 0) 12326 return score + SCORE_DEL * 2; 12327 12328 /* Failed to compare. */ 12329 break; 12330 12331 case -1: 12332 case 1: 12333 /* 12334 * Minimal one delete from "pl" required. 
12335 */ 12336 12337 /* 1: delete */ 12338 pl2 = pl + 1; 12339 ps2 = ps; 12340 while (*pl2 == *ps2) 12341 { 12342 if (*pl2 == NUL) /* reached the end */ 12343 return score + SCORE_DEL; 12344 ++pl2; 12345 ++ps2; 12346 } 12347 12348 /* 2: delete then swap, then rest must be equal */ 12349 if (pl2[0] == ps2[1] && pl2[1] == ps2[0] 12350 && STRCMP(pl2 + 2, ps2 + 2) == 0) 12351 return score + SCORE_DEL + SCORE_SWAP; 12352 12353 /* 3: delete then substitute, then the rest must be equal */ 12354 if (STRCMP(pl2 + 1, ps2 + 1) == 0) 12355 return score + SCORE_DEL + SCORE_SUBST; 12356 12357 /* 4: first swap then delete */ 12358 if (pl[0] == ps[1] && pl[1] == ps[0]) 12359 { 12360 pl2 = pl + 2; /* swap, skip two chars */ 12361 ps2 = ps + 2; 12362 while (*pl2 == *ps2) 12363 { 12364 ++pl2; 12365 ++ps2; 12366 } 12367 /* delete a char and then strings must be equal */ 12368 if (STRCMP(pl2 + 1, ps2) == 0) 12369 return score + SCORE_SWAP + SCORE_DEL; 12370 } 12371 12372 /* 5: first substitute then delete */ 12373 pl2 = pl + 1; /* substitute, skip one char */ 12374 ps2 = ps + 1; 12375 while (*pl2 == *ps2) 12376 { 12377 ++pl2; 12378 ++ps2; 12379 } 12380 /* delete a char and then strings must be equal */ 12381 if (STRCMP(pl2 + 1, ps2) == 0) 12382 return score + SCORE_SUBST + SCORE_DEL; 12383 12384 /* Failed to compare. */ 12385 break; 12386 12387 case 0: 12388 /* 12389 * Lenghts are equal, thus changes must result in same length: An 12390 * insert is only possible in combination with a delete. 
12391 * 1: check if for identical strings 12392 */ 12393 if (*pl == NUL) 12394 return score; 12395 12396 /* 2: swap */ 12397 if (pl[0] == ps[1] && pl[1] == ps[0]) 12398 { 12399 pl2 = pl + 2; /* swap, skip two chars */ 12400 ps2 = ps + 2; 12401 while (*pl2 == *ps2) 12402 { 12403 if (*pl2 == NUL) /* reached the end */ 12404 return score + SCORE_SWAP; 12405 ++pl2; 12406 ++ps2; 12407 } 12408 /* 3: swap and swap again */ 12409 if (pl2[0] == ps2[1] && pl2[1] == ps2[0] 12410 && STRCMP(pl2 + 2, ps2 + 2) == 0) 12411 return score + SCORE_SWAP + SCORE_SWAP; 12412 12413 /* 4: swap and substitute */ 12414 if (STRCMP(pl2 + 1, ps2 + 1) == 0) 12415 return score + SCORE_SWAP + SCORE_SUBST; 12416 } 12417 12418 /* 5: substitute */ 12419 pl2 = pl + 1; 12420 ps2 = ps + 1; 12421 while (*pl2 == *ps2) 12422 { 12423 if (*pl2 == NUL) /* reached the end */ 12424 return score + SCORE_SUBST; 12425 ++pl2; 12426 ++ps2; 12427 } 12428 12429 /* 6: substitute and swap */ 12430 if (pl2[0] == ps2[1] && pl2[1] == ps2[0] 12431 && STRCMP(pl2 + 2, ps2 + 2) == 0) 12432 return score + SCORE_SUBST + SCORE_SWAP; 12433 12434 /* 7: substitute and substitute */ 12435 if (STRCMP(pl2 + 1, ps2 + 1) == 0) 12436 return score + SCORE_SUBST + SCORE_SUBST; 12437 12438 /* 8: insert then delete */ 12439 pl2 = pl; 12440 ps2 = ps + 1; 12441 while (*pl2 == *ps2) 12442 { 12443 ++pl2; 12444 ++ps2; 12445 } 12446 if (STRCMP(pl2 + 1, ps2) == 0) 12447 return score + SCORE_INS + SCORE_DEL; 12448 12449 /* 9: delete then insert */ 12450 pl2 = pl + 1; 12451 ps2 = ps; 12452 while (*pl2 == *ps2) 12453 { 12454 ++pl2; 12455 ++ps2; 12456 } 12457 if (STRCMP(pl2, ps2 + 1) == 0) 12458 return score + SCORE_INS + SCORE_DEL; 12459 12460 /* Failed to compare. */ 12461 break; 12462 } 12463 12464 return SCORE_MAXMAX; 12465 } 12466 12467 /* 12468 * Compute the "edit distance" to turn "badword" into "goodword". The less 12469 * deletes/inserts/substitutes/swaps are required the lower the score. 
12470 * 12471 * The algorithm is described by Du and Chang, 1992. 12472 * The implementation of the algorithm comes from Aspell editdist.cpp, 12473 * edit_distance(). It has been converted from C++ to C and modified to 12474 * support multi-byte characters. 12475 */ 12476 static int 12477 spell_edit_score(badword, goodword) 12478 char_u *badword; 12479 char_u *goodword; 12480 { 12481 int *cnt; 12482 int badlen, goodlen; /* lenghts including NUL */ 12483 int j, i; 12484 int t; 12485 int bc, gc; 12486 int pbc, pgc; 12487 #ifdef FEAT_MBYTE 12488 char_u *p; 12489 int wbadword[MAXWLEN]; 12490 int wgoodword[MAXWLEN]; 12491 12492 if (has_mbyte) 12493 { 12494 /* Get the characters from the multi-byte strings and put them in an 12495 * int array for easy access. */ 12496 for (p = badword, badlen = 0; *p != NUL; ) 12497 wbadword[badlen++] = mb_cptr2char_adv(&p); 12498 wbadword[badlen++] = 0; 12499 for (p = goodword, goodlen = 0; *p != NUL; ) 12500 wgoodword[goodlen++] = mb_cptr2char_adv(&p); 12501 wgoodword[goodlen++] = 0; 12502 } 12503 else 12504 #endif 12505 { 12506 badlen = STRLEN(badword) + 1; 12507 goodlen = STRLEN(goodword) + 1; 12508 } 12509 12510 /* We use "cnt" as an array: CNT(badword_idx, goodword_idx). 
*/ 12511 #define CNT(a, b) cnt[(a) + (b) * (badlen + 1)] 12512 cnt = (int *)lalloc((long_u)(sizeof(int) * (badlen + 1) * (goodlen + 1)), 12513 TRUE); 12514 if (cnt == NULL) 12515 return 0; /* out of memory */ 12516 12517 CNT(0, 0) = 0; 12518 for (j = 1; j <= goodlen; ++j) 12519 CNT(0, j) = CNT(0, j - 1) + SCORE_DEL; 12520 12521 for (i = 1; i <= badlen; ++i) 12522 { 12523 CNT(i, 0) = CNT(i - 1, 0) + SCORE_INS; 12524 for (j = 1; j <= goodlen; ++j) 12525 { 12526 #ifdef FEAT_MBYTE 12527 if (has_mbyte) 12528 { 12529 bc = wbadword[i - 1]; 12530 gc = wgoodword[j - 1]; 12531 } 12532 else 12533 #endif 12534 { 12535 bc = badword[i - 1]; 12536 gc = goodword[j - 1]; 12537 } 12538 if (bc == gc) 12539 CNT(i, j) = CNT(i - 1, j - 1); 12540 else 12541 { 12542 /* Use a better score when there is only a case difference. */ 12543 if (SPELL_TOFOLD(bc) == SPELL_TOFOLD(gc)) 12544 CNT(i, j) = SCORE_ICASE + CNT(i - 1, j - 1); 12545 else 12546 CNT(i, j) = SCORE_SUBST + CNT(i - 1, j - 1); 12547 12548 if (i > 1 && j > 1) 12549 { 12550 #ifdef FEAT_MBYTE 12551 if (has_mbyte) 12552 { 12553 pbc = wbadword[i - 2]; 12554 pgc = wgoodword[j - 2]; 12555 } 12556 else 12557 #endif 12558 { 12559 pbc = badword[i - 2]; 12560 pgc = goodword[j - 2]; 12561 } 12562 if (bc == pgc && pbc == gc) 12563 { 12564 t = SCORE_SWAP + CNT(i - 2, j - 2); 12565 if (t < CNT(i, j)) 12566 CNT(i, j) = t; 12567 } 12568 } 12569 t = SCORE_DEL + CNT(i - 1, j); 12570 if (t < CNT(i, j)) 12571 CNT(i, j) = t; 12572 t = SCORE_INS + CNT(i, j - 1); 12573 if (t < CNT(i, j)) 12574 CNT(i, j) = t; 12575 } 12576 } 12577 } 12578 12579 i = CNT(badlen - 1, goodlen - 1); 12580 vim_free(cnt); 12581 return i; 12582 } 12583 12584 /* 12585 * ":spelldump" 12586 */ 12587 /*ARGSUSED*/ 12588 void 12589 ex_spelldump(eap) 12590 exarg_T *eap; 12591 { 12592 buf_T *buf = curbuf; 12593 langp_T *lp; 12594 slang_T *slang; 12595 idx_T arridx[MAXWLEN]; 12596 int curi[MAXWLEN]; 12597 char_u word[MAXWLEN]; 12598 int c; 12599 char_u *byts; 12600 idx_T *idxs; 12601 
linenr_T lnum = 0; 12602 int round; 12603 int depth; 12604 int n; 12605 int flags; 12606 char_u *region_names = NULL; /* region names being used */ 12607 int do_region = TRUE; /* dump region names and numbers */ 12608 char_u *p; 12609 int lpi; 12610 12611 if (no_spell_checking(curwin)) 12612 return; 12613 12614 /* Create a new empty buffer by splitting the window. */ 12615 do_cmdline_cmd((char_u *)"new"); 12616 if (!bufempty() || !buf_valid(buf)) 12617 return; 12618 12619 /* Find out if we can support regions: All languages must support the same 12620 * regions or none at all. */ 12621 for (lpi = 0; lpi < buf->b_langp.ga_len; ++lpi) 12622 { 12623 lp = LANGP_ENTRY(buf->b_langp, lpi); 12624 p = lp->lp_slang->sl_regions; 12625 if (p[0] != 0) 12626 { 12627 if (region_names == NULL) /* first language with regions */ 12628 region_names = p; 12629 else if (STRCMP(region_names, p) != 0) 12630 { 12631 do_region = FALSE; /* region names are different */ 12632 break; 12633 } 12634 } 12635 } 12636 12637 if (do_region && region_names != NULL) 12638 { 12639 vim_snprintf((char *)IObuff, IOSIZE, "/regions=%s", region_names); 12640 ml_append(lnum++, IObuff, (colnr_T)0, FALSE); 12641 } 12642 else 12643 do_region = FALSE; 12644 12645 /* 12646 * Loop over all files loaded for the entries in 'spelllang'. 
12647 */ 12648 for (lpi = 0; lpi < buf->b_langp.ga_len; ++lpi) 12649 { 12650 lp = LANGP_ENTRY(buf->b_langp, lpi); 12651 slang = lp->lp_slang; 12652 if (slang->sl_fbyts == NULL) /* reloading failed */ 12653 continue; 12654 12655 vim_snprintf((char *)IObuff, IOSIZE, "# file: %s", slang->sl_fname); 12656 ml_append(lnum++, IObuff, (colnr_T)0, FALSE); 12657 12658 /* round 1: case-folded tree 12659 * round 2: keep-case tree */ 12660 for (round = 1; round <= 2; ++round) 12661 { 12662 if (round == 1) 12663 { 12664 byts = slang->sl_fbyts; 12665 idxs = slang->sl_fidxs; 12666 } 12667 else 12668 { 12669 byts = slang->sl_kbyts; 12670 idxs = slang->sl_kidxs; 12671 } 12672 if (byts == NULL) 12673 continue; /* array is empty */ 12674 12675 depth = 0; 12676 arridx[0] = 0; 12677 curi[0] = 1; 12678 while (depth >= 0 && !got_int) 12679 { 12680 if (curi[depth] > byts[arridx[depth]]) 12681 { 12682 /* Done all bytes at this node, go up one level. */ 12683 --depth; 12684 line_breakcheck(); 12685 } 12686 else 12687 { 12688 /* Do one more byte at this node. */ 12689 n = arridx[depth] + curi[depth]; 12690 ++curi[depth]; 12691 c = byts[n]; 12692 if (c == 0) 12693 { 12694 /* End of word, deal with the word. 12695 * Don't use keep-case words in the fold-case tree, 12696 * they will appear in the keep-case tree. 12697 * Only use the word when the region matches. */ 12698 flags = (int)idxs[n]; 12699 if ((round == 2 || (flags & WF_KEEPCAP) == 0) 12700 && (flags & WF_NEEDCOMP) == 0 12701 && (do_region 12702 || (flags & WF_REGION) == 0 12703 || (((unsigned)flags >> 16) 12704 & lp->lp_region) != 0)) 12705 { 12706 word[depth] = NUL; 12707 if (!do_region) 12708 flags &= ~WF_REGION; 12709 12710 /* Dump the basic word if there is no prefix or 12711 * when it's the first one. */ 12712 c = (unsigned)flags >> 24; 12713 if (c == 0 || curi[depth] == 2) 12714 dump_word(word, round, flags, lnum++); 12715 12716 /* Apply the prefix, if there is one. 
*/ 12717 if (c != 0) 12718 lnum = dump_prefixes(slang, word, round, 12719 flags, lnum); 12720 } 12721 } 12722 else 12723 { 12724 /* Normal char, go one level deeper. */ 12725 word[depth++] = c; 12726 arridx[depth] = idxs[n]; 12727 curi[depth] = 1; 12728 } 12729 } 12730 } 12731 } 12732 } 12733 12734 /* Delete the empty line that we started with. */ 12735 if (curbuf->b_ml.ml_line_count > 1) 12736 ml_delete(curbuf->b_ml.ml_line_count, FALSE); 12737 12738 redraw_later(NOT_VALID); 12739 } 12740 12741 /* 12742 * Dump one word: apply case modifications and append a line to the buffer. 12743 */ 12744 static void 12745 dump_word(word, round, flags, lnum) 12746 char_u *word; 12747 int round; 12748 int flags; 12749 linenr_T lnum; 12750 { 12751 int keepcap = FALSE; 12752 char_u *p; 12753 char_u cword[MAXWLEN]; 12754 char_u badword[MAXWLEN + 10]; 12755 int i; 12756 12757 if (round == 1 && (flags & WF_CAPMASK) != 0) 12758 { 12759 /* Need to fix case according to "flags". */ 12760 make_case_word(word, cword, flags); 12761 p = cword; 12762 } 12763 else 12764 { 12765 p = word; 12766 if (round == 2 && ((captype(word, NULL) & WF_KEEPCAP) == 0 12767 || (flags & WF_FIXCAP) != 0)) 12768 keepcap = TRUE; 12769 } 12770 12771 /* Add flags and regions after a slash. */ 12772 if ((flags & (WF_BANNED | WF_RARE | WF_REGION)) || keepcap) 12773 { 12774 STRCPY(badword, p); 12775 STRCAT(badword, "/"); 12776 if (keepcap) 12777 STRCAT(badword, "="); 12778 if (flags & WF_BANNED) 12779 STRCAT(badword, "!"); 12780 else if (flags & WF_RARE) 12781 STRCAT(badword, "?"); 12782 if (flags & WF_REGION) 12783 for (i = 0; i < 7; ++i) 12784 if (flags & (0x10000 << i)) 12785 sprintf((char *)badword + STRLEN(badword), "%d", i + 1); 12786 p = badword; 12787 } 12788 12789 ml_append(lnum, p, (colnr_T)0, FALSE); 12790 } 12791 12792 /* 12793 * For ":spelldump": Find matching prefixes for "word". Prepend each to 12794 * "word" and append a line to the buffer. 12795 * Return the updated line number. 
12796 */ 12797 static linenr_T 12798 dump_prefixes(slang, word, round, flags, startlnum) 12799 slang_T *slang; 12800 char_u *word; /* case-folded word */ 12801 int round; 12802 int flags; /* flags with prefix ID */ 12803 linenr_T startlnum; 12804 { 12805 idx_T arridx[MAXWLEN]; 12806 int curi[MAXWLEN]; 12807 char_u prefix[MAXWLEN]; 12808 char_u word_up[MAXWLEN]; 12809 int has_word_up = FALSE; 12810 int c; 12811 char_u *byts; 12812 idx_T *idxs; 12813 linenr_T lnum = startlnum; 12814 int depth; 12815 int n; 12816 int len; 12817 int i; 12818 12819 /* if the word starts with a lower-case letter make the word with an 12820 * upper-case letter in word_up[]. */ 12821 c = PTR2CHAR(word); 12822 if (SPELL_TOUPPER(c) != c) 12823 { 12824 onecap_copy(word, word_up, TRUE); 12825 has_word_up = TRUE; 12826 } 12827 12828 byts = slang->sl_pbyts; 12829 idxs = slang->sl_pidxs; 12830 if (byts != NULL) /* array not is empty */ 12831 { 12832 /* 12833 * Loop over all prefixes, building them byte-by-byte in prefix[]. 12834 * When at the end of a prefix check that it supports "flags". 12835 */ 12836 depth = 0; 12837 arridx[0] = 0; 12838 curi[0] = 1; 12839 while (depth >= 0 && !got_int) 12840 { 12841 n = arridx[depth]; 12842 len = byts[n]; 12843 if (curi[depth] > len) 12844 { 12845 /* Done all bytes at this node, go up one level. */ 12846 --depth; 12847 line_breakcheck(); 12848 } 12849 else 12850 { 12851 /* Do one more byte at this node. */ 12852 n += curi[depth]; 12853 ++curi[depth]; 12854 c = byts[n]; 12855 if (c == 0) 12856 { 12857 /* End of prefix, find out how many IDs there are. */ 12858 for (i = 1; i < len; ++i) 12859 if (byts[n + i] != 0) 12860 break; 12861 curi[depth] += i - 1; 12862 12863 c = valid_word_prefix(i, n, flags, word, slang, FALSE); 12864 if (c != 0) 12865 { 12866 vim_strncpy(prefix + depth, word, MAXWLEN - depth - 1); 12867 dump_word(prefix, round, 12868 (c & WF_RAREPFX) ? 
(flags | WF_RARE) 12869 : flags, lnum++); 12870 } 12871 12872 /* Check for prefix that matches the word when the 12873 * first letter is upper-case, but only if the prefix has 12874 * a condition. */ 12875 if (has_word_up) 12876 { 12877 c = valid_word_prefix(i, n, flags, word_up, slang, 12878 TRUE); 12879 if (c != 0) 12880 { 12881 vim_strncpy(prefix + depth, word_up, 12882 MAXWLEN - depth - 1); 12883 dump_word(prefix, round, 12884 (c & WF_RAREPFX) ? (flags | WF_RARE) 12885 : flags, lnum++); 12886 } 12887 } 12888 } 12889 else 12890 { 12891 /* Normal char, go one level deeper. */ 12892 prefix[depth++] = c; 12893 arridx[depth] = idxs[n]; 12894 curi[depth] = 1; 12895 } 12896 } 12897 } 12898 } 12899 12900 return lnum; 12901 } 12902 12903 /* 12904 * Move "p" to end of word. 12905 */ 12906 char_u * 12907 spell_to_word_end(start, buf) 12908 char_u *start; 12909 buf_T *buf; 12910 { 12911 char_u *p = start; 12912 12913 while (*p != NUL && spell_iswordp(p, buf)) 12914 mb_ptr_adv(p); 12915 return p; 12916 } 12917 12918 #if defined(FEAT_INS_EXPAND) || defined(PROTO) 12919 /* 12920 * Find start of the word in front of the cursor. We don't check if it is 12921 * badly spelled, with completion we can only change the word in front of the 12922 * cursor. 12923 * Used for Insert mode completion CTRL-X ?. 12924 * Returns the column number of the word. 12925 */ 12926 int 12927 spell_word_start(startcol) 12928 int startcol; 12929 { 12930 char_u *line; 12931 char_u *p; 12932 int col = 0; 12933 12934 if (no_spell_checking(curwin)) 12935 return startcol; 12936 12937 /* Find a word character before "startcol". */ 12938 line = ml_get_curline(); 12939 for (p = line + startcol; p > line; ) 12940 { 12941 mb_ptr_back(line, p); 12942 if (spell_iswordp_nmw(p)) 12943 break; 12944 } 12945 12946 /* Go back to start of the word. 
*/ 12947 while (p > line) 12948 { 12949 col = p - line; 12950 mb_ptr_back(line, p); 12951 if (!spell_iswordp(p, curbuf)) 12952 break; 12953 col = 0; 12954 } 12955 12956 return col; 12957 } 12958 12959 /* 12960 * Need to check for 'spellcapcheck' now, the word is removed before 12961 * expand_spelling() is called. Therefore the ugly global variable. 12962 */ 12963 static int spell_expand_need_cap; 12964 12965 void 12966 spell_expand_check_cap(col) 12967 colnr_T col; 12968 { 12969 spell_expand_need_cap = check_need_cap(curwin->w_cursor.lnum, col); 12970 } 12971 12972 /* 12973 * Get list of spelling suggestions. 12974 * Used for Insert mode completion CTRL-X ?. 12975 * Returns the number of matches. The matches are in "matchp[]", array of 12976 * allocated strings. 12977 */ 12978 /*ARGSUSED*/ 12979 int 12980 expand_spelling(lnum, col, pat, matchp) 12981 linenr_T lnum; 12982 int col; 12983 char_u *pat; 12984 char_u ***matchp; 12985 { 12986 garray_T ga; 12987 12988 spell_suggest_list(&ga, pat, 100, spell_expand_need_cap); 12989 *matchp = ga.ga_data; 12990 return ga.ga_len; 12991 } 12992 #endif 12993 12994 #endif /* FEAT_SYN_HL */ 12995