1 /* vi:set ts=8 sts=4 sw=4: 2 * 3 * VIM - Vi IMproved by Bram Moolenaar 4 * 5 * Do ":help uganda" in Vim to read copying and usage conditions. 6 * Do ":help credits" in Vim to see a list of people who contributed. 7 * See README.txt for an overview of the Vim source code. 8 */ 9 10 /* 11 * spell.c: code for spell checking 12 * 13 * The spell checking mechanism uses a tree (aka trie). Each node in the tree 14 * has a list of bytes that can appear (siblings). For each byte there is a 15 * pointer to the node with the byte that follows in the word (child). 16 * 17 * A NUL byte is used where the word may end. The bytes are sorted, so that 18 * binary searching can be used and the NUL bytes are at the start. The 19 * number of possible bytes is stored before the list of bytes. 20 * 21 * The tree uses two arrays: "byts" stores the characters, "idxs" stores 22 * either the next index or flags. The tree starts at index 0. For example, 23 * to lookup "vi" this sequence is followed: 24 * i = 0 25 * len = byts[i] 26 * n = where "v" appears in byts[i + 1] to byts[i + len] 27 * i = idxs[n] 28 * len = byts[i] 29 * n = where "i" appears in byts[i + 1] to byts[i + len] 30 * i = idxs[n] 31 * len = byts[i] 32 * find that byts[i + 1] is 0, idxs[i + 1] has flags for "vi". 33 * 34 * There are two word trees: one with case-folded words and one with words in 35 * original case. The second one is only used for keep-case words and is 36 * usually small. 37 * 38 * There is one additional tree for when not all prefixes are applied when 39 * generating the .spl file. This tree stores all the possible prefixes, as 40 * if they were words. At each word (prefix) end the prefix nr is stored, the 41 * following word must support this prefix nr. And the condition nr is 42 * stored, used to lookup the condition that the word must match with. 43 * 44 * Thanks to Olaf Seibert for providing an example implementation of this tree 45 * and the compression mechanism. 46 * 47 * Matching involves checking the caps type: Onecap ALLCAP KeepCap. 48 * 49 * Why doesn't Vim use aspell/ispell/myspell/etc.? 50 * See ":help develop-spell". 51 */ 52 53 /* Use SPELL_PRINTTREE for debugging: dump the word tree after adding a word. 54 * Only use it for small word lists! */ 55 #if 0 56 # define SPELL_PRINTTREE 57 #endif 58 59 /* 60 * Use this to adjust the score after finding suggestions, based on the 61 * suggested word sounding like the bad word. This is much faster than doing 62 * it for every possible suggestion. 63 * Disadvantage: When "the" is typed as "hte" it sounds different and goes 64 * down in the list. 65 * Used when 'spellsuggest' is set to "best". 66 */ 67 #define RESCORE(word_score, sound_score) ((3 * word_score + sound_score) / 4) 68 69 /* 70 * Vim spell file format: <HEADER> 71 * <SECTIONS> 72 * <LWORDTREE> 73 * <KWORDTREE> 74 * <PREFIXTREE> 75 * 76 * <HEADER>: <fileID> <versionnr> 77 * 78 * <fileID> 8 bytes "VIMspell" 79 * <versionnr> 1 byte VIMSPELLVERSION 80 * 81 * 82 * Sections make it possible to add information to the .spl file without 83 * making it incompatible with previous versions. There are two kinds of 84 * sections: 85 * 1. Not essential for correct spell checking. E.g. for making suggestions. 86 * These are skipped when not supported. 87 * 2. Optional information, but essential for spell checking when present. 88 * E.g. conditions for affixes. When this section is present but not 89 * supported an error message is given. 90 * 91 * <SECTIONS>: <section> ... <sectionend> 92 * 93 * <section>: <sectionID> <sectionflags> <sectionlen> (section contents) 94 * 95 * <sectionID> 1 byte number from 0 to 254 identifying the section 96 * 97 * <sectionflags> 1 byte SNF_REQUIRED: this section is required for correct 98 * spell checking 99 * 100 * <sectionlen> 4 bytes length of section contents, MSB first 101 * 102 * <sectionend> 1 byte SN_END 103 * 104 * 105 * sectionID == SN_REGION: <regionname> ... 106 * <regionname> 2 bytes Up to 8 region names: ca, au, etc. Lower case. 107 * First <regionname> is region 1. 108 * 109 * sectionID == SN_CHARFLAGS: <charflagslen> <charflags> 110 * <folcharslen> <folchars> 111 * <charflagslen> 1 byte Number of bytes in <charflags> (should be 128). 112 * <charflags> N bytes List of flags (first one is for character 128): 113 * 0x01 word character CF_WORD 114 * 0x02 upper-case character CF_UPPER 115 * <folcharslen> 2 bytes Number of bytes in <folchars>. 116 * <folchars> N bytes Folded characters, first one is for character 128. 117 * 118 * sectionID == SN_MIDWORD: <midword> 119 * <midword> N bytes Characters that are word characters only when used 120 * in the middle of a word. 121 * 122 * sectionID == SN_PREFCOND: <prefcondcnt> <prefcond> ... 123 * <prefcondcnt> 2 bytes Number of <prefcond> items following. 124 * <prefcond> : <condlen> <condstr> 125 * <condlen> 1 byte Length of <condstr>. 126 * <condstr> N bytes Condition for the prefix. 127 * 128 * sectionID == SN_REP: <repcount> <rep> ... 129 * <repcount> 2 bytes number of <rep> items, MSB first. 130 * <rep> : <repfromlen> <repfrom> <reptolen> <repto> 131 * <repfromlen> 1 byte length of <repfrom> 132 * <repfrom> N bytes "from" part of replacement 133 * <reptolen> 1 byte length of <repto> 134 * <repto> N bytes "to" part of replacement 135 * 136 * sectionID == SN_SAL: <salflags> <salcount> <sal> ... 137 * <salflags> 1 byte flags for soundsalike conversion: 138 * SAL_F0LLOWUP 139 * SAL_COLLAPSE 140 * SAL_REM_ACCENTS 141 * <salcount> 2 bytes number of <sal> items following 142 * <sal> : <salfromlen> <salfrom> <saltolen> <salto> 143 * <salfromlen> 1 byte length of <salfrom> 144 * <salfrom> N bytes "from" part of soundsalike 145 * <saltolen> 1 byte length of <salto> 146 * <salto> N bytes "to" part of soundsalike 147 * 148 * sectionID == SN_SOFO: <sofofromlen> <sofofrom> <sofotolen> <sofoto> 149 * <sofofromlen> 2 bytes length of <sofofrom> 150 * <sofofrom> N bytes "from" part of soundfold 151 * <sofotolen> 2 bytes length of <sofoto> 152 * <sofoto> N bytes "to" part of soundfold 153 * 154 * sectionID == SN_MAP: <mapstr> 155 * <mapstr> N bytes String with sequences of similar characters, 156 * separated by slashes. 157 * 158 * sectionID == SN_COMPOUND: <compmax> <compminlen> <compsylmax> <compflags> 159 * <compmax> 1 byte Maximum nr of words in compound word. 160 * <compminlen> 1 byte Minimal word length for compounding. 161 * <compsylmax> 1 byte Maximum nr of syllables in compound word. 162 * <compflags> N bytes Flags from COMPOUNDFLAGS items, separated by 163 * slashes. 164 * 165 * sectionID == SN_NOBREAK: (empty, its presence is enough) 166 * 167 * sectionID == SN_SYLLABLE: <syllable> 168 * <syllable> N bytes String from SYLLABLE item. 169 * 170 * <LWORDTREE>: <wordtree> 171 * 172 * <KWORDTREE>: <wordtree> 173 * 174 * <PREFIXTREE>: <wordtree> 175 * 176 * 177 * <wordtree>: <nodecount> <nodedata> ... 178 * 179 * <nodecount> 4 bytes Number of nodes following. MSB first. 180 * 181 * <nodedata>: <siblingcount> <sibling> ... 182 * 183 * <siblingcount> 1 byte Number of siblings in this node. The siblings 184 * follow in sorted order. 185 * 186 * <sibling>: <byte> [ <nodeidx> <xbyte> 187 * | <flags> [<flags2>] [<region>] [<affixID>] 188 * | [<pflags>] <affixID> <prefcondnr> ] 189 * 190 * <byte> 1 byte Byte value of the sibling. Special cases: 191 * BY_NOFLAGS: End of word without flags and for all 192 * regions. 193 * For PREFIXTREE <affixID> and 194 * <prefcondnr> follow. 195 * BY_FLAGS: End of word, <flags> follow. 196 * For PREFIXTREE <pflags>, <affixID> 197 * and <prefcondnr> follow. 198 * BY_FLAGS2: End of word, <flags> and <flags2> 199 * follow. Not used in PREFIXTREE. 200 * BY_INDEX: Child of sibling is shared, <nodeidx> 201 * and <xbyte> follow. 202 * 203 * <nodeidx> 3 bytes Index of child for this sibling, MSB first. 204 * 205 * <xbyte> 1 byte byte value of the sibling. 206 * 207 * <flags> 1 byte bitmask of: 208 * WF_ALLCAP word must have only capitals 209 * WF_ONECAP first char of word must be capital 210 * WF_KEEPCAP keep-case word 211 * WF_FIXCAP keep-case word, all caps not allowed 212 * WF_RARE rare word 213 * WF_BANNED bad word 214 * WF_REGION <region> follows 215 * WF_AFX <affixID> follows 216 * 217 * <flags2> 1 byte Bitmask of: 218 * WF_HAS_AFF >> 8 word includes affix 219 * WF_NEEDCOMP >> 8 word only valid in compound 220 * 221 * <pflags> 1 byte bitmask of: 222 * WFP_RARE rare prefix 223 * WFP_NC non-combining prefix 224 * WFP_UP letter after prefix made upper case 225 * 226 * <region> 1 byte Bitmask for regions in which word is valid. When 227 * omitted it's valid in all regions. 228 * Lowest bit is for region 1. 229 * 230 * <affixID> 1 byte ID of affix that can be used with this word. In 231 * PREFIXTREE used for the required prefix ID. 232 * 233 * <prefcondnr> 2 bytes Prefix condition number, index in <prefcond> list 234 * from HEADER. 235 * 236 * All text characters are in 'encoding', but stored as single bytes. 237 */ 238 239 #if defined(MSDOS) || defined(WIN16) || defined(WIN32) || defined(_WIN64) 240 # include <io.h> /* for lseek(), must be before vim.h */ 241 #endif 242 243 #include "vim.h" 244 245 #if defined(FEAT_SYN_HL) || defined(PROTO) 246 247 #ifdef HAVE_FCNTL_H 248 # include <fcntl.h> 249 #endif 250 251 #define MAXWLEN 250 /* Assume max. word len is this many bytes. 252 Some places assume a word length fits in a 253 byte, thus it can't be above 255. */ 254 255 /* Type used for indexes in the word tree need to be at least 4 bytes. If int 256 * is 8 bytes we could use something smaller, but what? */ 257 #if SIZEOF_INT > 3 258 typedef int idx_T; 259 #else 260 typedef long idx_T; 261 #endif 262 263 /* Flags used for a word. Only the lowest byte can be used, the region byte 264 * comes above it. */ 265 #define WF_REGION 0x01 /* region byte follows */ 266 #define WF_ONECAP 0x02 /* word with one capital (or all capitals) */ 267 #define WF_ALLCAP 0x04 /* word must be all capitals */ 268 #define WF_RARE 0x08 /* rare word */ 269 #define WF_BANNED 0x10 /* bad word */ 270 #define WF_AFX 0x20 /* affix ID follows */ 271 #define WF_FIXCAP 0x40 /* keep-case word, allcap not allowed */ 272 #define WF_KEEPCAP 0x80 /* keep-case word */ 273 274 /* for <flags2>, shifted up one byte to be used in wn_flags */ 275 #define WF_HAS_AFF 0x0100 /* word includes affix */ 276 #define WF_NEEDCOMP 0x0200 /* word only valid in compound */ 277 278 #define WF_CAPMASK (WF_ONECAP | WF_ALLCAP | WF_KEEPCAP | WF_FIXCAP) 279 280 /* flags for <pflags> */ 281 #define WFP_RARE 0x01 /* rare prefix */ 282 #define WFP_NC 0x02 /* prefix is not combining */ 283 #define WFP_UP 0x04 /* to-upper prefix */ 284 285 /* Flags for postponed prefixes. Must be above affixID (one byte) 286 * and prefcondnr (two bytes). */ 287 #define WF_RAREPFX (WFP_RARE << 24) /* in sl_pidxs: flag for rare 288 * postponed prefix */ 289 #define WF_PFX_NC (WFP_NC << 24) /* in sl_pidxs: flag for non-combining 290 * postponed prefix */ 291 #define WF_PFX_UP (WFP_UP << 24) /* in sl_pidxs: flag for to-upper 292 * postponed prefix */ 293 294 /* Special byte values for <byte>. Some are only used in the tree for 295 * postponed prefixes, some only in the other trees. This is a bit messy... */ 296 #define BY_NOFLAGS 0 /* end of word without flags or region; for 297 * postponed prefix: no <pflags> */ 298 #define BY_INDEX 1 /* child is shared, index follows */ 299 #define BY_FLAGS 2 /* end of word, <flags> byte follows; for 300 * postponed prefix: <pflags> follows */ 301 #define BY_FLAGS2 3 /* end of word, <flags> and <flags2> bytes 302 * follow; never used in prefix tree */ 303 #define BY_SPECIAL BY_FLAGS2 /* highest special byte value */ 304 305 /* Info from "REP" and "SAL" entries in ".aff" file used in si_rep, sl_rep, 306 * and si_sal. Not for sl_sal! 307 * One replacement: from "ft_from" to "ft_to". */ 308 typedef struct fromto_S 309 { 310 char_u *ft_from; 311 char_u *ft_to; 312 } fromto_T; 313 314 /* Info from "SAL" entries in ".aff" file used in sl_sal. 315 * The info is split for quick processing by spell_soundfold(). 316 * Note that "sm_oneof" and "sm_rules" point into sm_lead. */ 317 typedef struct salitem_S 318 { 319 char_u *sm_lead; /* leading letters */ 320 int sm_leadlen; /* length of "sm_lead" */ 321 char_u *sm_oneof; /* letters from () or NULL */ 322 char_u *sm_rules; /* rules like ^, $, priority */ 323 char_u *sm_to; /* replacement. */ 324 #ifdef FEAT_MBYTE 325 int *sm_lead_w; /* wide character copy of "sm_lead" */ 326 int *sm_oneof_w; /* wide character copy of "sm_oneof" */ 327 int *sm_to_w; /* wide character copy of "sm_to" */ 328 #endif 329 } salitem_T; 330 331 #ifdef FEAT_MBYTE 332 typedef int salfirst_T; 333 #else 334 typedef short salfirst_T; 335 #endif 336 337 /* Values for SP_*ERROR are negative, positive values are used by 338 * read_cnt_string(). */ 339 #define SP_TRUNCERROR -1 /* spell file truncated error */ 340 #define SP_FORMERROR -2 /* format error in spell file */ 341 #define SP_OTHERERROR -3 /* other error while reading spell file */ 342 343 /* 344 * Structure used to store words and other info for one language, loaded from 345 * a .spl file. 346 * The main access is through the tree in "sl_fbyts/sl_fidxs", storing the 347 * case-folded words. "sl_kbyts/sl_kidxs" is for keep-case words. 348 * 349 * The "byts" array stores the possible bytes in each tree node, preceded by 350 * the number of possible bytes, sorted on byte value: 351 * <len> <byte1> <byte2> ... 352 * The "idxs" array stores the index of the child node corresponding to the 353 * byte in "byts". 354 * Exception: when the byte is zero, the word may end here and "idxs" holds 355 * the flags, region mask and affixID for the word. There may be several 356 * zeros in sequence for alternative flag/region/affixID combinations. 357 */ 358 typedef struct slang_S slang_T; 359 struct slang_S 360 { 361 slang_T *sl_next; /* next language */ 362 char_u *sl_name; /* language name "en", "en.rare", "nl", etc. */ 363 char_u *sl_fname; /* name of .spl file */ 364 int sl_add; /* TRUE if it's a .add file. */ 365 366 char_u *sl_fbyts; /* case-folded word bytes */ 367 idx_T *sl_fidxs; /* case-folded word indexes */ 368 char_u *sl_kbyts; /* keep-case word bytes */ 369 idx_T *sl_kidxs; /* keep-case word indexes */ 370 char_u *sl_pbyts; /* prefix tree word bytes */ 371 idx_T *sl_pidxs; /* prefix tree word indexes */ 372 373 char_u sl_regions[17]; /* table with up to 8 region names plus NUL */ 374 375 char_u *sl_midword; /* MIDWORD string or NULL */ 376 377 int sl_compmax; /* COMPOUNDMAX (default: MAXWLEN) */ 378 int sl_compminlen; /* COMPOUNDMIN (default: 0) */ 379 int sl_compsylmax; /* COMPOUNDSYLMAX (default: MAXWLEN) */ 380 regprog_T *sl_compprog; /* COMPOUNDFLAGS turned into a regexp progrm 381 * (NULL when no compounding) */ 382 char_u *sl_compstartflags; /* flags for first compound word */ 383 char_u *sl_compallflags; /* all flags for compound words */ 384 char_u sl_nobreak; /* When TRUE: no spaces between words */ 385 char_u *sl_syllable; /* SYLLABLE repeatable chars or NULL */ 386 garray_T sl_syl_items; /* syllable items */ 387 388 int sl_prefixcnt; /* number of items in "sl_prefprog" */ 389 regprog_T **sl_prefprog; /* table with regprogs for prefixes */ 390 391 garray_T sl_rep; /* list of fromto_T entries from REP lines */ 392 short sl_rep_first[256]; /* indexes where byte first appears, -1 if 393 there is none */ 394 garray_T sl_sal; /* list of salitem_T entries from SAL lines */ 395 salfirst_T sl_sal_first[256]; /* indexes where byte first appears, -1 if 396 there is none */ 397 int sl_sofo; /* SOFOFROM and SOFOTO instead of SAL items: 398 * "sl_sal_first" maps chars, when has_mbyte 399 * "sl_sal" is a list of wide char lists. */ 400 int sl_followup; /* SAL followup */ 401 int sl_collapse; /* SAL collapse_result */ 402 int sl_rem_accents; /* SAL remove_accents */ 403 int sl_has_map; /* TRUE if there is a MAP line */ 404 #ifdef FEAT_MBYTE 405 hashtab_T sl_map_hash; /* MAP for multi-byte chars */ 406 int sl_map_array[256]; /* MAP for first 256 chars */ 407 #else 408 char_u sl_map_array[256]; /* MAP for first 256 chars */ 409 #endif 410 }; 411 412 /* First language that is loaded, start of the linked list of loaded 413 * languages. */ 414 static slang_T *first_lang = NULL; 415 416 /* Flags used in .spl file for soundsalike flags. */ 417 #define SAL_F0LLOWUP 1 418 #define SAL_COLLAPSE 2 419 #define SAL_REM_ACCENTS 4 420 421 /* 422 * Structure used in "b_langp", filled from 'spelllang'. 423 */ 424 typedef struct langp_S 425 { 426 slang_T *lp_slang; /* info for this language */ 427 slang_T *lp_sallang; /* language used for sound folding or NULL */ 428 slang_T *lp_replang; /* language used for REP items or NULL */ 429 int lp_region; /* bitmask for region or REGION_ALL */ 430 } langp_T; 431 432 #define LANGP_ENTRY(ga, i) (((langp_T *)(ga).ga_data) + (i)) 433 434 #define REGION_ALL 0xff /* word valid in all regions */ 435 436 #define VIMSPELLMAGIC "VIMspell" /* string at start of Vim spell file */ 437 #define VIMSPELLMAGICL 8 438 #define VIMSPELLVERSION 50 439 440 /* Section IDs. Only renumber them when VIMSPELLVERSION changes! */ 441 #define SN_REGION 0 /* <regionname> section */ 442 #define SN_CHARFLAGS 1 /* charflags section */ 443 #define SN_MIDWORD 2 /* <midword> section */ 444 #define SN_PREFCOND 3 /* <prefcond> section */ 445 #define SN_REP 4 /* REP items section */ 446 #define SN_SAL 5 /* SAL items section */ 447 #define SN_SOFO 6 /* soundfolding section */ 448 #define SN_MAP 7 /* MAP items section */ 449 #define SN_COMPOUND 8 /* compound words section */ 450 #define SN_SYLLABLE 9 /* syllable section */ 451 #define SN_NOBREAK 10 /* NOBREAK section */ 452 #define SN_END 255 /* end of sections */ 453 454 #define SNF_REQUIRED 1 /* <sectionflags>: required section */ 455 456 /* Result values. Lower number is accepted over higher one. */ 457 #define SP_BANNED -1 458 #define SP_OK 0 459 #define SP_RARE 1 460 #define SP_LOCAL 2 461 #define SP_BAD 3 462 463 /* file used for "zG" and "zW" */ 464 static char_u *int_wordlist = NULL; 465 466 /* 467 * Information used when looking for suggestions. 468 */ 469 typedef struct suginfo_S 470 { 471 garray_T su_ga; /* suggestions, contains "suggest_T" */ 472 int su_maxcount; /* max. number of suggestions displayed */ 473 int su_maxscore; /* maximum score for adding to su_ga */ 474 garray_T su_sga; /* like su_ga, sound-folded scoring */ 475 char_u *su_badptr; /* start of bad word in line */ 476 int su_badlen; /* length of detected bad word in line */ 477 int su_badflags; /* caps flags for bad word */ 478 char_u su_badword[MAXWLEN]; /* bad word truncated at su_badlen */ 479 char_u su_fbadword[MAXWLEN]; /* su_badword case-folded */ 480 hashtab_T su_banned; /* table with banned words */ 481 slang_T *su_sallang; /* default language for sound folding */ 482 } suginfo_T; 483 484 /* One word suggestion. Used in "si_ga". */ 485 typedef struct suggest_S 486 { 487 char_u *st_word; /* suggested word, allocated string */ 488 int st_orglen; /* length of replaced text */ 489 int st_score; /* lower is better */ 490 int st_altscore; /* used when st_score compares equal */ 491 int st_salscore; /* st_score is for soundalike */ 492 int st_had_bonus; /* bonus already included in score */ 493 slang_T *st_slang; /* language used for sound folding */ 494 } suggest_T; 495 496 #define SUG(ga, i) (((suggest_T *)(ga).ga_data)[i]) 497 498 /* Number of suggestions kept when cleaning up. When rescore_suggestions() is 499 * called the score may change, thus we need to keep more than what is 500 * displayed. */ 501 #define SUG_CLEAN_COUNT(su) ((su)->su_maxcount < 50 ? 50 : (su)->su_maxcount) 502 503 /* Threshold for sorting and cleaning up suggestions. Don't want to keep lots 504 * of suggestions that are not going to be displayed. */ 505 #define SUG_MAX_COUNT(su) ((su)->su_maxcount + 50) 506 507 /* score for various changes */ 508 #define SCORE_SPLIT 149 /* split bad word */ 509 #define SCORE_ICASE 52 /* slightly different case */ 510 #define SCORE_REGION 200 /* word is for different region */ 511 #define SCORE_RARE 180 /* rare word */ 512 #define SCORE_SWAP 90 /* swap two characters */ 513 #define SCORE_SWAP3 110 /* swap two characters in three */ 514 #define SCORE_REP 87 /* REP replacement */ 515 #define SCORE_SUBST 93 /* substitute a character */ 516 #define SCORE_SIMILAR 33 /* substitute a similar character */ 517 #define SCORE_SUBCOMP 33 /* substitute a composing character */ 518 #define SCORE_DEL 94 /* delete a character */ 519 #define SCORE_DELDUP 64 /* delete a duplicated character */ 520 #define SCORE_DELCOMP 28 /* delete a composing character */ 521 #define SCORE_INS 96 /* insert a character */ 522 #define SCORE_INSDUP 66 /* insert a duplicate character */ 523 #define SCORE_INSCOMP 30 /* insert a composing character */ 524 #define SCORE_NONWORD 103 /* change non-word to word char */ 525 526 #define SCORE_FILE 30 /* suggestion from a file */ 527 #define SCORE_MAXINIT 350 /* Initial maximum score: higher == slower. 528 * 350 allows for about three changes. */ 529 530 #define SCORE_BIG SCORE_INS * 3 /* big difference */ 531 #define SCORE_MAXMAX 999999 /* accept any score */ 532 533 /* 534 * Structure to store info for word matching. 535 */ 536 typedef struct matchinf_S 537 { 538 langp_T *mi_lp; /* info for language and region */ 539 540 /* pointers to original text to be checked */ 541 char_u *mi_word; /* start of word being checked */ 542 char_u *mi_end; /* end of matching word so far */ 543 char_u *mi_fend; /* next char to be added to mi_fword */ 544 char_u *mi_cend; /* char after what was used for 545 mi_capflags */ 546 547 /* case-folded text */ 548 char_u mi_fword[MAXWLEN + 1]; /* mi_word case-folded */ 549 int mi_fwordlen; /* nr of valid bytes in mi_fword */ 550 551 /* for when checking word after a prefix */ 552 int mi_prefarridx; /* index in sl_pidxs with list of 553 affixID/condition */ 554 int mi_prefcnt; /* number of entries at mi_prefarridx */ 555 int mi_prefixlen; /* byte length of prefix */ 556 #ifdef FEAT_MBYTE 557 int mi_cprefixlen; /* byte length of prefix in original 558 case */ 559 #else 560 # define mi_cprefixlen mi_prefixlen /* it's the same value */ 561 #endif 562 563 /* for when checking a compound word */ 564 int mi_compoff; /* start of following word offset */ 565 char_u mi_compflags[MAXWLEN]; /* flags for compound words used */ 566 int mi_complen; /* nr of compound words used */ 567 568 /* others */ 569 int mi_result; /* result so far: SP_BAD, SP_OK, etc. */ 570 int mi_capflags; /* WF_ONECAP WF_ALLCAP WF_KEEPCAP */ 571 buf_T *mi_buf; /* buffer being checked */ 572 573 /* for NOBREAK */ 574 int mi_result2; /* "mi_resul" without following word */ 575 char_u *mi_end2; /* "mi_end" without following word */ 576 } matchinf_T; 577 578 /* 579 * The tables used for recognizing word characters according to spelling. 580 * These are only used for the first 256 characters of 'encoding'. 581 */ 582 typedef struct spelltab_S 583 { 584 char_u st_isw[256]; /* flags: is word char */ 585 char_u st_isu[256]; /* flags: is uppercase char */ 586 char_u st_fold[256]; /* chars: folded case */ 587 char_u st_upper[256]; /* chars: upper case */ 588 } spelltab_T; 589 590 static spelltab_T spelltab; 591 static int did_set_spelltab; 592 593 #define CF_WORD 0x01 594 #define CF_UPPER 0x02 595 596 static void clear_spell_chartab __ARGS((spelltab_T *sp)); 597 static int set_spell_finish __ARGS((spelltab_T *new_st)); 598 static int spell_iswordp __ARGS((char_u *p, buf_T *buf)); 599 static int spell_iswordp_nmw __ARGS((char_u *p)); 600 #ifdef FEAT_MBYTE 601 static int spell_iswordp_w __ARGS((int *p, buf_T *buf)); 602 #endif 603 static int write_spell_prefcond __ARGS((FILE *fd, garray_T *gap)); 604 605 /* 606 * For finding suggestions: At each node in the tree these states are tried: 607 */ 608 typedef enum 609 { 610 STATE_START = 0, /* At start of node check for NUL bytes (goodword 611 * ends); if badword ends there is a match, otherwise 612 * try splitting word. */ 613 STATE_NOPREFIX, /* try without prefix */ 614 STATE_SPLITUNDO, /* Undo splitting. */ 615 STATE_ENDNUL, /* Past NUL bytes at start of the node. */ 616 STATE_PLAIN, /* Use each byte of the node. */ 617 STATE_DEL, /* Delete a byte from the bad word. */ 618 STATE_INS, /* Insert a byte in the bad word. */ 619 STATE_SWAP, /* Swap two bytes. */ 620 STATE_UNSWAP, /* Undo swap two characters. */ 621 STATE_SWAP3, /* Swap two characters over three. */ 622 STATE_UNSWAP3, /* Undo Swap two characters over three. */ 623 STATE_UNROT3L, /* Undo rotate three characters left */ 624 STATE_UNROT3R, /* Undo rotate three characters right */ 625 STATE_REP_INI, /* Prepare for using REP items. */ 626 STATE_REP, /* Use matching REP items from the .aff file. */ 627 STATE_REP_UNDO, /* Undo a REP item replacement. */ 628 STATE_FINAL /* End of this node. */ 629 } state_T; 630 631 /* 632 * Struct to keep the state at each level in suggest_try_change(). 633 */ 634 typedef struct trystate_S 635 { 636 state_T ts_state; /* state at this level, STATE_ */ 637 int ts_score; /* score */ 638 idx_T ts_arridx; /* index in tree array, start of node */ 639 short ts_curi; /* index in list of child nodes */ 640 char_u ts_fidx; /* index in fword[], case-folded bad word */ 641 char_u ts_fidxtry; /* ts_fidx at which bytes may be changed */ 642 char_u ts_twordlen; /* valid length of tword[] */ 643 char_u ts_prefixdepth; /* stack depth for end of prefix or 644 * PFD_PREFIXTREE or PFD_NOPREFIX */ 645 char_u ts_flags; /* TSF_ flags */ 646 #ifdef FEAT_MBYTE 647 char_u ts_tcharlen; /* number of bytes in tword character */ 648 char_u ts_tcharidx; /* current byte index in tword character */ 649 char_u ts_isdiff; /* DIFF_ values */ 650 char_u ts_fcharstart; /* index in fword where badword char started */ 651 #endif 652 char_u ts_prewordlen; /* length of word in "preword[]" */ 653 char_u ts_splitoff; /* index in "tword" after last split */ 654 char_u ts_splitfidx; /* "ts_fidx" at word split */ 655 char_u ts_complen; /* nr of compound words used */ 656 char_u ts_compsplit; /* index for "compflags" where word was spit */ 657 char_u ts_save_badflags; /* su_badflags saved here */ 658 } trystate_T; 659 660 /* values for ts_isdiff */ 661 #define DIFF_NONE 0 /* no different byte (yet) */ 662 #define DIFF_YES 1 /* different byte found */ 663 #define DIFF_INSERT 2 /* inserting character */ 664 665 /* values for ts_flags */ 666 #define TSF_PREFIXOK 1 /* already checked that prefix is OK */ 667 #define TSF_DIDSPLIT 2 /* tried split at this point */ 668 669 /* special values ts_prefixdepth */ 670 #define PFD_NOPREFIX 0xff /* not using prefixes */ 671 #define PFD_PREFIXTREE 0xfe /* walking through the prefix tree */ 672 #define PFD_NOTSPECIAL 0xfd /* first value that's not special */ 673 674 /* mode values for find_word */ 675 #define FIND_FOLDWORD 0 /* find word case-folded */ 676 #define FIND_KEEPWORD 1 /* find keep-case word */ 677 #define FIND_PREFIX 2 /* find word after prefix */ 678 #define FIND_COMPOUND 3 /* find case-folded compound word */ 679 #define FIND_KEEPCOMPOUND 4 /* find keep-case compound word */ 680 681 static slang_T *slang_alloc __ARGS((char_u *lang)); 682 static void slang_free __ARGS((slang_T *lp)); 683 static void slang_clear __ARGS((slang_T *lp)); 684 static void find_word __ARGS((matchinf_T *mip, int mode)); 685 static int can_compound __ARGS((slang_T *slang, char_u *word, char_u *flags)); 686 static int valid_word_prefix __ARGS((int totprefcnt, int arridx, int flags, char_u *word, slang_T *slang, int cond_req)); 687 static void find_prefix __ARGS((matchinf_T *mip, int mode)); 688 static int fold_more __ARGS((matchinf_T *mip)); 689 static int spell_valid_case __ARGS((int wordflags, int treeflags)); 690 static int no_spell_checking __ARGS((win_T *wp)); 691 static void spell_load_lang __ARGS((char_u *lang)); 692 static char_u *spell_enc __ARGS((void)); 693 static void int_wordlist_spl __ARGS((char_u *fname)); 694 static void spell_load_cb __ARGS((char_u *fname, void *cookie)); 695 static slang_T *spell_load_file __ARGS((char_u *fname, char_u *lang, slang_T *old_lp, int silent)); 696 static char_u *read_cnt_string __ARGS((FILE *fd, int cnt_bytes, int *lenp)); 697 static char_u *read_string __ARGS((FILE *fd, int cnt)); 698 static int read_region_section __ARGS((FILE *fd, slang_T *slang, int len)); 699 static int read_charflags_section __ARGS((FILE *fd)); 700 static int read_prefcond_section __ARGS((FILE *fd, slang_T *lp)); 701 static int read_rep_section __ARGS((FILE *fd, slang_T *slang)); 702 static int read_sal_section __ARGS((FILE *fd, slang_T *slang)); 703 static int read_sofo_section __ARGS((FILE *fd, slang_T *slang)); 704 static int read_compound __ARGS((FILE *fd, slang_T *slang, int len)); 705 static int byte_in_str __ARGS((char_u *str, int byte)); 706 static int init_syl_tab __ARGS((slang_T *slang)); 707 static int count_syllables __ARGS((slang_T *slang, char_u *word)); 708 static int set_sofo __ARGS((slang_T *lp, char_u *from, char_u *to)); 709 static void set_sal_first __ARGS((slang_T *lp)); 710 #ifdef FEAT_MBYTE 711 static int *mb_str2wide __ARGS((char_u *s)); 712 #endif 713 static idx_T read_tree __ARGS((FILE *fd, char_u *byts, idx_T *idxs, int maxidx, int startidx, int prefixtree, int maxprefcondnr)); 714 static void clear_midword __ARGS((buf_T *buf)); 715 static void use_midword __ARGS((slang_T *lp, buf_T *buf)); 716 static int find_region __ARGS((char_u *rp, char_u *region)); 717 static int captype __ARGS((char_u *word, char_u *end)); 718 static int badword_captype __ARGS((char_u *word, char_u *end)); 719 static void spell_reload_one __ARGS((char_u *fname, int added_word)); 720 static void set_spell_charflags __ARGS((char_u *flags, int cnt, char_u *upp)); 721 static int set_spell_chartab __ARGS((char_u *fol, char_u *low, char_u *upp)); 722 static int spell_casefold __ARGS((char_u *p, int len, char_u *buf, int buflen)); 723 static int check_need_cap __ARGS((linenr_T lnum, colnr_T col)); 724 static void spell_find_suggest __ARGS((char_u *badptr, suginfo_T *su, int maxcount, int banbadword, int need_cap)); 725 #ifdef FEAT_EVAL 726 static void spell_suggest_expr __ARGS((suginfo_T *su, char_u *expr)); 727 #endif 728 static void spell_suggest_file __ARGS((suginfo_T *su, char_u *fname)); 729 static void spell_suggest_intern __ARGS((suginfo_T *su)); 730 static void spell_find_cleanup __ARGS((suginfo_T *su)); 731 static void onecap_copy __ARGS((char_u *word, char_u *wcopy, int upper)); 732 static void allcap_copy __ARGS((char_u *word, char_u *wcopy)); 733 static void suggest_try_special __ARGS((suginfo_T *su)); 734 static void suggest_try_change __ARGS((suginfo_T *su)); 735 static int try_deeper __ARGS((suginfo_T *su, trystate_T *stack, int depth, int score_add)); 736 #ifdef FEAT_MBYTE 737 static int nofold_len __ARGS((char_u *fword, int flen, char_u *word)); 738 #endif 739 static void find_keepcap_word __ARGS((slang_T *slang, char_u *fword, char_u *kword)); 740 static void score_comp_sal __ARGS((suginfo_T *su)); 741 static void score_combine __ARGS((suginfo_T *su)); 742 static int stp_sal_score __ARGS((suggest_T *stp, suginfo_T *su, slang_T *slang, char_u *badsound)); 743 static void suggest_try_soundalike __ARGS((suginfo_T *su)); 744 static void make_case_word __ARGS((char_u *fword, char_u *cword, int flags)); 745 static void set_map_str __ARGS((slang_T *lp, char_u *map)); 746 static int similar_chars __ARGS((slang_T *slang, int c1, int c2)); 747 static void add_suggestion __ARGS((suginfo_T *su, garray_T *gap, char_u *goodword, int badlen, int score, int altscore, int had_bonus, slang_T *slang)); 748 static void add_banned __ARGS((suginfo_T *su, char_u *word)); 749 static int was_banned __ARGS((suginfo_T *su, char_u *word)); 750 static void free_banned __ARGS((suginfo_T *su)); 751 static void rescore_suggestions __ARGS((suginfo_T *su)); 752 static int cleanup_suggestions __ARGS((garray_T *gap, int maxscore, int keep)); 753 static void spell_soundfold __ARGS((slang_T *slang, char_u *inword, int folded, char_u *res)); 754 static void spell_soundfold_sofo __ARGS((slang_T *slang, char_u *inword, char_u *res)); 755 static void spell_soundfold_sal __ARGS((slang_T *slang, char_u *inword, char_u *res)); 756 #ifdef FEAT_MBYTE 757 static void spell_soundfold_wsal __ARGS((slang_T *slang, char_u *inword, char_u *res)); 758 #endif 759 static int soundalike_score __ARGS((char_u *goodsound, char_u *badsound)); 760 static int spell_edit_score __ARGS((char_u *badword, char_u *goodword)); 761 static void dump_word __ARGS((char_u *word, int round, int flags, linenr_T lnum)); 762 static linenr_T dump_prefixes __ARGS((slang_T *slang, char_u *word, int round, int flags, linenr_T startlnum)); 763 764 /* 765 * Use our own character-case definitions, because the current locale may 766 * differ from what the .spl file uses. 767 * These must not be called with negative number! 768 */ 769 #ifndef FEAT_MBYTE 770 /* Non-multi-byte implementation. */ 771 # define SPELL_TOFOLD(c) ((c) < 256 ? spelltab.st_fold[c] : (c)) 772 # define SPELL_TOUPPER(c) ((c) < 256 ? spelltab.st_upper[c] : (c)) 773 # define SPELL_ISUPPER(c) ((c) < 256 ? spelltab.st_isu[c] : FALSE) 774 #else 775 # if defined(HAVE_WCHAR_H) 776 # include <wchar.h> /* for towupper() and towlower() */ 777 # endif 778 /* Multi-byte implementation. For Unicode we can call utf_*(), but don't do 779 * that for ASCII, because we don't want to use 'casemap' here. Otherwise use 780 * the "w" library function for characters above 255 if available. */ 781 # ifdef HAVE_TOWLOWER 782 # define SPELL_TOFOLD(c) (enc_utf8 && (c) >= 128 ? utf_fold(c) \ 783 : (c) < 256 ? spelltab.st_fold[c] : towlower(c)) 784 # else 785 # define SPELL_TOFOLD(c) (enc_utf8 && (c) >= 128 ? utf_fold(c) \ 786 : (c) < 256 ? spelltab.st_fold[c] : (c)) 787 # endif 788 789 # ifdef HAVE_TOWUPPER 790 # define SPELL_TOUPPER(c) (enc_utf8 && (c) >= 128 ? utf_toupper(c) \ 791 : (c) < 256 ? spelltab.st_upper[c] : towupper(c)) 792 # else 793 # define SPELL_TOUPPER(c) (enc_utf8 && (c) >= 128 ? utf_toupper(c) \ 794 : (c) < 256 ? spelltab.st_upper[c] : (c)) 795 # endif 796 797 # ifdef HAVE_ISWUPPER 798 # define SPELL_ISUPPER(c) (enc_utf8 && (c) >= 128 ? utf_isupper(c) \ 799 : (c) < 256 ? spelltab.st_isu[c] : iswupper(c)) 800 # else 801 # define SPELL_ISUPPER(c) (enc_utf8 && (c) >= 128 ? utf_isupper(c) \ 802 : (c) < 256 ? spelltab.st_isu[c] : (FALSE)) 803 # endif 804 #endif 805 806 807 static char *e_format = N_("E759: Format error in spell file"); 808 static char *e_spell_trunc = N_("E758: Truncated spell file"); 809 static char *e_afftrailing = N_("Trailing text in %s line %d: %s"); 810 static char *e_affname = N_("Affix name too long in %s line %d: %s"); 811 static char *e_affform = N_("E761: Format error in affix file FOL, LOW or UPP"); 812 static char *e_affrange = N_("E762: Character in FOL, LOW or UPP is out of range"); 813 static char *msg_compressing = N_("Compressing word tree..."); 814 815 /* 816 * Main spell-checking function. 817 * "ptr" points to a character that could be the start of a word. 818 * "*attrp" is set to the attributes for a badly spelled word. For a non-word 819 * or when it's OK it remains unchanged. 820 * This must only be called when 'spelllang' is not empty. 821 * 822 * "capcol" is used to check for a Capitalised word after the end of a 823 * sentence. If it's zero then perform the check. Return the column where to 824 * check next, or -1 when no sentence end was found. If it's NULL then don't 825 * worry. 826 * 827 * Returns the length of the word in bytes, also when it's OK, so that the 828 * caller can skip over the word. 829 */ 830 int 831 spell_check(wp, ptr, attrp, capcol) 832 win_T *wp; /* current window */ 833 char_u *ptr; 834 int *attrp; 835 int *capcol; /* column to check for Capital */ 836 { 837 matchinf_T mi; /* Most things are put in "mi" so that it can 838 be passed to functions quickly. */ 839 int nrlen = 0; /* found a number first */ 840 int c; 841 int wrongcaplen = 0; 842 int lpi; 843 844 /* A word never starts at a space or a control character. Return quickly 845 * then, skipping over the character. */ 846 if (*ptr <= ' ') 847 return 1; 848 vim_memset(&mi, 0, sizeof(matchinf_T)); 849 850 /* A number is always OK. Also skip hexadecimal numbers 0xFF99 and 851 * 0X99FF. But when a word character follows do check spelling to find 852 * "3GPP". */ 853 if (*ptr >= '0' && *ptr <= '9') 854 { 855 if (*ptr == '0' && (ptr[1] == 'x' || ptr[1] == 'X')) 856 mi.mi_end = skiphex(ptr + 2); 857 else 858 { 859 mi.mi_end = skipdigits(ptr); 860 nrlen = mi.mi_end - ptr; 861 } 862 if (!spell_iswordp(mi.mi_end, wp->w_buffer)) 863 return (int)(mi.mi_end - ptr); 864 865 /* Try including the digits in the word. */ 866 mi.mi_fend = ptr + nrlen; 867 } 868 else 869 mi.mi_fend = ptr; 870 871 /* Find the normal end of the word (until the next non-word character). */ 872 mi.mi_word = ptr; 873 if (spell_iswordp(mi.mi_fend, wp->w_buffer)) 874 { 875 do 876 { 877 mb_ptr_adv(mi.mi_fend); 878 } while (*mi.mi_fend != NUL && spell_iswordp(mi.mi_fend, wp->w_buffer)); 879 880 if (capcol != NULL && *capcol == 0 && wp->w_buffer->b_cap_prog != NULL) 881 { 882 /* Check word starting with capital letter. */ 883 c = PTR2CHAR(ptr); 884 if (!SPELL_ISUPPER(c)) 885 wrongcaplen = (int)(mi.mi_fend - ptr); 886 } 887 } 888 if (capcol != NULL) 889 *capcol = -1; 890 891 /* We always use the characters up to the next non-word character, 892 * also for bad words. */ 893 mi.mi_end = mi.mi_fend; 894 895 /* Check caps type later. */ 896 mi.mi_buf = wp->w_buffer; 897 898 /* case-fold the word with one non-word character, so that we can check 899 * for the word end. */ 900 if (*mi.mi_fend != NUL) 901 mb_ptr_adv(mi.mi_fend); 902 903 (void)spell_casefold(ptr, (int)(mi.mi_fend - ptr), mi.mi_fword, 904 MAXWLEN + 1); 905 mi.mi_fwordlen = STRLEN(mi.mi_fword); 906 907 /* The word is bad unless we recognize it. */ 908 mi.mi_result = SP_BAD; 909 mi.mi_result2 = SP_BAD; 910 911 /* 912 * Loop over the languages specified in 'spelllang'. 913 * We check them all, because a matching word may be longer than an 914 * already found matching word. 915 */ 916 for (lpi = 0; lpi < wp->w_buffer->b_langp.ga_len; ++lpi) 917 { 918 mi.mi_lp = LANGP_ENTRY(wp->w_buffer->b_langp, lpi); 919 920 /* If reloading fails the language is still in the list but everything 921 * has been cleared. */ 922 if (mi.mi_lp->lp_slang->sl_fidxs == NULL) 923 continue; 924 925 /* Check for a matching word in case-folded words. */ 926 find_word(&mi, FIND_FOLDWORD); 927 928 /* Check for a matching word in keep-case words. */ 929 find_word(&mi, FIND_KEEPWORD); 930 931 /* Check for matching prefixes. */ 932 find_prefix(&mi, FIND_FOLDWORD); 933 934 /* For a NOBREAK language, may want to use a word without a following 935 * word as a backup. */ 936 if (mi.mi_lp->lp_slang->sl_nobreak && mi.mi_result == SP_BAD 937 && mi.mi_result2 != SP_BAD) 938 { 939 mi.mi_result = mi.mi_result2; 940 mi.mi_end = mi.mi_end2; 941 } 942 } 943 944 if (mi.mi_result != SP_OK) 945 { 946 /* If we found a number skip over it. Allows for "42nd". Do flag 947 * rare and local words, e.g., "3GPP". */ 948 if (nrlen > 0) 949 { 950 if (mi.mi_result == SP_BAD || mi.mi_result == SP_BANNED) 951 return nrlen; 952 } 953 954 /* When we are at a non-word character there is no error, just 955 * skip over the character (try looking for a word after it). */ 956 else if (!spell_iswordp_nmw(ptr)) 957 { 958 if (capcol != NULL && wp->w_buffer->b_cap_prog != NULL) 959 { 960 regmatch_T regmatch; 961 962 /* Check for end of sentence. */ 963 regmatch.regprog = wp->w_buffer->b_cap_prog; 964 regmatch.rm_ic = FALSE; 965 if (vim_regexec(®match, ptr, 0)) 966 *capcol = (int)(regmatch.endp[0] - ptr); 967 } 968 969 #ifdef FEAT_MBYTE 970 if (has_mbyte) 971 return (*mb_ptr2len)(ptr); 972 #endif 973 return 1; 974 } 975 else if (mi.mi_end == ptr) 976 /* Always include at least one character. Required for when there 977 * is a mixup in "midword". */ 978 mb_ptr_adv(mi.mi_end); 979 else if (mi.mi_result == SP_BAD 980 && LANGP_ENTRY(wp->w_buffer->b_langp, 0)->lp_slang->sl_nobreak) 981 { 982 char_u *p, *fp; 983 int save_result = mi.mi_result; 984 985 /* First language in 'spelllang' is NOBREAK. Find first position 986 * at which any word would be valid. */ 987 mi.mi_lp = LANGP_ENTRY(wp->w_buffer->b_langp, 0); 988 if (mi.mi_lp->lp_slang->sl_fidxs != NULL) 989 { 990 p = mi.mi_word; 991 fp = mi.mi_fword; 992 for (;;) 993 { 994 mb_ptr_adv(p); 995 mb_ptr_adv(fp); 996 if (p >= mi.mi_end) 997 break; 998 mi.mi_compoff = fp - mi.mi_fword; 999 find_word(&mi, FIND_COMPOUND); 1000 if (mi.mi_result != SP_BAD) 1001 { 1002 mi.mi_end = p; 1003 break; 1004 } 1005 } 1006 mi.mi_result = save_result; 1007 } 1008 } 1009 1010 if (mi.mi_result == SP_BAD || mi.mi_result == SP_BANNED) 1011 *attrp = highlight_attr[HLF_SPB]; 1012 else if (mi.mi_result == SP_RARE) 1013 *attrp = highlight_attr[HLF_SPR]; 1014 else 1015 *attrp = highlight_attr[HLF_SPL]; 1016 } 1017 1018 if (wrongcaplen > 0 && (mi.mi_result == SP_OK || mi.mi_result == SP_RARE)) 1019 { 1020 /* Report SpellCap only when the word isn't badly spelled. */ 1021 *attrp = highlight_attr[HLF_SPC]; 1022 return wrongcaplen; 1023 } 1024 1025 return (int)(mi.mi_end - ptr); 1026 } 1027 1028 /* 1029 * Check if the word at "mip->mi_word" is in the tree. 1030 * When "mode" is FIND_FOLDWORD check in fold-case word tree. 1031 * When "mode" is FIND_KEEPWORD check in keep-case word tree. 1032 * When "mode" is FIND_PREFIX check for word after prefix in fold-case word 1033 * tree. 1034 * 1035 * For a match mip->mi_result is updated. 1036 */ 1037 static void 1038 find_word(mip, mode) 1039 matchinf_T *mip; 1040 int mode; 1041 { 1042 idx_T arridx = 0; 1043 int endlen[MAXWLEN]; /* length at possible word endings */ 1044 idx_T endidx[MAXWLEN]; /* possible word endings */ 1045 int endidxcnt = 0; 1046 int len; 1047 int wlen = 0; 1048 int flen; 1049 int c; 1050 char_u *ptr; 1051 idx_T lo, hi, m; 1052 #ifdef FEAT_MBYTE 1053 char_u *s; 1054 #endif 1055 char_u *p; 1056 int res = SP_BAD; 1057 slang_T *slang = mip->mi_lp->lp_slang; 1058 unsigned flags; 1059 char_u *byts; 1060 idx_T *idxs; 1061 int word_ends; 1062 int prefix_found; 1063 int nobreak_result; 1064 1065 if (mode == FIND_KEEPWORD || mode == FIND_KEEPCOMPOUND) 1066 { 1067 /* Check for word with matching case in keep-case tree. */ 1068 ptr = mip->mi_word; 1069 flen = 9999; /* no case folding, always enough bytes */ 1070 byts = slang->sl_kbyts; 1071 idxs = slang->sl_kidxs; 1072 1073 if (mode == FIND_KEEPCOMPOUND) 1074 /* Skip over the previously found word(s). */ 1075 wlen += mip->mi_compoff; 1076 } 1077 else 1078 { 1079 /* Check for case-folded in case-folded tree. */ 1080 ptr = mip->mi_fword; 1081 flen = mip->mi_fwordlen; /* available case-folded bytes */ 1082 byts = slang->sl_fbyts; 1083 idxs = slang->sl_fidxs; 1084 1085 if (mode == FIND_PREFIX) 1086 { 1087 /* Skip over the prefix. */ 1088 wlen = mip->mi_prefixlen; 1089 flen -= mip->mi_prefixlen; 1090 } 1091 else if (mode == FIND_COMPOUND) 1092 { 1093 /* Skip over the previously found word(s). */ 1094 wlen = mip->mi_compoff; 1095 flen -= mip->mi_compoff; 1096 } 1097 1098 } 1099 1100 if (byts == NULL) 1101 return; /* array is empty */ 1102 1103 /* 1104 * Repeat advancing in the tree until: 1105 * - there is a byte that doesn't match, 1106 * - we reach the end of the tree, 1107 * - or we reach the end of the line. 1108 */ 1109 for (;;) 1110 { 1111 if (flen <= 0 && *mip->mi_fend != NUL) 1112 flen = fold_more(mip); 1113 1114 len = byts[arridx++]; 1115 1116 /* If the first possible byte is a zero the word could end here. 1117 * Remember this index, we first check for the longest word. */ 1118 if (byts[arridx] == 0) 1119 { 1120 if (endidxcnt == MAXWLEN) 1121 { 1122 /* Must be a corrupted spell file. */ 1123 EMSG(_(e_format)); 1124 return; 1125 } 1126 endlen[endidxcnt] = wlen; 1127 endidx[endidxcnt++] = arridx++; 1128 --len; 1129 1130 /* Skip over the zeros, there can be several flag/region 1131 * combinations. */ 1132 while (len > 0 && byts[arridx] == 0) 1133 { 1134 ++arridx; 1135 --len; 1136 } 1137 if (len == 0) 1138 break; /* no children, word must end here */ 1139 } 1140 1141 /* Stop looking at end of the line. */ 1142 if (ptr[wlen] == NUL) 1143 break; 1144 1145 /* Perform a binary search in the list of accepted bytes. */ 1146 c = ptr[wlen]; 1147 if (c == TAB) /* <Tab> is handled like <Space> */ 1148 c = ' '; 1149 lo = arridx; 1150 hi = arridx + len - 1; 1151 while (lo < hi) 1152 { 1153 m = (lo + hi) / 2; 1154 if (byts[m] > c) 1155 hi = m - 1; 1156 else if (byts[m] < c) 1157 lo = m + 1; 1158 else 1159 { 1160 lo = hi = m; 1161 break; 1162 } 1163 } 1164 1165 /* Stop if there is no matching byte. */ 1166 if (hi < lo || byts[lo] != c) 1167 break; 1168 1169 /* Continue at the child (if there is one). */ 1170 arridx = idxs[lo]; 1171 ++wlen; 1172 --flen; 1173 1174 /* One space in the good word may stand for several spaces in the 1175 * checked word. */ 1176 if (c == ' ') 1177 { 1178 for (;;) 1179 { 1180 if (flen <= 0 && *mip->mi_fend != NUL) 1181 flen = fold_more(mip); 1182 if (ptr[wlen] != ' ' && ptr[wlen] != TAB) 1183 break; 1184 ++wlen; 1185 --flen; 1186 } 1187 } 1188 } 1189 1190 /* 1191 * Verify that one of the possible endings is valid. Try the longest 1192 * first. 1193 */ 1194 while (endidxcnt > 0) 1195 { 1196 --endidxcnt; 1197 arridx = endidx[endidxcnt]; 1198 wlen = endlen[endidxcnt]; 1199 1200 #ifdef FEAT_MBYTE 1201 if ((*mb_head_off)(ptr, ptr + wlen) > 0) 1202 continue; /* not at first byte of character */ 1203 #endif 1204 if (spell_iswordp(ptr + wlen, mip->mi_buf)) 1205 { 1206 if (slang->sl_compprog == NULL && !slang->sl_nobreak) 1207 continue; /* next char is a word character */ 1208 word_ends = FALSE; 1209 } 1210 else 1211 word_ends = TRUE; 1212 /* The prefix flag is before compound flags. Once a valid prefix flag 1213 * has been found we try compound flags. */ 1214 prefix_found = FALSE; 1215 1216 #ifdef FEAT_MBYTE 1217 if (mode != FIND_KEEPWORD && has_mbyte) 1218 { 1219 /* Compute byte length in original word, length may change 1220 * when folding case. This can be slow, take a shortcut when the 1221 * case-folded word is equal to the keep-case word. */ 1222 p = mip->mi_word; 1223 if (STRNCMP(ptr, p, wlen) != 0) 1224 { 1225 for (s = ptr; s < ptr + wlen; mb_ptr_adv(s)) 1226 mb_ptr_adv(p); 1227 wlen = p - mip->mi_word; 1228 } 1229 } 1230 #endif 1231 1232 /* Check flags and region. For FIND_PREFIX check the condition and 1233 * prefix ID. 1234 * Repeat this if there are more flags/region alternatives until there 1235 * is a match. */ 1236 res = SP_BAD; 1237 for (len = byts[arridx - 1]; len > 0 && byts[arridx] == 0; 1238 --len, ++arridx) 1239 { 1240 flags = idxs[arridx]; 1241 1242 /* For the fold-case tree check that the case of the checked word 1243 * matches with what the word in the tree requires. 1244 * For keep-case tree the case is always right. For prefixes we 1245 * don't bother to check. */ 1246 if (mode == FIND_FOLDWORD) 1247 { 1248 if (mip->mi_cend != mip->mi_word + wlen) 1249 { 1250 /* mi_capflags was set for a different word length, need 1251 * to do it again. */ 1252 mip->mi_cend = mip->mi_word + wlen; 1253 mip->mi_capflags = captype(mip->mi_word, mip->mi_cend); 1254 } 1255 1256 if (mip->mi_capflags == WF_KEEPCAP 1257 || !spell_valid_case(mip->mi_capflags, flags)) 1258 continue; 1259 } 1260 1261 /* When mode is FIND_PREFIX the word must support the prefix: 1262 * check the prefix ID and the condition. Do that for the list at 1263 * mip->mi_prefarridx that find_prefix() filled. */ 1264 else if (mode == FIND_PREFIX && !prefix_found) 1265 { 1266 c = valid_word_prefix(mip->mi_prefcnt, mip->mi_prefarridx, 1267 flags, 1268 mip->mi_word + mip->mi_cprefixlen, slang, 1269 FALSE); 1270 if (c == 0) 1271 continue; 1272 1273 /* Use the WF_RARE flag for a rare prefix. */ 1274 if (c & WF_RAREPFX) 1275 flags |= WF_RARE; 1276 prefix_found = TRUE; 1277 } 1278 1279 if (slang->sl_nobreak) 1280 { 1281 if ((mode == FIND_COMPOUND || mode == FIND_KEEPCOMPOUND) 1282 && (flags & WF_BANNED) == 0) 1283 { 1284 /* NOBREAK: found a valid following word. That's all we 1285 * need to know, so return. */ 1286 mip->mi_result = SP_OK; 1287 break; 1288 } 1289 } 1290 1291 else if ((mode == FIND_COMPOUND || mode == FIND_KEEPCOMPOUND 1292 || !word_ends)) 1293 { 1294 /* If there is no flag or the word is shorter than 1295 * COMPOUNDMIN reject it quickly. 1296 * Makes you wonder why someone puts a compound flag on a word 1297 * that's too short... Myspell compatibility requires this 1298 * anyway. */ 1299 if (((unsigned)flags >> 24) == 0 1300 || wlen - mip->mi_compoff < slang->sl_compminlen) 1301 continue; 1302 #ifdef FEAT_MBYTE 1303 /* For multi-byte chars check character length against 1304 * COMPOUNDMIN. */ 1305 if (has_mbyte 1306 && slang->sl_compminlen > 0 1307 && mb_charlen_len(mip->mi_word + mip->mi_compoff, 1308 wlen - mip->mi_compoff) < slang->sl_compminlen) 1309 continue; 1310 #endif 1311 1312 /* Limit the number of compound words to COMPOUNDMAX if no 1313 * maximum for syllables is specified. */ 1314 if (!word_ends && mip->mi_complen + 2 > slang->sl_compmax 1315 && slang->sl_compsylmax == MAXWLEN) 1316 continue; 1317 1318 /* Quickly check if compounding is possible with this flag. */ 1319 if (!byte_in_str(mip->mi_complen == 0 1320 ? slang->sl_compstartflags 1321 : slang->sl_compallflags, 1322 ((unsigned)flags >> 24))) 1323 continue; 1324 1325 if (mode == FIND_COMPOUND) 1326 { 1327 int capflags; 1328 1329 /* Need to check the caps type of the appended compound 1330 * word. */ 1331 #ifdef FEAT_MBYTE 1332 if (has_mbyte && STRNCMP(ptr, mip->mi_word, 1333 mip->mi_compoff) != 0) 1334 { 1335 /* case folding may have changed the length */ 1336 p = mip->mi_word; 1337 for (s = ptr; s < ptr + mip->mi_compoff; mb_ptr_adv(s)) 1338 mb_ptr_adv(p); 1339 } 1340 else 1341 #endif 1342 p = mip->mi_word + mip->mi_compoff; 1343 capflags = captype(p, mip->mi_word + wlen); 1344 if (capflags == WF_KEEPCAP || (capflags == WF_ALLCAP 1345 && (flags & WF_FIXCAP) != 0)) 1346 continue; 1347 1348 if (capflags != WF_ALLCAP) 1349 { 1350 /* When the character before the word is a word 1351 * character we do not accept a Onecap word. We do 1352 * accept a no-caps word, even when the dictionary 1353 * word specifies ONECAP. */ 1354 mb_ptr_back(mip->mi_word, p); 1355 if (spell_iswordp_nmw(p) 1356 ? capflags == WF_ONECAP 1357 : (flags & WF_ONECAP) != 0 1358 && capflags != WF_ONECAP) 1359 continue; 1360 } 1361 } 1362 1363 /* If the word ends the sequence of compound flags of the 1364 * words must match with one of the COMPOUNDFLAGS items and 1365 * the number of syllables must not be too large. */ 1366 mip->mi_compflags[mip->mi_complen] = ((unsigned)flags >> 24); 1367 mip->mi_compflags[mip->mi_complen + 1] = NUL; 1368 if (word_ends) 1369 { 1370 char_u fword[MAXWLEN]; 1371 1372 if (slang->sl_compsylmax < MAXWLEN) 1373 { 1374 /* "fword" is only needed for checking syllables. */ 1375 if (ptr == mip->mi_word) 1376 (void)spell_casefold(ptr, wlen, fword, MAXWLEN); 1377 else 1378 vim_strncpy(fword, ptr, endlen[endidxcnt]); 1379 } 1380 if (!can_compound(slang, fword, mip->mi_compflags)) 1381 continue; 1382 } 1383 } 1384 1385 /* Check NEEDCOMPOUND: can't use word without compounding. */ 1386 else if (flags & WF_NEEDCOMP) 1387 continue; 1388 1389 nobreak_result = SP_OK; 1390 1391 if (!word_ends) 1392 { 1393 int save_result = mip->mi_result; 1394 char_u *save_end = mip->mi_end; 1395 langp_T *save_lp = mip->mi_lp; 1396 int lpi; 1397 1398 /* Check that a valid word follows. If there is one and we 1399 * are compounding, it will set "mi_result", thus we are 1400 * always finished here. For NOBREAK we only check that a 1401 * valid word follows. 1402 * Recursive! */ 1403 if (slang->sl_nobreak) 1404 mip->mi_result = SP_BAD; 1405 1406 /* Find following word in case-folded tree. */ 1407 mip->mi_compoff = endlen[endidxcnt]; 1408 #ifdef FEAT_MBYTE 1409 if (has_mbyte && mode == FIND_KEEPWORD) 1410 { 1411 /* Compute byte length in case-folded word from "wlen": 1412 * byte length in keep-case word. Length may change when 1413 * folding case. This can be slow, take a shortcut when 1414 * the case-folded word is equal to the keep-case word. */ 1415 p = mip->mi_fword; 1416 if (STRNCMP(ptr, p, wlen) != 0) 1417 { 1418 for (s = ptr; s < ptr + wlen; mb_ptr_adv(s)) 1419 mb_ptr_adv(p); 1420 mip->mi_compoff = p - mip->mi_fword; 1421 } 1422 } 1423 #endif 1424 c = mip->mi_compoff; 1425 ++mip->mi_complen; 1426 1427 /* For NOBREAK we need to try all NOBREAK languages, at least 1428 * to find the ".add" file(s). */ 1429 for (lpi = 0; lpi < mip->mi_buf->b_langp.ga_len; ++lpi) 1430 { 1431 if (slang->sl_nobreak) 1432 { 1433 mip->mi_lp = LANGP_ENTRY(mip->mi_buf->b_langp, lpi); 1434 if (mip->mi_lp->lp_slang->sl_fidxs == NULL 1435 || !mip->mi_lp->lp_slang->sl_nobreak) 1436 continue; 1437 } 1438 1439 find_word(mip, FIND_COMPOUND); 1440 1441 /* When NOBREAK any word that matches is OK. Otherwise we 1442 * need to find the longest match, thus try with keep-case 1443 * and prefix too. */ 1444 if (!slang->sl_nobreak || mip->mi_result == SP_BAD) 1445 { 1446 /* Find following word in keep-case tree. */ 1447 mip->mi_compoff = wlen; 1448 find_word(mip, FIND_KEEPCOMPOUND); 1449 1450 if (!slang->sl_nobreak || mip->mi_result == SP_BAD) 1451 { 1452 /* Check for following word with prefix. */ 1453 mip->mi_compoff = c; 1454 find_prefix(mip, FIND_COMPOUND); 1455 } 1456 } 1457 1458 if (!slang->sl_nobreak) 1459 break; 1460 } 1461 --mip->mi_complen; 1462 mip->mi_lp = save_lp; 1463 1464 if (slang->sl_nobreak) 1465 { 1466 nobreak_result = mip->mi_result; 1467 mip->mi_result = save_result; 1468 mip->mi_end = save_end; 1469 } 1470 else 1471 { 1472 if (mip->mi_result == SP_OK) 1473 break; 1474 continue; 1475 } 1476 } 1477 1478 if (flags & WF_BANNED) 1479 res = SP_BANNED; 1480 else if (flags & WF_REGION) 1481 { 1482 /* Check region. */ 1483 if ((mip->mi_lp->lp_region & (flags >> 16)) != 0) 1484 res = SP_OK; 1485 else 1486 res = SP_LOCAL; 1487 } 1488 else if (flags & WF_RARE) 1489 res = SP_RARE; 1490 else 1491 res = SP_OK; 1492 1493 /* Always use the longest match and the best result. For NOBREAK 1494 * we separately keep the longest match without a following good 1495 * word as a fall-back. */ 1496 if (nobreak_result == SP_BAD) 1497 { 1498 if (mip->mi_result2 > res) 1499 { 1500 mip->mi_result2 = res; 1501 mip->mi_end2 = mip->mi_word + wlen; 1502 } 1503 else if (mip->mi_result2 == res 1504 && mip->mi_end2 < mip->mi_word + wlen) 1505 mip->mi_end2 = mip->mi_word + wlen; 1506 } 1507 else if (mip->mi_result > res) 1508 { 1509 mip->mi_result = res; 1510 mip->mi_end = mip->mi_word + wlen; 1511 } 1512 else if (mip->mi_result == res && mip->mi_end < mip->mi_word + wlen) 1513 mip->mi_end = mip->mi_word + wlen; 1514 1515 if (mip->mi_result == SP_OK) 1516 break; 1517 } 1518 1519 if (mip->mi_result == SP_OK) 1520 break; 1521 } 1522 } 1523 1524 /* 1525 * Return TRUE if "flags" is a valid sequence of compound flags and 1526 * "word[len]" does not have too many syllables. 1527 */ 1528 static int 1529 can_compound(slang, word, flags) 1530 slang_T *slang; 1531 char_u *word; 1532 char_u *flags; 1533 { 1534 regmatch_T regmatch; 1535 #ifdef FEAT_MBYTE 1536 char_u uflags[MAXWLEN * 2]; 1537 int i; 1538 #endif 1539 char_u *p; 1540 1541 if (slang->sl_compprog == NULL) 1542 return FALSE; 1543 #ifdef FEAT_MBYTE 1544 if (enc_utf8) 1545 { 1546 /* Need to convert the single byte flags to utf8 characters. */ 1547 p = uflags; 1548 for (i = 0; flags[i] != NUL; ++i) 1549 p += mb_char2bytes(flags[i], p); 1550 *p = NUL; 1551 p = uflags; 1552 } 1553 else 1554 #endif 1555 p = flags; 1556 regmatch.regprog = slang->sl_compprog; 1557 regmatch.rm_ic = FALSE; 1558 if (!vim_regexec(®match, p, 0)) 1559 return FALSE; 1560 1561 /* Count the number of syllables. This may be slow, do it last. If there 1562 * are too many syllables AND the number of compound words is above 1563 * COMPOUNDMAX then compounding is not allowed. */ 1564 if (slang->sl_compsylmax < MAXWLEN 1565 && count_syllables(slang, word) > slang->sl_compsylmax) 1566 return (int)STRLEN(flags) < slang->sl_compmax; 1567 return TRUE; 1568 } 1569 1570 /* 1571 * Return non-zero if the prefix indicated by "arridx" matches with the prefix 1572 * ID in "flags" for the word "word". 1573 * The WF_RAREPFX flag is included in the return value for a rare prefix. 1574 */ 1575 static int 1576 valid_word_prefix(totprefcnt, arridx, flags, word, slang, cond_req) 1577 int totprefcnt; /* nr of prefix IDs */ 1578 int arridx; /* idx in sl_pidxs[] */ 1579 int flags; 1580 char_u *word; 1581 slang_T *slang; 1582 int cond_req; /* only use prefixes with a condition */ 1583 { 1584 int prefcnt; 1585 int pidx; 1586 regprog_T *rp; 1587 regmatch_T regmatch; 1588 int prefid; 1589 1590 prefid = (unsigned)flags >> 24; 1591 for (prefcnt = totprefcnt - 1; prefcnt >= 0; --prefcnt) 1592 { 1593 pidx = slang->sl_pidxs[arridx + prefcnt]; 1594 1595 /* Check the prefix ID. */ 1596 if (prefid != (pidx & 0xff)) 1597 continue; 1598 1599 /* Check if the prefix doesn't combine and the word already has a 1600 * suffix. */ 1601 if ((flags & WF_HAS_AFF) && (pidx & WF_PFX_NC)) 1602 continue; 1603 1604 /* Check the condition, if there is one. The condition index is 1605 * stored in the two bytes above the prefix ID byte. */ 1606 rp = slang->sl_prefprog[((unsigned)pidx >> 8) & 0xffff]; 1607 if (rp != NULL) 1608 { 1609 regmatch.regprog = rp; 1610 regmatch.rm_ic = FALSE; 1611 if (!vim_regexec(®match, word, 0)) 1612 continue; 1613 } 1614 else if (cond_req) 1615 continue; 1616 1617 /* It's a match! Return the WF_ flags. */ 1618 return pidx; 1619 } 1620 return 0; 1621 } 1622 1623 /* 1624 * Check if the word at "mip->mi_word" has a matching prefix. 1625 * If it does, then check the following word. 1626 * 1627 * If "mode" is "FIND_COMPOUND" then do the same after another word, find a 1628 * prefix in a compound word. 1629 * 1630 * For a match mip->mi_result is updated. 1631 */ 1632 static void 1633 find_prefix(mip, mode) 1634 matchinf_T *mip; 1635 int mode; 1636 { 1637 idx_T arridx = 0; 1638 int len; 1639 int wlen = 0; 1640 int flen; 1641 int c; 1642 char_u *ptr; 1643 idx_T lo, hi, m; 1644 slang_T *slang = mip->mi_lp->lp_slang; 1645 char_u *byts; 1646 idx_T *idxs; 1647 1648 byts = slang->sl_pbyts; 1649 if (byts == NULL) 1650 return; /* array is empty */ 1651 1652 /* We use the case-folded word here, since prefixes are always 1653 * case-folded. */ 1654 ptr = mip->mi_fword; 1655 flen = mip->mi_fwordlen; /* available case-folded bytes */ 1656 if (mode == FIND_COMPOUND) 1657 { 1658 /* Skip over the previously found word(s). */ 1659 ptr += mip->mi_compoff; 1660 flen -= mip->mi_compoff; 1661 } 1662 idxs = slang->sl_pidxs; 1663 1664 /* 1665 * Repeat advancing in the tree until: 1666 * - there is a byte that doesn't match, 1667 * - we reach the end of the tree, 1668 * - or we reach the end of the line. 1669 */ 1670 for (;;) 1671 { 1672 if (flen == 0 && *mip->mi_fend != NUL) 1673 flen = fold_more(mip); 1674 1675 len = byts[arridx++]; 1676 1677 /* If the first possible byte is a zero the prefix could end here. 1678 * Check if the following word matches and supports the prefix. */ 1679 if (byts[arridx] == 0) 1680 { 1681 /* There can be several prefixes with different conditions. We 1682 * try them all, since we don't know which one will give the 1683 * longest match. The word is the same each time, pass the list 1684 * of possible prefixes to find_word(). */ 1685 mip->mi_prefarridx = arridx; 1686 mip->mi_prefcnt = len; 1687 while (len > 0 && byts[arridx] == 0) 1688 { 1689 ++arridx; 1690 --len; 1691 } 1692 mip->mi_prefcnt -= len; 1693 1694 /* Find the word that comes after the prefix. */ 1695 mip->mi_prefixlen = wlen; 1696 if (mode == FIND_COMPOUND) 1697 /* Skip over the previously found word(s). */ 1698 mip->mi_prefixlen += mip->mi_compoff; 1699 1700 #ifdef FEAT_MBYTE 1701 if (has_mbyte) 1702 { 1703 /* Case-folded length may differ from original length. */ 1704 mip->mi_cprefixlen = nofold_len(mip->mi_fword, 1705 mip->mi_prefixlen, mip->mi_word); 1706 } 1707 else 1708 mip->mi_cprefixlen = mip->mi_prefixlen; 1709 #endif 1710 find_word(mip, FIND_PREFIX); 1711 1712 1713 if (len == 0) 1714 break; /* no children, word must end here */ 1715 } 1716 1717 /* Stop looking at end of the line. */ 1718 if (ptr[wlen] == NUL) 1719 break; 1720 1721 /* Perform a binary search in the list of accepted bytes. */ 1722 c = ptr[wlen]; 1723 lo = arridx; 1724 hi = arridx + len - 1; 1725 while (lo < hi) 1726 { 1727 m = (lo + hi) / 2; 1728 if (byts[m] > c) 1729 hi = m - 1; 1730 else if (byts[m] < c) 1731 lo = m + 1; 1732 else 1733 { 1734 lo = hi = m; 1735 break; 1736 } 1737 } 1738 1739 /* Stop if there is no matching byte. */ 1740 if (hi < lo || byts[lo] != c) 1741 break; 1742 1743 /* Continue at the child (if there is one). */ 1744 arridx = idxs[lo]; 1745 ++wlen; 1746 --flen; 1747 } 1748 } 1749 1750 /* 1751 * Need to fold at least one more character. Do until next non-word character 1752 * for efficiency. 1753 * Return the length of the folded chars in bytes. 1754 */ 1755 static int 1756 fold_more(mip) 1757 matchinf_T *mip; 1758 { 1759 int flen; 1760 char_u *p; 1761 1762 p = mip->mi_fend; 1763 do 1764 { 1765 mb_ptr_adv(mip->mi_fend); 1766 } while (*mip->mi_fend != NUL && spell_iswordp(mip->mi_fend, mip->mi_buf)); 1767 1768 /* Include the non-word character so that we can check for the 1769 * word end. */ 1770 if (*mip->mi_fend != NUL) 1771 mb_ptr_adv(mip->mi_fend); 1772 1773 (void)spell_casefold(p, (int)(mip->mi_fend - p), 1774 mip->mi_fword + mip->mi_fwordlen, 1775 MAXWLEN - mip->mi_fwordlen); 1776 flen = STRLEN(mip->mi_fword + mip->mi_fwordlen); 1777 mip->mi_fwordlen += flen; 1778 return flen; 1779 } 1780 1781 /* 1782 * Check case flags for a word. Return TRUE if the word has the requested 1783 * case. 1784 */ 1785 static int 1786 spell_valid_case(wordflags, treeflags) 1787 int wordflags; /* flags for the checked word. */ 1788 int treeflags; /* flags for the word in the spell tree */ 1789 { 1790 return ((wordflags == WF_ALLCAP && (treeflags & WF_FIXCAP) == 0) 1791 || ((treeflags & (WF_ALLCAP | WF_KEEPCAP)) == 0 1792 && ((treeflags & WF_ONECAP) == 0 1793 || (wordflags & WF_ONECAP) != 0))); 1794 } 1795 1796 /* 1797 * Return TRUE if spell checking is not enabled. 1798 */ 1799 static int 1800 no_spell_checking(wp) 1801 win_T *wp; 1802 { 1803 if (!wp->w_p_spell || *wp->w_buffer->b_p_spl == NUL) 1804 { 1805 EMSG(_("E756: Spell checking is not enabled")); 1806 return TRUE; 1807 } 1808 return FALSE; 1809 } 1810 1811 /* 1812 * Move to next spell error. 1813 * "curline" is FALSE for "[s", "]s", "[S" and "]S". 1814 * "curline" is TRUE to find word under/after cursor in the same line. 1815 * For Insert mode completion "dir" is BACKWARD and "curline" is TRUE: move 1816 * to after badly spelled word before the cursor. 1817 * Return 0 if not found, length of the badly spelled word otherwise. 1818 */ 1819 int 1820 spell_move_to(wp, dir, allwords, curline, attrp) 1821 win_T *wp; 1822 int dir; /* FORWARD or BACKWARD */ 1823 int allwords; /* TRUE for "[s"/"]s", FALSE for "[S"/"]S" */ 1824 int curline; 1825 int *attrp; /* return: attributes of bad word or NULL */ 1826 { 1827 linenr_T lnum; 1828 pos_T found_pos; 1829 int found_len = 0; 1830 char_u *line; 1831 char_u *p; 1832 char_u *endp; 1833 int attr; 1834 int len; 1835 int has_syntax = syntax_present(wp->w_buffer); 1836 int col; 1837 int can_spell; 1838 char_u *buf = NULL; 1839 int buflen = 0; 1840 int skip = 0; 1841 int capcol = -1; 1842 int found_one = FALSE; 1843 int wrapped = FALSE; 1844 1845 if (no_spell_checking(wp)) 1846 return 0; 1847 1848 /* 1849 * Start looking for bad word at the start of the line, because we can't 1850 * start halfway a word, we don't know where the it starts or ends. 1851 * 1852 * When searching backwards, we continue in the line to find the last 1853 * bad word (in the cursor line: before the cursor). 1854 * 1855 * We concatenate the start of the next line, so that wrapped words work 1856 * (e.g. "et<line-break>cetera"). Doesn't work when searching backwards 1857 * though... 1858 */ 1859 lnum = wp->w_cursor.lnum; 1860 found_pos.lnum = 0; 1861 1862 while (!got_int) 1863 { 1864 line = ml_get_buf(wp->w_buffer, lnum, FALSE); 1865 1866 len = STRLEN(line); 1867 if (buflen < len + MAXWLEN + 2) 1868 { 1869 vim_free(buf); 1870 buflen = len + MAXWLEN + 2; 1871 buf = alloc(buflen); 1872 if (buf == NULL) 1873 break; 1874 } 1875 1876 /* In first line check first word for Capital. */ 1877 if (lnum == 1) 1878 capcol = 0; 1879 1880 /* For checking first word with a capital skip white space. */ 1881 if (capcol == 0) 1882 capcol = skipwhite(line) - line; 1883 1884 /* Copy the line into "buf" and append the start of the next line if 1885 * possible. */ 1886 STRCPY(buf, line); 1887 if (lnum < wp->w_buffer->b_ml.ml_line_count) 1888 spell_cat_line(buf + STRLEN(buf), ml_get(lnum + 1), MAXWLEN); 1889 1890 p = buf + skip; 1891 endp = buf + len; 1892 while (p < endp) 1893 { 1894 /* When searching backward don't search after the cursor. Unless 1895 * we wrapped around the end of the buffer. */ 1896 if (dir == BACKWARD 1897 && lnum == wp->w_cursor.lnum 1898 && !wrapped 1899 && (colnr_T)(p - buf) >= wp->w_cursor.col) 1900 break; 1901 1902 /* start of word */ 1903 attr = 0; 1904 len = spell_check(wp, p, &attr, &capcol); 1905 1906 if (attr != 0) 1907 { 1908 /* We found a bad word. Check the attribute. */ 1909 if (allwords || attr == highlight_attr[HLF_SPB]) 1910 { 1911 found_one = TRUE; 1912 1913 /* When searching forward only accept a bad word after 1914 * the cursor. */ 1915 if (dir == BACKWARD 1916 || lnum != wp->w_cursor.lnum 1917 || (lnum == wp->w_cursor.lnum 1918 && (wrapped 1919 || (colnr_T)(curline ? p - buf + len 1920 : p - buf) 1921 > wp->w_cursor.col))) 1922 { 1923 if (has_syntax) 1924 { 1925 col = p - buf; 1926 (void)syn_get_id(wp, lnum, (colnr_T)col, 1927 FALSE, &can_spell); 1928 } 1929 else 1930 can_spell = TRUE; 1931 1932 if (can_spell) 1933 { 1934 found_pos.lnum = lnum; 1935 found_pos.col = p - buf; 1936 #ifdef FEAT_VIRTUALEDIT 1937 found_pos.coladd = 0; 1938 #endif 1939 if (dir == FORWARD) 1940 { 1941 /* No need to search further. */ 1942 wp->w_cursor = found_pos; 1943 vim_free(buf); 1944 if (attrp != NULL) 1945 *attrp = attr; 1946 return len; 1947 } 1948 else if (curline) 1949 /* Insert mode completion: put cursor after 1950 * the bad word. */ 1951 found_pos.col += len; 1952 found_len = len; 1953 } 1954 } 1955 } 1956 } 1957 1958 /* advance to character after the word */ 1959 p += len; 1960 capcol -= len; 1961 } 1962 1963 if (dir == BACKWARD && found_pos.lnum != 0) 1964 { 1965 /* Use the last match in the line (before the cursor). */ 1966 wp->w_cursor = found_pos; 1967 vim_free(buf); 1968 return found_len; 1969 } 1970 1971 if (curline) 1972 break; /* only check cursor line */ 1973 1974 /* Advance to next line. */ 1975 if (dir == BACKWARD) 1976 { 1977 /* If we are back at the starting line and searched it again there 1978 * is no match, give up. */ 1979 if (lnum == wp->w_cursor.lnum && wrapped) 1980 break; 1981 1982 if (lnum > 1) 1983 --lnum; 1984 else if (!p_ws) 1985 break; /* at first line and 'nowrapscan' */ 1986 else 1987 { 1988 /* Wrap around to the end of the buffer. May search the 1989 * starting line again and accept the last match. */ 1990 lnum = wp->w_buffer->b_ml.ml_line_count; 1991 wrapped = TRUE; 1992 if (!shortmess(SHM_SEARCH)) 1993 give_warning((char_u *)_(top_bot_msg), TRUE); 1994 } 1995 capcol = -1; 1996 } 1997 else 1998 { 1999 if (lnum < wp->w_buffer->b_ml.ml_line_count) 2000 ++lnum; 2001 else if (!p_ws) 2002 break; /* at first line and 'nowrapscan' */ 2003 else 2004 { 2005 /* Wrap around to the start of the buffer. May search the 2006 * starting line again and accept the first match. */ 2007 lnum = 1; 2008 wrapped = TRUE; 2009 if (!shortmess(SHM_SEARCH)) 2010 give_warning((char_u *)_(bot_top_msg), TRUE); 2011 } 2012 2013 /* If we are back at the starting line and there is no match then 2014 * give up. */ 2015 if (lnum == wp->w_cursor.lnum && !found_one) 2016 break; 2017 2018 /* Skip the characters at the start of the next line that were 2019 * included in a match crossing line boundaries. */ 2020 if (attr == 0) 2021 skip = p - endp; 2022 else 2023 skip = 0; 2024 2025 /* Capscol skips over the inserted space. */ 2026 --capcol; 2027 2028 /* But after empty line check first word in next line */ 2029 if (*skipwhite(line) == NUL) 2030 capcol = 0; 2031 } 2032 2033 line_breakcheck(); 2034 } 2035 2036 vim_free(buf); 2037 return 0; 2038 } 2039 2040 /* 2041 * For spell checking: concatenate the start of the following line "line" into 2042 * "buf", blanking-out special characters. Copy less then "maxlen" bytes. 2043 */ 2044 void 2045 spell_cat_line(buf, line, maxlen) 2046 char_u *buf; 2047 char_u *line; 2048 int maxlen; 2049 { 2050 char_u *p; 2051 int n; 2052 2053 p = skipwhite(line); 2054 while (vim_strchr((char_u *)"*#/\"\t", *p) != NULL) 2055 p = skipwhite(p + 1); 2056 2057 if (*p != NUL) 2058 { 2059 *buf = ' '; 2060 vim_strncpy(buf + 1, line, maxlen - 2); 2061 n = p - line; 2062 if (n >= maxlen) 2063 n = maxlen - 1; 2064 vim_memset(buf + 1, ' ', n); 2065 } 2066 } 2067 2068 typedef struct spelload_S 2069 { 2070 char_u sl_lang[MAXWLEN + 1]; /* language name */ 2071 slang_T *sl_slang; /* resulting slang_T struct */ 2072 int sl_nobreak; /* NOBREAK language found */ 2073 } spelload_T; 2074 2075 /* 2076 * Load word list(s) for "lang" from Vim spell file(s). 2077 * "lang" must be the language without the region: e.g., "en". 2078 */ 2079 static void 2080 spell_load_lang(lang) 2081 char_u *lang; 2082 { 2083 char_u fname_enc[85]; 2084 int r; 2085 spelload_T sl; 2086 2087 /* Copy the language name to pass it to spell_load_cb() as a cookie. 2088 * It's truncated when an error is detected. */ 2089 STRCPY(sl.sl_lang, lang); 2090 sl.sl_slang = NULL; 2091 sl.sl_nobreak = FALSE; 2092 2093 /* 2094 * Find the first spell file for "lang" in 'runtimepath' and load it. 2095 */ 2096 vim_snprintf((char *)fname_enc, sizeof(fname_enc) - 5, 2097 "spell/%s.%s.spl", lang, spell_enc()); 2098 r = do_in_runtimepath(fname_enc, FALSE, spell_load_cb, &sl); 2099 2100 if (r == FAIL && *sl.sl_lang != NUL) 2101 { 2102 /* Try loading the ASCII version. */ 2103 vim_snprintf((char *)fname_enc, sizeof(fname_enc) - 5, 2104 "spell/%s.ascii.spl", lang); 2105 r = do_in_runtimepath(fname_enc, FALSE, spell_load_cb, &sl); 2106 } 2107 2108 if (r == FAIL) 2109 smsg((char_u *)_("Warning: Cannot find word list \"%s.%s.spl\" or \"%s.ascii.spl\""), 2110 lang, spell_enc(), lang); 2111 else if (sl.sl_slang != NULL) 2112 { 2113 /* At least one file was loaded, now load all the additions. */ 2114 STRCPY(fname_enc + STRLEN(fname_enc) - 3, "add.spl"); 2115 do_in_runtimepath(fname_enc, TRUE, spell_load_cb, &sl); 2116 } 2117 } 2118 2119 /* 2120 * Return the encoding used for spell checking: Use 'encoding', except that we 2121 * use "latin1" for "latin9". And limit to 60 characters (just in case). 2122 */ 2123 static char_u * 2124 spell_enc() 2125 { 2126 2127 #ifdef FEAT_MBYTE 2128 if (STRLEN(p_enc) < 60 && STRCMP(p_enc, "iso-8859-15") != 0) 2129 return p_enc; 2130 #endif 2131 return (char_u *)"latin1"; 2132 } 2133 2134 /* 2135 * Get the name of the .spl file for the internal wordlist into 2136 * "fname[MAXPATHL]". 2137 */ 2138 static void 2139 int_wordlist_spl(fname) 2140 char_u *fname; 2141 { 2142 vim_snprintf((char *)fname, MAXPATHL, "%s.%s.spl", 2143 int_wordlist, spell_enc()); 2144 } 2145 2146 /* 2147 * Allocate a new slang_T. 2148 * Caller must fill "sl_next". 2149 */ 2150 static slang_T * 2151 slang_alloc(lang) 2152 char_u *lang; 2153 { 2154 slang_T *lp; 2155 2156 lp = (slang_T *)alloc_clear(sizeof(slang_T)); 2157 if (lp != NULL) 2158 { 2159 lp->sl_name = vim_strsave(lang); 2160 ga_init2(&lp->sl_rep, sizeof(fromto_T), 10); 2161 lp->sl_compmax = MAXWLEN; 2162 lp->sl_compsylmax = MAXWLEN; 2163 } 2164 return lp; 2165 } 2166 2167 /* 2168 * Free the contents of an slang_T and the structure itself. 2169 */ 2170 static void 2171 slang_free(lp) 2172 slang_T *lp; 2173 { 2174 vim_free(lp->sl_name); 2175 vim_free(lp->sl_fname); 2176 slang_clear(lp); 2177 vim_free(lp); 2178 } 2179 2180 /* 2181 * Clear an slang_T so that the file can be reloaded. 2182 */ 2183 static void 2184 slang_clear(lp) 2185 slang_T *lp; 2186 { 2187 garray_T *gap; 2188 fromto_T *ftp; 2189 salitem_T *smp; 2190 int i; 2191 2192 vim_free(lp->sl_fbyts); 2193 lp->sl_fbyts = NULL; 2194 vim_free(lp->sl_kbyts); 2195 lp->sl_kbyts = NULL; 2196 vim_free(lp->sl_pbyts); 2197 lp->sl_pbyts = NULL; 2198 2199 vim_free(lp->sl_fidxs); 2200 lp->sl_fidxs = NULL; 2201 vim_free(lp->sl_kidxs); 2202 lp->sl_kidxs = NULL; 2203 vim_free(lp->sl_pidxs); 2204 lp->sl_pidxs = NULL; 2205 2206 gap = &lp->sl_rep; 2207 while (gap->ga_len > 0) 2208 { 2209 ftp = &((fromto_T *)gap->ga_data)[--gap->ga_len]; 2210 vim_free(ftp->ft_from); 2211 vim_free(ftp->ft_to); 2212 } 2213 ga_clear(gap); 2214 2215 gap = &lp->sl_sal; 2216 if (lp->sl_sofo) 2217 { 2218 /* "ga_len" is set to 1 without adding an item for latin1 */ 2219 if (gap->ga_data != NULL) 2220 /* SOFOFROM and SOFOTO items: free lists of wide characters. */ 2221 for (i = 0; i < gap->ga_len; ++i) 2222 vim_free(((int **)gap->ga_data)[i]); 2223 } 2224 else 2225 /* SAL items: free salitem_T items */ 2226 while (gap->ga_len > 0) 2227 { 2228 smp = &((salitem_T *)gap->ga_data)[--gap->ga_len]; 2229 vim_free(smp->sm_lead); 2230 /* Don't free sm_oneof and sm_rules, they point into sm_lead. */ 2231 vim_free(smp->sm_to); 2232 #ifdef FEAT_MBYTE 2233 vim_free(smp->sm_lead_w); 2234 vim_free(smp->sm_oneof_w); 2235 vim_free(smp->sm_to_w); 2236 #endif 2237 } 2238 ga_clear(gap); 2239 2240 for (i = 0; i < lp->sl_prefixcnt; ++i) 2241 vim_free(lp->sl_prefprog[i]); 2242 lp->sl_prefixcnt = 0; 2243 vim_free(lp->sl_prefprog); 2244 lp->sl_prefprog = NULL; 2245 2246 vim_free(lp->sl_midword); 2247 lp->sl_midword = NULL; 2248 2249 vim_free(lp->sl_compprog); 2250 vim_free(lp->sl_compstartflags); 2251 vim_free(lp->sl_compallflags); 2252 lp->sl_compprog = NULL; 2253 lp->sl_compstartflags = NULL; 2254 lp->sl_compallflags = NULL; 2255 2256 vim_free(lp->sl_syllable); 2257 lp->sl_syllable = NULL; 2258 ga_clear(&lp->sl_syl_items); 2259 2260 #ifdef FEAT_MBYTE 2261 { 2262 int todo = lp->sl_map_hash.ht_used; 2263 hashitem_T *hi; 2264 2265 for (hi = lp->sl_map_hash.ht_array; todo > 0; ++hi) 2266 if (!HASHITEM_EMPTY(hi)) 2267 { 2268 --todo; 2269 vim_free(hi->hi_key); 2270 } 2271 } 2272 hash_clear(&lp->sl_map_hash); 2273 #endif 2274 2275 lp->sl_compmax = MAXWLEN; 2276 lp->sl_compminlen = 0; 2277 lp->sl_compsylmax = MAXWLEN; 2278 lp->sl_regions[0] = NUL; 2279 } 2280 2281 /* 2282 * Load one spell file and store the info into a slang_T. 2283 * Invoked through do_in_runtimepath(). 2284 */ 2285 static void 2286 spell_load_cb(fname, cookie) 2287 char_u *fname; 2288 void *cookie; 2289 { 2290 spelload_T *slp = (spelload_T *)cookie; 2291 slang_T *slang; 2292 2293 slang = spell_load_file(fname, slp->sl_lang, NULL, FALSE); 2294 if (slang != NULL) 2295 { 2296 /* When a previously loaded file has NOBREAK also use it for the 2297 * ".add" files. */ 2298 if (slp->sl_nobreak && slang->sl_add) 2299 slang->sl_nobreak = TRUE; 2300 else if (slang->sl_nobreak) 2301 slp->sl_nobreak = TRUE; 2302 2303 slp->sl_slang = slang; 2304 } 2305 } 2306 2307 /* 2308 * Load one spell file and store the info into a slang_T. 2309 * 2310 * This is invoked in two ways: 2311 * - From spell_load_cb() to load a spell file for the first time. "lang" is 2312 * the language name, "old_lp" is NULL. Will allocate an slang_T. 2313 * - To reload a spell file that was changed. "lang" is NULL and "old_lp" 2314 * points to the existing slang_T. 2315 * Returns the slang_T the spell file was loaded into. NULL for error. 2316 */ 2317 static slang_T * 2318 spell_load_file(fname, lang, old_lp, silent) 2319 char_u *fname; 2320 char_u *lang; 2321 slang_T *old_lp; 2322 int silent; /* no error if file doesn't exist */ 2323 { 2324 FILE *fd; 2325 char_u buf[VIMSPELLMAGICL]; 2326 char_u *p; 2327 char_u *bp; 2328 idx_T *ip; 2329 int i; 2330 int n; 2331 int len; 2332 int round; 2333 char_u *save_sourcing_name = sourcing_name; 2334 linenr_T save_sourcing_lnum = sourcing_lnum; 2335 slang_T *lp = NULL; 2336 idx_T idx; 2337 int c = 0; 2338 int res; 2339 2340 fd = mch_fopen((char *)fname, "r"); 2341 if (fd == NULL) 2342 { 2343 if (!silent) 2344 EMSG2(_(e_notopen), fname); 2345 else if (p_verbose > 2) 2346 { 2347 verbose_enter(); 2348 smsg((char_u *)e_notopen, fname); 2349 verbose_leave(); 2350 } 2351 goto endFAIL; 2352 } 2353 if (p_verbose > 2) 2354 { 2355 verbose_enter(); 2356 smsg((char_u *)_("Reading spell file \"%s\""), fname); 2357 verbose_leave(); 2358 } 2359 2360 if (old_lp == NULL) 2361 { 2362 lp = slang_alloc(lang); 2363 if (lp == NULL) 2364 goto endFAIL; 2365 2366 /* Remember the file name, used to reload the file when it's updated. */ 2367 lp->sl_fname = vim_strsave(fname); 2368 if (lp->sl_fname == NULL) 2369 goto endFAIL; 2370 2371 /* Check for .add.spl. */ 2372 lp->sl_add = strstr((char *)gettail(fname), ".add.") != NULL; 2373 } 2374 else 2375 lp = old_lp; 2376 2377 /* Set sourcing_name, so that error messages mention the file name. */ 2378 sourcing_name = fname; 2379 sourcing_lnum = 0; 2380 2381 /* <HEADER>: <fileID> 2382 */ 2383 for (i = 0; i < VIMSPELLMAGICL; ++i) 2384 buf[i] = getc(fd); /* <fileID> */ 2385 if (STRNCMP(buf, VIMSPELLMAGIC, VIMSPELLMAGICL) != 0) 2386 { 2387 EMSG(_("E757: This does not look like a spell file")); 2388 goto endFAIL; 2389 } 2390 c = getc(fd); /* <versionnr> */ 2391 if (c < VIMSPELLVERSION) 2392 { 2393 EMSG(_("E771: Old spell file, needs to be updated")); 2394 goto endFAIL; 2395 } 2396 else if (c > VIMSPELLVERSION) 2397 { 2398 EMSG(_("E772: Spell file is for newer version of Vim")); 2399 goto endFAIL; 2400 } 2401 2402 2403 /* 2404 * <SECTIONS>: <section> ... <sectionend> 2405 * <section>: <sectionID> <sectionflags> <sectionlen> (section contents) 2406 */ 2407 for (;;) 2408 { 2409 n = getc(fd); /* <sectionID> or <sectionend> */ 2410 if (n == SN_END) 2411 break; 2412 c = getc(fd); /* <sectionflags> */ 2413 len = (getc(fd) << 24) + (getc(fd) << 16) + (getc(fd) << 8) + getc(fd); 2414 /* <sectionlen> */ 2415 if (len < 0) 2416 goto truncerr; 2417 2418 res = 0; 2419 switch (n) 2420 { 2421 case SN_REGION: 2422 res = read_region_section(fd, lp, len); 2423 break; 2424 2425 case SN_CHARFLAGS: 2426 res = read_charflags_section(fd); 2427 break; 2428 2429 case SN_MIDWORD: 2430 lp->sl_midword = read_string(fd, len); /* <midword> */ 2431 if (lp->sl_midword == NULL) 2432 goto endFAIL; 2433 break; 2434 2435 case SN_PREFCOND: 2436 res = read_prefcond_section(fd, lp); 2437 break; 2438 2439 case SN_REP: 2440 res = read_rep_section(fd, lp); 2441 break; 2442 2443 case SN_SAL: 2444 res = read_sal_section(fd, lp); 2445 break; 2446 2447 case SN_SOFO: 2448 res = read_sofo_section(fd, lp); 2449 break; 2450 2451 case SN_MAP: 2452 p = read_string(fd, len); /* <mapstr> */ 2453 if (p == NULL) 2454 goto endFAIL; 2455 set_map_str(lp, p); 2456 vim_free(p); 2457 break; 2458 2459 case SN_COMPOUND: 2460 res = read_compound(fd, lp, len); 2461 break; 2462 2463 case SN_NOBREAK: 2464 lp->sl_nobreak = TRUE; 2465 break; 2466 2467 case SN_SYLLABLE: 2468 lp->sl_syllable = read_string(fd, len); /* <syllable> */ 2469 if (lp->sl_syllable == NULL) 2470 goto endFAIL; 2471 if (init_syl_tab(lp) == FAIL) 2472 goto endFAIL; 2473 break; 2474 2475 default: 2476 /* Unsupported section. When it's required give an error 2477 * message. When it's not required skip the contents. */ 2478 if (c & SNF_REQUIRED) 2479 { 2480 EMSG(_("E770: Unsupported section in spell file")); 2481 goto endFAIL; 2482 } 2483 while (--len >= 0) 2484 if (getc(fd) < 0) 2485 goto truncerr; 2486 break; 2487 } 2488 if (res == SP_FORMERROR) 2489 { 2490 formerr: 2491 EMSG(_(e_format)); 2492 goto endFAIL; 2493 } 2494 if (res == SP_TRUNCERROR) 2495 { 2496 truncerr: 2497 EMSG(_(e_spell_trunc)); 2498 goto endFAIL; 2499 } 2500 if (res == SP_OTHERERROR) 2501 goto endFAIL; 2502 } 2503 2504 /* round 1: <LWORDTREE> 2505 * round 2: <KWORDTREE> 2506 * round 3: <PREFIXTREE> */ 2507 for (round = 1; round <= 3; ++round) 2508 { 2509 /* The tree size was computed when writing the file, so that we can 2510 * allocate it as one long block. <nodecount> */ 2511 len = (getc(fd) << 24) + (getc(fd) << 16) + (getc(fd) << 8) + getc(fd); 2512 if (len < 0) 2513 goto truncerr; 2514 if (len > 0) 2515 { 2516 /* Allocate the byte array. */ 2517 bp = lalloc((long_u)len, TRUE); 2518 if (bp == NULL) 2519 goto endFAIL; 2520 if (round == 1) 2521 lp->sl_fbyts = bp; 2522 else if (round == 2) 2523 lp->sl_kbyts = bp; 2524 else 2525 lp->sl_pbyts = bp; 2526 2527 /* Allocate the index array. */ 2528 ip = (idx_T *)lalloc_clear((long_u)(len * sizeof(int)), TRUE); 2529 if (ip == NULL) 2530 goto endFAIL; 2531 if (round == 1) 2532 lp->sl_fidxs = ip; 2533 else if (round == 2) 2534 lp->sl_kidxs = ip; 2535 else 2536 lp->sl_pidxs = ip; 2537 2538 /* Read the tree and store it in the array. */ 2539 idx = read_tree(fd, bp, ip, len, 0, round == 3, lp->sl_prefixcnt); 2540 if (idx == -1) 2541 goto truncerr; 2542 if (idx < 0) 2543 goto formerr; 2544 } 2545 } 2546 2547 /* For a new file link it in the list of spell files. */ 2548 if (old_lp == NULL) 2549 { 2550 lp->sl_next = first_lang; 2551 first_lang = lp; 2552 } 2553 2554 goto endOK; 2555 2556 endFAIL: 2557 if (lang != NULL) 2558 /* truncating the name signals the error to spell_load_lang() */ 2559 *lang = NUL; 2560 if (lp != NULL && old_lp == NULL) 2561 slang_free(lp); 2562 lp = NULL; 2563 2564 endOK: 2565 if (fd != NULL) 2566 fclose(fd); 2567 sourcing_name = save_sourcing_name; 2568 sourcing_lnum = save_sourcing_lnum; 2569 2570 return lp; 2571 } 2572 2573 /* 2574 * Read a length field from "fd" in "cnt_bytes" bytes. 2575 * Allocate memory, read the string into it and add a NUL at the end. 2576 * Returns NULL when the count is zero. 2577 * Sets "*cntp" to SP_*ERROR when there is an error, length of the result 2578 * otherwise. 2579 */ 2580 static char_u * 2581 read_cnt_string(fd, cnt_bytes, cntp) 2582 FILE *fd; 2583 int cnt_bytes; 2584 int *cntp; 2585 { 2586 int cnt = 0; 2587 int i; 2588 char_u *str; 2589 2590 /* read the length bytes, MSB first */ 2591 for (i = 0; i < cnt_bytes; ++i) 2592 cnt = (cnt << 8) + getc(fd); 2593 if (cnt < 0) 2594 { 2595 *cntp = SP_TRUNCERROR; 2596 return NULL; 2597 } 2598 *cntp = cnt; 2599 if (cnt == 0) 2600 return NULL; /* nothing to read, return NULL */ 2601 2602 str = read_string(fd, cnt); 2603 if (str == NULL) 2604 *cntp = SP_OTHERERROR; 2605 return str; 2606 } 2607 2608 /* 2609 * Read a string of length "cnt" from "fd" into allocated memory. 2610 * Returns NULL when out of memory. 2611 */ 2612 static char_u * 2613 read_string(fd, cnt) 2614 FILE *fd; 2615 int cnt; 2616 { 2617 char_u *str; 2618 int i; 2619 2620 /* allocate memory */ 2621 str = alloc((unsigned)cnt + 1); 2622 if (str != NULL) 2623 { 2624 /* Read the string. Doesn't check for truncated file. */ 2625 for (i = 0; i < cnt; ++i) 2626 str[i] = getc(fd); 2627 str[i] = NUL; 2628 } 2629 return str; 2630 } 2631 2632 /* 2633 * Read SN_REGION: <regionname> ... 2634 * Return SP_*ERROR flags. 2635 */ 2636 static int 2637 read_region_section(fd, lp, len) 2638 FILE *fd; 2639 slang_T *lp; 2640 int len; 2641 { 2642 int i; 2643 2644 if (len > 16) 2645 return SP_FORMERROR; 2646 for (i = 0; i < len; ++i) 2647 lp->sl_regions[i] = getc(fd); /* <regionname> */ 2648 lp->sl_regions[len] = NUL; 2649 return 0; 2650 } 2651 2652 /* 2653 * Read SN_CHARFLAGS section: <charflagslen> <charflags> 2654 * <folcharslen> <folchars> 2655 * Return SP_*ERROR flags. 2656 */ 2657 static int 2658 read_charflags_section(fd) 2659 FILE *fd; 2660 { 2661 char_u *flags; 2662 char_u *fol; 2663 int flagslen, follen; 2664 2665 /* <charflagslen> <charflags> */ 2666 flags = read_cnt_string(fd, 1, &flagslen); 2667 if (flagslen < 0) 2668 return flagslen; 2669 2670 /* <folcharslen> <folchars> */ 2671 fol = read_cnt_string(fd, 2, &follen); 2672 if (follen < 0) 2673 { 2674 vim_free(flags); 2675 return follen; 2676 } 2677 2678 /* Set the word-char flags and fill SPELL_ISUPPER() table. */ 2679 if (flags != NULL && fol != NULL) 2680 set_spell_charflags(flags, flagslen, fol); 2681 2682 vim_free(flags); 2683 vim_free(fol); 2684 2685 /* When <charflagslen> is zero then <fcharlen> must also be zero. */ 2686 if ((flags == NULL) != (fol == NULL)) 2687 return SP_FORMERROR; 2688 return 0; 2689 } 2690 2691 /* 2692 * Read SN_PREFCOND section. 2693 * Return SP_*ERROR flags. 2694 */ 2695 static int 2696 read_prefcond_section(fd, lp) 2697 FILE *fd; 2698 slang_T *lp; 2699 { 2700 int cnt; 2701 int i; 2702 int n; 2703 char_u *p; 2704 char_u buf[MAXWLEN + 1]; 2705 2706 /* <prefcondcnt> <prefcond> ... */ 2707 cnt = (getc(fd) << 8) + getc(fd); /* <prefcondcnt> */ 2708 if (cnt <= 0) 2709 return SP_FORMERROR; 2710 2711 lp->sl_prefprog = (regprog_T **)alloc_clear( 2712 (unsigned)sizeof(regprog_T *) * cnt); 2713 if (lp->sl_prefprog == NULL) 2714 return SP_OTHERERROR; 2715 lp->sl_prefixcnt = cnt; 2716 2717 for (i = 0; i < cnt; ++i) 2718 { 2719 /* <prefcond> : <condlen> <condstr> */ 2720 n = getc(fd); /* <condlen> */ 2721 if (n < 0 || n >= MAXWLEN) 2722 return SP_FORMERROR; 2723 2724 /* When <condlen> is zero we have an empty condition. Otherwise 2725 * compile the regexp program used to check for the condition. */ 2726 if (n > 0) 2727 { 2728 buf[0] = '^'; /* always match at one position only */ 2729 p = buf + 1; 2730 while (n-- > 0) 2731 *p++ = getc(fd); /* <condstr> */ 2732 *p = NUL; 2733 lp->sl_prefprog[i] = vim_regcomp(buf, RE_MAGIC + RE_STRING); 2734 } 2735 } 2736 return 0; 2737 } 2738 2739 /* 2740 * Read REP items section from "fd": <repcount> <rep> ... 2741 * Return SP_*ERROR flags. 2742 */ 2743 static int 2744 read_rep_section(fd, slang) 2745 FILE *fd; 2746 slang_T *slang; 2747 { 2748 int cnt; 2749 garray_T *gap; 2750 fromto_T *ftp; 2751 short *first; 2752 int i; 2753 2754 cnt = (getc(fd) << 8) + getc(fd); /* <repcount> */ 2755 if (cnt < 0) 2756 return SP_TRUNCERROR; 2757 2758 gap = &slang->sl_rep; 2759 if (ga_grow(gap, cnt) == FAIL) 2760 return SP_OTHERERROR; 2761 2762 /* <rep> : <repfromlen> <repfrom> <reptolen> <repto> */ 2763 for (; gap->ga_len < cnt; ++gap->ga_len) 2764 { 2765 ftp = &((fromto_T *)gap->ga_data)[gap->ga_len]; 2766 ftp->ft_from = read_cnt_string(fd, 1, &i); 2767 if (i < 0) 2768 return i; 2769 if (i == 0) 2770 return SP_FORMERROR; 2771 ftp->ft_to = read_cnt_string(fd, 1, &i); 2772 if (i <= 0) 2773 { 2774 vim_free(ftp->ft_from); 2775 if (i < 0) 2776 return i; 2777 return SP_FORMERROR; 2778 } 2779 } 2780 2781 /* Fill the first-index table. */ 2782 first = slang->sl_rep_first; 2783 for (i = 0; i < 256; ++i) 2784 first[i] = -1; 2785 for (i = 0; i < gap->ga_len; ++i) 2786 { 2787 ftp = &((fromto_T *)gap->ga_data)[i]; 2788 if (first[*ftp->ft_from] == -1) 2789 first[*ftp->ft_from] = i; 2790 } 2791 return 0; 2792 } 2793 2794 /* 2795 * Read SN_SAL section: <salflags> <salcount> <sal> ... 2796 * Return SP_*ERROR flags. 2797 */ 2798 static int 2799 read_sal_section(fd, slang) 2800 FILE *fd; 2801 slang_T *slang; 2802 { 2803 int i; 2804 int cnt; 2805 garray_T *gap; 2806 salitem_T *smp; 2807 int ccnt; 2808 char_u *p; 2809 int c = NUL; 2810 2811 slang->sl_sofo = FALSE; 2812 2813 i = getc(fd); /* <salflags> */ 2814 if (i & SAL_F0LLOWUP) 2815 slang->sl_followup = TRUE; 2816 if (i & SAL_COLLAPSE) 2817 slang->sl_collapse = TRUE; 2818 if (i & SAL_REM_ACCENTS) 2819 slang->sl_rem_accents = TRUE; 2820 2821 cnt = (getc(fd) << 8) + getc(fd); /* <salcount> */ 2822 if (cnt < 0) 2823 return SP_TRUNCERROR; 2824 2825 gap = &slang->sl_sal; 2826 ga_init2(gap, sizeof(salitem_T), 10); 2827 if (ga_grow(gap, cnt) == FAIL) 2828 return SP_OTHERERROR; 2829 2830 /* <sal> : <salfromlen> <salfrom> <saltolen> <salto> */ 2831 for (; gap->ga_len < cnt; ++gap->ga_len) 2832 { 2833 smp = &((salitem_T *)gap->ga_data)[gap->ga_len]; 2834 ccnt = getc(fd); /* <salfromlen> */ 2835 if (ccnt < 0) 2836 return SP_TRUNCERROR; 2837 if ((p = alloc(ccnt + 2)) == NULL) 2838 return SP_OTHERERROR; 2839 smp->sm_lead = p; 2840 2841 /* Read up to the first special char into sm_lead. */ 2842 for (i = 0; i < ccnt; ++i) 2843 { 2844 c = getc(fd); /* <salfrom> */ 2845 if (vim_strchr((char_u *)"0123456789(-<^$", c) != NULL) 2846 break; 2847 *p++ = c; 2848 } 2849 smp->sm_leadlen = p - smp->sm_lead; 2850 *p++ = NUL; 2851 2852 /* Put (abc) chars in sm_oneof, if any. */ 2853 if (c == '(') 2854 { 2855 smp->sm_oneof = p; 2856 for (++i; i < ccnt; ++i) 2857 { 2858 c = getc(fd); /* <salfrom> */ 2859 if (c == ')') 2860 break; 2861 *p++ = c; 2862 } 2863 *p++ = NUL; 2864 if (++i < ccnt) 2865 c = getc(fd); 2866 } 2867 else 2868 smp->sm_oneof = NULL; 2869 2870 /* Any following chars go in sm_rules. */ 2871 smp->sm_rules = p; 2872 if (i < ccnt) 2873 /* store the char we got while checking for end of sm_lead */ 2874 *p++ = c; 2875 for (++i; i < ccnt; ++i) 2876 *p++ = getc(fd); /* <salfrom> */ 2877 *p++ = NUL; 2878 2879 /* <saltolen> <salto> */ 2880 smp->sm_to = read_cnt_string(fd, 1, &ccnt); 2881 if (ccnt < 0) 2882 { 2883 vim_free(smp->sm_lead); 2884 return ccnt; 2885 } 2886 2887 #ifdef FEAT_MBYTE 2888 if (has_mbyte) 2889 { 2890 /* convert the multi-byte strings to wide char strings */ 2891 smp->sm_lead_w = mb_str2wide(smp->sm_lead); 2892 smp->sm_leadlen = mb_charlen(smp->sm_lead); 2893 if (smp->sm_oneof == NULL) 2894 smp->sm_oneof_w = NULL; 2895 else 2896 smp->sm_oneof_w = mb_str2wide(smp->sm_oneof); 2897 if (smp->sm_to == NULL) 2898 smp->sm_to_w = NULL; 2899 else 2900 smp->sm_to_w = mb_str2wide(smp->sm_to); 2901 if (smp->sm_lead_w == NULL 2902 || (smp->sm_oneof_w == NULL && smp->sm_oneof != NULL) 2903 || (smp->sm_to_w == NULL && smp->sm_to != NULL)) 2904 { 2905 vim_free(smp->sm_lead); 2906 vim_free(smp->sm_to); 2907 vim_free(smp->sm_lead_w); 2908 vim_free(smp->sm_oneof_w); 2909 vim_free(smp->sm_to_w); 2910 return SP_OTHERERROR; 2911 } 2912 } 2913 #endif 2914 } 2915 2916 /* Fill the first-index table. */ 2917 set_sal_first(slang); 2918 2919 return 0; 2920 } 2921 2922 /* 2923 * SN_SOFO: <sofofromlen> <sofofrom> <sofotolen> <sofoto> 2924 * Return SP_*ERROR flags. 2925 */ 2926 static int 2927 read_sofo_section(fd, slang) 2928 FILE *fd; 2929 slang_T *slang; 2930 { 2931 int cnt; 2932 char_u *from, *to; 2933 int res; 2934 2935 slang->sl_sofo = TRUE; 2936 2937 /* <sofofromlen> <sofofrom> */ 2938 from = read_cnt_string(fd, 2, &cnt); 2939 if (cnt < 0) 2940 return cnt; 2941 2942 /* <sofotolen> <sofoto> */ 2943 to = read_cnt_string(fd, 2, &cnt); 2944 if (cnt < 0) 2945 { 2946 vim_free(from); 2947 return cnt; 2948 } 2949 2950 /* Store the info in slang->sl_sal and/or slang->sl_sal_first. */ 2951 if (from != NULL && to != NULL) 2952 res = set_sofo(slang, from, to); 2953 else if (from != NULL || to != NULL) 2954 res = SP_FORMERROR; /* only one of two strings is an error */ 2955 else 2956 res = 0; 2957 2958 vim_free(from); 2959 vim_free(to); 2960 return res; 2961 } 2962 2963 /* 2964 * Read the compound section from the .spl file: 2965 * <compmax> <compminlen> <compsylmax> <compflags> 2966 * Returns SP_*ERROR flags. 2967 */ 2968 static int 2969 read_compound(fd, slang, len) 2970 FILE *fd; 2971 slang_T *slang; 2972 int len; 2973 { 2974 int todo = len; 2975 int c; 2976 int atstart; 2977 char_u *pat; 2978 char_u *pp; 2979 char_u *cp; 2980 char_u *ap; 2981 2982 if (todo < 2) 2983 return SP_FORMERROR; /* need at least two bytes */ 2984 2985 --todo; 2986 c = getc(fd); /* <compmax> */ 2987 if (c < 2) 2988 c = MAXWLEN; 2989 slang->sl_compmax = c; 2990 2991 --todo; 2992 c = getc(fd); /* <compminlen> */ 2993 if (c < 1) 2994 c = 0; 2995 slang->sl_compminlen = c; 2996 2997 --todo; 2998 c = getc(fd); /* <compsylmax> */ 2999 if (c < 1) 3000 c = MAXWLEN; 3001 slang->sl_compsylmax = c; 3002 3003 /* Turn the COMPOUNDFLAGS items into a regexp pattern: 3004 * "a[bc]/a*b+" -> "^\(a[bc]\|a*b\+\)$". 3005 * Inserting backslashes may double the length, "^\(\)$<Nul>" is 7 bytes. 3006 * Conversion to utf-8 may double the size. */ 3007 c = todo * 2 + 7; 3008 #ifdef FEAT_MBYTE 3009 if (enc_utf8) 3010 c += todo * 2; 3011 #endif 3012 pat = alloc((unsigned)c); 3013 if (pat == NULL) 3014 return SP_OTHERERROR; 3015 3016 /* We also need a list of all flags that can appear at the start and one 3017 * for all flags. */ 3018 cp = alloc(todo + 1); 3019 if (cp == NULL) 3020 { 3021 vim_free(pat); 3022 return SP_OTHERERROR; 3023 } 3024 slang->sl_compstartflags = cp; 3025 *cp = NUL; 3026 3027 ap = alloc(todo + 1); 3028 if (ap == NULL) 3029 { 3030 vim_free(pat); 3031 return SP_OTHERERROR; 3032 } 3033 slang->sl_compallflags = ap; 3034 *ap = NUL; 3035 3036 pp = pat; 3037 *pp++ = '^'; 3038 *pp++ = '\\'; 3039 *pp++ = '('; 3040 3041 atstart = 1; 3042 while (todo-- > 0) 3043 { 3044 c = getc(fd); /* <compflags> */ 3045 3046 /* Add all flags to "sl_compallflags". */ 3047 if (vim_strchr((char_u *)"+*[]/", c) == NULL 3048 && !byte_in_str(slang->sl_compallflags, c)) 3049 { 3050 *ap++ = c; 3051 *ap = NUL; 3052 } 3053 3054 if (atstart != 0) 3055 { 3056 /* At start of item: copy flags to "sl_compstartflags". For a 3057 * [abc] item set "atstart" to 2 and copy up to the ']'. */ 3058 if (c == '[') 3059 atstart = 2; 3060 else if (c == ']') 3061 atstart = 0; 3062 else 3063 { 3064 if (!byte_in_str(slang->sl_compstartflags, c)) 3065 { 3066 *cp++ = c; 3067 *cp = NUL; 3068 } 3069 if (atstart == 1) 3070 atstart = 0; 3071 } 3072 } 3073 if (c == '/') /* slash separates two items */ 3074 { 3075 *pp++ = '\\'; 3076 *pp++ = '|'; 3077 atstart = 1; 3078 } 3079 else /* normal char, "[abc]" and '*' are copied as-is */ 3080 { 3081 if (c == '+' || c == '~') 3082 *pp++ = '\\'; /* "a+" becomes "a\+" */ 3083 #ifdef FEAT_MBYTE 3084 if (enc_utf8) 3085 pp += mb_char2bytes(c, pp); 3086 else 3087 #endif 3088 *pp++ = c; 3089 } 3090 } 3091 3092 *pp++ = '\\'; 3093 *pp++ = ')'; 3094 *pp++ = '$'; 3095 *pp = NUL; 3096 3097 slang->sl_compprog = vim_regcomp(pat, RE_MAGIC + RE_STRING + RE_STRICT); 3098 vim_free(pat); 3099 if (slang->sl_compprog == NULL) 3100 return SP_FORMERROR; 3101 3102 return 0; 3103 } 3104 3105 /* 3106 * Return TRUE if byte "n" appears in "str". 3107 * Like strchr() but independent of locale. 3108 */ 3109 static int 3110 byte_in_str(str, n) 3111 char_u *str; 3112 int n; 3113 { 3114 char_u *p; 3115 3116 for (p = str; *p != NUL; ++p) 3117 if (*p == n) 3118 return TRUE; 3119 return FALSE; 3120 } 3121 3122 #define SY_MAXLEN 30 3123 typedef struct syl_item_S 3124 { 3125 char_u sy_chars[SY_MAXLEN]; /* the sequence of chars */ 3126 int sy_len; 3127 } syl_item_T; 3128 3129 /* 3130 * Truncate "slang->sl_syllable" at the first slash and put the following items 3131 * in "slang->sl_syl_items". 3132 */ 3133 static int 3134 init_syl_tab(slang) 3135 slang_T *slang; 3136 { 3137 char_u *p; 3138 char_u *s; 3139 int l; 3140 syl_item_T *syl; 3141 3142 ga_init2(&slang->sl_syl_items, sizeof(syl_item_T), 4); 3143 p = vim_strchr(slang->sl_syllable, '/'); 3144 while (p != NULL) 3145 { 3146 *p++ = NUL; 3147 if (*p == NUL) /* trailing slash */ 3148 break; 3149 s = p; 3150 p = vim_strchr(p, '/'); 3151 if (p == NULL) 3152 l = STRLEN(s); 3153 else 3154 l = p - s; 3155 if (l >= SY_MAXLEN) 3156 return SP_FORMERROR; 3157 if (ga_grow(&slang->sl_syl_items, 1) == FAIL) 3158 return SP_OTHERERROR; 3159 syl = ((syl_item_T *)slang->sl_syl_items.ga_data) 3160 + slang->sl_syl_items.ga_len++; 3161 vim_strncpy(syl->sy_chars, s, l); 3162 syl->sy_len = l; 3163 } 3164 return OK; 3165 } 3166 3167 /* 3168 * Count the number of syllables in "word". 3169 * When "word" contains spaces the syllables after the last space are counted. 3170 * Returns zero if syllables are not defines. 3171 */ 3172 static int 3173 count_syllables(slang, word) 3174 slang_T *slang; 3175 char_u *word; 3176 { 3177 int cnt = 0; 3178 int skip = FALSE; 3179 char_u *p; 3180 int len; 3181 int i; 3182 syl_item_T *syl; 3183 int c; 3184 3185 if (slang->sl_syllable == NULL) 3186 return 0; 3187 3188 for (p = word; *p != NUL; p += len) 3189 { 3190 /* When running into a space reset counter. */ 3191 if (*p == ' ') 3192 { 3193 len = 1; 3194 cnt = 0; 3195 continue; 3196 } 3197 3198 /* Find longest match of syllable items. */ 3199 len = 0; 3200 for (i = 0; i < slang->sl_syl_items.ga_len; ++i) 3201 { 3202 syl = ((syl_item_T *)slang->sl_syl_items.ga_data) + i; 3203 if (syl->sy_len > len 3204 && STRNCMP(p, syl->sy_chars, syl->sy_len) == 0) 3205 len = syl->sy_len; 3206 } 3207 if (len != 0) /* found a match, count syllable */ 3208 { 3209 ++cnt; 3210 skip = FALSE; 3211 } 3212 else 3213 { 3214 /* No recognized syllable item, at least a syllable char then? */ 3215 #ifdef FEAT_MBYTE 3216 c = mb_ptr2char(p); 3217 len = (*mb_ptr2len)(p); 3218 #else 3219 c = *p; 3220 len = 1; 3221 #endif 3222 if (vim_strchr(slang->sl_syllable, c) == NULL) 3223 skip = FALSE; /* No, search for next syllable */ 3224 else if (!skip) 3225 { 3226 ++cnt; /* Yes, count it */ 3227 skip = TRUE; /* don't count following syllable chars */ 3228 } 3229 } 3230 } 3231 return cnt; 3232 } 3233 3234 /* 3235 * Set the SOFOFROM and SOFOTO items in language "lp". 3236 * Returns SP_*ERROR flags when there is something wrong. 3237 */ 3238 static int 3239 set_sofo(lp, from, to) 3240 slang_T *lp; 3241 char_u *from; 3242 char_u *to; 3243 { 3244 int i; 3245 3246 #ifdef FEAT_MBYTE 3247 garray_T *gap; 3248 char_u *s; 3249 char_u *p; 3250 int c; 3251 int *inp; 3252 3253 if (has_mbyte) 3254 { 3255 /* Use "sl_sal" as an array with 256 pointers to a list of wide 3256 * characters. The index is the low byte of the character. 3257 * The list contains from-to pairs with a terminating NUL. 3258 * sl_sal_first[] is used for latin1 "from" characters. */ 3259 gap = &lp->sl_sal; 3260 ga_init2(gap, sizeof(int *), 1); 3261 if (ga_grow(gap, 256) == FAIL) 3262 return SP_OTHERERROR; 3263 vim_memset(gap->ga_data, 0, sizeof(int *) * 256); 3264 gap->ga_len = 256; 3265 3266 /* First count the number of items for each list. Temporarily use 3267 * sl_sal_first[] for this. */ 3268 for (p = from, s = to; *p != NUL && *s != NUL; ) 3269 { 3270 c = mb_cptr2char_adv(&p); 3271 mb_cptr_adv(s); 3272 if (c >= 256) 3273 ++lp->sl_sal_first[c & 0xff]; 3274 } 3275 if (*p != NUL || *s != NUL) /* lengths differ */ 3276 return SP_FORMERROR; 3277 3278 /* Allocate the lists. */ 3279 for (i = 0; i < 256; ++i) 3280 if (lp->sl_sal_first[i] > 0) 3281 { 3282 p = alloc(sizeof(int) * (lp->sl_sal_first[i] * 2 + 1)); 3283 if (p == NULL) 3284 return SP_OTHERERROR; 3285 ((int **)gap->ga_data)[i] = (int *)p; 3286 *(int *)p = 0; 3287 } 3288 3289 /* Put the characters up to 255 in sl_sal_first[] the rest in a sl_sal 3290 * list. */ 3291 vim_memset(lp->sl_sal_first, 0, sizeof(salfirst_T) * 256); 3292 for (p = from, s = to; *p != NUL && *s != NUL; ) 3293 { 3294 c = mb_cptr2char_adv(&p); 3295 i = mb_cptr2char_adv(&s); 3296 if (c >= 256) 3297 { 3298 /* Append the from-to chars at the end of the list with 3299 * the low byte. */ 3300 inp = ((int **)gap->ga_data)[c & 0xff]; 3301 while (*inp != 0) 3302 ++inp; 3303 *inp++ = c; /* from char */ 3304 *inp++ = i; /* to char */ 3305 *inp++ = NUL; /* NUL at the end */ 3306 } 3307 else 3308 /* mapping byte to char is done in sl_sal_first[] */ 3309 lp->sl_sal_first[c] = i; 3310 } 3311 } 3312 else 3313 #endif 3314 { 3315 /* mapping bytes to bytes is done in sl_sal_first[] */ 3316 if (STRLEN(from) != STRLEN(to)) 3317 return SP_FORMERROR; 3318 3319 for (i = 0; to[i] != NUL; ++i) 3320 lp->sl_sal_first[from[i]] = to[i]; 3321 lp->sl_sal.ga_len = 1; /* indicates we have soundfolding */ 3322 } 3323 3324 return 0; 3325 } 3326 3327 /* 3328 * Fill the first-index table for "lp". 3329 */ 3330 static void 3331 set_sal_first(lp) 3332 slang_T *lp; 3333 { 3334 salfirst_T *sfirst; 3335 int i; 3336 salitem_T *smp; 3337 int c; 3338 garray_T *gap = &lp->sl_sal; 3339 3340 sfirst = lp->sl_sal_first; 3341 for (i = 0; i < 256; ++i) 3342 sfirst[i] = -1; 3343 smp = (salitem_T *)gap->ga_data; 3344 for (i = 0; i < gap->ga_len; ++i) 3345 { 3346 #ifdef FEAT_MBYTE 3347 if (has_mbyte) 3348 /* Use the lowest byte of the first character. For latin1 it's 3349 * the character, for other encodings it should differ for most 3350 * characters. */ 3351 c = *smp[i].sm_lead_w & 0xff; 3352 else 3353 #endif 3354 c = *smp[i].sm_lead; 3355 if (sfirst[c] == -1) 3356 { 3357 sfirst[c] = i; 3358 #ifdef FEAT_MBYTE 3359 if (has_mbyte) 3360 { 3361 int n; 3362 3363 /* Make sure all entries with this byte are following each 3364 * other. Move the ones that are in the wrong position. Do 3365 * keep the same ordering! */ 3366 while (i + 1 < gap->ga_len 3367 && (*smp[i + 1].sm_lead_w & 0xff) == c) 3368 /* Skip over entry with same index byte. */ 3369 ++i; 3370 3371 for (n = 1; i + n < gap->ga_len; ++n) 3372 if ((*smp[i + n].sm_lead_w & 0xff) == c) 3373 { 3374 salitem_T tsal; 3375 3376 /* Move entry with same index byte after the entries 3377 * we already found. */ 3378 ++i; 3379 --n; 3380 tsal = smp[i + n]; 3381 mch_memmove(smp + i + 1, smp + i, 3382 sizeof(salitem_T) * n); 3383 smp[i] = tsal; 3384 } 3385 } 3386 #endif 3387 } 3388 } 3389 } 3390 3391 #ifdef FEAT_MBYTE 3392 /* 3393 * Turn a multi-byte string into a wide character string. 3394 * Return it in allocated memory (NULL for out-of-memory) 3395 */ 3396 static int * 3397 mb_str2wide(s) 3398 char_u *s; 3399 { 3400 int *res; 3401 char_u *p; 3402 int i = 0; 3403 3404 res = (int *)alloc(sizeof(int) * (mb_charlen(s) + 1)); 3405 if (res != NULL) 3406 { 3407 for (p = s; *p != NUL; ) 3408 res[i++] = mb_ptr2char_adv(&p); 3409 res[i] = NUL; 3410 } 3411 return res; 3412 } 3413 #endif 3414 3415 /* 3416 * Read one row of siblings from the spell file and store it in the byte array 3417 * "byts" and index array "idxs". Recursively read the children. 3418 * 3419 * NOTE: The code here must match put_node(). 3420 * 3421 * Returns the index follosing the siblings. 3422 * Returns -1 if the file is shorter than expected. 3423 * Returns -2 if there is a format error. 3424 */ 3425 static idx_T 3426 read_tree(fd, byts, idxs, maxidx, startidx, prefixtree, maxprefcondnr) 3427 FILE *fd; 3428 char_u *byts; 3429 idx_T *idxs; 3430 int maxidx; /* size of arrays */ 3431 idx_T startidx; /* current index in "byts" and "idxs" */ 3432 int prefixtree; /* TRUE for reading PREFIXTREE */ 3433 int maxprefcondnr; /* maximum for <prefcondnr> */ 3434 { 3435 int len; 3436 int i; 3437 int n; 3438 idx_T idx = startidx; 3439 int c; 3440 int c2; 3441 #define SHARED_MASK 0x8000000 3442 3443 len = getc(fd); /* <siblingcount> */ 3444 if (len <= 0) 3445 return -1; 3446 3447 if (startidx + len >= maxidx) 3448 return -2; 3449 byts[idx++] = len; 3450 3451 /* Read the byte values, flag/region bytes and shared indexes. */ 3452 for (i = 1; i <= len; ++i) 3453 { 3454 c = getc(fd); /* <byte> */ 3455 if (c < 0) 3456 return -1; 3457 if (c <= BY_SPECIAL) 3458 { 3459 if (c == BY_NOFLAGS && !prefixtree) 3460 { 3461 /* No flags, all regions. */ 3462 idxs[idx] = 0; 3463 c = 0; 3464 } 3465 else if (c != BY_INDEX) 3466 { 3467 if (prefixtree) 3468 { 3469 /* Read the optional pflags byte, the prefix ID and the 3470 * condition nr. In idxs[] store the prefix ID in the low 3471 * byte, the condition index shifted up 8 bits, the flags 3472 * shifted up 24 bits. */ 3473 if (c == BY_FLAGS) 3474 c = getc(fd) << 24; /* <pflags> */ 3475 else 3476 c = 0; 3477 3478 c |= getc(fd); /* <affixID> */ 3479 3480 n = (getc(fd) << 8) + getc(fd); /* <prefcondnr> */ 3481 if (n >= maxprefcondnr) 3482 return -2; 3483 c |= (n << 8); 3484 } 3485 else /* c must be BY_FLAGS or BY_FLAGS2 */ 3486 { 3487 /* Read flags and optional region and prefix ID. In 3488 * idxs[] the flags go in the low two bytes, region above 3489 * that and prefix ID above the region. */ 3490 c2 = c; 3491 c = getc(fd); /* <flags> */ 3492 if (c2 == BY_FLAGS2) 3493 c = (getc(fd) << 8) + c; /* <flags2> */ 3494 if (c & WF_REGION) 3495 c = (getc(fd) << 16) + c; /* <region> */ 3496 if (c & WF_AFX) 3497 c = (getc(fd) << 24) + c; /* <affixID> */ 3498 } 3499 3500 idxs[idx] = c; 3501 c = 0; 3502 } 3503 else /* c == BY_INDEX */ 3504 { 3505 /* <nodeidx> */ 3506 n = (getc(fd) << 16) + (getc(fd) << 8) + getc(fd); 3507 if (n < 0 || n >= maxidx) 3508 return -2; 3509 idxs[idx] = n + SHARED_MASK; 3510 c = getc(fd); /* <xbyte> */ 3511 } 3512 } 3513 byts[idx++] = c; 3514 } 3515 3516 /* Recursively read the children for non-shared siblings. 3517 * Skip the end-of-word ones (zero byte value) and the shared ones (and 3518 * remove SHARED_MASK) */ 3519 for (i = 1; i <= len; ++i) 3520 if (byts[startidx + i] != 0) 3521 { 3522 if (idxs[startidx + i] & SHARED_MASK) 3523 idxs[startidx + i] &= ~SHARED_MASK; 3524 else 3525 { 3526 idxs[startidx + i] = idx; 3527 idx = read_tree(fd, byts, idxs, maxidx, idx, 3528 prefixtree, maxprefcondnr); 3529 if (idx < 0) 3530 break; 3531 } 3532 } 3533 3534 return idx; 3535 } 3536 3537 /* 3538 * Parse 'spelllang' and set buf->b_langp accordingly. 3539 * Returns NULL if it's OK, an error message otherwise. 3540 */ 3541 char_u * 3542 did_set_spelllang(buf) 3543 buf_T *buf; 3544 { 3545 garray_T ga; 3546 char_u *splp; 3547 char_u *region; 3548 char_u region_cp[3]; 3549 int filename; 3550 int region_mask; 3551 slang_T *slang; 3552 int c; 3553 char_u lang[MAXWLEN + 1]; 3554 char_u spf_name[MAXPATHL]; 3555 int len; 3556 char_u *p; 3557 int round; 3558 char_u *spf; 3559 char_u *use_region = NULL; 3560 int dont_use_region = FALSE; 3561 int nobreak = FALSE; 3562 int i, j; 3563 langp_T *lp, *lp2; 3564 3565 ga_init2(&ga, sizeof(langp_T), 2); 3566 clear_midword(buf); 3567 3568 /* loop over comma separated language names. */ 3569 for (splp = buf->b_p_spl; *splp != NUL; ) 3570 { 3571 /* Get one language name. */ 3572 copy_option_part(&splp, lang, MAXWLEN, ","); 3573 3574 region = NULL; 3575 len = STRLEN(lang); 3576 3577 /* If the name ends in ".spl" use it as the name of the spell file. 3578 * If there is a region name let "region" point to it and remove it 3579 * from the name. */ 3580 if (len > 4 && fnamecmp(lang + len - 4, ".spl") == 0) 3581 { 3582 filename = TRUE; 3583 3584 /* Locate a region and remove it from the file name. */ 3585 p = vim_strchr(gettail(lang), '_'); 3586 if (p != NULL && ASCII_ISALPHA(p[1]) && ASCII_ISALPHA(p[2]) 3587 && !ASCII_ISALPHA(p[3])) 3588 { 3589 vim_strncpy(region_cp, p + 1, 2); 3590 mch_memmove(p, p + 3, len - (p - lang) - 2); 3591 len -= 3; 3592 region = region_cp; 3593 } 3594 else 3595 dont_use_region = TRUE; 3596 3597 /* Check if we loaded this language before. */ 3598 for (slang = first_lang; slang != NULL; slang = slang->sl_next) 3599 if (fullpathcmp(lang, slang->sl_fname, FALSE) == FPC_SAME) 3600 break; 3601 } 3602 else 3603 { 3604 filename = FALSE; 3605 if (len > 3 && lang[len - 3] == '_') 3606 { 3607 region = lang + len - 2; 3608 len -= 3; 3609 lang[len] = NUL; 3610 } 3611 else 3612 dont_use_region = TRUE; 3613 3614 /* Check if we loaded this language before. */ 3615 for (slang = first_lang; slang != NULL; slang = slang->sl_next) 3616 if (STRICMP(lang, slang->sl_name) == 0) 3617 break; 3618 } 3619 3620 if (region != NULL) 3621 { 3622 /* If the region differs from what was used before then don't 3623 * use it for 'spellfile'. */ 3624 if (use_region != NULL && STRCMP(region, use_region) != 0) 3625 dont_use_region = TRUE; 3626 use_region = region; 3627 } 3628 3629 /* If not found try loading the language now. */ 3630 if (slang == NULL) 3631 { 3632 if (filename) 3633 (void)spell_load_file(lang, lang, NULL, FALSE); 3634 else 3635 spell_load_lang(lang); 3636 } 3637 3638 /* 3639 * Loop over the languages, there can be several files for "lang". 3640 */ 3641 for (slang = first_lang; slang != NULL; slang = slang->sl_next) 3642 if (filename ? fullpathcmp(lang, slang->sl_fname, FALSE) == FPC_SAME 3643 : STRICMP(lang, slang->sl_name) == 0) 3644 { 3645 region_mask = REGION_ALL; 3646 if (!filename && region != NULL) 3647 { 3648 /* find region in sl_regions */ 3649 c = find_region(slang->sl_regions, region); 3650 if (c == REGION_ALL) 3651 { 3652 if (slang->sl_add) 3653 { 3654 if (*slang->sl_regions != NUL) 3655 /* This addition file is for other regions. */ 3656 region_mask = 0; 3657 } 3658 else 3659 /* This is probably an error. Give a warning and 3660 * accept the words anyway. */ 3661 smsg((char_u *) 3662 _("Warning: region %s not supported"), 3663 region); 3664 } 3665 else 3666 region_mask = 1 << c; 3667 } 3668 3669 if (region_mask != 0) 3670 { 3671 if (ga_grow(&ga, 1) == FAIL) 3672 { 3673 ga_clear(&ga); 3674 return e_outofmem; 3675 } 3676 LANGP_ENTRY(ga, ga.ga_len)->lp_slang = slang; 3677 LANGP_ENTRY(ga, ga.ga_len)->lp_region = region_mask; 3678 ++ga.ga_len; 3679 use_midword(slang, buf); 3680 if (slang->sl_nobreak) 3681 nobreak = TRUE; 3682 } 3683 } 3684 } 3685 3686 /* round 0: load int_wordlist, if possible. 3687 * round 1: load first name in 'spellfile'. 3688 * round 2: load second name in 'spellfile. 3689 * etc. */ 3690 spf = curbuf->b_p_spf; 3691 for (round = 0; round == 0 || *spf != NUL; ++round) 3692 { 3693 if (round == 0) 3694 { 3695 /* Internal wordlist, if there is one. */ 3696 if (int_wordlist == NULL) 3697 continue; 3698 int_wordlist_spl(spf_name); 3699 } 3700 else 3701 { 3702 /* One entry in 'spellfile'. */ 3703 copy_option_part(&spf, spf_name, MAXPATHL - 5, ","); 3704 STRCAT(spf_name, ".spl"); 3705 3706 /* If it was already found above then skip it. */ 3707 for (c = 0; c < ga.ga_len; ++c) 3708 { 3709 p = LANGP_ENTRY(ga, c)->lp_slang->sl_fname; 3710 if (p != NULL && fullpathcmp(spf_name, p, FALSE) == FPC_SAME) 3711 break; 3712 } 3713 if (c < ga.ga_len) 3714 continue; 3715 } 3716 3717 /* Check if it was loaded already. */ 3718 for (slang = first_lang; slang != NULL; slang = slang->sl_next) 3719 if (fullpathcmp(spf_name, slang->sl_fname, FALSE) == FPC_SAME) 3720 break; 3721 if (slang == NULL) 3722 { 3723 /* Not loaded, try loading it now. The language name includes the 3724 * region name, the region is ignored otherwise. for int_wordlist 3725 * use an arbitrary name. */ 3726 if (round == 0) 3727 STRCPY(lang, "internal wordlist"); 3728 else 3729 { 3730 vim_strncpy(lang, gettail(spf_name), MAXWLEN); 3731 p = vim_strchr(lang, '.'); 3732 if (p != NULL) 3733 *p = NUL; /* truncate at ".encoding.add" */ 3734 } 3735 slang = spell_load_file(spf_name, lang, NULL, TRUE); 3736 3737 /* If one of the languages has NOBREAK we assume the addition 3738 * files also have this. */ 3739 if (slang != NULL && nobreak) 3740 slang->sl_nobreak = TRUE; 3741 } 3742 if (slang != NULL && ga_grow(&ga, 1) == OK) 3743 { 3744 region_mask = REGION_ALL; 3745 if (use_region != NULL && !dont_use_region) 3746 { 3747 /* find region in sl_regions */ 3748 c = find_region(slang->sl_regions, use_region); 3749 if (c != REGION_ALL) 3750 region_mask = 1 << c; 3751 else if (*slang->sl_regions != NUL) 3752 /* This spell file is for other regions. */ 3753 region_mask = 0; 3754 } 3755 3756 if (region_mask != 0) 3757 { 3758 LANGP_ENTRY(ga, ga.ga_len)->lp_slang = slang; 3759 LANGP_ENTRY(ga, ga.ga_len)->lp_sallang = NULL; 3760 LANGP_ENTRY(ga, ga.ga_len)->lp_replang = NULL; 3761 LANGP_ENTRY(ga, ga.ga_len)->lp_region = region_mask; 3762 ++ga.ga_len; 3763 use_midword(slang, buf); 3764 } 3765 } 3766 } 3767 3768 /* Everything is fine, store the new b_langp value. */ 3769 ga_clear(&buf->b_langp); 3770 buf->b_langp = ga; 3771 3772 /* For each language figure out what language to use for sound folding and 3773 * REP items. If the language doesn't support it itself use another one 3774 * with the same name. E.g. for "en-math" use "en". */ 3775 for (i = 0; i < ga.ga_len; ++i) 3776 { 3777 lp = LANGP_ENTRY(ga, i); 3778 3779 /* sound folding */ 3780 if (lp->lp_slang->sl_sal.ga_len > 0) 3781 /* language does sound folding itself */ 3782 lp->lp_sallang = lp->lp_slang; 3783 else 3784 /* find first similar language that does sound folding */ 3785 for (j = 0; j < ga.ga_len; ++j) 3786 { 3787 lp2 = LANGP_ENTRY(ga, j); 3788 if (lp2->lp_slang->sl_sal.ga_len > 0 3789 && STRNCMP(lp->lp_slang->sl_name, 3790 lp2->lp_slang->sl_name, 2) == 0) 3791 { 3792 lp->lp_sallang = lp2->lp_slang; 3793 break; 3794 } 3795 } 3796 3797 /* REP items */ 3798 if (lp->lp_slang->sl_rep.ga_len > 0) 3799 /* language has REP items itself */ 3800 lp->lp_replang = lp->lp_slang; 3801 else 3802 /* find first similar language that does sound folding */ 3803 for (j = 0; j < ga.ga_len; ++j) 3804 { 3805 lp2 = LANGP_ENTRY(ga, j); 3806 if (lp2->lp_slang->sl_rep.ga_len > 0 3807 && STRNCMP(lp->lp_slang->sl_name, 3808 lp2->lp_slang->sl_name, 2) == 0) 3809 { 3810 lp->lp_replang = lp2->lp_slang; 3811 break; 3812 } 3813 } 3814 } 3815 3816 return NULL; 3817 } 3818 3819 /* 3820 * Clear the midword characters for buffer "buf". 3821 */ 3822 static void 3823 clear_midword(buf) 3824 buf_T *buf; 3825 { 3826 vim_memset(buf->b_spell_ismw, 0, 256); 3827 #ifdef FEAT_MBYTE 3828 vim_free(buf->b_spell_ismw_mb); 3829 buf->b_spell_ismw_mb = NULL; 3830 #endif 3831 } 3832 3833 /* 3834 * Use the "sl_midword" field of language "lp" for buffer "buf". 3835 * They add up to any currently used midword characters. 3836 */ 3837 static void 3838 use_midword(lp, buf) 3839 slang_T *lp; 3840 buf_T *buf; 3841 { 3842 char_u *p; 3843 3844 if (lp->sl_midword == NULL) /* there aren't any */ 3845 return; 3846 3847 for (p = lp->sl_midword; *p != NUL; ) 3848 #ifdef FEAT_MBYTE 3849 if (has_mbyte) 3850 { 3851 int c, l, n; 3852 char_u *bp; 3853 3854 c = mb_ptr2char(p); 3855 l = (*mb_ptr2len)(p); 3856 if (c < 256 && l <= 2) 3857 buf->b_spell_ismw[c] = TRUE; 3858 else if (buf->b_spell_ismw_mb == NULL) 3859 /* First multi-byte char in "b_spell_ismw_mb". */ 3860 buf->b_spell_ismw_mb = vim_strnsave(p, l); 3861 else 3862 { 3863 /* Append multi-byte chars to "b_spell_ismw_mb". */ 3864 n = STRLEN(buf->b_spell_ismw_mb); 3865 bp = vim_strnsave(buf->b_spell_ismw_mb, n + l); 3866 if (bp != NULL) 3867 { 3868 vim_free(buf->b_spell_ismw_mb); 3869 buf->b_spell_ismw_mb = bp; 3870 vim_strncpy(bp + n, p, l); 3871 } 3872 } 3873 p += l; 3874 } 3875 else 3876 #endif 3877 buf->b_spell_ismw[*p++] = TRUE; 3878 } 3879 3880 /* 3881 * Find the region "region[2]" in "rp" (points to "sl_regions"). 3882 * Each region is simply stored as the two characters of it's name. 3883 * Returns the index if found (first is 0), REGION_ALL if not found. 3884 */ 3885 static int 3886 find_region(rp, region) 3887 char_u *rp; 3888 char_u *region; 3889 { 3890 int i; 3891 3892 for (i = 0; ; i += 2) 3893 { 3894 if (rp[i] == NUL) 3895 return REGION_ALL; 3896 if (rp[i] == region[0] && rp[i + 1] == region[1]) 3897 break; 3898 } 3899 return i / 2; 3900 } 3901 3902 /* 3903 * Return case type of word: 3904 * w word 0 3905 * Word WF_ONECAP 3906 * W WORD WF_ALLCAP 3907 * WoRd wOrd WF_KEEPCAP 3908 */ 3909 static int 3910 captype(word, end) 3911 char_u *word; 3912 char_u *end; /* When NULL use up to NUL byte. */ 3913 { 3914 char_u *p; 3915 int c; 3916 int firstcap; 3917 int allcap; 3918 int past_second = FALSE; /* past second word char */ 3919 3920 /* find first letter */ 3921 for (p = word; !spell_iswordp_nmw(p); mb_ptr_adv(p)) 3922 if (end == NULL ? *p == NUL : p >= end) 3923 return 0; /* only non-word characters, illegal word */ 3924 #ifdef FEAT_MBYTE 3925 if (has_mbyte) 3926 c = mb_ptr2char_adv(&p); 3927 else 3928 #endif 3929 c = *p++; 3930 firstcap = allcap = SPELL_ISUPPER(c); 3931 3932 /* 3933 * Need to check all letters to find a word with mixed upper/lower. 3934 * But a word with an upper char only at start is a ONECAP. 3935 */ 3936 for ( ; end == NULL ? *p != NUL : p < end; mb_ptr_adv(p)) 3937 if (spell_iswordp_nmw(p)) 3938 { 3939 c = PTR2CHAR(p); 3940 if (!SPELL_ISUPPER(c)) 3941 { 3942 /* UUl -> KEEPCAP */ 3943 if (past_second && allcap) 3944 return WF_KEEPCAP; 3945 allcap = FALSE; 3946 } 3947 else if (!allcap) 3948 /* UlU -> KEEPCAP */ 3949 return WF_KEEPCAP; 3950 past_second = TRUE; 3951 } 3952 3953 if (allcap) 3954 return WF_ALLCAP; 3955 if (firstcap) 3956 return WF_ONECAP; 3957 return 0; 3958 } 3959 3960 /* 3961 * Like captype() but for a KEEPCAP word add ONECAP if the word starts with a 3962 * capital. So that make_case_word() can turn WOrd into Word. 3963 * Add ALLCAP for "WOrD". 3964 */ 3965 static int 3966 badword_captype(word, end) 3967 char_u *word; 3968 char_u *end; 3969 { 3970 int flags = captype(word, end); 3971 int c; 3972 int l, u; 3973 int first; 3974 char_u *p; 3975 3976 if (flags & WF_KEEPCAP) 3977 { 3978 /* Count the number of UPPER and lower case letters. */ 3979 l = u = 0; 3980 first = FALSE; 3981 for (p = word; p < end; mb_ptr_adv(p)) 3982 { 3983 c = PTR2CHAR(p); 3984 if (SPELL_ISUPPER(c)) 3985 { 3986 ++u; 3987 if (p == word) 3988 first = TRUE; 3989 } 3990 else 3991 ++l; 3992 } 3993 3994 /* If there are more UPPER than lower case letters suggest an 3995 * ALLCAP word. Otherwise, if the first letter is UPPER then 3996 * suggest ONECAP. Exception: "ALl" most likely should be "All", 3997 * require three upper case letters. */ 3998 if (u > l && u > 2) 3999 flags |= WF_ALLCAP; 4000 else if (first) 4001 flags |= WF_ONECAP; 4002 } 4003 return flags; 4004 } 4005 4006 # if defined(FEAT_MBYTE) || defined(EXITFREE) || defined(PROTO) 4007 /* 4008 * Free all languages. 4009 */ 4010 void 4011 spell_free_all() 4012 { 4013 slang_T *slang; 4014 buf_T *buf; 4015 char_u fname[MAXPATHL]; 4016 4017 /* Go through all buffers and handle 'spelllang'. */ 4018 for (buf = firstbuf; buf != NULL; buf = buf->b_next) 4019 ga_clear(&buf->b_langp); 4020 4021 while (first_lang != NULL) 4022 { 4023 slang = first_lang; 4024 first_lang = slang->sl_next; 4025 slang_free(slang); 4026 } 4027 4028 if (int_wordlist != NULL) 4029 { 4030 /* Delete the internal wordlist and its .spl file */ 4031 mch_remove(int_wordlist); 4032 int_wordlist_spl(fname); 4033 mch_remove(fname); 4034 vim_free(int_wordlist); 4035 int_wordlist = NULL; 4036 } 4037 4038 init_spell_chartab(); 4039 } 4040 # endif 4041 4042 # if defined(FEAT_MBYTE) || defined(PROTO) 4043 /* 4044 * Clear all spelling tables and reload them. 4045 * Used after 'encoding' is set and when ":mkspell" was used. 4046 */ 4047 void 4048 spell_reload() 4049 { 4050 buf_T *buf; 4051 win_T *wp; 4052 4053 /* Initialize the table for spell_iswordp(). */ 4054 init_spell_chartab(); 4055 4056 /* Unload all allocated memory. */ 4057 spell_free_all(); 4058 4059 /* Go through all buffers and handle 'spelllang'. */ 4060 for (buf = firstbuf; buf != NULL; buf = buf->b_next) 4061 { 4062 /* Only load the wordlists when 'spelllang' is set and there is a 4063 * window for this buffer in which 'spell' is set. */ 4064 if (*buf->b_p_spl != NUL) 4065 { 4066 FOR_ALL_WINDOWS(wp) 4067 if (wp->w_buffer == buf && wp->w_p_spell) 4068 { 4069 (void)did_set_spelllang(buf); 4070 # ifdef FEAT_WINDOWS 4071 break; 4072 # endif 4073 } 4074 } 4075 } 4076 } 4077 # endif 4078 4079 /* 4080 * Reload the spell file "fname" if it's loaded. 4081 */ 4082 static void 4083 spell_reload_one(fname, added_word) 4084 char_u *fname; 4085 int added_word; /* invoked through "zg" */ 4086 { 4087 slang_T *slang; 4088 int didit = FALSE; 4089 4090 for (slang = first_lang; slang != NULL; slang = slang->sl_next) 4091 { 4092 if (fullpathcmp(fname, slang->sl_fname, FALSE) == FPC_SAME) 4093 { 4094 slang_clear(slang); 4095 if (spell_load_file(fname, NULL, slang, FALSE) == NULL) 4096 /* reloading failed, clear the language */ 4097 slang_clear(slang); 4098 redraw_all_later(NOT_VALID); 4099 didit = TRUE; 4100 } 4101 } 4102 4103 /* When "zg" was used and the file wasn't loaded yet, should redo 4104 * 'spelllang' to get it loaded. */ 4105 if (added_word && !didit) 4106 did_set_spelllang(curbuf); 4107 } 4108 4109 4110 /* 4111 * Functions for ":mkspell". 4112 */ 4113 4114 #define MAXLINELEN 500 /* Maximum length in bytes of a line in a .aff 4115 and .dic file. */ 4116 /* 4117 * Main structure to store the contents of a ".aff" file. 4118 */ 4119 typedef struct afffile_S 4120 { 4121 char_u *af_enc; /* "SET", normalized, alloc'ed string or NULL */ 4122 int af_flagtype; /* AFT_CHAR, AFT_LONG, AFT_NUM or AFT_CAPLONG */ 4123 int af_slash; /* character used in word for slash */ 4124 unsigned af_rar; /* RAR ID for rare word */ 4125 unsigned af_kep; /* KEP ID for keep-case word */ 4126 unsigned af_bad; /* BAD ID for banned word */ 4127 unsigned af_needaffix; /* NEEDAFFIX ID */ 4128 unsigned af_needcomp; /* NEEDCOMPOUND ID */ 4129 int af_pfxpostpone; /* postpone prefixes without chop string */ 4130 hashtab_T af_pref; /* hashtable for prefixes, affheader_T */ 4131 hashtab_T af_suff; /* hashtable for suffixes, affheader_T */ 4132 hashtab_T af_comp; /* hashtable for compound flags, compitem_T */ 4133 } afffile_T; 4134 4135 #define AFT_CHAR 0 /* flags are one character */ 4136 #define AFT_LONG 1 /* flags are two characters */ 4137 #define AFT_CAPLONG 2 /* flags are one or two characters */ 4138 #define AFT_NUM 3 /* flags are numbers, comma separated */ 4139 4140 typedef struct affentry_S affentry_T; 4141 /* Affix entry from ".aff" file. Used for prefixes and suffixes. */ 4142 struct affentry_S 4143 { 4144 affentry_T *ae_next; /* next affix with same name/number */ 4145 char_u *ae_chop; /* text to chop off basic word (can be NULL) */ 4146 char_u *ae_add; /* text to add to basic word (can be NULL) */ 4147 char_u *ae_cond; /* condition (NULL for ".") */ 4148 regprog_T *ae_prog; /* regexp program for ae_cond or NULL */ 4149 char_u ae_rare; /* rare affix */ 4150 char_u ae_nocomp; /* word with affix not compoundable */ 4151 }; 4152 4153 #ifdef FEAT_MBYTE 4154 # define AH_KEY_LEN 17 /* 2 x 8 bytes + NUL */ 4155 #else 4156 # define AH_KEY_LEN 7 /* 6 digits + NUL */ 4157 #endif 4158 4159 /* Affix header from ".aff" file. Used for af_pref and af_suff. */ 4160 typedef struct affheader_S 4161 { 4162 char_u ah_key[AH_KEY_LEN]; /* key for hashtab == name of affix */ 4163 unsigned ah_flag; /* affix name as number, uses "af_flagtype" */ 4164 int ah_newID; /* prefix ID after renumbering; 0 if not used */ 4165 int ah_combine; /* suffix may combine with prefix */ 4166 int ah_follows; /* another affix block should be following */ 4167 affentry_T *ah_first; /* first affix entry */ 4168 } affheader_T; 4169 4170 #define HI2AH(hi) ((affheader_T *)(hi)->hi_key) 4171 4172 /* Flag used in compound items. */ 4173 typedef struct compitem_S 4174 { 4175 char_u ci_key[AH_KEY_LEN]; /* key for hashtab == name of compound */ 4176 unsigned ci_flag; /* affix name as number, uses "af_flagtype" */ 4177 int ci_newID; /* affix ID after renumbering. */ 4178 } compitem_T; 4179 4180 #define HI2CI(hi) ((compitem_T *)(hi)->hi_key) 4181 4182 /* 4183 * Structure that is used to store the items in the word tree. This avoids 4184 * the need to keep track of each allocated thing, everything is freed all at 4185 * once after ":mkspell" is done. 4186 */ 4187 #define SBLOCKSIZE 16000 /* size of sb_data */ 4188 typedef struct sblock_S sblock_T; 4189 struct sblock_S 4190 { 4191 sblock_T *sb_next; /* next block in list */ 4192 int sb_used; /* nr of bytes already in use */ 4193 char_u sb_data[1]; /* data, actually longer */ 4194 }; 4195 4196 /* 4197 * A node in the tree. 4198 */ 4199 typedef struct wordnode_S wordnode_T; 4200 struct wordnode_S 4201 { 4202 union /* shared to save space */ 4203 { 4204 char_u hashkey[6]; /* the hash key, only used while compressing */ 4205 int index; /* index in written nodes (valid after first 4206 round) */ 4207 } wn_u1; 4208 union /* shared to save space */ 4209 { 4210 wordnode_T *next; /* next node with same hash key */ 4211 wordnode_T *wnode; /* parent node that will write this node */ 4212 } wn_u2; 4213 wordnode_T *wn_child; /* child (next byte in word) */ 4214 wordnode_T *wn_sibling; /* next sibling (alternate byte in word, 4215 always sorted) */ 4216 int wn_refs; /* Nr. of references to this node. Only 4217 relevant for first node in a list of 4218 siblings, in following siblings it is 4219 always one. */ 4220 char_u wn_byte; /* Byte for this node. NUL for word end */ 4221 char_u wn_affixID; /* when "wn_byte" is NUL: supported/required 4222 prefix ID or 0 */ 4223 short_u wn_flags; /* when "wn_byte" is NUL: WF_ flags */ 4224 short wn_region; /* when "wn_byte" is NUL: region mask; for 4225 PREFIXTREE it's the prefcondnr */ 4226 #ifdef SPELL_PRINTTREE 4227 int wn_nr; /* sequence nr for printing */ 4228 #endif 4229 }; 4230 4231 #define WN_MASK 0xffff /* mask relevant bits of "wn_flags" */ 4232 4233 #define HI2WN(hi) (wordnode_T *)((hi)->hi_key) 4234 4235 /* 4236 * Info used while reading the spell files. 4237 */ 4238 typedef struct spellinfo_S 4239 { 4240 wordnode_T *si_foldroot; /* tree with case-folded words */ 4241 long si_foldwcount; /* nr of words in si_foldroot */ 4242 4243 wordnode_T *si_keeproot; /* tree with keep-case words */ 4244 long si_keepwcount; /* nr of words in si_keeproot */ 4245 4246 wordnode_T *si_prefroot; /* tree with postponed prefixes */ 4247 4248 sblock_T *si_blocks; /* memory blocks used */ 4249 long si_blocks_cnt; /* memory blocks allocated */ 4250 long si_compress_cnt; /* words to add before lowering 4251 compression limit */ 4252 wordnode_T *si_first_free; /* List of nodes that have been freed during 4253 compression, linked by "wn_child" field. */ 4254 long si_free_count; /* number of nodes in si_first_free */ 4255 #ifdef SPELL_PRINTTREE 4256 int si_wordnode_nr; /* sequence nr for nodes */ 4257 #endif 4258 4259 4260 int si_ascii; /* handling only ASCII words */ 4261 int si_add; /* addition file */ 4262 int si_clear_chartab; /* when TRUE clear char tables */ 4263 int si_region; /* region mask */ 4264 vimconv_T si_conv; /* for conversion to 'encoding' */ 4265 int si_memtot; /* runtime memory used */ 4266 int si_verbose; /* verbose messages */ 4267 int si_msg_count; /* number of words added since last message */ 4268 int si_region_count; /* number of regions supported (1 when there 4269 are no regions) */ 4270 char_u si_region_name[16]; /* region names; used only if 4271 * si_region_count > 1) */ 4272 4273 garray_T si_rep; /* list of fromto_T entries from REP lines */ 4274 garray_T si_sal; /* list of fromto_T entries from SAL lines */ 4275 char_u *si_sofofr; /* SOFOFROM text */ 4276 char_u *si_sofoto; /* SOFOTO text */ 4277 int si_followup; /* soundsalike: ? */ 4278 int si_collapse; /* soundsalike: ? */ 4279 int si_rem_accents; /* soundsalike: remove accents */ 4280 garray_T si_map; /* MAP info concatenated */ 4281 char_u *si_midword; /* MIDWORD chars or NULL */ 4282 int si_compmax; /* max nr of words for compounding */ 4283 int si_compminlen; /* minimal length for compounding */ 4284 int si_compsylmax; /* max nr of syllables for compounding */ 4285 char_u *si_compflags; /* flags used for compounding */ 4286 char_u si_nobreak; /* NOBREAK */ 4287 char_u *si_syllable; /* syllable string */ 4288 garray_T si_prefcond; /* table with conditions for postponed 4289 * prefixes, each stored as a string */ 4290 int si_newprefID; /* current value for ah_newID */ 4291 int si_newcompID; /* current value for compound ID */ 4292 } spellinfo_T; 4293 4294 static afffile_T *spell_read_aff __ARGS((spellinfo_T *spin, char_u *fname)); 4295 static unsigned affitem2flag __ARGS((int flagtype, char_u *item, char_u *fname, int lnum)); 4296 static unsigned get_affitem __ARGS((int flagtype, char_u **pp)); 4297 static void process_compflags __ARGS((spellinfo_T *spin, afffile_T *aff, char_u *compflags)); 4298 static void check_renumber __ARGS((spellinfo_T *spin)); 4299 static int flag_in_afflist __ARGS((int flagtype, char_u *afflist, unsigned flag)); 4300 static void aff_check_number __ARGS((int spinval, int affval, char *name)); 4301 static void aff_check_string __ARGS((char_u *spinval, char_u *affval, char *name)); 4302 static int str_equal __ARGS((char_u *s1, char_u *s2)); 4303 static void add_fromto __ARGS((spellinfo_T *spin, garray_T *gap, char_u *from, char_u *to)); 4304 static int sal_to_bool __ARGS((char_u *s)); 4305 static int has_non_ascii __ARGS((char_u *s)); 4306 static void spell_free_aff __ARGS((afffile_T *aff)); 4307 static int spell_read_dic __ARGS((spellinfo_T *spin, char_u *fname, afffile_T *affile)); 4308 static int get_pfxlist __ARGS((afffile_T *affile, char_u *afflist, char_u *store_afflist)); 4309 static void get_compflags __ARGS((afffile_T *affile, char_u *afflist, char_u *store_afflist)); 4310 static int store_aff_word __ARGS((spellinfo_T *spin, char_u *word, char_u *afflist, afffile_T *affile, hashtab_T *ht, hashtab_T *xht, int comb, int flags, char_u *pfxlist, int pfxlen)); 4311 static int spell_read_wordfile __ARGS((spellinfo_T *spin, char_u *fname)); 4312 static void *getroom __ARGS((spellinfo_T *spin, size_t len, int align)); 4313 static char_u *getroom_save __ARGS((spellinfo_T *spin, char_u *s)); 4314 static void free_blocks __ARGS((sblock_T *bl)); 4315 static wordnode_T *wordtree_alloc __ARGS((spellinfo_T *spin)); 4316 static int store_word __ARGS((spellinfo_T *spin, char_u *word, int flags, int region, char_u *pfxlist, int need_affix)); 4317 static int tree_add_word __ARGS((spellinfo_T *spin, char_u *word, wordnode_T *tree, int flags, int region, int affixID)); 4318 static wordnode_T *get_wordnode __ARGS((spellinfo_T *spin)); 4319 static void deref_wordnode __ARGS((spellinfo_T *spin, wordnode_T *node)); 4320 static void free_wordnode __ARGS((spellinfo_T *spin, wordnode_T *n)); 4321 static void wordtree_compress __ARGS((spellinfo_T *spin, wordnode_T *root)); 4322 static int node_compress __ARGS((spellinfo_T *spin, wordnode_T *node, hashtab_T *ht, int *tot)); 4323 static int node_equal __ARGS((wordnode_T *n1, wordnode_T *n2)); 4324 static int write_vim_spell __ARGS((spellinfo_T *spin, char_u *fname)); 4325 static void clear_node __ARGS((wordnode_T *node)); 4326 static int put_node __ARGS((FILE *fd, wordnode_T *node, int index, int regionmask, int prefixtree)); 4327 static void mkspell __ARGS((int fcount, char_u **fnames, int ascii, int overwrite, int added_word)); 4328 static void init_spellfile __ARGS((void)); 4329 4330 /* In the postponed prefixes tree wn_flags is used to store the WFP_ flags, 4331 * but it must be negative to indicate the prefix tree to tree_add_word(). 4332 * Use a negative number with the lower 8 bits zero. */ 4333 #define PFX_FLAGS -256 4334 4335 /* 4336 * Tunable parameters for when the tree is compressed. See 'mkspellmem'. 4337 */ 4338 static long compress_start = 30000; /* memory / SBLOCKSIZE */ 4339 static long compress_inc = 100; /* memory / SBLOCKSIZE */ 4340 static long compress_added = 500000; /* word count */ 4341 4342 #ifdef SPELL_PRINTTREE 4343 /* 4344 * For debugging the tree code: print the current tree in a (more or less) 4345 * readable format, so that we can see what happens when adding a word and/or 4346 * compressing the tree. 4347 * Based on code from Olaf Seibert. 4348 */ 4349 #define PRINTLINESIZE 1000 4350 #define PRINTWIDTH 6 4351 4352 #define PRINTSOME(l, depth, fmt, a1, a2) vim_snprintf(l + depth * PRINTWIDTH, \ 4353 PRINTLINESIZE - PRINTWIDTH * depth, fmt, a1, a2) 4354 4355 static char line1[PRINTLINESIZE]; 4356 static char line2[PRINTLINESIZE]; 4357 static char line3[PRINTLINESIZE]; 4358 4359 static void 4360 spell_clear_flags(wordnode_T *node) 4361 { 4362 wordnode_T *np; 4363 4364 for (np = node; np != NULL; np = np->wn_sibling) 4365 { 4366 np->wn_u1.index = FALSE; 4367 spell_clear_flags(np->wn_child); 4368 } 4369 } 4370 4371 static void 4372 spell_print_node(wordnode_T *node, int depth) 4373 { 4374 if (node->wn_u1.index) 4375 { 4376 /* Done this node before, print the reference. */ 4377 PRINTSOME(line1, depth, "(%d)", node->wn_nr, 0); 4378 PRINTSOME(line2, depth, " ", 0, 0); 4379 PRINTSOME(line3, depth, " ", 0, 0); 4380 msg(line1); 4381 msg(line2); 4382 msg(line3); 4383 } 4384 else 4385 { 4386 node->wn_u1.index = TRUE; 4387 4388 if (node->wn_byte != NUL) 4389 { 4390 if (node->wn_child != NULL) 4391 PRINTSOME(line1, depth, " %c -> ", node->wn_byte, 0); 4392 else 4393 /* Cannot happen? */ 4394 PRINTSOME(line1, depth, " %c ???", node->wn_byte, 0); 4395 } 4396 else 4397 PRINTSOME(line1, depth, " $ ", 0, 0); 4398 4399 PRINTSOME(line2, depth, "%d/%d ", node->wn_nr, node->wn_refs); 4400 4401 if (node->wn_sibling != NULL) 4402 PRINTSOME(line3, depth, " | ", 0, 0); 4403 else 4404 PRINTSOME(line3, depth, " ", 0, 0); 4405 4406 if (node->wn_byte == NUL) 4407 { 4408 msg(line1); 4409 msg(line2); 4410 msg(line3); 4411 } 4412 4413 /* do the children */ 4414 if (node->wn_byte != NUL && node->wn_child != NULL) 4415 spell_print_node(node->wn_child, depth + 1); 4416 4417 /* do the siblings */ 4418 if (node->wn_sibling != NULL) 4419 { 4420 /* get rid of all parent details except | */ 4421 STRCPY(line1, line3); 4422 STRCPY(line2, line3); 4423 spell_print_node(node->wn_sibling, depth); 4424 } 4425 } 4426 } 4427 4428 static void 4429 spell_print_tree(wordnode_T *root) 4430 { 4431 if (root != NULL) 4432 { 4433 /* Clear the "wn_u1.index" fields, used to remember what has been 4434 * done. */ 4435 spell_clear_flags(root); 4436 4437 /* Recursively print the tree. */ 4438 spell_print_node(root, 0); 4439 } 4440 } 4441 #endif /* SPELL_PRINTTREE */ 4442 4443 /* 4444 * Read the affix file "fname". 4445 * Returns an afffile_T, NULL for complete failure. 4446 */ 4447 static afffile_T * 4448 spell_read_aff(spin, fname) 4449 spellinfo_T *spin; 4450 char_u *fname; 4451 { 4452 FILE *fd; 4453 afffile_T *aff; 4454 char_u rline[MAXLINELEN]; 4455 char_u *line; 4456 char_u *pc = NULL; 4457 #define MAXITEMCNT 7 4458 char_u *(items[MAXITEMCNT]); 4459 int itemcnt; 4460 char_u *p; 4461 int lnum = 0; 4462 affheader_T *cur_aff = NULL; 4463 int did_postpone_prefix = FALSE; 4464 int aff_todo = 0; 4465 hashtab_T *tp; 4466 char_u *low = NULL; 4467 char_u *fol = NULL; 4468 char_u *upp = NULL; 4469 int do_rep; 4470 int do_sal; 4471 int do_map; 4472 int found_map = FALSE; 4473 hashitem_T *hi; 4474 int l; 4475 int compminlen = 0; /* COMPOUNDMIN value */ 4476 int compsylmax = 0; /* COMPOUNDSYLMAX value */ 4477 int compmax = 0; /* COMPOUNDMAX value */ 4478 char_u *compflags = NULL; /* COMPOUNDFLAG and COMPOUNDFLAGS 4479 concatenated */ 4480 char_u *midword = NULL; /* MIDWORD value */ 4481 char_u *syllable = NULL; /* SYLLABLE value */ 4482 char_u *sofofrom = NULL; /* SOFOFROM value */ 4483 char_u *sofoto = NULL; /* SOFOTO value */ 4484 4485 /* 4486 * Open the file. 4487 */ 4488 fd = mch_fopen((char *)fname, "r"); 4489 if (fd == NULL) 4490 { 4491 EMSG2(_(e_notopen), fname); 4492 return NULL; 4493 } 4494 4495 if (spin->si_verbose || p_verbose > 2) 4496 { 4497 if (!spin->si_verbose) 4498 verbose_enter(); 4499 smsg((char_u *)_("Reading affix file %s ..."), fname); 4500 out_flush(); 4501 if (!spin->si_verbose) 4502 verbose_leave(); 4503 } 4504 4505 /* Only do REP lines when not done in another .aff file already. */ 4506 do_rep = spin->si_rep.ga_len == 0; 4507 4508 /* Only do SAL lines when not done in another .aff file already. */ 4509 do_sal = spin->si_sal.ga_len == 0; 4510 4511 /* Only do MAP lines when not done in another .aff file already. */ 4512 do_map = spin->si_map.ga_len == 0; 4513 4514 /* 4515 * Allocate and init the afffile_T structure. 4516 */ 4517 aff = (afffile_T *)getroom(spin, sizeof(afffile_T), TRUE); 4518 if (aff == NULL) 4519 return NULL; 4520 hash_init(&aff->af_pref); 4521 hash_init(&aff->af_suff); 4522 hash_init(&aff->af_comp); 4523 4524 /* 4525 * Read all the lines in the file one by one. 4526 */ 4527 while (!vim_fgets(rline, MAXLINELEN, fd) && !got_int) 4528 { 4529 line_breakcheck(); 4530 ++lnum; 4531 4532 /* Skip comment lines. */ 4533 if (*rline == '#') 4534 continue; 4535 4536 /* Convert from "SET" to 'encoding' when needed. */ 4537 vim_free(pc); 4538 #ifdef FEAT_MBYTE 4539 if (spin->si_conv.vc_type != CONV_NONE) 4540 { 4541 pc = string_convert(&spin->si_conv, rline, NULL); 4542 if (pc == NULL) 4543 { 4544 smsg((char_u *)_("Conversion failure for word in %s line %d: %s"), 4545 fname, lnum, rline); 4546 continue; 4547 } 4548 line = pc; 4549 } 4550 else 4551 #endif 4552 { 4553 pc = NULL; 4554 line = rline; 4555 } 4556 4557 /* Split the line up in white separated items. Put a NUL after each 4558 * item. */ 4559 itemcnt = 0; 4560 for (p = line; ; ) 4561 { 4562 while (*p != NUL && *p <= ' ') /* skip white space and CR/NL */ 4563 ++p; 4564 if (*p == NUL) 4565 break; 4566 if (itemcnt == MAXITEMCNT) /* too many items */ 4567 break; 4568 items[itemcnt++] = p; 4569 while (*p > ' ') /* skip until white space or CR/NL */ 4570 ++p; 4571 if (*p == NUL) 4572 break; 4573 *p++ = NUL; 4574 } 4575 4576 /* Handle non-empty lines. */ 4577 if (itemcnt > 0) 4578 { 4579 if (STRCMP(items[0], "SET") == 0 && itemcnt == 2 4580 && aff->af_enc == NULL) 4581 { 4582 #ifdef FEAT_MBYTE 4583 /* Setup for conversion from "ENC" to 'encoding'. */ 4584 aff->af_enc = enc_canonize(items[1]); 4585 if (aff->af_enc != NULL && !spin->si_ascii 4586 && convert_setup(&spin->si_conv, aff->af_enc, 4587 p_enc) == FAIL) 4588 smsg((char_u *)_("Conversion in %s not supported: from %s to %s"), 4589 fname, aff->af_enc, p_enc); 4590 spin->si_conv.vc_fail = TRUE; 4591 #else 4592 smsg((char_u *)_("Conversion in %s not supported"), fname); 4593 #endif 4594 } 4595 else if (STRCMP(items[0], "FLAG") == 0 && itemcnt == 2 4596 && aff->af_flagtype == AFT_CHAR) 4597 { 4598 if (STRCMP(items[1], "long") == 0) 4599 aff->af_flagtype = AFT_LONG; 4600 else if (STRCMP(items[1], "num") == 0) 4601 aff->af_flagtype = AFT_NUM; 4602 else if (STRCMP(items[1], "caplong") == 0) 4603 aff->af_flagtype = AFT_CAPLONG; 4604 else 4605 smsg((char_u *)_("Invalid value for FLAG in %s line %d: %s"), 4606 fname, lnum, items[1]); 4607 if (aff->af_rar != 0 || aff->af_kep != 0 || aff->af_bad != 0 4608 || aff->af_needaffix != 0 4609 || aff->af_needcomp != 0 4610 || compflags != NULL 4611 || aff->af_suff.ht_used > 0 4612 || aff->af_pref.ht_used > 0) 4613 smsg((char_u *)_("FLAG after using flags in %s line %d: %s"), 4614 fname, lnum, items[1]); 4615 } 4616 else if (STRCMP(items[0], "MIDWORD") == 0 && itemcnt == 2 4617 && midword == NULL) 4618 { 4619 midword = getroom_save(spin, items[1]); 4620 } 4621 else if (STRCMP(items[0], "NOSPLITSUGS") == 0 && itemcnt == 1) 4622 { 4623 /* ignored, we always split */ 4624 } 4625 else if (STRCMP(items[0], "TRY") == 0 && itemcnt == 2) 4626 { 4627 /* ignored, we look in the tree for what chars may appear */ 4628 } 4629 else if (STRCMP(items[0], "SLASH") == 0 && itemcnt == 2 4630 && aff->af_slash == 0) 4631 { 4632 aff->af_slash = items[1][0]; 4633 if (items[1][1] != NUL) 4634 smsg((char_u *)_("Character used for SLASH must be ASCII; in %s line %d: %s"), 4635 fname, lnum, items[1]); 4636 } 4637 else if (STRCMP(items[0], "RAR") == 0 && itemcnt == 2 4638 && aff->af_rar == 0) 4639 { 4640 aff->af_rar = affitem2flag(aff->af_flagtype, items[1], 4641 fname, lnum); 4642 } 4643 else if (STRCMP(items[0], "KEP") == 0 && itemcnt == 2 4644 && aff->af_kep == 0) 4645 { 4646 aff->af_kep = affitem2flag(aff->af_flagtype, items[1], 4647 fname, lnum); 4648 } 4649 else if (STRCMP(items[0], "BAD") == 0 && itemcnt == 2 4650 && aff->af_bad == 0) 4651 { 4652 aff->af_bad = affitem2flag(aff->af_flagtype, items[1], 4653 fname, lnum); 4654 } 4655 else if (STRCMP(items[0], "NEEDAFFIX") == 0 && itemcnt == 2 4656 && aff->af_needaffix == 0) 4657 { 4658 aff->af_needaffix = affitem2flag(aff->af_flagtype, items[1], 4659 fname, lnum); 4660 } 4661 else if (STRCMP(items[0], "NEEDCOMPOUND") == 0 && itemcnt == 2 4662 && aff->af_needcomp == 0) 4663 { 4664 aff->af_needcomp = affitem2flag(aff->af_flagtype, items[1], 4665 fname, lnum); 4666 } 4667 else if (STRCMP(items[0], "COMPOUNDFLAG") == 0 && itemcnt == 2 4668 && compflags == NULL) 4669 { 4670 /* Turn flag "c" into COMPOUNDFLAGS compatible string "c+", 4671 * "Na" into "Na+", "1234" into "1234+". */ 4672 p = getroom(spin, STRLEN(items[1]) + 2, FALSE); 4673 if (p != NULL) 4674 { 4675 STRCPY(p, items[1]); 4676 STRCAT(p, "+"); 4677 compflags = p; 4678 } 4679 } 4680 else if (STRCMP(items[0], "COMPOUNDFLAGS") == 0 && itemcnt == 2) 4681 { 4682 /* Concatenate this string to previously defined ones, using a 4683 * slash to separate them. */ 4684 l = STRLEN(items[1]) + 1; 4685 if (compflags != NULL) 4686 l += STRLEN(compflags) + 1; 4687 p = getroom(spin, l, FALSE); 4688 if (p != NULL) 4689 { 4690 if (compflags != NULL) 4691 { 4692 STRCPY(p, compflags); 4693 STRCAT(p, "/"); 4694 } 4695 STRCAT(p, items[1]); 4696 compflags = p; 4697 } 4698 } 4699 else if (STRCMP(items[0], "COMPOUNDMAX") == 0 && itemcnt == 2 4700 && compmax == 0) 4701 { 4702 compmax = atoi((char *)items[1]); 4703 if (compmax == 0) 4704 smsg((char_u *)_("Wrong COMPOUNDMAX value in %s line %d: %s"), 4705 fname, lnum, items[1]); 4706 } 4707 else if (STRCMP(items[0], "COMPOUNDMIN") == 0 && itemcnt == 2 4708 && compminlen == 0) 4709 { 4710 compminlen = atoi((char *)items[1]); 4711 if (compminlen == 0) 4712 smsg((char_u *)_("Wrong COMPOUNDMIN value in %s line %d: %s"), 4713 fname, lnum, items[1]); 4714 } 4715 else if (STRCMP(items[0], "COMPOUNDSYLMAX") == 0 && itemcnt == 2 4716 && compsylmax == 0) 4717 { 4718 compsylmax = atoi((char *)items[1]); 4719 if (compsylmax == 0) 4720 smsg((char_u *)_("Wrong COMPOUNDSYLMAX value in %s line %d: %s"), 4721 fname, lnum, items[1]); 4722 } 4723 else if (STRCMP(items[0], "SYLLABLE") == 0 && itemcnt == 2 4724 && syllable == NULL) 4725 { 4726 syllable = getroom_save(spin, items[1]); 4727 } 4728 else if (STRCMP(items[0], "NOBREAK") == 0 && itemcnt == 1) 4729 { 4730 spin->si_nobreak = TRUE; 4731 } 4732 else if (STRCMP(items[0], "PFXPOSTPONE") == 0 && itemcnt == 1) 4733 { 4734 aff->af_pfxpostpone = TRUE; 4735 } 4736 else if ((STRCMP(items[0], "PFX") == 0 4737 || STRCMP(items[0], "SFX") == 0) 4738 && aff_todo == 0 4739 && itemcnt >= 4) 4740 { 4741 int lasti = 4; 4742 char_u key[AH_KEY_LEN]; 4743 4744 if (*items[0] == 'P') 4745 tp = &aff->af_pref; 4746 else 4747 tp = &aff->af_suff; 4748 4749 /* Myspell allows the same affix name to be used multiple 4750 * times. The affix files that do this have an undocumented 4751 * "S" flag on all but the last block, thus we check for that 4752 * and store it in ah_follows. */ 4753 vim_strncpy(key, items[1], AH_KEY_LEN - 1); 4754 hi = hash_find(tp, key); 4755 if (!HASHITEM_EMPTY(hi)) 4756 { 4757 cur_aff = HI2AH(hi); 4758 if (cur_aff->ah_combine != (*items[2] == 'Y')) 4759 smsg((char_u *)_("Different combining flag in continued affix block in %s line %d: %s"), 4760 fname, lnum, items[1]); 4761 if (!cur_aff->ah_follows) 4762 smsg((char_u *)_("Duplicate affix in %s line %d: %s"), 4763 fname, lnum, items[1]); 4764 } 4765 else 4766 { 4767 /* New affix letter. */ 4768 cur_aff = (affheader_T *)getroom(spin, 4769 sizeof(affheader_T), TRUE); 4770 if (cur_aff == NULL) 4771 break; 4772 cur_aff->ah_flag = affitem2flag(aff->af_flagtype, items[1], 4773 fname, lnum); 4774 if (cur_aff->ah_flag == 0 || STRLEN(items[1]) >= AH_KEY_LEN) 4775 break; 4776 if (cur_aff->ah_flag == aff->af_bad 4777 || cur_aff->ah_flag == aff->af_rar 4778 || cur_aff->ah_flag == aff->af_kep 4779 || cur_aff->ah_flag == aff->af_needaffix 4780 || cur_aff->ah_flag == aff->af_needcomp) 4781 smsg((char_u *)_("Affix also used for BAD/RAR/KEP/NEEDAFFIX/NEEDCOMPOUND in %s line %d: %s"), 4782 fname, lnum, items[1]); 4783 STRCPY(cur_aff->ah_key, items[1]); 4784 hash_add(tp, cur_aff->ah_key); 4785 4786 cur_aff->ah_combine = (*items[2] == 'Y'); 4787 } 4788 4789 /* Check for the "S" flag, which apparently means that another 4790 * block with the same affix name is following. */ 4791 if (itemcnt > lasti && STRCMP(items[lasti], "S") == 0) 4792 { 4793 ++lasti; 4794 cur_aff->ah_follows = TRUE; 4795 } 4796 else 4797 cur_aff->ah_follows = FALSE; 4798 4799 /* Myspell allows extra text after the item, but that might 4800 * mean mistakes go unnoticed. Require a comment-starter. */ 4801 if (itemcnt > lasti && *items[lasti] != '#') 4802 smsg((char_u *)_("Trailing text in %s line %d: %s"), 4803 fname, lnum, items[4]); 4804 4805 if (STRCMP(items[2], "Y") != 0 && STRCMP(items[2], "N") != 0) 4806 smsg((char_u *)_("Expected Y or N in %s line %d: %s"), 4807 fname, lnum, items[2]); 4808 4809 if (*items[0] == 'P' && aff->af_pfxpostpone) 4810 { 4811 if (cur_aff->ah_newID == 0) 4812 { 4813 /* Use a new number in the .spl file later, to be able 4814 * to handle multiple .aff files. */ 4815 check_renumber(spin); 4816 cur_aff->ah_newID = ++spin->si_newprefID; 4817 4818 /* We only really use ah_newID if the prefix is 4819 * postponed. We know that only after handling all 4820 * the items. */ 4821 did_postpone_prefix = FALSE; 4822 } 4823 else 4824 /* Did use the ID in a previous block. */ 4825 did_postpone_prefix = TRUE; 4826 } 4827 4828 aff_todo = atoi((char *)items[3]); 4829 } 4830 else if ((STRCMP(items[0], "PFX") == 0 4831 || STRCMP(items[0], "SFX") == 0) 4832 && aff_todo > 0 4833 && STRCMP(cur_aff->ah_key, items[1]) == 0 4834 && itemcnt >= 5) 4835 { 4836 affentry_T *aff_entry; 4837 int rare = FALSE; 4838 int nocomp = FALSE; 4839 int upper = FALSE; 4840 int lasti = 5; 4841 4842 /* Check for "rare" and "nocomp" after the other info. */ 4843 while (itemcnt > lasti) 4844 { 4845 if (!rare && STRICMP(items[lasti], "rare") == 0) 4846 { 4847 rare = TRUE; 4848 ++lasti; 4849 } 4850 else if (!nocomp && STRICMP(items[lasti], "nocomp") == 0) 4851 { 4852 nocomp = TRUE; 4853 ++lasti; 4854 } 4855 else 4856 break; 4857 } 4858 4859 /* Myspell allows extra text after the item, but that might 4860 * mean mistakes go unnoticed. Require a comment-starter. */ 4861 if (itemcnt > lasti && *items[lasti] != '#') 4862 smsg((char_u *)_(e_afftrailing), fname, lnum, items[lasti]); 4863 4864 /* New item for an affix letter. */ 4865 --aff_todo; 4866 aff_entry = (affentry_T *)getroom(spin, 4867 sizeof(affentry_T), TRUE); 4868 if (aff_entry == NULL) 4869 break; 4870 aff_entry->ae_rare = rare; 4871 aff_entry->ae_nocomp = nocomp; 4872 4873 if (STRCMP(items[2], "0") != 0) 4874 aff_entry->ae_chop = getroom_save(spin, items[2]); 4875 if (STRCMP(items[3], "0") != 0) 4876 aff_entry->ae_add = getroom_save(spin, items[3]); 4877 4878 /* Don't use an affix entry with non-ASCII characters when 4879 * "spin->si_ascii" is TRUE. */ 4880 if (!spin->si_ascii || !(has_non_ascii(aff_entry->ae_chop) 4881 || has_non_ascii(aff_entry->ae_add))) 4882 { 4883 aff_entry->ae_next = cur_aff->ah_first; 4884 cur_aff->ah_first = aff_entry; 4885 4886 if (STRCMP(items[4], ".") != 0) 4887 { 4888 char_u buf[MAXLINELEN]; 4889 4890 aff_entry->ae_cond = getroom_save(spin, items[4]); 4891 if (*items[0] == 'P') 4892 sprintf((char *)buf, "^%s", items[4]); 4893 else 4894 sprintf((char *)buf, "%s$", items[4]); 4895 aff_entry->ae_prog = vim_regcomp(buf, 4896 RE_MAGIC + RE_STRING + RE_STRICT); 4897 if (aff_entry->ae_prog == NULL) 4898 smsg((char_u *)_("Broken condition in %s line %d: %s"), 4899 fname, lnum, items[4]); 4900 } 4901 4902 /* For postponed prefixes we need an entry in si_prefcond 4903 * for the condition. Use an existing one if possible. */ 4904 if (*items[0] == 'P' && aff->af_pfxpostpone) 4905 { 4906 /* When the chop string is one lower-case letter and 4907 * the add string ends in the upper-case letter we set 4908 * the "upper" flag, clear "ae_chop" and remove the 4909 * letters from "ae_add". The condition must either 4910 * be empty or start with the same letter. */ 4911 if (aff_entry->ae_chop != NULL 4912 && aff_entry->ae_add != NULL 4913 #ifdef FEAT_MBYTE 4914 && aff_entry->ae_chop[(*mb_ptr2len)( 4915 aff_entry->ae_chop)] == NUL 4916 #else 4917 && aff_entry->ae_chop[1] == NUL 4918 #endif 4919 ) 4920 { 4921 int c, c_up; 4922 4923 c = PTR2CHAR(aff_entry->ae_chop); 4924 c_up = SPELL_TOUPPER(c); 4925 if (c_up != c 4926 && (aff_entry->ae_cond == NULL 4927 || PTR2CHAR(aff_entry->ae_cond) == c)) 4928 { 4929 p = aff_entry->ae_add 4930 + STRLEN(aff_entry->ae_add); 4931 mb_ptr_back(aff_entry->ae_add, p); 4932 if (PTR2CHAR(p) == c_up) 4933 { 4934 upper = TRUE; 4935 aff_entry->ae_chop = NULL; 4936 *p = NUL; 4937 4938 /* The condition is matched with the 4939 * actual word, thus must check for the 4940 * upper-case letter. */ 4941 if (aff_entry->ae_cond != NULL) 4942 { 4943 char_u buf[MAXLINELEN]; 4944 #ifdef FEAT_MBYTE 4945 if (has_mbyte) 4946 { 4947 onecap_copy(items[4], buf, TRUE); 4948 aff_entry->ae_cond = getroom_save( 4949 spin, buf); 4950 } 4951 else 4952 #endif 4953 *aff_entry->ae_cond = c_up; 4954 if (aff_entry->ae_cond != NULL) 4955 { 4956 sprintf((char *)buf, "^%s", 4957 aff_entry->ae_cond); 4958 vim_free(aff_entry->ae_prog); 4959 aff_entry->ae_prog = vim_regcomp( 4960 buf, RE_MAGIC + RE_STRING); 4961 } 4962 } 4963 } 4964 } 4965 } 4966 4967 if (aff_entry->ae_chop == NULL) 4968 { 4969 int idx; 4970 char_u **pp; 4971 int n; 4972 4973 /* Find a previously used condition. */ 4974 for (idx = spin->si_prefcond.ga_len - 1; idx >= 0; 4975 --idx) 4976 { 4977 p = ((char_u **)spin->si_prefcond.ga_data)[idx]; 4978 if (str_equal(p, aff_entry->ae_cond)) 4979 break; 4980 } 4981 if (idx < 0 && ga_grow(&spin->si_prefcond, 1) == OK) 4982 { 4983 /* Not found, add a new condition. */ 4984 idx = spin->si_prefcond.ga_len++; 4985 pp = ((char_u **)spin->si_prefcond.ga_data) 4986 + idx; 4987 if (aff_entry->ae_cond == NULL) 4988 *pp = NULL; 4989 else 4990 *pp = getroom_save(spin, 4991 aff_entry->ae_cond); 4992 } 4993 4994 /* Add the prefix to the prefix tree. */ 4995 if (aff_entry->ae_add == NULL) 4996 p = (char_u *)""; 4997 else 4998 p = aff_entry->ae_add; 4999 /* PFX_FLAGS is a negative number, so that 5000 * tree_add_word() knows this is the prefix tree. */ 5001 n = PFX_FLAGS; 5002 if (rare) 5003 n |= WFP_RARE; 5004 if (!cur_aff->ah_combine) 5005 n |= WFP_NC; 5006 if (upper) 5007 n |= WFP_UP; 5008 tree_add_word(spin, p, spin->si_prefroot, n, 5009 idx, cur_aff->ah_newID); 5010 did_postpone_prefix = TRUE; 5011 } 5012 5013 /* Didn't actually use ah_newID, backup si_newprefID. */ 5014 if (aff_todo == 0 && !did_postpone_prefix) 5015 { 5016 --spin->si_newprefID; 5017 cur_aff->ah_newID = 0; 5018 } 5019 } 5020 } 5021 } 5022 else if (STRCMP(items[0], "FOL") == 0 && itemcnt == 2 5023 && fol == NULL) 5024 { 5025 fol = vim_strsave(items[1]); 5026 } 5027 else if (STRCMP(items[0], "LOW") == 0 && itemcnt == 2 5028 && low == NULL) 5029 { 5030 low = vim_strsave(items[1]); 5031 } 5032 else if (STRCMP(items[0], "UPP") == 0 && itemcnt == 2 5033 && upp == NULL) 5034 { 5035 upp = vim_strsave(items[1]); 5036 } 5037 else if (STRCMP(items[0], "REP") == 0 && itemcnt == 2) 5038 { 5039 /* Ignore REP count */; 5040 if (!isdigit(*items[1])) 5041 smsg((char_u *)_("Expected REP count in %s line %d"), 5042 fname, lnum); 5043 } 5044 else if (STRCMP(items[0], "REP") == 0 && itemcnt >= 3) 5045 { 5046 /* REP item */ 5047 /* Myspell ignores extra arguments, we require it starts with 5048 * # to detect mistakes. */ 5049 if (itemcnt > 3 && items[3][0] != '#') 5050 smsg((char_u *)_(e_afftrailing), fname, lnum, items[3]); 5051 if (do_rep) 5052 add_fromto(spin, &spin->si_rep, items[1], items[2]); 5053 } 5054 else if (STRCMP(items[0], "MAP") == 0 && itemcnt == 2) 5055 { 5056 /* MAP item or count */ 5057 if (!found_map) 5058 { 5059 /* First line contains the count. */ 5060 found_map = TRUE; 5061 if (!isdigit(*items[1])) 5062 smsg((char_u *)_("Expected MAP count in %s line %d"), 5063 fname, lnum); 5064 } 5065 else if (do_map) 5066 { 5067 int c; 5068 5069 /* Check that every character appears only once. */ 5070 for (p = items[1]; *p != NUL; ) 5071 { 5072 #ifdef FEAT_MBYTE 5073 c = mb_ptr2char_adv(&p); 5074 #else 5075 c = *p++; 5076 #endif 5077 if ((spin->si_map.ga_len > 0 5078 && vim_strchr(spin->si_map.ga_data, c) 5079 != NULL) 5080 || vim_strchr(p, c) != NULL) 5081 smsg((char_u *)_("Duplicate character in MAP in %s line %d"), 5082 fname, lnum); 5083 } 5084 5085 /* We simply concatenate all the MAP strings, separated by 5086 * slashes. */ 5087 ga_concat(&spin->si_map, items[1]); 5088 ga_append(&spin->si_map, '/'); 5089 } 5090 } 5091 else if (STRCMP(items[0], "SAL") == 0 && itemcnt == 3) 5092 { 5093 if (do_sal) 5094 { 5095 /* SAL item (sounds-a-like) 5096 * Either one of the known keys or a from-to pair. */ 5097 if (STRCMP(items[1], "followup") == 0) 5098 spin->si_followup = sal_to_bool(items[2]); 5099 else if (STRCMP(items[1], "collapse_result") == 0) 5100 spin->si_collapse = sal_to_bool(items[2]); 5101 else if (STRCMP(items[1], "remove_accents") == 0) 5102 spin->si_rem_accents = sal_to_bool(items[2]); 5103 else 5104 /* when "to" is "_" it means empty */ 5105 add_fromto(spin, &spin->si_sal, items[1], 5106 STRCMP(items[2], "_") == 0 ? (char_u *)"" 5107 : items[2]); 5108 } 5109 } 5110 else if (STRCMP(items[0], "SOFOFROM") == 0 && itemcnt == 2 5111 && sofofrom == NULL) 5112 { 5113 sofofrom = getroom_save(spin, items[1]); 5114 } 5115 else if (STRCMP(items[0], "SOFOTO") == 0 && itemcnt == 2 5116 && sofoto == NULL) 5117 { 5118 sofoto = getroom_save(spin, items[1]); 5119 } 5120 else 5121 smsg((char_u *)_("Unrecognized or duplicate item in %s line %d: %s"), 5122 fname, lnum, items[0]); 5123 } 5124 } 5125 5126 if (fol != NULL || low != NULL || upp != NULL) 5127 { 5128 if (spin->si_clear_chartab) 5129 { 5130 /* Clear the char type tables, don't want to use any of the 5131 * currently used spell properties. */ 5132 init_spell_chartab(); 5133 spin->si_clear_chartab = FALSE; 5134 } 5135 5136 /* 5137 * Don't write a word table for an ASCII file, so that we don't check 5138 * for conflicts with a word table that matches 'encoding'. 5139 * Don't write one for utf-8 either, we use utf_*() and 5140 * mb_get_class(), the list of chars in the file will be incomplete. 5141 */ 5142 if (!spin->si_ascii 5143 #ifdef FEAT_MBYTE 5144 && !enc_utf8 5145 #endif 5146 ) 5147 { 5148 if (fol == NULL || low == NULL || upp == NULL) 5149 smsg((char_u *)_("Missing FOL/LOW/UPP line in %s"), fname); 5150 else 5151 (void)set_spell_chartab(fol, low, upp); 5152 } 5153 5154 vim_free(fol); 5155 vim_free(low); 5156 vim_free(upp); 5157 } 5158 5159 /* Use compound specifications of the .aff file for the spell info. */ 5160 if (compmax != 0) 5161 { 5162 aff_check_number(spin->si_compmax, compmax, "COMPOUNDMAX"); 5163 spin->si_compmax = compmax; 5164 } 5165 5166 if (compminlen != 0) 5167 { 5168 aff_check_number(spin->si_compminlen, compminlen, "COMPOUNDMIN"); 5169 spin->si_compminlen = compminlen; 5170 } 5171 5172 if (compsylmax != 0) 5173 { 5174 if (syllable == NULL) 5175 smsg((char_u *)_("COMPOUNDSYLMAX used without SYLLABLE")); 5176 aff_check_number(spin->si_compsylmax, compsylmax, "COMPOUNDSYLMAX"); 5177 spin->si_compsylmax = compsylmax; 5178 } 5179 5180 if (compflags != NULL) 5181 process_compflags(spin, aff, compflags); 5182 5183 /* Check that we didn't use too many renumbered flags. */ 5184 if (spin->si_newcompID < spin->si_newprefID) 5185 { 5186 if (spin->si_newcompID == 127 || spin->si_newcompID == 255) 5187 MSG(_("Too many postponed prefixes")); 5188 else if (spin->si_newprefID == 0 || spin->si_newprefID == 127) 5189 MSG(_("Too many compound flags")); 5190 else 5191 MSG(_("Too many posponed prefixes and/or compound flags")); 5192 } 5193 5194 if (syllable != NULL) 5195 { 5196 aff_check_string(spin->si_syllable, syllable, "SYLLABLE"); 5197 spin->si_syllable = syllable; 5198 } 5199 5200 if (sofofrom != NULL || sofoto != NULL) 5201 { 5202 if (sofofrom == NULL || sofoto == NULL) 5203 smsg((char_u *)_("Missing SOFO%s line in %s"), 5204 sofofrom == NULL ? "FROM" : "TO", fname); 5205 else if (spin->si_sal.ga_len > 0) 5206 smsg((char_u *)_("Both SAL and SOFO lines in %s"), fname); 5207 else 5208 { 5209 aff_check_string(spin->si_sofofr, sofofrom, "SOFOFROM"); 5210 aff_check_string(spin->si_sofoto, sofoto, "SOFOTO"); 5211 spin->si_sofofr = sofofrom; 5212 spin->si_sofoto = sofoto; 5213 } 5214 } 5215 5216 if (midword != NULL) 5217 { 5218 aff_check_string(spin->si_midword, midword, "MIDWORD"); 5219 spin->si_midword = midword; 5220 } 5221 5222 vim_free(pc); 5223 fclose(fd); 5224 return aff; 5225 } 5226 5227 /* 5228 * Turn an affix flag name into a number, according to the FLAG type. 5229 * returns zero for failure. 5230 */ 5231 static unsigned 5232 affitem2flag(flagtype, item, fname, lnum) 5233 int flagtype; 5234 char_u *item; 5235 char_u *fname; 5236 int lnum; 5237 { 5238 unsigned res; 5239 char_u *p = item; 5240 5241 res = get_affitem(flagtype, &p); 5242 if (res == 0) 5243 { 5244 if (flagtype == AFT_NUM) 5245 smsg((char_u *)_("Flag is not a number in %s line %d: %s"), 5246 fname, lnum, item); 5247 else 5248 smsg((char_u *)_("Illegal flag in %s line %d: %s"), 5249 fname, lnum, item); 5250 } 5251 if (*p != NUL) 5252 { 5253 smsg((char_u *)_(e_affname), fname, lnum, item); 5254 return 0; 5255 } 5256 5257 return res; 5258 } 5259 5260 /* 5261 * Get one affix name from "*pp" and advance the pointer. 5262 * Returns zero for an error, still advances the pointer then. 5263 */ 5264 static unsigned 5265 get_affitem(flagtype, pp) 5266 int flagtype; 5267 char_u **pp; 5268 { 5269 int res; 5270 5271 if (flagtype == AFT_NUM) 5272 { 5273 if (!VIM_ISDIGIT(**pp)) 5274 { 5275 ++*pp; /* always advance, avoid getting stuck */ 5276 return 0; 5277 } 5278 res = getdigits(pp); 5279 } 5280 else 5281 { 5282 #ifdef FEAT_MBYTE 5283 res = mb_ptr2char_adv(pp); 5284 #else 5285 res = *(*pp)++; 5286 #endif 5287 if (flagtype == AFT_LONG || (flagtype == AFT_CAPLONG 5288 && res >= 'A' && res <= 'Z')) 5289 { 5290 if (**pp == NUL) 5291 return 0; 5292 #ifdef FEAT_MBYTE 5293 res = mb_ptr2char_adv(pp) + (res << 16); 5294 #else 5295 res = *(*pp)++ + (res << 16); 5296 #endif 5297 } 5298 } 5299 return res; 5300 } 5301 5302 /* 5303 * Process the "compflags" string used in an affix file and append it to 5304 * spin->si_compflags. 5305 * The processing involves changing the affix names to ID numbers, so that 5306 * they fit in one byte. 5307 */ 5308 static void 5309 process_compflags(spin, aff, compflags) 5310 spellinfo_T *spin; 5311 afffile_T *aff; 5312 char_u *compflags; 5313 { 5314 char_u *p; 5315 char_u *prevp; 5316 unsigned flag; 5317 compitem_T *ci; 5318 int id; 5319 int len; 5320 char_u *tp; 5321 char_u key[AH_KEY_LEN]; 5322 hashitem_T *hi; 5323 5324 /* Make room for the old and the new compflags, concatenated with a / in 5325 * between. Processing it makes it shorter, but we don't know by how 5326 * much, thus allocate the maximum. */ 5327 len = STRLEN(compflags) + 1; 5328 if (spin->si_compflags != NULL) 5329 len += STRLEN(spin->si_compflags) + 1; 5330 p = getroom(spin, len, FALSE); 5331 if (p == NULL) 5332 return; 5333 if (spin->si_compflags != NULL) 5334 { 5335 STRCPY(p, spin->si_compflags); 5336 STRCAT(p, "/"); 5337 } 5338 spin->si_compflags = p; 5339 tp = p + STRLEN(p); 5340 5341 for (p = compflags; *p != NUL; ) 5342 { 5343 if (vim_strchr((char_u *)"/*+[]", *p) != NULL) 5344 /* Copy non-flag characters directly. */ 5345 *tp++ = *p++; 5346 else 5347 { 5348 /* First get the flag number, also checks validity. */ 5349 prevp = p; 5350 flag = get_affitem(aff->af_flagtype, &p); 5351 if (flag != 0) 5352 { 5353 /* Find the flag in the hashtable. If it was used before, use 5354 * the existing ID. Otherwise add a new entry. */ 5355 vim_strncpy(key, prevp, p - prevp); 5356 hi = hash_find(&aff->af_comp, key); 5357 if (!HASHITEM_EMPTY(hi)) 5358 id = HI2CI(hi)->ci_newID; 5359 else 5360 { 5361 ci = (compitem_T *)getroom(spin, sizeof(compitem_T), TRUE); 5362 if (ci == NULL) 5363 break; 5364 STRCPY(ci->ci_key, key); 5365 ci->ci_flag = flag; 5366 /* Avoid using a flag ID that has a special meaning in a 5367 * regexp (also inside []). */ 5368 do 5369 { 5370 check_renumber(spin); 5371 id = spin->si_newcompID--; 5372 } while (vim_strchr((char_u *)"/+*[]\\-^", id) != NULL); 5373 ci->ci_newID = id; 5374 hash_add(&aff->af_comp, ci->ci_key); 5375 } 5376 *tp++ = id; 5377 } 5378 if (aff->af_flagtype == AFT_NUM && *p == ',') 5379 ++p; 5380 } 5381 } 5382 5383 *tp = NUL; 5384 } 5385 5386 /* 5387 * Check that the new IDs for postponed affixes and compounding don't overrun 5388 * each other. We have almost 255 available, but start at 0-127 to avoid 5389 * using two bytes for utf-8. When the 0-127 range is used up go to 128-255. 5390 * When that is used up an error message is given. 5391 */ 5392 static void 5393 check_renumber(spin) 5394 spellinfo_T *spin; 5395 { 5396 if (spin->si_newprefID == spin->si_newcompID && spin->si_newcompID < 128) 5397 { 5398 spin->si_newprefID = 127; 5399 spin->si_newcompID = 255; 5400 } 5401 } 5402 5403 /* 5404 * Return TRUE if flag "flag" appears in affix list "afflist". 5405 */ 5406 static int 5407 flag_in_afflist(flagtype, afflist, flag) 5408 int flagtype; 5409 char_u *afflist; 5410 unsigned flag; 5411 { 5412 char_u *p; 5413 unsigned n; 5414 5415 switch (flagtype) 5416 { 5417 case AFT_CHAR: 5418 return vim_strchr(afflist, flag) != NULL; 5419 5420 case AFT_CAPLONG: 5421 case AFT_LONG: 5422 for (p = afflist; *p != NUL; ) 5423 { 5424 #ifdef FEAT_MBYTE 5425 n = mb_ptr2char_adv(&p); 5426 #else 5427 n = *p++; 5428 #endif 5429 if ((flagtype == AFT_LONG || (n >= 'A' && n <= 'Z')) 5430 && *p != NUL) 5431 #ifdef FEAT_MBYTE 5432 n = mb_ptr2char_adv(&p) + (n << 16); 5433 #else 5434 n = *p++ + (n << 16); 5435 #endif 5436 if (n == flag) 5437 return TRUE; 5438 } 5439 break; 5440 5441 case AFT_NUM: 5442 for (p = afflist; *p != NUL; ) 5443 { 5444 n = getdigits(&p); 5445 if (n == flag) 5446 return TRUE; 5447 if (*p != NUL) /* skip over comma */ 5448 ++p; 5449 } 5450 break; 5451 } 5452 return FALSE; 5453 } 5454 5455 /* 5456 * Give a warning when "spinval" and "affval" numbers are set and not the same. 5457 */ 5458 static void 5459 aff_check_number(spinval, affval, name) 5460 int spinval; 5461 int affval; 5462 char *name; 5463 { 5464 if (spinval != 0 && spinval != affval) 5465 smsg((char_u *)_("%s value differs from what is used in another .aff file"), name); 5466 } 5467 5468 /* 5469 * Give a warning when "spinval" and "affval" strings are set and not the same. 5470 */ 5471 static void 5472 aff_check_string(spinval, affval, name) 5473 char_u *spinval; 5474 char_u *affval; 5475 char *name; 5476 { 5477 if (spinval != NULL && STRCMP(spinval, affval) != 0) 5478 smsg((char_u *)_("%s value differs from what is used in another .aff file"), name); 5479 } 5480 5481 /* 5482 * Return TRUE if strings "s1" and "s2" are equal. Also consider both being 5483 * NULL as equal. 5484 */ 5485 static int 5486 str_equal(s1, s2) 5487 char_u *s1; 5488 char_u *s2; 5489 { 5490 if (s1 == NULL || s2 == NULL) 5491 return s1 == s2; 5492 return STRCMP(s1, s2) == 0; 5493 } 5494 5495 /* 5496 * Add a from-to item to "gap". Used for REP and SAL items. 5497 * They are stored case-folded. 5498 */ 5499 static void 5500 add_fromto(spin, gap, from, to) 5501 spellinfo_T *spin; 5502 garray_T *gap; 5503 char_u *from; 5504 char_u *to; 5505 { 5506 fromto_T *ftp; 5507 char_u word[MAXWLEN]; 5508 5509 if (ga_grow(gap, 1) == OK) 5510 { 5511 ftp = ((fromto_T *)gap->ga_data) + gap->ga_len; 5512 (void)spell_casefold(from, STRLEN(from), word, MAXWLEN); 5513 ftp->ft_from = getroom_save(spin, word); 5514 (void)spell_casefold(to, STRLEN(to), word, MAXWLEN); 5515 ftp->ft_to = getroom_save(spin, word); 5516 ++gap->ga_len; 5517 } 5518 } 5519 5520 /* 5521 * Convert a boolean argument in a SAL line to TRUE or FALSE; 5522 */ 5523 static int 5524 sal_to_bool(s) 5525 char_u *s; 5526 { 5527 return STRCMP(s, "1") == 0 || STRCMP(s, "true") == 0; 5528 } 5529 5530 /* 5531 * Return TRUE if string "s" contains a non-ASCII character (128 or higher). 5532 * When "s" is NULL FALSE is returned. 5533 */ 5534 static int 5535 has_non_ascii(s) 5536 char_u *s; 5537 { 5538 char_u *p; 5539 5540 if (s != NULL) 5541 for (p = s; *p != NUL; ++p) 5542 if (*p >= 128) 5543 return TRUE; 5544 return FALSE; 5545 } 5546 5547 /* 5548 * Free the structure filled by spell_read_aff(). 5549 */ 5550 static void 5551 spell_free_aff(aff) 5552 afffile_T *aff; 5553 { 5554 hashtab_T *ht; 5555 hashitem_T *hi; 5556 int todo; 5557 affheader_T *ah; 5558 affentry_T *ae; 5559 5560 vim_free(aff->af_enc); 5561 5562 /* All this trouble to free the "ae_prog" items... */ 5563 for (ht = &aff->af_pref; ; ht = &aff->af_suff) 5564 { 5565 todo = ht->ht_used; 5566 for (hi = ht->ht_array; todo > 0; ++hi) 5567 { 5568 if (!HASHITEM_EMPTY(hi)) 5569 { 5570 --todo; 5571 ah = HI2AH(hi); 5572 for (ae = ah->ah_first; ae != NULL; ae = ae->ae_next) 5573 vim_free(ae->ae_prog); 5574 } 5575 } 5576 if (ht == &aff->af_suff) 5577 break; 5578 } 5579 5580 hash_clear(&aff->af_pref); 5581 hash_clear(&aff->af_suff); 5582 hash_clear(&aff->af_comp); 5583 } 5584 5585 /* 5586 * Read dictionary file "fname". 5587 * Returns OK or FAIL; 5588 */ 5589 static int 5590 spell_read_dic(spin, fname, affile) 5591 spellinfo_T *spin; 5592 char_u *fname; 5593 afffile_T *affile; 5594 { 5595 hashtab_T ht; 5596 char_u line[MAXLINELEN]; 5597 char_u *p; 5598 char_u *afflist; 5599 char_u store_afflist[MAXWLEN]; 5600 int pfxlen; 5601 int need_affix; 5602 char_u *dw; 5603 char_u *pc; 5604 char_u *w; 5605 int l; 5606 hash_T hash; 5607 hashitem_T *hi; 5608 FILE *fd; 5609 int lnum = 1; 5610 int non_ascii = 0; 5611 int retval = OK; 5612 char_u message[MAXLINELEN + MAXWLEN]; 5613 int flags; 5614 int duplicate = 0; 5615 5616 /* 5617 * Open the file. 5618 */ 5619 fd = mch_fopen((char *)fname, "r"); 5620 if (fd == NULL) 5621 { 5622 EMSG2(_(e_notopen), fname); 5623 return FAIL; 5624 } 5625 5626 /* The hashtable is only used to detect duplicated words. */ 5627 hash_init(&ht); 5628 5629 if (spin->si_verbose || p_verbose > 2) 5630 { 5631 if (!spin->si_verbose) 5632 verbose_enter(); 5633 smsg((char_u *)_("Reading dictionary file %s ..."), fname); 5634 out_flush(); 5635 if (!spin->si_verbose) 5636 verbose_leave(); 5637 } 5638 5639 /* start with a message for the first line */ 5640 spin->si_msg_count = 999999; 5641 5642 /* Read and ignore the first line: word count. */ 5643 (void)vim_fgets(line, MAXLINELEN, fd); 5644 if (!vim_isdigit(*skipwhite(line))) 5645 EMSG2(_("E760: No word count in %s"), fname); 5646 5647 /* 5648 * Read all the lines in the file one by one. 5649 * The words are converted to 'encoding' here, before being added to 5650 * the hashtable. 5651 */ 5652 while (!vim_fgets(line, MAXLINELEN, fd) && !got_int) 5653 { 5654 line_breakcheck(); 5655 ++lnum; 5656 if (line[0] == '#' || line[0] == '/') 5657 continue; /* comment line */ 5658 5659 /* Remove CR, LF and white space from the end. White space halfway 5660 * the word is kept to allow e.g., "et al.". */ 5661 l = STRLEN(line); 5662 while (l > 0 && line[l - 1] <= ' ') 5663 --l; 5664 if (l == 0) 5665 continue; /* empty line */ 5666 line[l] = NUL; 5667 5668 /* Find the optional affix names. Replace the SLASH character by a 5669 * slash. */ 5670 afflist = NULL; 5671 for (p = line; *p != NUL; mb_ptr_adv(p)) 5672 { 5673 if (*p == affile->af_slash) 5674 *p = '/'; 5675 else if (*p == '/') 5676 { 5677 *p = NUL; 5678 afflist = p + 1; 5679 break; 5680 } 5681 } 5682 5683 /* Skip non-ASCII words when "spin->si_ascii" is TRUE. */ 5684 if (spin->si_ascii && has_non_ascii(line)) 5685 { 5686 ++non_ascii; 5687 continue; 5688 } 5689 5690 #ifdef FEAT_MBYTE 5691 /* Convert from "SET" to 'encoding' when needed. */ 5692 if (spin->si_conv.vc_type != CONV_NONE) 5693 { 5694 pc = string_convert(&spin->si_conv, line, NULL); 5695 if (pc == NULL) 5696 { 5697 smsg((char_u *)_("Conversion failure for word in %s line %d: %s"), 5698 fname, lnum, line); 5699 continue; 5700 } 5701 w = pc; 5702 } 5703 else 5704 #endif 5705 { 5706 pc = NULL; 5707 w = line; 5708 } 5709 5710 /* This takes time, print a message every 10000 words. */ 5711 if (spin->si_verbose && spin->si_msg_count > 10000) 5712 { 5713 spin->si_msg_count = 0; 5714 vim_snprintf((char *)message, sizeof(message), 5715 _("line %6d, word %6d - %s"), 5716 lnum, spin->si_foldwcount + spin->si_keepwcount, w); 5717 msg_start(); 5718 msg_puts_long_attr(message, 0); 5719 msg_clr_eos(); 5720 msg_didout = FALSE; 5721 msg_col = 0; 5722 out_flush(); 5723 } 5724 5725 /* Store the word in the hashtable to be able to find duplicates. */ 5726 dw = (char_u *)getroom_save(spin, w); 5727 if (dw == NULL) 5728 retval = FAIL; 5729 vim_free(pc); 5730 if (retval == FAIL) 5731 break; 5732 5733 hash = hash_hash(dw); 5734 hi = hash_lookup(&ht, dw, hash); 5735 if (!HASHITEM_EMPTY(hi)) 5736 { 5737 if (p_verbose > 0) 5738 smsg((char_u *)_("Duplicate word in %s line %d: %s"), 5739 fname, lnum, dw); 5740 else if (duplicate == 0) 5741 smsg((char_u *)_("First duplicate word in %s line %d: %s"), 5742 fname, lnum, dw); 5743 ++duplicate; 5744 } 5745 else 5746 hash_add_item(&ht, hi, dw, hash); 5747 5748 flags = 0; 5749 store_afflist[0] = NUL; 5750 pfxlen = 0; 5751 need_affix = FALSE; 5752 if (afflist != NULL) 5753 { 5754 /* Check for affix name that stands for keep-case word and stands 5755 * for rare word (if defined). */ 5756 if (affile->af_kep != 0 && flag_in_afflist( 5757 affile->af_flagtype, afflist, affile->af_kep)) 5758 flags |= WF_KEEPCAP | WF_FIXCAP; 5759 if (affile->af_rar != 0 && flag_in_afflist( 5760 affile->af_flagtype, afflist, affile->af_rar)) 5761 flags |= WF_RARE; 5762 if (affile->af_bad != 0 && flag_in_afflist( 5763 affile->af_flagtype, afflist, affile->af_bad)) 5764 flags |= WF_BANNED; 5765 if (affile->af_needaffix != 0 && flag_in_afflist( 5766 affile->af_flagtype, afflist, affile->af_needaffix)) 5767 need_affix = TRUE; 5768 if (affile->af_needcomp != 0 && flag_in_afflist( 5769 affile->af_flagtype, afflist, affile->af_needcomp)) 5770 flags |= WF_NEEDCOMP; 5771 5772 if (affile->af_pfxpostpone) 5773 /* Need to store the list of prefix IDs with the word. */ 5774 pfxlen = get_pfxlist(affile, afflist, store_afflist); 5775 5776 if (spin->si_compflags != NULL) 5777 /* Need to store the list of compound flags with the word. 5778 * Concatenate them to the list of prefix IDs. */ 5779 get_compflags(affile, afflist, store_afflist + pfxlen); 5780 } 5781 5782 /* Add the word to the word tree(s). */ 5783 if (store_word(spin, dw, flags, spin->si_region, 5784 store_afflist, need_affix) == FAIL) 5785 retval = FAIL; 5786 5787 if (afflist != NULL) 5788 { 5789 /* Find all matching suffixes and add the resulting words. 5790 * Additionally do matching prefixes that combine. */ 5791 if (store_aff_word(spin, dw, afflist, affile, 5792 &affile->af_suff, &affile->af_pref, 5793 FALSE, flags, store_afflist, pfxlen) == FAIL) 5794 retval = FAIL; 5795 5796 /* Find all matching prefixes and add the resulting words. */ 5797 if (store_aff_word(spin, dw, afflist, affile, 5798 &affile->af_pref, NULL, 5799 FALSE, flags, store_afflist, pfxlen) == FAIL) 5800 retval = FAIL; 5801 } 5802 } 5803 5804 if (duplicate > 0) 5805 smsg((char_u *)_("%d duplicate word(s) in %s"), duplicate, fname); 5806 if (spin->si_ascii && non_ascii > 0) 5807 smsg((char_u *)_("Ignored %d word(s) with non-ASCII characters in %s"), 5808 non_ascii, fname); 5809 hash_clear(&ht); 5810 5811 fclose(fd); 5812 return retval; 5813 } 5814 5815 /* 5816 * Get the list of prefix IDs from the affix list "afflist". 5817 * Used for PFXPOSTPONE. 5818 * Put the resulting flags in "store_afflist[MAXWLEN]" with a terminating NUL 5819 * and return the number of affixes. 5820 */ 5821 static int 5822 get_pfxlist(affile, afflist, store_afflist) 5823 afffile_T *affile; 5824 char_u *afflist; 5825 char_u *store_afflist; 5826 { 5827 char_u *p; 5828 char_u *prevp; 5829 int cnt = 0; 5830 int id; 5831 char_u key[AH_KEY_LEN]; 5832 hashitem_T *hi; 5833 5834 for (p = afflist; *p != NUL; ) 5835 { 5836 prevp = p; 5837 if (get_affitem(affile->af_flagtype, &p) != 0) 5838 { 5839 /* A flag is a postponed prefix flag if it appears in "af_pref" 5840 * and it's ID is not zero. */ 5841 vim_strncpy(key, prevp, p - prevp); 5842 hi = hash_find(&affile->af_pref, key); 5843 if (!HASHITEM_EMPTY(hi)) 5844 { 5845 id = HI2AH(hi)->ah_newID; 5846 if (id != 0) 5847 store_afflist[cnt++] = id; 5848 } 5849 } 5850 if (affile->af_flagtype == AFT_NUM && *p == ',') 5851 ++p; 5852 } 5853 5854 store_afflist[cnt] = NUL; 5855 return cnt; 5856 } 5857 5858 /* 5859 * Get the list of compound IDs from the affix list "afflist" that are used 5860 * for compound words. 5861 * Puts the flags in "store_afflist[]". 5862 */ 5863 static void 5864 get_compflags(affile, afflist, store_afflist) 5865 afffile_T *affile; 5866 char_u *afflist; 5867 char_u *store_afflist; 5868 { 5869 char_u *p; 5870 char_u *prevp; 5871 int cnt = 0; 5872 char_u key[AH_KEY_LEN]; 5873 hashitem_T *hi; 5874 5875 for (p = afflist; *p != NUL; ) 5876 { 5877 prevp = p; 5878 if (get_affitem(affile->af_flagtype, &p) != 0) 5879 { 5880 /* A flag is a compound flag if it appears in "af_comp". */ 5881 vim_strncpy(key, prevp, p - prevp); 5882 hi = hash_find(&affile->af_comp, key); 5883 if (!HASHITEM_EMPTY(hi)) 5884 store_afflist[cnt++] = HI2CI(hi)->ci_newID; 5885 } 5886 if (affile->af_flagtype == AFT_NUM && *p == ',') 5887 ++p; 5888 } 5889 5890 store_afflist[cnt] = NUL; 5891 } 5892 5893 /* 5894 * Apply affixes to a word and store the resulting words. 5895 * "ht" is the hashtable with affentry_T that need to be applied, either 5896 * prefixes or suffixes. 5897 * "xht", when not NULL, is the prefix hashtable, to be used additionally on 5898 * the resulting words for combining affixes. 5899 * 5900 * Returns FAIL when out of memory. 5901 */ 5902 static int 5903 store_aff_word(spin, word, afflist, affile, ht, xht, comb, flags, 5904 pfxlist, pfxlen) 5905 spellinfo_T *spin; /* spell info */ 5906 char_u *word; /* basic word start */ 5907 char_u *afflist; /* list of names of supported affixes */ 5908 afffile_T *affile; 5909 hashtab_T *ht; 5910 hashtab_T *xht; 5911 int comb; /* only use affixes that combine */ 5912 int flags; /* flags for the word */ 5913 char_u *pfxlist; /* list of prefix IDs */ 5914 int pfxlen; /* nr of flags in "pfxlist" for prefixes, rest 5915 * is compound flags */ 5916 { 5917 int todo; 5918 hashitem_T *hi; 5919 affheader_T *ah; 5920 affentry_T *ae; 5921 regmatch_T regmatch; 5922 char_u newword[MAXWLEN]; 5923 int retval = OK; 5924 int i; 5925 char_u *p; 5926 int use_flags; 5927 char_u *use_pfxlist; 5928 char_u pfx_pfxlist[MAXWLEN]; 5929 size_t wordlen = STRLEN(word); 5930 5931 todo = ht->ht_used; 5932 for (hi = ht->ht_array; todo > 0 && retval == OK; ++hi) 5933 { 5934 if (!HASHITEM_EMPTY(hi)) 5935 { 5936 --todo; 5937 ah = HI2AH(hi); 5938 5939 /* Check that the affix combines, if required, and that the word 5940 * supports this affix. */ 5941 if ((!comb || ah->ah_combine) && flag_in_afflist( 5942 affile->af_flagtype, afflist, ah->ah_flag)) 5943 { 5944 /* Loop over all affix entries with this name. */ 5945 for (ae = ah->ah_first; ae != NULL; ae = ae->ae_next) 5946 { 5947 /* Check the condition. It's not logical to match case 5948 * here, but it is required for compatibility with 5949 * Myspell. 5950 * Another requirement from Myspell is that the chop 5951 * string is shorter than the word itself. 5952 * For prefixes, when "PFXPOSTPONE" was used, only do 5953 * prefixes with a chop string. */ 5954 regmatch.regprog = ae->ae_prog; 5955 regmatch.rm_ic = FALSE; 5956 if ((xht != NULL || !affile->af_pfxpostpone 5957 || ae->ae_chop != NULL) 5958 && (ae->ae_chop == NULL 5959 || STRLEN(ae->ae_chop) < wordlen) 5960 && (ae->ae_prog == NULL 5961 || vim_regexec(®match, word, (colnr_T)0))) 5962 { 5963 /* Match. Remove the chop and add the affix. */ 5964 if (xht == NULL) 5965 { 5966 /* prefix: chop/add at the start of the word */ 5967 if (ae->ae_add == NULL) 5968 *newword = NUL; 5969 else 5970 STRCPY(newword, ae->ae_add); 5971 p = word; 5972 if (ae->ae_chop != NULL) 5973 { 5974 /* Skip chop string. */ 5975 #ifdef FEAT_MBYTE 5976 if (has_mbyte) 5977 { 5978 i = mb_charlen(ae->ae_chop); 5979 for ( ; i > 0; --i) 5980 mb_ptr_adv(p); 5981 } 5982 else 5983 #endif 5984 p += STRLEN(ae->ae_chop); 5985 } 5986 STRCAT(newword, p); 5987 } 5988 else 5989 { 5990 /* suffix: chop/add at the end of the word */ 5991 STRCPY(newword, word); 5992 if (ae->ae_chop != NULL) 5993 { 5994 /* Remove chop string. */ 5995 p = newword + STRLEN(newword); 5996 i = MB_CHARLEN(ae->ae_chop); 5997 for ( ; i > 0; --i) 5998 mb_ptr_back(newword, p); 5999 *p = NUL; 6000 } 6001 if (ae->ae_add != NULL) 6002 STRCAT(newword, ae->ae_add); 6003 } 6004 6005 /* Obey the "rare" flag of the affix. */ 6006 if (ae->ae_rare) 6007 use_flags = flags | WF_RARE; 6008 else 6009 use_flags = flags; 6010 6011 /* Obey the "nocomp" flag of the affix: don't use the 6012 * compound flags. */ 6013 use_pfxlist = pfxlist; 6014 if (ae->ae_nocomp && pfxlist != NULL) 6015 { 6016 vim_strncpy(pfx_pfxlist, pfxlist, pfxlen); 6017 use_pfxlist = pfx_pfxlist; 6018 } 6019 6020 /* When there are postponed prefixes... */ 6021 if (spin->si_prefroot != NULL 6022 && spin->si_prefroot->wn_sibling != NULL) 6023 { 6024 /* ... add a flag to indicate an affix was used. */ 6025 use_flags |= WF_HAS_AFF; 6026 6027 /* ... don't use a prefix list if combining 6028 * affixes is not allowed. But do use the 6029 * compound flags after them. */ 6030 if ((!ah->ah_combine || comb) && pfxlist != NULL) 6031 use_pfxlist += pfxlen; 6032 } 6033 6034 /* Store the modified word. */ 6035 if (store_word(spin, newword, use_flags, 6036 spin->si_region, use_pfxlist, FALSE) == FAIL) 6037 retval = FAIL; 6038 6039 /* When added a suffix and combining is allowed also 6040 * try adding prefixes additionally. */ 6041 if (xht != NULL && ah->ah_combine) 6042 if (store_aff_word(spin, newword, afflist, affile, 6043 xht, NULL, TRUE, 6044 use_flags, use_pfxlist, pfxlen) == FAIL) 6045 retval = FAIL; 6046 } 6047 } 6048 } 6049 } 6050 } 6051 6052 return retval; 6053 } 6054 6055 /* 6056 * Read a file with a list of words. 6057 */ 6058 static int 6059 spell_read_wordfile(spin, fname) 6060 spellinfo_T *spin; 6061 char_u *fname; 6062 { 6063 FILE *fd; 6064 long lnum = 0; 6065 char_u rline[MAXLINELEN]; 6066 char_u *line; 6067 char_u *pc = NULL; 6068 char_u *p; 6069 int l; 6070 int retval = OK; 6071 int did_word = FALSE; 6072 int non_ascii = 0; 6073 int flags; 6074 int regionmask; 6075 6076 /* 6077 * Open the file. 6078 */ 6079 fd = mch_fopen((char *)fname, "r"); 6080 if (fd == NULL) 6081 { 6082 EMSG2(_(e_notopen), fname); 6083 return FAIL; 6084 } 6085 6086 if (spin->si_verbose || p_verbose > 2) 6087 { 6088 if (!spin->si_verbose) 6089 verbose_enter(); 6090 smsg((char_u *)_("Reading word file %s ..."), fname); 6091 out_flush(); 6092 if (!spin->si_verbose) 6093 verbose_leave(); 6094 } 6095 6096 /* 6097 * Read all the lines in the file one by one. 6098 */ 6099 while (!vim_fgets(rline, MAXLINELEN, fd) && !got_int) 6100 { 6101 line_breakcheck(); 6102 ++lnum; 6103 6104 /* Skip comment lines. */ 6105 if (*rline == '#') 6106 continue; 6107 6108 /* Remove CR, LF and white space from the end. */ 6109 l = STRLEN(rline); 6110 while (l > 0 && rline[l - 1] <= ' ') 6111 --l; 6112 if (l == 0) 6113 continue; /* empty or blank line */ 6114 rline[l] = NUL; 6115 6116 /* Convert from "=encoding={encoding}" to 'encoding' when needed. */ 6117 vim_free(pc); 6118 #ifdef FEAT_MBYTE 6119 if (spin->si_conv.vc_type != CONV_NONE) 6120 { 6121 pc = string_convert(&spin->si_conv, rline, NULL); 6122 if (pc == NULL) 6123 { 6124 smsg((char_u *)_("Conversion failure for word in %s line %d: %s"), 6125 fname, lnum, rline); 6126 continue; 6127 } 6128 line = pc; 6129 } 6130 else 6131 #endif 6132 { 6133 pc = NULL; 6134 line = rline; 6135 } 6136 6137 if (*line == '/') 6138 { 6139 ++line; 6140 if (STRNCMP(line, "encoding=", 9) == 0) 6141 { 6142 if (spin->si_conv.vc_type != CONV_NONE) 6143 smsg((char_u *)_("Duplicate /encoding= line ignored in %s line %d: %s"), 6144 fname, lnum, line - 1); 6145 else if (did_word) 6146 smsg((char_u *)_("/encoding= line after word ignored in %s line %d: %s"), 6147 fname, lnum, line - 1); 6148 else 6149 { 6150 #ifdef FEAT_MBYTE 6151 char_u *enc; 6152 6153 /* Setup for conversion to 'encoding'. */ 6154 line += 10; 6155 enc = enc_canonize(line); 6156 if (enc != NULL && !spin->si_ascii 6157 && convert_setup(&spin->si_conv, enc, 6158 p_enc) == FAIL) 6159 smsg((char_u *)_("Conversion in %s not supported: from %s to %s"), 6160 fname, line, p_enc); 6161 vim_free(enc); 6162 spin->si_conv.vc_fail = TRUE; 6163 #else 6164 smsg((char_u *)_("Conversion in %s not supported"), fname); 6165 #endif 6166 } 6167 continue; 6168 } 6169 6170 if (STRNCMP(line, "regions=", 8) == 0) 6171 { 6172 if (spin->si_region_count > 1) 6173 smsg((char_u *)_("Duplicate /regions= line ignored in %s line %d: %s"), 6174 fname, lnum, line); 6175 else 6176 { 6177 line += 8; 6178 if (STRLEN(line) > 16) 6179 smsg((char_u *)_("Too many regions in %s line %d: %s"), 6180 fname, lnum, line); 6181 else 6182 { 6183 spin->si_region_count = STRLEN(line) / 2; 6184 STRCPY(spin->si_region_name, line); 6185 6186 /* Adjust the mask for a word valid in all regions. */ 6187 spin->si_region = (1 << spin->si_region_count) - 1; 6188 } 6189 } 6190 continue; 6191 } 6192 6193 smsg((char_u *)_("/ line ignored in %s line %d: %s"), 6194 fname, lnum, line - 1); 6195 continue; 6196 } 6197 6198 flags = 0; 6199 regionmask = spin->si_region; 6200 6201 /* Check for flags and region after a slash. */ 6202 p = vim_strchr(line, '/'); 6203 if (p != NULL) 6204 { 6205 *p++ = NUL; 6206 while (*p != NUL) 6207 { 6208 if (*p == '=') /* keep-case word */ 6209 flags |= WF_KEEPCAP | WF_FIXCAP; 6210 else if (*p == '!') /* Bad, bad, wicked word. */ 6211 flags |= WF_BANNED; 6212 else if (*p == '?') /* Rare word. */ 6213 flags |= WF_RARE; 6214 else if (VIM_ISDIGIT(*p)) /* region number(s) */ 6215 { 6216 if ((flags & WF_REGION) == 0) /* first one */ 6217 regionmask = 0; 6218 flags |= WF_REGION; 6219 6220 l = *p - '0'; 6221 if (l > spin->si_region_count) 6222 { 6223 smsg((char_u *)_("Invalid region nr in %s line %d: %s"), 6224 fname, lnum, p); 6225 break; 6226 } 6227 regionmask |= 1 << (l - 1); 6228 } 6229 else 6230 { 6231 smsg((char_u *)_("Unrecognized flags in %s line %d: %s"), 6232 fname, lnum, p); 6233 break; 6234 } 6235 ++p; 6236 } 6237 } 6238 6239 /* Skip non-ASCII words when "spin->si_ascii" is TRUE. */ 6240 if (spin->si_ascii && has_non_ascii(line)) 6241 { 6242 ++non_ascii; 6243 continue; 6244 } 6245 6246 /* Normal word: store it. */ 6247 if (store_word(spin, line, flags, regionmask, NULL, FALSE) == FAIL) 6248 { 6249 retval = FAIL; 6250 break; 6251 } 6252 did_word = TRUE; 6253 } 6254 6255 vim_free(pc); 6256 fclose(fd); 6257 6258 if (spin->si_ascii && non_ascii > 0 && (spin->si_verbose || p_verbose > 2)) 6259 { 6260 if (p_verbose > 2) 6261 verbose_enter(); 6262 smsg((char_u *)_("Ignored %d words with non-ASCII characters"), 6263 non_ascii); 6264 if (p_verbose > 2) 6265 verbose_leave(); 6266 } 6267 return retval; 6268 } 6269 6270 /* 6271 * Get part of an sblock_T, "len" bytes long. 6272 * This avoids calling free() for every little struct we use (and keeping 6273 * track of them). 6274 * The memory is cleared to all zeros. 6275 * Returns NULL when out of memory. 6276 */ 6277 static void * 6278 getroom(spin, len, align) 6279 spellinfo_T *spin; 6280 size_t len; /* length needed */ 6281 int align; /* align for pointer */ 6282 { 6283 char_u *p; 6284 sblock_T *bl = spin->si_blocks; 6285 6286 if (align && bl != NULL) 6287 /* Round size up for alignment. On some systems structures need to be 6288 * aligned to the size of a pointer (e.g., SPARC). */ 6289 bl->sb_used = (bl->sb_used + sizeof(char *) - 1) 6290 & ~(sizeof(char *) - 1); 6291 6292 if (bl == NULL || bl->sb_used + len > SBLOCKSIZE) 6293 { 6294 /* Allocate a block of memory. This is not freed until much later. */ 6295 bl = (sblock_T *)alloc_clear((unsigned)(sizeof(sblock_T) + SBLOCKSIZE)); 6296 if (bl == NULL) 6297 return NULL; 6298 bl->sb_next = spin->si_blocks; 6299 spin->si_blocks = bl; 6300 bl->sb_used = 0; 6301 ++spin->si_blocks_cnt; 6302 } 6303 6304 p = bl->sb_data + bl->sb_used; 6305 bl->sb_used += len; 6306 6307 return p; 6308 } 6309 6310 /* 6311 * Make a copy of a string into memory allocated with getroom(). 6312 */ 6313 static char_u * 6314 getroom_save(spin, s) 6315 spellinfo_T *spin; 6316 char_u *s; 6317 { 6318 char_u *sc; 6319 6320 sc = (char_u *)getroom(spin, STRLEN(s) + 1, FALSE); 6321 if (sc != NULL) 6322 STRCPY(sc, s); 6323 return sc; 6324 } 6325 6326 6327 /* 6328 * Free the list of allocated sblock_T. 6329 */ 6330 static void 6331 free_blocks(bl) 6332 sblock_T *bl; 6333 { 6334 sblock_T *next; 6335 6336 while (bl != NULL) 6337 { 6338 next = bl->sb_next; 6339 vim_free(bl); 6340 bl = next; 6341 } 6342 } 6343 6344 /* 6345 * Allocate the root of a word tree. 6346 */ 6347 static wordnode_T * 6348 wordtree_alloc(spin) 6349 spellinfo_T *spin; 6350 { 6351 return (wordnode_T *)getroom(spin, sizeof(wordnode_T), TRUE); 6352 } 6353 6354 /* 6355 * Store a word in the tree(s). 6356 * Always store it in the case-folded tree. For a keep-case word this is 6357 * useful when the word can also be used with all caps (no WF_FIXCAP flag) and 6358 * used to find suggestions. 6359 * For a keep-case word also store it in the keep-case tree. 6360 * When "pfxlist" is not NULL store the word for each postponed prefix ID and 6361 * compound flag. 6362 */ 6363 static int 6364 store_word(spin, word, flags, region, pfxlist, need_affix) 6365 spellinfo_T *spin; 6366 char_u *word; 6367 int flags; /* extra flags, WF_BANNED */ 6368 int region; /* supported region(s) */ 6369 char_u *pfxlist; /* list of prefix IDs or NULL */ 6370 int need_affix; /* only store word with affix ID */ 6371 { 6372 int len = STRLEN(word); 6373 int ct = captype(word, word + len); 6374 char_u foldword[MAXWLEN]; 6375 int res = OK; 6376 char_u *p; 6377 6378 (void)spell_casefold(word, len, foldword, MAXWLEN); 6379 for (p = pfxlist; res == OK; ++p) 6380 { 6381 if (!need_affix || (p != NULL && *p != NUL)) 6382 res = tree_add_word(spin, foldword, spin->si_foldroot, ct | flags, 6383 region, p == NULL ? 0 : *p); 6384 if (p == NULL || *p == NUL) 6385 break; 6386 } 6387 ++spin->si_foldwcount; 6388 6389 if (res == OK && (ct == WF_KEEPCAP || (flags & WF_KEEPCAP))) 6390 { 6391 for (p = pfxlist; res == OK; ++p) 6392 { 6393 if (!need_affix || (p != NULL && *p != NUL)) 6394 res = tree_add_word(spin, word, spin->si_keeproot, flags, 6395 region, p == NULL ? 0 : *p); 6396 if (p == NULL || *p == NUL) 6397 break; 6398 } 6399 ++spin->si_keepwcount; 6400 } 6401 return res; 6402 } 6403 6404 /* 6405 * Add word "word" to a word tree at "root". 6406 * When "flags" < 0 we are adding to the prefix tree where flags is used for 6407 * "rare" and "region" is the condition nr. 6408 * Returns FAIL when out of memory. 6409 */ 6410 static int 6411 tree_add_word(spin, word, root, flags, region, affixID) 6412 spellinfo_T *spin; 6413 char_u *word; 6414 wordnode_T *root; 6415 int flags; 6416 int region; 6417 int affixID; 6418 { 6419 wordnode_T *node = root; 6420 wordnode_T *np; 6421 wordnode_T *copyp, **copyprev; 6422 wordnode_T **prev = NULL; 6423 int i; 6424 6425 /* Add each byte of the word to the tree, including the NUL at the end. */ 6426 for (i = 0; ; ++i) 6427 { 6428 /* When there is more than one reference to this node we need to make 6429 * a copy, so that we can modify it. Copy the whole list of siblings 6430 * (we don't optimize for a partly shared list of siblings). */ 6431 if (node != NULL && node->wn_refs > 1) 6432 { 6433 --node->wn_refs; 6434 copyprev = prev; 6435 for (copyp = node; copyp != NULL; copyp = copyp->wn_sibling) 6436 { 6437 /* Allocate a new node and copy the info. */ 6438 np = get_wordnode(spin); 6439 if (np == NULL) 6440 return FAIL; 6441 np->wn_child = copyp->wn_child; 6442 if (np->wn_child != NULL) 6443 ++np->wn_child->wn_refs; /* child gets extra ref */ 6444 np->wn_byte = copyp->wn_byte; 6445 if (np->wn_byte == NUL) 6446 { 6447 np->wn_flags = copyp->wn_flags; 6448 np->wn_region = copyp->wn_region; 6449 np->wn_affixID = copyp->wn_affixID; 6450 } 6451 6452 /* Link the new node in the list, there will be one ref. */ 6453 np->wn_refs = 1; 6454 *copyprev = np; 6455 copyprev = &np->wn_sibling; 6456 6457 /* Let "node" point to the head of the copied list. */ 6458 if (copyp == node) 6459 node = np; 6460 } 6461 } 6462 6463 /* Look for the sibling that has the same character. They are sorted 6464 * on byte value, thus stop searching when a sibling is found with a 6465 * higher byte value. For zero bytes (end of word) the sorting is 6466 * done on flags and then on affixID. */ 6467 while (node != NULL 6468 && (node->wn_byte < word[i] 6469 || (node->wn_byte == NUL 6470 && (flags < 0 6471 ? node->wn_affixID < affixID 6472 : node->wn_flags < (flags & WN_MASK) 6473 || (node->wn_flags == (flags & WN_MASK) 6474 && node->wn_affixID < affixID))))) 6475 { 6476 prev = &node->wn_sibling; 6477 node = *prev; 6478 } 6479 if (node == NULL 6480 || node->wn_byte != word[i] 6481 || (word[i] == NUL 6482 && (flags < 0 6483 || node->wn_flags != (flags & WN_MASK) 6484 || node->wn_affixID != affixID))) 6485 { 6486 /* Allocate a new node. */ 6487 np = get_wordnode(spin); 6488 if (np == NULL) 6489 return FAIL; 6490 np->wn_byte = word[i]; 6491 6492 /* If "node" is NULL this is a new child or the end of the sibling 6493 * list: ref count is one. Otherwise use ref count of sibling and 6494 * make ref count of sibling one (matters when inserting in front 6495 * of the list of siblings). */ 6496 if (node == NULL) 6497 np->wn_refs = 1; 6498 else 6499 { 6500 np->wn_refs = node->wn_refs; 6501 node->wn_refs = 1; 6502 } 6503 *prev = np; 6504 np->wn_sibling = node; 6505 node = np; 6506 } 6507 6508 if (word[i] == NUL) 6509 { 6510 node->wn_flags = flags; 6511 node->wn_region |= region; 6512 node->wn_affixID = affixID; 6513 break; 6514 } 6515 prev = &node->wn_child; 6516 node = *prev; 6517 } 6518 #ifdef SPELL_PRINTTREE 6519 smsg("Added \"%s\"", word); 6520 spell_print_tree(root->wn_sibling); 6521 #endif 6522 6523 /* count nr of words added since last message */ 6524 ++spin->si_msg_count; 6525 6526 if (spin->si_compress_cnt > 1) 6527 { 6528 if (--spin->si_compress_cnt == 1) 6529 /* Did enough words to lower the block count limit. */ 6530 spin->si_blocks_cnt += compress_inc; 6531 } 6532 6533 /* 6534 * When we have allocated lots of memory we need to compress the word tree 6535 * to free up some room. But compression is slow, and we might actually 6536 * need that room, thus only compress in the following situations: 6537 * 1. When not compressed before (si_compress_cnt == 0): when using 6538 * "compress_start" blocks. 6539 * 2. When compressed before and used "compress_inc" blocks before 6540 * adding "compress_added" words (si_compress_cnt > 1). 6541 * 3. When compressed before, added "compress_added" words 6542 * (si_compress_cnt == 1) and the number of free nodes drops below the 6543 * maximum word length. 6544 */ 6545 #ifndef SPELL_PRINTTREE 6546 if (spin->si_compress_cnt == 1 6547 ? spin->si_free_count < MAXWLEN 6548 : spin->si_blocks_cnt >= compress_start) 6549 #endif 6550 { 6551 /* Decrement the block counter. The effect is that we compress again 6552 * when the freed up room has been used and another "compress_inc" 6553 * blocks have been allocated. Unless "compress_added" words have 6554 * been added, then the limit is put back again. */ 6555 spin->si_blocks_cnt -= compress_inc; 6556 spin->si_compress_cnt = compress_added; 6557 6558 if (spin->si_verbose) 6559 { 6560 msg_start(); 6561 msg_puts((char_u *)_(msg_compressing)); 6562 msg_clr_eos(); 6563 msg_didout = FALSE; 6564 msg_col = 0; 6565 out_flush(); 6566 } 6567 6568 /* Compress both trees. Either they both have many nodes, which makes 6569 * compression useful, or one of them is small, which means 6570 * compression goes fast. */ 6571 wordtree_compress(spin, spin->si_foldroot); 6572 wordtree_compress(spin, spin->si_keeproot); 6573 } 6574 6575 return OK; 6576 } 6577 6578 /* 6579 * Check the 'mkspellmem' option. Return FAIL if it's wrong. 6580 * Sets "sps_flags". 6581 */ 6582 int 6583 spell_check_msm() 6584 { 6585 char_u *p = p_msm; 6586 long start = 0; 6587 long inc = 0; 6588 long added = 0; 6589 6590 if (!VIM_ISDIGIT(*p)) 6591 return FAIL; 6592 /* block count = (value * 1024) / SBLOCKSIZE (but avoid overflow)*/ 6593 start = (getdigits(&p) * 10) / (SBLOCKSIZE / 102); 6594 if (*p != ',') 6595 return FAIL; 6596 ++p; 6597 if (!VIM_ISDIGIT(*p)) 6598 return FAIL; 6599 inc = (getdigits(&p) * 102) / (SBLOCKSIZE / 10); 6600 if (*p != ',') 6601 return FAIL; 6602 ++p; 6603 if (!VIM_ISDIGIT(*p)) 6604 return FAIL; 6605 added = getdigits(&p) * 1024; 6606 if (*p != NUL) 6607 return FAIL; 6608 6609 if (start == 0 || inc == 0 || added == 0 || inc > start) 6610 return FAIL; 6611 6612 compress_start = start; 6613 compress_inc = inc; 6614 compress_added = added; 6615 return OK; 6616 } 6617 6618 6619 /* 6620 * Get a wordnode_T, either from the list of previously freed nodes or 6621 * allocate a new one. 6622 */ 6623 static wordnode_T * 6624 get_wordnode(spin) 6625 spellinfo_T *spin; 6626 { 6627 wordnode_T *n; 6628 6629 if (spin->si_first_free == NULL) 6630 n = (wordnode_T *)getroom(spin, sizeof(wordnode_T), TRUE); 6631 else 6632 { 6633 n = spin->si_first_free; 6634 spin->si_first_free = n->wn_child; 6635 vim_memset(n, 0, sizeof(wordnode_T)); 6636 --spin->si_free_count; 6637 } 6638 #ifdef SPELL_PRINTTREE 6639 n->wn_nr = ++spin->si_wordnode_nr; 6640 #endif 6641 return n; 6642 } 6643 6644 /* 6645 * Decrement the reference count on a node (which is the head of a list of 6646 * siblings). If the reference count becomes zero free the node and its 6647 * siblings. 6648 */ 6649 static void 6650 deref_wordnode(spin, node) 6651 spellinfo_T *spin; 6652 wordnode_T *node; 6653 { 6654 wordnode_T *np; 6655 6656 if (--node->wn_refs == 0) 6657 for (np = node; np != NULL; np = np->wn_sibling) 6658 { 6659 if (np->wn_child != NULL) 6660 deref_wordnode(spin, np->wn_child); 6661 free_wordnode(spin, np); 6662 } 6663 } 6664 6665 /* 6666 * Free a wordnode_T for re-use later. 6667 * Only the "wn_child" field becomes invalid. 6668 */ 6669 static void 6670 free_wordnode(spin, n) 6671 spellinfo_T *spin; 6672 wordnode_T *n; 6673 { 6674 n->wn_child = spin->si_first_free; 6675 spin->si_first_free = n; 6676 ++spin->si_free_count; 6677 } 6678 6679 /* 6680 * Compress a tree: find tails that are identical and can be shared. 6681 */ 6682 static void 6683 wordtree_compress(spin, root) 6684 spellinfo_T *spin; 6685 wordnode_T *root; 6686 { 6687 hashtab_T ht; 6688 int n; 6689 int tot = 0; 6690 int perc; 6691 6692 /* Skip the root itself, it's not actually used. The first sibling is the 6693 * start of the tree. */ 6694 if (root->wn_sibling != NULL) 6695 { 6696 hash_init(&ht); 6697 n = node_compress(spin, root->wn_sibling, &ht, &tot); 6698 6699 #ifndef SPELL_PRINTTREE 6700 if (spin->si_verbose || p_verbose > 2) 6701 #endif 6702 { 6703 if (!spin->si_verbose) 6704 verbose_enter(); 6705 if (tot > 1000000) 6706 perc = (tot - n) / (tot / 100); 6707 else if (tot == 0) 6708 perc = 0; 6709 else 6710 perc = (tot - n) * 100 / tot; 6711 smsg((char_u *)_("Compressed %d of %d nodes; %d%% remaining"), 6712 n, tot, perc); 6713 if (p_verbose > 2) 6714 verbose_leave(); 6715 } 6716 #ifdef SPELL_PRINTTREE 6717 spell_print_tree(root->wn_sibling); 6718 #endif 6719 hash_clear(&ht); 6720 } 6721 } 6722 6723 /* 6724 * Compress a node, its siblings and its children, depth first. 6725 * Returns the number of compressed nodes. 6726 */ 6727 static int 6728 node_compress(spin, node, ht, tot) 6729 spellinfo_T *spin; 6730 wordnode_T *node; 6731 hashtab_T *ht; 6732 int *tot; /* total count of nodes before compressing, 6733 incremented while going through the tree */ 6734 { 6735 wordnode_T *np; 6736 wordnode_T *tp; 6737 wordnode_T *child; 6738 hash_T hash; 6739 hashitem_T *hi; 6740 int len = 0; 6741 unsigned nr, n; 6742 int compressed = 0; 6743 6744 /* 6745 * Go through the list of siblings. Compress each child and then try 6746 * finding an identical child to replace it. 6747 * Note that with "child" we mean not just the node that is pointed to, 6748 * but the whole list of siblings, of which the node is the first. 6749 */ 6750 for (np = node; np != NULL && !got_int; np = np->wn_sibling) 6751 { 6752 ++len; 6753 if ((child = np->wn_child) != NULL) 6754 { 6755 /* Compress the child. This fills hashkey. */ 6756 compressed += node_compress(spin, child, ht, tot); 6757 6758 /* Try to find an identical child. */ 6759 hash = hash_hash(child->wn_u1.hashkey); 6760 hi = hash_lookup(ht, child->wn_u1.hashkey, hash); 6761 tp = NULL; 6762 if (!HASHITEM_EMPTY(hi)) 6763 { 6764 /* There are children with an identical hash value. Now check 6765 * if there is one that is really identical. */ 6766 for (tp = HI2WN(hi); tp != NULL; tp = tp->wn_u2.next) 6767 if (node_equal(child, tp)) 6768 { 6769 /* Found one! Now use that child in place of the 6770 * current one. This means the current child and all 6771 * its siblings is unlinked from the tree. */ 6772 ++tp->wn_refs; 6773 deref_wordnode(spin, child); 6774 np->wn_child = tp; 6775 ++compressed; 6776 break; 6777 } 6778 if (tp == NULL) 6779 { 6780 /* No other child with this hash value equals the child of 6781 * the node, add it to the linked list after the first 6782 * item. */ 6783 tp = HI2WN(hi); 6784 child->wn_u2.next = tp->wn_u2.next; 6785 tp->wn_u2.next = child; 6786 } 6787 } 6788 else 6789 /* No other child has this hash value, add it to the 6790 * hashtable. */ 6791 hash_add_item(ht, hi, child->wn_u1.hashkey, hash); 6792 } 6793 } 6794 *tot += len; 6795 6796 /* 6797 * Make a hash key for the node and its siblings, so that we can quickly 6798 * find a lookalike node. This must be done after compressing the sibling 6799 * list, otherwise the hash key would become invalid by the compression. 6800 */ 6801 node->wn_u1.hashkey[0] = len; 6802 nr = 0; 6803 for (np = node; np != NULL; np = np->wn_sibling) 6804 { 6805 if (np->wn_byte == NUL) 6806 /* end node: use wn_flags, wn_region and wn_affixID */ 6807 n = np->wn_flags + (np->wn_region << 8) + (np->wn_affixID << 16); 6808 else 6809 /* byte node: use the byte value and the child pointer */ 6810 n = np->wn_byte + ((long_u)np->wn_child << 8); 6811 nr = nr * 101 + n; 6812 } 6813 6814 /* Avoid NUL bytes, it terminates the hash key. */ 6815 n = nr & 0xff; 6816 node->wn_u1.hashkey[1] = n == 0 ? 1 : n; 6817 n = (nr >> 8) & 0xff; 6818 node->wn_u1.hashkey[2] = n == 0 ? 1 : n; 6819 n = (nr >> 16) & 0xff; 6820 node->wn_u1.hashkey[3] = n == 0 ? 1 : n; 6821 n = (nr >> 24) & 0xff; 6822 node->wn_u1.hashkey[4] = n == 0 ? 1 : n; 6823 node->wn_u1.hashkey[5] = NUL; 6824 6825 /* Check for CTRL-C pressed now and then. */ 6826 fast_breakcheck(); 6827 6828 return compressed; 6829 } 6830 6831 /* 6832 * Return TRUE when two nodes have identical siblings and children. 6833 */ 6834 static int 6835 node_equal(n1, n2) 6836 wordnode_T *n1; 6837 wordnode_T *n2; 6838 { 6839 wordnode_T *p1; 6840 wordnode_T *p2; 6841 6842 for (p1 = n1, p2 = n2; p1 != NULL && p2 != NULL; 6843 p1 = p1->wn_sibling, p2 = p2->wn_sibling) 6844 if (p1->wn_byte != p2->wn_byte 6845 || (p1->wn_byte == NUL 6846 ? (p1->wn_flags != p2->wn_flags 6847 || p1->wn_region != p2->wn_region 6848 || p1->wn_affixID != p2->wn_affixID) 6849 : (p1->wn_child != p2->wn_child))) 6850 break; 6851 6852 return p1 == NULL && p2 == NULL; 6853 } 6854 6855 /* 6856 * Write a number to file "fd", MSB first, in "len" bytes. 6857 */ 6858 void 6859 put_bytes(fd, nr, len) 6860 FILE *fd; 6861 long_u nr; 6862 int len; 6863 { 6864 int i; 6865 6866 for (i = len - 1; i >= 0; --i) 6867 putc((int)(nr >> (i * 8)), fd); 6868 } 6869 6870 static int 6871 #ifdef __BORLANDC__ 6872 _RTLENTRYF 6873 #endif 6874 rep_compare __ARGS((const void *s1, const void *s2)); 6875 6876 /* 6877 * Function given to qsort() to sort the REP items on "from" string. 6878 */ 6879 static int 6880 #ifdef __BORLANDC__ 6881 _RTLENTRYF 6882 #endif 6883 rep_compare(s1, s2) 6884 const void *s1; 6885 const void *s2; 6886 { 6887 fromto_T *p1 = (fromto_T *)s1; 6888 fromto_T *p2 = (fromto_T *)s2; 6889 6890 return STRCMP(p1->ft_from, p2->ft_from); 6891 } 6892 6893 /* 6894 * Write the Vim .spl file "fname". 6895 * Return FAIL or OK; 6896 */ 6897 static int 6898 write_vim_spell(spin, fname) 6899 spellinfo_T *spin; 6900 char_u *fname; 6901 { 6902 FILE *fd; 6903 int regionmask; 6904 int round; 6905 wordnode_T *tree; 6906 int nodecount; 6907 int i; 6908 int l; 6909 garray_T *gap; 6910 fromto_T *ftp; 6911 char_u *p; 6912 int rr; 6913 int retval = OK; 6914 6915 fd = mch_fopen((char *)fname, "w"); 6916 if (fd == NULL) 6917 { 6918 EMSG2(_(e_notopen), fname); 6919 return FAIL; 6920 } 6921 6922 /* <HEADER>: <fileID> <versionnr> */ 6923 /* <fileID> */ 6924 if (fwrite(VIMSPELLMAGIC, VIMSPELLMAGICL, (size_t)1, fd) != 1) 6925 { 6926 EMSG(_(e_write)); 6927 retval = FAIL; 6928 } 6929 putc(VIMSPELLVERSION, fd); /* <versionnr> */ 6930 6931 /* 6932 * <SECTIONS>: <section> ... <sectionend> 6933 */ 6934 6935 /* SN_REGION: <regionname> ... 6936 * Write the region names only if there is more than one. */ 6937 if (spin->si_region_count > 1) 6938 { 6939 putc(SN_REGION, fd); /* <sectionID> */ 6940 putc(SNF_REQUIRED, fd); /* <sectionflags> */ 6941 l = spin->si_region_count * 2; 6942 put_bytes(fd, (long_u)l, 4); /* <sectionlen> */ 6943 fwrite(spin->si_region_name, (size_t)l, (size_t)1, fd); 6944 /* <regionname> ... */ 6945 regionmask = (1 << spin->si_region_count) - 1; 6946 } 6947 else 6948 regionmask = 0; 6949 6950 /* SN_CHARFLAGS: <charflagslen> <charflags> <folcharslen> <folchars> 6951 * 6952 * The table with character flags and the table for case folding. 6953 * This makes sure the same characters are recognized as word characters 6954 * when generating an when using a spell file. 6955 * Skip this for ASCII, the table may conflict with the one used for 6956 * 'encoding'. 6957 * Also skip this for an .add.spl file, the main spell file must contain 6958 * the table (avoids that it conflicts). File is shorter too. 6959 */ 6960 if (!spin->si_ascii && !spin->si_add) 6961 { 6962 char_u folchars[128 * 8]; 6963 int flags; 6964 6965 putc(SN_CHARFLAGS, fd); /* <sectionID> */ 6966 putc(SNF_REQUIRED, fd); /* <sectionflags> */ 6967 6968 /* Form the <folchars> string first, we need to know its length. */ 6969 l = 0; 6970 for (i = 128; i < 256; ++i) 6971 { 6972 #ifdef FEAT_MBYTE 6973 if (has_mbyte) 6974 l += mb_char2bytes(spelltab.st_fold[i], folchars + l); 6975 else 6976 #endif 6977 folchars[l++] = spelltab.st_fold[i]; 6978 } 6979 put_bytes(fd, (long_u)(1 + 128 + 2 + l), 4); /* <sectionlen> */ 6980 6981 fputc(128, fd); /* <charflagslen> */ 6982 for (i = 128; i < 256; ++i) 6983 { 6984 flags = 0; 6985 if (spelltab.st_isw[i]) 6986 flags |= CF_WORD; 6987 if (spelltab.st_isu[i]) 6988 flags |= CF_UPPER; 6989 fputc(flags, fd); /* <charflags> */ 6990 } 6991 6992 put_bytes(fd, (long_u)l, 2); /* <folcharslen> */ 6993 fwrite(folchars, (size_t)l, (size_t)1, fd); /* <folchars> */ 6994 } 6995 6996 /* SN_MIDWORD: <midword> */ 6997 if (spin->si_midword != NULL) 6998 { 6999 putc(SN_MIDWORD, fd); /* <sectionID> */ 7000 putc(SNF_REQUIRED, fd); /* <sectionflags> */ 7001 7002 i = STRLEN(spin->si_midword); 7003 put_bytes(fd, (long_u)i, 4); /* <sectionlen> */ 7004 fwrite(spin->si_midword, (size_t)i, (size_t)1, fd); /* <midword> */ 7005 } 7006 7007 /* SN_PREFCOND: <prefcondcnt> <prefcond> ... */ 7008 if (spin->si_prefcond.ga_len > 0) 7009 { 7010 putc(SN_PREFCOND, fd); /* <sectionID> */ 7011 putc(SNF_REQUIRED, fd); /* <sectionflags> */ 7012 7013 l = write_spell_prefcond(NULL, &spin->si_prefcond); 7014 put_bytes(fd, (long_u)l, 4); /* <sectionlen> */ 7015 7016 write_spell_prefcond(fd, &spin->si_prefcond); 7017 } 7018 7019 /* SN_REP: <repcount> <rep> ... 7020 * SN_SAL: <salflags> <salcount> <sal> ... */ 7021 7022 /* Sort the REP items. */ 7023 qsort(spin->si_rep.ga_data, (size_t)spin->si_rep.ga_len, 7024 sizeof(fromto_T), rep_compare); 7025 7026 /* round 1: SN_REP section 7027 * round 2: SN_SAL section (unless SN_SOFO is used) */ 7028 for (round = 1; round <= 2; ++round) 7029 { 7030 if (round == 1) 7031 { 7032 gap = &spin->si_rep; 7033 putc(SN_REP, fd); /* <sectionID> */ 7034 } 7035 else 7036 { 7037 if (spin->si_sofofr != NULL && spin->si_sofoto != NULL) 7038 /* using SN_SOFO section instead of SN_SAL */ 7039 break; 7040 gap = &spin->si_sal; 7041 putc(SN_SAL, fd); /* <sectionID> */ 7042 } 7043 7044 /* This is for making suggestions, section is not required. */ 7045 putc(0, fd); /* <sectionflags> */ 7046 7047 /* Compute the length of what follows. */ 7048 l = 2; /* count <repcount> or <salcount> */ 7049 for (i = 0; i < gap->ga_len; ++i) 7050 { 7051 ftp = &((fromto_T *)gap->ga_data)[i]; 7052 l += 1 + STRLEN(ftp->ft_from); /* count <*fromlen> and <*from> */ 7053 l += 1 + STRLEN(ftp->ft_to); /* count <*tolen> and <*to> */ 7054 } 7055 if (round == 2) 7056 ++l; /* count <salflags> */ 7057 put_bytes(fd, (long_u)l, 4); /* <sectionlen> */ 7058 7059 if (round == 2) 7060 { 7061 i = 0; 7062 if (spin->si_followup) 7063 i |= SAL_F0LLOWUP; 7064 if (spin->si_collapse) 7065 i |= SAL_COLLAPSE; 7066 if (spin->si_rem_accents) 7067 i |= SAL_REM_ACCENTS; 7068 putc(i, fd); /* <salflags> */ 7069 } 7070 7071 put_bytes(fd, (long_u)gap->ga_len, 2); /* <repcount> or <salcount> */ 7072 for (i = 0; i < gap->ga_len; ++i) 7073 { 7074 /* <rep> : <repfromlen> <repfrom> <reptolen> <repto> */ 7075 /* <sal> : <salfromlen> <salfrom> <saltolen> <salto> */ 7076 ftp = &((fromto_T *)gap->ga_data)[i]; 7077 for (rr = 1; rr <= 2; ++rr) 7078 { 7079 p = rr == 1 ? ftp->ft_from : ftp->ft_to; 7080 l = STRLEN(p); 7081 putc(l, fd); 7082 fwrite(p, l, (size_t)1, fd); 7083 } 7084 } 7085 7086 } 7087 7088 /* SN_SOFO: <sofofromlen> <sofofrom> <sofotolen> <sofoto> 7089 * This is for making suggestions, section is not required. */ 7090 if (spin->si_sofofr != NULL && spin->si_sofoto != NULL) 7091 { 7092 putc(SN_SOFO, fd); /* <sectionID> */ 7093 putc(0, fd); /* <sectionflags> */ 7094 7095 l = STRLEN(spin->si_sofofr); 7096 put_bytes(fd, (long_u)(l + STRLEN(spin->si_sofoto) + 4), 4); 7097 /* <sectionlen> */ 7098 7099 put_bytes(fd, (long_u)l, 2); /* <sofofromlen> */ 7100 fwrite(spin->si_sofofr, l, (size_t)1, fd); /* <sofofrom> */ 7101 7102 l = STRLEN(spin->si_sofoto); 7103 put_bytes(fd, (long_u)l, 2); /* <sofotolen> */ 7104 fwrite(spin->si_sofoto, l, (size_t)1, fd); /* <sofoto> */ 7105 } 7106 7107 /* SN_MAP: <mapstr> 7108 * This is for making suggestions, section is not required. */ 7109 if (spin->si_map.ga_len > 0) 7110 { 7111 putc(SN_MAP, fd); /* <sectionID> */ 7112 putc(0, fd); /* <sectionflags> */ 7113 l = spin->si_map.ga_len; 7114 put_bytes(fd, (long_u)l, 4); /* <sectionlen> */ 7115 fwrite(spin->si_map.ga_data, (size_t)l, (size_t)1, fd); 7116 /* <mapstr> */ 7117 } 7118 7119 /* SN_COMPOUND: compound info. 7120 * We don't mark it required, when not supported all compound words will 7121 * be bad words. */ 7122 if (spin->si_compflags != NULL) 7123 { 7124 putc(SN_COMPOUND, fd); /* <sectionID> */ 7125 putc(0, fd); /* <sectionflags> */ 7126 7127 l = STRLEN(spin->si_compflags); 7128 put_bytes(fd, (long_u)(l + 3), 4); /* <sectionlen> */ 7129 putc(spin->si_compmax, fd); /* <compmax> */ 7130 putc(spin->si_compminlen, fd); /* <compminlen> */ 7131 putc(spin->si_compsylmax, fd); /* <compsylmax> */ 7132 /* <compflags> */ 7133 fwrite(spin->si_compflags, (size_t)l, (size_t)1, fd); 7134 } 7135 7136 /* SN_NOBREAK: NOBREAK flag */ 7137 if (spin->si_nobreak) 7138 { 7139 putc(SN_NOBREAK, fd); /* <sectionID> */ 7140 putc(0, fd); /* <sectionflags> */ 7141 7142 /* It's empty, the precense of the section flags the feature. */ 7143 put_bytes(fd, (long_u)0, 4); /* <sectionlen> */ 7144 } 7145 7146 /* SN_SYLLABLE: syllable info. 7147 * We don't mark it required, when not supported syllables will not be 7148 * counted. */ 7149 if (spin->si_syllable != NULL) 7150 { 7151 putc(SN_SYLLABLE, fd); /* <sectionID> */ 7152 putc(0, fd); /* <sectionflags> */ 7153 7154 l = STRLEN(spin->si_syllable); 7155 put_bytes(fd, (long_u)l, 4); /* <sectionlen> */ 7156 fwrite(spin->si_syllable, (size_t)l, (size_t)1, fd); /* <syllable> */ 7157 } 7158 7159 /* end of <SECTIONS> */ 7160 putc(SN_END, fd); /* <sectionend> */ 7161 7162 7163 /* 7164 * <LWORDTREE> <KWORDTREE> <PREFIXTREE> 7165 */ 7166 spin->si_memtot = 0; 7167 for (round = 1; round <= 3; ++round) 7168 { 7169 if (round == 1) 7170 tree = spin->si_foldroot->wn_sibling; 7171 else if (round == 2) 7172 tree = spin->si_keeproot->wn_sibling; 7173 else 7174 tree = spin->si_prefroot->wn_sibling; 7175 7176 /* Clear the index and wnode fields in the tree. */ 7177 clear_node(tree); 7178 7179 /* Count the number of nodes. Needed to be able to allocate the 7180 * memory when reading the nodes. Also fills in index for shared 7181 * nodes. */ 7182 nodecount = put_node(NULL, tree, 0, regionmask, round == 3); 7183 7184 /* number of nodes in 4 bytes */ 7185 put_bytes(fd, (long_u)nodecount, 4); /* <nodecount> */ 7186 spin->si_memtot += nodecount + nodecount * sizeof(int); 7187 7188 /* Write the nodes. */ 7189 (void)put_node(fd, tree, 0, regionmask, round == 3); 7190 } 7191 7192 /* Write another byte to check for errors. */ 7193 if (putc(0, fd) == EOF) 7194 retval = FAIL; 7195 7196 if (fclose(fd) == EOF) 7197 retval = FAIL; 7198 7199 return retval; 7200 } 7201 7202 /* 7203 * Clear the index and wnode fields of "node", it siblings and its 7204 * children. This is needed because they are a union with other items to save 7205 * space. 7206 */ 7207 static void 7208 clear_node(node) 7209 wordnode_T *node; 7210 { 7211 wordnode_T *np; 7212 7213 if (node != NULL) 7214 for (np = node; np != NULL; np = np->wn_sibling) 7215 { 7216 np->wn_u1.index = 0; 7217 np->wn_u2.wnode = NULL; 7218 7219 if (np->wn_byte != NUL) 7220 clear_node(np->wn_child); 7221 } 7222 } 7223 7224 7225 /* 7226 * Dump a word tree at node "node". 7227 * 7228 * This first writes the list of possible bytes (siblings). Then for each 7229 * byte recursively write the children. 7230 * 7231 * NOTE: The code here must match the code in read_tree(), since assumptions 7232 * are made about the indexes (so that we don't have to write them in the 7233 * file). 7234 * 7235 * Returns the number of nodes used. 7236 */ 7237 static int 7238 put_node(fd, node, index, regionmask, prefixtree) 7239 FILE *fd; /* NULL when only counting */ 7240 wordnode_T *node; 7241 int index; 7242 int regionmask; 7243 int prefixtree; /* TRUE for PREFIXTREE */ 7244 { 7245 int newindex = index; 7246 int siblingcount = 0; 7247 wordnode_T *np; 7248 int flags; 7249 7250 /* If "node" is zero the tree is empty. */ 7251 if (node == NULL) 7252 return 0; 7253 7254 /* Store the index where this node is written. */ 7255 node->wn_u1.index = index; 7256 7257 /* Count the number of siblings. */ 7258 for (np = node; np != NULL; np = np->wn_sibling) 7259 ++siblingcount; 7260 7261 /* Write the sibling count. */ 7262 if (fd != NULL) 7263 putc(siblingcount, fd); /* <siblingcount> */ 7264 7265 /* Write each sibling byte and optionally extra info. */ 7266 for (np = node; np != NULL; np = np->wn_sibling) 7267 { 7268 if (np->wn_byte == 0) 7269 { 7270 if (fd != NULL) 7271 { 7272 /* For a NUL byte (end of word) write the flags etc. */ 7273 if (prefixtree) 7274 { 7275 /* In PREFIXTREE write the required affixID and the 7276 * associated condition nr (stored in wn_region). The 7277 * byte value is misused to store the "rare" and "not 7278 * combining" flags */ 7279 if (np->wn_flags == (short_u)PFX_FLAGS) 7280 putc(BY_NOFLAGS, fd); /* <byte> */ 7281 else 7282 { 7283 putc(BY_FLAGS, fd); /* <byte> */ 7284 putc(np->wn_flags, fd); /* <pflags> */ 7285 } 7286 putc(np->wn_affixID, fd); /* <affixID> */ 7287 put_bytes(fd, (long_u)np->wn_region, 2); /* <prefcondnr> */ 7288 } 7289 else 7290 { 7291 /* For word trees we write the flag/region items. */ 7292 flags = np->wn_flags; 7293 if (regionmask != 0 && np->wn_region != regionmask) 7294 flags |= WF_REGION; 7295 if (np->wn_affixID != 0) 7296 flags |= WF_AFX; 7297 if (flags == 0) 7298 { 7299 /* word without flags or region */ 7300 putc(BY_NOFLAGS, fd); /* <byte> */ 7301 } 7302 else 7303 { 7304 if (np->wn_flags >= 0x100) 7305 { 7306 putc(BY_FLAGS2, fd); /* <byte> */ 7307 putc(flags, fd); /* <flags> */ 7308 putc((unsigned)flags >> 8, fd); /* <flags2> */ 7309 } 7310 else 7311 { 7312 putc(BY_FLAGS, fd); /* <byte> */ 7313 putc(flags, fd); /* <flags> */ 7314 } 7315 if (flags & WF_REGION) 7316 putc(np->wn_region, fd); /* <region> */ 7317 if (flags & WF_AFX) 7318 putc(np->wn_affixID, fd); /* <affixID> */ 7319 } 7320 } 7321 } 7322 } 7323 else 7324 { 7325 if (np->wn_child->wn_u1.index != 0 7326 && np->wn_child->wn_u2.wnode != node) 7327 { 7328 /* The child is written elsewhere, write the reference. */ 7329 if (fd != NULL) 7330 { 7331 putc(BY_INDEX, fd); /* <byte> */ 7332 /* <nodeidx> */ 7333 put_bytes(fd, (long_u)np->wn_child->wn_u1.index, 3); 7334 } 7335 } 7336 else if (np->wn_child->wn_u2.wnode == NULL) 7337 /* We will write the child below and give it an index. */ 7338 np->wn_child->wn_u2.wnode = node; 7339 7340 if (fd != NULL) 7341 if (putc(np->wn_byte, fd) == EOF) /* <byte> or <xbyte> */ 7342 { 7343 EMSG(_(e_write)); 7344 return 0; 7345 } 7346 } 7347 } 7348 7349 /* Space used in the array when reading: one for each sibling and one for 7350 * the count. */ 7351 newindex += siblingcount + 1; 7352 7353 /* Recursively dump the children of each sibling. */ 7354 for (np = node; np != NULL; np = np->wn_sibling) 7355 if (np->wn_byte != 0 && np->wn_child->wn_u2.wnode == node) 7356 newindex = put_node(fd, np->wn_child, newindex, regionmask, 7357 prefixtree); 7358 7359 return newindex; 7360 } 7361 7362 7363 /* 7364 * ":mkspell [-ascii] outfile infile ..." 7365 * ":mkspell [-ascii] addfile" 7366 */ 7367 void 7368 ex_mkspell(eap) 7369 exarg_T *eap; 7370 { 7371 int fcount; 7372 char_u **fnames; 7373 char_u *arg = eap->arg; 7374 int ascii = FALSE; 7375 7376 if (STRNCMP(arg, "-ascii", 6) == 0) 7377 { 7378 ascii = TRUE; 7379 arg = skipwhite(arg + 6); 7380 } 7381 7382 /* Expand all the remaining arguments (e.g., $VIMRUNTIME). */ 7383 if (get_arglist_exp(arg, &fcount, &fnames) == OK) 7384 { 7385 mkspell(fcount, fnames, ascii, eap->forceit, FALSE); 7386 FreeWild(fcount, fnames); 7387 } 7388 } 7389 7390 /* 7391 * Create a Vim spell file from one or more word lists. 7392 * "fnames[0]" is the output file name. 7393 * "fnames[fcount - 1]" is the last input file name. 7394 * Exception: when "fnames[0]" ends in ".add" it's used as the input file name 7395 * and ".spl" is appended to make the output file name. 7396 */ 7397 static void 7398 mkspell(fcount, fnames, ascii, overwrite, added_word) 7399 int fcount; 7400 char_u **fnames; 7401 int ascii; /* -ascii argument given */ 7402 int overwrite; /* overwrite existing output file */ 7403 int added_word; /* invoked through "zg" */ 7404 { 7405 char_u fname[MAXPATHL]; 7406 char_u wfname[MAXPATHL]; 7407 char_u **innames; 7408 int incount; 7409 afffile_T *(afile[8]); 7410 int i; 7411 int len; 7412 struct stat st; 7413 int error = FALSE; 7414 spellinfo_T spin; 7415 7416 vim_memset(&spin, 0, sizeof(spin)); 7417 spin.si_verbose = !added_word; 7418 spin.si_ascii = ascii; 7419 spin.si_followup = TRUE; 7420 spin.si_rem_accents = TRUE; 7421 ga_init2(&spin.si_rep, (int)sizeof(fromto_T), 20); 7422 ga_init2(&spin.si_sal, (int)sizeof(fromto_T), 20); 7423 ga_init2(&spin.si_map, (int)sizeof(char_u), 100); 7424 ga_init2(&spin.si_prefcond, (int)sizeof(char_u *), 50); 7425 spin.si_newcompID = 127; /* start compound ID at first maximum */ 7426 7427 /* default: fnames[0] is output file, following are input files */ 7428 innames = &fnames[1]; 7429 incount = fcount - 1; 7430 7431 if (fcount >= 1) 7432 { 7433 len = STRLEN(fnames[0]); 7434 if (fcount == 1 && len > 4 && STRCMP(fnames[0] + len - 4, ".add") == 0) 7435 { 7436 /* For ":mkspell path/en.latin1.add" output file is 7437 * "path/en.latin1.add.spl". */ 7438 innames = &fnames[0]; 7439 incount = 1; 7440 vim_snprintf((char *)wfname, sizeof(wfname), "%s.spl", fnames[0]); 7441 } 7442 else if (fcount == 1) 7443 { 7444 /* For ":mkspell path/vim" output file is "path/vim.latin1.spl". */ 7445 innames = &fnames[0]; 7446 incount = 1; 7447 vim_snprintf((char *)wfname, sizeof(wfname), "%s.%s.spl", fnames[0], 7448 spin.si_ascii ? (char_u *)"ascii" : spell_enc()); 7449 } 7450 else if (len > 4 && STRCMP(fnames[0] + len - 4, ".spl") == 0) 7451 { 7452 /* Name ends in ".spl", use as the file name. */ 7453 vim_strncpy(wfname, fnames[0], sizeof(wfname) - 1); 7454 } 7455 else 7456 /* Name should be language, make the file name from it. */ 7457 vim_snprintf((char *)wfname, sizeof(wfname), "%s.%s.spl", fnames[0], 7458 spin.si_ascii ? (char_u *)"ascii" : spell_enc()); 7459 7460 /* Check for .ascii.spl. */ 7461 if (strstr((char *)gettail(wfname), ".ascii.") != NULL) 7462 spin.si_ascii = TRUE; 7463 7464 /* Check for .add.spl. */ 7465 if (strstr((char *)gettail(wfname), ".add.") != NULL) 7466 spin.si_add = TRUE; 7467 } 7468 7469 if (incount <= 0) 7470 EMSG(_(e_invarg)); /* need at least output and input names */ 7471 else if (vim_strchr(gettail(wfname), '_') != NULL) 7472 EMSG(_("E751: Output file name must not have region name")); 7473 else if (incount > 8) 7474 EMSG(_("E754: Only up to 8 regions supported")); 7475 else 7476 { 7477 /* Check for overwriting before doing things that may take a lot of 7478 * time. */ 7479 if (!overwrite && mch_stat((char *)wfname, &st) >= 0) 7480 { 7481 EMSG(_(e_exists)); 7482 return; 7483 } 7484 if (mch_isdir(wfname)) 7485 { 7486 EMSG2(_(e_isadir2), wfname); 7487 return; 7488 } 7489 7490 /* 7491 * Init the aff and dic pointers. 7492 * Get the region names if there are more than 2 arguments. 7493 */ 7494 for (i = 0; i < incount; ++i) 7495 { 7496 afile[i] = NULL; 7497 7498 if (incount > 1) 7499 { 7500 len = STRLEN(innames[i]); 7501 if (STRLEN(gettail(innames[i])) < 5 7502 || innames[i][len - 3] != '_') 7503 { 7504 EMSG2(_("E755: Invalid region in %s"), innames[i]); 7505 return; 7506 } 7507 spin.si_region_name[i * 2] = TOLOWER_ASC(innames[i][len - 2]); 7508 spin.si_region_name[i * 2 + 1] = 7509 TOLOWER_ASC(innames[i][len - 1]); 7510 } 7511 } 7512 spin.si_region_count = incount; 7513 7514 spin.si_foldroot = wordtree_alloc(&spin); 7515 spin.si_keeproot = wordtree_alloc(&spin); 7516 spin.si_prefroot = wordtree_alloc(&spin); 7517 if (spin.si_foldroot == NULL 7518 || spin.si_keeproot == NULL 7519 || spin.si_prefroot == NULL) 7520 { 7521 free_blocks(spin.si_blocks); 7522 return; 7523 } 7524 7525 /* When not producing a .add.spl file clear the character table when 7526 * we encounter one in the .aff file. This means we dump the current 7527 * one in the .spl file if the .aff file doesn't define one. That's 7528 * better than guessing the contents, the table will match a 7529 * previously loaded spell file. */ 7530 if (!spin.si_add) 7531 spin.si_clear_chartab = TRUE; 7532 7533 /* 7534 * Read all the .aff and .dic files. 7535 * Text is converted to 'encoding'. 7536 * Words are stored in the case-folded and keep-case trees. 7537 */ 7538 for (i = 0; i < incount && !error; ++i) 7539 { 7540 spin.si_conv.vc_type = CONV_NONE; 7541 spin.si_region = 1 << i; 7542 7543 vim_snprintf((char *)fname, sizeof(fname), "%s.aff", innames[i]); 7544 if (mch_stat((char *)fname, &st) >= 0) 7545 { 7546 /* Read the .aff file. Will init "spin->si_conv" based on the 7547 * "SET" line. */ 7548 afile[i] = spell_read_aff(&spin, fname); 7549 if (afile[i] == NULL) 7550 error = TRUE; 7551 else 7552 { 7553 /* Read the .dic file and store the words in the trees. */ 7554 vim_snprintf((char *)fname, sizeof(fname), "%s.dic", 7555 innames[i]); 7556 if (spell_read_dic(&spin, fname, afile[i]) == FAIL) 7557 error = TRUE; 7558 } 7559 } 7560 else 7561 { 7562 /* No .aff file, try reading the file as a word list. Store 7563 * the words in the trees. */ 7564 if (spell_read_wordfile(&spin, innames[i]) == FAIL) 7565 error = TRUE; 7566 } 7567 7568 #ifdef FEAT_MBYTE 7569 /* Free any conversion stuff. */ 7570 convert_setup(&spin.si_conv, NULL, NULL); 7571 #endif 7572 } 7573 7574 if (spin.si_compflags != NULL && spin.si_nobreak) 7575 MSG(_("Warning: both compounding and NOBREAK specified")); 7576 7577 if (!error) 7578 { 7579 /* 7580 * Combine tails in the tree. 7581 */ 7582 if (spin.si_verbose || p_verbose > 2) 7583 { 7584 if (!spin.si_verbose) 7585 verbose_enter(); 7586 MSG(_(msg_compressing)); 7587 out_flush(); 7588 if (!spin.si_verbose) 7589 verbose_leave(); 7590 } 7591 wordtree_compress(&spin, spin.si_foldroot); 7592 wordtree_compress(&spin, spin.si_keeproot); 7593 wordtree_compress(&spin, spin.si_prefroot); 7594 } 7595 7596 if (!error) 7597 { 7598 /* 7599 * Write the info in the spell file. 7600 */ 7601 if (spin.si_verbose || p_verbose > 2) 7602 { 7603 if (!spin.si_verbose) 7604 verbose_enter(); 7605 smsg((char_u *)_("Writing spell file %s ..."), wfname); 7606 out_flush(); 7607 if (!spin.si_verbose) 7608 verbose_leave(); 7609 } 7610 7611 error = write_vim_spell(&spin, wfname) == FAIL; 7612 7613 if (spin.si_verbose || p_verbose > 2) 7614 { 7615 if (!spin.si_verbose) 7616 verbose_enter(); 7617 MSG(_("Done!")); 7618 smsg((char_u *)_("Estimated runtime memory use: %d bytes"), 7619 spin.si_memtot); 7620 out_flush(); 7621 if (!spin.si_verbose) 7622 verbose_leave(); 7623 } 7624 7625 /* If the file is loaded need to reload it. */ 7626 if (!error) 7627 spell_reload_one(wfname, added_word); 7628 } 7629 7630 /* Free the allocated memory. */ 7631 ga_clear(&spin.si_rep); 7632 ga_clear(&spin.si_sal); 7633 ga_clear(&spin.si_map); 7634 ga_clear(&spin.si_prefcond); 7635 7636 /* Free the .aff file structures. */ 7637 for (i = 0; i < incount; ++i) 7638 if (afile[i] != NULL) 7639 spell_free_aff(afile[i]); 7640 7641 /* Free all the bits and pieces at once. */ 7642 free_blocks(spin.si_blocks); 7643 } 7644 } 7645 7646 7647 /* 7648 * ":[count]spellgood {word}" 7649 * ":[count]spellwrong {word}" 7650 */ 7651 void 7652 ex_spell(eap) 7653 exarg_T *eap; 7654 { 7655 spell_add_word(eap->arg, STRLEN(eap->arg), eap->cmdidx == CMD_spellwrong, 7656 eap->forceit ? 0 : (int)eap->line2); 7657 } 7658 7659 /* 7660 * Add "word[len]" to 'spellfile' as a good or bad word. 7661 */ 7662 void 7663 spell_add_word(word, len, bad, index) 7664 char_u *word; 7665 int len; 7666 int bad; 7667 int index; /* "zG" and "zW": zero, otherwise index in 7668 'spellfile' */ 7669 { 7670 FILE *fd; 7671 buf_T *buf = NULL; 7672 int new_spf = FALSE; 7673 struct stat st; 7674 char_u *fname; 7675 char_u fnamebuf[MAXPATHL]; 7676 char_u line[MAXWLEN * 2]; 7677 long fpos, fpos_next = 0; 7678 int i; 7679 char_u *spf; 7680 7681 if (index == 0) /* use internal wordlist */ 7682 { 7683 if (int_wordlist == NULL) 7684 { 7685 int_wordlist = vim_tempname('s'); 7686 if (int_wordlist == NULL) 7687 return; 7688 } 7689 fname = int_wordlist; 7690 } 7691 else 7692 { 7693 /* If 'spellfile' isn't set figure out a good default value. */ 7694 if (*curbuf->b_p_spf == NUL) 7695 { 7696 init_spellfile(); 7697 new_spf = TRUE; 7698 } 7699 7700 if (*curbuf->b_p_spf == NUL) 7701 { 7702 EMSG(_("E764: 'spellfile' is not set")); 7703 return; 7704 } 7705 7706 for (spf = curbuf->b_p_spf, i = 1; *spf != NUL; ++i) 7707 { 7708 copy_option_part(&spf, fnamebuf, MAXPATHL, ","); 7709 if (i == index) 7710 break; 7711 if (*spf == NUL) 7712 { 7713 EMSGN(_("E765: 'spellfile' does not have %ld entries"), index); 7714 return; 7715 } 7716 } 7717 7718 /* Check that the user isn't editing the .add file somewhere. */ 7719 buf = buflist_findname_exp(fnamebuf); 7720 if (buf != NULL && buf->b_ml.ml_mfp == NULL) 7721 buf = NULL; 7722 if (buf != NULL && bufIsChanged(buf)) 7723 { 7724 EMSG(_(e_bufloaded)); 7725 return; 7726 } 7727 7728 fname = fnamebuf; 7729 } 7730 7731 if (bad) 7732 { 7733 /* When the word also appears as good word we need to remove that one, 7734 * since its flags sort before the one with WF_BANNED. */ 7735 fd = mch_fopen((char *)fname, "r"); 7736 if (fd != NULL) 7737 { 7738 while (!vim_fgets(line, MAXWLEN * 2, fd)) 7739 { 7740 fpos = fpos_next; 7741 fpos_next = ftell(fd); 7742 if (STRNCMP(word, line, len) == 0 7743 && (line[len] == '/' || line[len] < ' ')) 7744 { 7745 /* Found duplicate word. Remove it by writing a '#' at 7746 * the start of the line. Mixing reading and writing 7747 * doesn't work for all systems, close the file first. */ 7748 fclose(fd); 7749 fd = mch_fopen((char *)fname, "r+"); 7750 if (fd == NULL) 7751 break; 7752 if (fseek(fd, fpos, SEEK_SET) == 0) 7753 fputc('#', fd); 7754 fseek(fd, fpos_next, SEEK_SET); 7755 } 7756 } 7757 fclose(fd); 7758 } 7759 } 7760 7761 fd = mch_fopen((char *)fname, "a"); 7762 if (fd == NULL && new_spf) 7763 { 7764 /* We just initialized the 'spellfile' option and can't open the file. 7765 * We may need to create the "spell" directory first. We already 7766 * checked the runtime directory is writable in init_spellfile(). */ 7767 STRCPY(NameBuff, fname); 7768 *gettail_sep(NameBuff) = NUL; 7769 if (mch_stat((char *)NameBuff, &st) < 0) 7770 { 7771 /* The directory doesn't exist. Try creating it and opening the 7772 * file again. */ 7773 vim_mkdir(NameBuff, 0755); 7774 fd = mch_fopen((char *)fname, "a"); 7775 } 7776 } 7777 7778 if (fd == NULL) 7779 EMSG2(_(e_notopen), fname); 7780 else 7781 { 7782 if (bad) 7783 fprintf(fd, "%.*s/!\n", len, word); 7784 else 7785 fprintf(fd, "%.*s\n", len, word); 7786 fclose(fd); 7787 7788 /* Update the .add.spl file. */ 7789 mkspell(1, &fname, FALSE, TRUE, TRUE); 7790 7791 /* If the .add file is edited somewhere, reload it. */ 7792 if (buf != NULL) 7793 buf_reload(buf); 7794 7795 redraw_all_later(NOT_VALID); 7796 } 7797 } 7798 7799 /* 7800 * Initialize 'spellfile' for the current buffer. 7801 */ 7802 static void 7803 init_spellfile() 7804 { 7805 char_u buf[MAXPATHL]; 7806 int l; 7807 char_u *fname; 7808 char_u *rtp; 7809 char_u *lend; 7810 int aspath = FALSE; 7811 char_u *lstart = curbuf->b_p_spl; 7812 7813 if (*curbuf->b_p_spl != NUL && curbuf->b_langp.ga_len > 0) 7814 { 7815 /* Find the end of the language name. Exclude the region. If there 7816 * is a path separator remember the start of the tail. */ 7817 for (lend = curbuf->b_p_spl; *lend != NUL 7818 && vim_strchr((char_u *)",._", *lend) == NULL; ++lend) 7819 if (vim_ispathsep(*lend)) 7820 { 7821 aspath = TRUE; 7822 lstart = lend + 1; 7823 } 7824 7825 /* Loop over all entries in 'runtimepath'. Use the first one where we 7826 * are allowed to write. */ 7827 rtp = p_rtp; 7828 while (*rtp != NUL) 7829 { 7830 if (aspath) 7831 /* Use directory of an entry with path, e.g., for 7832 * "/dir/lg.utf-8.spl" use "/dir". */ 7833 vim_strncpy(buf, curbuf->b_p_spl, lstart - curbuf->b_p_spl - 1); 7834 else 7835 /* Copy the path from 'runtimepath' to buf[]. */ 7836 copy_option_part(&rtp, buf, MAXPATHL, ","); 7837 if (filewritable(buf) == 2) 7838 { 7839 /* Use the first language name from 'spelllang' and the 7840 * encoding used in the first loaded .spl file. */ 7841 if (aspath) 7842 vim_strncpy(buf, curbuf->b_p_spl, lend - curbuf->b_p_spl); 7843 else 7844 { 7845 l = STRLEN(buf); 7846 vim_snprintf((char *)buf + l, MAXPATHL - l, 7847 "/spell/%.*s", (int)(lend - lstart), lstart); 7848 } 7849 l = STRLEN(buf); 7850 fname = LANGP_ENTRY(curbuf->b_langp, 0)->lp_slang->sl_fname; 7851 vim_snprintf((char *)buf + l, MAXPATHL - l, ".%s.add", 7852 fname != NULL 7853 && strstr((char *)gettail(fname), ".ascii.") != NULL 7854 ? (char_u *)"ascii" : spell_enc()); 7855 set_option_value((char_u *)"spellfile", 0L, buf, OPT_LOCAL); 7856 break; 7857 } 7858 aspath = FALSE; 7859 } 7860 } 7861 } 7862 7863 7864 /* 7865 * Init the chartab used for spelling for ASCII. 7866 * EBCDIC is not supported! 7867 */ 7868 static void 7869 clear_spell_chartab(sp) 7870 spelltab_T *sp; 7871 { 7872 int i; 7873 7874 /* Init everything to FALSE. */ 7875 vim_memset(sp->st_isw, FALSE, sizeof(sp->st_isw)); 7876 vim_memset(sp->st_isu, FALSE, sizeof(sp->st_isu)); 7877 for (i = 0; i < 256; ++i) 7878 { 7879 sp->st_fold[i] = i; 7880 sp->st_upper[i] = i; 7881 } 7882 7883 /* We include digits. A word shouldn't start with a digit, but handling 7884 * that is done separately. */ 7885 for (i = '0'; i <= '9'; ++i) 7886 sp->st_isw[i] = TRUE; 7887 for (i = 'A'; i <= 'Z'; ++i) 7888 { 7889 sp->st_isw[i] = TRUE; 7890 sp->st_isu[i] = TRUE; 7891 sp->st_fold[i] = i + 0x20; 7892 } 7893 for (i = 'a'; i <= 'z'; ++i) 7894 { 7895 sp->st_isw[i] = TRUE; 7896 sp->st_upper[i] = i - 0x20; 7897 } 7898 } 7899 7900 /* 7901 * Init the chartab used for spelling. Only depends on 'encoding'. 7902 * Called once while starting up and when 'encoding' changes. 7903 * The default is to use isalpha(), but the spell file should define the word 7904 * characters to make it possible that 'encoding' differs from the current 7905 * locale. For utf-8 we don't use isalpha() but our own functions. 7906 */ 7907 void 7908 init_spell_chartab() 7909 { 7910 int i; 7911 7912 did_set_spelltab = FALSE; 7913 clear_spell_chartab(&spelltab); 7914 #ifdef FEAT_MBYTE 7915 if (enc_dbcs) 7916 { 7917 /* DBCS: assume double-wide characters are word characters. */ 7918 for (i = 128; i <= 255; ++i) 7919 if (MB_BYTE2LEN(i) == 2) 7920 spelltab.st_isw[i] = TRUE; 7921 } 7922 else if (enc_utf8) 7923 { 7924 for (i = 128; i < 256; ++i) 7925 { 7926 spelltab.st_isu[i] = utf_isupper(i); 7927 spelltab.st_isw[i] = spelltab.st_isu[i] || utf_islower(i); 7928 spelltab.st_fold[i] = utf_fold(i); 7929 spelltab.st_upper[i] = utf_toupper(i); 7930 } 7931 } 7932 else 7933 #endif 7934 { 7935 /* Rough guess: use locale-dependent library functions. */ 7936 for (i = 128; i < 256; ++i) 7937 { 7938 if (MB_ISUPPER(i)) 7939 { 7940 spelltab.st_isw[i] = TRUE; 7941 spelltab.st_isu[i] = TRUE; 7942 spelltab.st_fold[i] = MB_TOLOWER(i); 7943 } 7944 else if (MB_ISLOWER(i)) 7945 { 7946 spelltab.st_isw[i] = TRUE; 7947 spelltab.st_upper[i] = MB_TOUPPER(i); 7948 } 7949 } 7950 } 7951 } 7952 7953 /* 7954 * Set the spell character tables from strings in the affix file. 7955 */ 7956 static int 7957 set_spell_chartab(fol, low, upp) 7958 char_u *fol; 7959 char_u *low; 7960 char_u *upp; 7961 { 7962 /* We build the new tables here first, so that we can compare with the 7963 * previous one. */ 7964 spelltab_T new_st; 7965 char_u *pf = fol, *pl = low, *pu = upp; 7966 int f, l, u; 7967 7968 clear_spell_chartab(&new_st); 7969 7970 while (*pf != NUL) 7971 { 7972 if (*pl == NUL || *pu == NUL) 7973 { 7974 EMSG(_(e_affform)); 7975 return FAIL; 7976 } 7977 #ifdef FEAT_MBYTE 7978 f = mb_ptr2char_adv(&pf); 7979 l = mb_ptr2char_adv(&pl); 7980 u = mb_ptr2char_adv(&pu); 7981 #else 7982 f = *pf++; 7983 l = *pl++; 7984 u = *pu++; 7985 #endif 7986 /* Every character that appears is a word character. */ 7987 if (f < 256) 7988 new_st.st_isw[f] = TRUE; 7989 if (l < 256) 7990 new_st.st_isw[l] = TRUE; 7991 if (u < 256) 7992 new_st.st_isw[u] = TRUE; 7993 7994 /* if "LOW" and "FOL" are not the same the "LOW" char needs 7995 * case-folding */ 7996 if (l < 256 && l != f) 7997 { 7998 if (f >= 256) 7999 { 8000 EMSG(_(e_affrange)); 8001 return FAIL; 8002 } 8003 new_st.st_fold[l] = f; 8004 } 8005 8006 /* if "UPP" and "FOL" are not the same the "UPP" char needs 8007 * case-folding, it's upper case and the "UPP" is the upper case of 8008 * "FOL" . */ 8009 if (u < 256 && u != f) 8010 { 8011 if (f >= 256) 8012 { 8013 EMSG(_(e_affrange)); 8014 return FAIL; 8015 } 8016 new_st.st_fold[u] = f; 8017 new_st.st_isu[u] = TRUE; 8018 new_st.st_upper[f] = u; 8019 } 8020 } 8021 8022 if (*pl != NUL || *pu != NUL) 8023 { 8024 EMSG(_(e_affform)); 8025 return FAIL; 8026 } 8027 8028 return set_spell_finish(&new_st); 8029 } 8030 8031 /* 8032 * Set the spell character tables from strings in the .spl file. 8033 */ 8034 static void 8035 set_spell_charflags(flags, cnt, fol) 8036 char_u *flags; 8037 int cnt; /* length of "flags" */ 8038 char_u *fol; 8039 { 8040 /* We build the new tables here first, so that we can compare with the 8041 * previous one. */ 8042 spelltab_T new_st; 8043 int i; 8044 char_u *p = fol; 8045 int c; 8046 8047 clear_spell_chartab(&new_st); 8048 8049 for (i = 0; i < 128; ++i) 8050 { 8051 if (i < cnt) 8052 { 8053 new_st.st_isw[i + 128] = (flags[i] & CF_WORD) != 0; 8054 new_st.st_isu[i + 128] = (flags[i] & CF_UPPER) != 0; 8055 } 8056 8057 if (*p != NUL) 8058 { 8059 #ifdef FEAT_MBYTE 8060 c = mb_ptr2char_adv(&p); 8061 #else 8062 c = *p++; 8063 #endif 8064 new_st.st_fold[i + 128] = c; 8065 if (i + 128 != c && new_st.st_isu[i + 128] && c < 256) 8066 new_st.st_upper[c] = i + 128; 8067 } 8068 } 8069 8070 (void)set_spell_finish(&new_st); 8071 } 8072 8073 static int 8074 set_spell_finish(new_st) 8075 spelltab_T *new_st; 8076 { 8077 int i; 8078 8079 if (did_set_spelltab) 8080 { 8081 /* check that it's the same table */ 8082 for (i = 0; i < 256; ++i) 8083 { 8084 if (spelltab.st_isw[i] != new_st->st_isw[i] 8085 || spelltab.st_isu[i] != new_st->st_isu[i] 8086 || spelltab.st_fold[i] != new_st->st_fold[i] 8087 || spelltab.st_upper[i] != new_st->st_upper[i]) 8088 { 8089 EMSG(_("E763: Word characters differ between spell files")); 8090 return FAIL; 8091 } 8092 } 8093 } 8094 else 8095 { 8096 /* copy the new spelltab into the one being used */ 8097 spelltab = *new_st; 8098 did_set_spelltab = TRUE; 8099 } 8100 8101 return OK; 8102 } 8103 8104 /* 8105 * Return TRUE if "p" points to a word character. 8106 * As a special case we see "midword" characters as word character when it is 8107 * followed by a word character. This finds they'there but not 'they there'. 8108 * Thus this only works properly when past the first character of the word. 8109 */ 8110 static int 8111 spell_iswordp(p, buf) 8112 char_u *p; 8113 buf_T *buf; /* buffer used */ 8114 { 8115 #ifdef FEAT_MBYTE 8116 char_u *s; 8117 int l; 8118 int c; 8119 8120 if (has_mbyte) 8121 { 8122 l = MB_BYTE2LEN(*p); 8123 s = p; 8124 if (l == 1) 8125 { 8126 /* be quick for ASCII */ 8127 if (buf->b_spell_ismw[*p]) 8128 { 8129 s = p + 1; /* skip a mid-word character */ 8130 l = MB_BYTE2LEN(*s); 8131 } 8132 } 8133 else 8134 { 8135 c = mb_ptr2char(p); 8136 if (c < 256 ? buf->b_spell_ismw[c] 8137 : (buf->b_spell_ismw_mb != NULL 8138 && vim_strchr(buf->b_spell_ismw_mb, c) != NULL)) 8139 { 8140 s = p + l; 8141 l = MB_BYTE2LEN(*s); 8142 } 8143 } 8144 8145 c = mb_ptr2char(s); 8146 if (c > 255) 8147 return mb_get_class(s) >= 2; 8148 return spelltab.st_isw[c]; 8149 } 8150 #endif 8151 8152 return spelltab.st_isw[buf->b_spell_ismw[*p] ? p[1] : p[0]]; 8153 } 8154 8155 /* 8156 * Return TRUE if "p" points to a word character. 8157 * Unlike spell_iswordp() this doesn't check for "midword" characters. 8158 */ 8159 static int 8160 spell_iswordp_nmw(p) 8161 char_u *p; 8162 { 8163 #ifdef FEAT_MBYTE 8164 int c; 8165 8166 if (has_mbyte) 8167 { 8168 c = mb_ptr2char(p); 8169 if (c > 255) 8170 return mb_get_class(p) >= 2; 8171 return spelltab.st_isw[c]; 8172 } 8173 #endif 8174 return spelltab.st_isw[*p]; 8175 } 8176 8177 #ifdef FEAT_MBYTE 8178 /* 8179 * Return TRUE if "p" points to a word character. 8180 * Wide version of spell_iswordp(). 8181 */ 8182 static int 8183 spell_iswordp_w(p, buf) 8184 int *p; 8185 buf_T *buf; 8186 { 8187 int *s; 8188 8189 if (*p < 256 ? buf->b_spell_ismw[*p] 8190 : (buf->b_spell_ismw_mb != NULL 8191 && vim_strchr(buf->b_spell_ismw_mb, *p) != NULL)) 8192 s = p + 1; 8193 else 8194 s = p; 8195 8196 if (*s > 255) 8197 { 8198 if (enc_utf8) 8199 return utf_class(*s) >= 2; 8200 if (enc_dbcs) 8201 return dbcs_class((unsigned)*s >> 8, *s & 0xff) >= 2; 8202 return 0; 8203 } 8204 return spelltab.st_isw[*s]; 8205 } 8206 #endif 8207 8208 /* 8209 * Write the table with prefix conditions to the .spl file. 8210 * When "fd" is NULL only count the length of what is written. 8211 */ 8212 static int 8213 write_spell_prefcond(fd, gap) 8214 FILE *fd; 8215 garray_T *gap; 8216 { 8217 int i; 8218 char_u *p; 8219 int len; 8220 int totlen; 8221 8222 if (fd != NULL) 8223 put_bytes(fd, (long_u)gap->ga_len, 2); /* <prefcondcnt> */ 8224 8225 totlen = 2 + gap->ga_len; /* length of <prefcondcnt> and <condlen> bytes */ 8226 8227 for (i = 0; i < gap->ga_len; ++i) 8228 { 8229 /* <prefcond> : <condlen> <condstr> */ 8230 p = ((char_u **)gap->ga_data)[i]; 8231 if (p != NULL) 8232 { 8233 len = STRLEN(p); 8234 if (fd != NULL) 8235 { 8236 fputc(len, fd); 8237 fwrite(p, (size_t)len, (size_t)1, fd); 8238 } 8239 totlen += len; 8240 } 8241 else if (fd != NULL) 8242 fputc(0, fd); 8243 } 8244 8245 return totlen; 8246 } 8247 8248 /* 8249 * Case-fold "str[len]" into "buf[buflen]". The result is NUL terminated. 8250 * Uses the character definitions from the .spl file. 8251 * When using a multi-byte 'encoding' the length may change! 8252 * Returns FAIL when something wrong. 8253 */ 8254 static int 8255 spell_casefold(str, len, buf, buflen) 8256 char_u *str; 8257 int len; 8258 char_u *buf; 8259 int buflen; 8260 { 8261 int i; 8262 8263 if (len >= buflen) 8264 { 8265 buf[0] = NUL; 8266 return FAIL; /* result will not fit */ 8267 } 8268 8269 #ifdef FEAT_MBYTE 8270 if (has_mbyte) 8271 { 8272 int outi = 0; 8273 char_u *p; 8274 int c; 8275 8276 /* Fold one character at a time. */ 8277 for (p = str; p < str + len; ) 8278 { 8279 if (outi + MB_MAXBYTES > buflen) 8280 { 8281 buf[outi] = NUL; 8282 return FAIL; 8283 } 8284 c = mb_cptr2char_adv(&p); 8285 outi += mb_char2bytes(SPELL_TOFOLD(c), buf + outi); 8286 } 8287 buf[outi] = NUL; 8288 } 8289 else 8290 #endif 8291 { 8292 /* Be quick for non-multibyte encodings. */ 8293 for (i = 0; i < len; ++i) 8294 buf[i] = spelltab.st_fold[str[i]]; 8295 buf[i] = NUL; 8296 } 8297 8298 return OK; 8299 } 8300 8301 #define SPS_BEST 1 8302 #define SPS_FAST 2 8303 #define SPS_DOUBLE 4 8304 8305 static int sps_flags = SPS_BEST; 8306 static int sps_limit = 9999; 8307 8308 /* 8309 * Check the 'spellsuggest' option. Return FAIL if it's wrong. 8310 * Sets "sps_flags" and "sps_limit". 8311 */ 8312 int 8313 spell_check_sps() 8314 { 8315 char_u *p; 8316 char_u *s; 8317 char_u buf[MAXPATHL]; 8318 int f; 8319 8320 sps_flags = 0; 8321 sps_limit = 9999; 8322 8323 for (p = p_sps; *p != NUL; ) 8324 { 8325 copy_option_part(&p, buf, MAXPATHL, ","); 8326 8327 f = 0; 8328 if (VIM_ISDIGIT(*buf)) 8329 { 8330 s = buf; 8331 sps_limit = getdigits(&s); 8332 if (*s != NUL && !VIM_ISDIGIT(*s)) 8333 f = -1; 8334 } 8335 else if (STRCMP(buf, "best") == 0) 8336 f = SPS_BEST; 8337 else if (STRCMP(buf, "fast") == 0) 8338 f = SPS_FAST; 8339 else if (STRCMP(buf, "double") == 0) 8340 f = SPS_DOUBLE; 8341 else if (STRNCMP(buf, "expr:", 5) != 0 8342 && STRNCMP(buf, "file:", 5) != 0) 8343 f = -1; 8344 8345 if (f == -1 || (sps_flags != 0 && f != 0)) 8346 { 8347 sps_flags = SPS_BEST; 8348 sps_limit = 9999; 8349 return FAIL; 8350 } 8351 if (f != 0) 8352 sps_flags = f; 8353 } 8354 8355 if (sps_flags == 0) 8356 sps_flags = SPS_BEST; 8357 8358 return OK; 8359 } 8360 8361 /* Remember what "z?" replaced. */ 8362 static char_u *repl_from = NULL; 8363 static char_u *repl_to = NULL; 8364 8365 /* 8366 * "z?": Find badly spelled word under or after the cursor. 8367 * Give suggestions for the properly spelled word. 8368 * When "count" is non-zero use that suggestion. 8369 */ 8370 void 8371 spell_suggest(count) 8372 int count; 8373 { 8374 char_u *line; 8375 pos_T prev_cursor = curwin->w_cursor; 8376 char_u wcopy[MAXWLEN + 2]; 8377 char_u *p; 8378 int i; 8379 int c; 8380 suginfo_T sug; 8381 suggest_T *stp; 8382 int mouse_used; 8383 int need_cap; 8384 int limit; 8385 int selected = count; 8386 8387 /* Find the start of the badly spelled word. */ 8388 if (spell_move_to(curwin, FORWARD, TRUE, TRUE, NULL) == 0 8389 || curwin->w_cursor.col > prev_cursor.col) 8390 { 8391 if (!curwin->w_p_spell || *curbuf->b_p_spl == NUL) 8392 return; 8393 8394 /* No bad word or it starts after the cursor: use the word under the 8395 * cursor. */ 8396 curwin->w_cursor = prev_cursor; 8397 line = ml_get_curline(); 8398 p = line + curwin->w_cursor.col; 8399 /* Backup to before start of word. */ 8400 while (p > line && spell_iswordp_nmw(p)) 8401 mb_ptr_back(line, p); 8402 /* Forward to start of word. */ 8403 while (*p != NUL && !spell_iswordp_nmw(p)) 8404 mb_ptr_adv(p); 8405 8406 if (!spell_iswordp_nmw(p)) /* No word found. */ 8407 { 8408 beep_flush(); 8409 return; 8410 } 8411 curwin->w_cursor.col = p - line; 8412 } 8413 8414 /* Get the word and its length. */ 8415 8416 /* Figure out if the word should be capitalised. */ 8417 need_cap = check_need_cap(curwin->w_cursor.lnum, curwin->w_cursor.col); 8418 8419 line = ml_get_curline(); 8420 8421 /* Get the list of suggestions. Limit to 'lines' - 2 or the number in 8422 * 'spellsuggest', whatever is smaller. */ 8423 if (sps_limit > (int)Rows - 2) 8424 limit = (int)Rows - 2; 8425 else 8426 limit = sps_limit; 8427 spell_find_suggest(line + curwin->w_cursor.col, &sug, limit, 8428 TRUE, need_cap); 8429 8430 if (sug.su_ga.ga_len == 0) 8431 MSG(_("Sorry, no suggestions")); 8432 else if (count > 0) 8433 { 8434 if (count > sug.su_ga.ga_len) 8435 smsg((char_u *)_("Sorry, only %ld suggestions"), 8436 (long)sug.su_ga.ga_len); 8437 } 8438 else 8439 { 8440 vim_free(repl_from); 8441 repl_from = NULL; 8442 vim_free(repl_to); 8443 repl_to = NULL; 8444 8445 #ifdef FEAT_RIGHTLEFT 8446 /* When 'rightleft' is set the list is drawn right-left. */ 8447 cmdmsg_rl = curwin->w_p_rl; 8448 if (cmdmsg_rl) 8449 msg_col = Columns - 1; 8450 #endif 8451 8452 /* List the suggestions. */ 8453 msg_start(); 8454 lines_left = Rows; /* avoid more prompt */ 8455 vim_snprintf((char *)IObuff, IOSIZE, _("Change \"%.*s\" to:"), 8456 sug.su_badlen, sug.su_badptr); 8457 #ifdef FEAT_RIGHTLEFT 8458 if (cmdmsg_rl && STRNCMP(IObuff, "Change", 6) == 0) 8459 { 8460 /* And now the rabbit from the high hat: Avoid showing the 8461 * untranslated message rightleft. */ 8462 vim_snprintf((char *)IObuff, IOSIZE, ":ot \"%.*s\" egnahC", 8463 sug.su_badlen, sug.su_badptr); 8464 } 8465 #endif 8466 msg_puts(IObuff); 8467 msg_clr_eos(); 8468 msg_putchar('\n'); 8469 8470 msg_scroll = TRUE; 8471 for (i = 0; i < sug.su_ga.ga_len; ++i) 8472 { 8473 stp = &SUG(sug.su_ga, i); 8474 8475 /* The suggested word may replace only part of the bad word, add 8476 * the not replaced part. */ 8477 STRCPY(wcopy, stp->st_word); 8478 if (sug.su_badlen > stp->st_orglen) 8479 vim_strncpy(wcopy + STRLEN(wcopy), 8480 sug.su_badptr + stp->st_orglen, 8481 sug.su_badlen - stp->st_orglen); 8482 vim_snprintf((char *)IObuff, IOSIZE, "%2d", i + 1); 8483 #ifdef FEAT_RIGHTLEFT 8484 if (cmdmsg_rl) 8485 rl_mirror(IObuff); 8486 #endif 8487 msg_puts(IObuff); 8488 8489 vim_snprintf((char *)IObuff, IOSIZE, " \"%s\"", wcopy); 8490 msg_puts(IObuff); 8491 8492 /* The word may replace more than "su_badlen". */ 8493 if (sug.su_badlen < stp->st_orglen) 8494 { 8495 vim_snprintf((char *)IObuff, IOSIZE, _(" < \"%.*s\""), 8496 stp->st_orglen, sug.su_badptr); 8497 msg_puts(IObuff); 8498 } 8499 8500 if (p_verbose > 0) 8501 { 8502 /* Add the score. */ 8503 if (sps_flags & (SPS_DOUBLE | SPS_BEST)) 8504 vim_snprintf((char *)IObuff, IOSIZE, " (%s%d - %d)", 8505 stp->st_salscore ? "s " : "", 8506 stp->st_score, stp->st_altscore); 8507 else 8508 vim_snprintf((char *)IObuff, IOSIZE, " (%d)", 8509 stp->st_score); 8510 #ifdef FEAT_RIGHTLEFT 8511 if (cmdmsg_rl) 8512 /* Mirror the numbers, but keep the leading space. */ 8513 rl_mirror(IObuff + 1); 8514 #endif 8515 msg_advance(30); 8516 msg_puts(IObuff); 8517 } 8518 msg_putchar('\n'); 8519 } 8520 8521 #ifdef FEAT_RIGHTLEFT 8522 cmdmsg_rl = FALSE; 8523 msg_col = 0; 8524 #endif 8525 /* Ask for choice. */ 8526 selected = prompt_for_number(&mouse_used); 8527 if (mouse_used) 8528 selected -= lines_left; 8529 } 8530 8531 if (selected > 0 && selected <= sug.su_ga.ga_len && u_save_cursor() == OK) 8532 { 8533 /* Save the from and to text for :spellrepall. */ 8534 stp = &SUG(sug.su_ga, selected - 1); 8535 repl_from = vim_strnsave(sug.su_badptr, stp->st_orglen); 8536 repl_to = vim_strsave(stp->st_word); 8537 8538 /* Replace the word. */ 8539 p = alloc(STRLEN(line) - stp->st_orglen + STRLEN(stp->st_word) + 1); 8540 if (p != NULL) 8541 { 8542 c = sug.su_badptr - line; 8543 mch_memmove(p, line, c); 8544 STRCPY(p + c, stp->st_word); 8545 STRCAT(p, sug.su_badptr + stp->st_orglen); 8546 ml_replace(curwin->w_cursor.lnum, p, FALSE); 8547 curwin->w_cursor.col = c; 8548 changed_bytes(curwin->w_cursor.lnum, c); 8549 8550 /* For redo we use a change-word command. */ 8551 ResetRedobuff(); 8552 AppendToRedobuff((char_u *)"ciw"); 8553 AppendToRedobuff(stp->st_word); 8554 AppendCharToRedobuff(ESC); 8555 } 8556 } 8557 else 8558 curwin->w_cursor = prev_cursor; 8559 8560 spell_find_cleanup(&sug); 8561 } 8562 8563 /* 8564 * Check if the word at line "lnum" column "col" is required to start with a 8565 * capital. This uses 'spellcapcheck' of the current buffer. 8566 */ 8567 static int 8568 check_need_cap(lnum, col) 8569 linenr_T lnum; 8570 colnr_T col; 8571 { 8572 int need_cap = FALSE; 8573 char_u *line; 8574 char_u *line_copy = NULL; 8575 char_u *p; 8576 colnr_T endcol; 8577 regmatch_T regmatch; 8578 8579 if (curbuf->b_cap_prog == NULL) 8580 return FALSE; 8581 8582 line = ml_get_curline(); 8583 endcol = 0; 8584 if ((int)(skipwhite(line) - line) >= (int)col) 8585 { 8586 /* At start of line, check if previous line is empty or sentence 8587 * ends there. */ 8588 if (lnum == 1) 8589 need_cap = TRUE; 8590 else 8591 { 8592 line = ml_get(lnum - 1); 8593 if (*skipwhite(line) == NUL) 8594 need_cap = TRUE; 8595 else 8596 { 8597 /* Append a space in place of the line break. */ 8598 line_copy = concat_str(line, (char_u *)" "); 8599 line = line_copy; 8600 endcol = STRLEN(line); 8601 } 8602 } 8603 } 8604 else 8605 endcol = col; 8606 8607 if (endcol > 0) 8608 { 8609 /* Check if sentence ends before the bad word. */ 8610 regmatch.regprog = curbuf->b_cap_prog; 8611 regmatch.rm_ic = FALSE; 8612 p = line + endcol; 8613 for (;;) 8614 { 8615 mb_ptr_back(line, p); 8616 if (p == line || spell_iswordp_nmw(p)) 8617 break; 8618 if (vim_regexec(®match, p, 0) 8619 && regmatch.endp[0] == line + endcol) 8620 { 8621 need_cap = TRUE; 8622 break; 8623 } 8624 } 8625 } 8626 8627 vim_free(line_copy); 8628 8629 return need_cap; 8630 } 8631 8632 8633 /* 8634 * ":spellrepall" 8635 */ 8636 /*ARGSUSED*/ 8637 void 8638 ex_spellrepall(eap) 8639 exarg_T *eap; 8640 { 8641 pos_T pos = curwin->w_cursor; 8642 char_u *frompat; 8643 int addlen; 8644 char_u *line; 8645 char_u *p; 8646 int save_ws = p_ws; 8647 linenr_T prev_lnum = 0; 8648 8649 if (repl_from == NULL || repl_to == NULL) 8650 { 8651 EMSG(_("E752: No previous spell replacement")); 8652 return; 8653 } 8654 addlen = STRLEN(repl_to) - STRLEN(repl_from); 8655 8656 frompat = alloc(STRLEN(repl_from) + 7); 8657 if (frompat == NULL) 8658 return; 8659 sprintf((char *)frompat, "\\V\\<%s\\>", repl_from); 8660 p_ws = FALSE; 8661 8662 sub_nsubs = 0; 8663 sub_nlines = 0; 8664 curwin->w_cursor.lnum = 0; 8665 while (!got_int) 8666 { 8667 if (do_search(NULL, '/', frompat, 1L, SEARCH_KEEP) == 0 8668 || u_save_cursor() == FAIL) 8669 break; 8670 8671 /* Only replace when the right word isn't there yet. This happens 8672 * when changing "etc" to "etc.". */ 8673 line = ml_get_curline(); 8674 if (addlen <= 0 || STRNCMP(line + curwin->w_cursor.col, 8675 repl_to, STRLEN(repl_to)) != 0) 8676 { 8677 p = alloc(STRLEN(line) + addlen + 1); 8678 if (p == NULL) 8679 break; 8680 mch_memmove(p, line, curwin->w_cursor.col); 8681 STRCPY(p + curwin->w_cursor.col, repl_to); 8682 STRCAT(p, line + curwin->w_cursor.col + STRLEN(repl_from)); 8683 ml_replace(curwin->w_cursor.lnum, p, FALSE); 8684 changed_bytes(curwin->w_cursor.lnum, curwin->w_cursor.col); 8685 8686 if (curwin->w_cursor.lnum != prev_lnum) 8687 { 8688 ++sub_nlines; 8689 prev_lnum = curwin->w_cursor.lnum; 8690 } 8691 ++sub_nsubs; 8692 } 8693 curwin->w_cursor.col += STRLEN(repl_to); 8694 } 8695 8696 p_ws = save_ws; 8697 curwin->w_cursor = pos; 8698 vim_free(frompat); 8699 8700 if (sub_nsubs == 0) 8701 EMSG2(_("E753: Not found: %s"), repl_from); 8702 else 8703 do_sub_msg(FALSE); 8704 } 8705 8706 /* 8707 * Find spell suggestions for "word". Return them in the growarray "*gap" as 8708 * a list of allocated strings. 8709 */ 8710 void 8711 spell_suggest_list(gap, word, maxcount, need_cap) 8712 garray_T *gap; 8713 char_u *word; 8714 int maxcount; /* maximum nr of suggestions */ 8715 int need_cap; /* 'spellcapcheck' matched */ 8716 { 8717 suginfo_T sug; 8718 int i; 8719 suggest_T *stp; 8720 char_u *wcopy; 8721 8722 spell_find_suggest(word, &sug, maxcount, FALSE, need_cap); 8723 8724 /* Make room in "gap". */ 8725 ga_init2(gap, sizeof(char_u *), sug.su_ga.ga_len + 1); 8726 if (ga_grow(gap, sug.su_ga.ga_len) == FAIL) 8727 return; 8728 8729 for (i = 0; i < sug.su_ga.ga_len; ++i) 8730 { 8731 stp = &SUG(sug.su_ga, i); 8732 8733 /* The suggested word may replace only part of "word", add the not 8734 * replaced part. */ 8735 wcopy = alloc(STRLEN(stp->st_word) 8736 + STRLEN(sug.su_badptr + stp->st_orglen) + 1); 8737 if (wcopy == NULL) 8738 break; 8739 STRCPY(wcopy, stp->st_word); 8740 STRCAT(wcopy, sug.su_badptr + stp->st_orglen); 8741 ((char_u **)gap->ga_data)[gap->ga_len++] = wcopy; 8742 } 8743 8744 spell_find_cleanup(&sug); 8745 } 8746 8747 /* 8748 * Find spell suggestions for the word at the start of "badptr". 8749 * Return the suggestions in "su->su_ga". 8750 * The maximum number of suggestions is "maxcount". 8751 * Note: does use info for the current window. 8752 * This is based on the mechanisms of Aspell, but completely reimplemented. 8753 */ 8754 static void 8755 spell_find_suggest(badptr, su, maxcount, banbadword, need_cap) 8756 char_u *badptr; 8757 suginfo_T *su; 8758 int maxcount; 8759 int banbadword; /* don't include badword in suggestions */ 8760 int need_cap; /* word should start with capital */ 8761 { 8762 int attr = 0; 8763 char_u buf[MAXPATHL]; 8764 char_u *p; 8765 int do_combine = FALSE; 8766 char_u *sps_copy; 8767 #ifdef FEAT_EVAL 8768 static int expr_busy = FALSE; 8769 #endif 8770 int c; 8771 int i; 8772 langp_T *lp; 8773 8774 /* 8775 * Set the info in "*su". 8776 */ 8777 vim_memset(su, 0, sizeof(suginfo_T)); 8778 ga_init2(&su->su_ga, (int)sizeof(suggest_T), 10); 8779 ga_init2(&su->su_sga, (int)sizeof(suggest_T), 10); 8780 if (*badptr == NUL) 8781 return; 8782 hash_init(&su->su_banned); 8783 8784 su->su_badptr = badptr; 8785 su->su_badlen = spell_check(curwin, su->su_badptr, &attr, NULL); 8786 su->su_maxcount = maxcount; 8787 su->su_maxscore = SCORE_MAXINIT; 8788 8789 if (su->su_badlen >= MAXWLEN) 8790 su->su_badlen = MAXWLEN - 1; /* just in case */ 8791 vim_strncpy(su->su_badword, su->su_badptr, su->su_badlen); 8792 (void)spell_casefold(su->su_badptr, su->su_badlen, 8793 su->su_fbadword, MAXWLEN); 8794 /* get caps flags for bad word */ 8795 su->su_badflags = badword_captype(su->su_badptr, 8796 su->su_badptr + su->su_badlen); 8797 if (need_cap) 8798 su->su_badflags |= WF_ONECAP; 8799 8800 /* Find the default language for sound folding. We simply use the first 8801 * one in 'spelllang' that supports sound folding. That's good for when 8802 * using multiple files for one language, it's not that bad when mixing 8803 * languages (e.g., "pl,en"). */ 8804 for (i = 0; i < curbuf->b_langp.ga_len; ++i) 8805 { 8806 lp = LANGP_ENTRY(curbuf->b_langp, i); 8807 if (lp->lp_sallang != NULL) 8808 { 8809 su->su_sallang = lp->lp_sallang; 8810 break; 8811 } 8812 } 8813 8814 /* If the word is not capitalised and spell_check() doesn't consider the 8815 * word to be bad then it might need to be capitalised. Add a suggestion 8816 * for that. */ 8817 c = PTR2CHAR(su->su_badptr); 8818 if (!SPELL_ISUPPER(c) && attr == 0) 8819 { 8820 make_case_word(su->su_badword, buf, WF_ONECAP); 8821 add_suggestion(su, &su->su_ga, buf, su->su_badlen, SCORE_ICASE, 8822 0, TRUE, su->su_sallang); 8823 } 8824 8825 /* Ban the bad word itself. It may appear in another region. */ 8826 if (banbadword) 8827 add_banned(su, su->su_badword); 8828 8829 /* Make a copy of 'spellsuggest', because the expression may change it. */ 8830 sps_copy = vim_strsave(p_sps); 8831 if (sps_copy == NULL) 8832 return; 8833 8834 /* Loop over the items in 'spellsuggest'. */ 8835 for (p = sps_copy; *p != NUL; ) 8836 { 8837 copy_option_part(&p, buf, MAXPATHL, ","); 8838 8839 if (STRNCMP(buf, "expr:", 5) == 0) 8840 { 8841 #ifdef FEAT_EVAL 8842 /* Evaluate an expression. Skip this when called recursively, 8843 * when using spellsuggest() in the expression. */ 8844 if (!expr_busy) 8845 { 8846 expr_busy = TRUE; 8847 spell_suggest_expr(su, buf + 5); 8848 expr_busy = FALSE; 8849 } 8850 #endif 8851 } 8852 else if (STRNCMP(buf, "file:", 5) == 0) 8853 /* Use list of suggestions in a file. */ 8854 spell_suggest_file(su, buf + 5); 8855 else 8856 { 8857 /* Use internal method. */ 8858 spell_suggest_intern(su); 8859 if (sps_flags & SPS_DOUBLE) 8860 do_combine = TRUE; 8861 } 8862 } 8863 8864 vim_free(sps_copy); 8865 8866 if (do_combine) 8867 /* Combine the two list of suggestions. This must be done last, 8868 * because sorting changes the order again. */ 8869 score_combine(su); 8870 } 8871 8872 #ifdef FEAT_EVAL 8873 /* 8874 * Find suggestions by evaluating expression "expr". 8875 */ 8876 static void 8877 spell_suggest_expr(su, expr) 8878 suginfo_T *su; 8879 char_u *expr; 8880 { 8881 list_T *list; 8882 listitem_T *li; 8883 int score; 8884 char_u *p; 8885 8886 /* The work is split up in a few parts to avoid having to export 8887 * suginfo_T. 8888 * First evaluate the expression and get the resulting list. */ 8889 list = eval_spell_expr(su->su_badword, expr); 8890 if (list != NULL) 8891 { 8892 /* Loop over the items in the list. */ 8893 for (li = list->lv_first; li != NULL; li = li->li_next) 8894 if (li->li_tv.v_type == VAR_LIST) 8895 { 8896 /* Get the word and the score from the items. */ 8897 score = get_spellword(li->li_tv.vval.v_list, &p); 8898 if (score >= 0) 8899 add_suggestion(su, &su->su_ga, p, 8900 su->su_badlen, score, 0, TRUE, su->su_sallang); 8901 } 8902 list_unref(list); 8903 } 8904 8905 /* Sort the suggestions and truncate at "maxcount". */ 8906 (void)cleanup_suggestions(&su->su_ga, su->su_maxscore, su->su_maxcount); 8907 } 8908 #endif 8909 8910 /* 8911 * Find suggestions in file "fname". Used for "file:" in 'spellsuggest'. 8912 */ 8913 static void 8914 spell_suggest_file(su, fname) 8915 suginfo_T *su; 8916 char_u *fname; 8917 { 8918 FILE *fd; 8919 char_u line[MAXWLEN * 2]; 8920 char_u *p; 8921 int len; 8922 char_u cword[MAXWLEN]; 8923 8924 /* Open the file. */ 8925 fd = mch_fopen((char *)fname, "r"); 8926 if (fd == NULL) 8927 { 8928 EMSG2(_(e_notopen), fname); 8929 return; 8930 } 8931 8932 /* Read it line by line. */ 8933 while (!vim_fgets(line, MAXWLEN * 2, fd) && !got_int) 8934 { 8935 line_breakcheck(); 8936 8937 p = vim_strchr(line, '/'); 8938 if (p == NULL) 8939 continue; /* No Tab found, just skip the line. */ 8940 *p++ = NUL; 8941 if (STRICMP(su->su_badword, line) == 0) 8942 { 8943 /* Match! Isolate the good word, until CR or NL. */ 8944 for (len = 0; p[len] >= ' '; ++len) 8945 ; 8946 p[len] = NUL; 8947 8948 /* If the suggestion doesn't have specific case duplicate the case 8949 * of the bad word. */ 8950 if (captype(p, NULL) == 0) 8951 { 8952 make_case_word(p, cword, su->su_badflags); 8953 p = cword; 8954 } 8955 8956 add_suggestion(su, &su->su_ga, p, su->su_badlen, 8957 SCORE_FILE, 0, TRUE, su->su_sallang); 8958 } 8959 } 8960 8961 fclose(fd); 8962 8963 /* Sort the suggestions and truncate at "maxcount". */ 8964 (void)cleanup_suggestions(&su->su_ga, su->su_maxscore, su->su_maxcount); 8965 } 8966 8967 /* 8968 * Find suggestions for the internal method indicated by "sps_flags". 8969 */ 8970 static void 8971 spell_suggest_intern(su) 8972 suginfo_T *su; 8973 { 8974 /* 8975 * 1. Try special cases, such as repeating a word: "the the" -> "the". 8976 * 8977 * Set a maximum score to limit the combination of operations that is 8978 * tried. 8979 */ 8980 suggest_try_special(su); 8981 8982 /* 8983 * 2. Try inserting/deleting/swapping/changing a letter, use REP entries 8984 * from the .aff file and inserting a space (split the word). 8985 */ 8986 suggest_try_change(su); 8987 8988 /* For the resulting top-scorers compute the sound-a-like score. */ 8989 if (sps_flags & SPS_DOUBLE) 8990 score_comp_sal(su); 8991 8992 /* 8993 * 3. Try finding sound-a-like words. 8994 * 8995 * Only do this when we don't have a lot of suggestions yet, because it's 8996 * very slow and often doesn't find new suggestions. 8997 */ 8998 if ((sps_flags & SPS_DOUBLE) 8999 || (!(sps_flags & SPS_FAST) 9000 && su->su_ga.ga_len < SUG_CLEAN_COUNT(su))) 9001 { 9002 /* Allow a higher score now. */ 9003 su->su_maxscore = SCORE_MAXMAX; 9004 suggest_try_soundalike(su); 9005 } 9006 9007 /* When CTRL-C was hit while searching do show the results. */ 9008 ui_breakcheck(); 9009 if (got_int) 9010 { 9011 (void)vgetc(); 9012 got_int = FALSE; 9013 } 9014 9015 if ((sps_flags & SPS_DOUBLE) == 0 && su->su_ga.ga_len != 0) 9016 { 9017 if (sps_flags & SPS_BEST) 9018 /* Adjust the word score for how it sounds like. */ 9019 rescore_suggestions(su); 9020 9021 /* Sort the suggestions and truncate at "maxcount". */ 9022 (void)cleanup_suggestions(&su->su_ga, su->su_maxscore, su->su_maxcount); 9023 } 9024 } 9025 9026 /* 9027 * Free the info put in "*su" by spell_find_suggest(). 9028 */ 9029 static void 9030 spell_find_cleanup(su) 9031 suginfo_T *su; 9032 { 9033 int i; 9034 9035 /* Free the suggestions. */ 9036 for (i = 0; i < su->su_ga.ga_len; ++i) 9037 vim_free(SUG(su->su_ga, i).st_word); 9038 ga_clear(&su->su_ga); 9039 for (i = 0; i < su->su_sga.ga_len; ++i) 9040 vim_free(SUG(su->su_sga, i).st_word); 9041 ga_clear(&su->su_sga); 9042 9043 /* Free the banned words. */ 9044 free_banned(su); 9045 } 9046 9047 /* 9048 * Make a copy of "word", with the first letter upper or lower cased, to 9049 * "wcopy[MAXWLEN]". "word" must not be empty. 9050 * The result is NUL terminated. 9051 */ 9052 static void 9053 onecap_copy(word, wcopy, upper) 9054 char_u *word; 9055 char_u *wcopy; 9056 int upper; /* TRUE: first letter made upper case */ 9057 { 9058 char_u *p; 9059 int c; 9060 int l; 9061 9062 p = word; 9063 #ifdef FEAT_MBYTE 9064 if (has_mbyte) 9065 c = mb_cptr2char_adv(&p); 9066 else 9067 #endif 9068 c = *p++; 9069 if (upper) 9070 c = SPELL_TOUPPER(c); 9071 else 9072 c = SPELL_TOFOLD(c); 9073 #ifdef FEAT_MBYTE 9074 if (has_mbyte) 9075 l = mb_char2bytes(c, wcopy); 9076 else 9077 #endif 9078 { 9079 l = 1; 9080 wcopy[0] = c; 9081 } 9082 vim_strncpy(wcopy + l, p, MAXWLEN - l - 1); 9083 } 9084 9085 /* 9086 * Make a copy of "word" with all the letters upper cased into 9087 * "wcopy[MAXWLEN]". The result is NUL terminated. 9088 */ 9089 static void 9090 allcap_copy(word, wcopy) 9091 char_u *word; 9092 char_u *wcopy; 9093 { 9094 char_u *s; 9095 char_u *d; 9096 int c; 9097 9098 d = wcopy; 9099 for (s = word; *s != NUL; ) 9100 { 9101 #ifdef FEAT_MBYTE 9102 if (has_mbyte) 9103 c = mb_cptr2char_adv(&s); 9104 else 9105 #endif 9106 c = *s++; 9107 9108 #ifdef FEAT_MBYTE 9109 /* We only change � to SS when we are certain latin1 is used. It 9110 * would cause weird errors in other 8-bit encodings. */ 9111 if (enc_latin1like && c == 0xdf) 9112 { 9113 c = 'S'; 9114 if (d - wcopy >= MAXWLEN - 1) 9115 break; 9116 *d++ = c; 9117 } 9118 else 9119 #endif 9120 c = SPELL_TOUPPER(c); 9121 9122 #ifdef FEAT_MBYTE 9123 if (has_mbyte) 9124 { 9125 if (d - wcopy >= MAXWLEN - MB_MAXBYTES) 9126 break; 9127 d += mb_char2bytes(c, d); 9128 } 9129 else 9130 #endif 9131 { 9132 if (d - wcopy >= MAXWLEN - 1) 9133 break; 9134 *d++ = c; 9135 } 9136 } 9137 *d = NUL; 9138 } 9139 9140 /* 9141 * Try finding suggestions by recognizing specific situations. 9142 */ 9143 static void 9144 suggest_try_special(su) 9145 suginfo_T *su; 9146 { 9147 char_u *p; 9148 size_t len; 9149 int c; 9150 char_u word[MAXWLEN]; 9151 9152 /* 9153 * Recognize a word that is repeated: "the the". 9154 */ 9155 p = skiptowhite(su->su_fbadword); 9156 len = p - su->su_fbadword; 9157 p = skipwhite(p); 9158 if (STRLEN(p) == len && STRNCMP(su->su_fbadword, p, len) == 0) 9159 { 9160 /* Include badflags: if the badword is onecap or allcap 9161 * use that for the goodword too: "The the" -> "The". */ 9162 c = su->su_fbadword[len]; 9163 su->su_fbadword[len] = NUL; 9164 make_case_word(su->su_fbadword, word, su->su_badflags); 9165 su->su_fbadword[len] = c; 9166 add_suggestion(su, &su->su_ga, word, su->su_badlen, SCORE_DEL, 9167 0, TRUE, su->su_sallang); 9168 } 9169 } 9170 9171 /* 9172 * Try finding suggestions by adding/removing/swapping letters. 9173 * 9174 * This uses a state machine. At each node in the tree we try various 9175 * operations. When trying if an operation work "depth" is increased and the 9176 * stack[] is used to store info. This allows combinations, thus insert one 9177 * character, replace one and delete another. The number of changes is 9178 * limited by su->su_maxscore, checked in try_deeper(). 9179 * 9180 * After implementing this I noticed an article by Kemal Oflazer that 9181 * describes something similar: "Error-tolerant Finite State Recognition with 9182 * Applications to Morphological Analysis and Spelling Correction" (1996). 9183 * The implementation in the article is simplified and requires a stack of 9184 * unknown depth. The implementation here only needs a stack depth of the 9185 * length of the word. 9186 */ 9187 static void 9188 suggest_try_change(su) 9189 suginfo_T *su; 9190 { 9191 char_u fword[MAXWLEN]; /* copy of the bad word, case-folded */ 9192 char_u tword[MAXWLEN]; /* good word collected so far */ 9193 trystate_T stack[MAXWLEN]; 9194 char_u preword[MAXWLEN * 3]; /* word found with proper case; 9195 * concatanation of prefix compound 9196 * words and split word. NUL terminated 9197 * when going deeper but not when coming 9198 * back. */ 9199 char_u compflags[MAXWLEN]; /* compound flags, one for each word */ 9200 trystate_T *sp; 9201 int newscore; 9202 langp_T *lp; 9203 char_u *byts, *fbyts, *pbyts; 9204 idx_T *idxs, *fidxs, *pidxs; 9205 int depth; 9206 int c, c2, c3; 9207 int n; 9208 int flags; 9209 garray_T *gap; 9210 idx_T arridx; 9211 int len; 9212 char_u *p; 9213 fromto_T *ftp; 9214 int fl = 0, tl; 9215 int repextra = 0; /* extra bytes in fword[] from REP item */ 9216 slang_T *slang; 9217 int fword_ends; 9218 int lpi; 9219 9220 /* We make a copy of the case-folded bad word, so that we can modify it 9221 * to find matches (esp. REP items). Append some more text, changing 9222 * chars after the bad word may help. */ 9223 STRCPY(fword, su->su_fbadword); 9224 n = STRLEN(fword); 9225 p = su->su_badptr + su->su_badlen; 9226 (void)spell_casefold(p, STRLEN(p), fword + n, MAXWLEN - n); 9227 9228 for (lpi = 0; lpi < curbuf->b_langp.ga_len; ++lpi) 9229 { 9230 lp = LANGP_ENTRY(curbuf->b_langp, lpi); 9231 slang = lp->lp_slang; 9232 9233 /* If reloading a spell file fails it's still in the list but 9234 * everything has been cleared. */ 9235 if (slang->sl_fbyts == NULL) 9236 continue; 9237 9238 /* 9239 * Go through the whole case-fold tree, try changes at each node. 9240 * "tword[]" contains the word collected from nodes in the tree. 9241 * "fword[]" the word we are trying to match with (initially the bad 9242 * word). 9243 */ 9244 depth = 0; 9245 sp = &stack[0]; 9246 vim_memset(sp, 0, sizeof(trystate_T)); 9247 sp->ts_curi = 1; 9248 9249 /* 9250 * When there are postponed prefixes we need to use these first. At 9251 * the end of the prefix we continue in the case-fold tree. 9252 */ 9253 fbyts = slang->sl_fbyts; 9254 fidxs = slang->sl_fidxs; 9255 pbyts = slang->sl_pbyts; 9256 pidxs = slang->sl_pidxs; 9257 if (pbyts != NULL) 9258 { 9259 byts = pbyts; 9260 idxs = pidxs; 9261 sp->ts_prefixdepth = PFD_PREFIXTREE; 9262 sp->ts_state = STATE_NOPREFIX; /* try without prefix first */ 9263 } 9264 else 9265 { 9266 byts = fbyts; 9267 idxs = fidxs; 9268 sp->ts_prefixdepth = PFD_NOPREFIX; 9269 sp->ts_state = STATE_START; 9270 } 9271 9272 /* 9273 * Loop to find all suggestions. At each round we either: 9274 * - For the current state try one operation, advance "ts_curi", 9275 * increase "depth". 9276 * - When a state is done go to the next, set "ts_state". 9277 * - When all states are tried decrease "depth". 9278 */ 9279 while (depth >= 0 && !got_int) 9280 { 9281 sp = &stack[depth]; 9282 switch (sp->ts_state) 9283 { 9284 case STATE_START: 9285 case STATE_NOPREFIX: 9286 /* 9287 * Start of node: Deal with NUL bytes, which means 9288 * tword[] may end here. 9289 */ 9290 arridx = sp->ts_arridx; /* current node in the tree */ 9291 len = byts[arridx]; /* bytes in this node */ 9292 arridx += sp->ts_curi; /* index of current byte */ 9293 9294 if (sp->ts_prefixdepth == PFD_PREFIXTREE) 9295 { 9296 /* Skip over the NUL bytes, we use them later. */ 9297 for (n = 0; n < len && byts[arridx + n] == 0; ++n) 9298 ; 9299 sp->ts_curi += n; 9300 9301 /* Always past NUL bytes now. */ 9302 n = (int)sp->ts_state; 9303 sp->ts_state = STATE_ENDNUL; 9304 sp->ts_save_badflags = su->su_badflags; 9305 9306 /* At end of a prefix or at start of prefixtree: check for 9307 * following word. */ 9308 if (byts[arridx] == 0 || n == (int)STATE_NOPREFIX) 9309 { 9310 /* Set su->su_badflags to the caps type at this 9311 * position. Use the caps type until here for the 9312 * prefix itself. */ 9313 #ifdef FEAT_MBYTE 9314 if (has_mbyte) 9315 n = nofold_len(fword, sp->ts_fidx, su->su_badptr); 9316 else 9317 #endif 9318 n = sp->ts_fidx; 9319 flags = badword_captype(su->su_badptr, 9320 su->su_badptr + n); 9321 su->su_badflags = badword_captype(su->su_badptr + n, 9322 su->su_badptr + su->su_badlen); 9323 ++depth; 9324 stack[depth] = stack[depth - 1]; 9325 sp = &stack[depth]; 9326 sp->ts_prefixdepth = depth - 1; 9327 byts = fbyts; 9328 idxs = fidxs; 9329 sp->ts_state = STATE_START; 9330 sp->ts_curi = 1; /* start just after length byte */ 9331 sp->ts_arridx = 0; 9332 9333 /* Move the prefix to preword[] with the right case 9334 * and make find_keepcap_word() works. */ 9335 tword[sp->ts_twordlen] = NUL; 9336 make_case_word(tword + sp->ts_splitoff, 9337 preword + sp->ts_prewordlen, 9338 flags); 9339 sp->ts_prewordlen = STRLEN(preword); 9340 sp->ts_splitoff = sp->ts_twordlen; 9341 } 9342 break; 9343 } 9344 9345 if (sp->ts_curi > len || byts[arridx] != 0) 9346 { 9347 /* Past bytes in node and/or past NUL bytes. */ 9348 sp->ts_state = STATE_ENDNUL; 9349 sp->ts_save_badflags = su->su_badflags; 9350 break; 9351 } 9352 9353 /* 9354 * End of word in tree. 9355 */ 9356 ++sp->ts_curi; /* eat one NUL byte */ 9357 9358 flags = (int)idxs[arridx]; 9359 fword_ends = (fword[sp->ts_fidx] == NUL 9360 || !spell_iswordp(fword + sp->ts_fidx, curbuf)); 9361 tword[sp->ts_twordlen] = NUL; 9362 9363 if (sp->ts_prefixdepth <= PFD_NOTSPECIAL 9364 && (sp->ts_flags & TSF_PREFIXOK) == 0) 9365 { 9366 /* There was a prefix before the word. Check that the 9367 * prefix can be used with this word. */ 9368 /* Count the length of the NULs in the prefix. If there 9369 * are none this must be the first try without a prefix. 9370 */ 9371 n = stack[sp->ts_prefixdepth].ts_arridx; 9372 len = pbyts[n++]; 9373 for (c = 0; c < len && pbyts[n + c] == 0; ++c) 9374 ; 9375 if (c > 0) 9376 { 9377 c = valid_word_prefix(c, n, flags, 9378 tword + sp->ts_splitoff, slang, FALSE); 9379 if (c == 0) 9380 break; 9381 9382 /* Use the WF_RARE flag for a rare prefix. */ 9383 if (c & WF_RAREPFX) 9384 flags |= WF_RARE; 9385 9386 /* Tricky: when checking for both prefix and 9387 * compounding we run into the prefix flag first. 9388 * Remember that it's OK, so that we accept the prefix 9389 * when arriving at a compound flag. */ 9390 sp->ts_flags |= TSF_PREFIXOK; 9391 } 9392 } 9393 9394 /* Check NEEDCOMPOUND: can't use word without compounding. */ 9395 if (sp->ts_complen == sp->ts_compsplit && fword_ends 9396 && (flags & WF_NEEDCOMP)) 9397 break; 9398 9399 if (sp->ts_complen > sp->ts_compsplit) 9400 { 9401 if (slang->sl_nobreak) 9402 { 9403 /* There was a word before this word. When there was 9404 * no change in this word (it was correct) add the 9405 * first word as a suggestion. If this word was 9406 * corrected too, we need to check if a correct word 9407 * follows. */ 9408 if (sp->ts_fidx - sp->ts_splitfidx 9409 == sp->ts_twordlen - sp->ts_splitoff 9410 && STRNCMP(fword + sp->ts_splitfidx, 9411 tword + sp->ts_splitoff, 9412 sp->ts_fidx - sp->ts_splitfidx) == 0) 9413 { 9414 preword[sp->ts_prewordlen] = NUL; 9415 add_suggestion(su, &su->su_ga, preword, 9416 sp->ts_splitfidx - repextra, 9417 sp->ts_score, 0, FALSE, 9418 lp->lp_sallang); 9419 break; 9420 } 9421 } 9422 else 9423 { 9424 /* There was a compound word before this word. If 9425 * this word does not support compounding then give up 9426 * (splitting is tried for the word without compound 9427 * flag). */ 9428 if (((unsigned)flags >> 24) == 0 9429 || sp->ts_twordlen - sp->ts_splitoff 9430 < slang->sl_compminlen) 9431 break; 9432 #ifdef FEAT_MBYTE 9433 /* For multi-byte chars check character length against 9434 * COMPOUNDMIN. */ 9435 if (has_mbyte 9436 && slang->sl_compminlen > 0 9437 && mb_charlen(tword + sp->ts_splitoff) 9438 < slang->sl_compminlen) 9439 break; 9440 #endif 9441 9442 compflags[sp->ts_complen] = ((unsigned)flags >> 24); 9443 compflags[sp->ts_complen + 1] = NUL; 9444 vim_strncpy(preword + sp->ts_prewordlen, 9445 tword + sp->ts_splitoff, 9446 sp->ts_twordlen - sp->ts_splitoff); 9447 p = preword; 9448 while (*skiptowhite(p) != NUL) 9449 p = skipwhite(skiptowhite(p)); 9450 if (fword_ends && !can_compound(slang, p, 9451 compflags + sp->ts_compsplit)) 9452 break; 9453 9454 /* Get pointer to last char of previous word. */ 9455 p = preword + sp->ts_prewordlen; 9456 mb_ptr_back(preword, p); 9457 } 9458 } 9459 else 9460 p = NULL; 9461 9462 /* 9463 * Form the word with proper case in preword. 9464 * If there is a word from a previous split, append. 9465 */ 9466 if (flags & WF_KEEPCAP) 9467 /* Must find the word in the keep-case tree. */ 9468 find_keepcap_word(slang, tword + sp->ts_splitoff, 9469 preword + sp->ts_prewordlen); 9470 else 9471 { 9472 /* Include badflags: if the badword is onecap or allcap 9473 * use that for the goodword too. But if the badword is 9474 * allcap and it's only one char long use onecap. */ 9475 c = su->su_badflags; 9476 if ((c & WF_ALLCAP) 9477 #ifdef FEAT_MBYTE 9478 && su->su_badlen == (*mb_ptr2len)(su->su_badptr) 9479 #else 9480 && su->su_badlen == 1 9481 #endif 9482 ) 9483 c = WF_ONECAP; 9484 c |= flags; 9485 9486 /* When appending a compound word after a word character 9487 * don't use Onecap. */ 9488 if (p != NULL && spell_iswordp_nmw(p)) 9489 c &= ~WF_ONECAP; 9490 make_case_word(tword + sp->ts_splitoff, 9491 preword + sp->ts_prewordlen, c); 9492 } 9493 9494 /* Don't use a banned word. It may appear again as a good 9495 * word, thus remember it. */ 9496 if (flags & WF_BANNED) 9497 { 9498 add_banned(su, preword + sp->ts_prewordlen); 9499 break; 9500 } 9501 if (was_banned(su, preword + sp->ts_prewordlen) 9502 || was_banned(su, preword)) 9503 break; 9504 9505 newscore = 0; 9506 if ((flags & WF_REGION) 9507 && (((unsigned)flags >> 16) & lp->lp_region) == 0) 9508 newscore += SCORE_REGION; 9509 if (flags & WF_RARE) 9510 newscore += SCORE_RARE; 9511 9512 if (!spell_valid_case(su->su_badflags, 9513 captype(preword + sp->ts_prewordlen, NULL))) 9514 newscore += SCORE_ICASE; 9515 9516 if (fword_ends && sp->ts_fidx >= sp->ts_fidxtry) 9517 { 9518 /* The badword also ends: add suggestions. Give a penalty 9519 * when changing non-word char to word char, e.g., "thes," 9520 * -> "these". */ 9521 p = fword + sp->ts_fidx; 9522 #ifdef FEAT_MBYTE 9523 if (has_mbyte) 9524 mb_ptr_back(fword, p); 9525 else 9526 #endif 9527 --p; 9528 if (!spell_iswordp(p, curbuf)) 9529 { 9530 p = preword + STRLEN(preword); 9531 #ifdef FEAT_MBYTE 9532 if (has_mbyte) 9533 mb_ptr_back(preword, p); 9534 else 9535 #endif 9536 --p; 9537 if (spell_iswordp(p, curbuf)) 9538 newscore += SCORE_NONWORD; 9539 } 9540 9541 add_suggestion(su, &su->su_ga, preword, 9542 sp->ts_fidx - repextra, 9543 sp->ts_score + newscore, 0, FALSE, 9544 lp->lp_sallang); 9545 } 9546 else if ((sp->ts_fidx >= sp->ts_fidxtry || fword_ends) 9547 #ifdef FEAT_MBYTE 9548 /* Don't split halfway a character. */ 9549 && (!has_mbyte || sp->ts_tcharlen == 0) 9550 #endif 9551 ) 9552 { 9553 int try_compound; 9554 9555 /* Get here in two situations: 9556 * 1. The word in the tree ends but the badword continues: 9557 * If the word allows compounding try that. Otherwise 9558 * try a split by inserting a space. For both check 9559 * that a valid words starts at fword[sp->ts_fidx]. 9560 * For NOBREAK do like compounding to be able to check 9561 * if the next word is valid. 9562 * 2. The badword does end, but it was due to a change 9563 * (e.g., a swap). No need to split, but do check that 9564 * the following word is valid. 9565 */ 9566 try_compound = FALSE; 9567 if (!fword_ends 9568 && slang->sl_compprog != NULL 9569 && ((unsigned)flags >> 24) != 0 9570 && sp->ts_twordlen - sp->ts_splitoff 9571 >= slang->sl_compminlen 9572 #ifdef FEAT_MBYTE 9573 && (!has_mbyte 9574 || slang->sl_compminlen == 0 9575 || mb_charlen(tword + sp->ts_splitoff) 9576 >= slang->sl_compminlen) 9577 #endif 9578 && (slang->sl_compsylmax < MAXWLEN 9579 || sp->ts_complen + 1 - sp->ts_compsplit 9580 < slang->sl_compmax) 9581 && (byte_in_str(sp->ts_complen == sp->ts_compsplit 9582 ? slang->sl_compstartflags 9583 : slang->sl_compallflags, 9584 ((unsigned)flags >> 24)))) 9585 { 9586 try_compound = TRUE; 9587 compflags[sp->ts_complen] = ((unsigned)flags >> 24); 9588 compflags[sp->ts_complen + 1] = NUL; 9589 } 9590 9591 /* For NOBREAK we never try splitting, it won't make any 9592 * word valid. */ 9593 if (slang->sl_nobreak) 9594 try_compound = TRUE; 9595 9596 /* If we could add a compound word, and it's also possible 9597 * to split at this point, do the split first and set 9598 * TSF_DIDSPLIT to avoid doing it again. */ 9599 else if (!fword_ends 9600 && try_compound 9601 && (sp->ts_flags & TSF_DIDSPLIT) == 0) 9602 { 9603 try_compound = FALSE; 9604 sp->ts_flags |= TSF_DIDSPLIT; 9605 --sp->ts_curi; /* do the same NUL again */ 9606 compflags[sp->ts_complen] = NUL; 9607 } 9608 else 9609 sp->ts_flags &= ~TSF_DIDSPLIT; 9610 9611 if (!try_compound && !fword_ends) 9612 { 9613 /* If we're going to split need to check that the 9614 * words so far are valid for compounding. If there 9615 * is only one word it must not have the NEEDCOMPOUND 9616 * flag. */ 9617 if (sp->ts_complen == sp->ts_compsplit 9618 && (flags & WF_NEEDCOMP)) 9619 break; 9620 p = preword; 9621 while (*skiptowhite(p) != NUL) 9622 p = skipwhite(skiptowhite(p)); 9623 if (sp->ts_complen > sp->ts_compsplit 9624 && !can_compound(slang, p, 9625 compflags + sp->ts_compsplit)) 9626 break; 9627 newscore += SCORE_SPLIT; 9628 } 9629 9630 if (try_deeper(su, stack, depth, newscore)) 9631 { 9632 /* Save things to be restored at STATE_SPLITUNDO. */ 9633 sp->ts_save_badflags = su->su_badflags; 9634 sp->ts_state = STATE_SPLITUNDO; 9635 9636 ++depth; 9637 sp = &stack[depth]; 9638 9639 /* Append a space to preword when splitting. */ 9640 if (!try_compound && !fword_ends) 9641 STRCAT(preword, " "); 9642 sp->ts_prewordlen = STRLEN(preword); 9643 sp->ts_splitoff = sp->ts_twordlen; 9644 sp->ts_splitfidx = sp->ts_fidx; 9645 9646 /* If the badword has a non-word character at this 9647 * position skip it. That means replacing the 9648 * non-word character with a space. Always skip a 9649 * character when the word ends. */ 9650 if ((!try_compound 9651 && !spell_iswordp_nmw(fword + sp->ts_fidx)) 9652 || fword_ends) 9653 { 9654 int l; 9655 9656 #ifdef FEAT_MBYTE 9657 if (has_mbyte) 9658 l = MB_BYTE2LEN(fword[sp->ts_fidx]); 9659 else 9660 #endif 9661 l = 1; 9662 if (fword_ends) 9663 { 9664 /* Copy the skipped character to preword. */ 9665 mch_memmove(preword + sp->ts_prewordlen, 9666 fword + sp->ts_fidx, l); 9667 sp->ts_prewordlen += l; 9668 preword[sp->ts_prewordlen] = NUL; 9669 } 9670 else 9671 sp->ts_score -= SCORE_SPLIT - SCORE_SUBST; 9672 sp->ts_fidx += l; 9673 } 9674 9675 /* When compounding include compound flag in 9676 * compflags[] (already set above). When splitting we 9677 * may start compounding over again. */ 9678 if (try_compound) 9679 ++sp->ts_complen; 9680 else 9681 sp->ts_compsplit = sp->ts_complen; 9682 sp->ts_prefixdepth = PFD_NOPREFIX; 9683 9684 /* set su->su_badflags to the caps type at this 9685 * position */ 9686 #ifdef FEAT_MBYTE 9687 if (has_mbyte) 9688 n = nofold_len(fword, sp->ts_fidx, su->su_badptr); 9689 else 9690 #endif 9691 n = sp->ts_fidx; 9692 su->su_badflags = badword_captype(su->su_badptr + n, 9693 su->su_badptr + su->su_badlen); 9694 9695 /* Restart at top of the tree. */ 9696 sp->ts_arridx = 0; 9697 9698 /* If there are postponed prefixes, try these too. */ 9699 if (pbyts != NULL) 9700 { 9701 byts = pbyts; 9702 idxs = pidxs; 9703 sp->ts_prefixdepth = PFD_PREFIXTREE; 9704 sp->ts_state = STATE_NOPREFIX; 9705 } 9706 } 9707 } 9708 break; 9709 9710 case STATE_SPLITUNDO: 9711 /* Undo the changes done for word split or compound word. */ 9712 su->su_badflags = sp->ts_save_badflags; 9713 9714 /* Continue looking for NUL bytes. */ 9715 sp->ts_state = STATE_START; 9716 9717 /* In case we went into the prefix tree. */ 9718 byts = fbyts; 9719 idxs = fidxs; 9720 break; 9721 9722 case STATE_ENDNUL: 9723 /* Past the NUL bytes in the node. */ 9724 su->su_badflags = sp->ts_save_badflags; 9725 if (fword[sp->ts_fidx] == NUL 9726 #ifdef FEAT_MBYTE 9727 && sp->ts_tcharlen == 0 9728 #endif 9729 ) 9730 { 9731 /* The badword ends, can't use the bytes in this node. */ 9732 sp->ts_state = STATE_DEL; 9733 break; 9734 } 9735 sp->ts_state = STATE_PLAIN; 9736 /*FALLTHROUGH*/ 9737 9738 case STATE_PLAIN: 9739 /* 9740 * Go over all possible bytes at this node, add each to 9741 * tword[] and use child node. "ts_curi" is the index. 9742 */ 9743 arridx = sp->ts_arridx; 9744 if (sp->ts_curi > byts[arridx]) 9745 { 9746 /* Done all bytes at this node, do next state. When still 9747 * at already changed bytes skip the other tricks. */ 9748 if (sp->ts_fidx >= sp->ts_fidxtry) 9749 sp->ts_state = STATE_DEL; 9750 else 9751 sp->ts_state = STATE_FINAL; 9752 } 9753 else 9754 { 9755 arridx += sp->ts_curi++; 9756 c = byts[arridx]; 9757 9758 /* Normal byte, go one level deeper. If it's not equal to 9759 * the byte in the bad word adjust the score. But don't 9760 * even try when the byte was already changed. */ 9761 if (c == fword[sp->ts_fidx] 9762 #ifdef FEAT_MBYTE 9763 || (sp->ts_tcharlen > 0 9764 && sp->ts_isdiff != DIFF_NONE) 9765 #endif 9766 ) 9767 newscore = 0; 9768 else 9769 newscore = SCORE_SUBST; 9770 if ((newscore == 0 || sp->ts_fidx >= sp->ts_fidxtry) 9771 && try_deeper(su, stack, depth, newscore)) 9772 { 9773 ++depth; 9774 sp = &stack[depth]; 9775 ++sp->ts_fidx; 9776 tword[sp->ts_twordlen++] = c; 9777 sp->ts_arridx = idxs[arridx]; 9778 #ifdef FEAT_MBYTE 9779 if (newscore == SCORE_SUBST) 9780 sp->ts_isdiff = DIFF_YES; 9781 if (has_mbyte) 9782 { 9783 /* Multi-byte characters are a bit complicated to 9784 * handle: They differ when any of the bytes 9785 * differ and then their length may also differ. */ 9786 if (sp->ts_tcharlen == 0) 9787 { 9788 /* First byte. */ 9789 sp->ts_tcharidx = 0; 9790 sp->ts_tcharlen = MB_BYTE2LEN(c); 9791 sp->ts_fcharstart = sp->ts_fidx - 1; 9792 sp->ts_isdiff = (newscore != 0) 9793 ? DIFF_YES : DIFF_NONE; 9794 } 9795 else if (sp->ts_isdiff == DIFF_INSERT) 9796 /* When inserting trail bytes don't advance in 9797 * the bad word. */ 9798 --sp->ts_fidx; 9799 if (++sp->ts_tcharidx == sp->ts_tcharlen) 9800 { 9801 /* Last byte of character. */ 9802 if (sp->ts_isdiff == DIFF_YES) 9803 { 9804 /* Correct ts_fidx for the byte length of 9805 * the character (we didn't check that 9806 * before). */ 9807 sp->ts_fidx = sp->ts_fcharstart 9808 + MB_BYTE2LEN( 9809 fword[sp->ts_fcharstart]); 9810 9811 /* For changing a composing character 9812 * adjust the score from SCORE_SUBST to 9813 * SCORE_SUBCOMP. */ 9814 if (enc_utf8 9815 && utf_iscomposing( 9816 mb_ptr2char(tword 9817 + sp->ts_twordlen 9818 - sp->ts_tcharlen)) 9819 && utf_iscomposing( 9820 mb_ptr2char(fword 9821 + sp->ts_fcharstart))) 9822 sp->ts_score -= 9823 SCORE_SUBST - SCORE_SUBCOMP; 9824 9825 /* For a similar character adjust score 9826 * from SCORE_SUBST to SCORE_SIMILAR. */ 9827 else if (slang->sl_has_map 9828 && similar_chars(slang, 9829 mb_ptr2char(tword 9830 + sp->ts_twordlen 9831 - sp->ts_tcharlen), 9832 mb_ptr2char(fword 9833 + sp->ts_fcharstart))) 9834 sp->ts_score -= 9835 SCORE_SUBST - SCORE_SIMILAR; 9836 } 9837 else if (sp->ts_isdiff == DIFF_INSERT 9838 && sp->ts_twordlen > sp->ts_tcharlen) 9839 { 9840 p = tword + sp->ts_twordlen 9841 - sp->ts_tcharlen; 9842 c = mb_ptr2char(p); 9843 if (enc_utf8 && utf_iscomposing(c)) 9844 { 9845 /* Inserting a composing char doesn't 9846 * count that much. */ 9847 sp->ts_score -= SCORE_INS 9848 - SCORE_INSCOMP; 9849 } 9850 else 9851 { 9852 /* If the previous character was the 9853 * same, thus doubling a character, 9854 * give a bonus to the score. */ 9855 mb_ptr_back(tword, p); 9856 if (c == mb_ptr2char(p)) 9857 sp->ts_score -= SCORE_INS 9858 - SCORE_INSDUP; 9859 } 9860 } 9861 9862 /* Starting a new char, reset the length. */ 9863 sp->ts_tcharlen = 0; 9864 } 9865 } 9866 else 9867 #endif 9868 { 9869 /* If we found a similar char adjust the score. 9870 * We do this after calling try_deeper() because 9871 * it's slow. */ 9872 if (newscore != 0 9873 && slang->sl_has_map 9874 && similar_chars(slang, 9875 c, fword[sp->ts_fidx - 1])) 9876 sp->ts_score -= SCORE_SUBST - SCORE_SIMILAR; 9877 } 9878 } 9879 } 9880 break; 9881 9882 case STATE_DEL: 9883 #ifdef FEAT_MBYTE 9884 /* When past the first byte of a multi-byte char don't try 9885 * delete/insert/swap a character. */ 9886 if (has_mbyte && sp->ts_tcharlen > 0) 9887 { 9888 sp->ts_state = STATE_FINAL; 9889 break; 9890 } 9891 #endif 9892 /* 9893 * Try skipping one character in the bad word (delete it). 9894 */ 9895 sp->ts_state = STATE_INS; 9896 sp->ts_curi = 1; 9897 if (fword[sp->ts_fidx] != NUL 9898 && try_deeper(su, stack, depth, SCORE_DEL)) 9899 { 9900 ++depth; 9901 9902 /* Advance over the character in fword[]. Give a bonus to 9903 * the score if the same character is following "nn" -> 9904 * "n". */ 9905 #ifdef FEAT_MBYTE 9906 if (has_mbyte) 9907 { 9908 c = mb_ptr2char(fword + sp->ts_fidx); 9909 stack[depth].ts_fidx += MB_BYTE2LEN(fword[sp->ts_fidx]); 9910 if (enc_utf8 && utf_iscomposing(c)) 9911 stack[depth].ts_score -= SCORE_DEL - SCORE_DELCOMP; 9912 else if (c == mb_ptr2char(fword + stack[depth].ts_fidx)) 9913 stack[depth].ts_score -= SCORE_DEL - SCORE_DELDUP; 9914 } 9915 else 9916 #endif 9917 { 9918 ++stack[depth].ts_fidx; 9919 if (fword[sp->ts_fidx] == fword[sp->ts_fidx + 1]) 9920 stack[depth].ts_score -= SCORE_DEL - SCORE_DELDUP; 9921 } 9922 break; 9923 } 9924 /*FALLTHROUGH*/ 9925 9926 case STATE_INS: 9927 /* Insert one byte. Do this for each possible byte at this 9928 * node. */ 9929 n = sp->ts_arridx; 9930 if (sp->ts_curi > byts[n]) 9931 { 9932 /* Done all bytes at this node, do next state. */ 9933 sp->ts_state = STATE_SWAP; 9934 } 9935 else 9936 { 9937 /* Do one more byte at this node. Skip NUL bytes. */ 9938 n += sp->ts_curi++; 9939 c = byts[n]; 9940 if (c != 0 && try_deeper(su, stack, depth, SCORE_INS)) 9941 { 9942 ++depth; 9943 sp = &stack[depth]; 9944 tword[sp->ts_twordlen++] = c; 9945 sp->ts_arridx = idxs[n]; 9946 #ifdef FEAT_MBYTE 9947 if (has_mbyte) 9948 { 9949 fl = MB_BYTE2LEN(c); 9950 if (fl > 1) 9951 { 9952 /* There are following bytes for the same 9953 * character. We must find all bytes before 9954 * trying delete/insert/swap/etc. */ 9955 sp->ts_tcharlen = fl; 9956 sp->ts_tcharidx = 1; 9957 sp->ts_isdiff = DIFF_INSERT; 9958 } 9959 } 9960 else 9961 fl = 1; 9962 if (fl == 1) 9963 #endif 9964 { 9965 /* If the previous character was the same, thus 9966 * doubling a character, give a bonus to the 9967 * score. */ 9968 if (sp->ts_twordlen >= 2 9969 && tword[sp->ts_twordlen - 2] == c) 9970 sp->ts_score -= SCORE_INS - SCORE_INSDUP; 9971 } 9972 } 9973 } 9974 break; 9975 9976 case STATE_SWAP: 9977 /* 9978 * Swap two bytes in the bad word: "12" -> "21". 9979 * We change "fword" here, it's changed back afterwards. 9980 */ 9981 p = fword + sp->ts_fidx; 9982 c = *p; 9983 if (c == NUL) 9984 { 9985 /* End of word, can't swap or replace. */ 9986 sp->ts_state = STATE_FINAL; 9987 break; 9988 } 9989 #ifdef FEAT_MBYTE 9990 if (has_mbyte) 9991 { 9992 n = mb_cptr2len(p); 9993 c = mb_ptr2char(p); 9994 c2 = mb_ptr2char(p + n); 9995 } 9996 else 9997 #endif 9998 c2 = p[1]; 9999 if (c == c2) 10000 { 10001 /* Characters are identical, swap won't do anything. */ 10002 sp->ts_state = STATE_SWAP3; 10003 break; 10004 } 10005 if (c2 != NUL && try_deeper(su, stack, depth, SCORE_SWAP)) 10006 { 10007 sp->ts_state = STATE_UNSWAP; 10008 ++depth; 10009 #ifdef FEAT_MBYTE 10010 if (has_mbyte) 10011 { 10012 fl = mb_char2len(c2); 10013 mch_memmove(p, p + n, fl); 10014 mb_char2bytes(c, p + fl); 10015 stack[depth].ts_fidxtry = sp->ts_fidx + n + fl; 10016 } 10017 else 10018 #endif 10019 { 10020 p[0] = c2; 10021 p[1] = c; 10022 stack[depth].ts_fidxtry = sp->ts_fidx + 2; 10023 } 10024 } 10025 else 10026 /* If this swap doesn't work then SWAP3 won't either. */ 10027 sp->ts_state = STATE_REP_INI; 10028 break; 10029 10030 case STATE_UNSWAP: 10031 /* Undo the STATE_SWAP swap: "21" -> "12". */ 10032 p = fword + sp->ts_fidx; 10033 #ifdef FEAT_MBYTE 10034 if (has_mbyte) 10035 { 10036 n = MB_BYTE2LEN(*p); 10037 c = mb_ptr2char(p + n); 10038 mch_memmove(p + MB_BYTE2LEN(p[n]), p, n); 10039 mb_char2bytes(c, p); 10040 } 10041 else 10042 #endif 10043 { 10044 c = *p; 10045 *p = p[1]; 10046 p[1] = c; 10047 } 10048 /*FALLTHROUGH*/ 10049 10050 case STATE_SWAP3: 10051 /* Swap two bytes, skipping one: "123" -> "321". We change 10052 * "fword" here, it's changed back afterwards. */ 10053 p = fword + sp->ts_fidx; 10054 #ifdef FEAT_MBYTE 10055 if (has_mbyte) 10056 { 10057 n = mb_cptr2len(p); 10058 c = mb_ptr2char(p); 10059 fl = mb_cptr2len(p + n); 10060 c2 = mb_ptr2char(p + n); 10061 c3 = mb_ptr2char(p + n + fl); 10062 } 10063 else 10064 #endif 10065 { 10066 c = *p; 10067 c2 = p[1]; 10068 c3 = p[2]; 10069 } 10070 10071 /* When characters are identical: "121" then SWAP3 result is 10072 * identical, ROT3L result is same as SWAP: "211", ROT3L 10073 * result is same as SWAP on next char: "112". Thus skip all 10074 * swapping. Also skip when c3 is NUL. */ 10075 if (c == c3 || c3 == NUL) 10076 { 10077 sp->ts_state = STATE_REP_INI; 10078 break; 10079 } 10080 if (try_deeper(su, stack, depth, SCORE_SWAP3)) 10081 { 10082 sp->ts_state = STATE_UNSWAP3; 10083 ++depth; 10084 #ifdef FEAT_MBYTE 10085 if (has_mbyte) 10086 { 10087 tl = mb_char2len(c3); 10088 mch_memmove(p, p + n + fl, tl); 10089 mb_char2bytes(c2, p + tl); 10090 mb_char2bytes(c, p + fl + tl); 10091 stack[depth].ts_fidxtry = sp->ts_fidx + n + fl + tl; 10092 } 10093 else 10094 #endif 10095 { 10096 p[0] = p[2]; 10097 p[2] = c; 10098 stack[depth].ts_fidxtry = sp->ts_fidx + 3; 10099 } 10100 } 10101 else 10102 sp->ts_state = STATE_REP_INI; 10103 break; 10104 10105 case STATE_UNSWAP3: 10106 /* Undo STATE_SWAP3: "321" -> "123" */ 10107 p = fword + sp->ts_fidx; 10108 #ifdef FEAT_MBYTE 10109 if (has_mbyte) 10110 { 10111 n = MB_BYTE2LEN(*p); 10112 c2 = mb_ptr2char(p + n); 10113 fl = MB_BYTE2LEN(p[n]); 10114 c = mb_ptr2char(p + n + fl); 10115 tl = MB_BYTE2LEN(p[n + fl]); 10116 mch_memmove(p + fl + tl, p, n); 10117 mb_char2bytes(c, p); 10118 mb_char2bytes(c2, p + tl); 10119 } 10120 else 10121 #endif 10122 { 10123 c = *p; 10124 *p = p[2]; 10125 p[2] = c; 10126 } 10127 10128 /* Rotate three characters left: "123" -> "231". We change 10129 * "fword" here, it's changed back afterwards. */ 10130 if (try_deeper(su, stack, depth, SCORE_SWAP3)) 10131 { 10132 sp->ts_state = STATE_UNROT3L; 10133 ++depth; 10134 p = fword + sp->ts_fidx; 10135 #ifdef FEAT_MBYTE 10136 if (has_mbyte) 10137 { 10138 n = mb_cptr2len(p); 10139 c = mb_ptr2char(p); 10140 fl = mb_cptr2len(p + n); 10141 fl += mb_cptr2len(p + n + fl); 10142 mch_memmove(p, p + n, fl); 10143 mb_char2bytes(c, p + fl); 10144 stack[depth].ts_fidxtry = sp->ts_fidx + n + fl; 10145 } 10146 else 10147 #endif 10148 { 10149 c = *p; 10150 *p = p[1]; 10151 p[1] = p[2]; 10152 p[2] = c; 10153 stack[depth].ts_fidxtry = sp->ts_fidx + 3; 10154 } 10155 } 10156 else 10157 sp->ts_state = STATE_REP_INI; 10158 break; 10159 10160 case STATE_UNROT3L: 10161 /* Undo ROT3L: "231" -> "123" */ 10162 p = fword + sp->ts_fidx; 10163 #ifdef FEAT_MBYTE 10164 if (has_mbyte) 10165 { 10166 n = MB_BYTE2LEN(*p); 10167 n += MB_BYTE2LEN(p[n]); 10168 c = mb_ptr2char(p + n); 10169 tl = MB_BYTE2LEN(p[n]); 10170 mch_memmove(p + tl, p, n); 10171 mb_char2bytes(c, p); 10172 } 10173 else 10174 #endif 10175 { 10176 c = p[2]; 10177 p[2] = p[1]; 10178 p[1] = *p; 10179 *p = c; 10180 } 10181 10182 /* Rotate three bytes right: "123" -> "312". We change 10183 * "fword" here, it's changed back afterwards. */ 10184 if (try_deeper(su, stack, depth, SCORE_SWAP3)) 10185 { 10186 sp->ts_state = STATE_UNROT3R; 10187 ++depth; 10188 p = fword + sp->ts_fidx; 10189 #ifdef FEAT_MBYTE 10190 if (has_mbyte) 10191 { 10192 n = mb_cptr2len(p); 10193 n += mb_cptr2len(p + n); 10194 c = mb_ptr2char(p + n); 10195 tl = mb_cptr2len(p + n); 10196 mch_memmove(p + tl, p, n); 10197 mb_char2bytes(c, p); 10198 stack[depth].ts_fidxtry = sp->ts_fidx + n + tl; 10199 } 10200 else 10201 #endif 10202 { 10203 c = p[2]; 10204 p[2] = p[1]; 10205 p[1] = *p; 10206 *p = c; 10207 stack[depth].ts_fidxtry = sp->ts_fidx + 3; 10208 } 10209 } 10210 else 10211 sp->ts_state = STATE_REP_INI; 10212 break; 10213 10214 case STATE_UNROT3R: 10215 /* Undo ROT3R: "312" -> "123" */ 10216 p = fword + sp->ts_fidx; 10217 #ifdef FEAT_MBYTE 10218 if (has_mbyte) 10219 { 10220 c = mb_ptr2char(p); 10221 tl = MB_BYTE2LEN(*p); 10222 n = MB_BYTE2LEN(p[tl]); 10223 n += MB_BYTE2LEN(p[tl + n]); 10224 mch_memmove(p, p + tl, n); 10225 mb_char2bytes(c, p + n); 10226 } 10227 else 10228 #endif 10229 { 10230 c = *p; 10231 *p = p[1]; 10232 p[1] = p[2]; 10233 p[2] = c; 10234 } 10235 /*FALLTHROUGH*/ 10236 10237 case STATE_REP_INI: 10238 /* Check if matching with REP items from the .aff file would 10239 * work. Quickly skip if: 10240 * - there are no REP items 10241 * - the score is going to be too high anyway 10242 * - already applied a REP item or swapped here */ 10243 if (lp->lp_replang == NULL 10244 || sp->ts_score + SCORE_REP >= su->su_maxscore 10245 || sp->ts_fidx < sp->ts_fidxtry) 10246 { 10247 sp->ts_state = STATE_FINAL; 10248 break; 10249 } 10250 gap = &lp->lp_replang->sl_rep; 10251 10252 /* Use the first byte to quickly find the first entry that 10253 * may match. If the index is -1 there is none. */ 10254 sp->ts_curi = lp->lp_replang->sl_rep_first[fword[sp->ts_fidx]]; 10255 if (sp->ts_curi < 0) 10256 { 10257 sp->ts_state = STATE_FINAL; 10258 break; 10259 } 10260 10261 sp->ts_state = STATE_REP; 10262 /*FALLTHROUGH*/ 10263 10264 case STATE_REP: 10265 /* Try matching with REP items from the .aff file. For each 10266 * match replace the characters and check if the resulting 10267 * word is valid. */ 10268 p = fword + sp->ts_fidx; 10269 10270 gap = &lp->lp_replang->sl_rep; 10271 while (sp->ts_curi < gap->ga_len) 10272 { 10273 ftp = (fromto_T *)gap->ga_data + sp->ts_curi++; 10274 if (*ftp->ft_from != *p) 10275 { 10276 /* past possible matching entries */ 10277 sp->ts_curi = gap->ga_len; 10278 break; 10279 } 10280 if (STRNCMP(ftp->ft_from, p, STRLEN(ftp->ft_from)) == 0 10281 && try_deeper(su, stack, depth, SCORE_REP)) 10282 { 10283 /* Need to undo this afterwards. */ 10284 sp->ts_state = STATE_REP_UNDO; 10285 10286 /* Change the "from" to the "to" string. */ 10287 ++depth; 10288 fl = STRLEN(ftp->ft_from); 10289 tl = STRLEN(ftp->ft_to); 10290 if (fl != tl) 10291 { 10292 mch_memmove(p + tl, p + fl, STRLEN(p + fl) + 1); 10293 repextra += tl - fl; 10294 } 10295 mch_memmove(p, ftp->ft_to, tl); 10296 stack[depth].ts_fidxtry = sp->ts_fidx + tl; 10297 #ifdef FEAT_MBYTE 10298 stack[depth].ts_tcharlen = 0; 10299 #endif 10300 break; 10301 } 10302 } 10303 10304 if (sp->ts_curi >= gap->ga_len && sp->ts_state == STATE_REP) 10305 /* No (more) matches. */ 10306 sp->ts_state = STATE_FINAL; 10307 10308 break; 10309 10310 case STATE_REP_UNDO: 10311 /* Undo a REP replacement and continue with the next one. */ 10312 ftp = (fromto_T *)lp->lp_replang->sl_rep.ga_data 10313 + sp->ts_curi - 1; 10314 fl = STRLEN(ftp->ft_from); 10315 tl = STRLEN(ftp->ft_to); 10316 p = fword + sp->ts_fidx; 10317 if (fl != tl) 10318 { 10319 mch_memmove(p + fl, p + tl, STRLEN(p + tl) + 1); 10320 repextra -= tl - fl; 10321 } 10322 mch_memmove(p, ftp->ft_from, fl); 10323 sp->ts_state = STATE_REP; 10324 break; 10325 10326 default: 10327 /* Did all possible states at this level, go up one level. */ 10328 --depth; 10329 10330 if (depth >= 0 && stack[depth].ts_prefixdepth == PFD_PREFIXTREE) 10331 { 10332 /* Continue in or go back to the prefix tree. */ 10333 byts = pbyts; 10334 idxs = pidxs; 10335 } 10336 10337 /* Don't check for CTRL-C too often, it takes time. */ 10338 line_breakcheck(); 10339 } 10340 } 10341 } 10342 } 10343 10344 /* 10345 * Try going one level deeper in the tree. 10346 */ 10347 static int 10348 try_deeper(su, stack, depth, score_add) 10349 suginfo_T *su; 10350 trystate_T *stack; 10351 int depth; 10352 int score_add; 10353 { 10354 int newscore; 10355 10356 /* Refuse to go deeper if the scrore is getting too big. */ 10357 newscore = stack[depth].ts_score + score_add; 10358 if (newscore >= su->su_maxscore) 10359 return FALSE; 10360 10361 stack[depth + 1] = stack[depth]; 10362 stack[depth + 1].ts_state = STATE_START; 10363 stack[depth + 1].ts_score = newscore; 10364 stack[depth + 1].ts_curi = 1; /* start just after length byte */ 10365 stack[depth + 1].ts_flags = 0; 10366 return TRUE; 10367 } 10368 10369 #ifdef FEAT_MBYTE 10370 /* 10371 * Case-folding may change the number of bytes: Count nr of chars in 10372 * fword[flen] and return the byte length of that many chars in "word". 10373 */ 10374 static int 10375 nofold_len(fword, flen, word) 10376 char_u *fword; 10377 int flen; 10378 char_u *word; 10379 { 10380 char_u *p; 10381 int i = 0; 10382 10383 for (p = fword; p < fword + flen; mb_ptr_adv(p)) 10384 ++i; 10385 for (p = word; i > 0; mb_ptr_adv(p)) 10386 --i; 10387 return (int)(p - word); 10388 } 10389 #endif 10390 10391 /* 10392 * "fword" is a good word with case folded. Find the matching keep-case 10393 * words and put it in "kword". 10394 * Theoretically there could be several keep-case words that result in the 10395 * same case-folded word, but we only find one... 10396 */ 10397 static void 10398 find_keepcap_word(slang, fword, kword) 10399 slang_T *slang; 10400 char_u *fword; 10401 char_u *kword; 10402 { 10403 char_u uword[MAXWLEN]; /* "fword" in upper-case */ 10404 int depth; 10405 idx_T tryidx; 10406 10407 /* The following arrays are used at each depth in the tree. */ 10408 idx_T arridx[MAXWLEN]; 10409 int round[MAXWLEN]; 10410 int fwordidx[MAXWLEN]; 10411 int uwordidx[MAXWLEN]; 10412 int kwordlen[MAXWLEN]; 10413 10414 int flen, ulen; 10415 int l; 10416 int len; 10417 int c; 10418 idx_T lo, hi, m; 10419 char_u *p; 10420 char_u *byts = slang->sl_kbyts; /* array with bytes of the words */ 10421 idx_T *idxs = slang->sl_kidxs; /* array with indexes */ 10422 10423 if (byts == NULL) 10424 { 10425 /* array is empty: "cannot happen" */ 10426 *kword = NUL; 10427 return; 10428 } 10429 10430 /* Make an all-cap version of "fword". */ 10431 allcap_copy(fword, uword); 10432 10433 /* 10434 * Each character needs to be tried both case-folded and upper-case. 10435 * All this gets very complicated if we keep in mind that changing case 10436 * may change the byte length of a multi-byte character... 10437 */ 10438 depth = 0; 10439 arridx[0] = 0; 10440 round[0] = 0; 10441 fwordidx[0] = 0; 10442 uwordidx[0] = 0; 10443 kwordlen[0] = 0; 10444 while (depth >= 0) 10445 { 10446 if (fword[fwordidx[depth]] == NUL) 10447 { 10448 /* We are at the end of "fword". If the tree allows a word to end 10449 * here we have found a match. */ 10450 if (byts[arridx[depth] + 1] == 0) 10451 { 10452 kword[kwordlen[depth]] = NUL; 10453 return; 10454 } 10455 10456 /* kword is getting too long, continue one level up */ 10457 --depth; 10458 } 10459 else if (++round[depth] > 2) 10460 { 10461 /* tried both fold-case and upper-case character, continue one 10462 * level up */ 10463 --depth; 10464 } 10465 else 10466 { 10467 /* 10468 * round[depth] == 1: Try using the folded-case character. 10469 * round[depth] == 2: Try using the upper-case character. 10470 */ 10471 #ifdef FEAT_MBYTE 10472 if (has_mbyte) 10473 { 10474 flen = mb_cptr2len(fword + fwordidx[depth]); 10475 ulen = mb_cptr2len(uword + uwordidx[depth]); 10476 } 10477 else 10478 #endif 10479 ulen = flen = 1; 10480 if (round[depth] == 1) 10481 { 10482 p = fword + fwordidx[depth]; 10483 l = flen; 10484 } 10485 else 10486 { 10487 p = uword + uwordidx[depth]; 10488 l = ulen; 10489 } 10490 10491 for (tryidx = arridx[depth]; l > 0; --l) 10492 { 10493 /* Perform a binary search in the list of accepted bytes. */ 10494 len = byts[tryidx++]; 10495 c = *p++; 10496 lo = tryidx; 10497 hi = tryidx + len - 1; 10498 while (lo < hi) 10499 { 10500 m = (lo + hi) / 2; 10501 if (byts[m] > c) 10502 hi = m - 1; 10503 else if (byts[m] < c) 10504 lo = m + 1; 10505 else 10506 { 10507 lo = hi = m; 10508 break; 10509 } 10510 } 10511 10512 /* Stop if there is no matching byte. */ 10513 if (hi < lo || byts[lo] != c) 10514 break; 10515 10516 /* Continue at the child (if there is one). */ 10517 tryidx = idxs[lo]; 10518 } 10519 10520 if (l == 0) 10521 { 10522 /* 10523 * Found the matching char. Copy it to "kword" and go a 10524 * level deeper. 10525 */ 10526 if (round[depth] == 1) 10527 { 10528 STRNCPY(kword + kwordlen[depth], fword + fwordidx[depth], 10529 flen); 10530 kwordlen[depth + 1] = kwordlen[depth] + flen; 10531 } 10532 else 10533 { 10534 STRNCPY(kword + kwordlen[depth], uword + uwordidx[depth], 10535 ulen); 10536 kwordlen[depth + 1] = kwordlen[depth] + ulen; 10537 } 10538 fwordidx[depth + 1] = fwordidx[depth] + flen; 10539 uwordidx[depth + 1] = uwordidx[depth] + ulen; 10540 10541 ++depth; 10542 arridx[depth] = tryidx; 10543 round[depth] = 0; 10544 } 10545 } 10546 } 10547 10548 /* Didn't find it: "cannot happen". */ 10549 *kword = NUL; 10550 } 10551 10552 /* 10553 * Compute the sound-a-like score for suggestions in su->su_ga and add them to 10554 * su->su_sga. 10555 */ 10556 static void 10557 score_comp_sal(su) 10558 suginfo_T *su; 10559 { 10560 langp_T *lp; 10561 char_u badsound[MAXWLEN]; 10562 int i; 10563 suggest_T *stp; 10564 suggest_T *sstp; 10565 int score; 10566 int lpi; 10567 10568 if (ga_grow(&su->su_sga, su->su_ga.ga_len) == FAIL) 10569 return; 10570 10571 /* Use the sound-folding of the first language that supports it. */ 10572 for (lpi = 0; lpi < curbuf->b_langp.ga_len; ++lpi) 10573 { 10574 lp = LANGP_ENTRY(curbuf->b_langp, lpi); 10575 if (lp->lp_slang->sl_sal.ga_len > 0) 10576 { 10577 /* soundfold the bad word */ 10578 spell_soundfold(lp->lp_slang, su->su_fbadword, TRUE, badsound); 10579 10580 for (i = 0; i < su->su_ga.ga_len; ++i) 10581 { 10582 stp = &SUG(su->su_ga, i); 10583 10584 /* Case-fold the suggested word, sound-fold it and compute the 10585 * sound-a-like score. */ 10586 score = stp_sal_score(stp, su, lp->lp_slang, badsound); 10587 if (score < SCORE_MAXMAX) 10588 { 10589 /* Add the suggestion. */ 10590 sstp = &SUG(su->su_sga, su->su_sga.ga_len); 10591 sstp->st_word = vim_strsave(stp->st_word); 10592 if (sstp->st_word != NULL) 10593 { 10594 sstp->st_score = score; 10595 sstp->st_altscore = 0; 10596 sstp->st_orglen = stp->st_orglen; 10597 ++su->su_sga.ga_len; 10598 } 10599 } 10600 } 10601 break; 10602 } 10603 } 10604 } 10605 10606 /* 10607 * Combine the list of suggestions in su->su_ga and su->su_sga. 10608 * They are intwined. 10609 */ 10610 static void 10611 score_combine(su) 10612 suginfo_T *su; 10613 { 10614 int i; 10615 int j; 10616 garray_T ga; 10617 garray_T *gap; 10618 langp_T *lp; 10619 suggest_T *stp; 10620 char_u *p; 10621 char_u badsound[MAXWLEN]; 10622 int round; 10623 int lpi; 10624 10625 /* Add the alternate score to su_ga. */ 10626 for (lpi = 0; lpi < curbuf->b_langp.ga_len; ++lpi) 10627 { 10628 lp = LANGP_ENTRY(curbuf->b_langp, lpi); 10629 if (lp->lp_slang->sl_sal.ga_len > 0) 10630 { 10631 /* soundfold the bad word */ 10632 spell_soundfold(lp->lp_slang, su->su_fbadword, TRUE, badsound); 10633 10634 for (i = 0; i < su->su_ga.ga_len; ++i) 10635 { 10636 stp = &SUG(su->su_ga, i); 10637 stp->st_altscore = stp_sal_score(stp, su, lp->lp_slang, 10638 badsound); 10639 if (stp->st_altscore == SCORE_MAXMAX) 10640 stp->st_score = (stp->st_score * 3 + SCORE_BIG) / 4; 10641 else 10642 stp->st_score = (stp->st_score * 3 10643 + stp->st_altscore) / 4; 10644 stp->st_salscore = FALSE; 10645 } 10646 break; 10647 } 10648 } 10649 10650 /* Add the alternate score to su_sga. */ 10651 for (i = 0; i < su->su_sga.ga_len; ++i) 10652 { 10653 stp = &SUG(su->su_sga, i); 10654 stp->st_altscore = spell_edit_score(su->su_badword, stp->st_word); 10655 if (stp->st_score == SCORE_MAXMAX) 10656 stp->st_score = (SCORE_BIG * 7 + stp->st_altscore) / 8; 10657 else 10658 stp->st_score = (stp->st_score * 7 + stp->st_altscore) / 8; 10659 stp->st_salscore = TRUE; 10660 } 10661 10662 /* Sort the suggestions and truncate at "maxcount" for both lists. */ 10663 (void)cleanup_suggestions(&su->su_ga, su->su_maxscore, su->su_maxcount); 10664 (void)cleanup_suggestions(&su->su_sga, su->su_maxscore, su->su_maxcount); 10665 10666 ga_init2(&ga, (int)sizeof(suginfo_T), 1); 10667 if (ga_grow(&ga, su->su_ga.ga_len + su->su_sga.ga_len) == FAIL) 10668 return; 10669 10670 stp = &SUG(ga, 0); 10671 for (i = 0; i < su->su_ga.ga_len || i < su->su_sga.ga_len; ++i) 10672 { 10673 /* round 1: get a suggestion from su_ga 10674 * round 2: get a suggestion from su_sga */ 10675 for (round = 1; round <= 2; ++round) 10676 { 10677 gap = round == 1 ? &su->su_ga : &su->su_sga; 10678 if (i < gap->ga_len) 10679 { 10680 /* Don't add a word if it's already there. */ 10681 p = SUG(*gap, i).st_word; 10682 for (j = 0; j < ga.ga_len; ++j) 10683 if (STRCMP(stp[j].st_word, p) == 0) 10684 break; 10685 if (j == ga.ga_len) 10686 stp[ga.ga_len++] = SUG(*gap, i); 10687 else 10688 vim_free(p); 10689 } 10690 } 10691 } 10692 10693 ga_clear(&su->su_ga); 10694 ga_clear(&su->su_sga); 10695 10696 /* Truncate the list to the number of suggestions that will be displayed. */ 10697 if (ga.ga_len > su->su_maxcount) 10698 { 10699 for (i = su->su_maxcount; i < ga.ga_len; ++i) 10700 vim_free(stp[i].st_word); 10701 ga.ga_len = su->su_maxcount; 10702 } 10703 10704 su->su_ga = ga; 10705 } 10706 10707 /* 10708 * For the goodword in "stp" compute the soundalike score compared to the 10709 * badword. 10710 */ 10711 static int 10712 stp_sal_score(stp, su, slang, badsound) 10713 suggest_T *stp; 10714 suginfo_T *su; 10715 slang_T *slang; 10716 char_u *badsound; /* sound-folded badword */ 10717 { 10718 char_u *p; 10719 char_u badsound2[MAXWLEN]; 10720 char_u fword[MAXWLEN]; 10721 char_u goodsound[MAXWLEN]; 10722 10723 if (stp->st_orglen <= su->su_badlen) 10724 p = badsound; 10725 else 10726 { 10727 /* soundfold the bad word with more characters following */ 10728 (void)spell_casefold(su->su_badptr, stp->st_orglen, fword, MAXWLEN); 10729 10730 /* When joining two words the sound often changes a lot. E.g., "t he" 10731 * sounds like "t h" while "the" sounds like "@". Avoid that by 10732 * removing the space. Don't do it when the good word also contains a 10733 * space. */ 10734 if (vim_iswhite(su->su_badptr[su->su_badlen]) 10735 && *skiptowhite(stp->st_word) == NUL) 10736 for (p = fword; *(p = skiptowhite(p)) != NUL; ) 10737 mch_memmove(p, p + 1, STRLEN(p)); 10738 10739 spell_soundfold(slang, fword, TRUE, badsound2); 10740 p = badsound2; 10741 } 10742 10743 /* Sound-fold the word and compute the score for the difference. */ 10744 spell_soundfold(slang, stp->st_word, FALSE, goodsound); 10745 10746 return soundalike_score(goodsound, p); 10747 } 10748 10749 /* 10750 * Find suggestions by comparing the word in a sound-a-like form. 10751 * Note: This doesn't support postponed prefixes. 10752 */ 10753 static void 10754 suggest_try_soundalike(su) 10755 suginfo_T *su; 10756 { 10757 char_u salword[MAXWLEN]; 10758 char_u tword[MAXWLEN]; 10759 char_u tsalword[MAXWLEN]; 10760 idx_T arridx[MAXWLEN]; 10761 int curi[MAXWLEN]; 10762 langp_T *lp; 10763 char_u *byts; 10764 idx_T *idxs; 10765 int depth; 10766 int c; 10767 idx_T n; 10768 int round; 10769 int flags; 10770 int sound_score; 10771 int local_score; 10772 int lpi; 10773 slang_T *slang; 10774 10775 /* Do this for all languages that support sound folding. */ 10776 for (lpi = 0; lpi < curbuf->b_langp.ga_len; ++lpi) 10777 { 10778 lp = LANGP_ENTRY(curbuf->b_langp, lpi); 10779 slang = lp->lp_slang; 10780 if (slang->sl_sal.ga_len > 0) 10781 { 10782 /* soundfold the bad word */ 10783 spell_soundfold(slang, su->su_fbadword, TRUE, salword); 10784 10785 /* 10786 * Go through the whole tree, soundfold each word and compare. 10787 * round 1: use the case-folded tree. 10788 * round 2: use the keep-case tree. 10789 */ 10790 for (round = 1; round <= 2; ++round) 10791 { 10792 if (round == 1) 10793 { 10794 byts = slang->sl_fbyts; 10795 idxs = slang->sl_fidxs; 10796 } 10797 else 10798 { 10799 byts = slang->sl_kbyts; 10800 idxs = slang->sl_kidxs; 10801 if (byts == NULL) /* no keep-case words */ 10802 continue; 10803 } 10804 10805 depth = 0; 10806 arridx[0] = 0; 10807 curi[0] = 1; 10808 while (depth >= 0 && !got_int) 10809 { 10810 if (curi[depth] > byts[arridx[depth]]) 10811 { 10812 /* Done all bytes at this node, go up one level. */ 10813 --depth; 10814 line_breakcheck(); 10815 } 10816 else 10817 { 10818 /* Do one more byte at this node. */ 10819 n = arridx[depth] + curi[depth]; 10820 ++curi[depth]; 10821 c = byts[n]; 10822 if (c == 0) 10823 { 10824 /* End of word, deal with the word. */ 10825 flags = (int)idxs[n]; 10826 if (round == 2 || (flags & WF_KEEPCAP) == 0) 10827 { 10828 tword[depth] = NUL; 10829 /* Sound-fold. Only in keep-case tree need to 10830 * case-fold the word. */ 10831 spell_soundfold(slang, tword, 10832 round == 1, tsalword); 10833 10834 /* Compute the edit distance between the 10835 * sound-a-like words. */ 10836 sound_score = soundalike_score(salword, 10837 tsalword); 10838 10839 /* Add a penalty for words in another region. */ 10840 if ((flags & WF_REGION) && (((unsigned)flags 10841 >> 16) & lp->lp_region) == 0) 10842 local_score = SCORE_REGION; 10843 else 10844 local_score = 0; 10845 sound_score += local_score; 10846 10847 if (sound_score < SCORE_MAXMAX) 10848 { 10849 char_u cword[MAXWLEN]; 10850 char_u *p; 10851 int score; 10852 10853 flags |= su->su_badflags; 10854 if (round == 1 && (flags & WF_CAPMASK) != 0) 10855 { 10856 /* Need to fix case according to 10857 * "flags". */ 10858 make_case_word(tword, cword, flags); 10859 p = cword; 10860 } 10861 else 10862 p = tword; 10863 10864 if (sps_flags & SPS_DOUBLE) 10865 add_suggestion(su, &su->su_sga, p, 10866 su->su_badlen, 10867 sound_score, 0, FALSE, 10868 lp->lp_sallang); 10869 else 10870 { 10871 /* Compute the score. */ 10872 score = spell_edit_score( 10873 su->su_badword, p) 10874 + local_score; 10875 if (sps_flags & SPS_BEST) 10876 /* give a bonus for the good word 10877 * sounding the same as the bad 10878 * word */ 10879 add_suggestion(su, &su->su_ga, p, 10880 su->su_badlen, 10881 RESCORE(score, sound_score), 10882 sound_score, TRUE, 10883 lp->lp_sallang); 10884 else 10885 add_suggestion(su, &su->su_ga, p, 10886 su->su_badlen, 10887 score + sound_score, 10888 0, FALSE, 10889 lp->lp_sallang); 10890 } 10891 } 10892 } 10893 10894 /* Skip over other NUL bytes. */ 10895 while (byts[n + 1] == 0) 10896 { 10897 ++n; 10898 ++curi[depth]; 10899 } 10900 } 10901 else 10902 { 10903 /* Normal char, go one level deeper. */ 10904 tword[depth++] = c; 10905 arridx[depth] = idxs[n]; 10906 curi[depth] = 1; 10907 } 10908 } 10909 } 10910 } 10911 } 10912 } 10913 } 10914 10915 /* 10916 * Copy "fword" to "cword", fixing case according to "flags". 10917 */ 10918 static void 10919 make_case_word(fword, cword, flags) 10920 char_u *fword; 10921 char_u *cword; 10922 int flags; 10923 { 10924 if (flags & WF_ALLCAP) 10925 /* Make it all upper-case */ 10926 allcap_copy(fword, cword); 10927 else if (flags & WF_ONECAP) 10928 /* Make the first letter upper-case */ 10929 onecap_copy(fword, cword, TRUE); 10930 else 10931 /* Use goodword as-is. */ 10932 STRCPY(cword, fword); 10933 } 10934 10935 /* 10936 * Use map string "map" for languages "lp". 10937 */ 10938 static void 10939 set_map_str(lp, map) 10940 slang_T *lp; 10941 char_u *map; 10942 { 10943 char_u *p; 10944 int headc = 0; 10945 int c; 10946 int i; 10947 10948 if (*map == NUL) 10949 { 10950 lp->sl_has_map = FALSE; 10951 return; 10952 } 10953 lp->sl_has_map = TRUE; 10954 10955 /* Init the array and hash table empty. */ 10956 for (i = 0; i < 256; ++i) 10957 lp->sl_map_array[i] = 0; 10958 #ifdef FEAT_MBYTE 10959 hash_init(&lp->sl_map_hash); 10960 #endif 10961 10962 /* 10963 * The similar characters are stored separated with slashes: 10964 * "aaa/bbb/ccc/". Fill sl_map_array[c] with the character before c and 10965 * before the same slash. For characters above 255 sl_map_hash is used. 10966 */ 10967 for (p = map; *p != NUL; ) 10968 { 10969 #ifdef FEAT_MBYTE 10970 c = mb_cptr2char_adv(&p); 10971 #else 10972 c = *p++; 10973 #endif 10974 if (c == '/') 10975 headc = 0; 10976 else 10977 { 10978 if (headc == 0) 10979 headc = c; 10980 10981 #ifdef FEAT_MBYTE 10982 /* Characters above 255 don't fit in sl_map_array[], put them in 10983 * the hash table. Each entry is the char, a NUL the headchar and 10984 * a NUL. */ 10985 if (c >= 256) 10986 { 10987 int cl = mb_char2len(c); 10988 int headcl = mb_char2len(headc); 10989 char_u *b; 10990 hash_T hash; 10991 hashitem_T *hi; 10992 10993 b = alloc((unsigned)(cl + headcl + 2)); 10994 if (b == NULL) 10995 return; 10996 mb_char2bytes(c, b); 10997 b[cl] = NUL; 10998 mb_char2bytes(headc, b + cl + 1); 10999 b[cl + 1 + headcl] = NUL; 11000 hash = hash_hash(b); 11001 hi = hash_lookup(&lp->sl_map_hash, b, hash); 11002 if (HASHITEM_EMPTY(hi)) 11003 hash_add_item(&lp->sl_map_hash, hi, b, hash); 11004 else 11005 { 11006 /* This should have been checked when generating the .spl 11007 * file. */ 11008 EMSG(_("E999: duplicate char in MAP entry")); 11009 vim_free(b); 11010 } 11011 } 11012 else 11013 #endif 11014 lp->sl_map_array[c] = headc; 11015 } 11016 } 11017 } 11018 11019 /* 11020 * Return TRUE if "c1" and "c2" are similar characters according to the MAP 11021 * lines in the .aff file. 11022 */ 11023 static int 11024 similar_chars(slang, c1, c2) 11025 slang_T *slang; 11026 int c1; 11027 int c2; 11028 { 11029 int m1, m2; 11030 #ifdef FEAT_MBYTE 11031 char_u buf[MB_MAXBYTES]; 11032 hashitem_T *hi; 11033 11034 if (c1 >= 256) 11035 { 11036 buf[mb_char2bytes(c1, buf)] = 0; 11037 hi = hash_find(&slang->sl_map_hash, buf); 11038 if (HASHITEM_EMPTY(hi)) 11039 m1 = 0; 11040 else 11041 m1 = mb_ptr2char(hi->hi_key + STRLEN(hi->hi_key) + 1); 11042 } 11043 else 11044 #endif 11045 m1 = slang->sl_map_array[c1]; 11046 if (m1 == 0) 11047 return FALSE; 11048 11049 11050 #ifdef FEAT_MBYTE 11051 if (c2 >= 256) 11052 { 11053 buf[mb_char2bytes(c2, buf)] = 0; 11054 hi = hash_find(&slang->sl_map_hash, buf); 11055 if (HASHITEM_EMPTY(hi)) 11056 m2 = 0; 11057 else 11058 m2 = mb_ptr2char(hi->hi_key + STRLEN(hi->hi_key) + 1); 11059 } 11060 else 11061 #endif 11062 m2 = slang->sl_map_array[c2]; 11063 11064 return m1 == m2; 11065 } 11066 11067 /* 11068 * Add a suggestion to the list of suggestions. 11069 * Do not add a duplicate suggestion or suggestions with a bad score. 11070 * When "use_score" is not zero it's used, otherwise the score is computed 11071 * with spell_edit_score(). 11072 */ 11073 static void 11074 add_suggestion(su, gap, goodword, badlen, score, altscore, had_bonus, slang) 11075 suginfo_T *su; 11076 garray_T *gap; 11077 char_u *goodword; 11078 int badlen; /* length of bad word used */ 11079 int score; 11080 int altscore; 11081 int had_bonus; /* value for st_had_bonus */ 11082 slang_T *slang; /* language for sound folding */ 11083 { 11084 suggest_T *stp; 11085 int i; 11086 char_u *p = NULL; 11087 int c = 0; 11088 11089 /* Check that the word wasn't banned. */ 11090 if (was_banned(su, goodword)) 11091 return; 11092 11093 /* If past "su_badlen" and the rest is identical stop at "su_badlen". 11094 * Remove the common part from "goodword". */ 11095 i = badlen - su->su_badlen; 11096 if (i > 0) 11097 { 11098 /* This assumes there was no case folding or it didn't change the 11099 * length... */ 11100 p = goodword + STRLEN(goodword) - i; 11101 if (p > goodword && STRNICMP(su->su_badptr + su->su_badlen, p, i) == 0) 11102 { 11103 badlen = su->su_badlen; 11104 c = *p; 11105 *p = NUL; 11106 } 11107 else 11108 p = NULL; 11109 } 11110 else if (i < 0) 11111 { 11112 /* When replacing part of the word check that we actually change 11113 * something. For "the the" a suggestion can be replacing the first 11114 * "the" with itself, since "the" wasn't banned. */ 11115 if (badlen == (int)STRLEN(goodword) 11116 && STRNCMP(su->su_badword, goodword, badlen) == 0) 11117 return; 11118 } 11119 11120 11121 if (score <= su->su_maxscore) 11122 { 11123 /* Check if the word is already there. Also check the length that is 11124 * being replaced "thes," -> "these" is a different suggestion from 11125 * "thes" -> "these". */ 11126 stp = &SUG(*gap, 0); 11127 for (i = gap->ga_len - 1; i >= 0; --i) 11128 if (STRCMP(stp[i].st_word, goodword) == 0 11129 && stp[i].st_orglen == badlen) 11130 { 11131 /* Found it. Remember the lowest score. */ 11132 if (stp[i].st_score > score) 11133 { 11134 stp[i].st_score = score; 11135 stp[i].st_altscore = altscore; 11136 stp[i].st_had_bonus = had_bonus; 11137 } 11138 if (stp[i].st_slang == NULL) 11139 stp[i].st_slang = slang; 11140 break; 11141 } 11142 11143 if (i < 0 && ga_grow(gap, 1) == OK) 11144 { 11145 /* Add a suggestion. */ 11146 stp = &SUG(*gap, gap->ga_len); 11147 stp->st_word = vim_strsave(goodword); 11148 if (stp->st_word != NULL) 11149 { 11150 stp->st_score = score; 11151 stp->st_altscore = altscore; 11152 stp->st_had_bonus = had_bonus; 11153 stp->st_orglen = badlen; 11154 stp->st_slang = slang; 11155 ++gap->ga_len; 11156 11157 /* If we have too many suggestions now, sort the list and keep 11158 * the best suggestions. */ 11159 if (gap->ga_len > SUG_MAX_COUNT(su)) 11160 su->su_maxscore = cleanup_suggestions(gap, su->su_maxscore, 11161 SUG_CLEAN_COUNT(su)); 11162 } 11163 } 11164 } 11165 11166 if (p != NULL) 11167 *p = c; /* restore "goodword" */ 11168 } 11169 11170 /* 11171 * Add a word to be banned. 11172 */ 11173 static void 11174 add_banned(su, word) 11175 suginfo_T *su; 11176 char_u *word; 11177 { 11178 char_u *s = vim_strsave(word); 11179 hash_T hash; 11180 hashitem_T *hi; 11181 11182 if (s != NULL) 11183 { 11184 hash = hash_hash(s); 11185 hi = hash_lookup(&su->su_banned, s, hash); 11186 if (HASHITEM_EMPTY(hi)) 11187 hash_add_item(&su->su_banned, hi, s, hash); 11188 else 11189 vim_free(s); 11190 } 11191 } 11192 11193 /* 11194 * Return TRUE if a word appears in the list of banned words. 11195 */ 11196 static int 11197 was_banned(su, word) 11198 suginfo_T *su; 11199 char_u *word; 11200 { 11201 hashitem_T *hi = hash_find(&su->su_banned, word); 11202 11203 return !HASHITEM_EMPTY(hi); 11204 } 11205 11206 /* 11207 * Free the banned words in "su". 11208 */ 11209 static void 11210 free_banned(su) 11211 suginfo_T *su; 11212 { 11213 int todo; 11214 hashitem_T *hi; 11215 11216 todo = su->su_banned.ht_used; 11217 for (hi = su->su_banned.ht_array; todo > 0; ++hi) 11218 { 11219 if (!HASHITEM_EMPTY(hi)) 11220 { 11221 vim_free(hi->hi_key); 11222 --todo; 11223 } 11224 } 11225 hash_clear(&su->su_banned); 11226 } 11227 11228 /* 11229 * Recompute the score if sound-folding is possible. This is slow, 11230 * thus only done for the final results. 11231 */ 11232 static void 11233 rescore_suggestions(su) 11234 suginfo_T *su; 11235 { 11236 langp_T *lp; 11237 suggest_T *stp; 11238 char_u sal_badword[MAXWLEN]; 11239 char_u sal_badword2[MAXWLEN]; 11240 int i; 11241 int lpi; 11242 slang_T *slang_first = NULL; 11243 slang_T *slang; 11244 11245 for (lpi = 0; lpi < curbuf->b_langp.ga_len; ++lpi) 11246 { 11247 lp = LANGP_ENTRY(curbuf->b_langp, lpi); 11248 if (lp->lp_slang->sl_sal.ga_len > 0) 11249 { 11250 /* soundfold the bad word */ 11251 slang_first = lp->lp_slang; 11252 spell_soundfold(slang_first, su->su_fbadword, TRUE, sal_badword); 11253 break; 11254 } 11255 } 11256 11257 if (slang_first != NULL) 11258 { 11259 for (i = 0; i < su->su_ga.ga_len; ++i) 11260 { 11261 /* Only rescore suggestions that have no sal score yet and do have 11262 * a language. */ 11263 stp = &SUG(su->su_ga, i); 11264 if (!stp->st_had_bonus && stp->st_slang != NULL) 11265 { 11266 slang = stp->st_slang; 11267 if (slang->sl_sal.ga_len > 0) 11268 { 11269 if (slang == slang_first) 11270 stp->st_altscore = stp_sal_score(stp, su, 11271 slang, sal_badword); 11272 else 11273 { 11274 spell_soundfold(slang, su->su_fbadword, 11275 TRUE, sal_badword2); 11276 stp->st_altscore = stp_sal_score(stp, su, 11277 slang, sal_badword2); 11278 } 11279 if (stp->st_altscore == SCORE_MAXMAX) 11280 stp->st_altscore = SCORE_BIG; 11281 stp->st_score = RESCORE(stp->st_score, stp->st_altscore); 11282 } 11283 } 11284 } 11285 } 11286 } 11287 11288 static int 11289 #ifdef __BORLANDC__ 11290 _RTLENTRYF 11291 #endif 11292 sug_compare __ARGS((const void *s1, const void *s2)); 11293 11294 /* 11295 * Function given to qsort() to sort the suggestions on st_score. 11296 */ 11297 static int 11298 #ifdef __BORLANDC__ 11299 _RTLENTRYF 11300 #endif 11301 sug_compare(s1, s2) 11302 const void *s1; 11303 const void *s2; 11304 { 11305 suggest_T *p1 = (suggest_T *)s1; 11306 suggest_T *p2 = (suggest_T *)s2; 11307 int n = p1->st_score - p2->st_score; 11308 11309 if (n == 0) 11310 return p1->st_altscore - p2->st_altscore; 11311 return n; 11312 } 11313 11314 /* 11315 * Cleanup the suggestions: 11316 * - Sort on score. 11317 * - Remove words that won't be displayed. 11318 * Returns the maximum score in the list or "maxscore" unmodified. 11319 */ 11320 static int 11321 cleanup_suggestions(gap, maxscore, keep) 11322 garray_T *gap; 11323 int maxscore; 11324 int keep; /* nr of suggestions to keep */ 11325 { 11326 suggest_T *stp = &SUG(*gap, 0); 11327 int i; 11328 11329 /* Sort the list. */ 11330 qsort(gap->ga_data, (size_t)gap->ga_len, sizeof(suggest_T), sug_compare); 11331 11332 /* Truncate the list to the number of suggestions that will be displayed. */ 11333 if (gap->ga_len > keep) 11334 { 11335 for (i = keep; i < gap->ga_len; ++i) 11336 vim_free(stp[i].st_word); 11337 gap->ga_len = keep; 11338 return stp[keep - 1].st_score; 11339 } 11340 return maxscore; 11341 } 11342 11343 #if defined(FEAT_EVAL) || defined(PROTO) 11344 /* 11345 * Soundfold a string, for soundfold(). 11346 * Result is in allocated memory, NULL for an error. 11347 */ 11348 char_u * 11349 eval_soundfold(word) 11350 char_u *word; 11351 { 11352 langp_T *lp; 11353 char_u sound[MAXWLEN]; 11354 int lpi; 11355 11356 if (curwin->w_p_spell && *curbuf->b_p_spl != NUL) 11357 /* Use the sound-folding of the first language that supports it. */ 11358 for (lpi = 0; lpi < curbuf->b_langp.ga_len; ++lpi) 11359 { 11360 lp = LANGP_ENTRY(curbuf->b_langp, lpi); 11361 if (lp->lp_slang->sl_sal.ga_len > 0) 11362 { 11363 /* soundfold the word */ 11364 spell_soundfold(lp->lp_slang, word, FALSE, sound); 11365 return vim_strsave(sound); 11366 } 11367 } 11368 11369 /* No language with sound folding, return word as-is. */ 11370 return vim_strsave(word); 11371 } 11372 #endif 11373 11374 /* 11375 * Turn "inword" into its sound-a-like equivalent in "res[MAXWLEN]". 11376 * 11377 * There are many ways to turn a word into a sound-a-like representation. The 11378 * oldest is Soundex (1918!). A nice overview can be found in "Approximate 11379 * swedish name matching - survey and test of different algorithms" by Klas 11380 * Erikson. 11381 * 11382 * We support two methods: 11383 * 1. SOFOFROM/SOFOTO do a simple character mapping. 11384 * 2. SAL items define a more advanced sound-folding (and much slower). 11385 */ 11386 static void 11387 spell_soundfold(slang, inword, folded, res) 11388 slang_T *slang; 11389 char_u *inword; 11390 int folded; /* "inword" is already case-folded */ 11391 char_u *res; 11392 { 11393 char_u fword[MAXWLEN]; 11394 char_u *word; 11395 11396 if (slang->sl_sofo) 11397 /* SOFOFROM and SOFOTO used */ 11398 spell_soundfold_sofo(slang, inword, res); 11399 else 11400 { 11401 /* SAL items used. Requires the word to be case-folded. */ 11402 if (folded) 11403 word = inword; 11404 else 11405 { 11406 (void)spell_casefold(inword, STRLEN(inword), fword, MAXWLEN); 11407 word = fword; 11408 } 11409 11410 #ifdef FEAT_MBYTE 11411 if (has_mbyte) 11412 spell_soundfold_wsal(slang, word, res); 11413 else 11414 #endif 11415 spell_soundfold_sal(slang, word, res); 11416 } 11417 } 11418 11419 /* 11420 * Perform sound folding of "inword" into "res" according to SOFOFROM and 11421 * SOFOTO lines. 11422 */ 11423 static void 11424 spell_soundfold_sofo(slang, inword, res) 11425 slang_T *slang; 11426 char_u *inword; 11427 char_u *res; 11428 { 11429 char_u *s; 11430 int ri = 0; 11431 int c; 11432 11433 #ifdef FEAT_MBYTE 11434 if (has_mbyte) 11435 { 11436 int prevc = 0; 11437 int *ip; 11438 11439 /* The sl_sal_first[] table contains the translation for chars up to 11440 * 255, sl_sal the rest. */ 11441 for (s = inword; *s != NUL; ) 11442 { 11443 c = mb_cptr2char_adv(&s); 11444 if (enc_utf8 ? utf_class(c) == 0 : vim_iswhite(c)) 11445 c = ' '; 11446 else if (c < 256) 11447 c = slang->sl_sal_first[c]; 11448 else 11449 { 11450 ip = ((int **)slang->sl_sal.ga_data)[c & 0xff]; 11451 if (ip == NULL) /* empty list, can't match */ 11452 c = NUL; 11453 else 11454 for (;;) /* find "c" in the list */ 11455 { 11456 if (*ip == 0) /* not found */ 11457 { 11458 c = NUL; 11459 break; 11460 } 11461 if (*ip == c) /* match! */ 11462 { 11463 c = ip[1]; 11464 break; 11465 } 11466 ip += 2; 11467 } 11468 } 11469 11470 if (c != NUL && c != prevc) 11471 { 11472 ri += mb_char2bytes(c, res + ri); 11473 if (ri + MB_MAXBYTES > MAXWLEN) 11474 break; 11475 prevc = c; 11476 } 11477 } 11478 } 11479 else 11480 #endif 11481 { 11482 /* The sl_sal_first[] table contains the translation. */ 11483 for (s = inword; (c = *s) != NUL; ++s) 11484 { 11485 if (vim_iswhite(c)) 11486 c = ' '; 11487 else 11488 c = slang->sl_sal_first[c]; 11489 if (c != NUL && (ri == 0 || res[ri - 1] != c)) 11490 res[ri++] = c; 11491 } 11492 } 11493 11494 res[ri] = NUL; 11495 } 11496 11497 static void 11498 spell_soundfold_sal(slang, inword, res) 11499 slang_T *slang; 11500 char_u *inword; 11501 char_u *res; 11502 { 11503 salitem_T *smp; 11504 char_u word[MAXWLEN]; 11505 char_u *s = inword; 11506 char_u *t; 11507 char_u *pf; 11508 int i, j, z; 11509 int reslen; 11510 int n, k = 0; 11511 int z0; 11512 int k0; 11513 int n0; 11514 int c; 11515 int pri; 11516 int p0 = -333; 11517 int c0; 11518 11519 /* Remove accents, if wanted. We actually remove all non-word characters. 11520 * But keep white space. We need a copy, the word may be changed here. */ 11521 if (slang->sl_rem_accents) 11522 { 11523 t = word; 11524 while (*s != NUL) 11525 { 11526 if (vim_iswhite(*s)) 11527 { 11528 *t++ = ' '; 11529 s = skipwhite(s); 11530 } 11531 else 11532 { 11533 if (spell_iswordp_nmw(s)) 11534 *t++ = *s; 11535 ++s; 11536 } 11537 } 11538 *t = NUL; 11539 } 11540 else 11541 STRCPY(word, s); 11542 11543 smp = (salitem_T *)slang->sl_sal.ga_data; 11544 11545 /* 11546 * This comes from Aspell phonet.cpp. Converted from C++ to C. 11547 * Changed to keep spaces. 11548 */ 11549 i = reslen = z = 0; 11550 while ((c = word[i]) != NUL) 11551 { 11552 /* Start with the first rule that has the character in the word. */ 11553 n = slang->sl_sal_first[c]; 11554 z0 = 0; 11555 11556 if (n >= 0) 11557 { 11558 /* check all rules for the same letter */ 11559 for (; (s = smp[n].sm_lead)[0] == c; ++n) 11560 { 11561 /* Quickly skip entries that don't match the word. Most 11562 * entries are less then three chars, optimize for that. */ 11563 k = smp[n].sm_leadlen; 11564 if (k > 1) 11565 { 11566 if (word[i + 1] != s[1]) 11567 continue; 11568 if (k > 2) 11569 { 11570 for (j = 2; j < k; ++j) 11571 if (word[i + j] != s[j]) 11572 break; 11573 if (j < k) 11574 continue; 11575 } 11576 } 11577 11578 if ((pf = smp[n].sm_oneof) != NULL) 11579 { 11580 /* Check for match with one of the chars in "sm_oneof". */ 11581 while (*pf != NUL && *pf != word[i + k]) 11582 ++pf; 11583 if (*pf == NUL) 11584 continue; 11585 ++k; 11586 } 11587 s = smp[n].sm_rules; 11588 pri = 5; /* default priority */ 11589 11590 p0 = *s; 11591 k0 = k; 11592 while (*s == '-' && k > 1) 11593 { 11594 k--; 11595 s++; 11596 } 11597 if (*s == '<') 11598 s++; 11599 if (VIM_ISDIGIT(*s)) 11600 { 11601 /* determine priority */ 11602 pri = *s - '0'; 11603 s++; 11604 } 11605 if (*s == '^' && *(s + 1) == '^') 11606 s++; 11607 11608 if (*s == NUL 11609 || (*s == '^' 11610 && (i == 0 || !(word[i - 1] == ' ' 11611 || spell_iswordp(word + i - 1, curbuf))) 11612 && (*(s + 1) != '$' 11613 || (!spell_iswordp(word + i + k0, curbuf)))) 11614 || (*s == '$' && i > 0 11615 && spell_iswordp(word + i - 1, curbuf) 11616 && (!spell_iswordp(word + i + k0, curbuf)))) 11617 { 11618 /* search for followup rules, if: */ 11619 /* followup and k > 1 and NO '-' in searchstring */ 11620 c0 = word[i + k - 1]; 11621 n0 = slang->sl_sal_first[c0]; 11622 11623 if (slang->sl_followup && k > 1 && n0 >= 0 11624 && p0 != '-' && word[i + k] != NUL) 11625 { 11626 /* test follow-up rule for "word[i + k]" */ 11627 for ( ; (s = smp[n0].sm_lead)[0] == c0; ++n0) 11628 { 11629 /* Quickly skip entries that don't match the word. 11630 * */ 11631 k0 = smp[n0].sm_leadlen; 11632 if (k0 > 1) 11633 { 11634 if (word[i + k] != s[1]) 11635 continue; 11636 if (k0 > 2) 11637 { 11638 pf = word + i + k + 1; 11639 for (j = 2; j < k0; ++j) 11640 if (*pf++ != s[j]) 11641 break; 11642 if (j < k0) 11643 continue; 11644 } 11645 } 11646 k0 += k - 1; 11647 11648 if ((pf = smp[n0].sm_oneof) != NULL) 11649 { 11650 /* Check for match with one of the chars in 11651 * "sm_oneof". */ 11652 while (*pf != NUL && *pf != word[i + k0]) 11653 ++pf; 11654 if (*pf == NUL) 11655 continue; 11656 ++k0; 11657 } 11658 11659 p0 = 5; 11660 s = smp[n0].sm_rules; 11661 while (*s == '-') 11662 { 11663 /* "k0" gets NOT reduced because 11664 * "if (k0 == k)" */ 11665 s++; 11666 } 11667 if (*s == '<') 11668 s++; 11669 if (VIM_ISDIGIT(*s)) 11670 { 11671 p0 = *s - '0'; 11672 s++; 11673 } 11674 11675 if (*s == NUL 11676 /* *s == '^' cuts */ 11677 || (*s == '$' 11678 && !spell_iswordp(word + i + k0, 11679 curbuf))) 11680 { 11681 if (k0 == k) 11682 /* this is just a piece of the string */ 11683 continue; 11684 11685 if (p0 < pri) 11686 /* priority too low */ 11687 continue; 11688 /* rule fits; stop search */ 11689 break; 11690 } 11691 } 11692 11693 if (p0 >= pri && smp[n0].sm_lead[0] == c0) 11694 continue; 11695 } 11696 11697 /* replace string */ 11698 s = smp[n].sm_to; 11699 if (s == NULL) 11700 s = (char_u *)""; 11701 pf = smp[n].sm_rules; 11702 p0 = (vim_strchr(pf, '<') != NULL) ? 1 : 0; 11703 if (p0 == 1 && z == 0) 11704 { 11705 /* rule with '<' is used */ 11706 if (reslen > 0 && *s != NUL && (res[reslen - 1] == c 11707 || res[reslen - 1] == *s)) 11708 reslen--; 11709 z0 = 1; 11710 z = 1; 11711 k0 = 0; 11712 while (*s != NUL && word[i + k0] != NUL) 11713 { 11714 word[i + k0] = *s; 11715 k0++; 11716 s++; 11717 } 11718 if (k > k0) 11719 mch_memmove(word + i + k0, word + i + k, 11720 STRLEN(word + i + k) + 1); 11721 11722 /* new "actual letter" */ 11723 c = word[i]; 11724 } 11725 else 11726 { 11727 /* no '<' rule used */ 11728 i += k - 1; 11729 z = 0; 11730 while (*s != NUL && s[1] != NUL && reslen < MAXWLEN) 11731 { 11732 if (reslen == 0 || res[reslen - 1] != *s) 11733 res[reslen++] = *s; 11734 s++; 11735 } 11736 /* new "actual letter" */ 11737 c = *s; 11738 if (strstr((char *)pf, "^^") != NULL) 11739 { 11740 if (c != NUL) 11741 res[reslen++] = c; 11742 mch_memmove(word, word + i + 1, 11743 STRLEN(word + i + 1) + 1); 11744 i = 0; 11745 z0 = 1; 11746 } 11747 } 11748 break; 11749 } 11750 } 11751 } 11752 else if (vim_iswhite(c)) 11753 { 11754 c = ' '; 11755 k = 1; 11756 } 11757 11758 if (z0 == 0) 11759 { 11760 if (k && !p0 && reslen < MAXWLEN && c != NUL 11761 && (!slang->sl_collapse || reslen == 0 11762 || res[reslen - 1] != c)) 11763 /* condense only double letters */ 11764 res[reslen++] = c; 11765 11766 i++; 11767 z = 0; 11768 k = 0; 11769 } 11770 } 11771 11772 res[reslen] = NUL; 11773 } 11774 11775 #ifdef FEAT_MBYTE 11776 /* 11777 * Turn "inword" into its sound-a-like equivalent in "res[MAXWLEN]". 11778 * Multi-byte version of spell_soundfold(). 11779 */ 11780 static void 11781 spell_soundfold_wsal(slang, inword, res) 11782 slang_T *slang; 11783 char_u *inword; 11784 char_u *res; 11785 { 11786 salitem_T *smp = (salitem_T *)slang->sl_sal.ga_data; 11787 int word[MAXWLEN]; 11788 int wres[MAXWLEN]; 11789 int l; 11790 char_u *s; 11791 int *ws; 11792 char_u *t; 11793 int *pf; 11794 int i, j, z; 11795 int reslen; 11796 int n, k = 0; 11797 int z0; 11798 int k0; 11799 int n0; 11800 int c; 11801 int pri; 11802 int p0 = -333; 11803 int c0; 11804 int did_white = FALSE; 11805 11806 /* 11807 * Convert the multi-byte string to a wide-character string. 11808 * Remove accents, if wanted. We actually remove all non-word characters. 11809 * But keep white space. 11810 */ 11811 n = 0; 11812 for (s = inword; *s != NUL; ) 11813 { 11814 t = s; 11815 c = mb_cptr2char_adv(&s); 11816 if (slang->sl_rem_accents) 11817 { 11818 if (enc_utf8 ? utf_class(c) == 0 : vim_iswhite(c)) 11819 { 11820 if (did_white) 11821 continue; 11822 c = ' '; 11823 did_white = TRUE; 11824 } 11825 else 11826 { 11827 did_white = FALSE; 11828 if (!spell_iswordp_nmw(t)) 11829 continue; 11830 } 11831 } 11832 word[n++] = c; 11833 } 11834 word[n] = NUL; 11835 11836 /* 11837 * This comes from Aspell phonet.cpp. 11838 * Converted from C++ to C. Added support for multi-byte chars. 11839 * Changed to keep spaces. 11840 */ 11841 i = reslen = z = 0; 11842 while ((c = word[i]) != NUL) 11843 { 11844 /* Start with the first rule that has the character in the word. */ 11845 n = slang->sl_sal_first[c & 0xff]; 11846 z0 = 0; 11847 11848 if (n >= 0) 11849 { 11850 /* check all rules for the same index byte */ 11851 for (; ((ws = smp[n].sm_lead_w)[0] & 0xff) == (c & 0xff); ++n) 11852 { 11853 /* Quickly skip entries that don't match the word. Most 11854 * entries are less then three chars, optimize for that. */ 11855 if (c != ws[0]) 11856 continue; 11857 k = smp[n].sm_leadlen; 11858 if (k > 1) 11859 { 11860 if (word[i + 1] != ws[1]) 11861 continue; 11862 if (k > 2) 11863 { 11864 for (j = 2; j < k; ++j) 11865 if (word[i + j] != ws[j]) 11866 break; 11867 if (j < k) 11868 continue; 11869 } 11870 } 11871 11872 if ((pf = smp[n].sm_oneof_w) != NULL) 11873 { 11874 /* Check for match with one of the chars in "sm_oneof". */ 11875 while (*pf != NUL && *pf != word[i + k]) 11876 ++pf; 11877 if (*pf == NUL) 11878 continue; 11879 ++k; 11880 } 11881 s = smp[n].sm_rules; 11882 pri = 5; /* default priority */ 11883 11884 p0 = *s; 11885 k0 = k; 11886 while (*s == '-' && k > 1) 11887 { 11888 k--; 11889 s++; 11890 } 11891 if (*s == '<') 11892 s++; 11893 if (VIM_ISDIGIT(*s)) 11894 { 11895 /* determine priority */ 11896 pri = *s - '0'; 11897 s++; 11898 } 11899 if (*s == '^' && *(s + 1) == '^') 11900 s++; 11901 11902 if (*s == NUL 11903 || (*s == '^' 11904 && (i == 0 || !(word[i - 1] == ' ' 11905 || spell_iswordp_w(word + i - 1, curbuf))) 11906 && (*(s + 1) != '$' 11907 || (!spell_iswordp_w(word + i + k0, curbuf)))) 11908 || (*s == '$' && i > 0 11909 && spell_iswordp_w(word + i - 1, curbuf) 11910 && (!spell_iswordp_w(word + i + k0, curbuf)))) 11911 { 11912 /* search for followup rules, if: */ 11913 /* followup and k > 1 and NO '-' in searchstring */ 11914 c0 = word[i + k - 1]; 11915 n0 = slang->sl_sal_first[c0 & 0xff]; 11916 11917 if (slang->sl_followup && k > 1 && n0 >= 0 11918 && p0 != '-' && word[i + k] != NUL) 11919 { 11920 /* Test follow-up rule for "word[i + k]"; loop over 11921 * all entries with the same index byte. */ 11922 for ( ; ((ws = smp[n0].sm_lead_w)[0] & 0xff) 11923 == (c0 & 0xff); ++n0) 11924 { 11925 /* Quickly skip entries that don't match the word. 11926 */ 11927 if (c0 != ws[0]) 11928 continue; 11929 k0 = smp[n0].sm_leadlen; 11930 if (k0 > 1) 11931 { 11932 if (word[i + k] != ws[1]) 11933 continue; 11934 if (k0 > 2) 11935 { 11936 pf = word + i + k + 1; 11937 for (j = 2; j < k0; ++j) 11938 if (*pf++ != ws[j]) 11939 break; 11940 if (j < k0) 11941 continue; 11942 } 11943 } 11944 k0 += k - 1; 11945 11946 if ((pf = smp[n0].sm_oneof_w) != NULL) 11947 { 11948 /* Check for match with one of the chars in 11949 * "sm_oneof". */ 11950 while (*pf != NUL && *pf != word[i + k0]) 11951 ++pf; 11952 if (*pf == NUL) 11953 continue; 11954 ++k0; 11955 } 11956 11957 p0 = 5; 11958 s = smp[n0].sm_rules; 11959 while (*s == '-') 11960 { 11961 /* "k0" gets NOT reduced because 11962 * "if (k0 == k)" */ 11963 s++; 11964 } 11965 if (*s == '<') 11966 s++; 11967 if (VIM_ISDIGIT(*s)) 11968 { 11969 p0 = *s - '0'; 11970 s++; 11971 } 11972 11973 if (*s == NUL 11974 /* *s == '^' cuts */ 11975 || (*s == '$' 11976 && !spell_iswordp_w(word + i + k0, 11977 curbuf))) 11978 { 11979 if (k0 == k) 11980 /* this is just a piece of the string */ 11981 continue; 11982 11983 if (p0 < pri) 11984 /* priority too low */ 11985 continue; 11986 /* rule fits; stop search */ 11987 break; 11988 } 11989 } 11990 11991 if (p0 >= pri && (smp[n0].sm_lead_w[0] & 0xff) 11992 == (c0 & 0xff)) 11993 continue; 11994 } 11995 11996 /* replace string */ 11997 ws = smp[n].sm_to_w; 11998 s = smp[n].sm_rules; 11999 p0 = (vim_strchr(s, '<') != NULL) ? 1 : 0; 12000 if (p0 == 1 && z == 0) 12001 { 12002 /* rule with '<' is used */ 12003 if (reslen > 0 && ws != NULL && *ws != NUL 12004 && (wres[reslen - 1] == c 12005 || wres[reslen - 1] == *ws)) 12006 reslen--; 12007 z0 = 1; 12008 z = 1; 12009 k0 = 0; 12010 if (ws != NULL) 12011 while (*ws != NUL && word[i + k0] != NUL) 12012 { 12013 word[i + k0] = *ws; 12014 k0++; 12015 ws++; 12016 } 12017 if (k > k0) 12018 mch_memmove(word + i + k0, word + i + k, 12019 sizeof(int) * (STRLEN(word + i + k) + 1)); 12020 12021 /* new "actual letter" */ 12022 c = word[i]; 12023 } 12024 else 12025 { 12026 /* no '<' rule used */ 12027 i += k - 1; 12028 z = 0; 12029 if (ws != NULL) 12030 while (*ws != NUL && ws[1] != NUL 12031 && reslen < MAXWLEN) 12032 { 12033 if (reslen == 0 || wres[reslen - 1] != *ws) 12034 wres[reslen++] = *ws; 12035 ws++; 12036 } 12037 /* new "actual letter" */ 12038 if (ws == NULL) 12039 c = NUL; 12040 else 12041 c = *ws; 12042 if (strstr((char *)s, "^^") != NULL) 12043 { 12044 if (c != NUL) 12045 wres[reslen++] = c; 12046 mch_memmove(word, word + i + 1, 12047 sizeof(int) * (STRLEN(word + i + 1) + 1)); 12048 i = 0; 12049 z0 = 1; 12050 } 12051 } 12052 break; 12053 } 12054 } 12055 } 12056 else if (vim_iswhite(c)) 12057 { 12058 c = ' '; 12059 k = 1; 12060 } 12061 12062 if (z0 == 0) 12063 { 12064 if (k && !p0 && reslen < MAXWLEN && c != NUL 12065 && (!slang->sl_collapse || reslen == 0 12066 || wres[reslen - 1] != c)) 12067 /* condense only double letters */ 12068 wres[reslen++] = c; 12069 12070 i++; 12071 z = 0; 12072 k = 0; 12073 } 12074 } 12075 12076 /* Convert wide characters in "wres" to a multi-byte string in "res". */ 12077 l = 0; 12078 for (n = 0; n < reslen; ++n) 12079 { 12080 l += mb_char2bytes(wres[n], res + l); 12081 if (l + MB_MAXBYTES > MAXWLEN) 12082 break; 12083 } 12084 res[l] = NUL; 12085 } 12086 #endif 12087 12088 /* 12089 * Compute a score for two sound-a-like words. 12090 * This permits up to two inserts/deletes/swaps/etc. to keep things fast. 12091 * Instead of a generic loop we write out the code. That keeps it fast by 12092 * avoiding checks that will not be possible. 12093 */ 12094 static int 12095 soundalike_score(goodstart, badstart) 12096 char_u *goodstart; /* sound-folded good word */ 12097 char_u *badstart; /* sound-folded bad word */ 12098 { 12099 char_u *goodsound = goodstart; 12100 char_u *badsound = badstart; 12101 int goodlen; 12102 int badlen; 12103 int n; 12104 char_u *pl, *ps; 12105 char_u *pl2, *ps2; 12106 int score = 0; 12107 12108 /* adding/inserting "*" at the start (word starts with vowel) shouldn't be 12109 * counted so much, vowels halfway the word aren't counted at all. */ 12110 if ((*badsound == '*' || *goodsound == '*') && *badsound != *goodsound) 12111 { 12112 score = SCORE_DEL / 2; 12113 if (*badsound == '*') 12114 ++badsound; 12115 else 12116 ++goodsound; 12117 } 12118 12119 goodlen = STRLEN(goodsound); 12120 badlen = STRLEN(badsound); 12121 12122 /* Return quickly if the lenghts are too different to be fixed by two 12123 * changes. */ 12124 n = goodlen - badlen; 12125 if (n < -2 || n > 2) 12126 return SCORE_MAXMAX; 12127 12128 if (n > 0) 12129 { 12130 pl = goodsound; /* goodsound is longest */ 12131 ps = badsound; 12132 } 12133 else 12134 { 12135 pl = badsound; /* badsound is longest */ 12136 ps = goodsound; 12137 } 12138 12139 /* Skip over the identical part. */ 12140 while (*pl == *ps && *pl != NUL) 12141 { 12142 ++pl; 12143 ++ps; 12144 } 12145 12146 switch (n) 12147 { 12148 case -2: 12149 case 2: 12150 /* 12151 * Must delete two characters from "pl". 12152 */ 12153 ++pl; /* first delete */ 12154 while (*pl == *ps) 12155 { 12156 ++pl; 12157 ++ps; 12158 } 12159 /* strings must be equal after second delete */ 12160 if (STRCMP(pl + 1, ps) == 0) 12161 return score + SCORE_DEL * 2; 12162 12163 /* Failed to compare. */ 12164 break; 12165 12166 case -1: 12167 case 1: 12168 /* 12169 * Minimal one delete from "pl" required. 12170 */ 12171 12172 /* 1: delete */ 12173 pl2 = pl + 1; 12174 ps2 = ps; 12175 while (*pl2 == *ps2) 12176 { 12177 if (*pl2 == NUL) /* reached the end */ 12178 return score + SCORE_DEL; 12179 ++pl2; 12180 ++ps2; 12181 } 12182 12183 /* 2: delete then swap, then rest must be equal */ 12184 if (pl2[0] == ps2[1] && pl2[1] == ps2[0] 12185 && STRCMP(pl2 + 2, ps2 + 2) == 0) 12186 return score + SCORE_DEL + SCORE_SWAP; 12187 12188 /* 3: delete then substitute, then the rest must be equal */ 12189 if (STRCMP(pl2 + 1, ps2 + 1) == 0) 12190 return score + SCORE_DEL + SCORE_SUBST; 12191 12192 /* 4: first swap then delete */ 12193 if (pl[0] == ps[1] && pl[1] == ps[0]) 12194 { 12195 pl2 = pl + 2; /* swap, skip two chars */ 12196 ps2 = ps + 2; 12197 while (*pl2 == *ps2) 12198 { 12199 ++pl2; 12200 ++ps2; 12201 } 12202 /* delete a char and then strings must be equal */ 12203 if (STRCMP(pl2 + 1, ps2) == 0) 12204 return score + SCORE_SWAP + SCORE_DEL; 12205 } 12206 12207 /* 5: first substitute then delete */ 12208 pl2 = pl + 1; /* substitute, skip one char */ 12209 ps2 = ps + 1; 12210 while (*pl2 == *ps2) 12211 { 12212 ++pl2; 12213 ++ps2; 12214 } 12215 /* delete a char and then strings must be equal */ 12216 if (STRCMP(pl2 + 1, ps2) == 0) 12217 return score + SCORE_SUBST + SCORE_DEL; 12218 12219 /* Failed to compare. */ 12220 break; 12221 12222 case 0: 12223 /* 12224 * Lenghts are equal, thus changes must result in same length: An 12225 * insert is only possible in combination with a delete. 12226 * 1: check if for identical strings 12227 */ 12228 if (*pl == NUL) 12229 return score; 12230 12231 /* 2: swap */ 12232 if (pl[0] == ps[1] && pl[1] == ps[0]) 12233 { 12234 pl2 = pl + 2; /* swap, skip two chars */ 12235 ps2 = ps + 2; 12236 while (*pl2 == *ps2) 12237 { 12238 if (*pl2 == NUL) /* reached the end */ 12239 return score + SCORE_SWAP; 12240 ++pl2; 12241 ++ps2; 12242 } 12243 /* 3: swap and swap again */ 12244 if (pl2[0] == ps2[1] && pl2[1] == ps2[0] 12245 && STRCMP(pl2 + 2, ps2 + 2) == 0) 12246 return score + SCORE_SWAP + SCORE_SWAP; 12247 12248 /* 4: swap and substitute */ 12249 if (STRCMP(pl2 + 1, ps2 + 1) == 0) 12250 return score + SCORE_SWAP + SCORE_SUBST; 12251 } 12252 12253 /* 5: substitute */ 12254 pl2 = pl + 1; 12255 ps2 = ps + 1; 12256 while (*pl2 == *ps2) 12257 { 12258 if (*pl2 == NUL) /* reached the end */ 12259 return score + SCORE_SUBST; 12260 ++pl2; 12261 ++ps2; 12262 } 12263 12264 /* 6: substitute and swap */ 12265 if (pl2[0] == ps2[1] && pl2[1] == ps2[0] 12266 && STRCMP(pl2 + 2, ps2 + 2) == 0) 12267 return score + SCORE_SUBST + SCORE_SWAP; 12268 12269 /* 7: substitute and substitute */ 12270 if (STRCMP(pl2 + 1, ps2 + 1) == 0) 12271 return score + SCORE_SUBST + SCORE_SUBST; 12272 12273 /* 8: insert then delete */ 12274 pl2 = pl; 12275 ps2 = ps + 1; 12276 while (*pl2 == *ps2) 12277 { 12278 ++pl2; 12279 ++ps2; 12280 } 12281 if (STRCMP(pl2 + 1, ps2) == 0) 12282 return score + SCORE_INS + SCORE_DEL; 12283 12284 /* 9: delete then insert */ 12285 pl2 = pl + 1; 12286 ps2 = ps; 12287 while (*pl2 == *ps2) 12288 { 12289 ++pl2; 12290 ++ps2; 12291 } 12292 if (STRCMP(pl2, ps2 + 1) == 0) 12293 return score + SCORE_INS + SCORE_DEL; 12294 12295 /* Failed to compare. */ 12296 break; 12297 } 12298 12299 return SCORE_MAXMAX; 12300 } 12301 12302 /* 12303 * Compute the "edit distance" to turn "badword" into "goodword". The less 12304 * deletes/inserts/substitutes/swaps are required the lower the score. 12305 * 12306 * The algorithm is described by Du and Chang, 1992. 12307 * The implementation of the algorithm comes from Aspell editdist.cpp, 12308 * edit_distance(). It has been converted from C++ to C and modified to 12309 * support multi-byte characters. 12310 */ 12311 static int 12312 spell_edit_score(badword, goodword) 12313 char_u *badword; 12314 char_u *goodword; 12315 { 12316 int *cnt; 12317 int badlen, goodlen; /* lenghts including NUL */ 12318 int j, i; 12319 int t; 12320 int bc, gc; 12321 int pbc, pgc; 12322 #ifdef FEAT_MBYTE 12323 char_u *p; 12324 int wbadword[MAXWLEN]; 12325 int wgoodword[MAXWLEN]; 12326 12327 if (has_mbyte) 12328 { 12329 /* Get the characters from the multi-byte strings and put them in an 12330 * int array for easy access. */ 12331 for (p = badword, badlen = 0; *p != NUL; ) 12332 wbadword[badlen++] = mb_cptr2char_adv(&p); 12333 wbadword[badlen++] = 0; 12334 for (p = goodword, goodlen = 0; *p != NUL; ) 12335 wgoodword[goodlen++] = mb_cptr2char_adv(&p); 12336 wgoodword[goodlen++] = 0; 12337 } 12338 else 12339 #endif 12340 { 12341 badlen = STRLEN(badword) + 1; 12342 goodlen = STRLEN(goodword) + 1; 12343 } 12344 12345 /* We use "cnt" as an array: CNT(badword_idx, goodword_idx). */ 12346 #define CNT(a, b) cnt[(a) + (b) * (badlen + 1)] 12347 cnt = (int *)lalloc((long_u)(sizeof(int) * (badlen + 1) * (goodlen + 1)), 12348 TRUE); 12349 if (cnt == NULL) 12350 return 0; /* out of memory */ 12351 12352 CNT(0, 0) = 0; 12353 for (j = 1; j <= goodlen; ++j) 12354 CNT(0, j) = CNT(0, j - 1) + SCORE_DEL; 12355 12356 for (i = 1; i <= badlen; ++i) 12357 { 12358 CNT(i, 0) = CNT(i - 1, 0) + SCORE_INS; 12359 for (j = 1; j <= goodlen; ++j) 12360 { 12361 #ifdef FEAT_MBYTE 12362 if (has_mbyte) 12363 { 12364 bc = wbadword[i - 1]; 12365 gc = wgoodword[j - 1]; 12366 } 12367 else 12368 #endif 12369 { 12370 bc = badword[i - 1]; 12371 gc = goodword[j - 1]; 12372 } 12373 if (bc == gc) 12374 CNT(i, j) = CNT(i - 1, j - 1); 12375 else 12376 { 12377 /* Use a better score when there is only a case difference. */ 12378 if (SPELL_TOFOLD(bc) == SPELL_TOFOLD(gc)) 12379 CNT(i, j) = SCORE_ICASE + CNT(i - 1, j - 1); 12380 else 12381 CNT(i, j) = SCORE_SUBST + CNT(i - 1, j - 1); 12382 12383 if (i > 1 && j > 1) 12384 { 12385 #ifdef FEAT_MBYTE 12386 if (has_mbyte) 12387 { 12388 pbc = wbadword[i - 2]; 12389 pgc = wgoodword[j - 2]; 12390 } 12391 else 12392 #endif 12393 { 12394 pbc = badword[i - 2]; 12395 pgc = goodword[j - 2]; 12396 } 12397 if (bc == pgc && pbc == gc) 12398 { 12399 t = SCORE_SWAP + CNT(i - 2, j - 2); 12400 if (t < CNT(i, j)) 12401 CNT(i, j) = t; 12402 } 12403 } 12404 t = SCORE_DEL + CNT(i - 1, j); 12405 if (t < CNT(i, j)) 12406 CNT(i, j) = t; 12407 t = SCORE_INS + CNT(i, j - 1); 12408 if (t < CNT(i, j)) 12409 CNT(i, j) = t; 12410 } 12411 } 12412 } 12413 12414 i = CNT(badlen - 1, goodlen - 1); 12415 vim_free(cnt); 12416 return i; 12417 } 12418 12419 /* 12420 * ":spelldump" 12421 */ 12422 /*ARGSUSED*/ 12423 void 12424 ex_spelldump(eap) 12425 exarg_T *eap; 12426 { 12427 buf_T *buf = curbuf; 12428 langp_T *lp; 12429 slang_T *slang; 12430 idx_T arridx[MAXWLEN]; 12431 int curi[MAXWLEN]; 12432 char_u word[MAXWLEN]; 12433 int c; 12434 char_u *byts; 12435 idx_T *idxs; 12436 linenr_T lnum = 0; 12437 int round; 12438 int depth; 12439 int n; 12440 int flags; 12441 char_u *region_names = NULL; /* region names being used */ 12442 int do_region = TRUE; /* dump region names and numbers */ 12443 char_u *p; 12444 int lpi; 12445 12446 if (no_spell_checking(curwin)) 12447 return; 12448 12449 /* Create a new empty buffer by splitting the window. */ 12450 do_cmdline_cmd((char_u *)"new"); 12451 if (!bufempty() || !buf_valid(buf)) 12452 return; 12453 12454 /* Find out if we can support regions: All languages must support the same 12455 * regions or none at all. */ 12456 for (lpi = 0; lpi < buf->b_langp.ga_len; ++lpi) 12457 { 12458 lp = LANGP_ENTRY(buf->b_langp, lpi); 12459 p = lp->lp_slang->sl_regions; 12460 if (p[0] != 0) 12461 { 12462 if (region_names == NULL) /* first language with regions */ 12463 region_names = p; 12464 else if (STRCMP(region_names, p) != 0) 12465 { 12466 do_region = FALSE; /* region names are different */ 12467 break; 12468 } 12469 } 12470 } 12471 12472 if (do_region && region_names != NULL) 12473 { 12474 vim_snprintf((char *)IObuff, IOSIZE, "/regions=%s", region_names); 12475 ml_append(lnum++, IObuff, (colnr_T)0, FALSE); 12476 } 12477 else 12478 do_region = FALSE; 12479 12480 /* 12481 * Loop over all files loaded for the entries in 'spelllang'. 12482 */ 12483 for (lpi = 0; lpi < buf->b_langp.ga_len; ++lpi) 12484 { 12485 lp = LANGP_ENTRY(buf->b_langp, lpi); 12486 slang = lp->lp_slang; 12487 if (slang->sl_fbyts == NULL) /* reloading failed */ 12488 continue; 12489 12490 vim_snprintf((char *)IObuff, IOSIZE, "# file: %s", slang->sl_fname); 12491 ml_append(lnum++, IObuff, (colnr_T)0, FALSE); 12492 12493 /* round 1: case-folded tree 12494 * round 2: keep-case tree */ 12495 for (round = 1; round <= 2; ++round) 12496 { 12497 if (round == 1) 12498 { 12499 byts = slang->sl_fbyts; 12500 idxs = slang->sl_fidxs; 12501 } 12502 else 12503 { 12504 byts = slang->sl_kbyts; 12505 idxs = slang->sl_kidxs; 12506 } 12507 if (byts == NULL) 12508 continue; /* array is empty */ 12509 12510 depth = 0; 12511 arridx[0] = 0; 12512 curi[0] = 1; 12513 while (depth >= 0 && !got_int) 12514 { 12515 if (curi[depth] > byts[arridx[depth]]) 12516 { 12517 /* Done all bytes at this node, go up one level. */ 12518 --depth; 12519 line_breakcheck(); 12520 } 12521 else 12522 { 12523 /* Do one more byte at this node. */ 12524 n = arridx[depth] + curi[depth]; 12525 ++curi[depth]; 12526 c = byts[n]; 12527 if (c == 0) 12528 { 12529 /* End of word, deal with the word. 12530 * Don't use keep-case words in the fold-case tree, 12531 * they will appear in the keep-case tree. 12532 * Only use the word when the region matches. */ 12533 flags = (int)idxs[n]; 12534 if ((round == 2 || (flags & WF_KEEPCAP) == 0) 12535 && (flags & WF_NEEDCOMP) == 0 12536 && (do_region 12537 || (flags & WF_REGION) == 0 12538 || (((unsigned)flags >> 16) 12539 & lp->lp_region) != 0)) 12540 { 12541 word[depth] = NUL; 12542 if (!do_region) 12543 flags &= ~WF_REGION; 12544 12545 /* Dump the basic word if there is no prefix or 12546 * when it's the first one. */ 12547 c = (unsigned)flags >> 24; 12548 if (c == 0 || curi[depth] == 2) 12549 dump_word(word, round, flags, lnum++); 12550 12551 /* Apply the prefix, if there is one. */ 12552 if (c != 0) 12553 lnum = dump_prefixes(slang, word, round, 12554 flags, lnum); 12555 } 12556 } 12557 else 12558 { 12559 /* Normal char, go one level deeper. */ 12560 word[depth++] = c; 12561 arridx[depth] = idxs[n]; 12562 curi[depth] = 1; 12563 } 12564 } 12565 } 12566 } 12567 } 12568 12569 /* Delete the empty line that we started with. */ 12570 if (curbuf->b_ml.ml_line_count > 1) 12571 ml_delete(curbuf->b_ml.ml_line_count, FALSE); 12572 12573 redraw_later(NOT_VALID); 12574 } 12575 12576 /* 12577 * Dump one word: apply case modifications and append a line to the buffer. 12578 */ 12579 static void 12580 dump_word(word, round, flags, lnum) 12581 char_u *word; 12582 int round; 12583 int flags; 12584 linenr_T lnum; 12585 { 12586 int keepcap = FALSE; 12587 char_u *p; 12588 char_u cword[MAXWLEN]; 12589 char_u badword[MAXWLEN + 10]; 12590 int i; 12591 12592 if (round == 1 && (flags & WF_CAPMASK) != 0) 12593 { 12594 /* Need to fix case according to "flags". */ 12595 make_case_word(word, cword, flags); 12596 p = cword; 12597 } 12598 else 12599 { 12600 p = word; 12601 if (round == 2 && ((captype(word, NULL) & WF_KEEPCAP) == 0 12602 || (flags & WF_FIXCAP) != 0)) 12603 keepcap = TRUE; 12604 } 12605 12606 /* Add flags and regions after a slash. */ 12607 if ((flags & (WF_BANNED | WF_RARE | WF_REGION)) || keepcap) 12608 { 12609 STRCPY(badword, p); 12610 STRCAT(badword, "/"); 12611 if (keepcap) 12612 STRCAT(badword, "="); 12613 if (flags & WF_BANNED) 12614 STRCAT(badword, "!"); 12615 else if (flags & WF_RARE) 12616 STRCAT(badword, "?"); 12617 if (flags & WF_REGION) 12618 for (i = 0; i < 7; ++i) 12619 if (flags & (0x10000 << i)) 12620 sprintf((char *)badword + STRLEN(badword), "%d", i + 1); 12621 p = badword; 12622 } 12623 12624 ml_append(lnum, p, (colnr_T)0, FALSE); 12625 } 12626 12627 /* 12628 * For ":spelldump": Find matching prefixes for "word". Prepend each to 12629 * "word" and append a line to the buffer. 12630 * Return the updated line number. 12631 */ 12632 static linenr_T 12633 dump_prefixes(slang, word, round, flags, startlnum) 12634 slang_T *slang; 12635 char_u *word; /* case-folded word */ 12636 int round; 12637 int flags; /* flags with prefix ID */ 12638 linenr_T startlnum; 12639 { 12640 idx_T arridx[MAXWLEN]; 12641 int curi[MAXWLEN]; 12642 char_u prefix[MAXWLEN]; 12643 char_u word_up[MAXWLEN]; 12644 int has_word_up = FALSE; 12645 int c; 12646 char_u *byts; 12647 idx_T *idxs; 12648 linenr_T lnum = startlnum; 12649 int depth; 12650 int n; 12651 int len; 12652 int i; 12653 12654 /* if the word starts with a lower-case letter make the word with an 12655 * upper-case letter in word_up[]. */ 12656 c = PTR2CHAR(word); 12657 if (SPELL_TOUPPER(c) != c) 12658 { 12659 onecap_copy(word, word_up, TRUE); 12660 has_word_up = TRUE; 12661 } 12662 12663 byts = slang->sl_pbyts; 12664 idxs = slang->sl_pidxs; 12665 if (byts != NULL) /* array not is empty */ 12666 { 12667 /* 12668 * Loop over all prefixes, building them byte-by-byte in prefix[]. 12669 * When at the end of a prefix check that it supports "flags". 12670 */ 12671 depth = 0; 12672 arridx[0] = 0; 12673 curi[0] = 1; 12674 while (depth >= 0 && !got_int) 12675 { 12676 n = arridx[depth]; 12677 len = byts[n]; 12678 if (curi[depth] > len) 12679 { 12680 /* Done all bytes at this node, go up one level. */ 12681 --depth; 12682 line_breakcheck(); 12683 } 12684 else 12685 { 12686 /* Do one more byte at this node. */ 12687 n += curi[depth]; 12688 ++curi[depth]; 12689 c = byts[n]; 12690 if (c == 0) 12691 { 12692 /* End of prefix, find out how many IDs there are. */ 12693 for (i = 1; i < len; ++i) 12694 if (byts[n + i] != 0) 12695 break; 12696 curi[depth] += i - 1; 12697 12698 c = valid_word_prefix(i, n, flags, word, slang, FALSE); 12699 if (c != 0) 12700 { 12701 vim_strncpy(prefix + depth, word, MAXWLEN - depth - 1); 12702 dump_word(prefix, round, 12703 (c & WF_RAREPFX) ? (flags | WF_RARE) 12704 : flags, lnum++); 12705 } 12706 12707 /* Check for prefix that matches the word when the 12708 * first letter is upper-case, but only if the prefix has 12709 * a condition. */ 12710 if (has_word_up) 12711 { 12712 c = valid_word_prefix(i, n, flags, word_up, slang, 12713 TRUE); 12714 if (c != 0) 12715 { 12716 vim_strncpy(prefix + depth, word_up, 12717 MAXWLEN - depth - 1); 12718 dump_word(prefix, round, 12719 (c & WF_RAREPFX) ? (flags | WF_RARE) 12720 : flags, lnum++); 12721 } 12722 } 12723 } 12724 else 12725 { 12726 /* Normal char, go one level deeper. */ 12727 prefix[depth++] = c; 12728 arridx[depth] = idxs[n]; 12729 curi[depth] = 1; 12730 } 12731 } 12732 } 12733 } 12734 12735 return lnum; 12736 } 12737 12738 /* 12739 * Move "p" to end of word. 12740 */ 12741 char_u * 12742 spell_to_word_end(start, buf) 12743 char_u *start; 12744 buf_T *buf; 12745 { 12746 char_u *p = start; 12747 12748 while (*p != NUL && spell_iswordp(p, buf)) 12749 mb_ptr_adv(p); 12750 return p; 12751 } 12752 12753 #if defined(FEAT_INS_EXPAND) || defined(PROTO) 12754 static int spell_expand_need_cap; 12755 12756 /* 12757 * Find start of the word in front of the cursor. We don't check if it is 12758 * badly spelled, with completion we can only change the word in front of the 12759 * cursor. 12760 * Used for Insert mode completion CTRL-X ?. 12761 * Returns the column number of the word. 12762 */ 12763 int 12764 spell_word_start(startcol) 12765 int startcol; 12766 { 12767 char_u *line; 12768 char_u *p; 12769 int col = 0; 12770 12771 if (no_spell_checking(curwin)) 12772 return startcol; 12773 12774 /* Find a word character before "startcol". */ 12775 line = ml_get_curline(); 12776 for (p = line + startcol; p > line; ) 12777 { 12778 mb_ptr_back(line, p); 12779 if (spell_iswordp_nmw(p)) 12780 break; 12781 } 12782 12783 /* Go back to start of the word. */ 12784 while (p > line) 12785 { 12786 col = p - line; 12787 mb_ptr_back(line, p); 12788 if (!spell_iswordp(p, curbuf)) 12789 break; 12790 col = 0; 12791 } 12792 12793 /* Need to check for 'spellcapcheck' now, the word is removed before 12794 * expand_spelling() is called. Therefore the ugly global variable. */ 12795 spell_expand_need_cap = check_need_cap(curwin->w_cursor.lnum, col); 12796 12797 return col; 12798 } 12799 12800 /* 12801 * Get list of spelling suggestions. 12802 * Used for Insert mode completion CTRL-X ?. 12803 * Returns the number of matches. The matches are in "matchp[]", array of 12804 * allocated strings. 12805 */ 12806 /*ARGSUSED*/ 12807 int 12808 expand_spelling(lnum, col, pat, matchp) 12809 linenr_T lnum; 12810 int col; 12811 char_u *pat; 12812 char_u ***matchp; 12813 { 12814 garray_T ga; 12815 12816 spell_suggest_list(&ga, pat, 100, spell_expand_need_cap); 12817 *matchp = ga.ga_data; 12818 return ga.ga_len; 12819 } 12820 #endif 12821 12822 #endif /* FEAT_SYN_HL */ 12823