1 /* vi:set ts=8 sts=4 sw=4: 2 * 3 * VIM - Vi IMproved by Bram Moolenaar 4 * 5 * Do ":help uganda" in Vim to read copying and usage conditions. 6 * Do ":help credits" in Vim to see a list of people who contributed. 7 * See README.txt for an overview of the Vim source code. 8 */ 9 10 /* 11 * spell.c: code for spell checking 12 * 13 * The spell checking mechanism uses a tree (aka trie). Each node in the tree 14 * has a list of bytes that can appear (siblings). For each byte there is a 15 * pointer to the node with the byte that follows in the word (child). 16 * 17 * A NUL byte is used where the word may end. The bytes are sorted, so that 18 * binary searching can be used and the NUL bytes are at the start. The 19 * number of possible bytes is stored before the list of bytes. 20 * 21 * The tree uses two arrays: "byts" stores the characters, "idxs" stores 22 * either the next index or flags. The tree starts at index 0. For example, 23 * to lookup "vi" this sequence is followed: 24 * i = 0 25 * len = byts[i] 26 * n = where "v" appears in byts[i + 1] to byts[i + len] 27 * i = idxs[n] 28 * len = byts[i] 29 * n = where "i" appears in byts[i + 1] to byts[i + len] 30 * i = idxs[n] 31 * len = byts[i] 32 * find that byts[i + 1] is 0, idxs[i + 1] has flags for "vi". 33 * 34 * There are two word trees: one with case-folded words and one with words in 35 * original case. The second one is only used for keep-case words and is 36 * usually small. 37 * 38 * There is one additional tree for when not all prefixes are applied when 39 * generating the .spl file. This tree stores all the possible prefixes, as 40 * if they were words. At each word (prefix) end the prefix nr is stored, the 41 * following word must support this prefix nr. And the condition nr is 42 * stored, used to lookup the condition that the word must match with. 43 * 44 * Thanks to Olaf Seibert for providing an example implementation of this tree 45 * and the compression mechanism. 
46 * LZ trie ideas: 47 * http://www.irb.hr/hr/home/ristov/papers/RistovLZtrieRevision1.pdf 48 * More papers: http://www-igm.univ-mlv.fr/~laporte/publi_en.html 49 * 50 * Matching involves checking the caps type: Onecap ALLCAP KeepCap. 51 * 52 * Why doesn't Vim use aspell/ispell/myspell/etc.? 53 * See ":help develop-spell". 54 */ 55 56 /* Use SPELL_PRINTTREE for debugging: dump the word tree after adding a word. 57 * Only use it for small word lists! */ 58 #if 0 59 # define SPELL_PRINTTREE 60 #endif 61 62 /* Use SPELL_COMPRESS_ALLWAYS for debugging: compress the word tree after 63 * adding a word. Only use it for small word lists! */ 64 #if 0 65 # define SPELL_COMPRESS_ALLWAYS 66 #endif 67 68 /* Use DEBUG_TRIEWALK to print the changes made in suggest_trie_walk() for a 69 * specific word. */ 70 #if 0 71 # define DEBUG_TRIEWALK 72 #endif 73 74 /* 75 * Use this to adjust the score after finding suggestions, based on the 76 * suggested word sounding like the bad word. This is much faster than doing 77 * it for every possible suggestion. 78 * Disadvantage: When "the" is typed as "hte" it sounds quite different ("@" 79 * vs "ht") and goes down in the list. 80 * Used when 'spellsuggest' is set to "best". 81 */ 82 #define RESCORE(word_score, sound_score) ((3 * word_score + sound_score) / 4) 83 84 /* 85 * Do the opposite: based on a maximum end score and a known sound score, 86 * compute the maximum word score that can be used. 87 */ 88 #define MAXSCORE(word_score, sound_score) ((4 * word_score - sound_score) / 3) 89 90 /* 91 * Vim spell file format: <HEADER> 92 * <SECTIONS> 93 * <LWORDTREE> 94 * <KWORDTREE> 95 * <PREFIXTREE> 96 * 97 * <HEADER>: <fileID> <versionnr> 98 * 99 * <fileID> 8 bytes "VIMspell" 100 * <versionnr> 1 byte VIMSPELLVERSION 101 * 102 * 103 * Sections make it possible to add information to the .spl file without 104 * making it incompatible with previous versions. There are two kinds of 105 * sections: 106 * 1. Not essential for correct spell checking. E.g. 
for making suggestions. 107 * These are skipped when not supported. 108 * 2. Optional information, but essential for spell checking when present. 109 * E.g. conditions for affixes. When this section is present but not 110 * supported an error message is given. 111 * 112 * <SECTIONS>: <section> ... <sectionend> 113 * 114 * <section>: <sectionID> <sectionflags> <sectionlen> (section contents) 115 * 116 * <sectionID> 1 byte number from 0 to 254 identifying the section 117 * 118 * <sectionflags> 1 byte SNF_REQUIRED: this section is required for correct 119 * spell checking 120 * 121 * <sectionlen> 4 bytes length of section contents, MSB first 122 * 123 * <sectionend> 1 byte SN_END 124 * 125 * 126 * sectionID == SN_INFO: <infotext> 127 * <infotext> N bytes free format text with spell file info (version, 128 * website, etc) 129 * 130 * sectionID == SN_REGION: <regionname> ... 131 * <regionname> 2 bytes Up to 8 region names: ca, au, etc. Lower case. 132 * First <regionname> is region 1. 133 * 134 * sectionID == SN_CHARFLAGS: <charflagslen> <charflags> 135 * <folcharslen> <folchars> 136 * <charflagslen> 1 byte Number of bytes in <charflags> (should be 128). 137 * <charflags> N bytes List of flags (first one is for character 128): 138 * 0x01 word character CF_WORD 139 * 0x02 upper-case character CF_UPPER 140 * <folcharslen> 2 bytes Number of bytes in <folchars>. 141 * <folchars> N bytes Folded characters, first one is for character 128. 142 * 143 * sectionID == SN_MIDWORD: <midword> 144 * <midword> N bytes Characters that are word characters only when used 145 * in the middle of a word. 146 * 147 * sectionID == SN_PREFCOND: <prefcondcnt> <prefcond> ... 148 * <prefcondcnt> 2 bytes Number of <prefcond> items following. 149 * <prefcond> : <condlen> <condstr> 150 * <condlen> 1 byte Length of <condstr>. 151 * <condstr> N bytes Condition for the prefix. 152 * 153 * sectionID == SN_REP: <repcount> <rep> ... 154 * <repcount> 2 bytes number of <rep> items, MSB first. 
155 * <rep> : <repfromlen> <repfrom> <reptolen> <repto> 156 * <repfromlen> 1 byte length of <repfrom> 157 * <repfrom> N bytes "from" part of replacement 158 * <reptolen> 1 byte length of <repto> 159 * <repto> N bytes "to" part of replacement 160 * 161 * sectionID == SN_REPSAL: <repcount> <rep> ... 162 * just like SN_REP but for soundfolded words 163 * 164 * sectionID == SN_SAL: <salflags> <salcount> <sal> ... 165 * <salflags> 1 byte flags for soundsalike conversion: 166 * SAL_F0LLOWUP 167 * SAL_COLLAPSE 168 * SAL_REM_ACCENTS 169 * <salcount> 2 bytes number of <sal> items following 170 * <sal> : <salfromlen> <salfrom> <saltolen> <salto> 171 * <salfromlen> 1 byte length of <salfrom> 172 * <salfrom> N bytes "from" part of soundsalike 173 * <saltolen> 1 byte length of <salto> 174 * <salto> N bytes "to" part of soundsalike 175 * 176 * sectionID == SN_SOFO: <sofofromlen> <sofofrom> <sofotolen> <sofoto> 177 * <sofofromlen> 2 bytes length of <sofofrom> 178 * <sofofrom> N bytes "from" part of soundfold 179 * <sofotolen> 2 bytes length of <sofoto> 180 * <sofoto> N bytes "to" part of soundfold 181 * 182 * sectionID == SN_SUGFILE: <timestamp> 183 * <timestamp> 8 bytes time in seconds that must match with .sug file 184 * 185 * sectionID == SN_NOSPLITSUGS: nothing 186 * 187 * sectionID == SN_NOCOMPOUNDSUGS: nothing 188 * 189 * sectionID == SN_WORDS: <word> ... 190 * <word> N bytes NUL terminated common word 191 * 192 * sectionID == SN_MAP: <mapstr> 193 * <mapstr> N bytes String with sequences of similar characters, 194 * separated by slashes. 195 * 196 * sectionID == SN_COMPOUND: <compmax> <compminlen> <compsylmax> <compoptions> 197 * <comppatcount> <comppattern> ... <compflags> 198 * <compmax> 1 byte Maximum nr of words in compound word. 199 * <compminlen> 1 byte Minimal word length for compounding. 200 * <compsylmax> 1 byte Maximum nr of syllables in compound word. 201 * <compoptions> 2 bytes COMP_ flags. 
202 * <comppatcount> 2 bytes number of <comppattern> following 203 * <compflags> N bytes Flags from COMPOUNDRULE items, separated by 204 * slashes. 205 * 206 * <comppattern>: <comppatlen> <comppattext> 207 * <comppatlen> 1 byte length of <comppattext> 208 * <comppattext> N bytes end or begin chars from CHECKCOMPOUNDPATTERN 209 * 210 * sectionID == SN_NOBREAK: (empty, its presence is what matters) 211 * 212 * sectionID == SN_SYLLABLE: <syllable> 213 * <syllable> N bytes String from SYLLABLE item. 214 * 215 * <LWORDTREE>: <wordtree> 216 * 217 * <KWORDTREE>: <wordtree> 218 * 219 * <PREFIXTREE>: <wordtree> 220 * 221 * 222 * <wordtree>: <nodecount> <nodedata> ... 223 * 224 * <nodecount> 4 bytes Number of nodes following. MSB first. 225 * 226 * <nodedata>: <siblingcount> <sibling> ... 227 * 228 * <siblingcount> 1 byte Number of siblings in this node. The siblings 229 * follow in sorted order. 230 * 231 * <sibling>: <byte> [ <nodeidx> <xbyte> 232 * | <flags> [<flags2>] [<region>] [<affixID>] 233 * | [<pflags>] <affixID> <prefcondnr> ] 234 * 235 * <byte> 1 byte Byte value of the sibling. Special cases: 236 * BY_NOFLAGS: End of word without flags and for all 237 * regions. 238 * For PREFIXTREE <affixID> and 239 * <prefcondnr> follow. 240 * BY_FLAGS: End of word, <flags> follow. 241 * For PREFIXTREE <pflags>, <affixID> 242 * and <prefcondnr> follow. 243 * BY_FLAGS2: End of word, <flags> and <flags2> 244 * follow. Not used in PREFIXTREE. 245 * BY_INDEX: Child of sibling is shared, <nodeidx> 246 * and <xbyte> follow. 247 * 248 * <nodeidx> 3 bytes Index of child for this sibling, MSB first. 249 * 250 * <xbyte> 1 byte byte value of the sibling. 
251 * 252 * <flags> 1 byte bitmask of: 253 * WF_ALLCAP word must have only capitals 254 * WF_ONECAP first char of word must be capital 255 * WF_KEEPCAP keep-case word 256 * WF_FIXCAP keep-case word, all caps not allowed 257 * WF_RARE rare word 258 * WF_BANNED bad word 259 * WF_REGION <region> follows 260 * WF_AFX <affixID> follows 261 * 262 * <flags2> 1 byte Bitmask of: 263 * WF_HAS_AFF >> 8 word includes affix 264 * WF_NEEDCOMP >> 8 word only valid in compound 265 * WF_NOSUGGEST >> 8 word not used for suggestions 266 * WF_COMPROOT >> 8 word already a compound 267 * WF_NOCOMPBEF >> 8 no compounding before this word 268 * WF_NOCOMPAFT >> 8 no compounding after this word 269 * 270 * <pflags> 1 byte bitmask of: 271 * WFP_RARE rare prefix 272 * WFP_NC non-combining prefix 273 * WFP_UP letter after prefix made upper case 274 * 275 * <region> 1 byte Bitmask for regions in which word is valid. When 276 * omitted it's valid in all regions. 277 * Lowest bit is for region 1. 278 * 279 * <affixID> 1 byte ID of affix that can be used with this word. In 280 * PREFIXTREE used for the required prefix ID. 281 * 282 * <prefcondnr> 2 bytes Prefix condition number, index in <prefcond> list 283 * from HEADER. 284 * 285 * All text characters are in 'encoding', but stored as single bytes. 286 */ 287 288 /* 289 * Vim .sug file format: <SUGHEADER> 290 * <SUGWORDTREE> 291 * <SUGTABLE> 292 * 293 * <SUGHEADER>: <fileID> <versionnr> <timestamp> 294 * 295 * <fileID> 6 bytes "VIMsug" 296 * <versionnr> 1 byte VIMSUGVERSION 297 * <timestamp> 8 bytes timestamp that must match with .spl file 298 * 299 * 300 * <SUGWORDTREE>: <wordtree> (see above, no flags or region used) 301 * 302 * 303 * <SUGTABLE>: <sugwcount> <sugline> ... 304 * 305 * <sugwcount> 4 bytes number of <sugline> following 306 * 307 * <sugline>: <sugnr> ... 
NUL
 *
 * <sugnr>:     X bytes    word number that results in this soundfolded word,
 *                         stored as an offset to the previous number in as
 *                         few bytes as possible (see offset2bytes())
 */

#include "vim.h"

#if defined(FEAT_SPELL) || defined(PROTO)

#ifndef UNIX            /* it's in os_unix.h for Unix */
# include <time.h>      /* for time_t */
#endif

#define MAXWLEN 254     /* Assume max. word len is this many bytes.
                           Some places assume a word length fits in a
                           byte, thus it can't be above 255.
                           Must be >= PFD_NOTSPECIAL. */

/* The type used for indexes in the word tree needs to be at least 4 bytes:
 * "int" when VIM_SIZEOF_INT is above 3, "long" otherwise.  If int is 8 bytes
 * we could use something smaller, but what? */
#if VIM_SIZEOF_INT > 3
typedef int idx_T;
#else
typedef long idx_T;
#endif

/* Spell file name pieces.  On VMS "_" is used as the separator where other
 * systems use ".". */
#ifdef VMS
# define SPL_FNAME_TMPL  "%s_%s.spl"
# define SPL_FNAME_ADD   "_add."
# define SPL_FNAME_ASCII "_ascii."
#else
# define SPL_FNAME_TMPL  "%s.%s.spl"
# define SPL_FNAME_ADD   ".add."
# define SPL_FNAME_ASCII ".ascii."
#endif

/* Flags used for a word.  Only the lowest byte can be used, the region byte
 * comes above it.
*/
#define WF_REGION   0x01        /* region byte follows */
#define WF_ONECAP   0x02        /* word with one capital (or all capitals) */
#define WF_ALLCAP   0x04        /* word must be all capitals */
#define WF_RARE     0x08        /* rare word */
#define WF_BANNED   0x10        /* bad word */
#define WF_AFX      0x20        /* affix ID follows */
#define WF_FIXCAP   0x40        /* keep-case word, allcap not allowed */
#define WF_KEEPCAP  0x80        /* keep-case word */

/* for <flags2>, shifted up one byte to be used in wn_flags */
#define WF_HAS_AFF   0x0100     /* word includes affix */
#define WF_NEEDCOMP  0x0200     /* word only valid in compound */
#define WF_NOSUGGEST 0x0400     /* word not to be suggested */
#define WF_COMPROOT  0x0800     /* already compounded word, COMPOUNDROOT */
#define WF_NOCOMPBEF 0x1000     /* no compounding before this word */
#define WF_NOCOMPAFT 0x2000     /* no compounding after this word */

/* only used for su_badflags; note that this has the same value as WF_AFX */
#define WF_MIXCAP   0x20        /* mix of upper and lower case: macaRONI */

/* All the flags that describe the capitalisation of a word. */
#define WF_CAPMASK (WF_ONECAP | WF_ALLCAP | WF_KEEPCAP | WF_FIXCAP)

/* flags for <pflags> */
#define WFP_RARE            0x01    /* rare prefix */
#define WFP_NC              0x02    /* prefix is not combining */
#define WFP_UP              0x04    /* to-upper prefix */
#define WFP_COMPPERMIT      0x08    /* prefix with COMPOUNDPERMITFLAG */
#define WFP_COMPFORBID      0x10    /* prefix with COMPOUNDFORBIDFLAG */

/* Flags for postponed prefixes in "sl_pidxs".  Must be above affixID (one
 * byte) and prefcondnr (two bytes).  These are the WFP_ flags shifted up by
 * 24 bits. */
#define WF_RAREPFX  (WFP_RARE << 24)    /* rare postponed prefix */
#define WF_PFX_NC   (WFP_NC << 24)      /* non-combining postponed prefix */
#define WF_PFX_UP   (WFP_UP << 24)      /* to-upper postponed prefix */
#define WF_PFX_COMPPERMIT (WFP_COMPPERMIT << 24) /* postponed prefix with
                                                  * COMPOUNDPERMITFLAG */
#define WF_PFX_COMPFORBID (WFP_COMPFORBID << 24) /* postponed prefix with
                                                  * COMPOUNDFORBIDFLAG */


/* flags for <compoptions> */
#define COMP_CHECKDUP           1       /* CHECKCOMPOUNDDUP */
#define COMP_CHECKREP           2       /* CHECKCOMPOUNDREP */
#define COMP_CHECKCASE          4       /* CHECKCOMPOUNDCASE */
#define COMP_CHECKTRIPLE        8       /* CHECKCOMPOUNDTRIPLE */

/* Special byte values for <byte>.  Some are only used in the tree for
 * postponed prefixes, some only in the other trees.  This is a bit messy... */
#define BY_NOFLAGS      0       /* end of word without flags or region; for
                                 * postponed prefix: no <pflags> */
#define BY_INDEX        1       /* child is shared, index follows */
#define BY_FLAGS        2       /* end of word, <flags> byte follows; for
                                 * postponed prefix: <pflags> follows */
#define BY_FLAGS2       3       /* end of word, <flags> and <flags2> bytes
                                 * follow; never used in prefix tree */
#define BY_SPECIAL  BY_FLAGS2   /* highest special byte value */

/* Info from "REP", "REPSAL" and "SAL" entries in ".aff" file used in si_rep,
 * si_repsal, sl_rep, and si_sal.  Not for sl_sal!
 * One replacement: from "ft_from" to "ft_to". */
typedef struct fromto_S
{
    char_u      *ft_from;       /* text to be replaced */
    char_u      *ft_to;         /* replacement text */
} fromto_T;

/* Info from "SAL" entries in ".aff" file used in sl_sal.
 * The info is split for quick processing by spell_soundfold().
 * Note that "sm_oneof" and "sm_rules" point into sm_lead.
*/
typedef struct salitem_S
{
    char_u      *sm_lead;       /* leading letters */
    int         sm_leadlen;     /* length of "sm_lead" */
    char_u      *sm_oneof;      /* letters from () or NULL */
    char_u      *sm_rules;      /* rules like ^, $, priority */
    char_u      *sm_to;         /* replacement. */
#ifdef FEAT_MBYTE
    int         *sm_lead_w;     /* wide character copy of "sm_lead" */
    int         *sm_oneof_w;    /* wide character copy of "sm_oneof" */
    int         *sm_to_w;       /* wide character copy of "sm_to" */
#endif
} salitem_T;

/* Element type of "sl_sal_first": int with FEAT_MBYTE, short without. */
#ifdef FEAT_MBYTE
typedef int salfirst_T;
#else
typedef short salfirst_T;
#endif

/* Values for SP_*ERROR are negative, positive values are used by
 * read_cnt_string(). */
#define SP_TRUNCERROR   -1      /* spell file truncated error */
#define SP_FORMERROR    -2      /* format error in spell file */
#define SP_OTHERERROR   -3      /* other error while reading spell file */

/*
 * Structure used to store words and other info for one language, loaded from
 * a .spl file.
 * The main access is through the tree in "sl_fbyts/sl_fidxs", storing the
 * case-folded words.  "sl_kbyts/sl_kidxs" is for keep-case words.
 *
 * The "byts" array stores the possible bytes in each tree node, preceded by
 * the number of possible bytes, sorted on byte value:
 *      <len> <byte1> <byte2> ...
 * The "idxs" array stores the index of the child node corresponding to the
 * byte in "byts".
 * Exception: when the byte is zero, the word may end here and "idxs" holds
 * the flags, region mask and affixID for the word.  There may be several
 * zeros in sequence for alternative flag/region/affixID combinations.
 */
typedef struct slang_S slang_T;
struct slang_S
{
    slang_T     *sl_next;       /* next language */
    char_u      *sl_name;       /* language name "en", "en.rare", "nl", etc. */
    char_u      *sl_fname;      /* name of .spl file */
    int         sl_add;         /* TRUE if it's a .add file. */

    char_u      *sl_fbyts;      /* case-folded word bytes */
    idx_T       *sl_fidxs;      /* case-folded word indexes */
    char_u      *sl_kbyts;      /* keep-case word bytes */
    idx_T       *sl_kidxs;      /* keep-case word indexes */
    char_u      *sl_pbyts;      /* prefix tree word bytes */
    idx_T       *sl_pidxs;      /* prefix tree word indexes */

    char_u      *sl_info;       /* infotext string or NULL */

    char_u      sl_regions[17]; /* table with up to 8 region names plus NUL */

    char_u      *sl_midword;    /* MIDWORD string or NULL */

    hashtab_T   sl_wordcount;   /* hashtable with word count, wordcount_T */

    int         sl_compmax;     /* COMPOUNDWORDMAX (default: MAXWLEN) */
    int         sl_compminlen;  /* COMPOUNDMIN (default: 0) */
    int         sl_compsylmax;  /* COMPOUNDSYLMAX (default: MAXWLEN) */
    int         sl_compoptions; /* COMP_* flags */
    garray_T    sl_comppat;     /* CHECKCOMPOUNDPATTERN items */
    regprog_T   *sl_compprog;   /* COMPOUNDRULE turned into a regexp program
                                 * (NULL when no compounding) */
    char_u      *sl_comprules;  /* all COMPOUNDRULE concatenated (or NULL) */
    char_u      *sl_compstartflags; /* flags for first compound word */
    char_u      *sl_compallflags; /* all flags for compound words */
    char_u      sl_nobreak;     /* When TRUE: no spaces between words */
    char_u      *sl_syllable;   /* SYLLABLE repeatable chars or NULL */
    garray_T    sl_syl_items;   /* syllable items */

    int         sl_prefixcnt;   /* number of items in "sl_prefprog" */
    regprog_T   **sl_prefprog;  /* table with regprogs for prefixes */

    garray_T    sl_rep;         /* list of fromto_T entries from REP lines */
    short       sl_rep_first[256];  /* indexes where byte first appears, -1 if
                                       there is none */
    garray_T    sl_sal;         /* list of salitem_T entries from SAL lines */
    salfirst_T  sl_sal_first[256];  /* indexes where byte first appears, -1 if
                                       there is none */
    int         sl_followup;    /* SAL followup */
    int         sl_collapse;    /* SAL collapse_result */
    int         sl_rem_accents; /* SAL remove_accents */
    int         sl_sofo;        /* SOFOFROM and SOFOTO instead of SAL items:
                                 * "sl_sal_first" maps chars, when has_mbyte
                                 * "sl_sal" is a list of wide char lists. */
    garray_T    sl_repsal;      /* list of fromto_T entries from REPSAL lines */
    short       sl_repsal_first[256];  /* sl_rep_first for REPSAL lines */
    int         sl_nosplitsugs; /* don't suggest splitting a word */
    int         sl_nocompoundsugs; /* don't suggest compounding */

    /* Info from the .sug file.  Loaded on demand. */
    time_t      sl_sugtime;     /* timestamp for .sug file */
    char_u      *sl_sbyts;      /* soundfolded word bytes */
    idx_T       *sl_sidxs;      /* soundfolded word indexes */
    buf_T       *sl_sugbuf;     /* buffer with word number table */
    int         sl_sugloaded;   /* TRUE when .sug file was loaded or failed to
                                   load */

    int         sl_has_map;     /* TRUE if there is a MAP line */
#ifdef FEAT_MBYTE
    hashtab_T   sl_map_hash;    /* MAP for multi-byte chars */
    int         sl_map_array[256]; /* MAP for first 256 chars */
#else
    char_u      sl_map_array[256]; /* MAP for first 256 chars */
#endif
    hashtab_T   sl_sounddone;   /* table with soundfolded words that have
                                   been handled, see add_sound_suggest() */
};

/* First language that is loaded, start of the linked list of loaded
 * languages. */
static slang_T *first_lang = NULL;

/* Flags used in .spl file for soundsalike flags.
 * Note: "SAL_F0LLOWUP" really is spelled with a zero. */
#define SAL_F0LLOWUP            1
#define SAL_COLLAPSE            2
#define SAL_REM_ACCENTS         4

/*
 * Structure used in "b_langp", filled from 'spelllang'.
 */
typedef struct langp_S
{
    slang_T     *lp_slang;      /* info for this language */
    slang_T     *lp_sallang;    /* language used for sound folding or NULL */
    slang_T     *lp_replang;    /* language used for REP items or NULL */
    int         lp_region;      /* bitmask for region or REGION_ALL */
} langp_T;

/* Pointer to entry "i" of growarray "ga", which holds langp_T items. */
#define LANGP_ENTRY(ga, i)      (((langp_T *)(ga).ga_data) + (i))

#define REGION_ALL 0xff         /* word valid in all regions */

#define VIMSPELLMAGIC "VIMspell"  /* string at start of Vim spell file */
#define VIMSPELLMAGICL 8          /* length of VIMSPELLMAGIC */
#define VIMSPELLVERSION 50

#define VIMSUGMAGIC "VIMsug"      /* string at start of Vim .sug file */
#define VIMSUGMAGICL 6            /* length of VIMSUGMAGIC */
#define VIMSUGVERSION 1

/* Section IDs.  Only renumber them when VIMSPELLVERSION changes! */
#define SN_REGION       0       /* <regionname> section */
#define SN_CHARFLAGS    1       /* charflags section */
#define SN_MIDWORD      2       /* <midword> section */
#define SN_PREFCOND     3       /* <prefcond> section */
#define SN_REP          4       /* REP items section */
#define SN_SAL          5       /* SAL items section */
#define SN_SOFO         6       /* soundfolding section */
#define SN_MAP          7       /* MAP items section */
#define SN_COMPOUND     8       /* compound words section */
#define SN_SYLLABLE     9       /* syllable section */
#define SN_NOBREAK      10      /* NOBREAK section */
#define SN_SUGFILE      11      /* timestamp for .sug file */
#define SN_REPSAL       12      /* REPSAL items section */
#define SN_WORDS        13      /* common words */
#define SN_NOSPLITSUGS  14      /* don't split word for suggestions */
#define SN_INFO         15      /* info section */
#define SN_NOCOMPOUNDSUGS 16    /* don't compound for suggestions */
#define SN_END          255     /* end of sections */

#define SNF_REQUIRED    1       /* <sectionflags>: required section */

/* Result values.  Lower number is accepted over higher one. */
#define SP_BANNED       -1
#define SP_OK           0
#define SP_RARE         1
#define SP_LOCAL        2
#define SP_BAD          3

/* file used for "zG" and "zW" */
static char_u   *int_wordlist = NULL;

typedef struct wordcount_S
{
    short_u     wc_count;           /* nr of times word was seen */
    char_u      wc_word[1];         /* word, actually longer */
} wordcount_T;

/* "dumwc" is only used to compute WC_KEY_OFF, the byte offset of "wc_word"
 * inside a wordcount_T.  HI2WC() subtracts that offset from a hashitem key
 * to get back to the wordcount_T that contains it. */
static wordcount_T dumwc;
#define WC_KEY_OFF  (unsigned)(dumwc.wc_word - (char_u *)&dumwc)
#define HI2WC(hi)     ((wordcount_T *)((hi)->hi_key - WC_KEY_OFF))
#define MAXWORDCOUNT 0xffff

/*
 * Information used when looking for suggestions.
 */
typedef struct suginfo_S
{
    garray_T    su_ga;              /* suggestions, contains "suggest_T" */
    int         su_maxcount;        /* max. number of suggestions displayed */
    int         su_maxscore;        /* maximum score for adding to su_ga */
    int         su_sfmaxscore;      /* idem, for when doing soundfold words */
    garray_T    su_sga;             /* like su_ga, sound-folded scoring */
    char_u      *su_badptr;         /* start of bad word in line */
    int         su_badlen;          /* length of detected bad word in line */
    int         su_badflags;        /* caps flags for bad word */
    char_u      su_badword[MAXWLEN]; /* bad word truncated at su_badlen */
    char_u      su_fbadword[MAXWLEN]; /* su_badword case-folded */
    char_u      su_sal_badword[MAXWLEN]; /* su_badword soundfolded */
    hashtab_T   su_banned;          /* table with banned words */
    slang_T     *su_sallang;        /* default language for sound folding */
} suginfo_T;

/* One word suggestion.  Used in "su_ga".
*/ 629 typedef struct suggest_S 630 { 631 char_u *st_word; /* suggested word, allocated string */ 632 int st_wordlen; /* STRLEN(st_word) */ 633 int st_orglen; /* length of replaced text */ 634 int st_score; /* lower is better */ 635 int st_altscore; /* used when st_score compares equal */ 636 int st_salscore; /* st_score is for soundalike */ 637 int st_had_bonus; /* bonus already included in score */ 638 slang_T *st_slang; /* language used for sound folding */ 639 } suggest_T; 640 641 #define SUG(ga, i) (((suggest_T *)(ga).ga_data)[i]) 642 643 /* TRUE if a word appears in the list of banned words. */ 644 #define WAS_BANNED(su, word) (!HASHITEM_EMPTY(hash_find(&su->su_banned, word))) 645 646 /* Number of suggestions kept when cleaning up. We need to keep more than 647 * what is displayed, because when rescore_suggestions() is called the score 648 * may change and wrong suggestions may be removed later. */ 649 #define SUG_CLEAN_COUNT(su) ((su)->su_maxcount < 130 ? 150 : (su)->su_maxcount + 20) 650 651 /* Threshold for sorting and cleaning up suggestions. Don't want to keep lots 652 * of suggestions that are not going to be displayed. 
*/
#define SUG_MAX_COUNT(su)       (SUG_CLEAN_COUNT(su) + 50)

/* score for various changes */
#define SCORE_SPLIT     149     /* split bad word */
#define SCORE_SPLIT_NO  249     /* split bad word with NOSPLITSUGS */
#define SCORE_ICASE     52      /* slightly different case */
#define SCORE_REGION    200     /* word is for different region */
#define SCORE_RARE      180     /* rare word */
#define SCORE_SWAP      75      /* swap two characters */
#define SCORE_SWAP3     110     /* swap two characters in three */
#define SCORE_REP       65      /* REP replacement */
#define SCORE_SUBST     93      /* substitute a character */
#define SCORE_SIMILAR   33      /* substitute a similar character */
#define SCORE_SUBCOMP   33      /* substitute a composing character */
#define SCORE_DEL       94      /* delete a character */
#define SCORE_DELDUP    66      /* delete a duplicated character */
#define SCORE_DELCOMP   28      /* delete a composing character */
#define SCORE_INS       96      /* insert a character */
#define SCORE_INSDUP    67      /* insert a duplicate character */
#define SCORE_INSCOMP   30      /* insert a composing character */
#define SCORE_NONWORD   103     /* change non-word to word char */

#define SCORE_FILE      30      /* suggestion from a file */
#define SCORE_MAXINIT   350     /* Initial maximum score: higher == slower.
                                 * 350 allows for about three changes. */

#define SCORE_COMMON1   30      /* subtracted for words seen before */
#define SCORE_COMMON2   40      /* subtracted for words often seen */
#define SCORE_COMMON3   50      /* subtracted for words very often seen */
#define SCORE_THRES2    10      /* word count threshold for COMMON2 */
#define SCORE_THRES3    100     /* word count threshold for COMMON3 */

/* When trying changed soundfold words it becomes slow when trying more than
 * two changes.  With fewer than two changes it's slightly faster but we miss
 * a few good suggestions.  In rare cases we need to try three or four changes.
688 */ 689 #define SCORE_SFMAX1 200 /* maximum score for first try */ 690 #define SCORE_SFMAX2 300 /* maximum score for second try */ 691 #define SCORE_SFMAX3 400 /* maximum score for third try */ 692 693 #define SCORE_BIG SCORE_INS * 3 /* big difference */ 694 #define SCORE_MAXMAX 999999 /* accept any score */ 695 #define SCORE_LIMITMAX 350 /* for spell_edit_score_limit() */ 696 697 /* for spell_edit_score_limit() we need to know the minimum value of 698 * SCORE_ICASE, SCORE_SWAP, SCORE_DEL, SCORE_SIMILAR and SCORE_INS */ 699 #define SCORE_EDIT_MIN SCORE_SIMILAR 700 701 /* 702 * Structure to store info for word matching. 703 */ 704 typedef struct matchinf_S 705 { 706 langp_T *mi_lp; /* info for language and region */ 707 708 /* pointers to original text to be checked */ 709 char_u *mi_word; /* start of word being checked */ 710 char_u *mi_end; /* end of matching word so far */ 711 char_u *mi_fend; /* next char to be added to mi_fword */ 712 char_u *mi_cend; /* char after what was used for 713 mi_capflags */ 714 715 /* case-folded text */ 716 char_u mi_fword[MAXWLEN + 1]; /* mi_word case-folded */ 717 int mi_fwordlen; /* nr of valid bytes in mi_fword */ 718 719 /* for when checking word after a prefix */ 720 int mi_prefarridx; /* index in sl_pidxs with list of 721 affixID/condition */ 722 int mi_prefcnt; /* number of entries at mi_prefarridx */ 723 int mi_prefixlen; /* byte length of prefix */ 724 #ifdef FEAT_MBYTE 725 int mi_cprefixlen; /* byte length of prefix in original 726 case */ 727 #else 728 # define mi_cprefixlen mi_prefixlen /* it's the same value */ 729 #endif 730 731 /* for when checking a compound word */ 732 int mi_compoff; /* start of following word offset */ 733 char_u mi_compflags[MAXWLEN]; /* flags for compound words used */ 734 int mi_complen; /* nr of compound words used */ 735 int mi_compextra; /* nr of COMPOUNDROOT words */ 736 737 /* others */ 738 int mi_result; /* result so far: SP_BAD, SP_OK, etc. 
*/ 739 int mi_capflags; /* WF_ONECAP WF_ALLCAP WF_KEEPCAP */ 740 win_T *mi_win; /* buffer being checked */ 741 742 /* for NOBREAK */ 743 int mi_result2; /* "mi_resul" without following word */ 744 char_u *mi_end2; /* "mi_end" without following word */ 745 } matchinf_T; 746 747 /* 748 * The tables used for recognizing word characters according to spelling. 749 * These are only used for the first 256 characters of 'encoding'. 750 */ 751 typedef struct spelltab_S 752 { 753 char_u st_isw[256]; /* flags: is word char */ 754 char_u st_isu[256]; /* flags: is uppercase char */ 755 char_u st_fold[256]; /* chars: folded case */ 756 char_u st_upper[256]; /* chars: upper case */ 757 } spelltab_T; 758 759 static spelltab_T spelltab; 760 static int did_set_spelltab; 761 762 #define CF_WORD 0x01 763 #define CF_UPPER 0x02 764 765 static void clear_spell_chartab(spelltab_T *sp); 766 static int set_spell_finish(spelltab_T *new_st); 767 static int spell_iswordp(char_u *p, win_T *wp); 768 static int spell_iswordp_nmw(char_u *p, win_T *wp); 769 #ifdef FEAT_MBYTE 770 static int spell_mb_isword_class(int cl, win_T *wp); 771 static int spell_iswordp_w(int *p, win_T *wp); 772 #endif 773 static int write_spell_prefcond(FILE *fd, garray_T *gap); 774 775 /* 776 * For finding suggestions: At each node in the tree these states are tried: 777 */ 778 typedef enum 779 { 780 STATE_START = 0, /* At start of node check for NUL bytes (goodword 781 * ends); if badword ends there is a match, otherwise 782 * try splitting word. */ 783 STATE_NOPREFIX, /* try without prefix */ 784 STATE_SPLITUNDO, /* Undo splitting. */ 785 STATE_ENDNUL, /* Past NUL bytes at start of the node. */ 786 STATE_PLAIN, /* Use each byte of the node. */ 787 STATE_DEL, /* Delete a byte from the bad word. */ 788 STATE_INS_PREP, /* Prepare for inserting bytes. */ 789 STATE_INS, /* Insert a byte in the bad word. */ 790 STATE_SWAP, /* Swap two bytes. */ 791 STATE_UNSWAP, /* Undo swap two characters. 
*/ 792 STATE_SWAP3, /* Swap two characters over three. */ 793 STATE_UNSWAP3, /* Undo Swap two characters over three. */ 794 STATE_UNROT3L, /* Undo rotate three characters left */ 795 STATE_UNROT3R, /* Undo rotate three characters right */ 796 STATE_REP_INI, /* Prepare for using REP items. */ 797 STATE_REP, /* Use matching REP items from the .aff file. */ 798 STATE_REP_UNDO, /* Undo a REP item replacement. */ 799 STATE_FINAL /* End of this node. */ 800 } state_T; 801 802 /* 803 * Struct to keep the state at each level in suggest_try_change(). 804 */ 805 typedef struct trystate_S 806 { 807 state_T ts_state; /* state at this level, STATE_ */ 808 int ts_score; /* score */ 809 idx_T ts_arridx; /* index in tree array, start of node */ 810 short ts_curi; /* index in list of child nodes */ 811 char_u ts_fidx; /* index in fword[], case-folded bad word */ 812 char_u ts_fidxtry; /* ts_fidx at which bytes may be changed */ 813 char_u ts_twordlen; /* valid length of tword[] */ 814 char_u ts_prefixdepth; /* stack depth for end of prefix or 815 * PFD_PREFIXTREE or PFD_NOPREFIX */ 816 char_u ts_flags; /* TSF_ flags */ 817 #ifdef FEAT_MBYTE 818 char_u ts_tcharlen; /* number of bytes in tword character */ 819 char_u ts_tcharidx; /* current byte index in tword character */ 820 char_u ts_isdiff; /* DIFF_ values */ 821 char_u ts_fcharstart; /* index in fword where badword char started */ 822 #endif 823 char_u ts_prewordlen; /* length of word in "preword[]" */ 824 char_u ts_splitoff; /* index in "tword" after last split */ 825 char_u ts_splitfidx; /* "ts_fidx" at word split */ 826 char_u ts_complen; /* nr of compound words used */ 827 char_u ts_compsplit; /* index for "compflags" where word was spit */ 828 char_u ts_save_badflags; /* su_badflags saved here */ 829 char_u ts_delidx; /* index in fword for char that was deleted, 830 valid when "ts_flags" has TSF_DIDDEL */ 831 } trystate_T; 832 833 /* values for ts_isdiff */ 834 #define DIFF_NONE 0 /* no different byte (yet) */ 835 #define 
DIFF_YES 1 /* different byte found */ 836 #define DIFF_INSERT 2 /* inserting character */ 837 838 /* values for ts_flags */ 839 #define TSF_PREFIXOK 1 /* already checked that prefix is OK */ 840 #define TSF_DIDSPLIT 2 /* tried split at this point */ 841 #define TSF_DIDDEL 4 /* did a delete, "ts_delidx" has index */ 842 843 /* special values ts_prefixdepth */ 844 #define PFD_NOPREFIX 0xff /* not using prefixes */ 845 #define PFD_PREFIXTREE 0xfe /* walking through the prefix tree */ 846 #define PFD_NOTSPECIAL 0xfd /* highest value that's not special */ 847 848 /* mode values for find_word */ 849 #define FIND_FOLDWORD 0 /* find word case-folded */ 850 #define FIND_KEEPWORD 1 /* find keep-case word */ 851 #define FIND_PREFIX 2 /* find word after prefix */ 852 #define FIND_COMPOUND 3 /* find case-folded compound word */ 853 #define FIND_KEEPCOMPOUND 4 /* find keep-case compound word */ 854 855 /* Prototypes for the static functions of this file. */ static slang_T *slang_alloc(char_u *lang); 856 static void slang_free(slang_T *lp); 857 static void slang_clear(slang_T *lp); 858 static void slang_clear_sug(slang_T *lp); 859 static void find_word(matchinf_T *mip, int mode); 860 static int match_checkcompoundpattern(char_u *ptr, int wlen, garray_T *gap); 861 static int can_compound(slang_T *slang, char_u *word, char_u *flags); 862 static int can_be_compound(trystate_T *sp, slang_T *slang, char_u *compflags, int flag); 863 static int match_compoundrule(slang_T *slang, char_u *compflags); 864 static int valid_word_prefix(int totprefcnt, int arridx, int flags, char_u *word, slang_T *slang, int cond_req); 865 static void find_prefix(matchinf_T *mip, int mode); 866 static int fold_more(matchinf_T *mip); 867 static int spell_valid_case(int wordflags, int treeflags); 868 static int no_spell_checking(win_T *wp); 869 static void spell_load_lang(char_u *lang); 870 static char_u *spell_enc(void); 871 static void int_wordlist_spl(char_u *fname); 872 static void spell_load_cb(char_u *fname, void *cookie); 873 static slang_T
*spell_load_file(char_u *fname, char_u *lang, slang_T *old_lp, int silent); 874 static char_u *read_cnt_string(FILE *fd, int cnt_bytes, int *lenp); 875 static int read_region_section(FILE *fd, slang_T *slang, int len); 876 static int read_charflags_section(FILE *fd); 877 static int read_prefcond_section(FILE *fd, slang_T *lp); 878 static int read_rep_section(FILE *fd, garray_T *gap, short *first); 879 static int read_sal_section(FILE *fd, slang_T *slang); 880 static int read_words_section(FILE *fd, slang_T *lp, int len); 881 static void count_common_word(slang_T *lp, char_u *word, int len, int count); 882 static int score_wordcount_adj(slang_T *slang, int score, char_u *word, int split); 883 static int read_sofo_section(FILE *fd, slang_T *slang); 884 static int read_compound(FILE *fd, slang_T *slang, int len); 885 static int byte_in_str(char_u *str, int byte); 886 static int init_syl_tab(slang_T *slang); 887 static int count_syllables(slang_T *slang, char_u *word); 888 static int set_sofo(slang_T *lp, char_u *from, char_u *to); 889 static void set_sal_first(slang_T *lp); 890 #ifdef FEAT_MBYTE 891 static int *mb_str2wide(char_u *s); 892 #endif 893 static int spell_read_tree(FILE *fd, char_u **bytsp, idx_T **idxsp, int prefixtree, int prefixcnt); 894 static idx_T read_tree_node(FILE *fd, char_u *byts, idx_T *idxs, int maxidx, idx_T startidx, int prefixtree, int maxprefcondnr); 895 static void clear_midword(win_T *buf); 896 static void use_midword(slang_T *lp, win_T *buf); 897 static int find_region(char_u *rp, char_u *region); 898 static int captype(char_u *word, char_u *end); 899 static int badword_captype(char_u *word, char_u *end); 900 static void spell_reload_one(char_u *fname, int added_word); 901 static void set_spell_charflags(char_u *flags, int cnt, char_u *upp); 902 static int set_spell_chartab(char_u *fol, char_u *low, char_u *upp); 903 static int spell_casefold(char_u *p, int len, char_u *buf, int buflen); 904 static int check_need_cap(linenr_T lnum,
colnr_T col); 905 static void spell_find_suggest(char_u *badptr, int badlen, suginfo_T *su, int maxcount, int banbadword, int need_cap, int interactive); 906 #ifdef FEAT_EVAL 907 static void spell_suggest_expr(suginfo_T *su, char_u *expr); 908 #endif 909 static void spell_suggest_file(suginfo_T *su, char_u *fname); 910 static void spell_suggest_intern(suginfo_T *su, int interactive); 911 static void suggest_load_files(void); 912 static void tree_count_words(char_u *byts, idx_T *idxs); 913 static void spell_find_cleanup(suginfo_T *su); 914 static void onecap_copy(char_u *word, char_u *wcopy, int upper); 915 static void allcap_copy(char_u *word, char_u *wcopy); 916 static void suggest_try_special(suginfo_T *su); 917 static void suggest_try_change(suginfo_T *su); 918 static void suggest_trie_walk(suginfo_T *su, langp_T *lp, char_u *fword, int soundfold); 919 static void go_deeper(trystate_T *stack, int depth, int score_add); 920 #ifdef FEAT_MBYTE 921 static int nofold_len(char_u *fword, int flen, char_u *word); 922 #endif 923 static void find_keepcap_word(slang_T *slang, char_u *fword, char_u *kword); 924 static void score_comp_sal(suginfo_T *su); 925 static void score_combine(suginfo_T *su); 926 static int stp_sal_score(suggest_T *stp, suginfo_T *su, slang_T *slang, char_u *badsound); 927 static void suggest_try_soundalike_prep(void); 928 static void suggest_try_soundalike(suginfo_T *su); 929 static void suggest_try_soundalike_finish(void); 930 static void add_sound_suggest(suginfo_T *su, char_u *goodword, int score, langp_T *lp); 931 static int soundfold_find(slang_T *slang, char_u *word); 932 static void make_case_word(char_u *fword, char_u *cword, int flags); 933 static void set_map_str(slang_T *lp, char_u *map); 934 static int similar_chars(slang_T *slang, int c1, int c2); 935 static void add_suggestion(suginfo_T *su, garray_T *gap, char_u *goodword, int badlen, int score, int altscore, int had_bonus, slang_T *slang, int maxsf); 936 static void
check_suggestions(suginfo_T *su, garray_T *gap); 937 static void add_banned(suginfo_T *su, char_u *word); 938 static void rescore_suggestions(suginfo_T *su); 939 static void rescore_one(suginfo_T *su, suggest_T *stp); 940 static int cleanup_suggestions(garray_T *gap, int maxscore, int keep); 941 static void spell_soundfold(slang_T *slang, char_u *inword, int folded, char_u *res); 942 static void spell_soundfold_sofo(slang_T *slang, char_u *inword, char_u *res); 943 static void spell_soundfold_sal(slang_T *slang, char_u *inword, char_u *res); 944 #ifdef FEAT_MBYTE 945 static void spell_soundfold_wsal(slang_T *slang, char_u *inword, char_u *res); 946 #endif 947 static int soundalike_score(char_u *goodsound, char_u *badsound); 948 static int spell_edit_score(slang_T *slang, char_u *badword, char_u *goodword); 949 static int spell_edit_score_limit(slang_T *slang, char_u *badword, char_u *goodword, int limit); 950 #ifdef FEAT_MBYTE 951 static int spell_edit_score_limit_w(slang_T *slang, char_u *badword, char_u *goodword, int limit); 952 #endif 953 static void dump_word(slang_T *slang, char_u *word, char_u *pat, int *dir, int round, int flags, linenr_T lnum); 954 static linenr_T dump_prefixes(slang_T *slang, char_u *word, char_u *pat, int *dir, int round, int flags, linenr_T startlnum); 955 static buf_T *open_spellbuf(void); 956 static void close_spellbuf(buf_T *buf); 957 958 /* 959 * Use our own character-case definitions, because the current locale may 960 * differ from what the .spl file uses. 961 * These must not be called with a negative number! 962 */ 963 #ifndef FEAT_MBYTE 964 /* Non-multi-byte implementation. */ 965 # define SPELL_TOFOLD(c) ((c) < 256 ? (int)spelltab.st_fold[c] : (c)) 966 # define SPELL_TOUPPER(c) ((c) < 256 ? (int)spelltab.st_upper[c] : (c)) 967 # define SPELL_ISUPPER(c) ((c) < 256 ?
spelltab.st_isu[c] : FALSE) 968 #else 969 # if defined(HAVE_WCHAR_H) 970 # include <wchar.h> /* for towupper() and towlower() */ 971 # endif 972 /* Multi-byte implementation. For Unicode we can call utf_*(), but don't do 973 * that for ASCII, because we don't want to use 'casemap' here. Otherwise use 974 * the "w" library function for characters above 255 if available. */ 975 # ifdef HAVE_TOWLOWER 976 # define SPELL_TOFOLD(c) (enc_utf8 && (c) >= 128 ? utf_fold(c) \ 977 : (c) < 256 ? (int)spelltab.st_fold[c] : (int)towlower(c)) 978 # else 979 # define SPELL_TOFOLD(c) (enc_utf8 && (c) >= 128 ? utf_fold(c) \ 980 : (c) < 256 ? (int)spelltab.st_fold[c] : (c)) 981 # endif 982 983 # ifdef HAVE_TOWUPPER 984 # define SPELL_TOUPPER(c) (enc_utf8 && (c) >= 128 ? utf_toupper(c) \ 985 : (c) < 256 ? (int)spelltab.st_upper[c] : (int)towupper(c)) 986 # else 987 # define SPELL_TOUPPER(c) (enc_utf8 && (c) >= 128 ? utf_toupper(c) \ 988 : (c) < 256 ? (int)spelltab.st_upper[c] : (c)) 989 # endif 990 991 # ifdef HAVE_ISWUPPER 992 # define SPELL_ISUPPER(c) (enc_utf8 && (c) >= 128 ? utf_isupper(c) \ 993 : (c) < 256 ? spelltab.st_isu[c] : iswupper(c)) 994 # else 995 # define SPELL_ISUPPER(c) (enc_utf8 && (c) >= 128 ? utf_isupper(c) \ 996 : (c) < 256 ? spelltab.st_isu[c] : (FALSE)) 997 # endif 998 #endif 999 1000 1001 static char *e_format = N_("E759: Format error in spell file"); 1002 static char *e_spell_trunc = N_("E758: Truncated spell file"); 1003 static char *e_afftrailing = N_("Trailing text in %s line %d: %s"); 1004 static char *e_affname = N_("Affix name too long in %s line %d: %s"); 1005 static char *e_affform = N_("E761: Format error in affix file FOL, LOW or UPP"); 1006 static char *e_affrange = N_("E762: Character in FOL, LOW or UPP is out of range"); 1007 static char *msg_compressing = N_("Compressing word tree..."); 1008 1009 /* Remember what "z?" replaced.
*/ 1010 static char_u *repl_from = NULL; 1011 static char_u *repl_to = NULL; 1012 1013 /* 1014 * Main spell-checking function. 1015 * "ptr" points to a character that could be the start of a word. 1016 * "*attrp" is set to the highlight index for a badly spelled word. For a 1017 * non-word or when it's OK it remains unchanged. 1018 * This must only be called when 'spelllang' is not empty. 1019 * 1020 * "capcol" is used to check for a Capitalised word after the end of a 1021 * sentence. If it's zero then perform the check. Return the column where to 1022 * check next, or -1 when no sentence end was found. If it's NULL then don't 1023 * worry. 1024 * 1025 * Returns the length of the word in bytes, also when it's OK, so that the 1026 * caller can skip over the word. 1027 */ 1028 int 1029 spell_check( 1030 win_T *wp, /* current window */ 1031 char_u *ptr, 1032 hlf_T *attrp, 1033 int *capcol, /* column to check for Capital */ 1034 int docount) /* count good words */ 1035 { 1036 matchinf_T mi; /* Most things are put in "mi" so that it can 1037 be passed to functions quickly. */ 1038 int nrlen = 0; /* found a number first */ 1039 int c; 1040 int wrongcaplen = 0; 1041 int lpi; 1042 int count_word = docount; 1043 1044 /* A word never starts at a space or a control character. Return quickly 1045 * then, skipping over the character. */ 1046 if (*ptr <= ' ') 1047 return 1; 1048 1049 /* Return here when loading language files failed. */ 1050 if (wp->w_s->b_langp.ga_len == 0) 1051 return 1; 1052 1053 vim_memset(&mi, 0, sizeof(matchinf_T)); 1054 1055 /* A number is always OK. Also skip hexadecimal numbers 0xFF99 and 1056 * 0X99FF. But always do check spelling to find "3GPP" and "11 1057 * julifeest". 
*/ 1058 if (*ptr >= '0' && *ptr <= '9') 1059 { 1060 if (*ptr == '0' && (ptr[1] == 'b' || ptr[1] == 'B')) 1061 mi.mi_end = skipbin(ptr + 2); 1062 else if (*ptr == '0' && (ptr[1] == 'x' || ptr[1] == 'X')) 1063 mi.mi_end = skiphex(ptr + 2); 1064 else 1065 mi.mi_end = skipdigits(ptr); 1066 nrlen = (int)(mi.mi_end - ptr); 1067 } 1068 1069 /* Find the normal end of the word (until the next non-word character). */ 1070 mi.mi_word = ptr; 1071 mi.mi_fend = ptr; 1072 if (spell_iswordp(mi.mi_fend, wp)) 1073 { 1074 do 1075 { 1076 mb_ptr_adv(mi.mi_fend); 1077 } while (*mi.mi_fend != NUL && spell_iswordp(mi.mi_fend, wp)); 1078 1079 if (capcol != NULL && *capcol == 0 && wp->w_s->b_cap_prog != NULL) 1080 { 1081 /* Check word starting with capital letter. */ 1082 c = PTR2CHAR(ptr); 1083 if (!SPELL_ISUPPER(c)) 1084 wrongcaplen = (int)(mi.mi_fend - ptr); 1085 } 1086 } 1087 if (capcol != NULL) 1088 *capcol = -1; 1089 1090 /* We always use the characters up to the next non-word character, 1091 * also for bad words. */ 1092 mi.mi_end = mi.mi_fend; 1093 1094 /* Check caps type later. */ 1095 mi.mi_capflags = 0; 1096 mi.mi_cend = NULL; 1097 mi.mi_win = wp; 1098 1099 /* case-fold the word with one non-word character, so that we can check 1100 * for the word end. */ 1101 if (*mi.mi_fend != NUL) 1102 mb_ptr_adv(mi.mi_fend); 1103 1104 (void)spell_casefold(ptr, (int)(mi.mi_fend - ptr), mi.mi_fword, 1105 MAXWLEN + 1); 1106 mi.mi_fwordlen = (int)STRLEN(mi.mi_fword); 1107 1108 /* The word is bad unless we recognize it. */ 1109 mi.mi_result = SP_BAD; 1110 mi.mi_result2 = SP_BAD; 1111 1112 /* 1113 * Loop over the languages specified in 'spelllang'. 1114 * We check them all, because a word may be matched longer in another 1115 * language. 1116 */ 1117 for (lpi = 0; lpi < wp->w_s->b_langp.ga_len; ++lpi) 1118 { 1119 mi.mi_lp = LANGP_ENTRY(wp->w_s->b_langp, lpi); 1120 1121 /* If reloading fails the language is still in the list but everything 1122 * has been cleared. 
*/ 1123 if (mi.mi_lp->lp_slang->sl_fidxs == NULL) 1124 continue; 1125 1126 /* Check for a matching word in case-folded words. */ 1127 find_word(&mi, FIND_FOLDWORD); 1128 1129 /* Check for a matching word in keep-case words. */ 1130 find_word(&mi, FIND_KEEPWORD); 1131 1132 /* Check for matching prefixes. */ 1133 find_prefix(&mi, FIND_FOLDWORD); 1134 1135 /* For a NOBREAK language, may want to use a word without a following 1136 * word as a backup. */ 1137 if (mi.mi_lp->lp_slang->sl_nobreak && mi.mi_result == SP_BAD 1138 && mi.mi_result2 != SP_BAD) 1139 { 1140 mi.mi_result = mi.mi_result2; 1141 mi.mi_end = mi.mi_end2; 1142 } 1143 1144 /* Count the word in the first language where it's found to be OK. */ 1145 if (count_word && mi.mi_result == SP_OK) 1146 { 1147 count_common_word(mi.mi_lp->lp_slang, ptr, 1148 (int)(mi.mi_end - ptr), 1); 1149 count_word = FALSE; 1150 } 1151 } 1152 1153 if (mi.mi_result != SP_OK) 1154 { 1155 /* If we found a number skip over it. Allows for "42nd". Do flag 1156 * rare and local words, e.g., "3GPP". */ 1157 if (nrlen > 0) 1158 { 1159 if (mi.mi_result == SP_BAD || mi.mi_result == SP_BANNED) 1160 return nrlen; 1161 } 1162 1163 /* When we are at a non-word character there is no error, just 1164 * skip over the character (try looking for a word after it). */ 1165 else if (!spell_iswordp_nmw(ptr, wp)) 1166 { 1167 if (capcol != NULL && wp->w_s->b_cap_prog != NULL) 1168 { 1169 regmatch_T regmatch; 1170 int r; 1171 1172 /* Check for end of sentence. */ 1173 regmatch.regprog = wp->w_s->b_cap_prog; 1174 regmatch.rm_ic = FALSE; 1175 r = vim_regexec(®match, ptr, 0); 1176 wp->w_s->b_cap_prog = regmatch.regprog; 1177 if (r) 1178 *capcol = (int)(regmatch.endp[0] - ptr); 1179 } 1180 1181 #ifdef FEAT_MBYTE 1182 if (has_mbyte) 1183 return (*mb_ptr2len)(ptr); 1184 #endif 1185 return 1; 1186 } 1187 else if (mi.mi_end == ptr) 1188 /* Always include at least one character. Required for when there 1189 * is a mixup in "midword". 
*/ 1190 mb_ptr_adv(mi.mi_end); 1191 else if (mi.mi_result == SP_BAD 1192 && LANGP_ENTRY(wp->w_s->b_langp, 0)->lp_slang->sl_nobreak) 1193 { 1194 char_u *p, *fp; 1195 int save_result = mi.mi_result; 1196 1197 /* First language in 'spelllang' is NOBREAK. Find first position 1198 * at which any word would be valid. */ 1199 mi.mi_lp = LANGP_ENTRY(wp->w_s->b_langp, 0); 1200 if (mi.mi_lp->lp_slang->sl_fidxs != NULL) 1201 { 1202 p = mi.mi_word; 1203 fp = mi.mi_fword; 1204 for (;;) 1205 { 1206 mb_ptr_adv(p); 1207 mb_ptr_adv(fp); 1208 if (p >= mi.mi_end) 1209 break; 1210 mi.mi_compoff = (int)(fp - mi.mi_fword); 1211 find_word(&mi, FIND_COMPOUND); 1212 if (mi.mi_result != SP_BAD) 1213 { 1214 mi.mi_end = p; 1215 break; 1216 } 1217 } 1218 mi.mi_result = save_result; 1219 } 1220 } 1221 1222 if (mi.mi_result == SP_BAD || mi.mi_result == SP_BANNED) 1223 *attrp = HLF_SPB; 1224 else if (mi.mi_result == SP_RARE) 1225 *attrp = HLF_SPR; 1226 else 1227 *attrp = HLF_SPL; 1228 } 1229 1230 if (wrongcaplen > 0 && (mi.mi_result == SP_OK || mi.mi_result == SP_RARE)) 1231 { 1232 /* Report SpellCap only when the word isn't badly spelled. */ 1233 *attrp = HLF_SPC; 1234 return wrongcaplen; 1235 } 1236 1237 return (int)(mi.mi_end - ptr); 1238 } 1239 1240 /* 1241 * Check if the word at "mip->mi_word" is in the tree. 1242 * When "mode" is FIND_FOLDWORD check in fold-case word tree. 1243 * When "mode" is FIND_KEEPWORD check in keep-case word tree. 1244 * When "mode" is FIND_PREFIX check for word after prefix in fold-case word 1245 * tree. 1246 * 1247 * For a match mip->mi_result is updated. 
1248 */ 1249 static void 1250 find_word(matchinf_T *mip, int mode) 1251 { 1252 idx_T arridx = 0; 1253 int endlen[MAXWLEN]; /* length at possible word endings */ 1254 idx_T endidx[MAXWLEN]; /* possible word endings */ 1255 int endidxcnt = 0; 1256 int len; 1257 int wlen = 0; 1258 int flen; 1259 int c; 1260 char_u *ptr; 1261 idx_T lo, hi, m; 1262 #ifdef FEAT_MBYTE 1263 char_u *s; 1264 #endif 1265 char_u *p; 1266 int res = SP_BAD; 1267 slang_T *slang = mip->mi_lp->lp_slang; 1268 unsigned flags; 1269 char_u *byts; 1270 idx_T *idxs; 1271 int word_ends; 1272 int prefix_found; 1273 int nobreak_result; 1274 1275 if (mode == FIND_KEEPWORD || mode == FIND_KEEPCOMPOUND) 1276 { 1277 /* Check for word with matching case in keep-case tree. */ 1278 ptr = mip->mi_word; 1279 flen = 9999; /* no case folding, always enough bytes */ 1280 byts = slang->sl_kbyts; 1281 idxs = slang->sl_kidxs; 1282 1283 if (mode == FIND_KEEPCOMPOUND) 1284 /* Skip over the previously found word(s). */ 1285 wlen += mip->mi_compoff; 1286 } 1287 else 1288 { 1289 /* Check for case-folded in case-folded tree. */ 1290 ptr = mip->mi_fword; 1291 flen = mip->mi_fwordlen; /* available case-folded bytes */ 1292 byts = slang->sl_fbyts; 1293 idxs = slang->sl_fidxs; 1294 1295 if (mode == FIND_PREFIX) 1296 { 1297 /* Skip over the prefix. */ 1298 wlen = mip->mi_prefixlen; 1299 flen -= mip->mi_prefixlen; 1300 } 1301 else if (mode == FIND_COMPOUND) 1302 { 1303 /* Skip over the previously found word(s). */ 1304 wlen = mip->mi_compoff; 1305 flen -= mip->mi_compoff; 1306 } 1307 1308 } 1309 1310 if (byts == NULL) 1311 return; /* array is empty */ 1312 1313 /* 1314 * Repeat advancing in the tree until: 1315 * - there is a byte that doesn't match, 1316 * - we reach the end of the tree, 1317 * - or we reach the end of the line. 1318 */ 1319 for (;;) 1320 { 1321 if (flen <= 0 && *mip->mi_fend != NUL) 1322 flen = fold_more(mip); 1323 1324 len = byts[arridx++]; 1325 1326 /* If the first possible byte is a zero the word could end here. 
1327 * Remember this index, we first check for the longest word. */ 1328 if (byts[arridx] == 0) 1329 { 1330 if (endidxcnt == MAXWLEN) 1331 { 1332 /* Must be a corrupted spell file. */ 1333 EMSG(_(e_format)); 1334 return; 1335 } 1336 endlen[endidxcnt] = wlen; 1337 endidx[endidxcnt++] = arridx++; 1338 --len; 1339 1340 /* Skip over the zeros, there can be several flag/region 1341 * combinations. */ 1342 while (len > 0 && byts[arridx] == 0) 1343 { 1344 ++arridx; 1345 --len; 1346 } 1347 if (len == 0) 1348 break; /* no children, word must end here */ 1349 } 1350 1351 /* Stop looking at end of the line. */ 1352 if (ptr[wlen] == NUL) 1353 break; 1354 1355 /* Perform a binary search in the list of accepted bytes. */ 1356 c = ptr[wlen]; 1357 if (c == TAB) /* <Tab> is handled like <Space> */ 1358 c = ' '; 1359 lo = arridx; 1360 hi = arridx + len - 1; 1361 while (lo < hi) 1362 { 1363 m = (lo + hi) / 2; 1364 if (byts[m] > c) 1365 hi = m - 1; 1366 else if (byts[m] < c) 1367 lo = m + 1; 1368 else 1369 { 1370 lo = hi = m; 1371 break; 1372 } 1373 } 1374 1375 /* Stop if there is no matching byte. */ 1376 if (hi < lo || byts[lo] != c) 1377 break; 1378 1379 /* Continue at the child (if there is one). */ 1380 arridx = idxs[lo]; 1381 ++wlen; 1382 --flen; 1383 1384 /* One space in the good word may stand for several spaces in the 1385 * checked word. */ 1386 if (c == ' ') 1387 { 1388 for (;;) 1389 { 1390 if (flen <= 0 && *mip->mi_fend != NUL) 1391 flen = fold_more(mip); 1392 if (ptr[wlen] != ' ' && ptr[wlen] != TAB) 1393 break; 1394 ++wlen; 1395 --flen; 1396 } 1397 } 1398 } 1399 1400 /* 1401 * Verify that one of the possible endings is valid. Try the longest 1402 * first. 
1403 */ 1404 while (endidxcnt > 0) 1405 { 1406 --endidxcnt; 1407 arridx = endidx[endidxcnt]; 1408 wlen = endlen[endidxcnt]; 1409 1410 #ifdef FEAT_MBYTE 1411 if ((*mb_head_off)(ptr, ptr + wlen) > 0) 1412 continue; /* not at first byte of character */ 1413 #endif 1414 if (spell_iswordp(ptr + wlen, mip->mi_win)) 1415 { 1416 if (slang->sl_compprog == NULL && !slang->sl_nobreak) 1417 continue; /* next char is a word character */ 1418 word_ends = FALSE; 1419 } 1420 else 1421 word_ends = TRUE; 1422 /* The prefix flag is before compound flags. Once a valid prefix flag 1423 * has been found we try compound flags. */ 1424 prefix_found = FALSE; 1425 1426 #ifdef FEAT_MBYTE 1427 if (mode != FIND_KEEPWORD && has_mbyte) 1428 { 1429 /* Compute byte length in original word, length may change 1430 * when folding case. This can be slow, take a shortcut when the 1431 * case-folded word is equal to the keep-case word. */ 1432 p = mip->mi_word; 1433 if (STRNCMP(ptr, p, wlen) != 0) 1434 { 1435 for (s = ptr; s < ptr + wlen; mb_ptr_adv(s)) 1436 mb_ptr_adv(p); 1437 wlen = (int)(p - mip->mi_word); 1438 } 1439 } 1440 #endif 1441 1442 /* Check flags and region. For FIND_PREFIX check the condition and 1443 * prefix ID. 1444 * Repeat this if there are more flags/region alternatives until there 1445 * is a match. */ 1446 res = SP_BAD; 1447 for (len = byts[arridx - 1]; len > 0 && byts[arridx] == 0; 1448 --len, ++arridx) 1449 { 1450 flags = idxs[arridx]; 1451 1452 /* For the fold-case tree check that the case of the checked word 1453 * matches with what the word in the tree requires. 1454 * For keep-case tree the case is always right. For prefixes we 1455 * don't bother to check. */ 1456 if (mode == FIND_FOLDWORD) 1457 { 1458 if (mip->mi_cend != mip->mi_word + wlen) 1459 { 1460 /* mi_capflags was set for a different word length, need 1461 * to do it again. 
*/ 1462 mip->mi_cend = mip->mi_word + wlen; 1463 mip->mi_capflags = captype(mip->mi_word, mip->mi_cend); 1464 } 1465 1466 if (mip->mi_capflags == WF_KEEPCAP 1467 || !spell_valid_case(mip->mi_capflags, flags)) 1468 continue; 1469 } 1470 1471 /* When mode is FIND_PREFIX the word must support the prefix: 1472 * check the prefix ID and the condition. Do that for the list at 1473 * mip->mi_prefarridx that find_prefix() filled. */ 1474 else if (mode == FIND_PREFIX && !prefix_found) 1475 { 1476 c = valid_word_prefix(mip->mi_prefcnt, mip->mi_prefarridx, 1477 flags, 1478 mip->mi_word + mip->mi_cprefixlen, slang, 1479 FALSE); 1480 if (c == 0) 1481 continue; 1482 1483 /* Use the WF_RARE flag for a rare prefix. */ 1484 if (c & WF_RAREPFX) 1485 flags |= WF_RARE; 1486 prefix_found = TRUE; 1487 } 1488 1489 if (slang->sl_nobreak) 1490 { 1491 if ((mode == FIND_COMPOUND || mode == FIND_KEEPCOMPOUND) 1492 && (flags & WF_BANNED) == 0) 1493 { 1494 /* NOBREAK: found a valid following word. That's all we 1495 * need to know, so return. */ 1496 mip->mi_result = SP_OK; 1497 break; 1498 } 1499 } 1500 1501 else if ((mode == FIND_COMPOUND || mode == FIND_KEEPCOMPOUND 1502 || !word_ends)) 1503 { 1504 /* If there is no compound flag or the word is shorter than 1505 * COMPOUNDMIN reject it quickly. 1506 * Makes you wonder why someone puts a compound flag on a word 1507 * that's too short... Myspell compatibility requires this 1508 * anyway. */ 1509 if (((unsigned)flags >> 24) == 0 1510 || wlen - mip->mi_compoff < slang->sl_compminlen) 1511 continue; 1512 #ifdef FEAT_MBYTE 1513 /* For multi-byte chars check character length against 1514 * COMPOUNDMIN. */ 1515 if (has_mbyte 1516 && slang->sl_compminlen > 0 1517 && mb_charlen_len(mip->mi_word + mip->mi_compoff, 1518 wlen - mip->mi_compoff) < slang->sl_compminlen) 1519 continue; 1520 #endif 1521 1522 /* Limit the number of compound words to COMPOUNDWORDMAX if no 1523 * maximum for syllables is specified. 
*/ 1524 if (!word_ends && mip->mi_complen + mip->mi_compextra + 2 1525 > slang->sl_compmax 1526 && slang->sl_compsylmax == MAXWLEN) 1527 continue; 1528 1529 /* Don't allow compounding on a side where an affix was added, 1530 * unless COMPOUNDPERMITFLAG was used. */ 1531 if (mip->mi_complen > 0 && (flags & WF_NOCOMPBEF)) 1532 continue; 1533 if (!word_ends && (flags & WF_NOCOMPAFT)) 1534 continue; 1535 1536 /* Quickly check if compounding is possible with this flag. */ 1537 if (!byte_in_str(mip->mi_complen == 0 1538 ? slang->sl_compstartflags 1539 : slang->sl_compallflags, 1540 ((unsigned)flags >> 24))) 1541 continue; 1542 1543 /* If there is a match with a CHECKCOMPOUNDPATTERN rule 1544 * discard the compound word. */ 1545 if (match_checkcompoundpattern(ptr, wlen, &slang->sl_comppat)) 1546 continue; 1547 1548 if (mode == FIND_COMPOUND) 1549 { 1550 int capflags; 1551 1552 /* Need to check the caps type of the appended compound 1553 * word. */ 1554 #ifdef FEAT_MBYTE 1555 if (has_mbyte && STRNCMP(ptr, mip->mi_word, 1556 mip->mi_compoff) != 0) 1557 { 1558 /* case folding may have changed the length */ 1559 p = mip->mi_word; 1560 for (s = ptr; s < ptr + mip->mi_compoff; mb_ptr_adv(s)) 1561 mb_ptr_adv(p); 1562 } 1563 else 1564 #endif 1565 p = mip->mi_word + mip->mi_compoff; 1566 capflags = captype(p, mip->mi_word + wlen); 1567 if (capflags == WF_KEEPCAP || (capflags == WF_ALLCAP 1568 && (flags & WF_FIXCAP) != 0)) 1569 continue; 1570 1571 if (capflags != WF_ALLCAP) 1572 { 1573 /* When the character before the word is a word 1574 * character we do not accept a Onecap word. We do 1575 * accept a no-caps word, even when the dictionary 1576 * word specifies ONECAP. */ 1577 mb_ptr_back(mip->mi_word, p); 1578 if (spell_iswordp_nmw(p, mip->mi_win) 1579 ? 
capflags == WF_ONECAP 1580 : (flags & WF_ONECAP) != 0 1581 && capflags != WF_ONECAP) 1582 continue; 1583 } 1584 } 1585 1586 /* If the word ends the sequence of compound flags of the 1587 * words must match with one of the COMPOUNDRULE items and 1588 * the number of syllables must not be too large. */ 1589 mip->mi_compflags[mip->mi_complen] = ((unsigned)flags >> 24); 1590 mip->mi_compflags[mip->mi_complen + 1] = NUL; 1591 if (word_ends) 1592 { 1593 char_u fword[MAXWLEN]; 1594 1595 if (slang->sl_compsylmax < MAXWLEN) 1596 { 1597 /* "fword" is only needed for checking syllables. */ 1598 if (ptr == mip->mi_word) 1599 (void)spell_casefold(ptr, wlen, fword, MAXWLEN); 1600 else 1601 vim_strncpy(fword, ptr, endlen[endidxcnt]); 1602 } 1603 if (!can_compound(slang, fword, mip->mi_compflags)) 1604 continue; 1605 } 1606 else if (slang->sl_comprules != NULL 1607 && !match_compoundrule(slang, mip->mi_compflags)) 1608 /* The compound flags collected so far do not match any 1609 * COMPOUNDRULE, discard the compounded word. */ 1610 continue; 1611 } 1612 1613 /* Check NEEDCOMPOUND: can't use word without compounding. */ 1614 else if (flags & WF_NEEDCOMP) 1615 continue; 1616 1617 nobreak_result = SP_OK; 1618 1619 if (!word_ends) 1620 { 1621 int save_result = mip->mi_result; 1622 char_u *save_end = mip->mi_end; 1623 langp_T *save_lp = mip->mi_lp; 1624 int lpi; 1625 1626 /* Check that a valid word follows. If there is one and we 1627 * are compounding, it will set "mi_result", thus we are 1628 * always finished here. For NOBREAK we only check that a 1629 * valid word follows. 1630 * Recursive! */ 1631 if (slang->sl_nobreak) 1632 mip->mi_result = SP_BAD; 1633 1634 /* Find following word in case-folded tree. */ 1635 mip->mi_compoff = endlen[endidxcnt]; 1636 #ifdef FEAT_MBYTE 1637 if (has_mbyte && mode == FIND_KEEPWORD) 1638 { 1639 /* Compute byte length in case-folded word from "wlen": 1640 * byte length in keep-case word. Length may change when 1641 * folding case. 
This can be slow, take a shortcut when 1642 * the case-folded word is equal to the keep-case word. */ 1643 p = mip->mi_fword; 1644 if (STRNCMP(ptr, p, wlen) != 0) 1645 { 1646 for (s = ptr; s < ptr + wlen; mb_ptr_adv(s)) 1647 mb_ptr_adv(p); 1648 mip->mi_compoff = (int)(p - mip->mi_fword); 1649 } 1650 } 1651 #endif 1652 c = mip->mi_compoff; 1653 ++mip->mi_complen; 1654 if (flags & WF_COMPROOT) 1655 ++mip->mi_compextra; 1656 1657 /* For NOBREAK we need to try all NOBREAK languages, at least 1658 * to find the ".add" file(s). */ 1659 for (lpi = 0; lpi < mip->mi_win->w_s->b_langp.ga_len; ++lpi) 1660 { 1661 if (slang->sl_nobreak) 1662 { 1663 mip->mi_lp = LANGP_ENTRY(mip->mi_win->w_s->b_langp, lpi); 1664 if (mip->mi_lp->lp_slang->sl_fidxs == NULL 1665 || !mip->mi_lp->lp_slang->sl_nobreak) 1666 continue; 1667 } 1668 1669 find_word(mip, FIND_COMPOUND); 1670 1671 /* When NOBREAK any word that matches is OK. Otherwise we 1672 * need to find the longest match, thus try with keep-case 1673 * and prefix too. */ 1674 if (!slang->sl_nobreak || mip->mi_result == SP_BAD) 1675 { 1676 /* Find following word in keep-case tree. */ 1677 mip->mi_compoff = wlen; 1678 find_word(mip, FIND_KEEPCOMPOUND); 1679 1680 #if 0 /* Disabled, a prefix must not appear halfway a compound word, 1681 unless the COMPOUNDPERMITFLAG is used and then it can't be a 1682 postponed prefix. */ 1683 if (!slang->sl_nobreak || mip->mi_result == SP_BAD) 1684 { 1685 /* Check for following word with prefix. 
*/ 1686 mip->mi_compoff = c; 1687 find_prefix(mip, FIND_COMPOUND); 1688 } 1689 #endif 1690 } 1691 1692 if (!slang->sl_nobreak) 1693 break; 1694 } 1695 --mip->mi_complen; 1696 if (flags & WF_COMPROOT) 1697 --mip->mi_compextra; 1698 mip->mi_lp = save_lp; 1699 1700 if (slang->sl_nobreak) 1701 { 1702 nobreak_result = mip->mi_result; 1703 mip->mi_result = save_result; 1704 mip->mi_end = save_end; 1705 } 1706 else 1707 { 1708 if (mip->mi_result == SP_OK) 1709 break; 1710 continue; 1711 } 1712 } 1713 1714 if (flags & WF_BANNED) 1715 res = SP_BANNED; 1716 else if (flags & WF_REGION) 1717 { 1718 /* Check region. */ 1719 if ((mip->mi_lp->lp_region & (flags >> 16)) != 0) 1720 res = SP_OK; 1721 else 1722 res = SP_LOCAL; 1723 } 1724 else if (flags & WF_RARE) 1725 res = SP_RARE; 1726 else 1727 res = SP_OK; 1728 1729 /* Always use the longest match and the best result. For NOBREAK 1730 * we separately keep the longest match without a following good 1731 * word as a fall-back. */ 1732 if (nobreak_result == SP_BAD) 1733 { 1734 if (mip->mi_result2 > res) 1735 { 1736 mip->mi_result2 = res; 1737 mip->mi_end2 = mip->mi_word + wlen; 1738 } 1739 else if (mip->mi_result2 == res 1740 && mip->mi_end2 < mip->mi_word + wlen) 1741 mip->mi_end2 = mip->mi_word + wlen; 1742 } 1743 else if (mip->mi_result > res) 1744 { 1745 mip->mi_result = res; 1746 mip->mi_end = mip->mi_word + wlen; 1747 } 1748 else if (mip->mi_result == res && mip->mi_end < mip->mi_word + wlen) 1749 mip->mi_end = mip->mi_word + wlen; 1750 1751 if (mip->mi_result == SP_OK) 1752 break; 1753 } 1754 1755 if (mip->mi_result == SP_OK) 1756 break; 1757 } 1758 } 1759 1760 /* 1761 * Return TRUE if there is a match between the word ptr[wlen] and 1762 * CHECKCOMPOUNDPATTERN rules, assuming that we will concatenate with another 1763 * word. 1764 * A match means that the first part of CHECKCOMPOUNDPATTERN matches at the 1765 * end of ptr[wlen] and the second part matches after it. 
1766 */ 1767 static int 1768 match_checkcompoundpattern( 1769 char_u *ptr, 1770 int wlen, 1771 garray_T *gap) /* &sl_comppat */ 1772 { 1773 int i; 1774 char_u *p; 1775 int len; 1776 1777 for (i = 0; i + 1 < gap->ga_len; i += 2) 1778 { 1779 p = ((char_u **)gap->ga_data)[i + 1]; 1780 if (STRNCMP(ptr + wlen, p, STRLEN(p)) == 0) 1781 { 1782 /* Second part matches at start of following compound word, now 1783 * check if first part matches at end of previous word. */ 1784 p = ((char_u **)gap->ga_data)[i]; 1785 len = (int)STRLEN(p); 1786 if (len <= wlen && STRNCMP(ptr + wlen - len, p, len) == 0) 1787 return TRUE; 1788 } 1789 } 1790 return FALSE; 1791 } 1792 1793 /* 1794 * Return TRUE if "flags" is a valid sequence of compound flags and "word" 1795 * does not have too many syllables. 1796 */ 1797 static int 1798 can_compound(slang_T *slang, char_u *word, char_u *flags) 1799 { 1800 #ifdef FEAT_MBYTE 1801 char_u uflags[MAXWLEN * 2]; 1802 int i; 1803 #endif 1804 char_u *p; 1805 1806 if (slang->sl_compprog == NULL) 1807 return FALSE; 1808 #ifdef FEAT_MBYTE 1809 if (enc_utf8) 1810 { 1811 /* Need to convert the single byte flags to utf8 characters. */ 1812 p = uflags; 1813 for (i = 0; flags[i] != NUL; ++i) 1814 p += mb_char2bytes(flags[i], p); 1815 *p = NUL; 1816 p = uflags; 1817 } 1818 else 1819 #endif 1820 p = flags; 1821 if (!vim_regexec_prog(&slang->sl_compprog, FALSE, p, 0)) 1822 return FALSE; 1823 1824 /* Count the number of syllables. This may be slow, do it last. If there 1825 * are too many syllables AND the number of compound words is above 1826 * COMPOUNDWORDMAX then compounding is not allowed. */ 1827 if (slang->sl_compsylmax < MAXWLEN 1828 && count_syllables(slang, word) > slang->sl_compsylmax) 1829 return (int)STRLEN(flags) < slang->sl_compmax; 1830 return TRUE; 1831 } 1832 1833 /* 1834 * Return TRUE when the sequence of flags in "compflags" plus "flag" can 1835 * possibly form a valid compounded word. 
This also checks the COMPOUNDRULE 1836 * lines if they don't contain wildcards. 1837 */ 1838 static int 1839 can_be_compound( 1840 trystate_T *sp, 1841 slang_T *slang, 1842 char_u *compflags, 1843 int flag) 1844 { 1845 /* If the flag doesn't appear in sl_compstartflags or sl_compallflags 1846 * then it can't possibly compound. */ 1847 if (!byte_in_str(sp->ts_complen == sp->ts_compsplit 1848 ? slang->sl_compstartflags : slang->sl_compallflags, flag)) 1849 return FALSE; 1850 1851 /* If there are no wildcards, we can check if the flags collected so far 1852 * possibly can form a match with COMPOUNDRULE patterns. This only 1853 * makes sense when we have two or more words. */ 1854 if (slang->sl_comprules != NULL && sp->ts_complen > sp->ts_compsplit) 1855 { 1856 int v; 1857 1858 compflags[sp->ts_complen] = flag; 1859 compflags[sp->ts_complen + 1] = NUL; 1860 v = match_compoundrule(slang, compflags + sp->ts_compsplit); 1861 compflags[sp->ts_complen] = NUL; 1862 return v; 1863 } 1864 1865 return TRUE; 1866 } 1867 1868 1869 /* 1870 * Return TRUE if the compound flags in compflags[] match the start of any 1871 * compound rule. This is used to stop trying a compound if the flags 1872 * collected so far can't possibly match any compound rule. 1873 * Caller must check that slang->sl_comprules is not NULL. 
1874 */ 1875 static int 1876 match_compoundrule(slang_T *slang, char_u *compflags) 1877 { 1878 char_u *p; 1879 int i; 1880 int c; 1881 1882 /* loop over all the COMPOUNDRULE entries */ 1883 for (p = slang->sl_comprules; *p != NUL; ++p) 1884 { 1885 /* loop over the flags in the compound word we have made, match 1886 * them against the current rule entry */ 1887 for (i = 0; ; ++i) 1888 { 1889 c = compflags[i]; 1890 if (c == NUL) 1891 /* found a rule that matches for the flags we have so far */ 1892 return TRUE; 1893 if (*p == '/' || *p == NUL) 1894 break; /* end of rule, it's too short */ 1895 if (*p == '[') 1896 { 1897 int match = FALSE; 1898 1899 /* compare against all the flags in [] */ 1900 ++p; 1901 while (*p != ']' && *p != NUL) 1902 if (*p++ == c) 1903 match = TRUE; 1904 if (!match) 1905 break; /* none matches */ 1906 } 1907 else if (*p != c) 1908 break; /* flag of word doesn't match flag in pattern */ 1909 ++p; 1910 } 1911 1912 /* Skip to the next "/", where the next pattern starts. */ 1913 p = vim_strchr(p, '/'); 1914 if (p == NULL) 1915 break; 1916 } 1917 1918 /* Checked all the rules and none of them match the flags, so there 1919 * can't possibly be a compound starting with these flags. */ 1920 return FALSE; 1921 } 1922 1923 /* 1924 * Return non-zero if the prefix indicated by "arridx" matches with the prefix 1925 * ID in "flags" for the word "word". 1926 * The WF_RAREPFX flag is included in the return value for a rare prefix. 1927 */ 1928 static int 1929 valid_word_prefix( 1930 int totprefcnt, /* nr of prefix IDs */ 1931 int arridx, /* idx in sl_pidxs[] */ 1932 int flags, 1933 char_u *word, 1934 slang_T *slang, 1935 int cond_req) /* only use prefixes with a condition */ 1936 { 1937 int prefcnt; 1938 int pidx; 1939 regprog_T **rp; 1940 int prefid; 1941 1942 prefid = (unsigned)flags >> 24; 1943 for (prefcnt = totprefcnt - 1; prefcnt >= 0; --prefcnt) 1944 { 1945 pidx = slang->sl_pidxs[arridx + prefcnt]; 1946 1947 /* Check the prefix ID. 
*/ 1948 if (prefid != (pidx & 0xff)) 1949 continue; 1950 1951 /* Check if the prefix doesn't combine and the word already has a 1952 * suffix. */ 1953 if ((flags & WF_HAS_AFF) && (pidx & WF_PFX_NC)) 1954 continue; 1955 1956 /* Check the condition, if there is one. The condition index is 1957 * stored in the two bytes above the prefix ID byte. */ 1958 rp = &slang->sl_prefprog[((unsigned)pidx >> 8) & 0xffff]; 1959 if (*rp != NULL) 1960 { 1961 if (!vim_regexec_prog(rp, FALSE, word, 0)) 1962 continue; 1963 } 1964 else if (cond_req) 1965 continue; 1966 1967 /* It's a match! Return the WF_ flags. */ 1968 return pidx; 1969 } 1970 return 0; 1971 } 1972 1973 /* 1974 * Check if the word at "mip->mi_word" has a matching prefix. 1975 * If it does, then check the following word. 1976 * 1977 * If "mode" is "FIND_COMPOUND" then do the same after another word, find a 1978 * prefix in a compound word. 1979 * 1980 * For a match mip->mi_result is updated. 1981 */ 1982 static void 1983 find_prefix(matchinf_T *mip, int mode) 1984 { 1985 idx_T arridx = 0; 1986 int len; 1987 int wlen = 0; 1988 int flen; 1989 int c; 1990 char_u *ptr; 1991 idx_T lo, hi, m; 1992 slang_T *slang = mip->mi_lp->lp_slang; 1993 char_u *byts; 1994 idx_T *idxs; 1995 1996 byts = slang->sl_pbyts; 1997 if (byts == NULL) 1998 return; /* array is empty */ 1999 2000 /* We use the case-folded word here, since prefixes are always 2001 * case-folded. */ 2002 ptr = mip->mi_fword; 2003 flen = mip->mi_fwordlen; /* available case-folded bytes */ 2004 if (mode == FIND_COMPOUND) 2005 { 2006 /* Skip over the previously found word(s). */ 2007 ptr += mip->mi_compoff; 2008 flen -= mip->mi_compoff; 2009 } 2010 idxs = slang->sl_pidxs; 2011 2012 /* 2013 * Repeat advancing in the tree until: 2014 * - there is a byte that doesn't match, 2015 * - we reach the end of the tree, 2016 * - or we reach the end of the line. 
2017 */ 2018 for (;;) 2019 { 2020 if (flen == 0 && *mip->mi_fend != NUL) 2021 flen = fold_more(mip); 2022 2023 len = byts[arridx++]; 2024 2025 /* If the first possible byte is a zero the prefix could end here. 2026 * Check if the following word matches and supports the prefix. */ 2027 if (byts[arridx] == 0) 2028 { 2029 /* There can be several prefixes with different conditions. We 2030 * try them all, since we don't know which one will give the 2031 * longest match. The word is the same each time, pass the list 2032 * of possible prefixes to find_word(). */ 2033 mip->mi_prefarridx = arridx; 2034 mip->mi_prefcnt = len; 2035 while (len > 0 && byts[arridx] == 0) 2036 { 2037 ++arridx; 2038 --len; 2039 } 2040 mip->mi_prefcnt -= len; 2041 2042 /* Find the word that comes after the prefix. */ 2043 mip->mi_prefixlen = wlen; 2044 if (mode == FIND_COMPOUND) 2045 /* Skip over the previously found word(s). */ 2046 mip->mi_prefixlen += mip->mi_compoff; 2047 2048 #ifdef FEAT_MBYTE 2049 if (has_mbyte) 2050 { 2051 /* Case-folded length may differ from original length. */ 2052 mip->mi_cprefixlen = nofold_len(mip->mi_fword, 2053 mip->mi_prefixlen, mip->mi_word); 2054 } 2055 else 2056 mip->mi_cprefixlen = mip->mi_prefixlen; 2057 #endif 2058 find_word(mip, FIND_PREFIX); 2059 2060 2061 if (len == 0) 2062 break; /* no children, word must end here */ 2063 } 2064 2065 /* Stop looking at end of the line. */ 2066 if (ptr[wlen] == NUL) 2067 break; 2068 2069 /* Perform a binary search in the list of accepted bytes. */ 2070 c = ptr[wlen]; 2071 lo = arridx; 2072 hi = arridx + len - 1; 2073 while (lo < hi) 2074 { 2075 m = (lo + hi) / 2; 2076 if (byts[m] > c) 2077 hi = m - 1; 2078 else if (byts[m] < c) 2079 lo = m + 1; 2080 else 2081 { 2082 lo = hi = m; 2083 break; 2084 } 2085 } 2086 2087 /* Stop if there is no matching byte. */ 2088 if (hi < lo || byts[lo] != c) 2089 break; 2090 2091 /* Continue at the child (if there is one). 
*/ 2092 arridx = idxs[lo]; 2093 ++wlen; 2094 --flen; 2095 } 2096 } 2097 2098 /* 2099 * Need to fold at least one more character. Do until next non-word character 2100 * for efficiency. Include the non-word character too. 2101 * Return the length of the folded chars in bytes. 2102 */ 2103 static int 2104 fold_more(matchinf_T *mip) 2105 { 2106 int flen; 2107 char_u *p; 2108 2109 p = mip->mi_fend; 2110 do 2111 { 2112 mb_ptr_adv(mip->mi_fend); 2113 } while (*mip->mi_fend != NUL && spell_iswordp(mip->mi_fend, mip->mi_win)); 2114 2115 /* Include the non-word character so that we can check for the word end. */ 2116 if (*mip->mi_fend != NUL) 2117 mb_ptr_adv(mip->mi_fend); 2118 2119 (void)spell_casefold(p, (int)(mip->mi_fend - p), 2120 mip->mi_fword + mip->mi_fwordlen, 2121 MAXWLEN - mip->mi_fwordlen); 2122 flen = (int)STRLEN(mip->mi_fword + mip->mi_fwordlen); 2123 mip->mi_fwordlen += flen; 2124 return flen; 2125 } 2126 2127 /* 2128 * Check case flags for a word. Return TRUE if the word has the requested 2129 * case. 2130 */ 2131 static int 2132 spell_valid_case( 2133 int wordflags, /* flags for the checked word. */ 2134 int treeflags) /* flags for the word in the spell tree */ 2135 { 2136 return ((wordflags == WF_ALLCAP && (treeflags & WF_FIXCAP) == 0) 2137 || ((treeflags & (WF_ALLCAP | WF_KEEPCAP)) == 0 2138 && ((treeflags & WF_ONECAP) == 0 2139 || (wordflags & WF_ONECAP) != 0))); 2140 } 2141 2142 /* 2143 * Return TRUE if spell checking is not enabled. 2144 */ 2145 static int 2146 no_spell_checking(win_T *wp) 2147 { 2148 if (!wp->w_p_spell || *wp->w_s->b_p_spl == NUL 2149 || wp->w_s->b_langp.ga_len == 0) 2150 { 2151 EMSG(_("E756: Spell checking is not enabled")); 2152 return TRUE; 2153 } 2154 return FALSE; 2155 } 2156 2157 /* 2158 * Move to next spell error. 2159 * "curline" is FALSE for "[s", "]s", "[S" and "]S". 2160 * "curline" is TRUE to find word under/after cursor in the same line. 
2161 * For Insert mode completion "dir" is BACKWARD and "curline" is TRUE: move 2162 * to after badly spelled word before the cursor. 2163 * Return 0 if not found, length of the badly spelled word otherwise. 2164 */ 2165 int 2166 spell_move_to( 2167 win_T *wp, 2168 int dir, /* FORWARD or BACKWARD */ 2169 int allwords, /* TRUE for "[s"/"]s", FALSE for "[S"/"]S" */ 2170 int curline, 2171 hlf_T *attrp) /* return: attributes of bad word or NULL 2172 (only when "dir" is FORWARD) */ 2173 { 2174 linenr_T lnum; 2175 pos_T found_pos; 2176 int found_len = 0; 2177 char_u *line; 2178 char_u *p; 2179 char_u *endp; 2180 hlf_T attr; 2181 int len; 2182 #ifdef FEAT_SYN_HL 2183 int has_syntax = syntax_present(wp); 2184 #endif 2185 int col; 2186 int can_spell; 2187 char_u *buf = NULL; 2188 int buflen = 0; 2189 int skip = 0; 2190 int capcol = -1; 2191 int found_one = FALSE; 2192 int wrapped = FALSE; 2193 2194 if (no_spell_checking(wp)) 2195 return 0; 2196 2197 /* 2198 * Start looking for bad word at the start of the line, because we can't 2199 * start halfway a word, we don't know where it starts or ends. 2200 * 2201 * When searching backwards, we continue in the line to find the last 2202 * bad word (in the cursor line: before the cursor). 2203 * 2204 * We concatenate the start of the next line, so that wrapped words work 2205 * (e.g. "et<line-break>cetera"). Doesn't work when searching backwards 2206 * though... 2207 */ 2208 lnum = wp->w_cursor.lnum; 2209 clearpos(&found_pos); 2210 2211 while (!got_int) 2212 { 2213 line = ml_get_buf(wp->w_buffer, lnum, FALSE); 2214 2215 len = (int)STRLEN(line); 2216 if (buflen < len + MAXWLEN + 2) 2217 { 2218 vim_free(buf); 2219 buflen = len + MAXWLEN + 2; 2220 buf = alloc(buflen); 2221 if (buf == NULL) 2222 break; 2223 } 2224 2225 /* In first line check first word for Capital. */ 2226 if (lnum == 1) 2227 capcol = 0; 2228 2229 /* For checking first word with a capital skip white space. 
*/ 2230 if (capcol == 0) 2231 capcol = (int)(skipwhite(line) - line); 2232 else if (curline && wp == curwin) 2233 { 2234 /* For spellbadword(): check if first word needs a capital. */ 2235 col = (int)(skipwhite(line) - line); 2236 if (check_need_cap(lnum, col)) 2237 capcol = col; 2238 2239 /* Need to get the line again, may have looked at the previous 2240 * one. */ 2241 line = ml_get_buf(wp->w_buffer, lnum, FALSE); 2242 } 2243 2244 /* Copy the line into "buf" and append the start of the next line if 2245 * possible. */ 2246 STRCPY(buf, line); 2247 if (lnum < wp->w_buffer->b_ml.ml_line_count) 2248 spell_cat_line(buf + STRLEN(buf), 2249 ml_get_buf(wp->w_buffer, lnum + 1, FALSE), MAXWLEN); 2250 2251 p = buf + skip; 2252 endp = buf + len; 2253 while (p < endp) 2254 { 2255 /* When searching backward don't search after the cursor. Unless 2256 * we wrapped around the end of the buffer. */ 2257 if (dir == BACKWARD 2258 && lnum == wp->w_cursor.lnum 2259 && !wrapped 2260 && (colnr_T)(p - buf) >= wp->w_cursor.col) 2261 break; 2262 2263 /* start of word */ 2264 attr = HLF_COUNT; 2265 len = spell_check(wp, p, &attr, &capcol, FALSE); 2266 2267 if (attr != HLF_COUNT) 2268 { 2269 /* We found a bad word. Check the attribute. */ 2270 if (allwords || attr == HLF_SPB) 2271 { 2272 /* When searching forward only accept a bad word after 2273 * the cursor. */ 2274 if (dir == BACKWARD 2275 || lnum != wp->w_cursor.lnum 2276 || (lnum == wp->w_cursor.lnum 2277 && (wrapped 2278 || (colnr_T)(curline ? 
p - buf + len 2279 : p - buf) 2280 > wp->w_cursor.col))) 2281 { 2282 #ifdef FEAT_SYN_HL 2283 if (has_syntax) 2284 { 2285 col = (int)(p - buf); 2286 (void)syn_get_id(wp, lnum, (colnr_T)col, 2287 FALSE, &can_spell, FALSE); 2288 if (!can_spell) 2289 attr = HLF_COUNT; 2290 } 2291 else 2292 #endif 2293 can_spell = TRUE; 2294 2295 if (can_spell) 2296 { 2297 found_one = TRUE; 2298 found_pos.lnum = lnum; 2299 found_pos.col = (int)(p - buf); 2300 #ifdef FEAT_VIRTUALEDIT 2301 found_pos.coladd = 0; 2302 #endif 2303 if (dir == FORWARD) 2304 { 2305 /* No need to search further. */ 2306 wp->w_cursor = found_pos; 2307 vim_free(buf); 2308 if (attrp != NULL) 2309 *attrp = attr; 2310 return len; 2311 } 2312 else if (curline) 2313 /* Insert mode completion: put cursor after 2314 * the bad word. */ 2315 found_pos.col += len; 2316 found_len = len; 2317 } 2318 } 2319 else 2320 found_one = TRUE; 2321 } 2322 } 2323 2324 /* advance to character after the word */ 2325 p += len; 2326 capcol -= len; 2327 } 2328 2329 if (dir == BACKWARD && found_pos.lnum != 0) 2330 { 2331 /* Use the last match in the line (before the cursor). */ 2332 wp->w_cursor = found_pos; 2333 vim_free(buf); 2334 return found_len; 2335 } 2336 2337 if (curline) 2338 break; /* only check cursor line */ 2339 2340 /* Advance to next line. */ 2341 if (dir == BACKWARD) 2342 { 2343 /* If we are back at the starting line and searched it again there 2344 * is no match, give up. */ 2345 if (lnum == wp->w_cursor.lnum && wrapped) 2346 break; 2347 2348 if (lnum > 1) 2349 --lnum; 2350 else if (!p_ws) 2351 break; /* at first line and 'nowrapscan' */ 2352 else 2353 { 2354 /* Wrap around to the end of the buffer. May search the 2355 * starting line again and accept the last match. 
*/ 2356 lnum = wp->w_buffer->b_ml.ml_line_count; 2357 wrapped = TRUE; 2358 if (!shortmess(SHM_SEARCH)) 2359 give_warning((char_u *)_(top_bot_msg), TRUE); 2360 } 2361 capcol = -1; 2362 } 2363 else 2364 { 2365 if (lnum < wp->w_buffer->b_ml.ml_line_count) 2366 ++lnum; 2367 else if (!p_ws) 2368 break; /* at first line and 'nowrapscan' */ 2369 else 2370 { 2371 /* Wrap around to the start of the buffer. May search the 2372 * starting line again and accept the first match. */ 2373 lnum = 1; 2374 wrapped = TRUE; 2375 if (!shortmess(SHM_SEARCH)) 2376 give_warning((char_u *)_(bot_top_msg), TRUE); 2377 } 2378 2379 /* If we are back at the starting line and there is no match then 2380 * give up. */ 2381 if (lnum == wp->w_cursor.lnum && (!found_one || wrapped)) 2382 break; 2383 2384 /* Skip the characters at the start of the next line that were 2385 * included in a match crossing line boundaries. */ 2386 if (attr == HLF_COUNT) 2387 skip = (int)(p - endp); 2388 else 2389 skip = 0; 2390 2391 /* Capcol skips over the inserted space. */ 2392 --capcol; 2393 2394 /* But after empty line check first word in next line */ 2395 if (*skipwhite(line) == NUL) 2396 capcol = 0; 2397 } 2398 2399 line_breakcheck(); 2400 } 2401 2402 vim_free(buf); 2403 return 0; 2404 } 2405 2406 /* 2407 * For spell checking: concatenate the start of the following line "line" into 2408 * "buf", blanking-out special characters. Copy less then "maxlen" bytes. 2409 * Keep the blanks at the start of the next line, this is used in win_line() 2410 * to skip those bytes if the word was OK. 2411 */ 2412 void 2413 spell_cat_line(char_u *buf, char_u *line, int maxlen) 2414 { 2415 char_u *p; 2416 int n; 2417 2418 p = skipwhite(line); 2419 while (vim_strchr((char_u *)"*#/\"\t", *p) != NULL) 2420 p = skipwhite(p + 1); 2421 2422 if (*p != NUL) 2423 { 2424 /* Only worth concatenating if there is something else than spaces to 2425 * concatenate. 
*/ 2426 n = (int)(p - line) + 1; 2427 if (n < maxlen - 1) 2428 { 2429 vim_memset(buf, ' ', n); 2430 vim_strncpy(buf + n, p, maxlen - 1 - n); 2431 } 2432 } 2433 } 2434 2435 /* 2436 * Structure used for the cookie argument of do_in_runtimepath(). 2437 */ 2438 typedef struct spelload_S 2439 { 2440 char_u sl_lang[MAXWLEN + 1]; /* language name */ 2441 slang_T *sl_slang; /* resulting slang_T struct */ 2442 int sl_nobreak; /* NOBREAK language found */ 2443 } spelload_T; 2444 2445 /* 2446 * Load word list(s) for "lang" from Vim spell file(s). 2447 * "lang" must be the language without the region: e.g., "en". 2448 */ 2449 static void 2450 spell_load_lang(char_u *lang) 2451 { 2452 char_u fname_enc[85]; 2453 int r; 2454 spelload_T sl; 2455 #ifdef FEAT_AUTOCMD 2456 int round; 2457 #endif 2458 2459 /* Copy the language name to pass it to spell_load_cb() as a cookie. 2460 * It's truncated when an error is detected. */ 2461 STRCPY(sl.sl_lang, lang); 2462 sl.sl_slang = NULL; 2463 sl.sl_nobreak = FALSE; 2464 2465 #ifdef FEAT_AUTOCMD 2466 /* We may retry when no spell file is found for the language, an 2467 * autocommand may load it then. */ 2468 for (round = 1; round <= 2; ++round) 2469 #endif 2470 { 2471 /* 2472 * Find the first spell file for "lang" in 'runtimepath' and load it. 2473 */ 2474 vim_snprintf((char *)fname_enc, sizeof(fname_enc) - 5, 2475 #ifdef VMS 2476 "spell/%s_%s.spl", 2477 #else 2478 "spell/%s.%s.spl", 2479 #endif 2480 lang, spell_enc()); 2481 r = do_in_runtimepath(fname_enc, 0, spell_load_cb, &sl); 2482 2483 if (r == FAIL && *sl.sl_lang != NUL) 2484 { 2485 /* Try loading the ASCII version. 
*/ 2486 vim_snprintf((char *)fname_enc, sizeof(fname_enc) - 5, 2487 #ifdef VMS 2488 "spell/%s_ascii.spl", 2489 #else 2490 "spell/%s.ascii.spl", 2491 #endif 2492 lang); 2493 r = do_in_runtimepath(fname_enc, 0, spell_load_cb, &sl); 2494 2495 #ifdef FEAT_AUTOCMD 2496 if (r == FAIL && *sl.sl_lang != NUL && round == 1 2497 && apply_autocmds(EVENT_SPELLFILEMISSING, lang, 2498 curbuf->b_fname, FALSE, curbuf)) 2499 continue; 2500 break; 2501 #endif 2502 } 2503 #ifdef FEAT_AUTOCMD 2504 break; 2505 #endif 2506 } 2507 2508 if (r == FAIL) 2509 { 2510 smsg((char_u *) 2511 #ifdef VMS 2512 _("Warning: Cannot find word list \"%s_%s.spl\" or \"%s_ascii.spl\""), 2513 #else 2514 _("Warning: Cannot find word list \"%s.%s.spl\" or \"%s.ascii.spl\""), 2515 #endif 2516 lang, spell_enc(), lang); 2517 } 2518 else if (sl.sl_slang != NULL) 2519 { 2520 /* At least one file was loaded, now load ALL the additions. */ 2521 STRCPY(fname_enc + STRLEN(fname_enc) - 3, "add.spl"); 2522 do_in_runtimepath(fname_enc, DIP_ALL, spell_load_cb, &sl); 2523 } 2524 } 2525 2526 /* 2527 * Return the encoding used for spell checking: Use 'encoding', except that we 2528 * use "latin1" for "latin9". And limit to 60 characters (just in case). 2529 */ 2530 static char_u * 2531 spell_enc(void) 2532 { 2533 2534 #ifdef FEAT_MBYTE 2535 if (STRLEN(p_enc) < 60 && STRCMP(p_enc, "iso-8859-15") != 0) 2536 return p_enc; 2537 #endif 2538 return (char_u *)"latin1"; 2539 } 2540 2541 /* 2542 * Get the name of the .spl file for the internal wordlist into 2543 * "fname[MAXPATHL]". 2544 */ 2545 static void 2546 int_wordlist_spl(char_u *fname) 2547 { 2548 vim_snprintf((char *)fname, MAXPATHL, SPL_FNAME_TMPL, 2549 int_wordlist, spell_enc()); 2550 } 2551 2552 /* 2553 * Allocate a new slang_T for language "lang". "lang" can be NULL. 2554 * Caller must fill "sl_next". 
2555 */ 2556 static slang_T * 2557 slang_alloc(char_u *lang) 2558 { 2559 slang_T *lp; 2560 2561 lp = (slang_T *)alloc_clear(sizeof(slang_T)); 2562 if (lp != NULL) 2563 { 2564 if (lang != NULL) 2565 lp->sl_name = vim_strsave(lang); 2566 ga_init2(&lp->sl_rep, sizeof(fromto_T), 10); 2567 ga_init2(&lp->sl_repsal, sizeof(fromto_T), 10); 2568 lp->sl_compmax = MAXWLEN; 2569 lp->sl_compsylmax = MAXWLEN; 2570 hash_init(&lp->sl_wordcount); 2571 } 2572 2573 return lp; 2574 } 2575 2576 /* 2577 * Free the contents of an slang_T and the structure itself. 2578 */ 2579 static void 2580 slang_free(slang_T *lp) 2581 { 2582 vim_free(lp->sl_name); 2583 vim_free(lp->sl_fname); 2584 slang_clear(lp); 2585 vim_free(lp); 2586 } 2587 2588 /* 2589 * Clear an slang_T so that the file can be reloaded. 2590 */ 2591 static void 2592 slang_clear(slang_T *lp) 2593 { 2594 garray_T *gap; 2595 fromto_T *ftp; 2596 salitem_T *smp; 2597 int i; 2598 int round; 2599 2600 vim_free(lp->sl_fbyts); 2601 lp->sl_fbyts = NULL; 2602 vim_free(lp->sl_kbyts); 2603 lp->sl_kbyts = NULL; 2604 vim_free(lp->sl_pbyts); 2605 lp->sl_pbyts = NULL; 2606 2607 vim_free(lp->sl_fidxs); 2608 lp->sl_fidxs = NULL; 2609 vim_free(lp->sl_kidxs); 2610 lp->sl_kidxs = NULL; 2611 vim_free(lp->sl_pidxs); 2612 lp->sl_pidxs = NULL; 2613 2614 for (round = 1; round <= 2; ++round) 2615 { 2616 gap = round == 1 ? &lp->sl_rep : &lp->sl_repsal; 2617 while (gap->ga_len > 0) 2618 { 2619 ftp = &((fromto_T *)gap->ga_data)[--gap->ga_len]; 2620 vim_free(ftp->ft_from); 2621 vim_free(ftp->ft_to); 2622 } 2623 ga_clear(gap); 2624 } 2625 2626 gap = &lp->sl_sal; 2627 if (lp->sl_sofo) 2628 { 2629 /* "ga_len" is set to 1 without adding an item for latin1 */ 2630 if (gap->ga_data != NULL) 2631 /* SOFOFROM and SOFOTO items: free lists of wide characters. 
*/ 2632 for (i = 0; i < gap->ga_len; ++i) 2633 vim_free(((int **)gap->ga_data)[i]); 2634 } 2635 else 2636 /* SAL items: free salitem_T items */ 2637 while (gap->ga_len > 0) 2638 { 2639 smp = &((salitem_T *)gap->ga_data)[--gap->ga_len]; 2640 vim_free(smp->sm_lead); 2641 /* Don't free sm_oneof and sm_rules, they point into sm_lead. */ 2642 vim_free(smp->sm_to); 2643 #ifdef FEAT_MBYTE 2644 vim_free(smp->sm_lead_w); 2645 vim_free(smp->sm_oneof_w); 2646 vim_free(smp->sm_to_w); 2647 #endif 2648 } 2649 ga_clear(gap); 2650 2651 for (i = 0; i < lp->sl_prefixcnt; ++i) 2652 vim_regfree(lp->sl_prefprog[i]); 2653 lp->sl_prefixcnt = 0; 2654 vim_free(lp->sl_prefprog); 2655 lp->sl_prefprog = NULL; 2656 2657 vim_free(lp->sl_info); 2658 lp->sl_info = NULL; 2659 2660 vim_free(lp->sl_midword); 2661 lp->sl_midword = NULL; 2662 2663 vim_regfree(lp->sl_compprog); 2664 vim_free(lp->sl_comprules); 2665 vim_free(lp->sl_compstartflags); 2666 vim_free(lp->sl_compallflags); 2667 lp->sl_compprog = NULL; 2668 lp->sl_comprules = NULL; 2669 lp->sl_compstartflags = NULL; 2670 lp->sl_compallflags = NULL; 2671 2672 vim_free(lp->sl_syllable); 2673 lp->sl_syllable = NULL; 2674 ga_clear(&lp->sl_syl_items); 2675 2676 ga_clear_strings(&lp->sl_comppat); 2677 2678 hash_clear_all(&lp->sl_wordcount, WC_KEY_OFF); 2679 hash_init(&lp->sl_wordcount); 2680 2681 #ifdef FEAT_MBYTE 2682 hash_clear_all(&lp->sl_map_hash, 0); 2683 #endif 2684 2685 /* Clear info from .sug file. */ 2686 slang_clear_sug(lp); 2687 2688 lp->sl_compmax = MAXWLEN; 2689 lp->sl_compminlen = 0; 2690 lp->sl_compsylmax = MAXWLEN; 2691 lp->sl_regions[0] = NUL; 2692 } 2693 2694 /* 2695 * Clear the info from the .sug file in "lp". 
2696 */ 2697 static void 2698 slang_clear_sug(slang_T *lp) 2699 { 2700 vim_free(lp->sl_sbyts); 2701 lp->sl_sbyts = NULL; 2702 vim_free(lp->sl_sidxs); 2703 lp->sl_sidxs = NULL; 2704 close_spellbuf(lp->sl_sugbuf); 2705 lp->sl_sugbuf = NULL; 2706 lp->sl_sugloaded = FALSE; 2707 lp->sl_sugtime = 0; 2708 } 2709 2710 /* 2711 * Load one spell file and store the info into a slang_T. 2712 * Invoked through do_in_runtimepath(). 2713 */ 2714 static void 2715 spell_load_cb(char_u *fname, void *cookie) 2716 { 2717 spelload_T *slp = (spelload_T *)cookie; 2718 slang_T *slang; 2719 2720 slang = spell_load_file(fname, slp->sl_lang, NULL, FALSE); 2721 if (slang != NULL) 2722 { 2723 /* When a previously loaded file has NOBREAK also use it for the 2724 * ".add" files. */ 2725 if (slp->sl_nobreak && slang->sl_add) 2726 slang->sl_nobreak = TRUE; 2727 else if (slang->sl_nobreak) 2728 slp->sl_nobreak = TRUE; 2729 2730 slp->sl_slang = slang; 2731 } 2732 } 2733 2734 /* 2735 * Load one spell file and store the info into a slang_T. 2736 * 2737 * This is invoked in three ways: 2738 * - From spell_load_cb() to load a spell file for the first time. "lang" is 2739 * the language name, "old_lp" is NULL. Will allocate an slang_T. 2740 * - To reload a spell file that was changed. "lang" is NULL and "old_lp" 2741 * points to the existing slang_T. 2742 * - Just after writing a .spl file; it's read back to produce the .sug file. 2743 * "old_lp" is NULL and "lang" is NULL. Will allocate an slang_T. 2744 * 2745 * Returns the slang_T the spell file was loaded into. NULL for error. 
2746 */ 2747 static slang_T * 2748 spell_load_file( 2749 char_u *fname, 2750 char_u *lang, 2751 slang_T *old_lp, 2752 int silent) /* no error if file doesn't exist */ 2753 { 2754 FILE *fd; 2755 char_u buf[VIMSPELLMAGICL]; 2756 char_u *p; 2757 int i; 2758 int n; 2759 int len; 2760 char_u *save_sourcing_name = sourcing_name; 2761 linenr_T save_sourcing_lnum = sourcing_lnum; 2762 slang_T *lp = NULL; 2763 int c = 0; 2764 int res; 2765 2766 fd = mch_fopen((char *)fname, "r"); 2767 if (fd == NULL) 2768 { 2769 if (!silent) 2770 EMSG2(_(e_notopen), fname); 2771 else if (p_verbose > 2) 2772 { 2773 verbose_enter(); 2774 smsg((char_u *)e_notopen, fname); 2775 verbose_leave(); 2776 } 2777 goto endFAIL; 2778 } 2779 if (p_verbose > 2) 2780 { 2781 verbose_enter(); 2782 smsg((char_u *)_("Reading spell file \"%s\""), fname); 2783 verbose_leave(); 2784 } 2785 2786 if (old_lp == NULL) 2787 { 2788 lp = slang_alloc(lang); 2789 if (lp == NULL) 2790 goto endFAIL; 2791 2792 /* Remember the file name, used to reload the file when it's updated. */ 2793 lp->sl_fname = vim_strsave(fname); 2794 if (lp->sl_fname == NULL) 2795 goto endFAIL; 2796 2797 /* Check for .add.spl (_add.spl for VMS). */ 2798 lp->sl_add = strstr((char *)gettail(fname), SPL_FNAME_ADD) != NULL; 2799 } 2800 else 2801 lp = old_lp; 2802 2803 /* Set sourcing_name, so that error messages mention the file name. 
*/ 2804 sourcing_name = fname; 2805 sourcing_lnum = 0; 2806 2807 /* 2808 * <HEADER>: <fileID> 2809 */ 2810 for (i = 0; i < VIMSPELLMAGICL; ++i) 2811 buf[i] = getc(fd); /* <fileID> */ 2812 if (STRNCMP(buf, VIMSPELLMAGIC, VIMSPELLMAGICL) != 0) 2813 { 2814 EMSG(_("E757: This does not look like a spell file")); 2815 goto endFAIL; 2816 } 2817 c = getc(fd); /* <versionnr> */ 2818 if (c < VIMSPELLVERSION) 2819 { 2820 EMSG(_("E771: Old spell file, needs to be updated")); 2821 goto endFAIL; 2822 } 2823 else if (c > VIMSPELLVERSION) 2824 { 2825 EMSG(_("E772: Spell file is for newer version of Vim")); 2826 goto endFAIL; 2827 } 2828 2829 2830 /* 2831 * <SECTIONS>: <section> ... <sectionend> 2832 * <section>: <sectionID> <sectionflags> <sectionlen> (section contents) 2833 */ 2834 for (;;) 2835 { 2836 n = getc(fd); /* <sectionID> or <sectionend> */ 2837 if (n == SN_END) 2838 break; 2839 c = getc(fd); /* <sectionflags> */ 2840 len = get4c(fd); /* <sectionlen> */ 2841 if (len < 0) 2842 goto truncerr; 2843 2844 res = 0; 2845 switch (n) 2846 { 2847 case SN_INFO: 2848 lp->sl_info = read_string(fd, len); /* <infotext> */ 2849 if (lp->sl_info == NULL) 2850 goto endFAIL; 2851 break; 2852 2853 case SN_REGION: 2854 res = read_region_section(fd, lp, len); 2855 break; 2856 2857 case SN_CHARFLAGS: 2858 res = read_charflags_section(fd); 2859 break; 2860 2861 case SN_MIDWORD: 2862 lp->sl_midword = read_string(fd, len); /* <midword> */ 2863 if (lp->sl_midword == NULL) 2864 goto endFAIL; 2865 break; 2866 2867 case SN_PREFCOND: 2868 res = read_prefcond_section(fd, lp); 2869 break; 2870 2871 case SN_REP: 2872 res = read_rep_section(fd, &lp->sl_rep, lp->sl_rep_first); 2873 break; 2874 2875 case SN_REPSAL: 2876 res = read_rep_section(fd, &lp->sl_repsal, lp->sl_repsal_first); 2877 break; 2878 2879 case SN_SAL: 2880 res = read_sal_section(fd, lp); 2881 break; 2882 2883 case SN_SOFO: 2884 res = read_sofo_section(fd, lp); 2885 break; 2886 2887 case SN_MAP: 2888 p = read_string(fd, len); /* <mapstr> */ 
2889 if (p == NULL) 2890 goto endFAIL; 2891 set_map_str(lp, p); 2892 vim_free(p); 2893 break; 2894 2895 case SN_WORDS: 2896 res = read_words_section(fd, lp, len); 2897 break; 2898 2899 case SN_SUGFILE: 2900 lp->sl_sugtime = get8ctime(fd); /* <timestamp> */ 2901 break; 2902 2903 case SN_NOSPLITSUGS: 2904 lp->sl_nosplitsugs = TRUE; 2905 break; 2906 2907 case SN_NOCOMPOUNDSUGS: 2908 lp->sl_nocompoundsugs = TRUE; 2909 break; 2910 2911 case SN_COMPOUND: 2912 res = read_compound(fd, lp, len); 2913 break; 2914 2915 case SN_NOBREAK: 2916 lp->sl_nobreak = TRUE; 2917 break; 2918 2919 case SN_SYLLABLE: 2920 lp->sl_syllable = read_string(fd, len); /* <syllable> */ 2921 if (lp->sl_syllable == NULL) 2922 goto endFAIL; 2923 if (init_syl_tab(lp) == FAIL) 2924 goto endFAIL; 2925 break; 2926 2927 default: 2928 /* Unsupported section. When it's required give an error 2929 * message. When it's not required skip the contents. */ 2930 if (c & SNF_REQUIRED) 2931 { 2932 EMSG(_("E770: Unsupported section in spell file")); 2933 goto endFAIL; 2934 } 2935 while (--len >= 0) 2936 if (getc(fd) < 0) 2937 goto truncerr; 2938 break; 2939 } 2940 someerror: 2941 if (res == SP_FORMERROR) 2942 { 2943 EMSG(_(e_format)); 2944 goto endFAIL; 2945 } 2946 if (res == SP_TRUNCERROR) 2947 { 2948 truncerr: 2949 EMSG(_(e_spell_trunc)); 2950 goto endFAIL; 2951 } 2952 if (res == SP_OTHERERROR) 2953 goto endFAIL; 2954 } 2955 2956 /* <LWORDTREE> */ 2957 res = spell_read_tree(fd, &lp->sl_fbyts, &lp->sl_fidxs, FALSE, 0); 2958 if (res != 0) 2959 goto someerror; 2960 2961 /* <KWORDTREE> */ 2962 res = spell_read_tree(fd, &lp->sl_kbyts, &lp->sl_kidxs, FALSE, 0); 2963 if (res != 0) 2964 goto someerror; 2965 2966 /* <PREFIXTREE> */ 2967 res = spell_read_tree(fd, &lp->sl_pbyts, &lp->sl_pidxs, TRUE, 2968 lp->sl_prefixcnt); 2969 if (res != 0) 2970 goto someerror; 2971 2972 /* For a new file link it in the list of spell files. 
*/ 2973 if (old_lp == NULL && lang != NULL) 2974 { 2975 lp->sl_next = first_lang; 2976 first_lang = lp; 2977 } 2978 2979 goto endOK; 2980 2981 endFAIL: 2982 if (lang != NULL) 2983 /* truncating the name signals the error to spell_load_lang() */ 2984 *lang = NUL; 2985 if (lp != NULL && old_lp == NULL) 2986 slang_free(lp); 2987 lp = NULL; 2988 2989 endOK: 2990 if (fd != NULL) 2991 fclose(fd); 2992 sourcing_name = save_sourcing_name; 2993 sourcing_lnum = save_sourcing_lnum; 2994 2995 return lp; 2996 } 2997 2998 /* 2999 * Read a length field from "fd" in "cnt_bytes" bytes. 3000 * Allocate memory, read the string into it and add a NUL at the end. 3001 * Returns NULL when the count is zero. 3002 * Sets "*cntp" to SP_*ERROR when there is an error, length of the result 3003 * otherwise. 3004 */ 3005 static char_u * 3006 read_cnt_string(FILE *fd, int cnt_bytes, int *cntp) 3007 { 3008 int cnt = 0; 3009 int i; 3010 char_u *str; 3011 3012 /* read the length bytes, MSB first */ 3013 for (i = 0; i < cnt_bytes; ++i) 3014 cnt = (cnt << 8) + getc(fd); 3015 if (cnt < 0) 3016 { 3017 *cntp = SP_TRUNCERROR; 3018 return NULL; 3019 } 3020 *cntp = cnt; 3021 if (cnt == 0) 3022 return NULL; /* nothing to read, return NULL */ 3023 3024 str = read_string(fd, cnt); 3025 if (str == NULL) 3026 *cntp = SP_OTHERERROR; 3027 return str; 3028 } 3029 3030 /* 3031 * Read SN_REGION: <regionname> ... 3032 * Return SP_*ERROR flags. 3033 */ 3034 static int 3035 read_region_section(FILE *fd, slang_T *lp, int len) 3036 { 3037 int i; 3038 3039 if (len > 16) 3040 return SP_FORMERROR; 3041 for (i = 0; i < len; ++i) 3042 lp->sl_regions[i] = getc(fd); /* <regionname> */ 3043 lp->sl_regions[len] = NUL; 3044 return 0; 3045 } 3046 3047 /* 3048 * Read SN_CHARFLAGS section: <charflagslen> <charflags> 3049 * <folcharslen> <folchars> 3050 * Return SP_*ERROR flags. 
3051 */ 3052 static int 3053 read_charflags_section(FILE *fd) 3054 { 3055 char_u *flags; 3056 char_u *fol; 3057 int flagslen, follen; 3058 3059 /* <charflagslen> <charflags> */ 3060 flags = read_cnt_string(fd, 1, &flagslen); 3061 if (flagslen < 0) 3062 return flagslen; 3063 3064 /* <folcharslen> <folchars> */ 3065 fol = read_cnt_string(fd, 2, &follen); 3066 if (follen < 0) 3067 { 3068 vim_free(flags); 3069 return follen; 3070 } 3071 3072 /* Set the word-char flags and fill SPELL_ISUPPER() table. */ 3073 if (flags != NULL && fol != NULL) 3074 set_spell_charflags(flags, flagslen, fol); 3075 3076 vim_free(flags); 3077 vim_free(fol); 3078 3079 /* When <charflagslen> is zero then <fcharlen> must also be zero. */ 3080 if ((flags == NULL) != (fol == NULL)) 3081 return SP_FORMERROR; 3082 return 0; 3083 } 3084 3085 /* 3086 * Read SN_PREFCOND section. 3087 * Return SP_*ERROR flags. 3088 */ 3089 static int 3090 read_prefcond_section(FILE *fd, slang_T *lp) 3091 { 3092 int cnt; 3093 int i; 3094 int n; 3095 char_u *p; 3096 char_u buf[MAXWLEN + 1]; 3097 3098 /* <prefcondcnt> <prefcond> ... */ 3099 cnt = get2c(fd); /* <prefcondcnt> */ 3100 if (cnt <= 0) 3101 return SP_FORMERROR; 3102 3103 lp->sl_prefprog = (regprog_T **)alloc_clear( 3104 (unsigned)sizeof(regprog_T *) * cnt); 3105 if (lp->sl_prefprog == NULL) 3106 return SP_OTHERERROR; 3107 lp->sl_prefixcnt = cnt; 3108 3109 for (i = 0; i < cnt; ++i) 3110 { 3111 /* <prefcond> : <condlen> <condstr> */ 3112 n = getc(fd); /* <condlen> */ 3113 if (n < 0 || n >= MAXWLEN) 3114 return SP_FORMERROR; 3115 3116 /* When <condlen> is zero we have an empty condition. Otherwise 3117 * compile the regexp program used to check for the condition. 
*/ 3118 if (n > 0) 3119 { 3120 buf[0] = '^'; /* always match at one position only */ 3121 p = buf + 1; 3122 while (n-- > 0) 3123 *p++ = getc(fd); /* <condstr> */ 3124 *p = NUL; 3125 lp->sl_prefprog[i] = vim_regcomp(buf, RE_MAGIC + RE_STRING); 3126 } 3127 } 3128 return 0; 3129 } 3130 3131 /* 3132 * Read REP or REPSAL items section from "fd": <repcount> <rep> ... 3133 * Return SP_*ERROR flags. 3134 */ 3135 static int 3136 read_rep_section(FILE *fd, garray_T *gap, short *first) 3137 { 3138 int cnt; 3139 fromto_T *ftp; 3140 int i; 3141 3142 cnt = get2c(fd); /* <repcount> */ 3143 if (cnt < 0) 3144 return SP_TRUNCERROR; 3145 3146 if (ga_grow(gap, cnt) == FAIL) 3147 return SP_OTHERERROR; 3148 3149 /* <rep> : <repfromlen> <repfrom> <reptolen> <repto> */ 3150 for (; gap->ga_len < cnt; ++gap->ga_len) 3151 { 3152 ftp = &((fromto_T *)gap->ga_data)[gap->ga_len]; 3153 ftp->ft_from = read_cnt_string(fd, 1, &i); 3154 if (i < 0) 3155 return i; 3156 if (i == 0) 3157 return SP_FORMERROR; 3158 ftp->ft_to = read_cnt_string(fd, 1, &i); 3159 if (i <= 0) 3160 { 3161 vim_free(ftp->ft_from); 3162 if (i < 0) 3163 return i; 3164 return SP_FORMERROR; 3165 } 3166 } 3167 3168 /* Fill the first-index table. */ 3169 for (i = 0; i < 256; ++i) 3170 first[i] = -1; 3171 for (i = 0; i < gap->ga_len; ++i) 3172 { 3173 ftp = &((fromto_T *)gap->ga_data)[i]; 3174 if (first[*ftp->ft_from] == -1) 3175 first[*ftp->ft_from] = i; 3176 } 3177 return 0; 3178 } 3179 3180 /* 3181 * Read SN_SAL section: <salflags> <salcount> <sal> ... 3182 * Return SP_*ERROR flags. 
3183 */ 3184 static int 3185 read_sal_section(FILE *fd, slang_T *slang) 3186 { 3187 int i; 3188 int cnt; 3189 garray_T *gap; 3190 salitem_T *smp; 3191 int ccnt; 3192 char_u *p; 3193 int c = NUL; 3194 3195 slang->sl_sofo = FALSE; 3196 3197 i = getc(fd); /* <salflags> */ 3198 if (i & SAL_F0LLOWUP) 3199 slang->sl_followup = TRUE; 3200 if (i & SAL_COLLAPSE) 3201 slang->sl_collapse = TRUE; 3202 if (i & SAL_REM_ACCENTS) 3203 slang->sl_rem_accents = TRUE; 3204 3205 cnt = get2c(fd); /* <salcount> */ 3206 if (cnt < 0) 3207 return SP_TRUNCERROR; 3208 3209 gap = &slang->sl_sal; 3210 ga_init2(gap, sizeof(salitem_T), 10); 3211 if (ga_grow(gap, cnt + 1) == FAIL) 3212 return SP_OTHERERROR; 3213 3214 /* <sal> : <salfromlen> <salfrom> <saltolen> <salto> */ 3215 for (; gap->ga_len < cnt; ++gap->ga_len) 3216 { 3217 smp = &((salitem_T *)gap->ga_data)[gap->ga_len]; 3218 ccnt = getc(fd); /* <salfromlen> */ 3219 if (ccnt < 0) 3220 return SP_TRUNCERROR; 3221 if ((p = alloc(ccnt + 2)) == NULL) 3222 return SP_OTHERERROR; 3223 smp->sm_lead = p; 3224 3225 /* Read up to the first special char into sm_lead. */ 3226 for (i = 0; i < ccnt; ++i) 3227 { 3228 c = getc(fd); /* <salfrom> */ 3229 if (vim_strchr((char_u *)"0123456789(-<^$", c) != NULL) 3230 break; 3231 *p++ = c; 3232 } 3233 smp->sm_leadlen = (int)(p - smp->sm_lead); 3234 *p++ = NUL; 3235 3236 /* Put (abc) chars in sm_oneof, if any. */ 3237 if (c == '(') 3238 { 3239 smp->sm_oneof = p; 3240 for (++i; i < ccnt; ++i) 3241 { 3242 c = getc(fd); /* <salfrom> */ 3243 if (c == ')') 3244 break; 3245 *p++ = c; 3246 } 3247 *p++ = NUL; 3248 if (++i < ccnt) 3249 c = getc(fd); 3250 } 3251 else 3252 smp->sm_oneof = NULL; 3253 3254 /* Any following chars go in sm_rules. 
*/ 3255 smp->sm_rules = p; 3256 if (i < ccnt) 3257 /* store the char we got while checking for end of sm_lead */ 3258 *p++ = c; 3259 for (++i; i < ccnt; ++i) 3260 *p++ = getc(fd); /* <salfrom> */ 3261 *p++ = NUL; 3262 3263 /* <saltolen> <salto> */ 3264 smp->sm_to = read_cnt_string(fd, 1, &ccnt); 3265 if (ccnt < 0) 3266 { 3267 vim_free(smp->sm_lead); 3268 return ccnt; 3269 } 3270 3271 #ifdef FEAT_MBYTE 3272 if (has_mbyte) 3273 { 3274 /* convert the multi-byte strings to wide char strings */ 3275 smp->sm_lead_w = mb_str2wide(smp->sm_lead); 3276 smp->sm_leadlen = mb_charlen(smp->sm_lead); 3277 if (smp->sm_oneof == NULL) 3278 smp->sm_oneof_w = NULL; 3279 else 3280 smp->sm_oneof_w = mb_str2wide(smp->sm_oneof); 3281 if (smp->sm_to == NULL) 3282 smp->sm_to_w = NULL; 3283 else 3284 smp->sm_to_w = mb_str2wide(smp->sm_to); 3285 if (smp->sm_lead_w == NULL 3286 || (smp->sm_oneof_w == NULL && smp->sm_oneof != NULL) 3287 || (smp->sm_to_w == NULL && smp->sm_to != NULL)) 3288 { 3289 vim_free(smp->sm_lead); 3290 vim_free(smp->sm_to); 3291 vim_free(smp->sm_lead_w); 3292 vim_free(smp->sm_oneof_w); 3293 vim_free(smp->sm_to_w); 3294 return SP_OTHERERROR; 3295 } 3296 } 3297 #endif 3298 } 3299 3300 if (gap->ga_len > 0) 3301 { 3302 /* Add one extra entry to mark the end with an empty sm_lead. Avoids 3303 * that we need to check the index every time. */ 3304 smp = &((salitem_T *)gap->ga_data)[gap->ga_len]; 3305 if ((p = alloc(1)) == NULL) 3306 return SP_OTHERERROR; 3307 p[0] = NUL; 3308 smp->sm_lead = p; 3309 smp->sm_leadlen = 0; 3310 smp->sm_oneof = NULL; 3311 smp->sm_rules = p; 3312 smp->sm_to = NULL; 3313 #ifdef FEAT_MBYTE 3314 if (has_mbyte) 3315 { 3316 smp->sm_lead_w = mb_str2wide(smp->sm_lead); 3317 smp->sm_leadlen = 0; 3318 smp->sm_oneof_w = NULL; 3319 smp->sm_to_w = NULL; 3320 } 3321 #endif 3322 ++gap->ga_len; 3323 } 3324 3325 /* Fill the first-index table. */ 3326 set_sal_first(slang); 3327 3328 return 0; 3329 } 3330 3331 /* 3332 * Read SN_WORDS: <word> ... 
3333 * Return SP_*ERROR flags. 3334 */ 3335 static int 3336 read_words_section(FILE *fd, slang_T *lp, int len) 3337 { 3338 int done = 0; 3339 int i; 3340 int c; 3341 char_u word[MAXWLEN]; 3342 3343 while (done < len) 3344 { 3345 /* Read one word at a time. */ 3346 for (i = 0; ; ++i) 3347 { 3348 c = getc(fd); 3349 if (c == EOF) 3350 return SP_TRUNCERROR; 3351 word[i] = c; 3352 if (word[i] == NUL) 3353 break; 3354 if (i == MAXWLEN - 1) 3355 return SP_FORMERROR; 3356 } 3357 3358 /* Init the count to 10. */ 3359 count_common_word(lp, word, -1, 10); 3360 done += i + 1; 3361 } 3362 return 0; 3363 } 3364 3365 /* 3366 * Add a word to the hashtable of common words. 3367 * If it's already there then the counter is increased. 3368 */ 3369 static void 3370 count_common_word( 3371 slang_T *lp, 3372 char_u *word, 3373 int len, /* word length, -1 for upto NUL */ 3374 int count) /* 1 to count once, 10 to init */ 3375 { 3376 hash_T hash; 3377 hashitem_T *hi; 3378 wordcount_T *wc; 3379 char_u buf[MAXWLEN]; 3380 char_u *p; 3381 3382 if (len == -1) 3383 p = word; 3384 else 3385 { 3386 vim_strncpy(buf, word, len); 3387 p = buf; 3388 } 3389 3390 hash = hash_hash(p); 3391 hi = hash_lookup(&lp->sl_wordcount, p, hash); 3392 if (HASHITEM_EMPTY(hi)) 3393 { 3394 wc = (wordcount_T *)alloc((unsigned)(sizeof(wordcount_T) + STRLEN(p))); 3395 if (wc == NULL) 3396 return; 3397 STRCPY(wc->wc_word, p); 3398 wc->wc_count = count; 3399 hash_add_item(&lp->sl_wordcount, hi, wc->wc_word, hash); 3400 } 3401 else 3402 { 3403 wc = HI2WC(hi); 3404 if ((wc->wc_count += count) < (unsigned)count) /* check for overflow */ 3405 wc->wc_count = MAXWORDCOUNT; 3406 } 3407 } 3408 3409 /* 3410 * Adjust the score of common words. 
3411 */ 3412 static int 3413 score_wordcount_adj( 3414 slang_T *slang, 3415 int score, 3416 char_u *word, 3417 int split) /* word was split, less bonus */ 3418 { 3419 hashitem_T *hi; 3420 wordcount_T *wc; 3421 int bonus; 3422 int newscore; 3423 3424 hi = hash_find(&slang->sl_wordcount, word); 3425 if (!HASHITEM_EMPTY(hi)) 3426 { 3427 wc = HI2WC(hi); 3428 if (wc->wc_count < SCORE_THRES2) 3429 bonus = SCORE_COMMON1; 3430 else if (wc->wc_count < SCORE_THRES3) 3431 bonus = SCORE_COMMON2; 3432 else 3433 bonus = SCORE_COMMON3; 3434 if (split) 3435 newscore = score - bonus / 2; 3436 else 3437 newscore = score - bonus; 3438 if (newscore < 0) 3439 return 0; 3440 return newscore; 3441 } 3442 return score; 3443 } 3444 3445 /* 3446 * SN_SOFO: <sofofromlen> <sofofrom> <sofotolen> <sofoto> 3447 * Return SP_*ERROR flags. 3448 */ 3449 static int 3450 read_sofo_section(FILE *fd, slang_T *slang) 3451 { 3452 int cnt; 3453 char_u *from, *to; 3454 int res; 3455 3456 slang->sl_sofo = TRUE; 3457 3458 /* <sofofromlen> <sofofrom> */ 3459 from = read_cnt_string(fd, 2, &cnt); 3460 if (cnt < 0) 3461 return cnt; 3462 3463 /* <sofotolen> <sofoto> */ 3464 to = read_cnt_string(fd, 2, &cnt); 3465 if (cnt < 0) 3466 { 3467 vim_free(from); 3468 return cnt; 3469 } 3470 3471 /* Store the info in slang->sl_sal and/or slang->sl_sal_first. */ 3472 if (from != NULL && to != NULL) 3473 res = set_sofo(slang, from, to); 3474 else if (from != NULL || to != NULL) 3475 res = SP_FORMERROR; /* only one of two strings is an error */ 3476 else 3477 res = 0; 3478 3479 vim_free(from); 3480 vim_free(to); 3481 return res; 3482 } 3483 3484 /* 3485 * Read the compound section from the .spl file: 3486 * <compmax> <compminlen> <compsylmax> <compoptions> <compflags> 3487 * Returns SP_*ERROR flags. 
3488 */ 3489 static int 3490 read_compound(FILE *fd, slang_T *slang, int len) 3491 { 3492 int todo = len; 3493 int c; 3494 int atstart; 3495 char_u *pat; 3496 char_u *pp; 3497 char_u *cp; 3498 char_u *ap; 3499 char_u *crp; 3500 int cnt; 3501 garray_T *gap; 3502 3503 if (todo < 2) 3504 return SP_FORMERROR; /* need at least two bytes */ 3505 3506 --todo; 3507 c = getc(fd); /* <compmax> */ 3508 if (c < 2) 3509 c = MAXWLEN; 3510 slang->sl_compmax = c; 3511 3512 --todo; 3513 c = getc(fd); /* <compminlen> */ 3514 if (c < 1) 3515 c = 0; 3516 slang->sl_compminlen = c; 3517 3518 --todo; 3519 c = getc(fd); /* <compsylmax> */ 3520 if (c < 1) 3521 c = MAXWLEN; 3522 slang->sl_compsylmax = c; 3523 3524 c = getc(fd); /* <compoptions> */ 3525 if (c != 0) 3526 ungetc(c, fd); /* be backwards compatible with Vim 7.0b */ 3527 else 3528 { 3529 --todo; 3530 c = getc(fd); /* only use the lower byte for now */ 3531 --todo; 3532 slang->sl_compoptions = c; 3533 3534 gap = &slang->sl_comppat; 3535 c = get2c(fd); /* <comppatcount> */ 3536 todo -= 2; 3537 ga_init2(gap, sizeof(char_u *), c); 3538 if (ga_grow(gap, c) == OK) 3539 while (--c >= 0) 3540 { 3541 ((char_u **)(gap->ga_data))[gap->ga_len++] = 3542 read_cnt_string(fd, 1, &cnt); 3543 /* <comppatlen> <comppattext> */ 3544 if (cnt < 0) 3545 return cnt; 3546 todo -= cnt + 1; 3547 } 3548 } 3549 if (todo < 0) 3550 return SP_FORMERROR; 3551 3552 /* Turn the COMPOUNDRULE items into a regexp pattern: 3553 * "a[bc]/a*b+" -> "^\(a[bc]\|a*b\+\)$". 3554 * Inserting backslashes may double the length, "^\(\)$<Nul>" is 7 bytes. 3555 * Conversion to utf-8 may double the size. */ 3556 c = todo * 2 + 7; 3557 #ifdef FEAT_MBYTE 3558 if (enc_utf8) 3559 c += todo * 2; 3560 #endif 3561 pat = alloc((unsigned)c); 3562 if (pat == NULL) 3563 return SP_OTHERERROR; 3564 3565 /* We also need a list of all flags that can appear at the start and one 3566 * for all flags. 
*/ 3567 cp = alloc(todo + 1); 3568 if (cp == NULL) 3569 { 3570 vim_free(pat); 3571 return SP_OTHERERROR; 3572 } 3573 slang->sl_compstartflags = cp; 3574 *cp = NUL; 3575 3576 ap = alloc(todo + 1); 3577 if (ap == NULL) 3578 { 3579 vim_free(pat); 3580 return SP_OTHERERROR; 3581 } 3582 slang->sl_compallflags = ap; 3583 *ap = NUL; 3584 3585 /* And a list of all patterns in their original form, for checking whether 3586 * compounding may work in match_compoundrule(). This is freed when we 3587 * encounter a wildcard, the check doesn't work then. */ 3588 crp = alloc(todo + 1); 3589 slang->sl_comprules = crp; 3590 3591 pp = pat; 3592 *pp++ = '^'; 3593 *pp++ = '\\'; 3594 *pp++ = '('; 3595 3596 atstart = 1; 3597 while (todo-- > 0) 3598 { 3599 c = getc(fd); /* <compflags> */ 3600 if (c == EOF) 3601 { 3602 vim_free(pat); 3603 return SP_TRUNCERROR; 3604 } 3605 3606 /* Add all flags to "sl_compallflags". */ 3607 if (vim_strchr((char_u *)"?*+[]/", c) == NULL 3608 && !byte_in_str(slang->sl_compallflags, c)) 3609 { 3610 *ap++ = c; 3611 *ap = NUL; 3612 } 3613 3614 if (atstart != 0) 3615 { 3616 /* At start of item: copy flags to "sl_compstartflags". For a 3617 * [abc] item set "atstart" to 2 and copy up to the ']'. */ 3618 if (c == '[') 3619 atstart = 2; 3620 else if (c == ']') 3621 atstart = 0; 3622 else 3623 { 3624 if (!byte_in_str(slang->sl_compstartflags, c)) 3625 { 3626 *cp++ = c; 3627 *cp = NUL; 3628 } 3629 if (atstart == 1) 3630 atstart = 0; 3631 } 3632 } 3633 3634 /* Copy flag to "sl_comprules", unless we run into a wildcard. */ 3635 if (crp != NULL) 3636 { 3637 if (c == '?' || c == '+' || c == '*') 3638 { 3639 vim_free(slang->sl_comprules); 3640 slang->sl_comprules = NULL; 3641 crp = NULL; 3642 } 3643 else 3644 *crp++ = c; 3645 } 3646 3647 if (c == '/') /* slash separates two items */ 3648 { 3649 *pp++ = '\\'; 3650 *pp++ = '|'; 3651 atstart = 1; 3652 } 3653 else /* normal char, "[abc]" and '*' are copied as-is */ 3654 { 3655 if (c == '?' 
|| c == '+' || c == '~') 3656 *pp++ = '\\'; /* "a?" becomes "a\?", "a+" becomes "a\+" */ 3657 #ifdef FEAT_MBYTE 3658 if (enc_utf8) 3659 pp += mb_char2bytes(c, pp); 3660 else 3661 #endif 3662 *pp++ = c; 3663 } 3664 } 3665 3666 *pp++ = '\\'; 3667 *pp++ = ')'; 3668 *pp++ = '$'; 3669 *pp = NUL; 3670 3671 if (crp != NULL) 3672 *crp = NUL; 3673 3674 slang->sl_compprog = vim_regcomp(pat, RE_MAGIC + RE_STRING + RE_STRICT); 3675 vim_free(pat); 3676 if (slang->sl_compprog == NULL) 3677 return SP_FORMERROR; 3678 3679 return 0; 3680 } 3681 3682 /* 3683 * Return TRUE if byte "n" appears in "str". 3684 * Like strchr() but independent of locale. 3685 */ 3686 static int 3687 byte_in_str(char_u *str, int n) 3688 { 3689 char_u *p; 3690 3691 for (p = str; *p != NUL; ++p) 3692 if (*p == n) 3693 return TRUE; 3694 return FALSE; 3695 } 3696 3697 #define SY_MAXLEN 30 3698 typedef struct syl_item_S 3699 { 3700 char_u sy_chars[SY_MAXLEN]; /* the sequence of chars */ 3701 int sy_len; 3702 } syl_item_T; 3703 3704 /* 3705 * Truncate "slang->sl_syllable" at the first slash and put the following items 3706 * in "slang->sl_syl_items". 3707 */ 3708 static int 3709 init_syl_tab(slang_T *slang) 3710 { 3711 char_u *p; 3712 char_u *s; 3713 int l; 3714 syl_item_T *syl; 3715 3716 ga_init2(&slang->sl_syl_items, sizeof(syl_item_T), 4); 3717 p = vim_strchr(slang->sl_syllable, '/'); 3718 while (p != NULL) 3719 { 3720 *p++ = NUL; 3721 if (*p == NUL) /* trailing slash */ 3722 break; 3723 s = p; 3724 p = vim_strchr(p, '/'); 3725 if (p == NULL) 3726 l = (int)STRLEN(s); 3727 else 3728 l = (int)(p - s); 3729 if (l >= SY_MAXLEN) 3730 return SP_FORMERROR; 3731 if (ga_grow(&slang->sl_syl_items, 1) == FAIL) 3732 return SP_OTHERERROR; 3733 syl = ((syl_item_T *)slang->sl_syl_items.ga_data) 3734 + slang->sl_syl_items.ga_len++; 3735 vim_strncpy(syl->sy_chars, s, l); 3736 syl->sy_len = l; 3737 } 3738 return OK; 3739 } 3740 3741 /* 3742 * Count the number of syllables in "word". 
3743 * When "word" contains spaces the syllables after the last space are counted. 3744 * Returns zero if syllables are not defines. 3745 */ 3746 static int 3747 count_syllables(slang_T *slang, char_u *word) 3748 { 3749 int cnt = 0; 3750 int skip = FALSE; 3751 char_u *p; 3752 int len; 3753 int i; 3754 syl_item_T *syl; 3755 int c; 3756 3757 if (slang->sl_syllable == NULL) 3758 return 0; 3759 3760 for (p = word; *p != NUL; p += len) 3761 { 3762 /* When running into a space reset counter. */ 3763 if (*p == ' ') 3764 { 3765 len = 1; 3766 cnt = 0; 3767 continue; 3768 } 3769 3770 /* Find longest match of syllable items. */ 3771 len = 0; 3772 for (i = 0; i < slang->sl_syl_items.ga_len; ++i) 3773 { 3774 syl = ((syl_item_T *)slang->sl_syl_items.ga_data) + i; 3775 if (syl->sy_len > len 3776 && STRNCMP(p, syl->sy_chars, syl->sy_len) == 0) 3777 len = syl->sy_len; 3778 } 3779 if (len != 0) /* found a match, count syllable */ 3780 { 3781 ++cnt; 3782 skip = FALSE; 3783 } 3784 else 3785 { 3786 /* No recognized syllable item, at least a syllable char then? */ 3787 #ifdef FEAT_MBYTE 3788 c = mb_ptr2char(p); 3789 len = (*mb_ptr2len)(p); 3790 #else 3791 c = *p; 3792 len = 1; 3793 #endif 3794 if (vim_strchr(slang->sl_syllable, c) == NULL) 3795 skip = FALSE; /* No, search for next syllable */ 3796 else if (!skip) 3797 { 3798 ++cnt; /* Yes, count it */ 3799 skip = TRUE; /* don't count following syllable chars */ 3800 } 3801 } 3802 } 3803 return cnt; 3804 } 3805 3806 /* 3807 * Set the SOFOFROM and SOFOTO items in language "lp". 3808 * Returns SP_*ERROR flags when there is something wrong. 3809 */ 3810 static int 3811 set_sofo(slang_T *lp, char_u *from, char_u *to) 3812 { 3813 int i; 3814 3815 #ifdef FEAT_MBYTE 3816 garray_T *gap; 3817 char_u *s; 3818 char_u *p; 3819 int c; 3820 int *inp; 3821 3822 if (has_mbyte) 3823 { 3824 /* Use "sl_sal" as an array with 256 pointers to a list of wide 3825 * characters. The index is the low byte of the character. 
3826 * The list contains from-to pairs with a terminating NUL. 3827 * sl_sal_first[] is used for latin1 "from" characters. */ 3828 gap = &lp->sl_sal; 3829 ga_init2(gap, sizeof(int *), 1); 3830 if (ga_grow(gap, 256) == FAIL) 3831 return SP_OTHERERROR; 3832 vim_memset(gap->ga_data, 0, sizeof(int *) * 256); 3833 gap->ga_len = 256; 3834 3835 /* First count the number of items for each list. Temporarily use 3836 * sl_sal_first[] for this. */ 3837 for (p = from, s = to; *p != NUL && *s != NUL; ) 3838 { 3839 c = mb_cptr2char_adv(&p); 3840 mb_cptr_adv(s); 3841 if (c >= 256) 3842 ++lp->sl_sal_first[c & 0xff]; 3843 } 3844 if (*p != NUL || *s != NUL) /* lengths differ */ 3845 return SP_FORMERROR; 3846 3847 /* Allocate the lists. */ 3848 for (i = 0; i < 256; ++i) 3849 if (lp->sl_sal_first[i] > 0) 3850 { 3851 p = alloc(sizeof(int) * (lp->sl_sal_first[i] * 2 + 1)); 3852 if (p == NULL) 3853 return SP_OTHERERROR; 3854 ((int **)gap->ga_data)[i] = (int *)p; 3855 *(int *)p = 0; 3856 } 3857 3858 /* Put the characters up to 255 in sl_sal_first[] the rest in a sl_sal 3859 * list. */ 3860 vim_memset(lp->sl_sal_first, 0, sizeof(salfirst_T) * 256); 3861 for (p = from, s = to; *p != NUL && *s != NUL; ) 3862 { 3863 c = mb_cptr2char_adv(&p); 3864 i = mb_cptr2char_adv(&s); 3865 if (c >= 256) 3866 { 3867 /* Append the from-to chars at the end of the list with 3868 * the low byte. 
*/ 3869 inp = ((int **)gap->ga_data)[c & 0xff]; 3870 while (*inp != 0) 3871 ++inp; 3872 *inp++ = c; /* from char */ 3873 *inp++ = i; /* to char */ 3874 *inp++ = NUL; /* NUL at the end */ 3875 } 3876 else 3877 /* mapping byte to char is done in sl_sal_first[] */ 3878 lp->sl_sal_first[c] = i; 3879 } 3880 } 3881 else 3882 #endif 3883 { 3884 /* mapping bytes to bytes is done in sl_sal_first[] */ 3885 if (STRLEN(from) != STRLEN(to)) 3886 return SP_FORMERROR; 3887 3888 for (i = 0; to[i] != NUL; ++i) 3889 lp->sl_sal_first[from[i]] = to[i]; 3890 lp->sl_sal.ga_len = 1; /* indicates we have soundfolding */ 3891 } 3892 3893 return 0; 3894 } 3895 3896 /* 3897 * Fill the first-index table for "lp". 3898 */ 3899 static void 3900 set_sal_first(slang_T *lp) 3901 { 3902 salfirst_T *sfirst; 3903 int i; 3904 salitem_T *smp; 3905 int c; 3906 garray_T *gap = &lp->sl_sal; 3907 3908 sfirst = lp->sl_sal_first; 3909 for (i = 0; i < 256; ++i) 3910 sfirst[i] = -1; 3911 smp = (salitem_T *)gap->ga_data; 3912 for (i = 0; i < gap->ga_len; ++i) 3913 { 3914 #ifdef FEAT_MBYTE 3915 if (has_mbyte) 3916 /* Use the lowest byte of the first character. For latin1 it's 3917 * the character, for other encodings it should differ for most 3918 * characters. */ 3919 c = *smp[i].sm_lead_w & 0xff; 3920 else 3921 #endif 3922 c = *smp[i].sm_lead; 3923 if (sfirst[c] == -1) 3924 { 3925 sfirst[c] = i; 3926 #ifdef FEAT_MBYTE 3927 if (has_mbyte) 3928 { 3929 int n; 3930 3931 /* Make sure all entries with this byte are following each 3932 * other. Move the ones that are in the wrong position. Do 3933 * keep the same ordering! */ 3934 while (i + 1 < gap->ga_len 3935 && (*smp[i + 1].sm_lead_w & 0xff) == c) 3936 /* Skip over entry with same index byte. */ 3937 ++i; 3938 3939 for (n = 1; i + n < gap->ga_len; ++n) 3940 if ((*smp[i + n].sm_lead_w & 0xff) == c) 3941 { 3942 salitem_T tsal; 3943 3944 /* Move entry with same index byte after the entries 3945 * we already found. 
*/ 3946 ++i; 3947 --n; 3948 tsal = smp[i + n]; 3949 mch_memmove(smp + i + 1, smp + i, 3950 sizeof(salitem_T) * n); 3951 smp[i] = tsal; 3952 } 3953 } 3954 #endif 3955 } 3956 } 3957 } 3958 3959 #ifdef FEAT_MBYTE 3960 /* 3961 * Turn a multi-byte string into a wide character string. 3962 * Return it in allocated memory (NULL for out-of-memory) 3963 */ 3964 static int * 3965 mb_str2wide(char_u *s) 3966 { 3967 int *res; 3968 char_u *p; 3969 int i = 0; 3970 3971 res = (int *)alloc(sizeof(int) * (mb_charlen(s) + 1)); 3972 if (res != NULL) 3973 { 3974 for (p = s; *p != NUL; ) 3975 res[i++] = mb_ptr2char_adv(&p); 3976 res[i] = NUL; 3977 } 3978 return res; 3979 } 3980 #endif 3981 3982 /* 3983 * Read a tree from the .spl or .sug file. 3984 * Allocates the memory and stores pointers in "bytsp" and "idxsp". 3985 * This is skipped when the tree has zero length. 3986 * Returns zero when OK, SP_ value for an error. 3987 */ 3988 static int 3989 spell_read_tree( 3990 FILE *fd, 3991 char_u **bytsp, 3992 idx_T **idxsp, 3993 int prefixtree, /* TRUE for the prefix tree */ 3994 int prefixcnt) /* when "prefixtree" is TRUE: prefix count */ 3995 { 3996 int len; 3997 int idx; 3998 char_u *bp; 3999 idx_T *ip; 4000 4001 /* The tree size was computed when writing the file, so that we can 4002 * allocate it as one long block. <nodecount> */ 4003 len = get4c(fd); 4004 if (len < 0) 4005 return SP_TRUNCERROR; 4006 if (len > 0) 4007 { 4008 /* Allocate the byte array. */ 4009 bp = lalloc((long_u)len, TRUE); 4010 if (bp == NULL) 4011 return SP_OTHERERROR; 4012 *bytsp = bp; 4013 4014 /* Allocate the index array. */ 4015 ip = (idx_T *)lalloc_clear((long_u)(len * sizeof(int)), TRUE); 4016 if (ip == NULL) 4017 return SP_OTHERERROR; 4018 *idxsp = ip; 4019 4020 /* Recursively read the tree and store it in the array. 
*/ 4021 idx = read_tree_node(fd, bp, ip, len, 0, prefixtree, prefixcnt); 4022 if (idx < 0) 4023 return idx; 4024 } 4025 return 0; 4026 } 4027 4028 /* 4029 * Read one row of siblings from the spell file and store it in the byte array 4030 * "byts" and index array "idxs". Recursively read the children. 4031 * 4032 * NOTE: The code here must match put_node()! 4033 * 4034 * Returns the index (>= 0) following the siblings. 4035 * Returns SP_TRUNCERROR if the file is shorter than expected. 4036 * Returns SP_FORMERROR if there is a format error. 4037 */ 4038 static idx_T 4039 read_tree_node( 4040 FILE *fd, 4041 char_u *byts, 4042 idx_T *idxs, 4043 int maxidx, /* size of arrays */ 4044 idx_T startidx, /* current index in "byts" and "idxs" */ 4045 int prefixtree, /* TRUE for reading PREFIXTREE */ 4046 int maxprefcondnr) /* maximum for <prefcondnr> */ 4047 { 4048 int len; 4049 int i; 4050 int n; 4051 idx_T idx = startidx; 4052 int c; 4053 int c2; 4054 #define SHARED_MASK 0x8000000 4055 4056 len = getc(fd); /* <siblingcount> */ 4057 if (len <= 0) 4058 return SP_TRUNCERROR; 4059 4060 if (startidx + len >= maxidx) 4061 return SP_FORMERROR; 4062 byts[idx++] = len; 4063 4064 /* Read the byte values, flag/region bytes and shared indexes. */ 4065 for (i = 1; i <= len; ++i) 4066 { 4067 c = getc(fd); /* <byte> */ 4068 if (c < 0) 4069 return SP_TRUNCERROR; 4070 if (c <= BY_SPECIAL) 4071 { 4072 if (c == BY_NOFLAGS && !prefixtree) 4073 { 4074 /* No flags, all regions. */ 4075 idxs[idx] = 0; 4076 c = 0; 4077 } 4078 else if (c != BY_INDEX) 4079 { 4080 if (prefixtree) 4081 { 4082 /* Read the optional pflags byte, the prefix ID and the 4083 * condition nr. In idxs[] store the prefix ID in the low 4084 * byte, the condition index shifted up 8 bits, the flags 4085 * shifted up 24 bits. 
*/ 4086 if (c == BY_FLAGS) 4087 c = getc(fd) << 24; /* <pflags> */ 4088 else 4089 c = 0; 4090 4091 c |= getc(fd); /* <affixID> */ 4092 4093 n = get2c(fd); /* <prefcondnr> */ 4094 if (n >= maxprefcondnr) 4095 return SP_FORMERROR; 4096 c |= (n << 8); 4097 } 4098 else /* c must be BY_FLAGS or BY_FLAGS2 */ 4099 { 4100 /* Read flags and optional region and prefix ID. In 4101 * idxs[] the flags go in the low two bytes, region above 4102 * that and prefix ID above the region. */ 4103 c2 = c; 4104 c = getc(fd); /* <flags> */ 4105 if (c2 == BY_FLAGS2) 4106 c = (getc(fd) << 8) + c; /* <flags2> */ 4107 if (c & WF_REGION) 4108 c = (getc(fd) << 16) + c; /* <region> */ 4109 if (c & WF_AFX) 4110 c = (getc(fd) << 24) + c; /* <affixID> */ 4111 } 4112 4113 idxs[idx] = c; 4114 c = 0; 4115 } 4116 else /* c == BY_INDEX */ 4117 { 4118 /* <nodeidx> */ 4119 n = get3c(fd); 4120 if (n < 0 || n >= maxidx) 4121 return SP_FORMERROR; 4122 idxs[idx] = n + SHARED_MASK; 4123 c = getc(fd); /* <xbyte> */ 4124 } 4125 } 4126 byts[idx++] = c; 4127 } 4128 4129 /* Recursively read the children for non-shared siblings. 4130 * Skip the end-of-word ones (zero byte value) and the shared ones (and 4131 * remove SHARED_MASK) */ 4132 for (i = 1; i <= len; ++i) 4133 if (byts[startidx + i] != 0) 4134 { 4135 if (idxs[startidx + i] & SHARED_MASK) 4136 idxs[startidx + i] &= ~SHARED_MASK; 4137 else 4138 { 4139 idxs[startidx + i] = idx; 4140 idx = read_tree_node(fd, byts, idxs, maxidx, idx, 4141 prefixtree, maxprefcondnr); 4142 if (idx < 0) 4143 break; 4144 } 4145 } 4146 4147 return idx; 4148 } 4149 4150 /* 4151 * Parse 'spelllang' and set w_s->b_langp accordingly. 4152 * Returns NULL if it's OK, an error message otherwise. 
4153 */ 4154 char_u * 4155 did_set_spelllang(win_T *wp) 4156 { 4157 garray_T ga; 4158 char_u *splp; 4159 char_u *region; 4160 char_u region_cp[3]; 4161 int filename; 4162 int region_mask; 4163 slang_T *slang; 4164 int c; 4165 char_u lang[MAXWLEN + 1]; 4166 char_u spf_name[MAXPATHL]; 4167 int len; 4168 char_u *p; 4169 int round; 4170 char_u *spf; 4171 char_u *use_region = NULL; 4172 int dont_use_region = FALSE; 4173 int nobreak = FALSE; 4174 int i, j; 4175 langp_T *lp, *lp2; 4176 static int recursive = FALSE; 4177 char_u *ret_msg = NULL; 4178 char_u *spl_copy; 4179 4180 /* We don't want to do this recursively. May happen when a language is 4181 * not available and the SpellFileMissing autocommand opens a new buffer 4182 * in which 'spell' is set. */ 4183 if (recursive) 4184 return NULL; 4185 recursive = TRUE; 4186 4187 ga_init2(&ga, sizeof(langp_T), 2); 4188 clear_midword(wp); 4189 4190 /* Make a copy of 'spelllang', the SpellFileMissing autocommands may change 4191 * it under our fingers. */ 4192 spl_copy = vim_strsave(wp->w_s->b_p_spl); 4193 if (spl_copy == NULL) 4194 goto theend; 4195 4196 #ifdef FEAT_MBYTE 4197 wp->w_s->b_cjk = 0; 4198 #endif 4199 4200 /* Loop over comma separated language names. */ 4201 for (splp = spl_copy; *splp != NUL; ) 4202 { 4203 /* Get one language name. */ 4204 copy_option_part(&splp, lang, MAXWLEN, ","); 4205 region = NULL; 4206 len = (int)STRLEN(lang); 4207 4208 if (STRCMP(lang, "cjk") == 0) 4209 { 4210 #ifdef FEAT_MBYTE 4211 wp->w_s->b_cjk = 1; 4212 #endif 4213 continue; 4214 } 4215 4216 /* If the name ends in ".spl" use it as the name of the spell file. 4217 * If there is a region name let "region" point to it and remove it 4218 * from the name. */ 4219 if (len > 4 && fnamecmp(lang + len - 4, ".spl") == 0) 4220 { 4221 filename = TRUE; 4222 4223 /* Locate a region and remove it from the file name. 
*/ 4224 p = vim_strchr(gettail(lang), '_'); 4225 if (p != NULL && ASCII_ISALPHA(p[1]) && ASCII_ISALPHA(p[2]) 4226 && !ASCII_ISALPHA(p[3])) 4227 { 4228 vim_strncpy(region_cp, p + 1, 2); 4229 mch_memmove(p, p + 3, len - (p - lang) - 2); 4230 len -= 3; 4231 region = region_cp; 4232 } 4233 else 4234 dont_use_region = TRUE; 4235 4236 /* Check if we loaded this language before. */ 4237 for (slang = first_lang; slang != NULL; slang = slang->sl_next) 4238 if (fullpathcmp(lang, slang->sl_fname, FALSE) == FPC_SAME) 4239 break; 4240 } 4241 else 4242 { 4243 filename = FALSE; 4244 if (len > 3 && lang[len - 3] == '_') 4245 { 4246 region = lang + len - 2; 4247 len -= 3; 4248 lang[len] = NUL; 4249 } 4250 else 4251 dont_use_region = TRUE; 4252 4253 /* Check if we loaded this language before. */ 4254 for (slang = first_lang; slang != NULL; slang = slang->sl_next) 4255 if (STRICMP(lang, slang->sl_name) == 0) 4256 break; 4257 } 4258 4259 if (region != NULL) 4260 { 4261 /* If the region differs from what was used before then don't 4262 * use it for 'spellfile'. */ 4263 if (use_region != NULL && STRCMP(region, use_region) != 0) 4264 dont_use_region = TRUE; 4265 use_region = region; 4266 } 4267 4268 /* If not found try loading the language now. */ 4269 if (slang == NULL) 4270 { 4271 if (filename) 4272 (void)spell_load_file(lang, lang, NULL, FALSE); 4273 else 4274 { 4275 spell_load_lang(lang); 4276 #ifdef FEAT_AUTOCMD 4277 /* SpellFileMissing autocommands may do anything, including 4278 * destroying the buffer we are using... */ 4279 if (!buf_valid(wp->w_buffer)) 4280 { 4281 ret_msg = (char_u *)"E797: SpellFileMissing autocommand deleted buffer"; 4282 goto theend; 4283 } 4284 #endif 4285 } 4286 } 4287 4288 /* 4289 * Loop over the languages, there can be several files for "lang". 4290 */ 4291 for (slang = first_lang; slang != NULL; slang = slang->sl_next) 4292 if (filename ? 
fullpathcmp(lang, slang->sl_fname, FALSE) == FPC_SAME 4293 : STRICMP(lang, slang->sl_name) == 0) 4294 { 4295 region_mask = REGION_ALL; 4296 if (!filename && region != NULL) 4297 { 4298 /* find region in sl_regions */ 4299 c = find_region(slang->sl_regions, region); 4300 if (c == REGION_ALL) 4301 { 4302 if (slang->sl_add) 4303 { 4304 if (*slang->sl_regions != NUL) 4305 /* This addition file is for other regions. */ 4306 region_mask = 0; 4307 } 4308 else 4309 /* This is probably an error. Give a warning and 4310 * accept the words anyway. */ 4311 smsg((char_u *) 4312 _("Warning: region %s not supported"), 4313 region); 4314 } 4315 else 4316 region_mask = 1 << c; 4317 } 4318 4319 if (region_mask != 0) 4320 { 4321 if (ga_grow(&ga, 1) == FAIL) 4322 { 4323 ga_clear(&ga); 4324 ret_msg = e_outofmem; 4325 goto theend; 4326 } 4327 LANGP_ENTRY(ga, ga.ga_len)->lp_slang = slang; 4328 LANGP_ENTRY(ga, ga.ga_len)->lp_region = region_mask; 4329 ++ga.ga_len; 4330 use_midword(slang, wp); 4331 if (slang->sl_nobreak) 4332 nobreak = TRUE; 4333 } 4334 } 4335 } 4336 4337 /* round 0: load int_wordlist, if possible. 4338 * round 1: load first name in 'spellfile'. 4339 * round 2: load second name in 'spellfile. 4340 * etc. */ 4341 spf = curwin->w_s->b_p_spf; 4342 for (round = 0; round == 0 || *spf != NUL; ++round) 4343 { 4344 if (round == 0) 4345 { 4346 /* Internal wordlist, if there is one. */ 4347 if (int_wordlist == NULL) 4348 continue; 4349 int_wordlist_spl(spf_name); 4350 } 4351 else 4352 { 4353 /* One entry in 'spellfile'. */ 4354 copy_option_part(&spf, spf_name, MAXPATHL - 5, ","); 4355 STRCAT(spf_name, ".spl"); 4356 4357 /* If it was already found above then skip it. */ 4358 for (c = 0; c < ga.ga_len; ++c) 4359 { 4360 p = LANGP_ENTRY(ga, c)->lp_slang->sl_fname; 4361 if (p != NULL && fullpathcmp(spf_name, p, FALSE) == FPC_SAME) 4362 break; 4363 } 4364 if (c < ga.ga_len) 4365 continue; 4366 } 4367 4368 /* Check if it was loaded already. 
*/ 4369 for (slang = first_lang; slang != NULL; slang = slang->sl_next) 4370 if (fullpathcmp(spf_name, slang->sl_fname, FALSE) == FPC_SAME) 4371 break; 4372 if (slang == NULL) 4373 { 4374 /* Not loaded, try loading it now. The language name includes the 4375 * region name, the region is ignored otherwise. for int_wordlist 4376 * use an arbitrary name. */ 4377 if (round == 0) 4378 STRCPY(lang, "internal wordlist"); 4379 else 4380 { 4381 vim_strncpy(lang, gettail(spf_name), MAXWLEN); 4382 p = vim_strchr(lang, '.'); 4383 if (p != NULL) 4384 *p = NUL; /* truncate at ".encoding.add" */ 4385 } 4386 slang = spell_load_file(spf_name, lang, NULL, TRUE); 4387 4388 /* If one of the languages has NOBREAK we assume the addition 4389 * files also have this. */ 4390 if (slang != NULL && nobreak) 4391 slang->sl_nobreak = TRUE; 4392 } 4393 if (slang != NULL && ga_grow(&ga, 1) == OK) 4394 { 4395 region_mask = REGION_ALL; 4396 if (use_region != NULL && !dont_use_region) 4397 { 4398 /* find region in sl_regions */ 4399 c = find_region(slang->sl_regions, use_region); 4400 if (c != REGION_ALL) 4401 region_mask = 1 << c; 4402 else if (*slang->sl_regions != NUL) 4403 /* This spell file is for other regions. */ 4404 region_mask = 0; 4405 } 4406 4407 if (region_mask != 0) 4408 { 4409 LANGP_ENTRY(ga, ga.ga_len)->lp_slang = slang; 4410 LANGP_ENTRY(ga, ga.ga_len)->lp_sallang = NULL; 4411 LANGP_ENTRY(ga, ga.ga_len)->lp_replang = NULL; 4412 LANGP_ENTRY(ga, ga.ga_len)->lp_region = region_mask; 4413 ++ga.ga_len; 4414 use_midword(slang, wp); 4415 } 4416 } 4417 } 4418 4419 /* Everything is fine, store the new b_langp value. */ 4420 ga_clear(&wp->w_s->b_langp); 4421 wp->w_s->b_langp = ga; 4422 4423 /* For each language figure out what language to use for sound folding and 4424 * REP items. If the language doesn't support it itself use another one 4425 * with the same name. E.g. for "en-math" use "en". 
*/ 4426 for (i = 0; i < ga.ga_len; ++i) 4427 { 4428 lp = LANGP_ENTRY(ga, i); 4429 4430 /* sound folding */ 4431 if (lp->lp_slang->sl_sal.ga_len > 0) 4432 /* language does sound folding itself */ 4433 lp->lp_sallang = lp->lp_slang; 4434 else 4435 /* find first similar language that does sound folding */ 4436 for (j = 0; j < ga.ga_len; ++j) 4437 { 4438 lp2 = LANGP_ENTRY(ga, j); 4439 if (lp2->lp_slang->sl_sal.ga_len > 0 4440 && STRNCMP(lp->lp_slang->sl_name, 4441 lp2->lp_slang->sl_name, 2) == 0) 4442 { 4443 lp->lp_sallang = lp2->lp_slang; 4444 break; 4445 } 4446 } 4447 4448 /* REP items */ 4449 if (lp->lp_slang->sl_rep.ga_len > 0) 4450 /* language has REP items itself */ 4451 lp->lp_replang = lp->lp_slang; 4452 else 4453 /* find first similar language that has REP items */ 4454 for (j = 0; j < ga.ga_len; ++j) 4455 { 4456 lp2 = LANGP_ENTRY(ga, j); 4457 if (lp2->lp_slang->sl_rep.ga_len > 0 4458 && STRNCMP(lp->lp_slang->sl_name, 4459 lp2->lp_slang->sl_name, 2) == 0) 4460 { 4461 lp->lp_replang = lp2->lp_slang; 4462 break; 4463 } 4464 } 4465 } 4466 4467 theend: 4468 vim_free(spl_copy); 4469 recursive = FALSE; 4470 redraw_win_later(wp, NOT_VALID); 4471 return ret_msg; 4472 } 4473 4474 /* 4475 * Clear the midword characters for buffer "buf". 4476 */ 4477 static void 4478 clear_midword(win_T *wp) 4479 { 4480 vim_memset(wp->w_s->b_spell_ismw, 0, 256); 4481 #ifdef FEAT_MBYTE 4482 vim_free(wp->w_s->b_spell_ismw_mb); 4483 wp->w_s->b_spell_ismw_mb = NULL; 4484 #endif 4485 } 4486 4487 /* 4488 * Use the "sl_midword" field of language "lp" for buffer "buf". 4489 * They add up to any currently used midword characters. 
4490 */ 4491 static void 4492 use_midword(slang_T *lp, win_T *wp) 4493 { 4494 char_u *p; 4495 4496 if (lp->sl_midword == NULL) /* there aren't any */ 4497 return; 4498 4499 for (p = lp->sl_midword; *p != NUL; ) 4500 #ifdef FEAT_MBYTE 4501 if (has_mbyte) 4502 { 4503 int c, l, n; 4504 char_u *bp; 4505 4506 c = mb_ptr2char(p); 4507 l = (*mb_ptr2len)(p); 4508 if (c < 256 && l <= 2) 4509 wp->w_s->b_spell_ismw[c] = TRUE; 4510 else if (wp->w_s->b_spell_ismw_mb == NULL) 4511 /* First multi-byte char in "b_spell_ismw_mb". */ 4512 wp->w_s->b_spell_ismw_mb = vim_strnsave(p, l); 4513 else 4514 { 4515 /* Append multi-byte chars to "b_spell_ismw_mb". */ 4516 n = (int)STRLEN(wp->w_s->b_spell_ismw_mb); 4517 bp = vim_strnsave(wp->w_s->b_spell_ismw_mb, n + l); 4518 if (bp != NULL) 4519 { 4520 vim_free(wp->w_s->b_spell_ismw_mb); 4521 wp->w_s->b_spell_ismw_mb = bp; 4522 vim_strncpy(bp + n, p, l); 4523 } 4524 } 4525 p += l; 4526 } 4527 else 4528 #endif 4529 wp->w_s->b_spell_ismw[*p++] = TRUE; 4530 } 4531 4532 /* 4533 * Find the region "region[2]" in "rp" (points to "sl_regions"). 4534 * Each region is simply stored as the two characters of it's name. 4535 * Returns the index if found (first is 0), REGION_ALL if not found. 4536 */ 4537 static int 4538 find_region(char_u *rp, char_u *region) 4539 { 4540 int i; 4541 4542 for (i = 0; ; i += 2) 4543 { 4544 if (rp[i] == NUL) 4545 return REGION_ALL; 4546 if (rp[i] == region[0] && rp[i + 1] == region[1]) 4547 break; 4548 } 4549 return i / 2; 4550 } 4551 4552 /* 4553 * Return case type of word: 4554 * w word 0 4555 * Word WF_ONECAP 4556 * W WORD WF_ALLCAP 4557 * WoRd wOrd WF_KEEPCAP 4558 */ 4559 static int 4560 captype( 4561 char_u *word, 4562 char_u *end) /* When NULL use up to NUL byte. */ 4563 { 4564 char_u *p; 4565 int c; 4566 int firstcap; 4567 int allcap; 4568 int past_second = FALSE; /* past second word char */ 4569 4570 /* find first letter */ 4571 for (p = word; !spell_iswordp_nmw(p, curwin); mb_ptr_adv(p)) 4572 if (end == NULL ? 
*p == NUL : p >= end) 4573 return 0; /* only non-word characters, illegal word */ 4574 #ifdef FEAT_MBYTE 4575 if (has_mbyte) 4576 c = mb_ptr2char_adv(&p); 4577 else 4578 #endif 4579 c = *p++; 4580 firstcap = allcap = SPELL_ISUPPER(c); 4581 4582 /* 4583 * Need to check all letters to find a word with mixed upper/lower. 4584 * But a word with an upper char only at start is a ONECAP. 4585 */ 4586 for ( ; end == NULL ? *p != NUL : p < end; mb_ptr_adv(p)) 4587 if (spell_iswordp_nmw(p, curwin)) 4588 { 4589 c = PTR2CHAR(p); 4590 if (!SPELL_ISUPPER(c)) 4591 { 4592 /* UUl -> KEEPCAP */ 4593 if (past_second && allcap) 4594 return WF_KEEPCAP; 4595 allcap = FALSE; 4596 } 4597 else if (!allcap) 4598 /* UlU -> KEEPCAP */ 4599 return WF_KEEPCAP; 4600 past_second = TRUE; 4601 } 4602 4603 if (allcap) 4604 return WF_ALLCAP; 4605 if (firstcap) 4606 return WF_ONECAP; 4607 return 0; 4608 } 4609 4610 /* 4611 * Like captype() but for a KEEPCAP word add ONECAP if the word starts with a 4612 * capital. So that make_case_word() can turn WOrd into Word. 4613 * Add ALLCAP for "WOrD". 4614 */ 4615 static int 4616 badword_captype(char_u *word, char_u *end) 4617 { 4618 int flags = captype(word, end); 4619 int c; 4620 int l, u; 4621 int first; 4622 char_u *p; 4623 4624 if (flags & WF_KEEPCAP) 4625 { 4626 /* Count the number of UPPER and lower case letters. */ 4627 l = u = 0; 4628 first = FALSE; 4629 for (p = word; p < end; mb_ptr_adv(p)) 4630 { 4631 c = PTR2CHAR(p); 4632 if (SPELL_ISUPPER(c)) 4633 { 4634 ++u; 4635 if (p == word) 4636 first = TRUE; 4637 } 4638 else 4639 ++l; 4640 } 4641 4642 /* If there are more UPPER than lower case letters suggest an 4643 * ALLCAP word. Otherwise, if the first letter is UPPER then 4644 * suggest ONECAP. Exception: "ALl" most likely should be "All", 4645 * require three upper case letters. 
*/ 4646 if (u > l && u > 2) 4647 flags |= WF_ALLCAP; 4648 else if (first) 4649 flags |= WF_ONECAP; 4650 4651 if (u >= 2 && l >= 2) /* maCARONI maCAroni */ 4652 flags |= WF_MIXCAP; 4653 } 4654 return flags; 4655 } 4656 4657 /* 4658 * Delete the internal wordlist and its .spl file. 4659 */ 4660 void 4661 spell_delete_wordlist(void) 4662 { 4663 char_u fname[MAXPATHL]; 4664 4665 if (int_wordlist != NULL) 4666 { 4667 mch_remove(int_wordlist); 4668 int_wordlist_spl(fname); 4669 mch_remove(fname); 4670 vim_free(int_wordlist); 4671 int_wordlist = NULL; 4672 } 4673 } 4674 4675 #if defined(FEAT_MBYTE) || defined(EXITFREE) || defined(PROTO) 4676 /* 4677 * Free all languages. 4678 */ 4679 void 4680 spell_free_all(void) 4681 { 4682 slang_T *slang; 4683 buf_T *buf; 4684 4685 /* Go through all buffers and handle 'spelllang'. <VN> */ 4686 for (buf = firstbuf; buf != NULL; buf = buf->b_next) 4687 ga_clear(&buf->b_s.b_langp); 4688 4689 while (first_lang != NULL) 4690 { 4691 slang = first_lang; 4692 first_lang = slang->sl_next; 4693 slang_free(slang); 4694 } 4695 4696 spell_delete_wordlist(); 4697 4698 vim_free(repl_to); 4699 repl_to = NULL; 4700 vim_free(repl_from); 4701 repl_from = NULL; 4702 } 4703 #endif 4704 4705 #if defined(FEAT_MBYTE) || defined(PROTO) 4706 /* 4707 * Clear all spelling tables and reload them. 4708 * Used after 'encoding' is set and when ":mkspell" was used. 4709 */ 4710 void 4711 spell_reload(void) 4712 { 4713 win_T *wp; 4714 4715 /* Initialize the table for spell_iswordp(). */ 4716 init_spell_chartab(); 4717 4718 /* Unload all allocated memory. */ 4719 spell_free_all(); 4720 4721 /* Go through all buffers and handle 'spelllang'. */ 4722 for (wp = firstwin; wp != NULL; wp = wp->w_next) 4723 { 4724 /* Only load the wordlists when 'spelllang' is set and there is a 4725 * window for this buffer in which 'spell' is set. 
*/ 4726 if (*wp->w_s->b_p_spl != NUL) 4727 { 4728 if (wp->w_p_spell) 4729 { 4730 (void)did_set_spelllang(wp); 4731 # ifdef FEAT_WINDOWS 4732 break; 4733 # endif 4734 } 4735 } 4736 } 4737 } 4738 #endif 4739 4740 /* 4741 * Reload the spell file "fname" if it's loaded. 4742 */ 4743 static void 4744 spell_reload_one( 4745 char_u *fname, 4746 int added_word) /* invoked through "zg" */ 4747 { 4748 slang_T *slang; 4749 int didit = FALSE; 4750 4751 for (slang = first_lang; slang != NULL; slang = slang->sl_next) 4752 { 4753 if (fullpathcmp(fname, slang->sl_fname, FALSE) == FPC_SAME) 4754 { 4755 slang_clear(slang); 4756 if (spell_load_file(fname, NULL, slang, FALSE) == NULL) 4757 /* reloading failed, clear the language */ 4758 slang_clear(slang); 4759 redraw_all_later(SOME_VALID); 4760 didit = TRUE; 4761 } 4762 } 4763 4764 /* When "zg" was used and the file wasn't loaded yet, should redo 4765 * 'spelllang' to load it now. */ 4766 if (added_word && !didit) 4767 did_set_spelllang(curwin); 4768 } 4769 4770 4771 /* 4772 * Functions for ":mkspell". 4773 */ 4774 4775 #define MAXLINELEN 500 /* Maximum length in bytes of a line in a .aff 4776 and .dic file. */ 4777 /* 4778 * Main structure to store the contents of a ".aff" file. 
*/
typedef struct afffile_S
{
    char_u      *af_enc;        /* "SET", normalized, alloc'ed string or NULL */
    int         af_flagtype;    /* AFT_CHAR, AFT_LONG, AFT_NUM or AFT_CAPLONG */
    unsigned    af_rare;        /* RARE ID for rare word */
    unsigned    af_keepcase;    /* KEEPCASE ID for keep-case word */
    unsigned    af_bad;         /* BAD ID for banned word */
    unsigned    af_needaffix;   /* NEEDAFFIX ID */
    unsigned    af_circumfix;   /* CIRCUMFIX ID */
    unsigned    af_needcomp;    /* NEEDCOMPOUND ID */
    unsigned    af_comproot;    /* COMPOUNDROOT ID */
    unsigned    af_compforbid;  /* COMPOUNDFORBIDFLAG ID */
    unsigned    af_comppermit;  /* COMPOUNDPERMITFLAG ID */
    unsigned    af_nosuggest;   /* NOSUGGEST ID */
    int         af_pfxpostpone; /* postpone prefixes without chop string and
                                   without flags */
    int         af_ignoreextra; /* IGNOREEXTRA present */
    hashtab_T   af_pref;        /* hashtable for prefixes, affheader_T */
    hashtab_T   af_suff;        /* hashtable for suffixes, affheader_T */
    hashtab_T   af_comp;        /* hashtable for compound flags, compitem_T */
} afffile_T;

/* Values for "af_flagtype": how a flag name is written in the file. */
#define AFT_CHAR        0       /* flags are one character */
#define AFT_LONG        1       /* flags are two characters */
#define AFT_CAPLONG     2       /* flags are one or two characters */
#define AFT_NUM         3       /* flags are numbers, comma separated */

typedef struct affentry_S affentry_T;
/* Affix entry from ".aff" file.  Used for prefixes and suffixes.  Entries
 * with the same name/number are linked through "ae_next". */
struct affentry_S
{
    affentry_T  *ae_next;       /* next affix with same name/number */
    char_u      *ae_chop;       /* text to chop off basic word (can be NULL) */
    char_u      *ae_add;        /* text to add to basic word (can be NULL) */
    char_u      *ae_flags;      /* flags on the affix (can be NULL) */
    char_u      *ae_cond;       /* condition (NULL for ".") */
    regprog_T   *ae_prog;       /* regexp program for ae_cond or NULL */
    char        ae_compforbid;  /* COMPOUNDFORBIDFLAG found */
    char        ae_comppermit;  /* COMPOUNDPERMITFLAG found */
};

/* Size of the hashtable key buffers "ah_key" and "ci_key". */
#ifdef FEAT_MBYTE
# define AH_KEY_LEN 17          /* 2 x 8 bytes + NUL */
#else
# define AH_KEY_LEN 7           /* 6 digits + NUL */
#endif

/* Affix header from ".aff" file.  Used for af_pref and af_suff.
 * "ah_key" is the first member, HI2AH() casts the hashtable key pointer to
 * the containing structure. */
typedef struct affheader_S
{
    char_u      ah_key[AH_KEY_LEN]; /* key for hashtab == name of affix */
    unsigned    ah_flag;        /* affix name as number, uses "af_flagtype" */
    int         ah_newID;       /* prefix ID after renumbering; 0 if not used */
    int         ah_combine;     /* suffix may combine with prefix */
    int         ah_follows;     /* another affix block should be following */
    affentry_T  *ah_first;      /* first affix entry */
} affheader_T;

/* Get the affheader_T from hashitem "hi" (its key is "ah_key"). */
#define HI2AH(hi)   ((affheader_T *)(hi)->hi_key)

/* Flag used in compound items.
 * "ci_key" is the first member, HI2CI() casts the hashtable key pointer to
 * the containing structure. */
typedef struct compitem_S
{
    char_u      ci_key[AH_KEY_LEN]; /* key for hashtab == name of compound */
    unsigned    ci_flag;        /* affix name as number, uses "af_flagtype" */
    int         ci_newID;       /* affix ID after renumbering. */
} compitem_T;

/* Get the compitem_T from hashitem "hi" (its key is "ci_key"). */
#define HI2CI(hi)   ((compitem_T *)(hi)->hi_key)

/*
 * Structure that is used to store the items in the word tree.  This avoids
 * the need to keep track of each allocated thing, everything is freed all at
 * once after ":mkspell" is done.
4854 * Note: "sb_next" must be just before "sb_data" to make sure the alignment of 4855 * "sb_data" is correct for systems where pointers must be aligned on 4856 * pointer-size boundaries and sizeof(pointer) > sizeof(int) (e.g., Sparc). 4857 */ 4858 #define SBLOCKSIZE 16000 /* size of sb_data */ 4859 typedef struct sblock_S sblock_T; 4860 struct sblock_S 4861 { 4862 int sb_used; /* nr of bytes already in use */ 4863 sblock_T *sb_next; /* next block in list */ 4864 char_u sb_data[1]; /* data, actually longer */ 4865 }; 4866 4867 /* 4868 * A node in the tree. 4869 */ 4870 typedef struct wordnode_S wordnode_T; 4871 struct wordnode_S 4872 { 4873 union /* shared to save space */ 4874 { 4875 char_u hashkey[6]; /* the hash key, only used while compressing */ 4876 int index; /* index in written nodes (valid after first 4877 round) */ 4878 } wn_u1; 4879 union /* shared to save space */ 4880 { 4881 wordnode_T *next; /* next node with same hash key */ 4882 wordnode_T *wnode; /* parent node that will write this node */ 4883 } wn_u2; 4884 wordnode_T *wn_child; /* child (next byte in word) */ 4885 wordnode_T *wn_sibling; /* next sibling (alternate byte in word, 4886 always sorted) */ 4887 int wn_refs; /* Nr. of references to this node. Only 4888 relevant for first node in a list of 4889 siblings, in following siblings it is 4890 always one. */ 4891 char_u wn_byte; /* Byte for this node. NUL for word end */ 4892 4893 /* Info for when "wn_byte" is NUL. 4894 * In PREFIXTREE "wn_region" is used for the prefcondnr. 4895 * In the soundfolded word tree "wn_flags" has the MSW of the wordnr and 4896 * "wn_region" the LSW of the wordnr. 
*/ 4897 char_u wn_affixID; /* supported/required prefix ID or 0 */ 4898 short_u wn_flags; /* WF_ flags */ 4899 short wn_region; /* region mask */ 4900 4901 #ifdef SPELL_PRINTTREE 4902 int wn_nr; /* sequence nr for printing */ 4903 #endif 4904 }; 4905 4906 #define WN_MASK 0xffff /* mask relevant bits of "wn_flags" */ 4907 4908 #define HI2WN(hi) (wordnode_T *)((hi)->hi_key) 4909 4910 /* 4911 * Info used while reading the spell files. 4912 */ 4913 typedef struct spellinfo_S 4914 { 4915 wordnode_T *si_foldroot; /* tree with case-folded words */ 4916 long si_foldwcount; /* nr of words in si_foldroot */ 4917 4918 wordnode_T *si_keeproot; /* tree with keep-case words */ 4919 long si_keepwcount; /* nr of words in si_keeproot */ 4920 4921 wordnode_T *si_prefroot; /* tree with postponed prefixes */ 4922 4923 long si_sugtree; /* creating the soundfolding trie */ 4924 4925 sblock_T *si_blocks; /* memory blocks used */ 4926 long si_blocks_cnt; /* memory blocks allocated */ 4927 int si_did_emsg; /* TRUE when ran out of memory */ 4928 4929 long si_compress_cnt; /* words to add before lowering 4930 compression limit */ 4931 wordnode_T *si_first_free; /* List of nodes that have been freed during 4932 compression, linked by "wn_child" field. 
*/ 4933 long si_free_count; /* number of nodes in si_first_free */ 4934 #ifdef SPELL_PRINTTREE 4935 int si_wordnode_nr; /* sequence nr for nodes */ 4936 #endif 4937 buf_T *si_spellbuf; /* buffer used to store soundfold word table */ 4938 4939 int si_ascii; /* handling only ASCII words */ 4940 int si_add; /* addition file */ 4941 int si_clear_chartab; /* when TRUE clear char tables */ 4942 int si_region; /* region mask */ 4943 vimconv_T si_conv; /* for conversion to 'encoding' */ 4944 int si_memtot; /* runtime memory used */ 4945 int si_verbose; /* verbose messages */ 4946 int si_msg_count; /* number of words added since last message */ 4947 char_u *si_info; /* info text chars or NULL */ 4948 int si_region_count; /* number of regions supported (1 when there 4949 are no regions) */ 4950 char_u si_region_name[17]; /* region names; used only if 4951 * si_region_count > 1) */ 4952 4953 garray_T si_rep; /* list of fromto_T entries from REP lines */ 4954 garray_T si_repsal; /* list of fromto_T entries from REPSAL lines */ 4955 garray_T si_sal; /* list of fromto_T entries from SAL lines */ 4956 char_u *si_sofofr; /* SOFOFROM text */ 4957 char_u *si_sofoto; /* SOFOTO text */ 4958 int si_nosugfile; /* NOSUGFILE item found */ 4959 int si_nosplitsugs; /* NOSPLITSUGS item found */ 4960 int si_nocompoundsugs; /* NOCOMPOUNDSUGS item found */ 4961 int si_followup; /* soundsalike: ? */ 4962 int si_collapse; /* soundsalike: ? 
*/ 4963 hashtab_T si_commonwords; /* hashtable for common words */ 4964 time_t si_sugtime; /* timestamp for .sug file */ 4965 int si_rem_accents; /* soundsalike: remove accents */ 4966 garray_T si_map; /* MAP info concatenated */ 4967 char_u *si_midword; /* MIDWORD chars or NULL */ 4968 int si_compmax; /* max nr of words for compounding */ 4969 int si_compminlen; /* minimal length for compounding */ 4970 int si_compsylmax; /* max nr of syllables for compounding */ 4971 int si_compoptions; /* COMP_ flags */ 4972 garray_T si_comppat; /* CHECKCOMPOUNDPATTERN items, each stored as 4973 a string */ 4974 char_u *si_compflags; /* flags used for compounding */ 4975 char_u si_nobreak; /* NOBREAK */ 4976 char_u *si_syllable; /* syllable string */ 4977 garray_T si_prefcond; /* table with conditions for postponed 4978 * prefixes, each stored as a string */ 4979 int si_newprefID; /* current value for ah_newID */ 4980 int si_newcompID; /* current value for compound ID */ 4981 } spellinfo_T; 4982 4983 static afffile_T *spell_read_aff(spellinfo_T *spin, char_u *fname); 4984 static int is_aff_rule(char_u **items, int itemcnt, char *rulename, int mincount); 4985 static void aff_process_flags(afffile_T *affile, affentry_T *entry); 4986 static int spell_info_item(char_u *s); 4987 static unsigned affitem2flag(int flagtype, char_u *item, char_u *fname, int lnum); 4988 static unsigned get_affitem(int flagtype, char_u **pp); 4989 static void process_compflags(spellinfo_T *spin, afffile_T *aff, char_u *compflags); 4990 static void check_renumber(spellinfo_T *spin); 4991 static int flag_in_afflist(int flagtype, char_u *afflist, unsigned flag); 4992 static void aff_check_number(int spinval, int affval, char *name); 4993 static void aff_check_string(char_u *spinval, char_u *affval, char *name); 4994 static int str_equal(char_u *s1, char_u *s2); 4995 static void add_fromto(spellinfo_T *spin, garray_T *gap, char_u *from, char_u *to); 4996 static int sal_to_bool(char_u *s); 4997 static void 
spell_free_aff(afffile_T *aff); 4998 static int spell_read_dic(spellinfo_T *spin, char_u *fname, afffile_T *affile); 4999 static int get_affix_flags(afffile_T *affile, char_u *afflist); 5000 static int get_pfxlist(afffile_T *affile, char_u *afflist, char_u *store_afflist); 5001 static void get_compflags(afffile_T *affile, char_u *afflist, char_u *store_afflist); 5002 static int store_aff_word(spellinfo_T *spin, char_u *word, char_u *afflist, afffile_T *affile, hashtab_T *ht, hashtab_T *xht, int condit, int flags, char_u *pfxlist, int pfxlen); 5003 static int spell_read_wordfile(spellinfo_T *spin, char_u *fname); 5004 static void *getroom(spellinfo_T *spin, size_t len, int align); 5005 static char_u *getroom_save(spellinfo_T *spin, char_u *s); 5006 static void free_blocks(sblock_T *bl); 5007 static wordnode_T *wordtree_alloc(spellinfo_T *spin); 5008 static int store_word(spellinfo_T *spin, char_u *word, int flags, int region, char_u *pfxlist, int need_affix); 5009 static int tree_add_word(spellinfo_T *spin, char_u *word, wordnode_T *tree, int flags, int region, int affixID); 5010 static wordnode_T *get_wordnode(spellinfo_T *spin); 5011 static int deref_wordnode(spellinfo_T *spin, wordnode_T *node); 5012 static void free_wordnode(spellinfo_T *spin, wordnode_T *n); 5013 static void wordtree_compress(spellinfo_T *spin, wordnode_T *root); 5014 static int node_compress(spellinfo_T *spin, wordnode_T *node, hashtab_T *ht, int *tot); 5015 static int node_equal(wordnode_T *n1, wordnode_T *n2); 5016 static int write_vim_spell(spellinfo_T *spin, char_u *fname); 5017 static void clear_node(wordnode_T *node); 5018 static int put_node(FILE *fd, wordnode_T *node, int idx, int regionmask, int prefixtree); 5019 static void spell_make_sugfile(spellinfo_T *spin, char_u *wfname); 5020 static int sug_filltree(spellinfo_T *spin, slang_T *slang); 5021 static int sug_maketable(spellinfo_T *spin); 5022 static int sug_filltable(spellinfo_T *spin, wordnode_T *node, int startwordnr, garray_T 
*gap); 5023 static int offset2bytes(int nr, char_u *buf); 5024 static int bytes2offset(char_u **pp); 5025 static void sug_write(spellinfo_T *spin, char_u *fname); 5026 static void mkspell(int fcount, char_u **fnames, int ascii, int over_write, int added_word); 5027 static void spell_message(spellinfo_T *spin, char_u *str); 5028 static void init_spellfile(void); 5029 5030 /* In the postponed prefixes tree wn_flags is used to store the WFP_ flags, 5031 * but it must be negative to indicate the prefix tree to tree_add_word(). 5032 * Use a negative number with the lower 8 bits zero. */ 5033 #define PFX_FLAGS -256 5034 5035 /* flags for "condit" argument of store_aff_word() */ 5036 #define CONDIT_COMB 1 /* affix must combine */ 5037 #define CONDIT_CFIX 2 /* affix must have CIRCUMFIX flag */ 5038 #define CONDIT_SUF 4 /* add a suffix for matching flags */ 5039 #define CONDIT_AFF 8 /* word already has an affix */ 5040 5041 /* 5042 * Tunable parameters for when the tree is compressed. See 'mkspellmem'. 5043 */ 5044 static long compress_start = 30000; /* memory / SBLOCKSIZE */ 5045 static long compress_inc = 100; /* memory / SBLOCKSIZE */ 5046 static long compress_added = 500000; /* word count */ 5047 5048 #ifdef SPELL_PRINTTREE 5049 /* 5050 * For debugging the tree code: print the current tree in a (more or less) 5051 * readable format, so that we can see what happens when adding a word and/or 5052 * compressing the tree. 5053 * Based on code from Olaf Seibert. 
5054 */ 5055 #define PRINTLINESIZE 1000 5056 #define PRINTWIDTH 6 5057 5058 #define PRINTSOME(l, depth, fmt, a1, a2) vim_snprintf(l + depth * PRINTWIDTH, \ 5059 PRINTLINESIZE - PRINTWIDTH * depth, fmt, a1, a2) 5060 5061 static char line1[PRINTLINESIZE]; 5062 static char line2[PRINTLINESIZE]; 5063 static char line3[PRINTLINESIZE]; 5064 5065 static void 5066 spell_clear_flags(wordnode_T *node) 5067 { 5068 wordnode_T *np; 5069 5070 for (np = node; np != NULL; np = np->wn_sibling) 5071 { 5072 np->wn_u1.index = FALSE; 5073 spell_clear_flags(np->wn_child); 5074 } 5075 } 5076 5077 static void 5078 spell_print_node(wordnode_T *node, int depth) 5079 { 5080 if (node->wn_u1.index) 5081 { 5082 /* Done this node before, print the reference. */ 5083 PRINTSOME(line1, depth, "(%d)", node->wn_nr, 0); 5084 PRINTSOME(line2, depth, " ", 0, 0); 5085 PRINTSOME(line3, depth, " ", 0, 0); 5086 msg((char_u *)line1); 5087 msg((char_u *)line2); 5088 msg((char_u *)line3); 5089 } 5090 else 5091 { 5092 node->wn_u1.index = TRUE; 5093 5094 if (node->wn_byte != NUL) 5095 { 5096 if (node->wn_child != NULL) 5097 PRINTSOME(line1, depth, " %c -> ", node->wn_byte, 0); 5098 else 5099 /* Cannot happen? 
*/ 5100 PRINTSOME(line1, depth, " %c ???", node->wn_byte, 0); 5101 } 5102 else 5103 PRINTSOME(line1, depth, " $ ", 0, 0); 5104 5105 PRINTSOME(line2, depth, "%d/%d ", node->wn_nr, node->wn_refs); 5106 5107 if (node->wn_sibling != NULL) 5108 PRINTSOME(line3, depth, " | ", 0, 0); 5109 else 5110 PRINTSOME(line3, depth, " ", 0, 0); 5111 5112 if (node->wn_byte == NUL) 5113 { 5114 msg((char_u *)line1); 5115 msg((char_u *)line2); 5116 msg((char_u *)line3); 5117 } 5118 5119 /* do the children */ 5120 if (node->wn_byte != NUL && node->wn_child != NULL) 5121 spell_print_node(node->wn_child, depth + 1); 5122 5123 /* do the siblings */ 5124 if (node->wn_sibling != NULL) 5125 { 5126 /* get rid of all parent details except | */ 5127 STRCPY(line1, line3); 5128 STRCPY(line2, line3); 5129 spell_print_node(node->wn_sibling, depth); 5130 } 5131 } 5132 } 5133 5134 static void 5135 spell_print_tree(wordnode_T *root) 5136 { 5137 if (root != NULL) 5138 { 5139 /* Clear the "wn_u1.index" fields, used to remember what has been 5140 * done. */ 5141 spell_clear_flags(root); 5142 5143 /* Recursively print the tree. */ 5144 spell_print_node(root, 0); 5145 } 5146 } 5147 #endif /* SPELL_PRINTTREE */ 5148 5149 /* 5150 * Read the affix file "fname". 5151 * Returns an afffile_T, NULL for complete failure. 
5152 */ 5153 static afffile_T * 5154 spell_read_aff(spellinfo_T *spin, char_u *fname) 5155 { 5156 FILE *fd; 5157 afffile_T *aff; 5158 char_u rline[MAXLINELEN]; 5159 char_u *line; 5160 char_u *pc = NULL; 5161 #define MAXITEMCNT 30 5162 char_u *(items[MAXITEMCNT]); 5163 int itemcnt; 5164 char_u *p; 5165 int lnum = 0; 5166 affheader_T *cur_aff = NULL; 5167 int did_postpone_prefix = FALSE; 5168 int aff_todo = 0; 5169 hashtab_T *tp; 5170 char_u *low = NULL; 5171 char_u *fol = NULL; 5172 char_u *upp = NULL; 5173 int do_rep; 5174 int do_repsal; 5175 int do_sal; 5176 int do_mapline; 5177 int found_map = FALSE; 5178 hashitem_T *hi; 5179 int l; 5180 int compminlen = 0; /* COMPOUNDMIN value */ 5181 int compsylmax = 0; /* COMPOUNDSYLMAX value */ 5182 int compoptions = 0; /* COMP_ flags */ 5183 int compmax = 0; /* COMPOUNDWORDMAX value */ 5184 char_u *compflags = NULL; /* COMPOUNDFLAG and COMPOUNDRULE 5185 concatenated */ 5186 char_u *midword = NULL; /* MIDWORD value */ 5187 char_u *syllable = NULL; /* SYLLABLE value */ 5188 char_u *sofofrom = NULL; /* SOFOFROM value */ 5189 char_u *sofoto = NULL; /* SOFOTO value */ 5190 5191 /* 5192 * Open the file. 5193 */ 5194 fd = mch_fopen((char *)fname, "r"); 5195 if (fd == NULL) 5196 { 5197 EMSG2(_(e_notopen), fname); 5198 return NULL; 5199 } 5200 5201 vim_snprintf((char *)IObuff, IOSIZE, _("Reading affix file %s ..."), fname); 5202 spell_message(spin, IObuff); 5203 5204 /* Only do REP lines when not done in another .aff file already. */ 5205 do_rep = spin->si_rep.ga_len == 0; 5206 5207 /* Only do REPSAL lines when not done in another .aff file already. */ 5208 do_repsal = spin->si_repsal.ga_len == 0; 5209 5210 /* Only do SAL lines when not done in another .aff file already. */ 5211 do_sal = spin->si_sal.ga_len == 0; 5212 5213 /* Only do MAP lines when not done in another .aff file already. */ 5214 do_mapline = spin->si_map.ga_len == 0; 5215 5216 /* 5217 * Allocate and init the afffile_T structure. 
5218 */ 5219 aff = (afffile_T *)getroom(spin, sizeof(afffile_T), TRUE); 5220 if (aff == NULL) 5221 { 5222 fclose(fd); 5223 return NULL; 5224 } 5225 hash_init(&aff->af_pref); 5226 hash_init(&aff->af_suff); 5227 hash_init(&aff->af_comp); 5228 5229 /* 5230 * Read all the lines in the file one by one. 5231 */ 5232 while (!vim_fgets(rline, MAXLINELEN, fd) && !got_int) 5233 { 5234 line_breakcheck(); 5235 ++lnum; 5236 5237 /* Skip comment lines. */ 5238 if (*rline == '#') 5239 continue; 5240 5241 /* Convert from "SET" to 'encoding' when needed. */ 5242 vim_free(pc); 5243 #ifdef FEAT_MBYTE 5244 if (spin->si_conv.vc_type != CONV_NONE) 5245 { 5246 pc = string_convert(&spin->si_conv, rline, NULL); 5247 if (pc == NULL) 5248 { 5249 smsg((char_u *)_("Conversion failure for word in %s line %d: %s"), 5250 fname, lnum, rline); 5251 continue; 5252 } 5253 line = pc; 5254 } 5255 else 5256 #endif 5257 { 5258 pc = NULL; 5259 line = rline; 5260 } 5261 5262 /* Split the line up in white separated items. Put a NUL after each 5263 * item. */ 5264 itemcnt = 0; 5265 for (p = line; ; ) 5266 { 5267 while (*p != NUL && *p <= ' ') /* skip white space and CR/NL */ 5268 ++p; 5269 if (*p == NUL) 5270 break; 5271 if (itemcnt == MAXITEMCNT) /* too many items */ 5272 break; 5273 items[itemcnt++] = p; 5274 /* A few items have arbitrary text argument, don't split them. */ 5275 if (itemcnt == 2 && spell_info_item(items[0])) 5276 while (*p >= ' ' || *p == TAB) /* skip until CR/NL */ 5277 ++p; 5278 else 5279 while (*p > ' ') /* skip until white space or CR/NL */ 5280 ++p; 5281 if (*p == NUL) 5282 break; 5283 *p++ = NUL; 5284 } 5285 5286 /* Handle non-empty lines. */ 5287 if (itemcnt > 0) 5288 { 5289 if (is_aff_rule(items, itemcnt, "SET", 2) && aff->af_enc == NULL) 5290 { 5291 #ifdef FEAT_MBYTE 5292 /* Setup for conversion from "ENC" to 'encoding'. 
*/ 5293 aff->af_enc = enc_canonize(items[1]); 5294 if (aff->af_enc != NULL && !spin->si_ascii 5295 && convert_setup(&spin->si_conv, aff->af_enc, 5296 p_enc) == FAIL) 5297 smsg((char_u *)_("Conversion in %s not supported: from %s to %s"), 5298 fname, aff->af_enc, p_enc); 5299 spin->si_conv.vc_fail = TRUE; 5300 #else 5301 smsg((char_u *)_("Conversion in %s not supported"), fname); 5302 #endif 5303 } 5304 else if (is_aff_rule(items, itemcnt, "FLAG", 2) 5305 && aff->af_flagtype == AFT_CHAR) 5306 { 5307 if (STRCMP(items[1], "long") == 0) 5308 aff->af_flagtype = AFT_LONG; 5309 else if (STRCMP(items[1], "num") == 0) 5310 aff->af_flagtype = AFT_NUM; 5311 else if (STRCMP(items[1], "caplong") == 0) 5312 aff->af_flagtype = AFT_CAPLONG; 5313 else 5314 smsg((char_u *)_("Invalid value for FLAG in %s line %d: %s"), 5315 fname, lnum, items[1]); 5316 if (aff->af_rare != 0 5317 || aff->af_keepcase != 0 5318 || aff->af_bad != 0 5319 || aff->af_needaffix != 0 5320 || aff->af_circumfix != 0 5321 || aff->af_needcomp != 0 5322 || aff->af_comproot != 0 5323 || aff->af_nosuggest != 0 5324 || compflags != NULL 5325 || aff->af_suff.ht_used > 0 5326 || aff->af_pref.ht_used > 0) 5327 smsg((char_u *)_("FLAG after using flags in %s line %d: %s"), 5328 fname, lnum, items[1]); 5329 } 5330 else if (spell_info_item(items[0])) 5331 { 5332 p = (char_u *)getroom(spin, 5333 (spin->si_info == NULL ? 
0 : STRLEN(spin->si_info)) 5334 + STRLEN(items[0]) 5335 + STRLEN(items[1]) + 3, FALSE); 5336 if (p != NULL) 5337 { 5338 if (spin->si_info != NULL) 5339 { 5340 STRCPY(p, spin->si_info); 5341 STRCAT(p, "\n"); 5342 } 5343 STRCAT(p, items[0]); 5344 STRCAT(p, " "); 5345 STRCAT(p, items[1]); 5346 spin->si_info = p; 5347 } 5348 } 5349 else if (is_aff_rule(items, itemcnt, "MIDWORD", 2) 5350 && midword == NULL) 5351 { 5352 midword = getroom_save(spin, items[1]); 5353 } 5354 else if (is_aff_rule(items, itemcnt, "TRY", 2)) 5355 { 5356 /* ignored, we look in the tree for what chars may appear */ 5357 } 5358 /* TODO: remove "RAR" later */ 5359 else if ((is_aff_rule(items, itemcnt, "RAR", 2) 5360 || is_aff_rule(items, itemcnt, "RARE", 2)) 5361 && aff->af_rare == 0) 5362 { 5363 aff->af_rare = affitem2flag(aff->af_flagtype, items[1], 5364 fname, lnum); 5365 } 5366 /* TODO: remove "KEP" later */ 5367 else if ((is_aff_rule(items, itemcnt, "KEP", 2) 5368 || is_aff_rule(items, itemcnt, "KEEPCASE", 2)) 5369 && aff->af_keepcase == 0) 5370 { 5371 aff->af_keepcase = affitem2flag(aff->af_flagtype, items[1], 5372 fname, lnum); 5373 } 5374 else if ((is_aff_rule(items, itemcnt, "BAD", 2) 5375 || is_aff_rule(items, itemcnt, "FORBIDDENWORD", 2)) 5376 && aff->af_bad == 0) 5377 { 5378 aff->af_bad = affitem2flag(aff->af_flagtype, items[1], 5379 fname, lnum); 5380 } 5381 else if (is_aff_rule(items, itemcnt, "NEEDAFFIX", 2) 5382 && aff->af_needaffix == 0) 5383 { 5384 aff->af_needaffix = affitem2flag(aff->af_flagtype, items[1], 5385 fname, lnum); 5386 } 5387 else if (is_aff_rule(items, itemcnt, "CIRCUMFIX", 2) 5388 && aff->af_circumfix == 0) 5389 { 5390 aff->af_circumfix = affitem2flag(aff->af_flagtype, items[1], 5391 fname, lnum); 5392 } 5393 else if (is_aff_rule(items, itemcnt, "NOSUGGEST", 2) 5394 && aff->af_nosuggest == 0) 5395 { 5396 aff->af_nosuggest = affitem2flag(aff->af_flagtype, items[1], 5397 fname, lnum); 5398 } 5399 else if ((is_aff_rule(items, itemcnt, "NEEDCOMPOUND", 2) 5400 || 
is_aff_rule(items, itemcnt, "ONLYINCOMPOUND", 2)) 5401 && aff->af_needcomp == 0) 5402 { 5403 aff->af_needcomp = affitem2flag(aff->af_flagtype, items[1], 5404 fname, lnum); 5405 } 5406 else if (is_aff_rule(items, itemcnt, "COMPOUNDROOT", 2) 5407 && aff->af_comproot == 0) 5408 { 5409 aff->af_comproot = affitem2flag(aff->af_flagtype, items[1], 5410 fname, lnum); 5411 } 5412 else if (is_aff_rule(items, itemcnt, "COMPOUNDFORBIDFLAG", 2) 5413 && aff->af_compforbid == 0) 5414 { 5415 aff->af_compforbid = affitem2flag(aff->af_flagtype, items[1], 5416 fname, lnum); 5417 if (aff->af_pref.ht_used > 0) 5418 smsg((char_u *)_("Defining COMPOUNDFORBIDFLAG after PFX item may give wrong results in %s line %d"), 5419 fname, lnum); 5420 } 5421 else if (is_aff_rule(items, itemcnt, "COMPOUNDPERMITFLAG", 2) 5422 && aff->af_comppermit == 0) 5423 { 5424 aff->af_comppermit = affitem2flag(aff->af_flagtype, items[1], 5425 fname, lnum); 5426 if (aff->af_pref.ht_used > 0) 5427 smsg((char_u *)_("Defining COMPOUNDPERMITFLAG after PFX item may give wrong results in %s line %d"), 5428 fname, lnum); 5429 } 5430 else if (is_aff_rule(items, itemcnt, "COMPOUNDFLAG", 2) 5431 && compflags == NULL) 5432 { 5433 /* Turn flag "c" into COMPOUNDRULE compatible string "c+", 5434 * "Na" into "Na+", "1234" into "1234+". */ 5435 p = getroom(spin, STRLEN(items[1]) + 2, FALSE); 5436 if (p != NULL) 5437 { 5438 STRCPY(p, items[1]); 5439 STRCAT(p, "+"); 5440 compflags = p; 5441 } 5442 } 5443 else if (is_aff_rule(items, itemcnt, "COMPOUNDRULES", 2)) 5444 { 5445 /* We don't use the count, but do check that it's a number and 5446 * not COMPOUNDRULE mistyped. */ 5447 if (atoi((char *)items[1]) == 0) 5448 smsg((char_u *)_("Wrong COMPOUNDRULES value in %s line %d: %s"), 5449 fname, lnum, items[1]); 5450 } 5451 else if (is_aff_rule(items, itemcnt, "COMPOUNDRULE", 2)) 5452 { 5453 /* Don't use the first rule if it is a number. 
*/ 5454 if (compflags != NULL || *skipdigits(items[1]) != NUL) 5455 { 5456 /* Concatenate this string to previously defined ones, 5457 * using a slash to separate them. */ 5458 l = (int)STRLEN(items[1]) + 1; 5459 if (compflags != NULL) 5460 l += (int)STRLEN(compflags) + 1; 5461 p = getroom(spin, l, FALSE); 5462 if (p != NULL) 5463 { 5464 if (compflags != NULL) 5465 { 5466 STRCPY(p, compflags); 5467 STRCAT(p, "/"); 5468 } 5469 STRCAT(p, items[1]); 5470 compflags = p; 5471 } 5472 } 5473 } 5474 else if (is_aff_rule(items, itemcnt, "COMPOUNDWORDMAX", 2) 5475 && compmax == 0) 5476 { 5477 compmax = atoi((char *)items[1]); 5478 if (compmax == 0) 5479 smsg((char_u *)_("Wrong COMPOUNDWORDMAX value in %s line %d: %s"), 5480 fname, lnum, items[1]); 5481 } 5482 else if (is_aff_rule(items, itemcnt, "COMPOUNDMIN", 2) 5483 && compminlen == 0) 5484 { 5485 compminlen = atoi((char *)items[1]); 5486 if (compminlen == 0) 5487 smsg((char_u *)_("Wrong COMPOUNDMIN value in %s line %d: %s"), 5488 fname, lnum, items[1]); 5489 } 5490 else if (is_aff_rule(items, itemcnt, "COMPOUNDSYLMAX", 2) 5491 && compsylmax == 0) 5492 { 5493 compsylmax = atoi((char *)items[1]); 5494 if (compsylmax == 0) 5495 smsg((char_u *)_("Wrong COMPOUNDSYLMAX value in %s line %d: %s"), 5496 fname, lnum, items[1]); 5497 } 5498 else if (is_aff_rule(items, itemcnt, "CHECKCOMPOUNDDUP", 1)) 5499 { 5500 compoptions |= COMP_CHECKDUP; 5501 } 5502 else if (is_aff_rule(items, itemcnt, "CHECKCOMPOUNDREP", 1)) 5503 { 5504 compoptions |= COMP_CHECKREP; 5505 } 5506 else if (is_aff_rule(items, itemcnt, "CHECKCOMPOUNDCASE", 1)) 5507 { 5508 compoptions |= COMP_CHECKCASE; 5509 } 5510 else if (is_aff_rule(items, itemcnt, "CHECKCOMPOUNDTRIPLE", 1)) 5511 { 5512 compoptions |= COMP_CHECKTRIPLE; 5513 } 5514 else if (is_aff_rule(items, itemcnt, "CHECKCOMPOUNDPATTERN", 2)) 5515 { 5516 if (atoi((char *)items[1]) == 0) 5517 smsg((char_u *)_("Wrong CHECKCOMPOUNDPATTERN value in %s line %d: %s"), 5518 fname, lnum, items[1]); 5519 } 5520 else if 
(is_aff_rule(items, itemcnt, "CHECKCOMPOUNDPATTERN", 3)) 5521 { 5522 garray_T *gap = &spin->si_comppat; 5523 int i; 5524 5525 /* Only add the couple if it isn't already there. */ 5526 for (i = 0; i < gap->ga_len - 1; i += 2) 5527 if (STRCMP(((char_u **)(gap->ga_data))[i], items[1]) == 0 5528 && STRCMP(((char_u **)(gap->ga_data))[i + 1], 5529 items[2]) == 0) 5530 break; 5531 if (i >= gap->ga_len && ga_grow(gap, 2) == OK) 5532 { 5533 ((char_u **)(gap->ga_data))[gap->ga_len++] 5534 = getroom_save(spin, items[1]); 5535 ((char_u **)(gap->ga_data))[gap->ga_len++] 5536 = getroom_save(spin, items[2]); 5537 } 5538 } 5539 else if (is_aff_rule(items, itemcnt, "SYLLABLE", 2) 5540 && syllable == NULL) 5541 { 5542 syllable = getroom_save(spin, items[1]); 5543 } 5544 else if (is_aff_rule(items, itemcnt, "NOBREAK", 1)) 5545 { 5546 spin->si_nobreak = TRUE; 5547 } 5548 else if (is_aff_rule(items, itemcnt, "NOSPLITSUGS", 1)) 5549 { 5550 spin->si_nosplitsugs = TRUE; 5551 } 5552 else if (is_aff_rule(items, itemcnt, "NOCOMPOUNDSUGS", 1)) 5553 { 5554 spin->si_nocompoundsugs = TRUE; 5555 } 5556 else if (is_aff_rule(items, itemcnt, "NOSUGFILE", 1)) 5557 { 5558 spin->si_nosugfile = TRUE; 5559 } 5560 else if (is_aff_rule(items, itemcnt, "PFXPOSTPONE", 1)) 5561 { 5562 aff->af_pfxpostpone = TRUE; 5563 } 5564 else if (is_aff_rule(items, itemcnt, "IGNOREEXTRA", 1)) 5565 { 5566 aff->af_ignoreextra = TRUE; 5567 } 5568 else if ((STRCMP(items[0], "PFX") == 0 5569 || STRCMP(items[0], "SFX") == 0) 5570 && aff_todo == 0 5571 && itemcnt >= 4) 5572 { 5573 int lasti = 4; 5574 char_u key[AH_KEY_LEN]; 5575 5576 if (*items[0] == 'P') 5577 tp = &aff->af_pref; 5578 else 5579 tp = &aff->af_suff; 5580 5581 /* Myspell allows the same affix name to be used multiple 5582 * times. The affix files that do this have an undocumented 5583 * "S" flag on all but the last block, thus we check for that 5584 * and store it in ah_follows. 
*/ 5585 vim_strncpy(key, items[1], AH_KEY_LEN - 1); 5586 hi = hash_find(tp, key); 5587 if (!HASHITEM_EMPTY(hi)) 5588 { 5589 cur_aff = HI2AH(hi); 5590 if (cur_aff->ah_combine != (*items[2] == 'Y')) 5591 smsg((char_u *)_("Different combining flag in continued affix block in %s line %d: %s"), 5592 fname, lnum, items[1]); 5593 if (!cur_aff->ah_follows) 5594 smsg((char_u *)_("Duplicate affix in %s line %d: %s"), 5595 fname, lnum, items[1]); 5596 } 5597 else 5598 { 5599 /* New affix letter. */ 5600 cur_aff = (affheader_T *)getroom(spin, 5601 sizeof(affheader_T), TRUE); 5602 if (cur_aff == NULL) 5603 break; 5604 cur_aff->ah_flag = affitem2flag(aff->af_flagtype, items[1], 5605 fname, lnum); 5606 if (cur_aff->ah_flag == 0 || STRLEN(items[1]) >= AH_KEY_LEN) 5607 break; 5608 if (cur_aff->ah_flag == aff->af_bad 5609 || cur_aff->ah_flag == aff->af_rare 5610 || cur_aff->ah_flag == aff->af_keepcase 5611 || cur_aff->ah_flag == aff->af_needaffix 5612 || cur_aff->ah_flag == aff->af_circumfix 5613 || cur_aff->ah_flag == aff->af_nosuggest 5614 || cur_aff->ah_flag == aff->af_needcomp 5615 || cur_aff->ah_flag == aff->af_comproot) 5616 smsg((char_u *)_("Affix also used for BAD/RARE/KEEPCASE/NEEDAFFIX/NEEDCOMPOUND/NOSUGGEST in %s line %d: %s"), 5617 fname, lnum, items[1]); 5618 STRCPY(cur_aff->ah_key, items[1]); 5619 hash_add(tp, cur_aff->ah_key); 5620 5621 cur_aff->ah_combine = (*items[2] == 'Y'); 5622 } 5623 5624 /* Check for the "S" flag, which apparently means that another 5625 * block with the same affix name is following. */ 5626 if (itemcnt > lasti && STRCMP(items[lasti], "S") == 0) 5627 { 5628 ++lasti; 5629 cur_aff->ah_follows = TRUE; 5630 } 5631 else 5632 cur_aff->ah_follows = FALSE; 5633 5634 /* Myspell allows extra text after the item, but that might 5635 * mean mistakes go unnoticed. Require a comment-starter. 
*/ 5636 if (itemcnt > lasti && *items[lasti] != '#') 5637 smsg((char_u *)_(e_afftrailing), fname, lnum, items[lasti]); 5638 5639 if (STRCMP(items[2], "Y") != 0 && STRCMP(items[2], "N") != 0) 5640 smsg((char_u *)_("Expected Y or N in %s line %d: %s"), 5641 fname, lnum, items[2]); 5642 5643 if (*items[0] == 'P' && aff->af_pfxpostpone) 5644 { 5645 if (cur_aff->ah_newID == 0) 5646 { 5647 /* Use a new number in the .spl file later, to be able 5648 * to handle multiple .aff files. */ 5649 check_renumber(spin); 5650 cur_aff->ah_newID = ++spin->si_newprefID; 5651 5652 /* We only really use ah_newID if the prefix is 5653 * postponed. We know that only after handling all 5654 * the items. */ 5655 did_postpone_prefix = FALSE; 5656 } 5657 else 5658 /* Did use the ID in a previous block. */ 5659 did_postpone_prefix = TRUE; 5660 } 5661 5662 aff_todo = atoi((char *)items[3]); 5663 } 5664 else if ((STRCMP(items[0], "PFX") == 0 5665 || STRCMP(items[0], "SFX") == 0) 5666 && aff_todo > 0 5667 && STRCMP(cur_aff->ah_key, items[1]) == 0 5668 && itemcnt >= 5) 5669 { 5670 affentry_T *aff_entry; 5671 int upper = FALSE; 5672 int lasti = 5; 5673 5674 /* Myspell allows extra text after the item, but that might 5675 * mean mistakes go unnoticed. Require a comment-starter, 5676 * unless IGNOREEXTRA is used. Hunspell uses a "-" item. */ 5677 if (itemcnt > lasti 5678 && !aff->af_ignoreextra 5679 && *items[lasti] != '#' 5680 && (STRCMP(items[lasti], "-") != 0 5681 || itemcnt != lasti + 1)) 5682 smsg((char_u *)_(e_afftrailing), fname, lnum, items[lasti]); 5683 5684 /* New item for an affix letter. 
*/ 5685 --aff_todo; 5686 aff_entry = (affentry_T *)getroom(spin, 5687 sizeof(affentry_T), TRUE); 5688 if (aff_entry == NULL) 5689 break; 5690 5691 if (STRCMP(items[2], "0") != 0) 5692 aff_entry->ae_chop = getroom_save(spin, items[2]); 5693 if (STRCMP(items[3], "0") != 0) 5694 { 5695 aff_entry->ae_add = getroom_save(spin, items[3]); 5696 5697 /* Recognize flags on the affix: abcd/XYZ */ 5698 aff_entry->ae_flags = vim_strchr(aff_entry->ae_add, '/'); 5699 if (aff_entry->ae_flags != NULL) 5700 { 5701 *aff_entry->ae_flags++ = NUL; 5702 aff_process_flags(aff, aff_entry); 5703 } 5704 } 5705 5706 /* Don't use an affix entry with non-ASCII characters when 5707 * "spin->si_ascii" is TRUE. */ 5708 if (!spin->si_ascii || !(has_non_ascii(aff_entry->ae_chop) 5709 || has_non_ascii(aff_entry->ae_add))) 5710 { 5711 aff_entry->ae_next = cur_aff->ah_first; 5712 cur_aff->ah_first = aff_entry; 5713 5714 if (STRCMP(items[4], ".") != 0) 5715 { 5716 char_u buf[MAXLINELEN]; 5717 5718 aff_entry->ae_cond = getroom_save(spin, items[4]); 5719 if (*items[0] == 'P') 5720 sprintf((char *)buf, "^%s", items[4]); 5721 else 5722 sprintf((char *)buf, "%s$", items[4]); 5723 aff_entry->ae_prog = vim_regcomp(buf, 5724 RE_MAGIC + RE_STRING + RE_STRICT); 5725 if (aff_entry->ae_prog == NULL) 5726 smsg((char_u *)_("Broken condition in %s line %d: %s"), 5727 fname, lnum, items[4]); 5728 } 5729 5730 /* For postponed prefixes we need an entry in si_prefcond 5731 * for the condition. Use an existing one if possible. 5732 * Can't be done for an affix with flags, ignoring 5733 * COMPOUNDFORBIDFLAG and COMPOUNDPERMITFLAG. */ 5734 if (*items[0] == 'P' && aff->af_pfxpostpone 5735 && aff_entry->ae_flags == NULL) 5736 { 5737 /* When the chop string is one lower-case letter and 5738 * the add string ends in the upper-case letter we set 5739 * the "upper" flag, clear "ae_chop" and remove the 5740 * letters from "ae_add". The condition must either 5741 * be empty or start with the same letter. 
*/ 5742 if (aff_entry->ae_chop != NULL 5743 && aff_entry->ae_add != NULL 5744 #ifdef FEAT_MBYTE 5745 && aff_entry->ae_chop[(*mb_ptr2len)( 5746 aff_entry->ae_chop)] == NUL 5747 #else 5748 && aff_entry->ae_chop[1] == NUL 5749 #endif 5750 ) 5751 { 5752 int c, c_up; 5753 5754 c = PTR2CHAR(aff_entry->ae_chop); 5755 c_up = SPELL_TOUPPER(c); 5756 if (c_up != c 5757 && (aff_entry->ae_cond == NULL 5758 || PTR2CHAR(aff_entry->ae_cond) == c)) 5759 { 5760 p = aff_entry->ae_add 5761 + STRLEN(aff_entry->ae_add); 5762 mb_ptr_back(aff_entry->ae_add, p); 5763 if (PTR2CHAR(p) == c_up) 5764 { 5765 upper = TRUE; 5766 aff_entry->ae_chop = NULL; 5767 *p = NUL; 5768 5769 /* The condition is matched with the 5770 * actual word, thus must check for the 5771 * upper-case letter. */ 5772 if (aff_entry->ae_cond != NULL) 5773 { 5774 char_u buf[MAXLINELEN]; 5775 #ifdef FEAT_MBYTE 5776 if (has_mbyte) 5777 { 5778 onecap_copy(items[4], buf, TRUE); 5779 aff_entry->ae_cond = getroom_save( 5780 spin, buf); 5781 } 5782 else 5783 #endif 5784 *aff_entry->ae_cond = c_up; 5785 if (aff_entry->ae_cond != NULL) 5786 { 5787 sprintf((char *)buf, "^%s", 5788 aff_entry->ae_cond); 5789 vim_regfree(aff_entry->ae_prog); 5790 aff_entry->ae_prog = vim_regcomp( 5791 buf, RE_MAGIC + RE_STRING); 5792 } 5793 } 5794 } 5795 } 5796 } 5797 5798 if (aff_entry->ae_chop == NULL 5799 && aff_entry->ae_flags == NULL) 5800 { 5801 int idx; 5802 char_u **pp; 5803 int n; 5804 5805 /* Find a previously used condition. */ 5806 for (idx = spin->si_prefcond.ga_len - 1; idx >= 0; 5807 --idx) 5808 { 5809 p = ((char_u **)spin->si_prefcond.ga_data)[idx]; 5810 if (str_equal(p, aff_entry->ae_cond)) 5811 break; 5812 } 5813 if (idx < 0 && ga_grow(&spin->si_prefcond, 1) == OK) 5814 { 5815 /* Not found, add a new condition. 
*/ 5816 idx = spin->si_prefcond.ga_len++; 5817 pp = ((char_u **)spin->si_prefcond.ga_data) 5818 + idx; 5819 if (aff_entry->ae_cond == NULL) 5820 *pp = NULL; 5821 else 5822 *pp = getroom_save(spin, 5823 aff_entry->ae_cond); 5824 } 5825 5826 /* Add the prefix to the prefix tree. */ 5827 if (aff_entry->ae_add == NULL) 5828 p = (char_u *)""; 5829 else 5830 p = aff_entry->ae_add; 5831 5832 /* PFX_FLAGS is a negative number, so that 5833 * tree_add_word() knows this is the prefix tree. */ 5834 n = PFX_FLAGS; 5835 if (!cur_aff->ah_combine) 5836 n |= WFP_NC; 5837 if (upper) 5838 n |= WFP_UP; 5839 if (aff_entry->ae_comppermit) 5840 n |= WFP_COMPPERMIT; 5841 if (aff_entry->ae_compforbid) 5842 n |= WFP_COMPFORBID; 5843 tree_add_word(spin, p, spin->si_prefroot, n, 5844 idx, cur_aff->ah_newID); 5845 did_postpone_prefix = TRUE; 5846 } 5847 5848 /* Didn't actually use ah_newID, backup si_newprefID. */ 5849 if (aff_todo == 0 && !did_postpone_prefix) 5850 { 5851 --spin->si_newprefID; 5852 cur_aff->ah_newID = 0; 5853 } 5854 } 5855 } 5856 } 5857 else if (is_aff_rule(items, itemcnt, "FOL", 2) && fol == NULL) 5858 { 5859 fol = vim_strsave(items[1]); 5860 } 5861 else if (is_aff_rule(items, itemcnt, "LOW", 2) && low == NULL) 5862 { 5863 low = vim_strsave(items[1]); 5864 } 5865 else if (is_aff_rule(items, itemcnt, "UPP", 2) && upp == NULL) 5866 { 5867 upp = vim_strsave(items[1]); 5868 } 5869 else if (is_aff_rule(items, itemcnt, "REP", 2) 5870 || is_aff_rule(items, itemcnt, "REPSAL", 2)) 5871 { 5872 /* Ignore REP/REPSAL count */; 5873 if (!isdigit(*items[1])) 5874 smsg((char_u *)_("Expected REP(SAL) count in %s line %d"), 5875 fname, lnum); 5876 } 5877 else if ((STRCMP(items[0], "REP") == 0 5878 || STRCMP(items[0], "REPSAL") == 0) 5879 && itemcnt >= 3) 5880 { 5881 /* REP/REPSAL item */ 5882 /* Myspell ignores extra arguments, we require it starts with 5883 * # to detect mistakes. 
*/ 5884 if (itemcnt > 3 && items[3][0] != '#') 5885 smsg((char_u *)_(e_afftrailing), fname, lnum, items[3]); 5886 if (items[0][3] == 'S' ? do_repsal : do_rep) 5887 { 5888 /* Replace underscore with space (can't include a space 5889 * directly). */ 5890 for (p = items[1]; *p != NUL; mb_ptr_adv(p)) 5891 if (*p == '_') 5892 *p = ' '; 5893 for (p = items[2]; *p != NUL; mb_ptr_adv(p)) 5894 if (*p == '_') 5895 *p = ' '; 5896 add_fromto(spin, items[0][3] == 'S' 5897 ? &spin->si_repsal 5898 : &spin->si_rep, items[1], items[2]); 5899 } 5900 } 5901 else if (is_aff_rule(items, itemcnt, "MAP", 2)) 5902 { 5903 /* MAP item or count */ 5904 if (!found_map) 5905 { 5906 /* First line contains the count. */ 5907 found_map = TRUE; 5908 if (!isdigit(*items[1])) 5909 smsg((char_u *)_("Expected MAP count in %s line %d"), 5910 fname, lnum); 5911 } 5912 else if (do_mapline) 5913 { 5914 int c; 5915 5916 /* Check that every character appears only once. */ 5917 for (p = items[1]; *p != NUL; ) 5918 { 5919 #ifdef FEAT_MBYTE 5920 c = mb_ptr2char_adv(&p); 5921 #else 5922 c = *p++; 5923 #endif 5924 if ((spin->si_map.ga_len > 0 5925 && vim_strchr(spin->si_map.ga_data, c) 5926 != NULL) 5927 || vim_strchr(p, c) != NULL) 5928 smsg((char_u *)_("Duplicate character in MAP in %s line %d"), 5929 fname, lnum); 5930 } 5931 5932 /* We simply concatenate all the MAP strings, separated by 5933 * slashes. */ 5934 ga_concat(&spin->si_map, items[1]); 5935 ga_append(&spin->si_map, '/'); 5936 } 5937 } 5938 /* Accept "SAL from to" and "SAL from to #comment". */ 5939 else if (is_aff_rule(items, itemcnt, "SAL", 3)) 5940 { 5941 if (do_sal) 5942 { 5943 /* SAL item (sounds-a-like) 5944 * Either one of the known keys or a from-to pair. 
*/ 5945 if (STRCMP(items[1], "followup") == 0) 5946 spin->si_followup = sal_to_bool(items[2]); 5947 else if (STRCMP(items[1], "collapse_result") == 0) 5948 spin->si_collapse = sal_to_bool(items[2]); 5949 else if (STRCMP(items[1], "remove_accents") == 0) 5950 spin->si_rem_accents = sal_to_bool(items[2]); 5951 else 5952 /* when "to" is "_" it means empty */ 5953 add_fromto(spin, &spin->si_sal, items[1], 5954 STRCMP(items[2], "_") == 0 ? (char_u *)"" 5955 : items[2]); 5956 } 5957 } 5958 else if (is_aff_rule(items, itemcnt, "SOFOFROM", 2) 5959 && sofofrom == NULL) 5960 { 5961 sofofrom = getroom_save(spin, items[1]); 5962 } 5963 else if (is_aff_rule(items, itemcnt, "SOFOTO", 2) 5964 && sofoto == NULL) 5965 { 5966 sofoto = getroom_save(spin, items[1]); 5967 } 5968 else if (STRCMP(items[0], "COMMON") == 0) 5969 { 5970 int i; 5971 5972 for (i = 1; i < itemcnt; ++i) 5973 { 5974 if (HASHITEM_EMPTY(hash_find(&spin->si_commonwords, 5975 items[i]))) 5976 { 5977 p = vim_strsave(items[i]); 5978 if (p == NULL) 5979 break; 5980 hash_add(&spin->si_commonwords, p); 5981 } 5982 } 5983 } 5984 else 5985 smsg((char_u *)_("Unrecognized or duplicate item in %s line %d: %s"), 5986 fname, lnum, items[0]); 5987 } 5988 } 5989 5990 if (fol != NULL || low != NULL || upp != NULL) 5991 { 5992 if (spin->si_clear_chartab) 5993 { 5994 /* Clear the char type tables, don't want to use any of the 5995 * currently used spell properties. */ 5996 init_spell_chartab(); 5997 spin->si_clear_chartab = FALSE; 5998 } 5999 6000 /* 6001 * Don't write a word table for an ASCII file, so that we don't check 6002 * for conflicts with a word table that matches 'encoding'. 6003 * Don't write one for utf-8 either, we use utf_*() and 6004 * mb_get_class(), the list of chars in the file will be incomplete. 
6005 */ 6006 if (!spin->si_ascii 6007 #ifdef FEAT_MBYTE 6008 && !enc_utf8 6009 #endif 6010 ) 6011 { 6012 if (fol == NULL || low == NULL || upp == NULL) 6013 smsg((char_u *)_("Missing FOL/LOW/UPP line in %s"), fname); 6014 else 6015 (void)set_spell_chartab(fol, low, upp); 6016 } 6017 6018 vim_free(fol); 6019 vim_free(low); 6020 vim_free(upp); 6021 } 6022 6023 /* Use compound specifications of the .aff file for the spell info. */ 6024 if (compmax != 0) 6025 { 6026 aff_check_number(spin->si_compmax, compmax, "COMPOUNDWORDMAX"); 6027 spin->si_compmax = compmax; 6028 } 6029 6030 if (compminlen != 0) 6031 { 6032 aff_check_number(spin->si_compminlen, compminlen, "COMPOUNDMIN"); 6033 spin->si_compminlen = compminlen; 6034 } 6035 6036 if (compsylmax != 0) 6037 { 6038 if (syllable == NULL) 6039 smsg((char_u *)_("COMPOUNDSYLMAX used without SYLLABLE")); 6040 aff_check_number(spin->si_compsylmax, compsylmax, "COMPOUNDSYLMAX"); 6041 spin->si_compsylmax = compsylmax; 6042 } 6043 6044 if (compoptions != 0) 6045 { 6046 aff_check_number(spin->si_compoptions, compoptions, "COMPOUND options"); 6047 spin->si_compoptions |= compoptions; 6048 } 6049 6050 if (compflags != NULL) 6051 process_compflags(spin, aff, compflags); 6052 6053 /* Check that we didn't use too many renumbered flags. */ 6054 if (spin->si_newcompID < spin->si_newprefID) 6055 { 6056 if (spin->si_newcompID == 127 || spin->si_newcompID == 255) 6057 MSG(_("Too many postponed prefixes")); 6058 else if (spin->si_newprefID == 0 || spin->si_newprefID == 127) 6059 MSG(_("Too many compound flags")); 6060 else 6061 MSG(_("Too many postponed prefixes and/or compound flags")); 6062 } 6063 6064 if (syllable != NULL) 6065 { 6066 aff_check_string(spin->si_syllable, syllable, "SYLLABLE"); 6067 spin->si_syllable = syllable; 6068 } 6069 6070 if (sofofrom != NULL || sofoto != NULL) 6071 { 6072 if (sofofrom == NULL || sofoto == NULL) 6073 smsg((char_u *)_("Missing SOFO%s line in %s"), 6074 sofofrom == NULL ? 
"FROM" : "TO", fname); 6075 else if (spin->si_sal.ga_len > 0) 6076 smsg((char_u *)_("Both SAL and SOFO lines in %s"), fname); 6077 else 6078 { 6079 aff_check_string(spin->si_sofofr, sofofrom, "SOFOFROM"); 6080 aff_check_string(spin->si_sofoto, sofoto, "SOFOTO"); 6081 spin->si_sofofr = sofofrom; 6082 spin->si_sofoto = sofoto; 6083 } 6084 } 6085 6086 if (midword != NULL) 6087 { 6088 aff_check_string(spin->si_midword, midword, "MIDWORD"); 6089 spin->si_midword = midword; 6090 } 6091 6092 vim_free(pc); 6093 fclose(fd); 6094 return aff; 6095 } 6096 6097 /* 6098 * Return TRUE when items[0] equals "rulename", there are "mincount" items or 6099 * a comment is following after item "mincount". 6100 */ 6101 static int 6102 is_aff_rule( 6103 char_u **items, 6104 int itemcnt, 6105 char *rulename, 6106 int mincount) 6107 { 6108 return (STRCMP(items[0], rulename) == 0 6109 && (itemcnt == mincount 6110 || (itemcnt > mincount && items[mincount][0] == '#'))); 6111 } 6112 6113 /* 6114 * For affix "entry" move COMPOUNDFORBIDFLAG and COMPOUNDPERMITFLAG from 6115 * ae_flags to ae_comppermit and ae_compforbid. 6116 */ 6117 static void 6118 aff_process_flags(afffile_T *affile, affentry_T *entry) 6119 { 6120 char_u *p; 6121 char_u *prevp; 6122 unsigned flag; 6123 6124 if (entry->ae_flags != NULL 6125 && (affile->af_compforbid != 0 || affile->af_comppermit != 0)) 6126 { 6127 for (p = entry->ae_flags; *p != NUL; ) 6128 { 6129 prevp = p; 6130 flag = get_affitem(affile->af_flagtype, &p); 6131 if (flag == affile->af_comppermit || flag == affile->af_compforbid) 6132 { 6133 STRMOVE(prevp, p); 6134 p = prevp; 6135 if (flag == affile->af_comppermit) 6136 entry->ae_comppermit = TRUE; 6137 else 6138 entry->ae_compforbid = TRUE; 6139 } 6140 if (affile->af_flagtype == AFT_NUM && *p == ',') 6141 ++p; 6142 } 6143 if (*entry->ae_flags == NUL) 6144 entry->ae_flags = NULL; /* nothing left */ 6145 } 6146 } 6147 6148 /* 6149 * Return TRUE if "s" is the name of an info item in the affix file. 
6150 */ 6151 static int 6152 spell_info_item(char_u *s) 6153 { 6154 return STRCMP(s, "NAME") == 0 6155 || STRCMP(s, "HOME") == 0 6156 || STRCMP(s, "VERSION") == 0 6157 || STRCMP(s, "AUTHOR") == 0 6158 || STRCMP(s, "EMAIL") == 0 6159 || STRCMP(s, "COPYRIGHT") == 0; 6160 } 6161 6162 /* 6163 * Turn an affix flag name into a number, according to the FLAG type. 6164 * returns zero for failure. 6165 */ 6166 static unsigned 6167 affitem2flag( 6168 int flagtype, 6169 char_u *item, 6170 char_u *fname, 6171 int lnum) 6172 { 6173 unsigned res; 6174 char_u *p = item; 6175 6176 res = get_affitem(flagtype, &p); 6177 if (res == 0) 6178 { 6179 if (flagtype == AFT_NUM) 6180 smsg((char_u *)_("Flag is not a number in %s line %d: %s"), 6181 fname, lnum, item); 6182 else 6183 smsg((char_u *)_("Illegal flag in %s line %d: %s"), 6184 fname, lnum, item); 6185 } 6186 if (*p != NUL) 6187 { 6188 smsg((char_u *)_(e_affname), fname, lnum, item); 6189 return 0; 6190 } 6191 6192 return res; 6193 } 6194 6195 /* 6196 * Get one affix name from "*pp" and advance the pointer. 6197 * Returns zero for an error, still advances the pointer then. 6198 */ 6199 static unsigned 6200 get_affitem(int flagtype, char_u **pp) 6201 { 6202 int res; 6203 6204 if (flagtype == AFT_NUM) 6205 { 6206 if (!VIM_ISDIGIT(**pp)) 6207 { 6208 ++*pp; /* always advance, avoid getting stuck */ 6209 return 0; 6210 } 6211 res = getdigits(pp); 6212 } 6213 else 6214 { 6215 #ifdef FEAT_MBYTE 6216 res = mb_ptr2char_adv(pp); 6217 #else 6218 res = *(*pp)++; 6219 #endif 6220 if (flagtype == AFT_LONG || (flagtype == AFT_CAPLONG 6221 && res >= 'A' && res <= 'Z')) 6222 { 6223 if (**pp == NUL) 6224 return 0; 6225 #ifdef FEAT_MBYTE 6226 res = mb_ptr2char_adv(pp) + (res << 16); 6227 #else 6228 res = *(*pp)++ + (res << 16); 6229 #endif 6230 } 6231 } 6232 return res; 6233 } 6234 6235 /* 6236 * Process the "compflags" string used in an affix file and append it to 6237 * spin->si_compflags. 
6238 * The processing involves changing the affix names to ID numbers, so that 6239 * they fit in one byte. 6240 */ 6241 static void 6242 process_compflags( 6243 spellinfo_T *spin, 6244 afffile_T *aff, 6245 char_u *compflags) 6246 { 6247 char_u *p; 6248 char_u *prevp; 6249 unsigned flag; 6250 compitem_T *ci; 6251 int id; 6252 int len; 6253 char_u *tp; 6254 char_u key[AH_KEY_LEN]; 6255 hashitem_T *hi; 6256 6257 /* Make room for the old and the new compflags, concatenated with a / in 6258 * between. Processing it makes it shorter, but we don't know by how 6259 * much, thus allocate the maximum. */ 6260 len = (int)STRLEN(compflags) + 1; 6261 if (spin->si_compflags != NULL) 6262 len += (int)STRLEN(spin->si_compflags) + 1; 6263 p = getroom(spin, len, FALSE); 6264 if (p == NULL) 6265 return; 6266 if (spin->si_compflags != NULL) 6267 { 6268 STRCPY(p, spin->si_compflags); 6269 STRCAT(p, "/"); 6270 } 6271 spin->si_compflags = p; 6272 tp = p + STRLEN(p); 6273 6274 for (p = compflags; *p != NUL; ) 6275 { 6276 if (vim_strchr((char_u *)"/?*+[]", *p) != NULL) 6277 /* Copy non-flag characters directly. */ 6278 *tp++ = *p++; 6279 else 6280 { 6281 /* First get the flag number, also checks validity. */ 6282 prevp = p; 6283 flag = get_affitem(aff->af_flagtype, &p); 6284 if (flag != 0) 6285 { 6286 /* Find the flag in the hashtable. If it was used before, use 6287 * the existing ID. Otherwise add a new entry. */ 6288 vim_strncpy(key, prevp, p - prevp); 6289 hi = hash_find(&aff->af_comp, key); 6290 if (!HASHITEM_EMPTY(hi)) 6291 id = HI2CI(hi)->ci_newID; 6292 else 6293 { 6294 ci = (compitem_T *)getroom(spin, sizeof(compitem_T), TRUE); 6295 if (ci == NULL) 6296 break; 6297 STRCPY(ci->ci_key, key); 6298 ci->ci_flag = flag; 6299 /* Avoid using a flag ID that has a special meaning in a 6300 * regexp (also inside []). 
*/ 6301 do 6302 { 6303 check_renumber(spin); 6304 id = spin->si_newcompID--; 6305 } while (vim_strchr((char_u *)"/?*+[]\\-^", id) != NULL); 6306 ci->ci_newID = id; 6307 hash_add(&aff->af_comp, ci->ci_key); 6308 } 6309 *tp++ = id; 6310 } 6311 if (aff->af_flagtype == AFT_NUM && *p == ',') 6312 ++p; 6313 } 6314 } 6315 6316 *tp = NUL; 6317 } 6318 6319 /* 6320 * Check that the new IDs for postponed affixes and compounding don't overrun 6321 * each other. We have almost 255 available, but start at 0-127 to avoid 6322 * using two bytes for utf-8. When the 0-127 range is used up go to 128-255. 6323 * When that is used up an error message is given. 6324 */ 6325 static void 6326 check_renumber(spellinfo_T *spin) 6327 { 6328 if (spin->si_newprefID == spin->si_newcompID && spin->si_newcompID < 128) 6329 { 6330 spin->si_newprefID = 127; 6331 spin->si_newcompID = 255; 6332 } 6333 } 6334 6335 /* 6336 * Return TRUE if flag "flag" appears in affix list "afflist". 6337 */ 6338 static int 6339 flag_in_afflist(int flagtype, char_u *afflist, unsigned flag) 6340 { 6341 char_u *p; 6342 unsigned n; 6343 6344 switch (flagtype) 6345 { 6346 case AFT_CHAR: 6347 return vim_strchr(afflist, flag) != NULL; 6348 6349 case AFT_CAPLONG: 6350 case AFT_LONG: 6351 for (p = afflist; *p != NUL; ) 6352 { 6353 #ifdef FEAT_MBYTE 6354 n = mb_ptr2char_adv(&p); 6355 #else 6356 n = *p++; 6357 #endif 6358 if ((flagtype == AFT_LONG || (n >= 'A' && n <= 'Z')) 6359 && *p != NUL) 6360 #ifdef FEAT_MBYTE 6361 n = mb_ptr2char_adv(&p) + (n << 16); 6362 #else 6363 n = *p++ + (n << 16); 6364 #endif 6365 if (n == flag) 6366 return TRUE; 6367 } 6368 break; 6369 6370 case AFT_NUM: 6371 for (p = afflist; *p != NUL; ) 6372 { 6373 n = getdigits(&p); 6374 if (n == flag) 6375 return TRUE; 6376 if (*p != NUL) /* skip over comma */ 6377 ++p; 6378 } 6379 break; 6380 } 6381 return FALSE; 6382 } 6383 6384 /* 6385 * Give a warning when "spinval" and "affval" numbers are set and not the same. 
6386 */ 6387 static void 6388 aff_check_number(int spinval, int affval, char *name) 6389 { 6390 if (spinval != 0 && spinval != affval) 6391 smsg((char_u *)_("%s value differs from what is used in another .aff file"), name); 6392 } 6393 6394 /* 6395 * Give a warning when "spinval" and "affval" strings are set and not the same. 6396 */ 6397 static void 6398 aff_check_string(char_u *spinval, char_u *affval, char *name) 6399 { 6400 if (spinval != NULL && STRCMP(spinval, affval) != 0) 6401 smsg((char_u *)_("%s value differs from what is used in another .aff file"), name); 6402 } 6403 6404 /* 6405 * Return TRUE if strings "s1" and "s2" are equal. Also consider both being 6406 * NULL as equal. 6407 */ 6408 static int 6409 str_equal(char_u *s1, char_u *s2) 6410 { 6411 if (s1 == NULL || s2 == NULL) 6412 return s1 == s2; 6413 return STRCMP(s1, s2) == 0; 6414 } 6415 6416 /* 6417 * Add a from-to item to "gap". Used for REP and SAL items. 6418 * They are stored case-folded. 6419 */ 6420 static void 6421 add_fromto( 6422 spellinfo_T *spin, 6423 garray_T *gap, 6424 char_u *from, 6425 char_u *to) 6426 { 6427 fromto_T *ftp; 6428 char_u word[MAXWLEN]; 6429 6430 if (ga_grow(gap, 1) == OK) 6431 { 6432 ftp = ((fromto_T *)gap->ga_data) + gap->ga_len; 6433 (void)spell_casefold(from, (int)STRLEN(from), word, MAXWLEN); 6434 ftp->ft_from = getroom_save(spin, word); 6435 (void)spell_casefold(to, (int)STRLEN(to), word, MAXWLEN); 6436 ftp->ft_to = getroom_save(spin, word); 6437 ++gap->ga_len; 6438 } 6439 } 6440 6441 /* 6442 * Convert a boolean argument in a SAL line to TRUE or FALSE; 6443 */ 6444 static int 6445 sal_to_bool(char_u *s) 6446 { 6447 return STRCMP(s, "1") == 0 || STRCMP(s, "true") == 0; 6448 } 6449 6450 /* 6451 * Free the structure filled by spell_read_aff(). 
6452 */ 6453 static void 6454 spell_free_aff(afffile_T *aff) 6455 { 6456 hashtab_T *ht; 6457 hashitem_T *hi; 6458 int todo; 6459 affheader_T *ah; 6460 affentry_T *ae; 6461 6462 vim_free(aff->af_enc); 6463 6464 /* All this trouble to free the "ae_prog" items... */ 6465 for (ht = &aff->af_pref; ; ht = &aff->af_suff) 6466 { 6467 todo = (int)ht->ht_used; 6468 for (hi = ht->ht_array; todo > 0; ++hi) 6469 { 6470 if (!HASHITEM_EMPTY(hi)) 6471 { 6472 --todo; 6473 ah = HI2AH(hi); 6474 for (ae = ah->ah_first; ae != NULL; ae = ae->ae_next) 6475 vim_regfree(ae->ae_prog); 6476 } 6477 } 6478 if (ht == &aff->af_suff) 6479 break; 6480 } 6481 6482 hash_clear(&aff->af_pref); 6483 hash_clear(&aff->af_suff); 6484 hash_clear(&aff->af_comp); 6485 } 6486 6487 /* 6488 * Read dictionary file "fname". 6489 * Returns OK or FAIL; 6490 */ 6491 static int 6492 spell_read_dic(spellinfo_T *spin, char_u *fname, afffile_T *affile) 6493 { 6494 hashtab_T ht; 6495 char_u line[MAXLINELEN]; 6496 char_u *p; 6497 char_u *afflist; 6498 char_u store_afflist[MAXWLEN]; 6499 int pfxlen; 6500 int need_affix; 6501 char_u *dw; 6502 char_u *pc; 6503 char_u *w; 6504 int l; 6505 hash_T hash; 6506 hashitem_T *hi; 6507 FILE *fd; 6508 int lnum = 1; 6509 int non_ascii = 0; 6510 int retval = OK; 6511 char_u message[MAXLINELEN + MAXWLEN]; 6512 int flags; 6513 int duplicate = 0; 6514 6515 /* 6516 * Open the file. 6517 */ 6518 fd = mch_fopen((char *)fname, "r"); 6519 if (fd == NULL) 6520 { 6521 EMSG2(_(e_notopen), fname); 6522 return FAIL; 6523 } 6524 6525 /* The hashtable is only used to detect duplicated words. */ 6526 hash_init(&ht); 6527 6528 vim_snprintf((char *)IObuff, IOSIZE, 6529 _("Reading dictionary file %s ..."), fname); 6530 spell_message(spin, IObuff); 6531 6532 /* start with a message for the first line */ 6533 spin->si_msg_count = 999999; 6534 6535 /* Read and ignore the first line: word count. 
*/ 6536 (void)vim_fgets(line, MAXLINELEN, fd); 6537 if (!vim_isdigit(*skipwhite(line))) 6538 EMSG2(_("E760: No word count in %s"), fname); 6539 6540 /* 6541 * Read all the lines in the file one by one. 6542 * The words are converted to 'encoding' here, before being added to 6543 * the hashtable. 6544 */ 6545 while (!vim_fgets(line, MAXLINELEN, fd) && !got_int) 6546 { 6547 line_breakcheck(); 6548 ++lnum; 6549 if (line[0] == '#' || line[0] == '/') 6550 continue; /* comment line */ 6551 6552 /* Remove CR, LF and white space from the end. White space halfway 6553 * the word is kept to allow e.g., "et al.". */ 6554 l = (int)STRLEN(line); 6555 while (l > 0 && line[l - 1] <= ' ') 6556 --l; 6557 if (l == 0) 6558 continue; /* empty line */ 6559 line[l] = NUL; 6560 6561 #ifdef FEAT_MBYTE 6562 /* Convert from "SET" to 'encoding' when needed. */ 6563 if (spin->si_conv.vc_type != CONV_NONE) 6564 { 6565 pc = string_convert(&spin->si_conv, line, NULL); 6566 if (pc == NULL) 6567 { 6568 smsg((char_u *)_("Conversion failure for word in %s line %d: %s"), 6569 fname, lnum, line); 6570 continue; 6571 } 6572 w = pc; 6573 } 6574 else 6575 #endif 6576 { 6577 pc = NULL; 6578 w = line; 6579 } 6580 6581 /* Truncate the word at the "/", set "afflist" to what follows. 6582 * Replace "\/" by "/" and "\\" by "\". */ 6583 afflist = NULL; 6584 for (p = w; *p != NUL; mb_ptr_adv(p)) 6585 { 6586 if (*p == '\\' && (p[1] == '\\' || p[1] == '/')) 6587 STRMOVE(p, p + 1); 6588 else if (*p == '/') 6589 { 6590 *p = NUL; 6591 afflist = p + 1; 6592 break; 6593 } 6594 } 6595 6596 /* Skip non-ASCII words when "spin->si_ascii" is TRUE. */ 6597 if (spin->si_ascii && has_non_ascii(w)) 6598 { 6599 ++non_ascii; 6600 vim_free(pc); 6601 continue; 6602 } 6603 6604 /* This takes time, print a message every 10000 words. 
*/ 6605 if (spin->si_verbose && spin->si_msg_count > 10000) 6606 { 6607 spin->si_msg_count = 0; 6608 vim_snprintf((char *)message, sizeof(message), 6609 _("line %6d, word %6d - %s"), 6610 lnum, spin->si_foldwcount + spin->si_keepwcount, w); 6611 msg_start(); 6612 msg_puts_long_attr(message, 0); 6613 msg_clr_eos(); 6614 msg_didout = FALSE; 6615 msg_col = 0; 6616 out_flush(); 6617 } 6618 6619 /* Store the word in the hashtable to be able to find duplicates. */ 6620 dw = (char_u *)getroom_save(spin, w); 6621 if (dw == NULL) 6622 { 6623 retval = FAIL; 6624 vim_free(pc); 6625 break; 6626 } 6627 6628 hash = hash_hash(dw); 6629 hi = hash_lookup(&ht, dw, hash); 6630 if (!HASHITEM_EMPTY(hi)) 6631 { 6632 if (p_verbose > 0) 6633 smsg((char_u *)_("Duplicate word in %s line %d: %s"), 6634 fname, lnum, dw); 6635 else if (duplicate == 0) 6636 smsg((char_u *)_("First duplicate word in %s line %d: %s"), 6637 fname, lnum, dw); 6638 ++duplicate; 6639 } 6640 else 6641 hash_add_item(&ht, hi, dw, hash); 6642 6643 flags = 0; 6644 store_afflist[0] = NUL; 6645 pfxlen = 0; 6646 need_affix = FALSE; 6647 if (afflist != NULL) 6648 { 6649 /* Extract flags from the affix list. */ 6650 flags |= get_affix_flags(affile, afflist); 6651 6652 if (affile->af_needaffix != 0 && flag_in_afflist( 6653 affile->af_flagtype, afflist, affile->af_needaffix)) 6654 need_affix = TRUE; 6655 6656 if (affile->af_pfxpostpone) 6657 /* Need to store the list of prefix IDs with the word. */ 6658 pfxlen = get_pfxlist(affile, afflist, store_afflist); 6659 6660 if (spin->si_compflags != NULL) 6661 /* Need to store the list of compound flags with the word. 6662 * Concatenate them to the list of prefix IDs. */ 6663 get_compflags(affile, afflist, store_afflist + pfxlen); 6664 } 6665 6666 /* Add the word to the word tree(s). 
*/ 6667 if (store_word(spin, dw, flags, spin->si_region, 6668 store_afflist, need_affix) == FAIL) 6669 retval = FAIL; 6670 6671 if (afflist != NULL) 6672 { 6673 /* Find all matching suffixes and add the resulting words. 6674 * Additionally do matching prefixes that combine. */ 6675 if (store_aff_word(spin, dw, afflist, affile, 6676 &affile->af_suff, &affile->af_pref, 6677 CONDIT_SUF, flags, store_afflist, pfxlen) == FAIL) 6678 retval = FAIL; 6679 6680 /* Find all matching prefixes and add the resulting words. */ 6681 if (store_aff_word(spin, dw, afflist, affile, 6682 &affile->af_pref, NULL, 6683 CONDIT_SUF, flags, store_afflist, pfxlen) == FAIL) 6684 retval = FAIL; 6685 } 6686 6687 vim_free(pc); 6688 } 6689 6690 if (duplicate > 0) 6691 smsg((char_u *)_("%d duplicate word(s) in %s"), duplicate, fname); 6692 if (spin->si_ascii && non_ascii > 0) 6693 smsg((char_u *)_("Ignored %d word(s) with non-ASCII characters in %s"), 6694 non_ascii, fname); 6695 hash_clear(&ht); 6696 6697 fclose(fd); 6698 return retval; 6699 } 6700 6701 /* 6702 * Check for affix flags in "afflist" that are turned into word flags. 6703 * Return WF_ flags. 
6704 */ 6705 static int 6706 get_affix_flags(afffile_T *affile, char_u *afflist) 6707 { 6708 int flags = 0; 6709 6710 if (affile->af_keepcase != 0 && flag_in_afflist( 6711 affile->af_flagtype, afflist, affile->af_keepcase)) 6712 flags |= WF_KEEPCAP | WF_FIXCAP; 6713 if (affile->af_rare != 0 && flag_in_afflist( 6714 affile->af_flagtype, afflist, affile->af_rare)) 6715 flags |= WF_RARE; 6716 if (affile->af_bad != 0 && flag_in_afflist( 6717 affile->af_flagtype, afflist, affile->af_bad)) 6718 flags |= WF_BANNED; 6719 if (affile->af_needcomp != 0 && flag_in_afflist( 6720 affile->af_flagtype, afflist, affile->af_needcomp)) 6721 flags |= WF_NEEDCOMP; 6722 if (affile->af_comproot != 0 && flag_in_afflist( 6723 affile->af_flagtype, afflist, affile->af_comproot)) 6724 flags |= WF_COMPROOT; 6725 if (affile->af_nosuggest != 0 && flag_in_afflist( 6726 affile->af_flagtype, afflist, affile->af_nosuggest)) 6727 flags |= WF_NOSUGGEST; 6728 return flags; 6729 } 6730 6731 /* 6732 * Get the list of prefix IDs from the affix list "afflist". 6733 * Used for PFXPOSTPONE. 6734 * Put the resulting flags in "store_afflist[MAXWLEN]" with a terminating NUL 6735 * and return the number of affixes. 6736 */ 6737 static int 6738 get_pfxlist( 6739 afffile_T *affile, 6740 char_u *afflist, 6741 char_u *store_afflist) 6742 { 6743 char_u *p; 6744 char_u *prevp; 6745 int cnt = 0; 6746 int id; 6747 char_u key[AH_KEY_LEN]; 6748 hashitem_T *hi; 6749 6750 for (p = afflist; *p != NUL; ) 6751 { 6752 prevp = p; 6753 if (get_affitem(affile->af_flagtype, &p) != 0) 6754 { 6755 /* A flag is a postponed prefix flag if it appears in "af_pref" 6756 * and it's ID is not zero. 
*/ 6757 vim_strncpy(key, prevp, p - prevp); 6758 hi = hash_find(&affile->af_pref, key); 6759 if (!HASHITEM_EMPTY(hi)) 6760 { 6761 id = HI2AH(hi)->ah_newID; 6762 if (id != 0) 6763 store_afflist[cnt++] = id; 6764 } 6765 } 6766 if (affile->af_flagtype == AFT_NUM && *p == ',') 6767 ++p; 6768 } 6769 6770 store_afflist[cnt] = NUL; 6771 return cnt; 6772 } 6773 6774 /* 6775 * Get the list of compound IDs from the affix list "afflist" that are used 6776 * for compound words. 6777 * Puts the flags in "store_afflist[]". 6778 */ 6779 static void 6780 get_compflags( 6781 afffile_T *affile, 6782 char_u *afflist, 6783 char_u *store_afflist) 6784 { 6785 char_u *p; 6786 char_u *prevp; 6787 int cnt = 0; 6788 char_u key[AH_KEY_LEN]; 6789 hashitem_T *hi; 6790 6791 for (p = afflist; *p != NUL; ) 6792 { 6793 prevp = p; 6794 if (get_affitem(affile->af_flagtype, &p) != 0) 6795 { 6796 /* A flag is a compound flag if it appears in "af_comp". */ 6797 vim_strncpy(key, prevp, p - prevp); 6798 hi = hash_find(&affile->af_comp, key); 6799 if (!HASHITEM_EMPTY(hi)) 6800 store_afflist[cnt++] = HI2CI(hi)->ci_newID; 6801 } 6802 if (affile->af_flagtype == AFT_NUM && *p == ',') 6803 ++p; 6804 } 6805 6806 store_afflist[cnt] = NUL; 6807 } 6808 6809 /* 6810 * Apply affixes to a word and store the resulting words. 6811 * "ht" is the hashtable with affentry_T that need to be applied, either 6812 * prefixes or suffixes. 6813 * "xht", when not NULL, is the prefix hashtable, to be used additionally on 6814 * the resulting words for combining affixes. 6815 * 6816 * Returns FAIL when out of memory. 6817 */ 6818 static int 6819 store_aff_word( 6820 spellinfo_T *spin, /* spell info */ 6821 char_u *word, /* basic word start */ 6822 char_u *afflist, /* list of names of supported affixes */ 6823 afffile_T *affile, 6824 hashtab_T *ht, 6825 hashtab_T *xht, 6826 int condit, /* CONDIT_SUF et al. 
*/ 6827 int flags, /* flags for the word */ 6828 char_u *pfxlist, /* list of prefix IDs */ 6829 int pfxlen) /* nr of flags in "pfxlist" for prefixes, rest 6830 * is compound flags */ 6831 { 6832 int todo; 6833 hashitem_T *hi; 6834 affheader_T *ah; 6835 affentry_T *ae; 6836 char_u newword[MAXWLEN]; 6837 int retval = OK; 6838 int i, j; 6839 char_u *p; 6840 int use_flags; 6841 char_u *use_pfxlist; 6842 int use_pfxlen; 6843 int need_affix; 6844 char_u store_afflist[MAXWLEN]; 6845 char_u pfx_pfxlist[MAXWLEN]; 6846 size_t wordlen = STRLEN(word); 6847 int use_condit; 6848 6849 todo = (int)ht->ht_used; 6850 for (hi = ht->ht_array; todo > 0 && retval == OK; ++hi) 6851 { 6852 if (!HASHITEM_EMPTY(hi)) 6853 { 6854 --todo; 6855 ah = HI2AH(hi); 6856 6857 /* Check that the affix combines, if required, and that the word 6858 * supports this affix. */ 6859 if (((condit & CONDIT_COMB) == 0 || ah->ah_combine) 6860 && flag_in_afflist(affile->af_flagtype, afflist, 6861 ah->ah_flag)) 6862 { 6863 /* Loop over all affix entries with this name. */ 6864 for (ae = ah->ah_first; ae != NULL; ae = ae->ae_next) 6865 { 6866 /* Check the condition. It's not logical to match case 6867 * here, but it is required for compatibility with 6868 * Myspell. 6869 * Another requirement from Myspell is that the chop 6870 * string is shorter than the word itself. 6871 * For prefixes, when "PFXPOSTPONE" was used, only do 6872 * prefixes with a chop string and/or flags. 6873 * When a previously added affix had CIRCUMFIX this one 6874 * must have it too, if it had not then this one must not 6875 * have one either. 
*/ 6876 if ((xht != NULL || !affile->af_pfxpostpone 6877 || ae->ae_chop != NULL 6878 || ae->ae_flags != NULL) 6879 && (ae->ae_chop == NULL 6880 || STRLEN(ae->ae_chop) < wordlen) 6881 && (ae->ae_prog == NULL 6882 || vim_regexec_prog(&ae->ae_prog, FALSE, 6883 word, (colnr_T)0)) 6884 && (((condit & CONDIT_CFIX) == 0) 6885 == ((condit & CONDIT_AFF) == 0 6886 || ae->ae_flags == NULL 6887 || !flag_in_afflist(affile->af_flagtype, 6888 ae->ae_flags, affile->af_circumfix)))) 6889 { 6890 /* Match. Remove the chop and add the affix. */ 6891 if (xht == NULL) 6892 { 6893 /* prefix: chop/add at the start of the word */ 6894 if (ae->ae_add == NULL) 6895 *newword = NUL; 6896 else 6897 vim_strncpy(newword, ae->ae_add, MAXWLEN - 1); 6898 p = word; 6899 if (ae->ae_chop != NULL) 6900 { 6901 /* Skip chop string. */ 6902 #ifdef FEAT_MBYTE 6903 if (has_mbyte) 6904 { 6905 i = mb_charlen(ae->ae_chop); 6906 for ( ; i > 0; --i) 6907 mb_ptr_adv(p); 6908 } 6909 else 6910 #endif 6911 p += STRLEN(ae->ae_chop); 6912 } 6913 STRCAT(newword, p); 6914 } 6915 else 6916 { 6917 /* suffix: chop/add at the end of the word */ 6918 vim_strncpy(newword, word, MAXWLEN - 1); 6919 if (ae->ae_chop != NULL) 6920 { 6921 /* Remove chop string. */ 6922 p = newword + STRLEN(newword); 6923 i = (int)MB_CHARLEN(ae->ae_chop); 6924 for ( ; i > 0; --i) 6925 mb_ptr_back(newword, p); 6926 *p = NUL; 6927 } 6928 if (ae->ae_add != NULL) 6929 STRCAT(newword, ae->ae_add); 6930 } 6931 6932 use_flags = flags; 6933 use_pfxlist = pfxlist; 6934 use_pfxlen = pfxlen; 6935 need_affix = FALSE; 6936 use_condit = condit | CONDIT_COMB | CONDIT_AFF; 6937 if (ae->ae_flags != NULL) 6938 { 6939 /* Extract flags from the affix list. 
*/ 6940 use_flags |= get_affix_flags(affile, ae->ae_flags); 6941 6942 if (affile->af_needaffix != 0 && flag_in_afflist( 6943 affile->af_flagtype, ae->ae_flags, 6944 affile->af_needaffix)) 6945 need_affix = TRUE; 6946 6947 /* When there is a CIRCUMFIX flag the other affix 6948 * must also have it and we don't add the word 6949 * with one affix. */ 6950 if (affile->af_circumfix != 0 && flag_in_afflist( 6951 affile->af_flagtype, ae->ae_flags, 6952 affile->af_circumfix)) 6953 { 6954 use_condit |= CONDIT_CFIX; 6955 if ((condit & CONDIT_CFIX) == 0) 6956 need_affix = TRUE; 6957 } 6958 6959 if (affile->af_pfxpostpone 6960 || spin->si_compflags != NULL) 6961 { 6962 if (affile->af_pfxpostpone) 6963 /* Get prefix IDS from the affix list. */ 6964 use_pfxlen = get_pfxlist(affile, 6965 ae->ae_flags, store_afflist); 6966 else 6967 use_pfxlen = 0; 6968 use_pfxlist = store_afflist; 6969 6970 /* Combine the prefix IDs. Avoid adding the 6971 * same ID twice. */ 6972 for (i = 0; i < pfxlen; ++i) 6973 { 6974 for (j = 0; j < use_pfxlen; ++j) 6975 if (pfxlist[i] == use_pfxlist[j]) 6976 break; 6977 if (j == use_pfxlen) 6978 use_pfxlist[use_pfxlen++] = pfxlist[i]; 6979 } 6980 6981 if (spin->si_compflags != NULL) 6982 /* Get compound IDS from the affix list. */ 6983 get_compflags(affile, ae->ae_flags, 6984 use_pfxlist + use_pfxlen); 6985 6986 /* Combine the list of compound flags. 6987 * Concatenate them to the prefix IDs list. 6988 * Avoid adding the same ID twice. */ 6989 for (i = pfxlen; pfxlist[i] != NUL; ++i) 6990 { 6991 for (j = use_pfxlen; 6992 use_pfxlist[j] != NUL; ++j) 6993 if (pfxlist[i] == use_pfxlist[j]) 6994 break; 6995 if (use_pfxlist[j] == NUL) 6996 { 6997 use_pfxlist[j++] = pfxlist[i]; 6998 use_pfxlist[j] = NUL; 6999 } 7000 } 7001 } 7002 } 7003 7004 /* Obey a "COMPOUNDFORBIDFLAG" of the affix: don't 7005 * use the compound flags. 
*/ 7006 if (use_pfxlist != NULL && ae->ae_compforbid) 7007 { 7008 vim_strncpy(pfx_pfxlist, use_pfxlist, use_pfxlen); 7009 use_pfxlist = pfx_pfxlist; 7010 } 7011 7012 /* When there are postponed prefixes... */ 7013 if (spin->si_prefroot != NULL 7014 && spin->si_prefroot->wn_sibling != NULL) 7015 { 7016 /* ... add a flag to indicate an affix was used. */ 7017 use_flags |= WF_HAS_AFF; 7018 7019 /* ... don't use a prefix list if combining 7020 * affixes is not allowed. But do use the 7021 * compound flags after them. */ 7022 if (!ah->ah_combine && use_pfxlist != NULL) 7023 use_pfxlist += use_pfxlen; 7024 } 7025 7026 /* When compounding is supported and there is no 7027 * "COMPOUNDPERMITFLAG" then forbid compounding on the 7028 * side where the affix is applied. */ 7029 if (spin->si_compflags != NULL && !ae->ae_comppermit) 7030 { 7031 if (xht != NULL) 7032 use_flags |= WF_NOCOMPAFT; 7033 else 7034 use_flags |= WF_NOCOMPBEF; 7035 } 7036 7037 /* Store the modified word. */ 7038 if (store_word(spin, newword, use_flags, 7039 spin->si_region, use_pfxlist, 7040 need_affix) == FAIL) 7041 retval = FAIL; 7042 7043 /* When added a prefix or a first suffix and the affix 7044 * has flags may add a(nother) suffix. RECURSIVE! */ 7045 if ((condit & CONDIT_SUF) && ae->ae_flags != NULL) 7046 if (store_aff_word(spin, newword, ae->ae_flags, 7047 affile, &affile->af_suff, xht, 7048 use_condit & (xht == NULL 7049 ? ~0 : ~CONDIT_SUF), 7050 use_flags, use_pfxlist, pfxlen) == FAIL) 7051 retval = FAIL; 7052 7053 /* When added a suffix and combining is allowed also 7054 * try adding a prefix additionally. Both for the 7055 * word flags and for the affix flags. RECURSIVE! 
*/ 7056 if (xht != NULL && ah->ah_combine) 7057 { 7058 if (store_aff_word(spin, newword, 7059 afflist, affile, 7060 xht, NULL, use_condit, 7061 use_flags, use_pfxlist, 7062 pfxlen) == FAIL 7063 || (ae->ae_flags != NULL 7064 && store_aff_word(spin, newword, 7065 ae->ae_flags, affile, 7066 xht, NULL, use_condit, 7067 use_flags, use_pfxlist, 7068 pfxlen) == FAIL)) 7069 retval = FAIL; 7070 } 7071 } 7072 } 7073 } 7074 } 7075 } 7076 7077 return retval; 7078 } 7079 7080 /* 7081 * Read a file with a list of words. 7082 */ 7083 static int 7084 spell_read_wordfile(spellinfo_T *spin, char_u *fname) 7085 { 7086 FILE *fd; 7087 long lnum = 0; 7088 char_u rline[MAXLINELEN]; 7089 char_u *line; 7090 char_u *pc = NULL; 7091 char_u *p; 7092 int l; 7093 int retval = OK; 7094 int did_word = FALSE; 7095 int non_ascii = 0; 7096 int flags; 7097 int regionmask; 7098 7099 /* 7100 * Open the file. 7101 */ 7102 fd = mch_fopen((char *)fname, "r"); 7103 if (fd == NULL) 7104 { 7105 EMSG2(_(e_notopen), fname); 7106 return FAIL; 7107 } 7108 7109 vim_snprintf((char *)IObuff, IOSIZE, _("Reading word file %s ..."), fname); 7110 spell_message(spin, IObuff); 7111 7112 /* 7113 * Read all the lines in the file one by one. 7114 */ 7115 while (!vim_fgets(rline, MAXLINELEN, fd) && !got_int) 7116 { 7117 line_breakcheck(); 7118 ++lnum; 7119 7120 /* Skip comment lines. */ 7121 if (*rline == '#') 7122 continue; 7123 7124 /* Remove CR, LF and white space from the end. */ 7125 l = (int)STRLEN(rline); 7126 while (l > 0 && rline[l - 1] <= ' ') 7127 --l; 7128 if (l == 0) 7129 continue; /* empty or blank line */ 7130 rline[l] = NUL; 7131 7132 /* Convert from "/encoding={encoding}" to 'encoding' when needed. 
*/ 7133 vim_free(pc); 7134 #ifdef FEAT_MBYTE 7135 if (spin->si_conv.vc_type != CONV_NONE) 7136 { 7137 pc = string_convert(&spin->si_conv, rline, NULL); 7138 if (pc == NULL) 7139 { 7140 smsg((char_u *)_("Conversion failure for word in %s line %d: %s"), 7141 fname, lnum, rline); 7142 continue; 7143 } 7144 line = pc; 7145 } 7146 else 7147 #endif 7148 { 7149 pc = NULL; 7150 line = rline; 7151 } 7152 7153 if (*line == '/') 7154 { 7155 ++line; 7156 if (STRNCMP(line, "encoding=", 9) == 0) 7157 { 7158 if (spin->si_conv.vc_type != CONV_NONE) 7159 smsg((char_u *)_("Duplicate /encoding= line ignored in %s line %d: %s"), 7160 fname, lnum, line - 1); 7161 else if (did_word) 7162 smsg((char_u *)_("/encoding= line after word ignored in %s line %d: %s"), 7163 fname, lnum, line - 1); 7164 else 7165 { 7166 #ifdef FEAT_MBYTE 7167 char_u *enc; 7168 7169 /* Setup for conversion to 'encoding'. */ 7170 line += 9; 7171 enc = enc_canonize(line); 7172 if (enc != NULL && !spin->si_ascii 7173 && convert_setup(&spin->si_conv, enc, 7174 p_enc) == FAIL) 7175 smsg((char_u *)_("Conversion in %s not supported: from %s to %s"), 7176 fname, line, p_enc); 7177 vim_free(enc); 7178 spin->si_conv.vc_fail = TRUE; 7179 #else 7180 smsg((char_u *)_("Conversion in %s not supported"), fname); 7181 #endif 7182 } 7183 continue; 7184 } 7185 7186 if (STRNCMP(line, "regions=", 8) == 0) 7187 { 7188 if (spin->si_region_count > 1) 7189 smsg((char_u *)_("Duplicate /regions= line ignored in %s line %d: %s"), 7190 fname, lnum, line); 7191 else 7192 { 7193 line += 8; 7194 if (STRLEN(line) > 16) 7195 smsg((char_u *)_("Too many regions in %s line %d: %s"), 7196 fname, lnum, line); 7197 else 7198 { 7199 spin->si_region_count = (int)STRLEN(line) / 2; 7200 STRCPY(spin->si_region_name, line); 7201 7202 /* Adjust the mask for a word valid in all regions. 
*/ 7203 spin->si_region = (1 << spin->si_region_count) - 1; 7204 } 7205 } 7206 continue; 7207 } 7208 7209 smsg((char_u *)_("/ line ignored in %s line %d: %s"), 7210 fname, lnum, line - 1); 7211 continue; 7212 } 7213 7214 flags = 0; 7215 regionmask = spin->si_region; 7216 7217 /* Check for flags and region after a slash. */ 7218 p = vim_strchr(line, '/'); 7219 if (p != NULL) 7220 { 7221 *p++ = NUL; 7222 while (*p != NUL) 7223 { 7224 if (*p == '=') /* keep-case word */ 7225 flags |= WF_KEEPCAP | WF_FIXCAP; 7226 else if (*p == '!') /* Bad, bad, wicked word. */ 7227 flags |= WF_BANNED; 7228 else if (*p == '?') /* Rare word. */ 7229 flags |= WF_RARE; 7230 else if (VIM_ISDIGIT(*p)) /* region number(s) */ 7231 { 7232 if ((flags & WF_REGION) == 0) /* first one */ 7233 regionmask = 0; 7234 flags |= WF_REGION; 7235 7236 l = *p - '0'; 7237 if (l > spin->si_region_count) 7238 { 7239 smsg((char_u *)_("Invalid region nr in %s line %d: %s"), 7240 fname, lnum, p); 7241 break; 7242 } 7243 regionmask |= 1 << (l - 1); 7244 } 7245 else 7246 { 7247 smsg((char_u *)_("Unrecognized flags in %s line %d: %s"), 7248 fname, lnum, p); 7249 break; 7250 } 7251 ++p; 7252 } 7253 } 7254 7255 /* Skip non-ASCII words when "spin->si_ascii" is TRUE. */ 7256 if (spin->si_ascii && has_non_ascii(line)) 7257 { 7258 ++non_ascii; 7259 continue; 7260 } 7261 7262 /* Normal word: store it. */ 7263 if (store_word(spin, line, flags, regionmask, NULL, FALSE) == FAIL) 7264 { 7265 retval = FAIL; 7266 break; 7267 } 7268 did_word = TRUE; 7269 } 7270 7271 vim_free(pc); 7272 fclose(fd); 7273 7274 if (spin->si_ascii && non_ascii > 0) 7275 { 7276 vim_snprintf((char *)IObuff, IOSIZE, 7277 _("Ignored %d words with non-ASCII characters"), non_ascii); 7278 spell_message(spin, IObuff); 7279 } 7280 7281 return retval; 7282 } 7283 7284 /* 7285 * Get part of an sblock_T, "len" bytes long. 7286 * This avoids calling free() for every little struct we use (and keeping 7287 * track of them). 7288 * The memory is cleared to all zeros. 
7289 * Returns NULL when out of memory. 7290 */ 7291 static void * 7292 getroom( 7293 spellinfo_T *spin, 7294 size_t len, /* length needed */ 7295 int align) /* align for pointer */ 7296 { 7297 char_u *p; 7298 sblock_T *bl = spin->si_blocks; 7299 7300 if (align && bl != NULL) 7301 /* Round size up for alignment. On some systems structures need to be 7302 * aligned to the size of a pointer (e.g., SPARC). */ 7303 bl->sb_used = (bl->sb_used + sizeof(char *) - 1) 7304 & ~(sizeof(char *) - 1); 7305 7306 if (bl == NULL || bl->sb_used + len > SBLOCKSIZE) 7307 { 7308 if (len >= SBLOCKSIZE) 7309 bl = NULL; 7310 else 7311 /* Allocate a block of memory. It is not freed until much later. */ 7312 bl = (sblock_T *)alloc_clear( 7313 (unsigned)(sizeof(sblock_T) + SBLOCKSIZE)); 7314 if (bl == NULL) 7315 { 7316 if (!spin->si_did_emsg) 7317 { 7318 EMSG(_("E845: Insufficient memory, word list will be incomplete")); 7319 spin->si_did_emsg = TRUE; 7320 } 7321 return NULL; 7322 } 7323 bl->sb_next = spin->si_blocks; 7324 spin->si_blocks = bl; 7325 bl->sb_used = 0; 7326 ++spin->si_blocks_cnt; 7327 } 7328 7329 p = bl->sb_data + bl->sb_used; 7330 bl->sb_used += (int)len; 7331 7332 return p; 7333 } 7334 7335 /* 7336 * Make a copy of a string into memory allocated with getroom(). 7337 * Returns NULL when out of memory. 7338 */ 7339 static char_u * 7340 getroom_save(spellinfo_T *spin, char_u *s) 7341 { 7342 char_u *sc; 7343 7344 sc = (char_u *)getroom(spin, STRLEN(s) + 1, FALSE); 7345 if (sc != NULL) 7346 STRCPY(sc, s); 7347 return sc; 7348 } 7349 7350 7351 /* 7352 * Free the list of allocated sblock_T. 7353 */ 7354 static void 7355 free_blocks(sblock_T *bl) 7356 { 7357 sblock_T *next; 7358 7359 while (bl != NULL) 7360 { 7361 next = bl->sb_next; 7362 vim_free(bl); 7363 bl = next; 7364 } 7365 } 7366 7367 /* 7368 * Allocate the root of a word tree. 7369 * Returns NULL when out of memory. 
7370 */ 7371 static wordnode_T * 7372 wordtree_alloc(spellinfo_T *spin) 7373 { 7374 return (wordnode_T *)getroom(spin, sizeof(wordnode_T), TRUE); 7375 } 7376 7377 /* 7378 * Store a word in the tree(s). 7379 * Always store it in the case-folded tree. For a keep-case word this is 7380 * useful when the word can also be used with all caps (no WF_FIXCAP flag) and 7381 * used to find suggestions. 7382 * For a keep-case word also store it in the keep-case tree. 7383 * When "pfxlist" is not NULL store the word for each postponed prefix ID and 7384 * compound flag. 7385 */ 7386 static int 7387 store_word( 7388 spellinfo_T *spin, 7389 char_u *word, 7390 int flags, /* extra flags, WF_BANNED */ 7391 int region, /* supported region(s) */ 7392 char_u *pfxlist, /* list of prefix IDs or NULL */ 7393 int need_affix) /* only store word with affix ID */ 7394 { 7395 int len = (int)STRLEN(word); 7396 int ct = captype(word, word + len); 7397 char_u foldword[MAXWLEN]; 7398 int res = OK; 7399 char_u *p; 7400 7401 (void)spell_casefold(word, len, foldword, MAXWLEN); 7402 for (p = pfxlist; res == OK; ++p) 7403 { 7404 if (!need_affix || (p != NULL && *p != NUL)) 7405 res = tree_add_word(spin, foldword, spin->si_foldroot, ct | flags, 7406 region, p == NULL ? 0 : *p); 7407 if (p == NULL || *p == NUL) 7408 break; 7409 } 7410 ++spin->si_foldwcount; 7411 7412 if (res == OK && (ct == WF_KEEPCAP || (flags & WF_KEEPCAP))) 7413 { 7414 for (p = pfxlist; res == OK; ++p) 7415 { 7416 if (!need_affix || (p != NULL && *p != NUL)) 7417 res = tree_add_word(spin, word, spin->si_keeproot, flags, 7418 region, p == NULL ? 0 : *p); 7419 if (p == NULL || *p == NUL) 7420 break; 7421 } 7422 ++spin->si_keepwcount; 7423 } 7424 return res; 7425 } 7426 7427 /* 7428 * Add word "word" to a word tree at "root". 7429 * When "flags" < 0 we are adding to the prefix tree where "flags" is used for 7430 * "rare" and "region" is the condition nr. 7431 * Returns FAIL when out of memory. 
7432 */ 7433 static int 7434 tree_add_word( 7435 spellinfo_T *spin, 7436 char_u *word, 7437 wordnode_T *root, 7438 int flags, 7439 int region, 7440 int affixID) 7441 { 7442 wordnode_T *node = root; 7443 wordnode_T *np; 7444 wordnode_T *copyp, **copyprev; 7445 wordnode_T **prev = NULL; 7446 int i; 7447 7448 /* Add each byte of the word to the tree, including the NUL at the end. */ 7449 for (i = 0; ; ++i) 7450 { 7451 /* When there is more than one reference to this node we need to make 7452 * a copy, so that we can modify it. Copy the whole list of siblings 7453 * (we don't optimize for a partly shared list of siblings). */ 7454 if (node != NULL && node->wn_refs > 1) 7455 { 7456 --node->wn_refs; 7457 copyprev = prev; 7458 for (copyp = node; copyp != NULL; copyp = copyp->wn_sibling) 7459 { 7460 /* Allocate a new node and copy the info. */ 7461 np = get_wordnode(spin); 7462 if (np == NULL) 7463 return FAIL; 7464 np->wn_child = copyp->wn_child; 7465 if (np->wn_child != NULL) 7466 ++np->wn_child->wn_refs; /* child gets extra ref */ 7467 np->wn_byte = copyp->wn_byte; 7468 if (np->wn_byte == NUL) 7469 { 7470 np->wn_flags = copyp->wn_flags; 7471 np->wn_region = copyp->wn_region; 7472 np->wn_affixID = copyp->wn_affixID; 7473 } 7474 7475 /* Link the new node in the list, there will be one ref. */ 7476 np->wn_refs = 1; 7477 if (copyprev != NULL) 7478 *copyprev = np; 7479 copyprev = &np->wn_sibling; 7480 7481 /* Let "node" point to the head of the copied list. */ 7482 if (copyp == node) 7483 node = np; 7484 } 7485 } 7486 7487 /* Look for the sibling that has the same character. They are sorted 7488 * on byte value, thus stop searching when a sibling is found with a 7489 * higher byte value. For zero bytes (end of word) the sorting is 7490 * done on flags and then on affixID. */ 7491 while (node != NULL 7492 && (node->wn_byte < word[i] 7493 || (node->wn_byte == NUL 7494 && (flags < 0 7495 ? 
node->wn_affixID < (unsigned)affixID 7496 : (node->wn_flags < (unsigned)(flags & WN_MASK) 7497 || (node->wn_flags == (flags & WN_MASK) 7498 && (spin->si_sugtree 7499 ? (node->wn_region & 0xffff) < region 7500 : node->wn_affixID 7501 < (unsigned)affixID))))))) 7502 { 7503 prev = &node->wn_sibling; 7504 node = *prev; 7505 } 7506 if (node == NULL 7507 || node->wn_byte != word[i] 7508 || (word[i] == NUL 7509 && (flags < 0 7510 || spin->si_sugtree 7511 || node->wn_flags != (flags & WN_MASK) 7512 || node->wn_affixID != affixID))) 7513 { 7514 /* Allocate a new node. */ 7515 np = get_wordnode(spin); 7516 if (np == NULL) 7517 return FAIL; 7518 np->wn_byte = word[i]; 7519 7520 /* If "node" is NULL this is a new child or the end of the sibling 7521 * list: ref count is one. Otherwise use ref count of sibling and 7522 * make ref count of sibling one (matters when inserting in front 7523 * of the list of siblings). */ 7524 if (node == NULL) 7525 np->wn_refs = 1; 7526 else 7527 { 7528 np->wn_refs = node->wn_refs; 7529 node->wn_refs = 1; 7530 } 7531 if (prev != NULL) 7532 *prev = np; 7533 np->wn_sibling = node; 7534 node = np; 7535 } 7536 7537 if (word[i] == NUL) 7538 { 7539 node->wn_flags = flags; 7540 node->wn_region |= region; 7541 node->wn_affixID = affixID; 7542 break; 7543 } 7544 prev = &node->wn_child; 7545 node = *prev; 7546 } 7547 #ifdef SPELL_PRINTTREE 7548 smsg((char_u *)"Added \"%s\"", word); 7549 spell_print_tree(root->wn_sibling); 7550 #endif 7551 7552 /* count nr of words added since last message */ 7553 ++spin->si_msg_count; 7554 7555 if (spin->si_compress_cnt > 1) 7556 { 7557 if (--spin->si_compress_cnt == 1) 7558 /* Did enough words to lower the block count limit. */ 7559 spin->si_blocks_cnt += compress_inc; 7560 } 7561 7562 /* 7563 * When we have allocated lots of memory we need to compress the word tree 7564 * to free up some room. But compression is slow, and we might actually 7565 * need that room, thus only compress in the following situations: 7566 * 1. 
When not compressed before (si_compress_cnt == 0): when using 7567 * "compress_start" blocks. 7568 * 2. When compressed before and used "compress_inc" blocks before 7569 * adding "compress_added" words (si_compress_cnt > 1). 7570 * 3. When compressed before, added "compress_added" words 7571 * (si_compress_cnt == 1) and the number of free nodes drops below the 7572 * maximum word length. 7573 */ 7574 #ifndef SPELL_COMPRESS_ALLWAYS 7575 if (spin->si_compress_cnt == 1 7576 ? spin->si_free_count < MAXWLEN 7577 : spin->si_blocks_cnt >= compress_start) 7578 #endif 7579 { 7580 /* Decrement the block counter. The effect is that we compress again 7581 * when the freed up room has been used and another "compress_inc" 7582 * blocks have been allocated. Unless "compress_added" words have 7583 * been added, then the limit is put back again. */ 7584 spin->si_blocks_cnt -= compress_inc; 7585 spin->si_compress_cnt = compress_added; 7586 7587 if (spin->si_verbose) 7588 { 7589 msg_start(); 7590 msg_puts((char_u *)_(msg_compressing)); 7591 msg_clr_eos(); 7592 msg_didout = FALSE; 7593 msg_col = 0; 7594 out_flush(); 7595 } 7596 7597 /* Compress both trees. Either they both have many nodes, which makes 7598 * compression useful, or one of them is small, which means 7599 * compression goes fast. But when filling the soundfold word tree 7600 * there is no keep-case tree. */ 7601 wordtree_compress(spin, spin->si_foldroot); 7602 if (affixID >= 0) 7603 wordtree_compress(spin, spin->si_keeproot); 7604 } 7605 7606 return OK; 7607 } 7608 7609 /* 7610 * Check the 'mkspellmem' option. Return FAIL if it's wrong. 7611 * Sets "sps_flags". 
7612 */ 7613 int 7614 spell_check_msm(void) 7615 { 7616 char_u *p = p_msm; 7617 long start = 0; 7618 long incr = 0; 7619 long added = 0; 7620 7621 if (!VIM_ISDIGIT(*p)) 7622 return FAIL; 7623 /* block count = (value * 1024) / SBLOCKSIZE (but avoid overflow)*/ 7624 start = (getdigits(&p) * 10) / (SBLOCKSIZE / 102); 7625 if (*p != ',') 7626 return FAIL; 7627 ++p; 7628 if (!VIM_ISDIGIT(*p)) 7629 return FAIL; 7630 incr = (getdigits(&p) * 102) / (SBLOCKSIZE / 10); 7631 if (*p != ',') 7632 return FAIL; 7633 ++p; 7634 if (!VIM_ISDIGIT(*p)) 7635 return FAIL; 7636 added = getdigits(&p) * 1024; 7637 if (*p != NUL) 7638 return FAIL; 7639 7640 if (start == 0 || incr == 0 || added == 0 || incr > start) 7641 return FAIL; 7642 7643 compress_start = start; 7644 compress_inc = incr; 7645 compress_added = added; 7646 return OK; 7647 } 7648 7649 7650 /* 7651 * Get a wordnode_T, either from the list of previously freed nodes or 7652 * allocate a new one. 7653 * Returns NULL when out of memory. 7654 */ 7655 static wordnode_T * 7656 get_wordnode(spellinfo_T *spin) 7657 { 7658 wordnode_T *n; 7659 7660 if (spin->si_first_free == NULL) 7661 n = (wordnode_T *)getroom(spin, sizeof(wordnode_T), TRUE); 7662 else 7663 { 7664 n = spin->si_first_free; 7665 spin->si_first_free = n->wn_child; 7666 vim_memset(n, 0, sizeof(wordnode_T)); 7667 --spin->si_free_count; 7668 } 7669 #ifdef SPELL_PRINTTREE 7670 if (n != NULL) 7671 n->wn_nr = ++spin->si_wordnode_nr; 7672 #endif 7673 return n; 7674 } 7675 7676 /* 7677 * Decrement the reference count on a node (which is the head of a list of 7678 * siblings). If the reference count becomes zero free the node and its 7679 * siblings. 7680 * Returns the number of nodes actually freed. 
7681 */ 7682 static int 7683 deref_wordnode(spellinfo_T *spin, wordnode_T *node) 7684 { 7685 wordnode_T *np; 7686 int cnt = 0; 7687 7688 if (--node->wn_refs == 0) 7689 { 7690 for (np = node; np != NULL; np = np->wn_sibling) 7691 { 7692 if (np->wn_child != NULL) 7693 cnt += deref_wordnode(spin, np->wn_child); 7694 free_wordnode(spin, np); 7695 ++cnt; 7696 } 7697 ++cnt; /* length field */ 7698 } 7699 return cnt; 7700 } 7701 7702 /* 7703 * Free a wordnode_T for re-use later. 7704 * Only the "wn_child" field becomes invalid. 7705 */ 7706 static void 7707 free_wordnode(spellinfo_T *spin, wordnode_T *n) 7708 { 7709 n->wn_child = spin->si_first_free; 7710 spin->si_first_free = n; 7711 ++spin->si_free_count; 7712 } 7713 7714 /* 7715 * Compress a tree: find tails that are identical and can be shared. 7716 */ 7717 static void 7718 wordtree_compress(spellinfo_T *spin, wordnode_T *root) 7719 { 7720 hashtab_T ht; 7721 int n; 7722 int tot = 0; 7723 int perc; 7724 7725 /* Skip the root itself, it's not actually used. The first sibling is the 7726 * start of the tree. */ 7727 if (root->wn_sibling != NULL) 7728 { 7729 hash_init(&ht); 7730 n = node_compress(spin, root->wn_sibling, &ht, &tot); 7731 7732 #ifndef SPELL_PRINTTREE 7733 if (spin->si_verbose || p_verbose > 2) 7734 #endif 7735 { 7736 if (tot > 1000000) 7737 perc = (tot - n) / (tot / 100); 7738 else if (tot == 0) 7739 perc = 0; 7740 else 7741 perc = (tot - n) * 100 / tot; 7742 vim_snprintf((char *)IObuff, IOSIZE, 7743 _("Compressed %d of %d nodes; %d (%d%%) remaining"), 7744 n, tot, tot - n, perc); 7745 spell_message(spin, IObuff); 7746 } 7747 #ifdef SPELL_PRINTTREE 7748 spell_print_tree(root->wn_sibling); 7749 #endif 7750 hash_clear(&ht); 7751 } 7752 } 7753 7754 /* 7755 * Compress a node, its siblings and its children, depth first. 7756 * Returns the number of compressed nodes. 
7757 */ 7758 static int 7759 node_compress( 7760 spellinfo_T *spin, 7761 wordnode_T *node, 7762 hashtab_T *ht, 7763 int *tot) /* total count of nodes before compressing, 7764 incremented while going through the tree */ 7765 { 7766 wordnode_T *np; 7767 wordnode_T *tp; 7768 wordnode_T *child; 7769 hash_T hash; 7770 hashitem_T *hi; 7771 int len = 0; 7772 unsigned nr, n; 7773 int compressed = 0; 7774 7775 /* 7776 * Go through the list of siblings. Compress each child and then try 7777 * finding an identical child to replace it. 7778 * Note that with "child" we mean not just the node that is pointed to, 7779 * but the whole list of siblings of which the child node is the first. 7780 */ 7781 for (np = node; np != NULL && !got_int; np = np->wn_sibling) 7782 { 7783 ++len; 7784 if ((child = np->wn_child) != NULL) 7785 { 7786 /* Compress the child first. This fills hashkey. */ 7787 compressed += node_compress(spin, child, ht, tot); 7788 7789 /* Try to find an identical child. */ 7790 hash = hash_hash(child->wn_u1.hashkey); 7791 hi = hash_lookup(ht, child->wn_u1.hashkey, hash); 7792 if (!HASHITEM_EMPTY(hi)) 7793 { 7794 /* There are children we encountered before with a hash value 7795 * identical to the current child. Now check if there is one 7796 * that is really identical. */ 7797 for (tp = HI2WN(hi); tp != NULL; tp = tp->wn_u2.next) 7798 if (node_equal(child, tp)) 7799 { 7800 /* Found one! Now use that child in place of the 7801 * current one. This means the current child and all 7802 * its siblings is unlinked from the tree. */ 7803 ++tp->wn_refs; 7804 compressed += deref_wordnode(spin, child); 7805 np->wn_child = tp; 7806 break; 7807 } 7808 if (tp == NULL) 7809 { 7810 /* No other child with this hash value equals the child of 7811 * the node, add it to the linked list after the first 7812 * item. 
*/ 7813 tp = HI2WN(hi); 7814 child->wn_u2.next = tp->wn_u2.next; 7815 tp->wn_u2.next = child; 7816 } 7817 } 7818 else 7819 /* No other child has this hash value, add it to the 7820 * hashtable. */ 7821 hash_add_item(ht, hi, child->wn_u1.hashkey, hash); 7822 } 7823 } 7824 *tot += len + 1; /* add one for the node that stores the length */ 7825 7826 /* 7827 * Make a hash key for the node and its siblings, so that we can quickly 7828 * find a lookalike node. This must be done after compressing the sibling 7829 * list, otherwise the hash key would become invalid by the compression. 7830 */ 7831 node->wn_u1.hashkey[0] = len; 7832 nr = 0; 7833 for (np = node; np != NULL; np = np->wn_sibling) 7834 { 7835 if (np->wn_byte == NUL) 7836 /* end node: use wn_flags, wn_region and wn_affixID */ 7837 n = np->wn_flags + (np->wn_region << 8) + (np->wn_affixID << 16); 7838 else 7839 /* byte node: use the byte value and the child pointer */ 7840 n = (unsigned)(np->wn_byte + ((long_u)np->wn_child << 8)); 7841 nr = nr * 101 + n; 7842 } 7843 7844 /* Avoid NUL bytes, it terminates the hash key. */ 7845 n = nr & 0xff; 7846 node->wn_u1.hashkey[1] = n == 0 ? 1 : n; 7847 n = (nr >> 8) & 0xff; 7848 node->wn_u1.hashkey[2] = n == 0 ? 1 : n; 7849 n = (nr >> 16) & 0xff; 7850 node->wn_u1.hashkey[3] = n == 0 ? 1 : n; 7851 n = (nr >> 24) & 0xff; 7852 node->wn_u1.hashkey[4] = n == 0 ? 1 : n; 7853 node->wn_u1.hashkey[5] = NUL; 7854 7855 /* Check for CTRL-C pressed now and then. */ 7856 fast_breakcheck(); 7857 7858 return compressed; 7859 } 7860 7861 /* 7862 * Return TRUE when two nodes have identical siblings and children. 7863 */ 7864 static int 7865 node_equal(wordnode_T *n1, wordnode_T *n2) 7866 { 7867 wordnode_T *p1; 7868 wordnode_T *p2; 7869 7870 for (p1 = n1, p2 = n2; p1 != NULL && p2 != NULL; 7871 p1 = p1->wn_sibling, p2 = p2->wn_sibling) 7872 if (p1->wn_byte != p2->wn_byte 7873 || (p1->wn_byte == NUL 7874 ? 
(p1->wn_flags != p2->wn_flags 7875 || p1->wn_region != p2->wn_region 7876 || p1->wn_affixID != p2->wn_affixID) 7877 : (p1->wn_child != p2->wn_child))) 7878 break; 7879 7880 return p1 == NULL && p2 == NULL; 7881 } 7882 7883 static int 7884 #ifdef __BORLANDC__ 7885 _RTLENTRYF 7886 #endif 7887 rep_compare(const void *s1, const void *s2); 7888 7889 /* 7890 * Function given to qsort() to sort the REP items on "from" string. 7891 */ 7892 static int 7893 #ifdef __BORLANDC__ 7894 _RTLENTRYF 7895 #endif 7896 rep_compare(const void *s1, const void *s2) 7897 { 7898 fromto_T *p1 = (fromto_T *)s1; 7899 fromto_T *p2 = (fromto_T *)s2; 7900 7901 return STRCMP(p1->ft_from, p2->ft_from); 7902 } 7903 7904 /* 7905 * Write the Vim .spl file "fname". 7906 * Return FAIL or OK; 7907 */ 7908 static int 7909 write_vim_spell(spellinfo_T *spin, char_u *fname) 7910 { 7911 FILE *fd; 7912 int regionmask; 7913 int round; 7914 wordnode_T *tree; 7915 int nodecount; 7916 int i; 7917 int l; 7918 garray_T *gap; 7919 fromto_T *ftp; 7920 char_u *p; 7921 int rr; 7922 int retval = OK; 7923 size_t fwv = 1; /* collect return value of fwrite() to avoid 7924 warnings from picky compiler */ 7925 7926 fd = mch_fopen((char *)fname, "w"); 7927 if (fd == NULL) 7928 { 7929 EMSG2(_(e_notopen), fname); 7930 return FAIL; 7931 } 7932 7933 /* <HEADER>: <fileID> <versionnr> */ 7934 /* <fileID> */ 7935 fwv &= fwrite(VIMSPELLMAGIC, VIMSPELLMAGICL, (size_t)1, fd); 7936 if (fwv != (size_t)1) 7937 /* Catch first write error, don't try writing more. */ 7938 goto theend; 7939 7940 putc(VIMSPELLVERSION, fd); /* <versionnr> */ 7941 7942 /* 7943 * <SECTIONS>: <section> ... 
<sectionend> 7944 */ 7945 7946 /* SN_INFO: <infotext> */ 7947 if (spin->si_info != NULL) 7948 { 7949 putc(SN_INFO, fd); /* <sectionID> */ 7950 putc(0, fd); /* <sectionflags> */ 7951 7952 i = (int)STRLEN(spin->si_info); 7953 put_bytes(fd, (long_u)i, 4); /* <sectionlen> */ 7954 fwv &= fwrite(spin->si_info, (size_t)i, (size_t)1, fd); /* <infotext> */ 7955 } 7956 7957 /* SN_REGION: <regionname> ... 7958 * Write the region names only if there is more than one. */ 7959 if (spin->si_region_count > 1) 7960 { 7961 putc(SN_REGION, fd); /* <sectionID> */ 7962 putc(SNF_REQUIRED, fd); /* <sectionflags> */ 7963 l = spin->si_region_count * 2; 7964 put_bytes(fd, (long_u)l, 4); /* <sectionlen> */ 7965 fwv &= fwrite(spin->si_region_name, (size_t)l, (size_t)1, fd); 7966 /* <regionname> ... */ 7967 regionmask = (1 << spin->si_region_count) - 1; 7968 } 7969 else 7970 regionmask = 0; 7971 7972 /* SN_CHARFLAGS: <charflagslen> <charflags> <folcharslen> <folchars> 7973 * 7974 * The table with character flags and the table for case folding. 7975 * This makes sure the same characters are recognized as word characters 7976 * when generating an when using a spell file. 7977 * Skip this for ASCII, the table may conflict with the one used for 7978 * 'encoding'. 7979 * Also skip this for an .add.spl file, the main spell file must contain 7980 * the table (avoids that it conflicts). File is shorter too. 7981 */ 7982 if (!spin->si_ascii && !spin->si_add) 7983 { 7984 char_u folchars[128 * 8]; 7985 int flags; 7986 7987 putc(SN_CHARFLAGS, fd); /* <sectionID> */ 7988 putc(SNF_REQUIRED, fd); /* <sectionflags> */ 7989 7990 /* Form the <folchars> string first, we need to know its length. 
*/ 7991 l = 0; 7992 for (i = 128; i < 256; ++i) 7993 { 7994 #ifdef FEAT_MBYTE 7995 if (has_mbyte) 7996 l += mb_char2bytes(spelltab.st_fold[i], folchars + l); 7997 else 7998 #endif 7999 folchars[l++] = spelltab.st_fold[i]; 8000 } 8001 put_bytes(fd, (long_u)(1 + 128 + 2 + l), 4); /* <sectionlen> */ 8002 8003 fputc(128, fd); /* <charflagslen> */ 8004 for (i = 128; i < 256; ++i) 8005 { 8006 flags = 0; 8007 if (spelltab.st_isw[i]) 8008 flags |= CF_WORD; 8009 if (spelltab.st_isu[i]) 8010 flags |= CF_UPPER; 8011 fputc(flags, fd); /* <charflags> */ 8012 } 8013 8014 put_bytes(fd, (long_u)l, 2); /* <folcharslen> */ 8015 fwv &= fwrite(folchars, (size_t)l, (size_t)1, fd); /* <folchars> */ 8016 } 8017 8018 /* SN_MIDWORD: <midword> */ 8019 if (spin->si_midword != NULL) 8020 { 8021 putc(SN_MIDWORD, fd); /* <sectionID> */ 8022 putc(SNF_REQUIRED, fd); /* <sectionflags> */ 8023 8024 i = (int)STRLEN(spin->si_midword); 8025 put_bytes(fd, (long_u)i, 4); /* <sectionlen> */ 8026 fwv &= fwrite(spin->si_midword, (size_t)i, (size_t)1, fd); 8027 /* <midword> */ 8028 } 8029 8030 /* SN_PREFCOND: <prefcondcnt> <prefcond> ... */ 8031 if (spin->si_prefcond.ga_len > 0) 8032 { 8033 putc(SN_PREFCOND, fd); /* <sectionID> */ 8034 putc(SNF_REQUIRED, fd); /* <sectionflags> */ 8035 8036 l = write_spell_prefcond(NULL, &spin->si_prefcond); 8037 put_bytes(fd, (long_u)l, 4); /* <sectionlen> */ 8038 8039 write_spell_prefcond(fd, &spin->si_prefcond); 8040 } 8041 8042 /* SN_REP: <repcount> <rep> ... 8043 * SN_SAL: <salflags> <salcount> <sal> ... 8044 * SN_REPSAL: <repcount> <rep> ... 
*/ 8045 8046 /* round 1: SN_REP section 8047 * round 2: SN_SAL section (unless SN_SOFO is used) 8048 * round 3: SN_REPSAL section */ 8049 for (round = 1; round <= 3; ++round) 8050 { 8051 if (round == 1) 8052 gap = &spin->si_rep; 8053 else if (round == 2) 8054 { 8055 /* Don't write SN_SAL when using a SN_SOFO section */ 8056 if (spin->si_sofofr != NULL && spin->si_sofoto != NULL) 8057 continue; 8058 gap = &spin->si_sal; 8059 } 8060 else 8061 gap = &spin->si_repsal; 8062 8063 /* Don't write the section if there are no items. */ 8064 if (gap->ga_len == 0) 8065 continue; 8066 8067 /* Sort the REP/REPSAL items. */ 8068 if (round != 2) 8069 qsort(gap->ga_data, (size_t)gap->ga_len, 8070 sizeof(fromto_T), rep_compare); 8071 8072 i = round == 1 ? SN_REP : (round == 2 ? SN_SAL : SN_REPSAL); 8073 putc(i, fd); /* <sectionID> */ 8074 8075 /* This is for making suggestions, section is not required. */ 8076 putc(0, fd); /* <sectionflags> */ 8077 8078 /* Compute the length of what follows. */ 8079 l = 2; /* count <repcount> or <salcount> */ 8080 for (i = 0; i < gap->ga_len; ++i) 8081 { 8082 ftp = &((fromto_T *)gap->ga_data)[i]; 8083 l += 1 + (int)STRLEN(ftp->ft_from); /* count <*fromlen> and <*from> */ 8084 l += 1 + (int)STRLEN(ftp->ft_to); /* count <*tolen> and <*to> */ 8085 } 8086 if (round == 2) 8087 ++l; /* count <salflags> */ 8088 put_bytes(fd, (long_u)l, 4); /* <sectionlen> */ 8089 8090 if (round == 2) 8091 { 8092 i = 0; 8093 if (spin->si_followup) 8094 i |= SAL_F0LLOWUP; 8095 if (spin->si_collapse) 8096 i |= SAL_COLLAPSE; 8097 if (spin->si_rem_accents) 8098 i |= SAL_REM_ACCENTS; 8099 putc(i, fd); /* <salflags> */ 8100 } 8101 8102 put_bytes(fd, (long_u)gap->ga_len, 2); /* <repcount> or <salcount> */ 8103 for (i = 0; i < gap->ga_len; ++i) 8104 { 8105 /* <rep> : <repfromlen> <repfrom> <reptolen> <repto> */ 8106 /* <sal> : <salfromlen> <salfrom> <saltolen> <salto> */ 8107 ftp = &((fromto_T *)gap->ga_data)[i]; 8108 for (rr = 1; rr <= 2; ++rr) 8109 { 8110 p = rr == 1 ? 
ftp->ft_from : ftp->ft_to; 8111 l = (int)STRLEN(p); 8112 putc(l, fd); 8113 if (l > 0) 8114 fwv &= fwrite(p, l, (size_t)1, fd); 8115 } 8116 } 8117 8118 } 8119 8120 /* SN_SOFO: <sofofromlen> <sofofrom> <sofotolen> <sofoto> 8121 * This is for making suggestions, section is not required. */ 8122 if (spin->si_sofofr != NULL && spin->si_sofoto != NULL) 8123 { 8124 putc(SN_SOFO, fd); /* <sectionID> */ 8125 putc(0, fd); /* <sectionflags> */ 8126 8127 l = (int)STRLEN(spin->si_sofofr); 8128 put_bytes(fd, (long_u)(l + STRLEN(spin->si_sofoto) + 4), 4); 8129 /* <sectionlen> */ 8130 8131 put_bytes(fd, (long_u)l, 2); /* <sofofromlen> */ 8132 fwv &= fwrite(spin->si_sofofr, l, (size_t)1, fd); /* <sofofrom> */ 8133 8134 l = (int)STRLEN(spin->si_sofoto); 8135 put_bytes(fd, (long_u)l, 2); /* <sofotolen> */ 8136 fwv &= fwrite(spin->si_sofoto, l, (size_t)1, fd); /* <sofoto> */ 8137 } 8138 8139 /* SN_WORDS: <word> ... 8140 * This is for making suggestions, section is not required. */ 8141 if (spin->si_commonwords.ht_used > 0) 8142 { 8143 putc(SN_WORDS, fd); /* <sectionID> */ 8144 putc(0, fd); /* <sectionflags> */ 8145 8146 /* round 1: count the bytes 8147 * round 2: write the bytes */ 8148 for (round = 1; round <= 2; ++round) 8149 { 8150 int todo; 8151 int len = 0; 8152 hashitem_T *hi; 8153 8154 todo = (int)spin->si_commonwords.ht_used; 8155 for (hi = spin->si_commonwords.ht_array; todo > 0; ++hi) 8156 if (!HASHITEM_EMPTY(hi)) 8157 { 8158 l = (int)STRLEN(hi->hi_key) + 1; 8159 len += l; 8160 if (round == 2) /* <word> */ 8161 fwv &= fwrite(hi->hi_key, (size_t)l, (size_t)1, fd); 8162 --todo; 8163 } 8164 if (round == 1) 8165 put_bytes(fd, (long_u)len, 4); /* <sectionlen> */ 8166 } 8167 } 8168 8169 /* SN_MAP: <mapstr> 8170 * This is for making suggestions, section is not required. 
*/ 8171 if (spin->si_map.ga_len > 0) 8172 { 8173 putc(SN_MAP, fd); /* <sectionID> */ 8174 putc(0, fd); /* <sectionflags> */ 8175 l = spin->si_map.ga_len; 8176 put_bytes(fd, (long_u)l, 4); /* <sectionlen> */ 8177 fwv &= fwrite(spin->si_map.ga_data, (size_t)l, (size_t)1, fd); 8178 /* <mapstr> */ 8179 } 8180 8181 /* SN_SUGFILE: <timestamp> 8182 * This is used to notify that a .sug file may be available and at the 8183 * same time allows for checking that a .sug file that is found matches 8184 * with this .spl file. That's because the word numbers must be exactly 8185 * right. */ 8186 if (!spin->si_nosugfile 8187 && (spin->si_sal.ga_len > 0 8188 || (spin->si_sofofr != NULL && spin->si_sofoto != NULL))) 8189 { 8190 putc(SN_SUGFILE, fd); /* <sectionID> */ 8191 putc(0, fd); /* <sectionflags> */ 8192 put_bytes(fd, (long_u)8, 4); /* <sectionlen> */ 8193 8194 /* Set si_sugtime and write it to the file. */ 8195 spin->si_sugtime = time(NULL); 8196 put_time(fd, spin->si_sugtime); /* <timestamp> */ 8197 } 8198 8199 /* SN_NOSPLITSUGS: nothing 8200 * This is used to notify that no suggestions with word splits are to be 8201 * made. */ 8202 if (spin->si_nosplitsugs) 8203 { 8204 putc(SN_NOSPLITSUGS, fd); /* <sectionID> */ 8205 putc(0, fd); /* <sectionflags> */ 8206 put_bytes(fd, (long_u)0, 4); /* <sectionlen> */ 8207 } 8208 8209 /* SN_NOCOMPUNDSUGS: nothing 8210 * This is used to notify that no suggestions with compounds are to be 8211 * made. */ 8212 if (spin->si_nocompoundsugs) 8213 { 8214 putc(SN_NOCOMPOUNDSUGS, fd); /* <sectionID> */ 8215 putc(0, fd); /* <sectionflags> */ 8216 put_bytes(fd, (long_u)0, 4); /* <sectionlen> */ 8217 } 8218 8219 /* SN_COMPOUND: compound info. 8220 * We don't mark it required, when not supported all compound words will 8221 * be bad words. 
*/ 8222 if (spin->si_compflags != NULL) 8223 { 8224 putc(SN_COMPOUND, fd); /* <sectionID> */ 8225 putc(0, fd); /* <sectionflags> */ 8226 8227 l = (int)STRLEN(spin->si_compflags); 8228 for (i = 0; i < spin->si_comppat.ga_len; ++i) 8229 l += (int)STRLEN(((char_u **)(spin->si_comppat.ga_data))[i]) + 1; 8230 put_bytes(fd, (long_u)(l + 7), 4); /* <sectionlen> */ 8231 8232 putc(spin->si_compmax, fd); /* <compmax> */ 8233 putc(spin->si_compminlen, fd); /* <compminlen> */ 8234 putc(spin->si_compsylmax, fd); /* <compsylmax> */ 8235 putc(0, fd); /* for Vim 7.0b compatibility */ 8236 putc(spin->si_compoptions, fd); /* <compoptions> */ 8237 put_bytes(fd, (long_u)spin->si_comppat.ga_len, 2); 8238 /* <comppatcount> */ 8239 for (i = 0; i < spin->si_comppat.ga_len; ++i) 8240 { 8241 p = ((char_u **)(spin->si_comppat.ga_data))[i]; 8242 putc((int)STRLEN(p), fd); /* <comppatlen> */ 8243 fwv &= fwrite(p, (size_t)STRLEN(p), (size_t)1, fd); 8244 /* <comppattext> */ 8245 } 8246 /* <compflags> */ 8247 fwv &= fwrite(spin->si_compflags, (size_t)STRLEN(spin->si_compflags), 8248 (size_t)1, fd); 8249 } 8250 8251 /* SN_NOBREAK: NOBREAK flag */ 8252 if (spin->si_nobreak) 8253 { 8254 putc(SN_NOBREAK, fd); /* <sectionID> */ 8255 putc(0, fd); /* <sectionflags> */ 8256 8257 /* It's empty, the presence of the section flags the feature. */ 8258 put_bytes(fd, (long_u)0, 4); /* <sectionlen> */ 8259 } 8260 8261 /* SN_SYLLABLE: syllable info. 8262 * We don't mark it required, when not supported syllables will not be 8263 * counted. 
*/ 8264 if (spin->si_syllable != NULL) 8265 { 8266 putc(SN_SYLLABLE, fd); /* <sectionID> */ 8267 putc(0, fd); /* <sectionflags> */ 8268 8269 l = (int)STRLEN(spin->si_syllable); 8270 put_bytes(fd, (long_u)l, 4); /* <sectionlen> */ 8271 fwv &= fwrite(spin->si_syllable, (size_t)l, (size_t)1, fd); 8272 /* <syllable> */ 8273 } 8274 8275 /* end of <SECTIONS> */ 8276 putc(SN_END, fd); /* <sectionend> */ 8277 8278 8279 /* 8280 * <LWORDTREE> <KWORDTREE> <PREFIXTREE> 8281 */ 8282 spin->si_memtot = 0; 8283 for (round = 1; round <= 3; ++round) 8284 { 8285 if (round == 1) 8286 tree = spin->si_foldroot->wn_sibling; 8287 else if (round == 2) 8288 tree = spin->si_keeproot->wn_sibling; 8289 else 8290 tree = spin->si_prefroot->wn_sibling; 8291 8292 /* Clear the index and wnode fields in the tree. */ 8293 clear_node(tree); 8294 8295 /* Count the number of nodes. Needed to be able to allocate the 8296 * memory when reading the nodes. Also fills in index for shared 8297 * nodes. */ 8298 nodecount = put_node(NULL, tree, 0, regionmask, round == 3); 8299 8300 /* number of nodes in 4 bytes */ 8301 put_bytes(fd, (long_u)nodecount, 4); /* <nodecount> */ 8302 spin->si_memtot += nodecount + nodecount * sizeof(int); 8303 8304 /* Write the nodes. */ 8305 (void)put_node(fd, tree, 0, regionmask, round == 3); 8306 } 8307 8308 /* Write another byte to check for errors (file system full). */ 8309 if (putc(0, fd) == EOF) 8310 retval = FAIL; 8311 theend: 8312 if (fclose(fd) == EOF) 8313 retval = FAIL; 8314 8315 if (fwv != (size_t)1) 8316 retval = FAIL; 8317 if (retval == FAIL) 8318 EMSG(_(e_write)); 8319 8320 return retval; 8321 } 8322 8323 /* 8324 * Clear the index and wnode fields of "node", it siblings and its 8325 * children. This is needed because they are a union with other items to save 8326 * space. 
 */
static void
clear_node(wordnode_T *node)
{
    wordnode_T  *np;

    if (node != NULL)
        for (np = node; np != NULL; np = np->wn_sibling)
        {
            np->wn_u1.index = 0;
            np->wn_u2.wnode = NULL;

            /* A NUL byte marks the end of a word; only other nodes are
             * followed to a child. */
            if (np->wn_byte != NUL)
                clear_node(np->wn_child);
        }
}


/*
 * Dump a word tree at node "node".
 *
 * This first writes the list of possible bytes (siblings).  Then for each
 * byte recursively write the children.
 *
 * "idx" is the index this node gets; every sibling list takes one index for
 * the count plus one for each sibling.  A child list that was already given
 * an index through another parent is not written again, a BY_INDEX reference
 * to it is written instead.
 *
 * NOTE: The code here must match the code in read_tree_node(), since
 * assumptions are made about the indexes (so that we don't have to write them
 * in the file).
 *
 * Returns the number of nodes used.  Returns zero for an empty tree and also
 * when writing a byte fails (after giving an error message).
 */
static int
put_node(
    FILE        *fd,            /* NULL when only counting */
    wordnode_T  *node,
    int         idx,
    int         regionmask,
    int         prefixtree)     /* TRUE for PREFIXTREE */
{
    int         newindex = idx;
    int         siblingcount = 0;
    wordnode_T  *np;
    int         flags;

    /* If "node" is zero the tree is empty. */
    if (node == NULL)
        return 0;

    /* Store the index where this node is written. */
    node->wn_u1.index = idx;

    /* Count the number of siblings. */
    for (np = node; np != NULL; np = np->wn_sibling)
        ++siblingcount;

    /* Write the sibling count. */
    if (fd != NULL)
        putc(siblingcount, fd);                 /* <siblingcount> */

    /* Write each sibling byte and optionally extra info. */
    for (np = node; np != NULL; np = np->wn_sibling)
    {
        if (np->wn_byte == 0)
        {
            if (fd != NULL)
            {
                /* For a NUL byte (end of word) write the flags etc. */
                if (prefixtree)
                {
                    /* In PREFIXTREE write the required affixID and the
                     * associated condition nr (stored in wn_region).  The
                     * byte value is misused to store the "rare" and "not
                     * combining" flags */
                    if (np->wn_flags == (short_u)PFX_FLAGS)
                        putc(BY_NOFLAGS, fd);           /* <byte> */
                    else
                    {
                        putc(BY_FLAGS, fd);             /* <byte> */
                        putc(np->wn_flags, fd);         /* <pflags> */
                    }
                    putc(np->wn_affixID, fd);           /* <affixID> */
                    put_bytes(fd, (long_u)np->wn_region, 2); /* <prefcondnr> */
                }
                else
                {
                    /* For word trees we write the flag/region items. */
                    flags = np->wn_flags;
                    /* The region is only written when it differs from
                     * "regionmask"; the affix ID only when it is not zero. */
                    if (regionmask != 0 && np->wn_region != regionmask)
                        flags |= WF_REGION;
                    if (np->wn_affixID != 0)
                        flags |= WF_AFX;
                    if (flags == 0)
                    {
                        /* word without flags or region */
                        putc(BY_NOFLAGS, fd);                   /* <byte> */
                    }
                    else
                    {
                        /* Flags that don't fit in one byte need a second
                         * flags byte. */
                        if (np->wn_flags >= 0x100)
                        {
                            putc(BY_FLAGS2, fd);                /* <byte> */
                            putc(flags, fd);                    /* <flags> */
                            putc((unsigned)flags >> 8, fd);     /* <flags2> */
                        }
                        else
                        {
                            putc(BY_FLAGS, fd);                 /* <byte> */
                            putc(flags, fd);                    /* <flags> */
                        }
                        if (flags & WF_REGION)
                            putc(np->wn_region, fd);            /* <region> */
                        if (flags & WF_AFX)
                            putc(np->wn_affixID, fd);           /* <affixID> */
                    }
                }
            }
        }
        else
        {
            /* The child has an index and was claimed by another sibling
             * list: it is shared. */
            if (np->wn_child->wn_u1.index != 0
                                         && np->wn_child->wn_u2.wnode != node)
            {
                /* The child is written elsewhere, write the reference. */
                if (fd != NULL)
                {
                    putc(BY_INDEX, fd);                 /* <byte> */
                                                        /* <nodeidx> */
                    put_bytes(fd, (long_u)np->wn_child->wn_u1.index, 3);
                }
            }
            else if (np->wn_child->wn_u2.wnode == NULL)
                /* We will write the child below and give it an index. */
                np->wn_child->wn_u2.wnode = node;

            if (fd != NULL)
                if (putc(np->wn_byte, fd) == EOF) /* <byte> or <xbyte> */
                {
                    EMSG(_(e_write));
                    return 0;
                }
        }
    }

    /* Space used in the array when reading: one for each sibling and one for
     * the count. */
    newindex += siblingcount + 1;

    /* Recursively dump the children of each sibling.  Only the children
     * claimed by this sibling list above are written here. */
    for (np = node; np != NULL; np = np->wn_sibling)
        if (np->wn_byte != 0 && np->wn_child->wn_u2.wnode == node)
            newindex = put_node(fd, np->wn_child, newindex, regionmask,
                                                                  prefixtree);

    return newindex;
}


/*
 * ":mkspell [-ascii] outfile  infile ..."
 * ":mkspell [-ascii] addfile"
 */
void
ex_mkspell(exarg_T *eap)
{
    int         fcount;
    char_u      **fnames;
    char_u      *arg = eap->arg;
    int         ascii = FALSE;

    /* Skip over a leading "-ascii" and the white space after it. */
    if (STRNCMP(arg, "-ascii", 6) == 0)
    {
        ascii = TRUE;
        arg = skipwhite(arg + 6);
    }

    /* Expand all the remaining arguments (e.g., $VIMRUNTIME). */
    if (get_arglist_exp(arg, &fcount, &fnames, FALSE) == OK)
    {
        mkspell(fcount, fnames, ascii, eap->forceit, FALSE);
        FreeWild(fcount, fnames);
    }
}

/*
 * Create the .sug file.
 * Uses the soundfold info in "spin".
 * Writes the file with the name "wfname", with ".spl" changed to ".sug".
 * On return the blocks in "spin->si_blocks" have been freed and the buffer
 * "spin->si_spellbuf" has been closed.
 */
static void
spell_make_sugfile(spellinfo_T *spin, char_u *wfname)
{
    char_u      *fname = NULL;
    int         len;
    slang_T     *slang;
    int         free_slang = FALSE;

    /*
     * Read back the .spl file that was written.  This fills the required
     * info for soundfolding.  This also uses less memory than the
     * pointer-linked version of the trie.  And it avoids having two versions
     * of the code for the soundfolding stuff.
     * It might have been done already by spell_reload_one().
     */
    for (slang = first_lang; slang != NULL; slang = slang->sl_next)
        if (fullpathcmp(wfname, slang->sl_fname, FALSE) == FPC_SAME)
            break;
    if (slang == NULL)
    {
        spell_message(spin, (char_u *)_("Reading back spell file..."));
        slang = spell_load_file(wfname, NULL, NULL, FALSE);
        if (slang == NULL)
            return;
        /* Loaded here, thus must be freed here as well. */
        free_slang = TRUE;
    }

    /*
     * Clear the info in "spin" that is used.
     */
    spin->si_blocks = NULL;
    spin->si_blocks_cnt = 0;
    spin->si_compress_cnt = 0;      /* will stay at 0 all the time */
    spin->si_free_count = 0;
    spin->si_first_free = NULL;
    spin->si_foldwcount = 0;

    /*
     * Go through the trie of good words, soundfold each word and add it to
     * the soundfold trie.
     */
    spell_message(spin, (char_u *)_("Performing soundfolding..."));
    if (sug_filltree(spin, slang) == FAIL)
        goto theend;

    /*
     * Create the table which links each soundfold word with a list of the
     * good words it may come from.  Creates buffer "spin->si_spellbuf".
     * This also removes the wordnr from the NUL byte entries to make
     * compression possible.
     */
    if (sug_maketable(spin) == FAIL)
        goto theend;

    smsg((char_u *)_("Number of words after soundfolding: %ld"),
                                 (long)spin->si_spellbuf->b_ml.ml_line_count);

    /*
     * Compress the soundfold trie.
     */
    spell_message(spin, (char_u *)_(msg_compressing));
    wordtree_compress(spin, spin->si_foldroot);

    /*
     * Write the .sug file.
     * Make the file name by changing ".spl" to ".sug".
     */
    fname = alloc(MAXPATHL);
    if (fname == NULL)
        goto theend;
    vim_strncpy(fname, wfname, MAXPATHL - 1);
    len = (int)STRLEN(fname);
    /* Overwrite the last two characters; assumes the name ends in ".spl"
     * -- TODO confirm against the callers. */
    fname[len - 2] = 'u';
    fname[len - 1] = 'g';
    sug_write(spin, fname);

theend:
    vim_free(fname);
    if (free_slang)
        slang_free(slang);
    free_blocks(spin->si_blocks);
    close_spellbuf(spin->si_spellbuf);
}

/*
 * Build the soundfold trie for language "slang".
 * Walks the case-folded word tree of "slang" without recursion, using
 * "depth" as the index in the stacks "arridx", "curi" and "wordcount".
 * Every word found is soundfolded and added to "spin->si_foldroot" together
 * with its word number.
 * While walking, the "idxs" entry of each node of "slang" is overwritten with
 * the number of words counted below that node.
 * Returns FAIL when allocating the tree or adding a word fails, OK otherwise.
 */
static int
sug_filltree(spellinfo_T *spin, slang_T *slang)
{
    char_u      *byts;
    idx_T       *idxs;
    int         depth;
    idx_T       arridx[MAXWLEN];
    int         curi[MAXWLEN];
    char_u      tword[MAXWLEN];
    char_u      tsalword[MAXWLEN];
    int         c;
    idx_T       n;
    unsigned    words_done = 0;
    int         wordcount[MAXWLEN];

    /* We use si_foldroot for the soundfolded trie. */
    spin->si_foldroot = wordtree_alloc(spin);
    if (spin->si_foldroot == NULL)
        return FAIL;

    /* let tree_add_word() know we're adding to the soundfolded tree */
    spin->si_sugtree = TRUE;

    /*
     * Go through the whole case-folded tree, soundfold each word and put it
     * in the trie.
     */
    byts = slang->sl_fbyts;
    idxs = slang->sl_fidxs;

    arridx[0] = 0;
    curi[0] = 1;
    wordcount[0] = 0;

    depth = 0;
    while (depth >= 0 && !got_int)
    {
        /* byts[arridx[depth]] is the number of siblings of this node. */
        if (curi[depth] > byts[arridx[depth]])
        {
            /* Done all bytes at this node, go up one level. */
            idxs[arridx[depth]] = wordcount[depth];
            if (depth > 0)
                wordcount[depth - 1] += wordcount[depth];

            --depth;
            line_breakcheck();
        }
        else
        {

            /* Do one more byte at this node. */
            n = arridx[depth] + curi[depth];
            ++curi[depth];

            c = byts[n];
            if (c == 0)
            {
                /* Sound-fold the word. */
                tword[depth] = NUL;
                spell_soundfold(slang, tword, TRUE, tsalword);

                /* We use the "flags" field for the MSB of the wordnr,
                 * "region" for the LSB of the wordnr. */
                if (tree_add_word(spin, tsalword, spin->si_foldroot,
                                words_done >> 16, words_done & 0xffff,
                                                           0) == FAIL)
                    return FAIL;

                ++words_done;
                ++wordcount[depth];

                /* Reset the block count each time to avoid compression
                 * kicking in. */
                spin->si_blocks_cnt = 0;

                /* Skip over any other NUL bytes (same word with different
                 * flags). */
                while (byts[n + 1] == 0)
                {
                    ++n;
                    ++curi[depth];
                }
            }
            else
            {
                /* Normal char, go one level deeper. */
                tword[depth++] = c;
                arridx[depth] = idxs[n];
                curi[depth] = 1;
                wordcount[depth] = 0;
            }
        }
    }

    smsg((char_u *)_("Total number of words: %d"), words_done);

    return OK;
}

/*
 * Make the table that links each word in the soundfold trie to the words it
 * can be produced from.
 * This is not unlike lines in a file, thus use a memfile to be able to access
 * the table efficiently.
 * The table is stored in the new buffer "spin->si_spellbuf".
 * Returns FAIL when out of memory.
 */
static int
sug_maketable(spellinfo_T *spin)
{
    garray_T    ga;
    int         res = OK;

    /* Allocate a buffer, open a memline for it and create the swap file
     * (uses a temp file, not a .swp file). */
    spin->si_spellbuf = open_spellbuf();
    if (spin->si_spellbuf == NULL)
        return FAIL;

    /* Use a buffer to store the line info, avoids allocating many small
     * pieces of memory. */
    ga_init2(&ga, 1, 100);

    /* recursively go through the tree */
    if (sug_filltable(spin, spin->si_foldroot->wn_sibling, 0, &ga) == -1)
        res = FAIL;

    ga_clear(&ga);
    return res;
}

/*
 * Fill the table for one node and its children.
 * Returns the wordnr to use for the next word: "startwordnr" plus the number
 * of words stored for this node and its children.
8734 * Returns -1 when out of memory. 8735 */ 8736 static int 8737 sug_filltable( 8738 spellinfo_T *spin, 8739 wordnode_T *node, 8740 int startwordnr, 8741 garray_T *gap) /* place to store line of numbers */ 8742 { 8743 wordnode_T *p, *np; 8744 int wordnr = startwordnr; 8745 int nr; 8746 int prev_nr; 8747 8748 for (p = node; p != NULL; p = p->wn_sibling) 8749 { 8750 if (p->wn_byte == NUL) 8751 { 8752 gap->ga_len = 0; 8753 prev_nr = 0; 8754 for (np = p; np != NULL && np->wn_byte == NUL; np = np->wn_sibling) 8755 { 8756 if (ga_grow(gap, 10) == FAIL) 8757 return -1; 8758 8759 nr = (np->wn_flags << 16) + (np->wn_region & 0xffff); 8760 /* Compute the offset from the previous nr and store the 8761 * offset in a way that it takes a minimum number of bytes. 8762 * It's a bit like utf-8, but without the need to mark 8763 * following bytes. */ 8764 nr -= prev_nr; 8765 prev_nr += nr; 8766 gap->ga_len += offset2bytes(nr, 8767 (char_u *)gap->ga_data + gap->ga_len); 8768 } 8769 8770 /* add the NUL byte */ 8771 ((char_u *)gap->ga_data)[gap->ga_len++] = NUL; 8772 8773 if (ml_append_buf(spin->si_spellbuf, (linenr_T)wordnr, 8774 gap->ga_data, gap->ga_len, TRUE) == FAIL) 8775 return -1; 8776 ++wordnr; 8777 8778 /* Remove extra NUL entries, we no longer need them. We don't 8779 * bother freeing the nodes, the won't be reused anyway. */ 8780 while (p->wn_sibling != NULL && p->wn_sibling->wn_byte == NUL) 8781 p->wn_sibling = p->wn_sibling->wn_sibling; 8782 8783 /* Clear the flags on the remaining NUL node, so that compression 8784 * works a lot better. */ 8785 p->wn_flags = 0; 8786 p->wn_region = 0; 8787 } 8788 else 8789 { 8790 wordnr = sug_filltable(spin, p->wn_child, wordnr, gap); 8791 if (wordnr == -1) 8792 return -1; 8793 } 8794 } 8795 return wordnr; 8796 } 8797 8798 /* 8799 * Convert an offset into a minimal number of bytes. 8800 * Similar to utf_char2byters, but use 8 bits in followup bytes and avoid NUL 8801 * bytes. 
8802 */ 8803 static int 8804 offset2bytes(int nr, char_u *buf) 8805 { 8806 int rem; 8807 int b1, b2, b3, b4; 8808 8809 /* Split the number in parts of base 255. We need to avoid NUL bytes. */ 8810 b1 = nr % 255 + 1; 8811 rem = nr / 255; 8812 b2 = rem % 255 + 1; 8813 rem = rem / 255; 8814 b3 = rem % 255 + 1; 8815 b4 = rem / 255 + 1; 8816 8817 if (b4 > 1 || b3 > 0x1f) /* 4 bytes */ 8818 { 8819 buf[0] = 0xe0 + b4; 8820 buf[1] = b3; 8821 buf[2] = b2; 8822 buf[3] = b1; 8823 return 4; 8824 } 8825 if (b3 > 1 || b2 > 0x3f ) /* 3 bytes */ 8826 { 8827 buf[0] = 0xc0 + b3; 8828 buf[1] = b2; 8829 buf[2] = b1; 8830 return 3; 8831 } 8832 if (b2 > 1 || b1 > 0x7f ) /* 2 bytes */ 8833 { 8834 buf[0] = 0x80 + b2; 8835 buf[1] = b1; 8836 return 2; 8837 } 8838 /* 1 byte */ 8839 buf[0] = b1; 8840 return 1; 8841 } 8842 8843 /* 8844 * Opposite of offset2bytes(). 8845 * "pp" points to the bytes and is advanced over it. 8846 * Returns the offset. 8847 */ 8848 static int 8849 bytes2offset(char_u **pp) 8850 { 8851 char_u *p = *pp; 8852 int nr; 8853 int c; 8854 8855 c = *p++; 8856 if ((c & 0x80) == 0x00) /* 1 byte */ 8857 { 8858 nr = c - 1; 8859 } 8860 else if ((c & 0xc0) == 0x80) /* 2 bytes */ 8861 { 8862 nr = (c & 0x3f) - 1; 8863 nr = nr * 255 + (*p++ - 1); 8864 } 8865 else if ((c & 0xe0) == 0xc0) /* 3 bytes */ 8866 { 8867 nr = (c & 0x1f) - 1; 8868 nr = nr * 255 + (*p++ - 1); 8869 nr = nr * 255 + (*p++ - 1); 8870 } 8871 else /* 4 bytes */ 8872 { 8873 nr = (c & 0x0f) - 1; 8874 nr = nr * 255 + (*p++ - 1); 8875 nr = nr * 255 + (*p++ - 1); 8876 nr = nr * 255 + (*p++ - 1); 8877 } 8878 8879 *pp = p; 8880 return nr; 8881 } 8882 8883 /* 8884 * Write the .sug file in "fname". 8885 */ 8886 static void 8887 sug_write(spellinfo_T *spin, char_u *fname) 8888 { 8889 FILE *fd; 8890 wordnode_T *tree; 8891 int nodecount; 8892 int wcount; 8893 char_u *line; 8894 linenr_T lnum; 8895 int len; 8896 8897 /* Create the file. Note that an existing file is silently overwritten! 
*/ 8898 fd = mch_fopen((char *)fname, "w"); 8899 if (fd == NULL) 8900 { 8901 EMSG2(_(e_notopen), fname); 8902 return; 8903 } 8904 8905 vim_snprintf((char *)IObuff, IOSIZE, 8906 _("Writing suggestion file %s ..."), fname); 8907 spell_message(spin, IObuff); 8908 8909 /* 8910 * <SUGHEADER>: <fileID> <versionnr> <timestamp> 8911 */ 8912 if (fwrite(VIMSUGMAGIC, VIMSUGMAGICL, (size_t)1, fd) != 1) /* <fileID> */ 8913 { 8914 EMSG(_(e_write)); 8915 goto theend; 8916 } 8917 putc(VIMSUGVERSION, fd); /* <versionnr> */ 8918 8919 /* Write si_sugtime to the file. */ 8920 put_time(fd, spin->si_sugtime); /* <timestamp> */ 8921 8922 /* 8923 * <SUGWORDTREE> 8924 */ 8925 spin->si_memtot = 0; 8926 tree = spin->si_foldroot->wn_sibling; 8927 8928 /* Clear the index and wnode fields in the tree. */ 8929 clear_node(tree); 8930 8931 /* Count the number of nodes. Needed to be able to allocate the 8932 * memory when reading the nodes. Also fills in index for shared 8933 * nodes. */ 8934 nodecount = put_node(NULL, tree, 0, 0, FALSE); 8935 8936 /* number of nodes in 4 bytes */ 8937 put_bytes(fd, (long_u)nodecount, 4); /* <nodecount> */ 8938 spin->si_memtot += nodecount + nodecount * sizeof(int); 8939 8940 /* Write the nodes. */ 8941 (void)put_node(fd, tree, 0, 0, FALSE); 8942 8943 /* 8944 * <SUGTABLE>: <sugwcount> <sugline> ... 8945 */ 8946 wcount = spin->si_spellbuf->b_ml.ml_line_count; 8947 put_bytes(fd, (long_u)wcount, 4); /* <sugwcount> */ 8948 8949 for (lnum = 1; lnum <= (linenr_T)wcount; ++lnum) 8950 { 8951 /* <sugline>: <sugnr> ... NUL */ 8952 line = ml_get_buf(spin->si_spellbuf, lnum, FALSE); 8953 len = (int)STRLEN(line) + 1; 8954 if (fwrite(line, (size_t)len, (size_t)1, fd) == 0) 8955 { 8956 EMSG(_(e_write)); 8957 goto theend; 8958 } 8959 spin->si_memtot += len; 8960 } 8961 8962 /* Write another byte to check for errors. 
*/ 8963 if (putc(0, fd) == EOF) 8964 EMSG(_(e_write)); 8965 8966 vim_snprintf((char *)IObuff, IOSIZE, 8967 _("Estimated runtime memory use: %d bytes"), spin->si_memtot); 8968 spell_message(spin, IObuff); 8969 8970 theend: 8971 /* close the file */ 8972 fclose(fd); 8973 } 8974 8975 /* 8976 * Open a spell buffer. This is a nameless buffer that is not in the buffer 8977 * list and only contains text lines. Can use a swapfile to reduce memory 8978 * use. 8979 * Most other fields are invalid! Esp. watch out for string options being 8980 * NULL and there is no undo info. 8981 * Returns NULL when out of memory. 8982 */ 8983 static buf_T * 8984 open_spellbuf(void) 8985 { 8986 buf_T *buf; 8987 8988 buf = (buf_T *)alloc_clear(sizeof(buf_T)); 8989 if (buf != NULL) 8990 { 8991 buf->b_spell = TRUE; 8992 buf->b_p_swf = TRUE; /* may create a swap file */ 8993 #ifdef FEAT_CRYPT 8994 buf->b_p_key = empty_option; 8995 #endif 8996 ml_open(buf); 8997 ml_open_file(buf); /* create swap file now */ 8998 } 8999 return buf; 9000 } 9001 9002 /* 9003 * Close the buffer used for spell info. 9004 */ 9005 static void 9006 close_spellbuf(buf_T *buf) 9007 { 9008 if (buf != NULL) 9009 { 9010 ml_close(buf, TRUE); 9011 vim_free(buf); 9012 } 9013 } 9014 9015 9016 /* 9017 * Create a Vim spell file from one or more word lists. 9018 * "fnames[0]" is the output file name. 9019 * "fnames[fcount - 1]" is the last input file name. 9020 * Exception: when "fnames[0]" ends in ".add" it's used as the input file name 9021 * and ".spl" is appended to make the output file name. 
9022 */ 9023 static void 9024 mkspell( 9025 int fcount, 9026 char_u **fnames, 9027 int ascii, /* -ascii argument given */ 9028 int over_write, /* overwrite existing output file */ 9029 int added_word) /* invoked through "zg" */ 9030 { 9031 char_u *fname = NULL; 9032 char_u *wfname; 9033 char_u **innames; 9034 int incount; 9035 afffile_T *(afile[8]); 9036 int i; 9037 int len; 9038 struct stat st; 9039 int error = FALSE; 9040 spellinfo_T spin; 9041 9042 vim_memset(&spin, 0, sizeof(spin)); 9043 spin.si_verbose = !added_word; 9044 spin.si_ascii = ascii; 9045 spin.si_followup = TRUE; 9046 spin.si_rem_accents = TRUE; 9047 ga_init2(&spin.si_rep, (int)sizeof(fromto_T), 20); 9048 ga_init2(&spin.si_repsal, (int)sizeof(fromto_T), 20); 9049 ga_init2(&spin.si_sal, (int)sizeof(fromto_T), 20); 9050 ga_init2(&spin.si_map, (int)sizeof(char_u), 100); 9051 ga_init2(&spin.si_comppat, (int)sizeof(char_u *), 20); 9052 ga_init2(&spin.si_prefcond, (int)sizeof(char_u *), 50); 9053 hash_init(&spin.si_commonwords); 9054 spin.si_newcompID = 127; /* start compound ID at first maximum */ 9055 9056 /* default: fnames[0] is output file, following are input files */ 9057 innames = &fnames[1]; 9058 incount = fcount - 1; 9059 9060 wfname = alloc(MAXPATHL); 9061 if (wfname == NULL) 9062 return; 9063 9064 if (fcount >= 1) 9065 { 9066 len = (int)STRLEN(fnames[0]); 9067 if (fcount == 1 && len > 4 && STRCMP(fnames[0] + len - 4, ".add") == 0) 9068 { 9069 /* For ":mkspell path/en.latin1.add" output file is 9070 * "path/en.latin1.add.spl". */ 9071 innames = &fnames[0]; 9072 incount = 1; 9073 vim_snprintf((char *)wfname, MAXPATHL, "%s.spl", fnames[0]); 9074 } 9075 else if (fcount == 1) 9076 { 9077 /* For ":mkspell path/vim" output file is "path/vim.latin1.spl". */ 9078 innames = &fnames[0]; 9079 incount = 1; 9080 vim_snprintf((char *)wfname, MAXPATHL, SPL_FNAME_TMPL, 9081 fnames[0], spin.si_ascii ? 
(char_u *)"ascii" : spell_enc()); 9082 } 9083 else if (len > 4 && STRCMP(fnames[0] + len - 4, ".spl") == 0) 9084 { 9085 /* Name ends in ".spl", use as the file name. */ 9086 vim_strncpy(wfname, fnames[0], MAXPATHL - 1); 9087 } 9088 else 9089 /* Name should be language, make the file name from it. */ 9090 vim_snprintf((char *)wfname, MAXPATHL, SPL_FNAME_TMPL, 9091 fnames[0], spin.si_ascii ? (char_u *)"ascii" : spell_enc()); 9092 9093 /* Check for .ascii.spl. */ 9094 if (strstr((char *)gettail(wfname), SPL_FNAME_ASCII) != NULL) 9095 spin.si_ascii = TRUE; 9096 9097 /* Check for .add.spl. */ 9098 if (strstr((char *)gettail(wfname), SPL_FNAME_ADD) != NULL) 9099 spin.si_add = TRUE; 9100 } 9101 9102 if (incount <= 0) 9103 EMSG(_(e_invarg)); /* need at least output and input names */ 9104 else if (vim_strchr(gettail(wfname), '_') != NULL) 9105 EMSG(_("E751: Output file name must not have region name")); 9106 else if (incount > 8) 9107 EMSG(_("E754: Only up to 8 regions supported")); 9108 else 9109 { 9110 /* Check for overwriting before doing things that may take a lot of 9111 * time. */ 9112 if (!over_write && mch_stat((char *)wfname, &st) >= 0) 9113 { 9114 EMSG(_(e_exists)); 9115 goto theend; 9116 } 9117 if (mch_isdir(wfname)) 9118 { 9119 EMSG2(_(e_isadir2), wfname); 9120 goto theend; 9121 } 9122 9123 fname = alloc(MAXPATHL); 9124 if (fname == NULL) 9125 goto theend; 9126 9127 /* 9128 * Init the aff and dic pointers. 9129 * Get the region names if there are more than 2 arguments. 
9130 */ 9131 for (i = 0; i < incount; ++i) 9132 { 9133 afile[i] = NULL; 9134 9135 if (incount > 1) 9136 { 9137 len = (int)STRLEN(innames[i]); 9138 if (STRLEN(gettail(innames[i])) < 5 9139 || innames[i][len - 3] != '_') 9140 { 9141 EMSG2(_("E755: Invalid region in %s"), innames[i]); 9142 goto theend; 9143 } 9144 spin.si_region_name[i * 2] = TOLOWER_ASC(innames[i][len - 2]); 9145 spin.si_region_name[i * 2 + 1] = 9146 TOLOWER_ASC(innames[i][len - 1]); 9147 } 9148 } 9149 spin.si_region_count = incount; 9150 9151 spin.si_foldroot = wordtree_alloc(&spin); 9152 spin.si_keeproot = wordtree_alloc(&spin); 9153 spin.si_prefroot = wordtree_alloc(&spin); 9154 if (spin.si_foldroot == NULL 9155 || spin.si_keeproot == NULL 9156 || spin.si_prefroot == NULL) 9157 { 9158 free_blocks(spin.si_blocks); 9159 goto theend; 9160 } 9161 9162 /* When not producing a .add.spl file clear the character table when 9163 * we encounter one in the .aff file. This means we dump the current 9164 * one in the .spl file if the .aff file doesn't define one. That's 9165 * better than guessing the contents, the table will match a 9166 * previously loaded spell file. */ 9167 if (!spin.si_add) 9168 spin.si_clear_chartab = TRUE; 9169 9170 /* 9171 * Read all the .aff and .dic files. 9172 * Text is converted to 'encoding'. 9173 * Words are stored in the case-folded and keep-case trees. 9174 */ 9175 for (i = 0; i < incount && !error; ++i) 9176 { 9177 spin.si_conv.vc_type = CONV_NONE; 9178 spin.si_region = 1 << i; 9179 9180 vim_snprintf((char *)fname, MAXPATHL, "%s.aff", innames[i]); 9181 if (mch_stat((char *)fname, &st) >= 0) 9182 { 9183 /* Read the .aff file. Will init "spin->si_conv" based on the 9184 * "SET" line. */ 9185 afile[i] = spell_read_aff(&spin, fname); 9186 if (afile[i] == NULL) 9187 error = TRUE; 9188 else 9189 { 9190 /* Read the .dic file and store the words in the trees. 
*/ 9191 vim_snprintf((char *)fname, MAXPATHL, "%s.dic", 9192 innames[i]); 9193 if (spell_read_dic(&spin, fname, afile[i]) == FAIL) 9194 error = TRUE; 9195 } 9196 } 9197 else 9198 { 9199 /* No .aff file, try reading the file as a word list. Store 9200 * the words in the trees. */ 9201 if (spell_read_wordfile(&spin, innames[i]) == FAIL) 9202 error = TRUE; 9203 } 9204 9205 #ifdef FEAT_MBYTE 9206 /* Free any conversion stuff. */ 9207 convert_setup(&spin.si_conv, NULL, NULL); 9208 #endif 9209 } 9210 9211 if (spin.si_compflags != NULL && spin.si_nobreak) 9212 MSG(_("Warning: both compounding and NOBREAK specified")); 9213 9214 if (!error && !got_int) 9215 { 9216 /* 9217 * Combine tails in the tree. 9218 */ 9219 spell_message(&spin, (char_u *)_(msg_compressing)); 9220 wordtree_compress(&spin, spin.si_foldroot); 9221 wordtree_compress(&spin, spin.si_keeproot); 9222 wordtree_compress(&spin, spin.si_prefroot); 9223 } 9224 9225 if (!error && !got_int) 9226 { 9227 /* 9228 * Write the info in the spell file. 9229 */ 9230 vim_snprintf((char *)IObuff, IOSIZE, 9231 _("Writing spell file %s ..."), wfname); 9232 spell_message(&spin, IObuff); 9233 9234 error = write_vim_spell(&spin, wfname) == FAIL; 9235 9236 spell_message(&spin, (char_u *)_("Done!")); 9237 vim_snprintf((char *)IObuff, IOSIZE, 9238 _("Estimated runtime memory use: %d bytes"), spin.si_memtot); 9239 spell_message(&spin, IObuff); 9240 9241 /* 9242 * If the file is loaded need to reload it. 9243 */ 9244 if (!error) 9245 spell_reload_one(wfname, added_word); 9246 } 9247 9248 /* Free the allocated memory. */ 9249 ga_clear(&spin.si_rep); 9250 ga_clear(&spin.si_repsal); 9251 ga_clear(&spin.si_sal); 9252 ga_clear(&spin.si_map); 9253 ga_clear(&spin.si_comppat); 9254 ga_clear(&spin.si_prefcond); 9255 hash_clear_all(&spin.si_commonwords, 0); 9256 9257 /* Free the .aff file structures. */ 9258 for (i = 0; i < incount; ++i) 9259 if (afile[i] != NULL) 9260 spell_free_aff(afile[i]); 9261 9262 /* Free all the bits and pieces at once. 
*/ 9263 free_blocks(spin.si_blocks); 9264 9265 /* 9266 * If there is soundfolding info and no NOSUGFILE item create the 9267 * .sug file with the soundfolded word trie. 9268 */ 9269 if (spin.si_sugtime != 0 && !error && !got_int) 9270 spell_make_sugfile(&spin, wfname); 9271 9272 } 9273 9274 theend: 9275 vim_free(fname); 9276 vim_free(wfname); 9277 } 9278 9279 /* 9280 * Display a message for spell file processing when 'verbose' is set or using 9281 * ":mkspell". "str" can be IObuff. 9282 */ 9283 static void 9284 spell_message(spellinfo_T *spin, char_u *str) 9285 { 9286 if (spin->si_verbose || p_verbose > 2) 9287 { 9288 if (!spin->si_verbose) 9289 verbose_enter(); 9290 MSG(str); 9291 out_flush(); 9292 if (!spin->si_verbose) 9293 verbose_leave(); 9294 } 9295 } 9296 9297 /* 9298 * ":[count]spellgood {word}" 9299 * ":[count]spellwrong {word}" 9300 * ":[count]spellundo {word}" 9301 */ 9302 void 9303 ex_spell(exarg_T *eap) 9304 { 9305 spell_add_word(eap->arg, (int)STRLEN(eap->arg), eap->cmdidx == CMD_spellwrong, 9306 eap->forceit ? 0 : (int)eap->line2, 9307 eap->cmdidx == CMD_spellundo); 9308 } 9309 9310 /* 9311 * Add "word[len]" to 'spellfile' as a good or bad word. 9312 */ 9313 void 9314 spell_add_word( 9315 char_u *word, 9316 int len, 9317 int bad, 9318 int idx, /* "zG" and "zW": zero, otherwise index in 9319 'spellfile' */ 9320 int undo) /* TRUE for "zug", "zuG", "zuw" and "zuW" */ 9321 { 9322 FILE *fd = NULL; 9323 buf_T *buf = NULL; 9324 int new_spf = FALSE; 9325 char_u *fname; 9326 char_u *fnamebuf = NULL; 9327 char_u line[MAXWLEN * 2]; 9328 long fpos, fpos_next = 0; 9329 int i; 9330 char_u *spf; 9331 9332 if (idx == 0) /* use internal wordlist */ 9333 { 9334 if (int_wordlist == NULL) 9335 { 9336 int_wordlist = vim_tempname('s', FALSE); 9337 if (int_wordlist == NULL) 9338 return; 9339 } 9340 fname = int_wordlist; 9341 } 9342 else 9343 { 9344 /* If 'spellfile' isn't set figure out a good default value. 
*/ 9345 if (*curwin->w_s->b_p_spf == NUL) 9346 { 9347 init_spellfile(); 9348 new_spf = TRUE; 9349 } 9350 9351 if (*curwin->w_s->b_p_spf == NUL) 9352 { 9353 EMSG2(_(e_notset), "spellfile"); 9354 return; 9355 } 9356 fnamebuf = alloc(MAXPATHL); 9357 if (fnamebuf == NULL) 9358 return; 9359 9360 for (spf = curwin->w_s->b_p_spf, i = 1; *spf != NUL; ++i) 9361 { 9362 copy_option_part(&spf, fnamebuf, MAXPATHL, ","); 9363 if (i == idx) 9364 break; 9365 if (*spf == NUL) 9366 { 9367 EMSGN(_("E765: 'spellfile' does not have %ld entries"), idx); 9368 vim_free(fnamebuf); 9369 return; 9370 } 9371 } 9372 9373 /* Check that the user isn't editing the .add file somewhere. */ 9374 buf = buflist_findname_exp(fnamebuf); 9375 if (buf != NULL && buf->b_ml.ml_mfp == NULL) 9376 buf = NULL; 9377 if (buf != NULL && bufIsChanged(buf)) 9378 { 9379 EMSG(_(e_bufloaded)); 9380 vim_free(fnamebuf); 9381 return; 9382 } 9383 9384 fname = fnamebuf; 9385 } 9386 9387 if (bad || undo) 9388 { 9389 /* When the word appears as good word we need to remove that one, 9390 * since its flags sort before the one with WF_BANNED. */ 9391 fd = mch_fopen((char *)fname, "r"); 9392 if (fd != NULL) 9393 { 9394 while (!vim_fgets(line, MAXWLEN * 2, fd)) 9395 { 9396 fpos = fpos_next; 9397 fpos_next = ftell(fd); 9398 if (STRNCMP(word, line, len) == 0 9399 && (line[len] == '/' || line[len] < ' ')) 9400 { 9401 /* Found duplicate word. Remove it by writing a '#' at 9402 * the start of the line. Mixing reading and writing 9403 * doesn't work for all systems, close the file first. 
*/ 9404 fclose(fd); 9405 fd = mch_fopen((char *)fname, "r+"); 9406 if (fd == NULL) 9407 break; 9408 if (fseek(fd, fpos, SEEK_SET) == 0) 9409 { 9410 fputc('#', fd); 9411 if (undo) 9412 { 9413 home_replace(NULL, fname, NameBuff, MAXPATHL, TRUE); 9414 smsg((char_u *)_("Word '%.*s' removed from %s"), 9415 len, word, NameBuff); 9416 } 9417 } 9418 fseek(fd, fpos_next, SEEK_SET); 9419 } 9420 } 9421 if (fd != NULL) 9422 fclose(fd); 9423 } 9424 } 9425 9426 if (!undo) 9427 { 9428 fd = mch_fopen((char *)fname, "a"); 9429 if (fd == NULL && new_spf) 9430 { 9431 char_u *p; 9432 9433 /* We just initialized the 'spellfile' option and can't open the 9434 * file. We may need to create the "spell" directory first. We 9435 * already checked the runtime directory is writable in 9436 * init_spellfile(). */ 9437 if (!dir_of_file_exists(fname) && (p = gettail_sep(fname)) != fname) 9438 { 9439 int c = *p; 9440 9441 /* The directory doesn't exist. Try creating it and opening 9442 * the file again. */ 9443 *p = NUL; 9444 vim_mkdir(fname, 0755); 9445 *p = c; 9446 fd = mch_fopen((char *)fname, "a"); 9447 } 9448 } 9449 9450 if (fd == NULL) 9451 EMSG2(_(e_notopen), fname); 9452 else 9453 { 9454 if (bad) 9455 fprintf(fd, "%.*s/!\n", len, word); 9456 else 9457 fprintf(fd, "%.*s\n", len, word); 9458 fclose(fd); 9459 9460 home_replace(NULL, fname, NameBuff, MAXPATHL, TRUE); 9461 smsg((char_u *)_("Word '%.*s' added to %s"), len, word, NameBuff); 9462 } 9463 } 9464 9465 if (fd != NULL) 9466 { 9467 /* Update the .add.spl file. */ 9468 mkspell(1, &fname, FALSE, TRUE, TRUE); 9469 9470 /* If the .add file is edited somewhere, reload it. */ 9471 if (buf != NULL) 9472 buf_reload(buf, buf->b_orig_mode); 9473 9474 redraw_all_later(SOME_VALID); 9475 } 9476 vim_free(fnamebuf); 9477 } 9478 9479 /* 9480 * Initialize 'spellfile' for the current buffer. 
9481 */ 9482 static void 9483 init_spellfile(void) 9484 { 9485 char_u *buf; 9486 int l; 9487 char_u *fname; 9488 char_u *rtp; 9489 char_u *lend; 9490 int aspath = FALSE; 9491 char_u *lstart = curbuf->b_s.b_p_spl; 9492 9493 if (*curwin->w_s->b_p_spl != NUL && curwin->w_s->b_langp.ga_len > 0) 9494 { 9495 buf = alloc(MAXPATHL); 9496 if (buf == NULL) 9497 return; 9498 9499 /* Find the end of the language name. Exclude the region. If there 9500 * is a path separator remember the start of the tail. */ 9501 for (lend = curwin->w_s->b_p_spl; *lend != NUL 9502 && vim_strchr((char_u *)",._", *lend) == NULL; ++lend) 9503 if (vim_ispathsep(*lend)) 9504 { 9505 aspath = TRUE; 9506 lstart = lend + 1; 9507 } 9508 9509 /* Loop over all entries in 'runtimepath'. Use the first one where we 9510 * are allowed to write. */ 9511 rtp = p_rtp; 9512 while (*rtp != NUL) 9513 { 9514 if (aspath) 9515 /* Use directory of an entry with path, e.g., for 9516 * "/dir/lg.utf-8.spl" use "/dir". */ 9517 vim_strncpy(buf, curbuf->b_s.b_p_spl, 9518 lstart - curbuf->b_s.b_p_spl - 1); 9519 else 9520 /* Copy the path from 'runtimepath' to buf[]. */ 9521 copy_option_part(&rtp, buf, MAXPATHL, ","); 9522 if (filewritable(buf) == 2) 9523 { 9524 /* Use the first language name from 'spelllang' and the 9525 * encoding used in the first loaded .spl file. */ 9526 if (aspath) 9527 vim_strncpy(buf, curbuf->b_s.b_p_spl, 9528 lend - curbuf->b_s.b_p_spl); 9529 else 9530 { 9531 /* Create the "spell" directory if it doesn't exist yet. 
*/ 9532 l = (int)STRLEN(buf); 9533 vim_snprintf((char *)buf + l, MAXPATHL - l, "/spell"); 9534 if (filewritable(buf) != 2) 9535 vim_mkdir(buf, 0755); 9536 9537 l = (int)STRLEN(buf); 9538 vim_snprintf((char *)buf + l, MAXPATHL - l, 9539 "/%.*s", (int)(lend - lstart), lstart); 9540 } 9541 l = (int)STRLEN(buf); 9542 fname = LANGP_ENTRY(curwin->w_s->b_langp, 0) 9543 ->lp_slang->sl_fname; 9544 vim_snprintf((char *)buf + l, MAXPATHL - l, ".%s.add", 9545 fname != NULL 9546 && strstr((char *)gettail(fname), ".ascii.") != NULL 9547 ? (char_u *)"ascii" : spell_enc()); 9548 set_option_value((char_u *)"spellfile", 0L, buf, OPT_LOCAL); 9549 break; 9550 } 9551 aspath = FALSE; 9552 } 9553 9554 vim_free(buf); 9555 } 9556 } 9557 9558 9559 /* 9560 * Init the chartab used for spelling for ASCII. 9561 * EBCDIC is not supported! 9562 */ 9563 static void 9564 clear_spell_chartab(spelltab_T *sp) 9565 { 9566 int i; 9567 9568 /* Init everything to FALSE. */ 9569 vim_memset(sp->st_isw, FALSE, sizeof(sp->st_isw)); 9570 vim_memset(sp->st_isu, FALSE, sizeof(sp->st_isu)); 9571 for (i = 0; i < 256; ++i) 9572 { 9573 sp->st_fold[i] = i; 9574 sp->st_upper[i] = i; 9575 } 9576 9577 /* We include digits. A word shouldn't start with a digit, but handling 9578 * that is done separately. */ 9579 for (i = '0'; i <= '9'; ++i) 9580 sp->st_isw[i] = TRUE; 9581 for (i = 'A'; i <= 'Z'; ++i) 9582 { 9583 sp->st_isw[i] = TRUE; 9584 sp->st_isu[i] = TRUE; 9585 sp->st_fold[i] = i + 0x20; 9586 } 9587 for (i = 'a'; i <= 'z'; ++i) 9588 { 9589 sp->st_isw[i] = TRUE; 9590 sp->st_upper[i] = i - 0x20; 9591 } 9592 } 9593 9594 /* 9595 * Init the chartab used for spelling. Only depends on 'encoding'. 9596 * Called once while starting up and when 'encoding' changes. 9597 * The default is to use isalpha(), but the spell file should define the word 9598 * characters to make it possible that 'encoding' differs from the current 9599 * locale. For utf-8 we don't use isalpha() but our own functions. 
9600 */ 9601 void 9602 init_spell_chartab(void) 9603 { 9604 int i; 9605 9606 did_set_spelltab = FALSE; 9607 clear_spell_chartab(&spelltab); 9608 #ifdef FEAT_MBYTE 9609 if (enc_dbcs) 9610 { 9611 /* DBCS: assume double-wide characters are word characters. */ 9612 for (i = 128; i <= 255; ++i) 9613 if (MB_BYTE2LEN(i) == 2) 9614 spelltab.st_isw[i] = TRUE; 9615 } 9616 else if (enc_utf8) 9617 { 9618 for (i = 128; i < 256; ++i) 9619 { 9620 int f = utf_fold(i); 9621 int u = utf_toupper(i); 9622 9623 spelltab.st_isu[i] = utf_isupper(i); 9624 spelltab.st_isw[i] = spelltab.st_isu[i] || utf_islower(i); 9625 /* The folded/upper-cased value is different between latin1 and 9626 * utf8 for 0xb5, causing E763 for no good reason. Use the latin1 9627 * value for utf-8 to avoid this. */ 9628 spelltab.st_fold[i] = (f < 256) ? f : i; 9629 spelltab.st_upper[i] = (u < 256) ? u : i; 9630 } 9631 } 9632 else 9633 #endif 9634 { 9635 /* Rough guess: use locale-dependent library functions. */ 9636 for (i = 128; i < 256; ++i) 9637 { 9638 if (MB_ISUPPER(i)) 9639 { 9640 spelltab.st_isw[i] = TRUE; 9641 spelltab.st_isu[i] = TRUE; 9642 spelltab.st_fold[i] = MB_TOLOWER(i); 9643 } 9644 else if (MB_ISLOWER(i)) 9645 { 9646 spelltab.st_isw[i] = TRUE; 9647 spelltab.st_upper[i] = MB_TOUPPER(i); 9648 } 9649 } 9650 } 9651 } 9652 9653 /* 9654 * Set the spell character tables from strings in the affix file. 9655 */ 9656 static int 9657 set_spell_chartab(char_u *fol, char_u *low, char_u *upp) 9658 { 9659 /* We build the new tables here first, so that we can compare with the 9660 * previous one. 
*/ 9661 spelltab_T new_st; 9662 char_u *pf = fol, *pl = low, *pu = upp; 9663 int f, l, u; 9664 9665 clear_spell_chartab(&new_st); 9666 9667 while (*pf != NUL) 9668 { 9669 if (*pl == NUL || *pu == NUL) 9670 { 9671 EMSG(_(e_affform)); 9672 return FAIL; 9673 } 9674 #ifdef FEAT_MBYTE 9675 f = mb_ptr2char_adv(&pf); 9676 l = mb_ptr2char_adv(&pl); 9677 u = mb_ptr2char_adv(&pu); 9678 #else 9679 f = *pf++; 9680 l = *pl++; 9681 u = *pu++; 9682 #endif 9683 /* Every character that appears is a word character. */ 9684 if (f < 256) 9685 new_st.st_isw[f] = TRUE; 9686 if (l < 256) 9687 new_st.st_isw[l] = TRUE; 9688 if (u < 256) 9689 new_st.st_isw[u] = TRUE; 9690 9691 /* if "LOW" and "FOL" are not the same the "LOW" char needs 9692 * case-folding */ 9693 if (l < 256 && l != f) 9694 { 9695 if (f >= 256) 9696 { 9697 EMSG(_(e_affrange)); 9698 return FAIL; 9699 } 9700 new_st.st_fold[l] = f; 9701 } 9702 9703 /* if "UPP" and "FOL" are not the same the "UPP" char needs 9704 * case-folding, it's upper case and the "UPP" is the upper case of 9705 * "FOL" . */ 9706 if (u < 256 && u != f) 9707 { 9708 if (f >= 256) 9709 { 9710 EMSG(_(e_affrange)); 9711 return FAIL; 9712 } 9713 new_st.st_fold[u] = f; 9714 new_st.st_isu[u] = TRUE; 9715 new_st.st_upper[f] = u; 9716 } 9717 } 9718 9719 if (*pl != NUL || *pu != NUL) 9720 { 9721 EMSG(_(e_affform)); 9722 return FAIL; 9723 } 9724 9725 return set_spell_finish(&new_st); 9726 } 9727 9728 /* 9729 * Set the spell character tables from strings in the .spl file. 9730 */ 9731 static void 9732 set_spell_charflags( 9733 char_u *flags, 9734 int cnt, /* length of "flags" */ 9735 char_u *fol) 9736 { 9737 /* We build the new tables here first, so that we can compare with the 9738 * previous one. 
*/ 9739 spelltab_T new_st; 9740 int i; 9741 char_u *p = fol; 9742 int c; 9743 9744 clear_spell_chartab(&new_st); 9745 9746 for (i = 0; i < 128; ++i) 9747 { 9748 if (i < cnt) 9749 { 9750 new_st.st_isw[i + 128] = (flags[i] & CF_WORD) != 0; 9751 new_st.st_isu[i + 128] = (flags[i] & CF_UPPER) != 0; 9752 } 9753 9754 if (*p != NUL) 9755 { 9756 #ifdef FEAT_MBYTE 9757 c = mb_ptr2char_adv(&p); 9758 #else 9759 c = *p++; 9760 #endif 9761 new_st.st_fold[i + 128] = c; 9762 if (i + 128 != c && new_st.st_isu[i + 128] && c < 256) 9763 new_st.st_upper[c] = i + 128; 9764 } 9765 } 9766 9767 (void)set_spell_finish(&new_st); 9768 } 9769 9770 static int 9771 set_spell_finish(spelltab_T *new_st) 9772 { 9773 int i; 9774 9775 if (did_set_spelltab) 9776 { 9777 /* check that it's the same table */ 9778 for (i = 0; i < 256; ++i) 9779 { 9780 if (spelltab.st_isw[i] != new_st->st_isw[i] 9781 || spelltab.st_isu[i] != new_st->st_isu[i] 9782 || spelltab.st_fold[i] != new_st->st_fold[i] 9783 || spelltab.st_upper[i] != new_st->st_upper[i]) 9784 { 9785 EMSG(_("E763: Word characters differ between spell files")); 9786 return FAIL; 9787 } 9788 } 9789 } 9790 else 9791 { 9792 /* copy the new spelltab into the one being used */ 9793 spelltab = *new_st; 9794 did_set_spelltab = TRUE; 9795 } 9796 9797 return OK; 9798 } 9799 9800 /* 9801 * Return TRUE if "p" points to a word character. 9802 * As a special case we see "midword" characters as word character when it is 9803 * followed by a word character. This finds they'there but not 'they there'. 9804 * Thus this only works properly when past the first character of the word. 
9805 */ 9806 static int 9807 spell_iswordp( 9808 char_u *p, 9809 win_T *wp) /* buffer used */ 9810 { 9811 #ifdef FEAT_MBYTE 9812 char_u *s; 9813 int l; 9814 int c; 9815 9816 if (has_mbyte) 9817 { 9818 l = MB_BYTE2LEN(*p); 9819 s = p; 9820 if (l == 1) 9821 { 9822 /* be quick for ASCII */ 9823 if (wp->w_s->b_spell_ismw[*p]) 9824 s = p + 1; /* skip a mid-word character */ 9825 } 9826 else 9827 { 9828 c = mb_ptr2char(p); 9829 if (c < 256 ? wp->w_s->b_spell_ismw[c] 9830 : (wp->w_s->b_spell_ismw_mb != NULL 9831 && vim_strchr(wp->w_s->b_spell_ismw_mb, c) != NULL)) 9832 s = p + l; 9833 } 9834 9835 c = mb_ptr2char(s); 9836 if (c > 255) 9837 return spell_mb_isword_class(mb_get_class(s), wp); 9838 return spelltab.st_isw[c]; 9839 } 9840 #endif 9841 9842 return spelltab.st_isw[wp->w_s->b_spell_ismw[*p] ? p[1] : p[0]]; 9843 } 9844 9845 /* 9846 * Return TRUE if "p" points to a word character. 9847 * Unlike spell_iswordp() this doesn't check for "midword" characters. 9848 */ 9849 static int 9850 spell_iswordp_nmw(char_u *p, win_T *wp) 9851 { 9852 #ifdef FEAT_MBYTE 9853 int c; 9854 9855 if (has_mbyte) 9856 { 9857 c = mb_ptr2char(p); 9858 if (c > 255) 9859 return spell_mb_isword_class(mb_get_class(p), wp); 9860 return spelltab.st_isw[c]; 9861 } 9862 #endif 9863 return spelltab.st_isw[*p]; 9864 } 9865 9866 #ifdef FEAT_MBYTE 9867 /* 9868 * Return TRUE if word class indicates a word character. 9869 * Only for characters above 255. 9870 * Unicode subscript and superscript are not considered word characters. 9871 * See also dbcs_class() and utf_class() in mbyte.c. 9872 */ 9873 static int 9874 spell_mb_isword_class(int cl, win_T *wp) 9875 { 9876 if (wp->w_s->b_cjk) 9877 /* East Asian characters are not considered word characters. */ 9878 return cl == 2 || cl == 0x2800; 9879 return cl >= 2 && cl != 0x2070 && cl != 0x2080; 9880 } 9881 9882 /* 9883 * Return TRUE if "p" points to a word character. 9884 * Wide version of spell_iswordp(). 
9885 */ 9886 static int 9887 spell_iswordp_w(int *p, win_T *wp) 9888 { 9889 int *s; 9890 9891 if (*p < 256 ? wp->w_s->b_spell_ismw[*p] 9892 : (wp->w_s->b_spell_ismw_mb != NULL 9893 && vim_strchr(wp->w_s->b_spell_ismw_mb, *p) != NULL)) 9894 s = p + 1; 9895 else 9896 s = p; 9897 9898 if (*s > 255) 9899 { 9900 if (enc_utf8) 9901 return spell_mb_isword_class(utf_class(*s), wp); 9902 if (enc_dbcs) 9903 return spell_mb_isword_class( 9904 dbcs_class((unsigned)*s >> 8, *s & 0xff), wp); 9905 return 0; 9906 } 9907 return spelltab.st_isw[*s]; 9908 } 9909 #endif 9910 9911 /* 9912 * Write the table with prefix conditions to the .spl file. 9913 * When "fd" is NULL only count the length of what is written. 9914 */ 9915 static int 9916 write_spell_prefcond(FILE *fd, garray_T *gap) 9917 { 9918 int i; 9919 char_u *p; 9920 int len; 9921 int totlen; 9922 size_t x = 1; /* collect return value of fwrite() */ 9923 9924 if (fd != NULL) 9925 put_bytes(fd, (long_u)gap->ga_len, 2); /* <prefcondcnt> */ 9926 9927 totlen = 2 + gap->ga_len; /* length of <prefcondcnt> and <condlen> bytes */ 9928 9929 for (i = 0; i < gap->ga_len; ++i) 9930 { 9931 /* <prefcond> : <condlen> <condstr> */ 9932 p = ((char_u **)gap->ga_data)[i]; 9933 if (p != NULL) 9934 { 9935 len = (int)STRLEN(p); 9936 if (fd != NULL) 9937 { 9938 fputc(len, fd); 9939 x &= fwrite(p, (size_t)len, (size_t)1, fd); 9940 } 9941 totlen += len; 9942 } 9943 else if (fd != NULL) 9944 fputc(0, fd); 9945 } 9946 9947 return totlen; 9948 } 9949 9950 /* 9951 * Case-fold "str[len]" into "buf[buflen]". The result is NUL terminated. 9952 * Uses the character definitions from the .spl file. 9953 * When using a multi-byte 'encoding' the length may change! 9954 * Returns FAIL when something wrong. 
9955 */ 9956 static int 9957 spell_casefold( 9958 char_u *str, 9959 int len, 9960 char_u *buf, 9961 int buflen) 9962 { 9963 int i; 9964 9965 if (len >= buflen) 9966 { 9967 buf[0] = NUL; 9968 return FAIL; /* result will not fit */ 9969 } 9970 9971 #ifdef FEAT_MBYTE 9972 if (has_mbyte) 9973 { 9974 int outi = 0; 9975 char_u *p; 9976 int c; 9977 9978 /* Fold one character at a time. */ 9979 for (p = str; p < str + len; ) 9980 { 9981 if (outi + MB_MAXBYTES > buflen) 9982 { 9983 buf[outi] = NUL; 9984 return FAIL; 9985 } 9986 c = mb_cptr2char_adv(&p); 9987 outi += mb_char2bytes(SPELL_TOFOLD(c), buf + outi); 9988 } 9989 buf[outi] = NUL; 9990 } 9991 else 9992 #endif 9993 { 9994 /* Be quick for non-multibyte encodings. */ 9995 for (i = 0; i < len; ++i) 9996 buf[i] = spelltab.st_fold[str[i]]; 9997 buf[i] = NUL; 9998 } 9999 10000 return OK; 10001 } 10002 10003 /* values for sps_flags */ 10004 #define SPS_BEST 1 10005 #define SPS_FAST 2 10006 #define SPS_DOUBLE 4 10007 10008 static int sps_flags = SPS_BEST; /* flags from 'spellsuggest' */ 10009 static int sps_limit = 9999; /* max nr of suggestions given */ 10010 10011 /* 10012 * Check the 'spellsuggest' option. Return FAIL if it's wrong. 10013 * Sets "sps_flags" and "sps_limit". 
10014 */ 10015 int 10016 spell_check_sps(void) 10017 { 10018 char_u *p; 10019 char_u *s; 10020 char_u buf[MAXPATHL]; 10021 int f; 10022 10023 sps_flags = 0; 10024 sps_limit = 9999; 10025 10026 for (p = p_sps; *p != NUL; ) 10027 { 10028 copy_option_part(&p, buf, MAXPATHL, ","); 10029 10030 f = 0; 10031 if (VIM_ISDIGIT(*buf)) 10032 { 10033 s = buf; 10034 sps_limit = getdigits(&s); 10035 if (*s != NUL && !VIM_ISDIGIT(*s)) 10036 f = -1; 10037 } 10038 else if (STRCMP(buf, "best") == 0) 10039 f = SPS_BEST; 10040 else if (STRCMP(buf, "fast") == 0) 10041 f = SPS_FAST; 10042 else if (STRCMP(buf, "double") == 0) 10043 f = SPS_DOUBLE; 10044 else if (STRNCMP(buf, "expr:", 5) != 0 10045 && STRNCMP(buf, "file:", 5) != 0) 10046 f = -1; 10047 10048 if (f == -1 || (sps_flags != 0 && f != 0)) 10049 { 10050 sps_flags = SPS_BEST; 10051 sps_limit = 9999; 10052 return FAIL; 10053 } 10054 if (f != 0) 10055 sps_flags = f; 10056 } 10057 10058 if (sps_flags == 0) 10059 sps_flags = SPS_BEST; 10060 10061 return OK; 10062 } 10063 10064 /* 10065 * "z=": Find badly spelled word under or after the cursor. 10066 * Give suggestions for the properly spelled word. 10067 * In Visual mode use the highlighted word as the bad word. 10068 * When "count" is non-zero use that suggestion. 10069 */ 10070 void 10071 spell_suggest(int count) 10072 { 10073 char_u *line; 10074 pos_T prev_cursor = curwin->w_cursor; 10075 char_u wcopy[MAXWLEN + 2]; 10076 char_u *p; 10077 int i; 10078 int c; 10079 suginfo_T sug; 10080 suggest_T *stp; 10081 int mouse_used; 10082 int need_cap; 10083 int limit; 10084 int selected = count; 10085 int badlen = 0; 10086 int msg_scroll_save = msg_scroll; 10087 10088 if (no_spell_checking(curwin)) 10089 return; 10090 10091 if (VIsual_active) 10092 { 10093 /* Use the Visually selected text as the bad word. But reject 10094 * a multi-line selection. 
*/ 10095 if (curwin->w_cursor.lnum != VIsual.lnum) 10096 { 10097 vim_beep(BO_SPELL); 10098 return; 10099 } 10100 badlen = (int)curwin->w_cursor.col - (int)VIsual.col; 10101 if (badlen < 0) 10102 badlen = -badlen; 10103 else 10104 curwin->w_cursor.col = VIsual.col; 10105 ++badlen; 10106 end_visual_mode(); 10107 } 10108 /* Find the start of the badly spelled word. */ 10109 else if (spell_move_to(curwin, FORWARD, TRUE, TRUE, NULL) == 0 10110 || curwin->w_cursor.col > prev_cursor.col) 10111 { 10112 /* No bad word or it starts after the cursor: use the word under the 10113 * cursor. */ 10114 curwin->w_cursor = prev_cursor; 10115 line = ml_get_curline(); 10116 p = line + curwin->w_cursor.col; 10117 /* Backup to before start of word. */ 10118 while (p > line && spell_iswordp_nmw(p, curwin)) 10119 mb_ptr_back(line, p); 10120 /* Forward to start of word. */ 10121 while (*p != NUL && !spell_iswordp_nmw(p, curwin)) 10122 mb_ptr_adv(p); 10123 10124 if (!spell_iswordp_nmw(p, curwin)) /* No word found. */ 10125 { 10126 beep_flush(); 10127 return; 10128 } 10129 curwin->w_cursor.col = (colnr_T)(p - line); 10130 } 10131 10132 /* Get the word and its length. */ 10133 10134 /* Figure out if the word should be capitalised. */ 10135 need_cap = check_need_cap(curwin->w_cursor.lnum, curwin->w_cursor.col); 10136 10137 /* Make a copy of current line since autocommands may free the line. */ 10138 line = vim_strsave(ml_get_curline()); 10139 if (line == NULL) 10140 goto skip; 10141 10142 /* Get the list of suggestions. Limit to 'lines' - 2 or the number in 10143 * 'spellsuggest', whatever is smaller. 
*/ 10144 if (sps_limit > (int)Rows - 2) 10145 limit = (int)Rows - 2; 10146 else 10147 limit = sps_limit; 10148 spell_find_suggest(line + curwin->w_cursor.col, badlen, &sug, limit, 10149 TRUE, need_cap, TRUE); 10150 10151 if (sug.su_ga.ga_len == 0) 10152 MSG(_("Sorry, no suggestions")); 10153 else if (count > 0) 10154 { 10155 if (count > sug.su_ga.ga_len) 10156 smsg((char_u *)_("Sorry, only %ld suggestions"), 10157 (long)sug.su_ga.ga_len); 10158 } 10159 else 10160 { 10161 vim_free(repl_from); 10162 repl_from = NULL; 10163 vim_free(repl_to); 10164 repl_to = NULL; 10165 10166 #ifdef FEAT_RIGHTLEFT 10167 /* When 'rightleft' is set the list is drawn right-left. */ 10168 cmdmsg_rl = curwin->w_p_rl; 10169 if (cmdmsg_rl) 10170 msg_col = Columns - 1; 10171 #endif 10172 10173 /* List the suggestions. */ 10174 msg_start(); 10175 msg_row = Rows - 1; /* for when 'cmdheight' > 1 */ 10176 lines_left = Rows; /* avoid more prompt */ 10177 vim_snprintf((char *)IObuff, IOSIZE, _("Change \"%.*s\" to:"), 10178 sug.su_badlen, sug.su_badptr); 10179 #ifdef FEAT_RIGHTLEFT 10180 if (cmdmsg_rl && STRNCMP(IObuff, "Change", 6) == 0) 10181 { 10182 /* And now the rabbit from the high hat: Avoid showing the 10183 * untranslated message rightleft. */ 10184 vim_snprintf((char *)IObuff, IOSIZE, ":ot \"%.*s\" egnahC", 10185 sug.su_badlen, sug.su_badptr); 10186 } 10187 #endif 10188 msg_puts(IObuff); 10189 msg_clr_eos(); 10190 msg_putchar('\n'); 10191 10192 msg_scroll = TRUE; 10193 for (i = 0; i < sug.su_ga.ga_len; ++i) 10194 { 10195 stp = &SUG(sug.su_ga, i); 10196 10197 /* The suggested word may replace only part of the bad word, add 10198 * the not replaced part. 
*/ 10199 vim_strncpy(wcopy, stp->st_word, MAXWLEN); 10200 if (sug.su_badlen > stp->st_orglen) 10201 vim_strncpy(wcopy + stp->st_wordlen, 10202 sug.su_badptr + stp->st_orglen, 10203 sug.su_badlen - stp->st_orglen); 10204 vim_snprintf((char *)IObuff, IOSIZE, "%2d", i + 1); 10205 #ifdef FEAT_RIGHTLEFT 10206 if (cmdmsg_rl) 10207 rl_mirror(IObuff); 10208 #endif 10209 msg_puts(IObuff); 10210 10211 vim_snprintf((char *)IObuff, IOSIZE, " \"%s\"", wcopy); 10212 msg_puts(IObuff); 10213 10214 /* The word may replace more than "su_badlen". */ 10215 if (sug.su_badlen < stp->st_orglen) 10216 { 10217 vim_snprintf((char *)IObuff, IOSIZE, _(" < \"%.*s\""), 10218 stp->st_orglen, sug.su_badptr); 10219 msg_puts(IObuff); 10220 } 10221 10222 if (p_verbose > 0) 10223 { 10224 /* Add the score. */ 10225 if (sps_flags & (SPS_DOUBLE | SPS_BEST)) 10226 vim_snprintf((char *)IObuff, IOSIZE, " (%s%d - %d)", 10227 stp->st_salscore ? "s " : "", 10228 stp->st_score, stp->st_altscore); 10229 else 10230 vim_snprintf((char *)IObuff, IOSIZE, " (%d)", 10231 stp->st_score); 10232 #ifdef FEAT_RIGHTLEFT 10233 if (cmdmsg_rl) 10234 /* Mirror the numbers, but keep the leading space. */ 10235 rl_mirror(IObuff + 1); 10236 #endif 10237 msg_advance(30); 10238 msg_puts(IObuff); 10239 } 10240 msg_putchar('\n'); 10241 } 10242 10243 #ifdef FEAT_RIGHTLEFT 10244 cmdmsg_rl = FALSE; 10245 msg_col = 0; 10246 #endif 10247 /* Ask for choice. */ 10248 selected = prompt_for_number(&mouse_used); 10249 if (mouse_used) 10250 selected -= lines_left; 10251 lines_left = Rows; /* avoid more prompt */ 10252 /* don't delay for 'smd' in normal_cmd() */ 10253 msg_scroll = msg_scroll_save; 10254 } 10255 10256 if (selected > 0 && selected <= sug.su_ga.ga_len && u_save_cursor() == OK) 10257 { 10258 /* Save the from and to text for :spellrepall. */ 10259 stp = &SUG(sug.su_ga, selected - 1); 10260 if (sug.su_badlen > stp->st_orglen) 10261 { 10262 /* Replacing less than "su_badlen", append the remainder to 10263 * repl_to. 
*/ 10264 repl_from = vim_strnsave(sug.su_badptr, sug.su_badlen); 10265 vim_snprintf((char *)IObuff, IOSIZE, "%s%.*s", stp->st_word, 10266 sug.su_badlen - stp->st_orglen, 10267 sug.su_badptr + stp->st_orglen); 10268 repl_to = vim_strsave(IObuff); 10269 } 10270 else 10271 { 10272 /* Replacing su_badlen or more, use the whole word. */ 10273 repl_from = vim_strnsave(sug.su_badptr, stp->st_orglen); 10274 repl_to = vim_strsave(stp->st_word); 10275 } 10276 10277 /* Replace the word. */ 10278 p = alloc((unsigned)STRLEN(line) - stp->st_orglen 10279 + stp->st_wordlen + 1); 10280 if (p != NULL) 10281 { 10282 c = (int)(sug.su_badptr - line); 10283 mch_memmove(p, line, c); 10284 STRCPY(p + c, stp->st_word); 10285 STRCAT(p, sug.su_badptr + stp->st_orglen); 10286 ml_replace(curwin->w_cursor.lnum, p, FALSE); 10287 curwin->w_cursor.col = c; 10288 10289 /* For redo we use a change-word command. */ 10290 ResetRedobuff(); 10291 AppendToRedobuff((char_u *)"ciw"); 10292 AppendToRedobuffLit(p + c, 10293 stp->st_wordlen + sug.su_badlen - stp->st_orglen); 10294 AppendCharToRedobuff(ESC); 10295 10296 /* After this "p" may be invalid. */ 10297 changed_bytes(curwin->w_cursor.lnum, c); 10298 } 10299 } 10300 else 10301 curwin->w_cursor = prev_cursor; 10302 10303 spell_find_cleanup(&sug); 10304 skip: 10305 vim_free(line); 10306 } 10307 10308 /* 10309 * Check if the word at line "lnum" column "col" is required to start with a 10310 * capital. This uses 'spellcapcheck' of the current buffer. 10311 */ 10312 static int 10313 check_need_cap(linenr_T lnum, colnr_T col) 10314 { 10315 int need_cap = FALSE; 10316 char_u *line; 10317 char_u *line_copy = NULL; 10318 char_u *p; 10319 colnr_T endcol; 10320 regmatch_T regmatch; 10321 10322 if (curwin->w_s->b_cap_prog == NULL) 10323 return FALSE; 10324 10325 line = ml_get_curline(); 10326 endcol = 0; 10327 if ((int)(skipwhite(line) - line) >= (int)col) 10328 { 10329 /* At start of line, check if previous line is empty or sentence 10330 * ends there. 
*/ 10331 if (lnum == 1) 10332 need_cap = TRUE; 10333 else 10334 { 10335 line = ml_get(lnum - 1); 10336 if (*skipwhite(line) == NUL) 10337 need_cap = TRUE; 10338 else 10339 { 10340 /* Append a space in place of the line break. */ 10341 line_copy = concat_str(line, (char_u *)" "); 10342 line = line_copy; 10343 endcol = (colnr_T)STRLEN(line); 10344 } 10345 } 10346 } 10347 else 10348 endcol = col; 10349 10350 if (endcol > 0) 10351 { 10352 /* Check if sentence ends before the bad word. */ 10353 regmatch.regprog = curwin->w_s->b_cap_prog; 10354 regmatch.rm_ic = FALSE; 10355 p = line + endcol; 10356 for (;;) 10357 { 10358 mb_ptr_back(line, p); 10359 if (p == line || spell_iswordp_nmw(p, curwin)) 10360 break; 10361 if (vim_regexec(®match, p, 0) 10362 && regmatch.endp[0] == line + endcol) 10363 { 10364 need_cap = TRUE; 10365 break; 10366 } 10367 } 10368 curwin->w_s->b_cap_prog = regmatch.regprog; 10369 } 10370 10371 vim_free(line_copy); 10372 10373 return need_cap; 10374 } 10375 10376 10377 /* 10378 * ":spellrepall" 10379 */ 10380 void 10381 ex_spellrepall(exarg_T *eap UNUSED) 10382 { 10383 pos_T pos = curwin->w_cursor; 10384 char_u *frompat; 10385 int addlen; 10386 char_u *line; 10387 char_u *p; 10388 int save_ws = p_ws; 10389 linenr_T prev_lnum = 0; 10390 10391 if (repl_from == NULL || repl_to == NULL) 10392 { 10393 EMSG(_("E752: No previous spell replacement")); 10394 return; 10395 } 10396 addlen = (int)(STRLEN(repl_to) - STRLEN(repl_from)); 10397 10398 frompat = alloc((unsigned)STRLEN(repl_from) + 7); 10399 if (frompat == NULL) 10400 return; 10401 sprintf((char *)frompat, "\\V\\<%s\\>", repl_from); 10402 p_ws = FALSE; 10403 10404 sub_nsubs = 0; 10405 sub_nlines = 0; 10406 curwin->w_cursor.lnum = 0; 10407 while (!got_int) 10408 { 10409 if (do_search(NULL, '/', frompat, 1L, SEARCH_KEEP, NULL) == 0 10410 || u_save_cursor() == FAIL) 10411 break; 10412 10413 /* Only replace when the right word isn't there yet. This happens 10414 * when changing "etc" to "etc.". 
*/ 10415 line = ml_get_curline(); 10416 if (addlen <= 0 || STRNCMP(line + curwin->w_cursor.col, 10417 repl_to, STRLEN(repl_to)) != 0) 10418 { 10419 p = alloc((unsigned)STRLEN(line) + addlen + 1); 10420 if (p == NULL) 10421 break; 10422 mch_memmove(p, line, curwin->w_cursor.col); 10423 STRCPY(p + curwin->w_cursor.col, repl_to); 10424 STRCAT(p, line + curwin->w_cursor.col + STRLEN(repl_from)); 10425 ml_replace(curwin->w_cursor.lnum, p, FALSE); 10426 changed_bytes(curwin->w_cursor.lnum, curwin->w_cursor.col); 10427 10428 if (curwin->w_cursor.lnum != prev_lnum) 10429 { 10430 ++sub_nlines; 10431 prev_lnum = curwin->w_cursor.lnum; 10432 } 10433 ++sub_nsubs; 10434 } 10435 curwin->w_cursor.col += (colnr_T)STRLEN(repl_to); 10436 } 10437 10438 p_ws = save_ws; 10439 curwin->w_cursor = pos; 10440 vim_free(frompat); 10441 10442 if (sub_nsubs == 0) 10443 EMSG2(_("E753: Not found: %s"), repl_from); 10444 else 10445 do_sub_msg(FALSE); 10446 } 10447 10448 /* 10449 * Find spell suggestions for "word". Return them in the growarray "*gap" as 10450 * a list of allocated strings. 10451 */ 10452 void 10453 spell_suggest_list( 10454 garray_T *gap, 10455 char_u *word, 10456 int maxcount, /* maximum nr of suggestions */ 10457 int need_cap, /* 'spellcapcheck' matched */ 10458 int interactive) 10459 { 10460 suginfo_T sug; 10461 int i; 10462 suggest_T *stp; 10463 char_u *wcopy; 10464 10465 spell_find_suggest(word, 0, &sug, maxcount, FALSE, need_cap, interactive); 10466 10467 /* Make room in "gap". */ 10468 ga_init2(gap, sizeof(char_u *), sug.su_ga.ga_len + 1); 10469 if (ga_grow(gap, sug.su_ga.ga_len) == OK) 10470 { 10471 for (i = 0; i < sug.su_ga.ga_len; ++i) 10472 { 10473 stp = &SUG(sug.su_ga, i); 10474 10475 /* The suggested word may replace only part of "word", add the not 10476 * replaced part. 
*/ 10477 wcopy = alloc(stp->st_wordlen 10478 + (unsigned)STRLEN(sug.su_badptr + stp->st_orglen) + 1); 10479 if (wcopy == NULL) 10480 break; 10481 STRCPY(wcopy, stp->st_word); 10482 STRCPY(wcopy + stp->st_wordlen, sug.su_badptr + stp->st_orglen); 10483 ((char_u **)gap->ga_data)[gap->ga_len++] = wcopy; 10484 } 10485 } 10486 10487 spell_find_cleanup(&sug); 10488 } 10489 10490 /* 10491 * Find spell suggestions for the word at the start of "badptr". 10492 * Return the suggestions in "su->su_ga". 10493 * The maximum number of suggestions is "maxcount". 10494 * Note: does use info for the current window. 10495 * This is based on the mechanisms of Aspell, but completely reimplemented. 10496 */ 10497 static void 10498 spell_find_suggest( 10499 char_u *badptr, 10500 int badlen, /* length of bad word or 0 if unknown */ 10501 suginfo_T *su, 10502 int maxcount, 10503 int banbadword, /* don't include badword in suggestions */ 10504 int need_cap, /* word should start with capital */ 10505 int interactive) 10506 { 10507 hlf_T attr = HLF_COUNT; 10508 char_u buf[MAXPATHL]; 10509 char_u *p; 10510 int do_combine = FALSE; 10511 char_u *sps_copy; 10512 #ifdef FEAT_EVAL 10513 static int expr_busy = FALSE; 10514 #endif 10515 int c; 10516 int i; 10517 langp_T *lp; 10518 10519 /* 10520 * Set the info in "*su". 
10521 */ 10522 vim_memset(su, 0, sizeof(suginfo_T)); 10523 ga_init2(&su->su_ga, (int)sizeof(suggest_T), 10); 10524 ga_init2(&su->su_sga, (int)sizeof(suggest_T), 10); 10525 if (*badptr == NUL) 10526 return; 10527 hash_init(&su->su_banned); 10528 10529 su->su_badptr = badptr; 10530 if (badlen != 0) 10531 su->su_badlen = badlen; 10532 else 10533 su->su_badlen = spell_check(curwin, su->su_badptr, &attr, NULL, FALSE); 10534 su->su_maxcount = maxcount; 10535 su->su_maxscore = SCORE_MAXINIT; 10536 10537 if (su->su_badlen >= MAXWLEN) 10538 su->su_badlen = MAXWLEN - 1; /* just in case */ 10539 vim_strncpy(su->su_badword, su->su_badptr, su->su_badlen); 10540 (void)spell_casefold(su->su_badptr, su->su_badlen, 10541 su->su_fbadword, MAXWLEN); 10542 /* get caps flags for bad word */ 10543 su->su_badflags = badword_captype(su->su_badptr, 10544 su->su_badptr + su->su_badlen); 10545 if (need_cap) 10546 su->su_badflags |= WF_ONECAP; 10547 10548 /* Find the default language for sound folding. We simply use the first 10549 * one in 'spelllang' that supports sound folding. That's good for when 10550 * using multiple files for one language, it's not that bad when mixing 10551 * languages (e.g., "pl,en"). */ 10552 for (i = 0; i < curbuf->b_s.b_langp.ga_len; ++i) 10553 { 10554 lp = LANGP_ENTRY(curbuf->b_s.b_langp, i); 10555 if (lp->lp_sallang != NULL) 10556 { 10557 su->su_sallang = lp->lp_sallang; 10558 break; 10559 } 10560 } 10561 10562 /* Soundfold the bad word with the default sound folding, so that we don't 10563 * have to do this many times. */ 10564 if (su->su_sallang != NULL) 10565 spell_soundfold(su->su_sallang, su->su_fbadword, TRUE, 10566 su->su_sal_badword); 10567 10568 /* If the word is not capitalised and spell_check() doesn't consider the 10569 * word to be bad then it might need to be capitalised. Add a suggestion 10570 * for that. 
*/ 10571 c = PTR2CHAR(su->su_badptr); 10572 if (!SPELL_ISUPPER(c) && attr == HLF_COUNT) 10573 { 10574 make_case_word(su->su_badword, buf, WF_ONECAP); 10575 add_suggestion(su, &su->su_ga, buf, su->su_badlen, SCORE_ICASE, 10576 0, TRUE, su->su_sallang, FALSE); 10577 } 10578 10579 /* Ban the bad word itself. It may appear in another region. */ 10580 if (banbadword) 10581 add_banned(su, su->su_badword); 10582 10583 /* Make a copy of 'spellsuggest', because the expression may change it. */ 10584 sps_copy = vim_strsave(p_sps); 10585 if (sps_copy == NULL) 10586 return; 10587 10588 /* Loop over the items in 'spellsuggest'. */ 10589 for (p = sps_copy; *p != NUL; ) 10590 { 10591 copy_option_part(&p, buf, MAXPATHL, ","); 10592 10593 if (STRNCMP(buf, "expr:", 5) == 0) 10594 { 10595 #ifdef FEAT_EVAL 10596 /* Evaluate an expression. Skip this when called recursively, 10597 * when using spellsuggest() in the expression. */ 10598 if (!expr_busy) 10599 { 10600 expr_busy = TRUE; 10601 spell_suggest_expr(su, buf + 5); 10602 expr_busy = FALSE; 10603 } 10604 #endif 10605 } 10606 else if (STRNCMP(buf, "file:", 5) == 0) 10607 /* Use list of suggestions in a file. */ 10608 spell_suggest_file(su, buf + 5); 10609 else 10610 { 10611 /* Use internal method. */ 10612 spell_suggest_intern(su, interactive); 10613 if (sps_flags & SPS_DOUBLE) 10614 do_combine = TRUE; 10615 } 10616 } 10617 10618 vim_free(sps_copy); 10619 10620 if (do_combine) 10621 /* Combine the two list of suggestions. This must be done last, 10622 * because sorting changes the order again. */ 10623 score_combine(su); 10624 } 10625 10626 #ifdef FEAT_EVAL 10627 /* 10628 * Find suggestions by evaluating expression "expr". 10629 */ 10630 static void 10631 spell_suggest_expr(suginfo_T *su, char_u *expr) 10632 { 10633 list_T *list; 10634 listitem_T *li; 10635 int score; 10636 char_u *p; 10637 10638 /* The work is split up in a few parts to avoid having to export 10639 * suginfo_T. 
10640 * First evaluate the expression and get the resulting list. */ 10641 list = eval_spell_expr(su->su_badword, expr); 10642 if (list != NULL) 10643 { 10644 /* Loop over the items in the list. */ 10645 for (li = list->lv_first; li != NULL; li = li->li_next) 10646 if (li->li_tv.v_type == VAR_LIST) 10647 { 10648 /* Get the word and the score from the items. */ 10649 score = get_spellword(li->li_tv.vval.v_list, &p); 10650 if (score >= 0 && score <= su->su_maxscore) 10651 add_suggestion(su, &su->su_ga, p, su->su_badlen, 10652 score, 0, TRUE, su->su_sallang, FALSE); 10653 } 10654 list_unref(list); 10655 } 10656 10657 /* Remove bogus suggestions, sort and truncate at "maxcount". */ 10658 check_suggestions(su, &su->su_ga); 10659 (void)cleanup_suggestions(&su->su_ga, su->su_maxscore, su->su_maxcount); 10660 } 10661 #endif 10662 10663 /* 10664 * Find suggestions in file "fname". Used for "file:" in 'spellsuggest'. 10665 */ 10666 static void 10667 spell_suggest_file(suginfo_T *su, char_u *fname) 10668 { 10669 FILE *fd; 10670 char_u line[MAXWLEN * 2]; 10671 char_u *p; 10672 int len; 10673 char_u cword[MAXWLEN]; 10674 10675 /* Open the file. */ 10676 fd = mch_fopen((char *)fname, "r"); 10677 if (fd == NULL) 10678 { 10679 EMSG2(_(e_notopen), fname); 10680 return; 10681 } 10682 10683 /* Read it line by line. */ 10684 while (!vim_fgets(line, MAXWLEN * 2, fd) && !got_int) 10685 { 10686 line_breakcheck(); 10687 10688 p = vim_strchr(line, '/'); 10689 if (p == NULL) 10690 continue; /* No Tab found, just skip the line. */ 10691 *p++ = NUL; 10692 if (STRICMP(su->su_badword, line) == 0) 10693 { 10694 /* Match! Isolate the good word, until CR or NL. */ 10695 for (len = 0; p[len] >= ' '; ++len) 10696 ; 10697 p[len] = NUL; 10698 10699 /* If the suggestion doesn't have specific case duplicate the case 10700 * of the bad word. 
*/ 10701 if (captype(p, NULL) == 0) 10702 { 10703 make_case_word(p, cword, su->su_badflags); 10704 p = cword; 10705 } 10706 10707 add_suggestion(su, &su->su_ga, p, su->su_badlen, 10708 SCORE_FILE, 0, TRUE, su->su_sallang, FALSE); 10709 } 10710 } 10711 10712 fclose(fd); 10713 10714 /* Remove bogus suggestions, sort and truncate at "maxcount". */ 10715 check_suggestions(su, &su->su_ga); 10716 (void)cleanup_suggestions(&su->su_ga, su->su_maxscore, su->su_maxcount); 10717 } 10718 10719 /* 10720 * Find suggestions for the internal method indicated by "sps_flags". 10721 */ 10722 static void 10723 spell_suggest_intern(suginfo_T *su, int interactive) 10724 { 10725 /* 10726 * Load the .sug file(s) that are available and not done yet. 10727 */ 10728 suggest_load_files(); 10729 10730 /* 10731 * 1. Try special cases, such as repeating a word: "the the" -> "the". 10732 * 10733 * Set a maximum score to limit the combination of operations that is 10734 * tried. 10735 */ 10736 suggest_try_special(su); 10737 10738 /* 10739 * 2. Try inserting/deleting/swapping/changing a letter, use REP entries 10740 * from the .aff file and inserting a space (split the word). 10741 */ 10742 suggest_try_change(su); 10743 10744 /* For the resulting top-scorers compute the sound-a-like score. */ 10745 if (sps_flags & SPS_DOUBLE) 10746 score_comp_sal(su); 10747 10748 /* 10749 * 3. Try finding sound-a-like words. 10750 */ 10751 if ((sps_flags & SPS_FAST) == 0) 10752 { 10753 if (sps_flags & SPS_BEST) 10754 /* Adjust the word score for the suggestions found so far for how 10755 * they sounds like. */ 10756 rescore_suggestions(su); 10757 10758 /* 10759 * While going through the soundfold tree "su_maxscore" is the score 10760 * for the soundfold word, limits the changes that are being tried, 10761 * and "su_sfmaxscore" the rescored score, which is set by 10762 * cleanup_suggestions(). 
10763 * First find words with a small edit distance, because this is much 10764 * faster and often already finds the top-N suggestions. If we didn't 10765 * find many suggestions try again with a higher edit distance. 10766 * "sl_sounddone" is used to avoid doing the same word twice. 10767 */ 10768 suggest_try_soundalike_prep(); 10769 su->su_maxscore = SCORE_SFMAX1; 10770 su->su_sfmaxscore = SCORE_MAXINIT * 3; 10771 suggest_try_soundalike(su); 10772 if (su->su_ga.ga_len < SUG_CLEAN_COUNT(su)) 10773 { 10774 /* We didn't find enough matches, try again, allowing more 10775 * changes to the soundfold word. */ 10776 su->su_maxscore = SCORE_SFMAX2; 10777 suggest_try_soundalike(su); 10778 if (su->su_ga.ga_len < SUG_CLEAN_COUNT(su)) 10779 { 10780 /* Still didn't find enough matches, try again, allowing even 10781 * more changes to the soundfold word. */ 10782 su->su_maxscore = SCORE_SFMAX3; 10783 suggest_try_soundalike(su); 10784 } 10785 } 10786 su->su_maxscore = su->su_sfmaxscore; 10787 suggest_try_soundalike_finish(); 10788 } 10789 10790 /* When CTRL-C was hit while searching do show the results. Only clear 10791 * got_int when using a command, not for spellsuggest(). */ 10792 ui_breakcheck(); 10793 if (interactive && got_int) 10794 { 10795 (void)vgetc(); 10796 got_int = FALSE; 10797 } 10798 10799 if ((sps_flags & SPS_DOUBLE) == 0 && su->su_ga.ga_len != 0) 10800 { 10801 if (sps_flags & SPS_BEST) 10802 /* Adjust the word score for how it sounds like. */ 10803 rescore_suggestions(su); 10804 10805 /* Remove bogus suggestions, sort and truncate at "maxcount". */ 10806 check_suggestions(su, &su->su_ga); 10807 (void)cleanup_suggestions(&su->su_ga, su->su_maxscore, su->su_maxcount); 10808 } 10809 } 10810 10811 /* 10812 * Load the .sug files for languages that have one and weren't loaded yet. 
10813 */ 10814 static void 10815 suggest_load_files(void) 10816 { 10817 langp_T *lp; 10818 int lpi; 10819 slang_T *slang; 10820 char_u *dotp; 10821 FILE *fd; 10822 char_u buf[MAXWLEN]; 10823 int i; 10824 time_t timestamp; 10825 int wcount; 10826 int wordnr; 10827 garray_T ga; 10828 int c; 10829 10830 /* Do this for all languages that support sound folding. */ 10831 for (lpi = 0; lpi < curwin->w_s->b_langp.ga_len; ++lpi) 10832 { 10833 lp = LANGP_ENTRY(curwin->w_s->b_langp, lpi); 10834 slang = lp->lp_slang; 10835 if (slang->sl_sugtime != 0 && !slang->sl_sugloaded) 10836 { 10837 /* Change ".spl" to ".sug" and open the file. When the file isn't 10838 * found silently skip it. Do set "sl_sugloaded" so that we 10839 * don't try again and again. */ 10840 slang->sl_sugloaded = TRUE; 10841 10842 dotp = vim_strrchr(slang->sl_fname, '.'); 10843 if (dotp == NULL || fnamecmp(dotp, ".spl") != 0) 10844 continue; 10845 STRCPY(dotp, ".sug"); 10846 fd = mch_fopen((char *)slang->sl_fname, "r"); 10847 if (fd == NULL) 10848 goto nextone; 10849 10850 /* 10851 * <SUGHEADER>: <fileID> <versionnr> <timestamp> 10852 */ 10853 for (i = 0; i < VIMSUGMAGICL; ++i) 10854 buf[i] = getc(fd); /* <fileID> */ 10855 if (STRNCMP(buf, VIMSUGMAGIC, VIMSUGMAGICL) != 0) 10856 { 10857 EMSG2(_("E778: This does not look like a .sug file: %s"), 10858 slang->sl_fname); 10859 goto nextone; 10860 } 10861 c = getc(fd); /* <versionnr> */ 10862 if (c < VIMSUGVERSION) 10863 { 10864 EMSG2(_("E779: Old .sug file, needs to be updated: %s"), 10865 slang->sl_fname); 10866 goto nextone; 10867 } 10868 else if (c > VIMSUGVERSION) 10869 { 10870 EMSG2(_("E780: .sug file is for newer version of Vim: %s"), 10871 slang->sl_fname); 10872 goto nextone; 10873 } 10874 10875 /* Check the timestamp, it must be exactly the same as the one in 10876 * the .spl file. Otherwise the word numbers won't match. 
*/ 10877 timestamp = get8ctime(fd); /* <timestamp> */ 10878 if (timestamp != slang->sl_sugtime) 10879 { 10880 EMSG2(_("E781: .sug file doesn't match .spl file: %s"), 10881 slang->sl_fname); 10882 goto nextone; 10883 } 10884 10885 /* 10886 * <SUGWORDTREE>: <wordtree> 10887 * Read the trie with the soundfolded words. 10888 */ 10889 if (spell_read_tree(fd, &slang->sl_sbyts, &slang->sl_sidxs, 10890 FALSE, 0) != 0) 10891 { 10892 someerror: 10893 EMSG2(_("E782: error while reading .sug file: %s"), 10894 slang->sl_fname); 10895 slang_clear_sug(slang); 10896 goto nextone; 10897 } 10898 10899 /* 10900 * <SUGTABLE>: <sugwcount> <sugline> ... 10901 * 10902 * Read the table with word numbers. We use a file buffer for 10903 * this, because it's so much like a file with lines. Makes it 10904 * possible to swap the info and save on memory use. 10905 */ 10906 slang->sl_sugbuf = open_spellbuf(); 10907 if (slang->sl_sugbuf == NULL) 10908 goto someerror; 10909 /* <sugwcount> */ 10910 wcount = get4c(fd); 10911 if (wcount < 0) 10912 goto someerror; 10913 10914 /* Read all the wordnr lists into the buffer, one NUL terminated 10915 * list per line. */ 10916 ga_init2(&ga, 1, 100); 10917 for (wordnr = 0; wordnr < wcount; ++wordnr) 10918 { 10919 ga.ga_len = 0; 10920 for (;;) 10921 { 10922 c = getc(fd); /* <sugline> */ 10923 if (c < 0 || ga_grow(&ga, 1) == FAIL) 10924 goto someerror; 10925 ((char_u *)ga.ga_data)[ga.ga_len++] = c; 10926 if (c == NUL) 10927 break; 10928 } 10929 if (ml_append_buf(slang->sl_sugbuf, (linenr_T)wordnr, 10930 ga.ga_data, ga.ga_len, TRUE) == FAIL) 10931 goto someerror; 10932 } 10933 ga_clear(&ga); 10934 10935 /* 10936 * Need to put word counts in the word tries, so that we can find 10937 * a word by its number. 
10938 */ 10939 tree_count_words(slang->sl_fbyts, slang->sl_fidxs); 10940 tree_count_words(slang->sl_sbyts, slang->sl_sidxs); 10941 10942 nextone: 10943 if (fd != NULL) 10944 fclose(fd); 10945 STRCPY(dotp, ".spl"); 10946 } 10947 } 10948 } 10949 10950 10951 /* 10952 * Fill in the wordcount fields for a trie. 10953 * Returns the total number of words. 10954 */ 10955 static void 10956 tree_count_words(char_u *byts, idx_T *idxs) 10957 { 10958 int depth; 10959 idx_T arridx[MAXWLEN]; 10960 int curi[MAXWLEN]; 10961 int c; 10962 idx_T n; 10963 int wordcount[MAXWLEN]; 10964 10965 arridx[0] = 0; 10966 curi[0] = 1; 10967 wordcount[0] = 0; 10968 depth = 0; 10969 while (depth >= 0 && !got_int) 10970 { 10971 if (curi[depth] > byts[arridx[depth]]) 10972 { 10973 /* Done all bytes at this node, go up one level. */ 10974 idxs[arridx[depth]] = wordcount[depth]; 10975 if (depth > 0) 10976 wordcount[depth - 1] += wordcount[depth]; 10977 10978 --depth; 10979 fast_breakcheck(); 10980 } 10981 else 10982 { 10983 /* Do one more byte at this node. */ 10984 n = arridx[depth] + curi[depth]; 10985 ++curi[depth]; 10986 10987 c = byts[n]; 10988 if (c == 0) 10989 { 10990 /* End of word, count it. */ 10991 ++wordcount[depth]; 10992 10993 /* Skip over any other NUL bytes (same word with different 10994 * flags). */ 10995 while (byts[n + 1] == 0) 10996 { 10997 ++n; 10998 ++curi[depth]; 10999 } 11000 } 11001 else 11002 { 11003 /* Normal char, go one level deeper to count the words. */ 11004 ++depth; 11005 arridx[depth] = idxs[n]; 11006 curi[depth] = 1; 11007 wordcount[depth] = 0; 11008 } 11009 } 11010 } 11011 } 11012 11013 /* 11014 * Free the info put in "*su" by spell_find_suggest(). 11015 */ 11016 static void 11017 spell_find_cleanup(suginfo_T *su) 11018 { 11019 int i; 11020 11021 /* Free the suggestions. 
*/ 11022 for (i = 0; i < su->su_ga.ga_len; ++i) 11023 vim_free(SUG(su->su_ga, i).st_word); 11024 ga_clear(&su->su_ga); 11025 for (i = 0; i < su->su_sga.ga_len; ++i) 11026 vim_free(SUG(su->su_sga, i).st_word); 11027 ga_clear(&su->su_sga); 11028 11029 /* Free the banned words. */ 11030 hash_clear_all(&su->su_banned, 0); 11031 } 11032 11033 /* 11034 * Make a copy of "word", with the first letter upper or lower cased, to 11035 * "wcopy[MAXWLEN]". "word" must not be empty. 11036 * The result is NUL terminated. 11037 */ 11038 static void 11039 onecap_copy( 11040 char_u *word, 11041 char_u *wcopy, 11042 int upper) /* TRUE: first letter made upper case */ 11043 { 11044 char_u *p; 11045 int c; 11046 int l; 11047 11048 p = word; 11049 #ifdef FEAT_MBYTE 11050 if (has_mbyte) 11051 c = mb_cptr2char_adv(&p); 11052 else 11053 #endif 11054 c = *p++; 11055 if (upper) 11056 c = SPELL_TOUPPER(c); 11057 else 11058 c = SPELL_TOFOLD(c); 11059 #ifdef FEAT_MBYTE 11060 if (has_mbyte) 11061 l = mb_char2bytes(c, wcopy); 11062 else 11063 #endif 11064 { 11065 l = 1; 11066 wcopy[0] = c; 11067 } 11068 vim_strncpy(wcopy + l, p, MAXWLEN - l - 1); 11069 } 11070 11071 /* 11072 * Make a copy of "word" with all the letters upper cased into 11073 * "wcopy[MAXWLEN]". The result is NUL terminated. 11074 */ 11075 static void 11076 allcap_copy(char_u *word, char_u *wcopy) 11077 { 11078 char_u *s; 11079 char_u *d; 11080 int c; 11081 11082 d = wcopy; 11083 for (s = word; *s != NUL; ) 11084 { 11085 #ifdef FEAT_MBYTE 11086 if (has_mbyte) 11087 c = mb_cptr2char_adv(&s); 11088 else 11089 #endif 11090 c = *s++; 11091 11092 #ifdef FEAT_MBYTE 11093 /* We only change 0xdf to SS when we are certain latin1 is used. It 11094 * would cause weird errors in other 8-bit encodings. 
*/ 11095 if (enc_latin1like && c == 0xdf) 11096 { 11097 c = 'S'; 11098 if (d - wcopy >= MAXWLEN - 1) 11099 break; 11100 *d++ = c; 11101 } 11102 else 11103 #endif 11104 c = SPELL_TOUPPER(c); 11105 11106 #ifdef FEAT_MBYTE 11107 if (has_mbyte) 11108 { 11109 if (d - wcopy >= MAXWLEN - MB_MAXBYTES) 11110 break; 11111 d += mb_char2bytes(c, d); 11112 } 11113 else 11114 #endif 11115 { 11116 if (d - wcopy >= MAXWLEN - 1) 11117 break; 11118 *d++ = c; 11119 } 11120 } 11121 *d = NUL; 11122 } 11123 11124 /* 11125 * Try finding suggestions by recognizing specific situations. 11126 */ 11127 static void 11128 suggest_try_special(suginfo_T *su) 11129 { 11130 char_u *p; 11131 size_t len; 11132 int c; 11133 char_u word[MAXWLEN]; 11134 11135 /* 11136 * Recognize a word that is repeated: "the the". 11137 */ 11138 p = skiptowhite(su->su_fbadword); 11139 len = p - su->su_fbadword; 11140 p = skipwhite(p); 11141 if (STRLEN(p) == len && STRNCMP(su->su_fbadword, p, len) == 0) 11142 { 11143 /* Include badflags: if the badword is onecap or allcap 11144 * use that for the goodword too: "The the" -> "The". */ 11145 c = su->su_fbadword[len]; 11146 su->su_fbadword[len] = NUL; 11147 make_case_word(su->su_fbadword, word, su->su_badflags); 11148 su->su_fbadword[len] = c; 11149 11150 /* Give a soundalike score of 0, compute the score as if deleting one 11151 * character. */ 11152 add_suggestion(su, &su->su_ga, word, su->su_badlen, 11153 RESCORE(SCORE_REP, 0), 0, TRUE, su->su_sallang, FALSE); 11154 } 11155 } 11156 11157 /* 11158 * Change the 0 to 1 to measure how much time is spent in each state. 11159 * Output is dumped in "suggestprof". 
11160 */ 11161 #if 0 11162 # define SUGGEST_PROFILE 11163 proftime_T current; 11164 proftime_T total; 11165 proftime_T times[STATE_FINAL + 1]; 11166 long counts[STATE_FINAL + 1]; 11167 11168 static void 11169 prof_init(void) 11170 { 11171 for (int i = 0; i <= STATE_FINAL; ++i) 11172 { 11173 profile_zero(×[i]); 11174 counts[i] = 0; 11175 } 11176 profile_start(¤t); 11177 profile_start(&total); 11178 } 11179 11180 /* call before changing state */ 11181 static void 11182 prof_store(state_T state) 11183 { 11184 profile_end(¤t); 11185 profile_add(×[state], ¤t); 11186 ++counts[state]; 11187 profile_start(¤t); 11188 } 11189 # define PROF_STORE(state) prof_store(state); 11190 11191 static void 11192 prof_report(char *name) 11193 { 11194 FILE *fd = fopen("suggestprof", "a"); 11195 11196 profile_end(&total); 11197 fprintf(fd, "-----------------------\n"); 11198 fprintf(fd, "%s: %s\n", name, profile_msg(&total)); 11199 for (int i = 0; i <= STATE_FINAL; ++i) 11200 fprintf(fd, "%d: %s (%ld)\n", i, profile_msg(×[i]), counts[i]); 11201 fclose(fd); 11202 } 11203 #else 11204 # define PROF_STORE(state) 11205 #endif 11206 11207 /* 11208 * Try finding suggestions by adding/removing/swapping letters. 11209 */ 11210 static void 11211 suggest_try_change(suginfo_T *su) 11212 { 11213 char_u fword[MAXWLEN]; /* copy of the bad word, case-folded */ 11214 int n; 11215 char_u *p; 11216 int lpi; 11217 langp_T *lp; 11218 11219 /* We make a copy of the case-folded bad word, so that we can modify it 11220 * to find matches (esp. REP items). Append some more text, changing 11221 * chars after the bad word may help. 
*/ 11222 STRCPY(fword, su->su_fbadword); 11223 n = (int)STRLEN(fword); 11224 p = su->su_badptr + su->su_badlen; 11225 (void)spell_casefold(p, (int)STRLEN(p), fword + n, MAXWLEN - n); 11226 11227 for (lpi = 0; lpi < curwin->w_s->b_langp.ga_len; ++lpi) 11228 { 11229 lp = LANGP_ENTRY(curwin->w_s->b_langp, lpi); 11230 11231 /* If reloading a spell file fails it's still in the list but 11232 * everything has been cleared. */ 11233 if (lp->lp_slang->sl_fbyts == NULL) 11234 continue; 11235 11236 /* Try it for this language. Will add possible suggestions. */ 11237 #ifdef SUGGEST_PROFILE 11238 prof_init(); 11239 #endif 11240 suggest_trie_walk(su, lp, fword, FALSE); 11241 #ifdef SUGGEST_PROFILE 11242 prof_report("try_change"); 11243 #endif 11244 } 11245 } 11246 11247 /* Check the maximum score, if we go over it we won't try this change. */ 11248 #define TRY_DEEPER(su, stack, depth, add) \ 11249 (stack[depth].ts_score + (add) < su->su_maxscore) 11250 11251 /* 11252 * Try finding suggestions by adding/removing/swapping letters. 11253 * 11254 * This uses a state machine. At each node in the tree we try various 11255 * operations. When trying if an operation works "depth" is increased and the 11256 * stack[] is used to store info. This allows combinations, thus insert one 11257 * character, replace one and delete another. The number of changes is 11258 * limited by su->su_maxscore. 11259 * 11260 * After implementing this I noticed an article by Kemal Oflazer that 11261 * describes something similar: "Error-tolerant Finite State Recognition with 11262 * Applications to Morphological Analysis and Spelling Correction" (1996). 11263 * The implementation in the article is simplified and requires a stack of 11264 * unknown depth. The implementation here only needs a stack depth equal to 11265 * the length of the word. 11266 * 11267 * This is also used for the sound-folded word, "soundfold" is TRUE then. 
11268 * The mechanism is the same, but we find a match with a sound-folded word 11269 * that comes from one or more original words. Each of these words may be 11270 * added, this is done by add_sound_suggest(). 11271 * Don't use: 11272 * the prefix tree or the keep-case tree 11273 * "su->su_badlen" 11274 * anything to do with upper and lower case 11275 * anything to do with word or non-word characters ("spell_iswordp()") 11276 * banned words 11277 * word flags (rare, region, compounding) 11278 * word splitting for now 11279 * "similar_chars()" 11280 * use "slang->sl_repsal" instead of "lp->lp_replang->sl_rep" 11281 */ 11282 static void 11283 suggest_trie_walk( 11284 suginfo_T *su, 11285 langp_T *lp, 11286 char_u *fword, 11287 int soundfold) 11288 { 11289 char_u tword[MAXWLEN]; /* good word collected so far */ 11290 trystate_T stack[MAXWLEN]; 11291 char_u preword[MAXWLEN * 3]; /* word found with proper case; 11292 * concatenation of prefix compound 11293 * words and split word. NUL terminated 11294 * when going deeper but not when coming 11295 * back. */ 11296 char_u compflags[MAXWLEN]; /* compound flags, one for each word */ 11297 trystate_T *sp; 11298 int newscore; 11299 int score; 11300 char_u *byts, *fbyts, *pbyts; 11301 idx_T *idxs, *fidxs, *pidxs; 11302 int depth; 11303 int c, c2, c3; 11304 int n = 0; 11305 int flags; 11306 garray_T *gap; 11307 idx_T arridx; 11308 int len; 11309 char_u *p; 11310 fromto_T *ftp; 11311 int fl = 0, tl; 11312 int repextra = 0; /* extra bytes in fword[] from REP item */ 11313 slang_T *slang = lp->lp_slang; 11314 int fword_ends; 11315 int goodword_ends; 11316 #ifdef DEBUG_TRIEWALK 11317 /* Stores the name of the change made at each level. */ 11318 char_u changename[MAXWLEN][80]; 11319 #endif 11320 int breakcheckcount = 1000; 11321 int compound_ok; 11322 11323 /* 11324 * Go through the whole case-fold tree, try changes at each node. 11325 * "tword[]" contains the word collected from nodes in the tree. 
11326 * "fword[]" the word we are trying to match with (initially the bad 11327 * word). 11328 */ 11329 depth = 0; 11330 sp = &stack[0]; 11331 vim_memset(sp, 0, sizeof(trystate_T)); 11332 sp->ts_curi = 1; 11333 11334 if (soundfold) 11335 { 11336 /* Going through the soundfold tree. */ 11337 byts = fbyts = slang->sl_sbyts; 11338 idxs = fidxs = slang->sl_sidxs; 11339 pbyts = NULL; 11340 pidxs = NULL; 11341 sp->ts_prefixdepth = PFD_NOPREFIX; 11342 sp->ts_state = STATE_START; 11343 } 11344 else 11345 { 11346 /* 11347 * When there are postponed prefixes we need to use these first. At 11348 * the end of the prefix we continue in the case-fold tree. 11349 */ 11350 fbyts = slang->sl_fbyts; 11351 fidxs = slang->sl_fidxs; 11352 pbyts = slang->sl_pbyts; 11353 pidxs = slang->sl_pidxs; 11354 if (pbyts != NULL) 11355 { 11356 byts = pbyts; 11357 idxs = pidxs; 11358 sp->ts_prefixdepth = PFD_PREFIXTREE; 11359 sp->ts_state = STATE_NOPREFIX; /* try without prefix first */ 11360 } 11361 else 11362 { 11363 byts = fbyts; 11364 idxs = fidxs; 11365 sp->ts_prefixdepth = PFD_NOPREFIX; 11366 sp->ts_state = STATE_START; 11367 } 11368 } 11369 11370 /* 11371 * Loop to find all suggestions. At each round we either: 11372 * - For the current state try one operation, advance "ts_curi", 11373 * increase "depth". 11374 * - When a state is done go to the next, set "ts_state". 11375 * - When all states are tried decrease "depth". 11376 */ 11377 while (depth >= 0 && !got_int) 11378 { 11379 sp = &stack[depth]; 11380 switch (sp->ts_state) 11381 { 11382 case STATE_START: 11383 case STATE_NOPREFIX: 11384 /* 11385 * Start of node: Deal with NUL bytes, which means 11386 * tword[] may end here. 11387 */ 11388 arridx = sp->ts_arridx; /* current node in the tree */ 11389 len = byts[arridx]; /* bytes in this node */ 11390 arridx += sp->ts_curi; /* index of current byte */ 11391 11392 if (sp->ts_prefixdepth == PFD_PREFIXTREE) 11393 { 11394 /* Skip over the NUL bytes, we use them later. 
*/ 11395 for (n = 0; n < len && byts[arridx + n] == 0; ++n) 11396 ; 11397 sp->ts_curi += n; 11398 11399 /* Always past NUL bytes now. */ 11400 n = (int)sp->ts_state; 11401 PROF_STORE(sp->ts_state) 11402 sp->ts_state = STATE_ENDNUL; 11403 sp->ts_save_badflags = su->su_badflags; 11404 11405 /* At end of a prefix or at start of prefixtree: check for 11406 * following word. */ 11407 if (byts[arridx] == 0 || n == (int)STATE_NOPREFIX) 11408 { 11409 /* Set su->su_badflags to the caps type at this position. 11410 * Use the caps type until here for the prefix itself. */ 11411 #ifdef FEAT_MBYTE 11412 if (has_mbyte) 11413 n = nofold_len(fword, sp->ts_fidx, su->su_badptr); 11414 else 11415 #endif 11416 n = sp->ts_fidx; 11417 flags = badword_captype(su->su_badptr, su->su_badptr + n); 11418 su->su_badflags = badword_captype(su->su_badptr + n, 11419 su->su_badptr + su->su_badlen); 11420 #ifdef DEBUG_TRIEWALK 11421 sprintf(changename[depth], "prefix"); 11422 #endif 11423 go_deeper(stack, depth, 0); 11424 ++depth; 11425 sp = &stack[depth]; 11426 sp->ts_prefixdepth = depth - 1; 11427 byts = fbyts; 11428 idxs = fidxs; 11429 sp->ts_arridx = 0; 11430 11431 /* Move the prefix to preword[] with the right case 11432 * and make find_keepcap_word() works. */ 11433 tword[sp->ts_twordlen] = NUL; 11434 make_case_word(tword + sp->ts_splitoff, 11435 preword + sp->ts_prewordlen, flags); 11436 sp->ts_prewordlen = (char_u)STRLEN(preword); 11437 sp->ts_splitoff = sp->ts_twordlen; 11438 } 11439 break; 11440 } 11441 11442 if (sp->ts_curi > len || byts[arridx] != 0) 11443 { 11444 /* Past bytes in node and/or past NUL bytes. */ 11445 PROF_STORE(sp->ts_state) 11446 sp->ts_state = STATE_ENDNUL; 11447 sp->ts_save_badflags = su->su_badflags; 11448 break; 11449 } 11450 11451 /* 11452 * End of word in tree. 11453 */ 11454 ++sp->ts_curi; /* eat one NUL byte */ 11455 11456 flags = (int)idxs[arridx]; 11457 11458 /* Skip words with the NOSUGGEST flag. 
*/ 11459 if (flags & WF_NOSUGGEST) 11460 break; 11461 11462 fword_ends = (fword[sp->ts_fidx] == NUL 11463 || (soundfold 11464 ? vim_iswhite(fword[sp->ts_fidx]) 11465 : !spell_iswordp(fword + sp->ts_fidx, curwin))); 11466 tword[sp->ts_twordlen] = NUL; 11467 11468 if (sp->ts_prefixdepth <= PFD_NOTSPECIAL 11469 && (sp->ts_flags & TSF_PREFIXOK) == 0) 11470 { 11471 /* There was a prefix before the word. Check that the prefix 11472 * can be used with this word. */ 11473 /* Count the length of the NULs in the prefix. If there are 11474 * none this must be the first try without a prefix. */ 11475 n = stack[sp->ts_prefixdepth].ts_arridx; 11476 len = pbyts[n++]; 11477 for (c = 0; c < len && pbyts[n + c] == 0; ++c) 11478 ; 11479 if (c > 0) 11480 { 11481 c = valid_word_prefix(c, n, flags, 11482 tword + sp->ts_splitoff, slang, FALSE); 11483 if (c == 0) 11484 break; 11485 11486 /* Use the WF_RARE flag for a rare prefix. */ 11487 if (c & WF_RAREPFX) 11488 flags |= WF_RARE; 11489 11490 /* Tricky: when checking for both prefix and compounding 11491 * we run into the prefix flag first. 11492 * Remember that it's OK, so that we accept the prefix 11493 * when arriving at a compound flag. */ 11494 sp->ts_flags |= TSF_PREFIXOK; 11495 } 11496 } 11497 11498 /* Check NEEDCOMPOUND: can't use word without compounding. Do try 11499 * appending another compound word below. */ 11500 if (sp->ts_complen == sp->ts_compsplit && fword_ends 11501 && (flags & WF_NEEDCOMP)) 11502 goodword_ends = FALSE; 11503 else 11504 goodword_ends = TRUE; 11505 11506 p = NULL; 11507 compound_ok = TRUE; 11508 if (sp->ts_complen > sp->ts_compsplit) 11509 { 11510 if (slang->sl_nobreak) 11511 { 11512 /* There was a word before this word. When there was no 11513 * change in this word (it was correct) add the first word 11514 * as a suggestion. If this word was corrected too, we 11515 * need to check if a correct word follows. 
*/ 11516 if (sp->ts_fidx - sp->ts_splitfidx 11517 == sp->ts_twordlen - sp->ts_splitoff 11518 && STRNCMP(fword + sp->ts_splitfidx, 11519 tword + sp->ts_splitoff, 11520 sp->ts_fidx - sp->ts_splitfidx) == 0) 11521 { 11522 preword[sp->ts_prewordlen] = NUL; 11523 newscore = score_wordcount_adj(slang, sp->ts_score, 11524 preword + sp->ts_prewordlen, 11525 sp->ts_prewordlen > 0); 11526 /* Add the suggestion if the score isn't too bad. */ 11527 if (newscore <= su->su_maxscore) 11528 add_suggestion(su, &su->su_ga, preword, 11529 sp->ts_splitfidx - repextra, 11530 newscore, 0, FALSE, 11531 lp->lp_sallang, FALSE); 11532 break; 11533 } 11534 } 11535 else 11536 { 11537 /* There was a compound word before this word. If this 11538 * word does not support compounding then give up 11539 * (splitting is tried for the word without compound 11540 * flag). */ 11541 if (((unsigned)flags >> 24) == 0 11542 || sp->ts_twordlen - sp->ts_splitoff 11543 < slang->sl_compminlen) 11544 break; 11545 #ifdef FEAT_MBYTE 11546 /* For multi-byte chars check character length against 11547 * COMPOUNDMIN. */ 11548 if (has_mbyte 11549 && slang->sl_compminlen > 0 11550 && mb_charlen(tword + sp->ts_splitoff) 11551 < slang->sl_compminlen) 11552 break; 11553 #endif 11554 11555 compflags[sp->ts_complen] = ((unsigned)flags >> 24); 11556 compflags[sp->ts_complen + 1] = NUL; 11557 vim_strncpy(preword + sp->ts_prewordlen, 11558 tword + sp->ts_splitoff, 11559 sp->ts_twordlen - sp->ts_splitoff); 11560 11561 /* Verify CHECKCOMPOUNDPATTERN rules. */ 11562 if (match_checkcompoundpattern(preword, sp->ts_prewordlen, 11563 &slang->sl_comppat)) 11564 compound_ok = FALSE; 11565 11566 if (compound_ok) 11567 { 11568 p = preword; 11569 while (*skiptowhite(p) != NUL) 11570 p = skipwhite(skiptowhite(p)); 11571 if (fword_ends && !can_compound(slang, p, 11572 compflags + sp->ts_compsplit)) 11573 /* Compound is not allowed. But it may still be 11574 * possible if we add another (short) word. 
*/ 11575 compound_ok = FALSE; 11576 } 11577 11578 /* Get pointer to last char of previous word. */ 11579 p = preword + sp->ts_prewordlen; 11580 mb_ptr_back(preword, p); 11581 } 11582 } 11583 11584 /* 11585 * Form the word with proper case in preword. 11586 * If there is a word from a previous split, append. 11587 * For the soundfold tree don't change the case, simply append. 11588 */ 11589 if (soundfold) 11590 STRCPY(preword + sp->ts_prewordlen, tword + sp->ts_splitoff); 11591 else if (flags & WF_KEEPCAP) 11592 /* Must find the word in the keep-case tree. */ 11593 find_keepcap_word(slang, tword + sp->ts_splitoff, 11594 preword + sp->ts_prewordlen); 11595 else 11596 { 11597 /* Include badflags: If the badword is onecap or allcap 11598 * use that for the goodword too. But if the badword is 11599 * allcap and it's only one char long use onecap. */ 11600 c = su->su_badflags; 11601 if ((c & WF_ALLCAP) 11602 #ifdef FEAT_MBYTE 11603 && su->su_badlen == (*mb_ptr2len)(su->su_badptr) 11604 #else 11605 && su->su_badlen == 1 11606 #endif 11607 ) 11608 c = WF_ONECAP; 11609 c |= flags; 11610 11611 /* When appending a compound word after a word character don't 11612 * use Onecap. */ 11613 if (p != NULL && spell_iswordp_nmw(p, curwin)) 11614 c &= ~WF_ONECAP; 11615 make_case_word(tword + sp->ts_splitoff, 11616 preword + sp->ts_prewordlen, c); 11617 } 11618 11619 if (!soundfold) 11620 { 11621 /* Don't use a banned word. It may appear again as a good 11622 * word, thus remember it. 
*/ 11623 if (flags & WF_BANNED) 11624 { 11625 add_banned(su, preword + sp->ts_prewordlen); 11626 break; 11627 } 11628 if ((sp->ts_complen == sp->ts_compsplit 11629 && WAS_BANNED(su, preword + sp->ts_prewordlen)) 11630 || WAS_BANNED(su, preword)) 11631 { 11632 if (slang->sl_compprog == NULL) 11633 break; 11634 /* the word so far was banned but we may try compounding */ 11635 goodword_ends = FALSE; 11636 } 11637 } 11638 11639 newscore = 0; 11640 if (!soundfold) /* soundfold words don't have flags */ 11641 { 11642 if ((flags & WF_REGION) 11643 && (((unsigned)flags >> 16) & lp->lp_region) == 0) 11644 newscore += SCORE_REGION; 11645 if (flags & WF_RARE) 11646 newscore += SCORE_RARE; 11647 11648 if (!spell_valid_case(su->su_badflags, 11649 captype(preword + sp->ts_prewordlen, NULL))) 11650 newscore += SCORE_ICASE; 11651 } 11652 11653 /* TODO: how about splitting in the soundfold tree? */ 11654 if (fword_ends 11655 && goodword_ends 11656 && sp->ts_fidx >= sp->ts_fidxtry 11657 && compound_ok) 11658 { 11659 /* The badword also ends: add suggestions. */ 11660 #ifdef DEBUG_TRIEWALK 11661 if (soundfold && STRCMP(preword, "smwrd") == 0) 11662 { 11663 int j; 11664 11665 /* print the stack of changes that brought us here */ 11666 smsg("------ %s -------", fword); 11667 for (j = 0; j < depth; ++j) 11668 smsg("%s", changename[j]); 11669 } 11670 #endif 11671 if (soundfold) 11672 { 11673 /* For soundfolded words we need to find the original 11674 * words, the edit distance and then add them. */ 11675 add_sound_suggest(su, preword, sp->ts_score, lp); 11676 } 11677 else if (sp->ts_fidx > 0) 11678 { 11679 /* Give a penalty when changing non-word char to word 11680 * char, e.g., "thes," -> "these". 
*/ 11681 p = fword + sp->ts_fidx; 11682 mb_ptr_back(fword, p); 11683 if (!spell_iswordp(p, curwin)) 11684 { 11685 p = preword + STRLEN(preword); 11686 mb_ptr_back(preword, p); 11687 if (spell_iswordp(p, curwin)) 11688 newscore += SCORE_NONWORD; 11689 } 11690 11691 /* Give a bonus to words seen before. */ 11692 score = score_wordcount_adj(slang, 11693 sp->ts_score + newscore, 11694 preword + sp->ts_prewordlen, 11695 sp->ts_prewordlen > 0); 11696 11697 /* Add the suggestion if the score isn't too bad. */ 11698 if (score <= su->su_maxscore) 11699 { 11700 add_suggestion(su, &su->su_ga, preword, 11701 sp->ts_fidx - repextra, 11702 score, 0, FALSE, lp->lp_sallang, FALSE); 11703 11704 if (su->su_badflags & WF_MIXCAP) 11705 { 11706 /* We really don't know if the word should be 11707 * upper or lower case, add both. */ 11708 c = captype(preword, NULL); 11709 if (c == 0 || c == WF_ALLCAP) 11710 { 11711 make_case_word(tword + sp->ts_splitoff, 11712 preword + sp->ts_prewordlen, 11713 c == 0 ? WF_ALLCAP : 0); 11714 11715 add_suggestion(su, &su->su_ga, preword, 11716 sp->ts_fidx - repextra, 11717 score + SCORE_ICASE, 0, FALSE, 11718 lp->lp_sallang, FALSE); 11719 } 11720 } 11721 } 11722 } 11723 } 11724 11725 /* 11726 * Try word split and/or compounding. 11727 */ 11728 if ((sp->ts_fidx >= sp->ts_fidxtry || fword_ends) 11729 #ifdef FEAT_MBYTE 11730 /* Don't split halfway a character. */ 11731 && (!has_mbyte || sp->ts_tcharlen == 0) 11732 #endif 11733 ) 11734 { 11735 int try_compound; 11736 int try_split; 11737 11738 /* If past the end of the bad word don't try a split. 11739 * Otherwise try changing the next word. E.g., find 11740 * suggestions for "the the" where the second "the" is 11741 * different. It's done like a split. 11742 * TODO: word split for soundfold words */ 11743 try_split = (sp->ts_fidx - repextra < su->su_badlen) 11744 && !soundfold; 11745 11746 /* Get here in several situations: 11747 * 1. 
The word in the tree ends: 11748 * If the word allows compounding try that. Otherwise try 11749 * a split by inserting a space. For both check that a 11750 * valid words starts at fword[sp->ts_fidx]. 11751 * For NOBREAK do like compounding to be able to check if 11752 * the next word is valid. 11753 * 2. The badword does end, but it was due to a change (e.g., 11754 * a swap). No need to split, but do check that the 11755 * following word is valid. 11756 * 3. The badword and the word in the tree end. It may still 11757 * be possible to compound another (short) word. 11758 */ 11759 try_compound = FALSE; 11760 if (!soundfold 11761 && !slang->sl_nocompoundsugs 11762 && slang->sl_compprog != NULL 11763 && ((unsigned)flags >> 24) != 0 11764 && sp->ts_twordlen - sp->ts_splitoff 11765 >= slang->sl_compminlen 11766 #ifdef FEAT_MBYTE 11767 && (!has_mbyte 11768 || slang->sl_compminlen == 0 11769 || mb_charlen(tword + sp->ts_splitoff) 11770 >= slang->sl_compminlen) 11771 #endif 11772 && (slang->sl_compsylmax < MAXWLEN 11773 || sp->ts_complen + 1 - sp->ts_compsplit 11774 < slang->sl_compmax) 11775 && (can_be_compound(sp, slang, 11776 compflags, ((unsigned)flags >> 24)))) 11777 11778 { 11779 try_compound = TRUE; 11780 compflags[sp->ts_complen] = ((unsigned)flags >> 24); 11781 compflags[sp->ts_complen + 1] = NUL; 11782 } 11783 11784 /* For NOBREAK we never try splitting, it won't make any word 11785 * valid. */ 11786 if (slang->sl_nobreak && !slang->sl_nocompoundsugs) 11787 try_compound = TRUE; 11788 11789 /* If we could add a compound word, and it's also possible to 11790 * split at this point, do the split first and set 11791 * TSF_DIDSPLIT to avoid doing it again. 
*/ 11792 else if (!fword_ends 11793 && try_compound 11794 && (sp->ts_flags & TSF_DIDSPLIT) == 0) 11795 { 11796 try_compound = FALSE; 11797 sp->ts_flags |= TSF_DIDSPLIT; 11798 --sp->ts_curi; /* do the same NUL again */ 11799 compflags[sp->ts_complen] = NUL; 11800 } 11801 else 11802 sp->ts_flags &= ~TSF_DIDSPLIT; 11803 11804 if (try_split || try_compound) 11805 { 11806 if (!try_compound && (!fword_ends || !goodword_ends)) 11807 { 11808 /* If we're going to split need to check that the 11809 * words so far are valid for compounding. If there 11810 * is only one word it must not have the NEEDCOMPOUND 11811 * flag. */ 11812 if (sp->ts_complen == sp->ts_compsplit 11813 && (flags & WF_NEEDCOMP)) 11814 break; 11815 p = preword; 11816 while (*skiptowhite(p) != NUL) 11817 p = skipwhite(skiptowhite(p)); 11818 if (sp->ts_complen > sp->ts_compsplit 11819 && !can_compound(slang, p, 11820 compflags + sp->ts_compsplit)) 11821 break; 11822 11823 if (slang->sl_nosplitsugs) 11824 newscore += SCORE_SPLIT_NO; 11825 else 11826 newscore += SCORE_SPLIT; 11827 11828 /* Give a bonus to words seen before. */ 11829 newscore = score_wordcount_adj(slang, newscore, 11830 preword + sp->ts_prewordlen, TRUE); 11831 } 11832 11833 if (TRY_DEEPER(su, stack, depth, newscore)) 11834 { 11835 go_deeper(stack, depth, newscore); 11836 #ifdef DEBUG_TRIEWALK 11837 if (!try_compound && !fword_ends) 11838 sprintf(changename[depth], "%.*s-%s: split", 11839 sp->ts_twordlen, tword, fword + sp->ts_fidx); 11840 else 11841 sprintf(changename[depth], "%.*s-%s: compound", 11842 sp->ts_twordlen, tword, fword + sp->ts_fidx); 11843 #endif 11844 /* Save things to be restored at STATE_SPLITUNDO. */ 11845 sp->ts_save_badflags = su->su_badflags; 11846 PROF_STORE(sp->ts_state) 11847 sp->ts_state = STATE_SPLITUNDO; 11848 11849 ++depth; 11850 sp = &stack[depth]; 11851 11852 /* Append a space to preword when splitting. 
*/ 11853 if (!try_compound && !fword_ends) 11854 STRCAT(preword, " "); 11855 sp->ts_prewordlen = (char_u)STRLEN(preword); 11856 sp->ts_splitoff = sp->ts_twordlen; 11857 sp->ts_splitfidx = sp->ts_fidx; 11858 11859 /* If the badword has a non-word character at this 11860 * position skip it. That means replacing the 11861 * non-word character with a space. Always skip a 11862 * character when the word ends. But only when the 11863 * good word can end. */ 11864 if (((!try_compound && !spell_iswordp_nmw(fword 11865 + sp->ts_fidx, 11866 curwin)) 11867 || fword_ends) 11868 && fword[sp->ts_fidx] != NUL 11869 && goodword_ends) 11870 { 11871 int l; 11872 11873 #ifdef FEAT_MBYTE 11874 if (has_mbyte) 11875 l = MB_BYTE2LEN(fword[sp->ts_fidx]); 11876 else 11877 #endif 11878 l = 1; 11879 if (fword_ends) 11880 { 11881 /* Copy the skipped character to preword. */ 11882 mch_memmove(preword + sp->ts_prewordlen, 11883 fword + sp->ts_fidx, l); 11884 sp->ts_prewordlen += l; 11885 preword[sp->ts_prewordlen] = NUL; 11886 } 11887 else 11888 sp->ts_score -= SCORE_SPLIT - SCORE_SUBST; 11889 sp->ts_fidx += l; 11890 } 11891 11892 /* When compounding include compound flag in 11893 * compflags[] (already set above). When splitting we 11894 * may start compounding over again. */ 11895 if (try_compound) 11896 ++sp->ts_complen; 11897 else 11898 sp->ts_compsplit = sp->ts_complen; 11899 sp->ts_prefixdepth = PFD_NOPREFIX; 11900 11901 /* set su->su_badflags to the caps type at this 11902 * position */ 11903 #ifdef FEAT_MBYTE 11904 if (has_mbyte) 11905 n = nofold_len(fword, sp->ts_fidx, su->su_badptr); 11906 else 11907 #endif 11908 n = sp->ts_fidx; 11909 su->su_badflags = badword_captype(su->su_badptr + n, 11910 su->su_badptr + su->su_badlen); 11911 11912 /* Restart at top of the tree. */ 11913 sp->ts_arridx = 0; 11914 11915 /* If there are postponed prefixes, try these too. 
*/ 11916 if (pbyts != NULL) 11917 { 11918 byts = pbyts; 11919 idxs = pidxs; 11920 sp->ts_prefixdepth = PFD_PREFIXTREE; 11921 PROF_STORE(sp->ts_state) 11922 sp->ts_state = STATE_NOPREFIX; 11923 } 11924 } 11925 } 11926 } 11927 break; 11928 11929 case STATE_SPLITUNDO: 11930 /* Undo the changes done for word split or compound word. */ 11931 su->su_badflags = sp->ts_save_badflags; 11932 11933 /* Continue looking for NUL bytes. */ 11934 PROF_STORE(sp->ts_state) 11935 sp->ts_state = STATE_START; 11936 11937 /* In case we went into the prefix tree. */ 11938 byts = fbyts; 11939 idxs = fidxs; 11940 break; 11941 11942 case STATE_ENDNUL: 11943 /* Past the NUL bytes in the node. */ 11944 su->su_badflags = sp->ts_save_badflags; 11945 if (fword[sp->ts_fidx] == NUL 11946 #ifdef FEAT_MBYTE 11947 && sp->ts_tcharlen == 0 11948 #endif 11949 ) 11950 { 11951 /* The badword ends, can't use STATE_PLAIN. */ 11952 PROF_STORE(sp->ts_state) 11953 sp->ts_state = STATE_DEL; 11954 break; 11955 } 11956 PROF_STORE(sp->ts_state) 11957 sp->ts_state = STATE_PLAIN; 11958 /*FALLTHROUGH*/ 11959 11960 case STATE_PLAIN: 11961 /* 11962 * Go over all possible bytes at this node, add each to tword[] 11963 * and use child node. "ts_curi" is the index. 11964 */ 11965 arridx = sp->ts_arridx; 11966 if (sp->ts_curi > byts[arridx]) 11967 { 11968 /* Done all bytes at this node, do next state. When still at 11969 * already changed bytes skip the other tricks. */ 11970 PROF_STORE(sp->ts_state) 11971 if (sp->ts_fidx >= sp->ts_fidxtry) 11972 sp->ts_state = STATE_DEL; 11973 else 11974 sp->ts_state = STATE_FINAL; 11975 } 11976 else 11977 { 11978 arridx += sp->ts_curi++; 11979 c = byts[arridx]; 11980 11981 /* Normal byte, go one level deeper. If it's not equal to the 11982 * byte in the bad word adjust the score. But don't even try 11983 * when the byte was already changed. And don't try when we 11984 * just deleted this byte, accepting it is always cheaper than 11985 * delete + substitute. 
*/ 11986 if (c == fword[sp->ts_fidx] 11987 #ifdef FEAT_MBYTE 11988 || (sp->ts_tcharlen > 0 && sp->ts_isdiff != DIFF_NONE) 11989 #endif 11990 ) 11991 newscore = 0; 11992 else 11993 newscore = SCORE_SUBST; 11994 if ((newscore == 0 11995 || (sp->ts_fidx >= sp->ts_fidxtry 11996 && ((sp->ts_flags & TSF_DIDDEL) == 0 11997 || c != fword[sp->ts_delidx]))) 11998 && TRY_DEEPER(su, stack, depth, newscore)) 11999 { 12000 go_deeper(stack, depth, newscore); 12001 #ifdef DEBUG_TRIEWALK 12002 if (newscore > 0) 12003 sprintf(changename[depth], "%.*s-%s: subst %c to %c", 12004 sp->ts_twordlen, tword, fword + sp->ts_fidx, 12005 fword[sp->ts_fidx], c); 12006 else 12007 sprintf(changename[depth], "%.*s-%s: accept %c", 12008 sp->ts_twordlen, tword, fword + sp->ts_fidx, 12009 fword[sp->ts_fidx]); 12010 #endif 12011 ++depth; 12012 sp = &stack[depth]; 12013 ++sp->ts_fidx; 12014 tword[sp->ts_twordlen++] = c; 12015 sp->ts_arridx = idxs[arridx]; 12016 #ifdef FEAT_MBYTE 12017 if (newscore == SCORE_SUBST) 12018 sp->ts_isdiff = DIFF_YES; 12019 if (has_mbyte) 12020 { 12021 /* Multi-byte characters are a bit complicated to 12022 * handle: They differ when any of the bytes differ 12023 * and then their length may also differ. */ 12024 if (sp->ts_tcharlen == 0) 12025 { 12026 /* First byte. */ 12027 sp->ts_tcharidx = 0; 12028 sp->ts_tcharlen = MB_BYTE2LEN(c); 12029 sp->ts_fcharstart = sp->ts_fidx - 1; 12030 sp->ts_isdiff = (newscore != 0) 12031 ? DIFF_YES : DIFF_NONE; 12032 } 12033 else if (sp->ts_isdiff == DIFF_INSERT) 12034 /* When inserting trail bytes don't advance in the 12035 * bad word. */ 12036 --sp->ts_fidx; 12037 if (++sp->ts_tcharidx == sp->ts_tcharlen) 12038 { 12039 /* Last byte of character. */ 12040 if (sp->ts_isdiff == DIFF_YES) 12041 { 12042 /* Correct ts_fidx for the byte length of the 12043 * character (we didn't check that before). 
*/ 12044 sp->ts_fidx = sp->ts_fcharstart 12045 + MB_BYTE2LEN( 12046 fword[sp->ts_fcharstart]); 12047 12048 /* For changing a composing character adjust 12049 * the score from SCORE_SUBST to 12050 * SCORE_SUBCOMP. */ 12051 if (enc_utf8 12052 && utf_iscomposing( 12053 mb_ptr2char(tword 12054 + sp->ts_twordlen 12055 - sp->ts_tcharlen)) 12056 && utf_iscomposing( 12057 mb_ptr2char(fword 12058 + sp->ts_fcharstart))) 12059 sp->ts_score -= 12060 SCORE_SUBST - SCORE_SUBCOMP; 12061 12062 /* For a similar character adjust score from 12063 * SCORE_SUBST to SCORE_SIMILAR. */ 12064 else if (!soundfold 12065 && slang->sl_has_map 12066 && similar_chars(slang, 12067 mb_ptr2char(tword 12068 + sp->ts_twordlen 12069 - sp->ts_tcharlen), 12070 mb_ptr2char(fword 12071 + sp->ts_fcharstart))) 12072 sp->ts_score -= 12073 SCORE_SUBST - SCORE_SIMILAR; 12074 } 12075 else if (sp->ts_isdiff == DIFF_INSERT 12076 && sp->ts_twordlen > sp->ts_tcharlen) 12077 { 12078 p = tword + sp->ts_twordlen - sp->ts_tcharlen; 12079 c = mb_ptr2char(p); 12080 if (enc_utf8 && utf_iscomposing(c)) 12081 { 12082 /* Inserting a composing char doesn't 12083 * count that much. */ 12084 sp->ts_score -= SCORE_INS - SCORE_INSCOMP; 12085 } 12086 else 12087 { 12088 /* If the previous character was the same, 12089 * thus doubling a character, give a bonus 12090 * to the score. Also for the soundfold 12091 * tree (might seem illogical but does 12092 * give better scores). */ 12093 mb_ptr_back(tword, p); 12094 if (c == mb_ptr2char(p)) 12095 sp->ts_score -= SCORE_INS 12096 - SCORE_INSDUP; 12097 } 12098 } 12099 12100 /* Starting a new char, reset the length. */ 12101 sp->ts_tcharlen = 0; 12102 } 12103 } 12104 else 12105 #endif 12106 { 12107 /* If we found a similar char adjust the score. 12108 * We do this after calling go_deeper() because 12109 * it's slow. 
*/ 12110 if (newscore != 0 12111 && !soundfold 12112 && slang->sl_has_map 12113 && similar_chars(slang, 12114 c, fword[sp->ts_fidx - 1])) 12115 sp->ts_score -= SCORE_SUBST - SCORE_SIMILAR; 12116 } 12117 } 12118 } 12119 break; 12120 12121 case STATE_DEL: 12122 #ifdef FEAT_MBYTE 12123 /* When past the first byte of a multi-byte char don't try 12124 * delete/insert/swap a character. */ 12125 if (has_mbyte && sp->ts_tcharlen > 0) 12126 { 12127 PROF_STORE(sp->ts_state) 12128 sp->ts_state = STATE_FINAL; 12129 break; 12130 } 12131 #endif 12132 /* 12133 * Try skipping one character in the bad word (delete it). 12134 */ 12135 PROF_STORE(sp->ts_state) 12136 sp->ts_state = STATE_INS_PREP; 12137 sp->ts_curi = 1; 12138 if (soundfold && sp->ts_fidx == 0 && fword[sp->ts_fidx] == '*') 12139 /* Deleting a vowel at the start of a word counts less, see 12140 * soundalike_score(). */ 12141 newscore = 2 * SCORE_DEL / 3; 12142 else 12143 newscore = SCORE_DEL; 12144 if (fword[sp->ts_fidx] != NUL 12145 && TRY_DEEPER(su, stack, depth, newscore)) 12146 { 12147 go_deeper(stack, depth, newscore); 12148 #ifdef DEBUG_TRIEWALK 12149 sprintf(changename[depth], "%.*s-%s: delete %c", 12150 sp->ts_twordlen, tword, fword + sp->ts_fidx, 12151 fword[sp->ts_fidx]); 12152 #endif 12153 ++depth; 12154 12155 /* Remember what character we deleted, so that we can avoid 12156 * inserting it again. */ 12157 stack[depth].ts_flags |= TSF_DIDDEL; 12158 stack[depth].ts_delidx = sp->ts_fidx; 12159 12160 /* Advance over the character in fword[]. Give a bonus to the 12161 * score if the same character is following "nn" -> "n". It's 12162 * a bit illogical for soundfold tree but it does give better 12163 * results. 
*/ 12164 #ifdef FEAT_MBYTE 12165 if (has_mbyte) 12166 { 12167 c = mb_ptr2char(fword + sp->ts_fidx); 12168 stack[depth].ts_fidx += MB_BYTE2LEN(fword[sp->ts_fidx]); 12169 if (enc_utf8 && utf_iscomposing(c)) 12170 stack[depth].ts_score -= SCORE_DEL - SCORE_DELCOMP; 12171 else if (c == mb_ptr2char(fword + stack[depth].ts_fidx)) 12172 stack[depth].ts_score -= SCORE_DEL - SCORE_DELDUP; 12173 } 12174 else 12175 #endif 12176 { 12177 ++stack[depth].ts_fidx; 12178 if (fword[sp->ts_fidx] == fword[sp->ts_fidx + 1]) 12179 stack[depth].ts_score -= SCORE_DEL - SCORE_DELDUP; 12180 } 12181 break; 12182 } 12183 /*FALLTHROUGH*/ 12184 12185 case STATE_INS_PREP: 12186 if (sp->ts_flags & TSF_DIDDEL) 12187 { 12188 /* If we just deleted a byte then inserting won't make sense, 12189 * a substitute is always cheaper. */ 12190 PROF_STORE(sp->ts_state) 12191 sp->ts_state = STATE_SWAP; 12192 break; 12193 } 12194 12195 /* skip over NUL bytes */ 12196 n = sp->ts_arridx; 12197 for (;;) 12198 { 12199 if (sp->ts_curi > byts[n]) 12200 { 12201 /* Only NUL bytes at this node, go to next state. */ 12202 PROF_STORE(sp->ts_state) 12203 sp->ts_state = STATE_SWAP; 12204 break; 12205 } 12206 if (byts[n + sp->ts_curi] != NUL) 12207 { 12208 /* Found a byte to insert. */ 12209 PROF_STORE(sp->ts_state) 12210 sp->ts_state = STATE_INS; 12211 break; 12212 } 12213 ++sp->ts_curi; 12214 } 12215 break; 12216 12217 /*FALLTHROUGH*/ 12218 12219 case STATE_INS: 12220 /* Insert one byte. Repeat this for each possible byte at this 12221 * node. */ 12222 n = sp->ts_arridx; 12223 if (sp->ts_curi > byts[n]) 12224 { 12225 /* Done all bytes at this node, go to next state. */ 12226 PROF_STORE(sp->ts_state) 12227 sp->ts_state = STATE_SWAP; 12228 break; 12229 } 12230 12231 /* Do one more byte at this node, but: 12232 * - Skip NUL bytes. 12233 * - Skip the byte if it's equal to the byte in the word, 12234 * accepting that byte is always better. 
12235 */ 12236 n += sp->ts_curi++; 12237 c = byts[n]; 12238 if (soundfold && sp->ts_twordlen == 0 && c == '*') 12239 /* Inserting a vowel at the start of a word counts less, 12240 * see soundalike_score(). */ 12241 newscore = 2 * SCORE_INS / 3; 12242 else 12243 newscore = SCORE_INS; 12244 if (c != fword[sp->ts_fidx] 12245 && TRY_DEEPER(su, stack, depth, newscore)) 12246 { 12247 go_deeper(stack, depth, newscore); 12248 #ifdef DEBUG_TRIEWALK 12249 sprintf(changename[depth], "%.*s-%s: insert %c", 12250 sp->ts_twordlen, tword, fword + sp->ts_fidx, 12251 c); 12252 #endif 12253 ++depth; 12254 sp = &stack[depth]; 12255 tword[sp->ts_twordlen++] = c; 12256 sp->ts_arridx = idxs[n]; 12257 #ifdef FEAT_MBYTE 12258 if (has_mbyte) 12259 { 12260 fl = MB_BYTE2LEN(c); 12261 if (fl > 1) 12262 { 12263 /* There are following bytes for the same character. 12264 * We must find all bytes before trying 12265 * delete/insert/swap/etc. */ 12266 sp->ts_tcharlen = fl; 12267 sp->ts_tcharidx = 1; 12268 sp->ts_isdiff = DIFF_INSERT; 12269 } 12270 } 12271 else 12272 fl = 1; 12273 if (fl == 1) 12274 #endif 12275 { 12276 /* If the previous character was the same, thus doubling a 12277 * character, give a bonus to the score. Also for 12278 * soundfold words (illogical but does give a better 12279 * score). */ 12280 if (sp->ts_twordlen >= 2 12281 && tword[sp->ts_twordlen - 2] == c) 12282 sp->ts_score -= SCORE_INS - SCORE_INSDUP; 12283 } 12284 } 12285 break; 12286 12287 case STATE_SWAP: 12288 /* 12289 * Swap two bytes in the bad word: "12" -> "21". 12290 * We change "fword" here, it's changed back afterwards at 12291 * STATE_UNSWAP. 12292 */ 12293 p = fword + sp->ts_fidx; 12294 c = *p; 12295 if (c == NUL) 12296 { 12297 /* End of word, can't swap or replace. */ 12298 PROF_STORE(sp->ts_state) 12299 sp->ts_state = STATE_FINAL; 12300 break; 12301 } 12302 12303 /* Don't swap if the first character is not a word character. 12304 * SWAP3 etc. also don't make sense then. 
*/ 12305 if (!soundfold && !spell_iswordp(p, curwin)) 12306 { 12307 PROF_STORE(sp->ts_state) 12308 sp->ts_state = STATE_REP_INI; 12309 break; 12310 } 12311 12312 #ifdef FEAT_MBYTE 12313 if (has_mbyte) 12314 { 12315 n = mb_cptr2len(p); 12316 c = mb_ptr2char(p); 12317 if (p[n] == NUL) 12318 c2 = NUL; 12319 else if (!soundfold && !spell_iswordp(p + n, curwin)) 12320 c2 = c; /* don't swap non-word char */ 12321 else 12322 c2 = mb_ptr2char(p + n); 12323 } 12324 else 12325 #endif 12326 { 12327 if (p[1] == NUL) 12328 c2 = NUL; 12329 else if (!soundfold && !spell_iswordp(p + 1, curwin)) 12330 c2 = c; /* don't swap non-word char */ 12331 else 12332 c2 = p[1]; 12333 } 12334 12335 /* When the second character is NUL we can't swap. */ 12336 if (c2 == NUL) 12337 { 12338 PROF_STORE(sp->ts_state) 12339 sp->ts_state = STATE_REP_INI; 12340 break; 12341 } 12342 12343 /* When characters are identical, swap won't do anything. 12344 * Also get here if the second char is not a word character. */ 12345 if (c == c2) 12346 { 12347 PROF_STORE(sp->ts_state) 12348 sp->ts_state = STATE_SWAP3; 12349 break; 12350 } 12351 if (c2 != NUL && TRY_DEEPER(su, stack, depth, SCORE_SWAP)) 12352 { 12353 go_deeper(stack, depth, SCORE_SWAP); 12354 #ifdef DEBUG_TRIEWALK 12355 sprintf(changename[depth], "%.*s-%s: swap %c and %c", 12356 sp->ts_twordlen, tword, fword + sp->ts_fidx, 12357 c, c2); 12358 #endif 12359 PROF_STORE(sp->ts_state) 12360 sp->ts_state = STATE_UNSWAP; 12361 ++depth; 12362 #ifdef FEAT_MBYTE 12363 if (has_mbyte) 12364 { 12365 fl = mb_char2len(c2); 12366 mch_memmove(p, p + n, fl); 12367 mb_char2bytes(c, p + fl); 12368 stack[depth].ts_fidxtry = sp->ts_fidx + n + fl; 12369 } 12370 else 12371 #endif 12372 { 12373 p[0] = c2; 12374 p[1] = c; 12375 stack[depth].ts_fidxtry = sp->ts_fidx + 2; 12376 } 12377 } 12378 else 12379 { 12380 /* If this swap doesn't work then SWAP3 won't either. 
*/ 12381 PROF_STORE(sp->ts_state) 12382 sp->ts_state = STATE_REP_INI; 12383 } 12384 break; 12385 12386 case STATE_UNSWAP: 12387 /* Undo the STATE_SWAP swap: "21" -> "12". */ 12388 p = fword + sp->ts_fidx; 12389 #ifdef FEAT_MBYTE 12390 if (has_mbyte) 12391 { 12392 n = MB_BYTE2LEN(*p); 12393 c = mb_ptr2char(p + n); 12394 mch_memmove(p + MB_BYTE2LEN(p[n]), p, n); 12395 mb_char2bytes(c, p); 12396 } 12397 else 12398 #endif 12399 { 12400 c = *p; 12401 *p = p[1]; 12402 p[1] = c; 12403 } 12404 /*FALLTHROUGH*/ 12405 12406 case STATE_SWAP3: 12407 /* Swap two bytes, skipping one: "123" -> "321". We change 12408 * "fword" here, it's changed back afterwards at STATE_UNSWAP3. */ 12409 p = fword + sp->ts_fidx; 12410 #ifdef FEAT_MBYTE 12411 if (has_mbyte) 12412 { 12413 n = mb_cptr2len(p); 12414 c = mb_ptr2char(p); 12415 fl = mb_cptr2len(p + n); 12416 c2 = mb_ptr2char(p + n); 12417 if (!soundfold && !spell_iswordp(p + n + fl, curwin)) 12418 c3 = c; /* don't swap non-word char */ 12419 else 12420 c3 = mb_ptr2char(p + n + fl); 12421 } 12422 else 12423 #endif 12424 { 12425 c = *p; 12426 c2 = p[1]; 12427 if (!soundfold && !spell_iswordp(p + 2, curwin)) 12428 c3 = c; /* don't swap non-word char */ 12429 else 12430 c3 = p[2]; 12431 } 12432 12433 /* When characters are identical: "121" then SWAP3 result is 12434 * identical, ROT3L result is same as SWAP: "211", ROT3L result is 12435 * same as SWAP on next char: "112". Thus skip all swapping. 12436 * Also skip when c3 is NUL. 12437 * Also get here when the third character is not a word character. 
12438 * Second character may any char: "a.b" -> "b.a" */ 12439 if (c == c3 || c3 == NUL) 12440 { 12441 PROF_STORE(sp->ts_state) 12442 sp->ts_state = STATE_REP_INI; 12443 break; 12444 } 12445 if (TRY_DEEPER(su, stack, depth, SCORE_SWAP3)) 12446 { 12447 go_deeper(stack, depth, SCORE_SWAP3); 12448 #ifdef DEBUG_TRIEWALK 12449 sprintf(changename[depth], "%.*s-%s: swap3 %c and %c", 12450 sp->ts_twordlen, tword, fword + sp->ts_fidx, 12451 c, c3); 12452 #endif 12453 PROF_STORE(sp->ts_state) 12454 sp->ts_state = STATE_UNSWAP3; 12455 ++depth; 12456 #ifdef FEAT_MBYTE 12457 if (has_mbyte) 12458 { 12459 tl = mb_char2len(c3); 12460 mch_memmove(p, p + n + fl, tl); 12461 mb_char2bytes(c2, p + tl); 12462 mb_char2bytes(c, p + fl + tl); 12463 stack[depth].ts_fidxtry = sp->ts_fidx + n + fl + tl; 12464 } 12465 else 12466 #endif 12467 { 12468 p[0] = p[2]; 12469 p[2] = c; 12470 stack[depth].ts_fidxtry = sp->ts_fidx + 3; 12471 } 12472 } 12473 else 12474 { 12475 PROF_STORE(sp->ts_state) 12476 sp->ts_state = STATE_REP_INI; 12477 } 12478 break; 12479 12480 case STATE_UNSWAP3: 12481 /* Undo STATE_SWAP3: "321" -> "123" */ 12482 p = fword + sp->ts_fidx; 12483 #ifdef FEAT_MBYTE 12484 if (has_mbyte) 12485 { 12486 n = MB_BYTE2LEN(*p); 12487 c2 = mb_ptr2char(p + n); 12488 fl = MB_BYTE2LEN(p[n]); 12489 c = mb_ptr2char(p + n + fl); 12490 tl = MB_BYTE2LEN(p[n + fl]); 12491 mch_memmove(p + fl + tl, p, n); 12492 mb_char2bytes(c, p); 12493 mb_char2bytes(c2, p + tl); 12494 p = p + tl; 12495 } 12496 else 12497 #endif 12498 { 12499 c = *p; 12500 *p = p[2]; 12501 p[2] = c; 12502 ++p; 12503 } 12504 12505 if (!soundfold && !spell_iswordp(p, curwin)) 12506 { 12507 /* Middle char is not a word char, skip the rotate. First and 12508 * third char were already checked at swap and swap3. */ 12509 PROF_STORE(sp->ts_state) 12510 sp->ts_state = STATE_REP_INI; 12511 break; 12512 } 12513 12514 /* Rotate three characters left: "123" -> "231". We change 12515 * "fword" here, it's changed back afterwards at STATE_UNROT3L. 
*/ 12516 if (TRY_DEEPER(su, stack, depth, SCORE_SWAP3)) 12517 { 12518 go_deeper(stack, depth, SCORE_SWAP3); 12519 #ifdef DEBUG_TRIEWALK 12520 p = fword + sp->ts_fidx; 12521 sprintf(changename[depth], "%.*s-%s: rotate left %c%c%c", 12522 sp->ts_twordlen, tword, fword + sp->ts_fidx, 12523 p[0], p[1], p[2]); 12524 #endif 12525 PROF_STORE(sp->ts_state) 12526 sp->ts_state = STATE_UNROT3L; 12527 ++depth; 12528 p = fword + sp->ts_fidx; 12529 #ifdef FEAT_MBYTE 12530 if (has_mbyte) 12531 { 12532 n = mb_cptr2len(p); 12533 c = mb_ptr2char(p); 12534 fl = mb_cptr2len(p + n); 12535 fl += mb_cptr2len(p + n + fl); 12536 mch_memmove(p, p + n, fl); 12537 mb_char2bytes(c, p + fl); 12538 stack[depth].ts_fidxtry = sp->ts_fidx + n + fl; 12539 } 12540 else 12541 #endif 12542 { 12543 c = *p; 12544 *p = p[1]; 12545 p[1] = p[2]; 12546 p[2] = c; 12547 stack[depth].ts_fidxtry = sp->ts_fidx + 3; 12548 } 12549 } 12550 else 12551 { 12552 PROF_STORE(sp->ts_state) 12553 sp->ts_state = STATE_REP_INI; 12554 } 12555 break; 12556 12557 case STATE_UNROT3L: 12558 /* Undo ROT3L: "231" -> "123" */ 12559 p = fword + sp->ts_fidx; 12560 #ifdef FEAT_MBYTE 12561 if (has_mbyte) 12562 { 12563 n = MB_BYTE2LEN(*p); 12564 n += MB_BYTE2LEN(p[n]); 12565 c = mb_ptr2char(p + n); 12566 tl = MB_BYTE2LEN(p[n]); 12567 mch_memmove(p + tl, p, n); 12568 mb_char2bytes(c, p); 12569 } 12570 else 12571 #endif 12572 { 12573 c = p[2]; 12574 p[2] = p[1]; 12575 p[1] = *p; 12576 *p = c; 12577 } 12578 12579 /* Rotate three bytes right: "123" -> "312". We change "fword" 12580 * here, it's changed back afterwards at STATE_UNROT3R. 
*/ 12581 if (TRY_DEEPER(su, stack, depth, SCORE_SWAP3)) 12582 { 12583 go_deeper(stack, depth, SCORE_SWAP3); 12584 #ifdef DEBUG_TRIEWALK 12585 p = fword + sp->ts_fidx; 12586 sprintf(changename[depth], "%.*s-%s: rotate right %c%c%c", 12587 sp->ts_twordlen, tword, fword + sp->ts_fidx, 12588 p[0], p[1], p[2]); 12589 #endif 12590 PROF_STORE(sp->ts_state) 12591 sp->ts_state = STATE_UNROT3R; 12592 ++depth; 12593 p = fword + sp->ts_fidx; 12594 #ifdef FEAT_MBYTE 12595 if (has_mbyte) 12596 { 12597 n = mb_cptr2len(p); 12598 n += mb_cptr2len(p + n); 12599 c = mb_ptr2char(p + n); 12600 tl = mb_cptr2len(p + n); 12601 mch_memmove(p + tl, p, n); 12602 mb_char2bytes(c, p); 12603 stack[depth].ts_fidxtry = sp->ts_fidx + n + tl; 12604 } 12605 else 12606 #endif 12607 { 12608 c = p[2]; 12609 p[2] = p[1]; 12610 p[1] = *p; 12611 *p = c; 12612 stack[depth].ts_fidxtry = sp->ts_fidx + 3; 12613 } 12614 } 12615 else 12616 { 12617 PROF_STORE(sp->ts_state) 12618 sp->ts_state = STATE_REP_INI; 12619 } 12620 break; 12621 12622 case STATE_UNROT3R: 12623 /* Undo ROT3R: "312" -> "123" */ 12624 p = fword + sp->ts_fidx; 12625 #ifdef FEAT_MBYTE 12626 if (has_mbyte) 12627 { 12628 c = mb_ptr2char(p); 12629 tl = MB_BYTE2LEN(*p); 12630 n = MB_BYTE2LEN(p[tl]); 12631 n += MB_BYTE2LEN(p[tl + n]); 12632 mch_memmove(p, p + tl, n); 12633 mb_char2bytes(c, p + n); 12634 } 12635 else 12636 #endif 12637 { 12638 c = *p; 12639 *p = p[1]; 12640 p[1] = p[2]; 12641 p[2] = c; 12642 } 12643 /*FALLTHROUGH*/ 12644 12645 case STATE_REP_INI: 12646 /* Check if matching with REP items from the .aff file would work. 
12647 * Quickly skip if: 12648 * - there are no REP items and we are not in the soundfold trie 12649 * - the score is going to be too high anyway 12650 * - already applied a REP item or swapped here */ 12651 if ((lp->lp_replang == NULL && !soundfold) 12652 || sp->ts_score + SCORE_REP >= su->su_maxscore 12653 || sp->ts_fidx < sp->ts_fidxtry) 12654 { 12655 PROF_STORE(sp->ts_state) 12656 sp->ts_state = STATE_FINAL; 12657 break; 12658 } 12659 12660 /* Use the first byte to quickly find the first entry that may 12661 * match. If the index is -1 there is none. */ 12662 if (soundfold) 12663 sp->ts_curi = slang->sl_repsal_first[fword[sp->ts_fidx]]; 12664 else 12665 sp->ts_curi = lp->lp_replang->sl_rep_first[fword[sp->ts_fidx]]; 12666 12667 if (sp->ts_curi < 0) 12668 { 12669 PROF_STORE(sp->ts_state) 12670 sp->ts_state = STATE_FINAL; 12671 break; 12672 } 12673 12674 PROF_STORE(sp->ts_state) 12675 sp->ts_state = STATE_REP; 12676 /*FALLTHROUGH*/ 12677 12678 case STATE_REP: 12679 /* Try matching with REP items from the .aff file. For each match 12680 * replace the characters and check if the resulting word is 12681 * valid. */ 12682 p = fword + sp->ts_fidx; 12683 12684 if (soundfold) 12685 gap = &slang->sl_repsal; 12686 else 12687 gap = &lp->lp_replang->sl_rep; 12688 while (sp->ts_curi < gap->ga_len) 12689 { 12690 ftp = (fromto_T *)gap->ga_data + sp->ts_curi++; 12691 if (*ftp->ft_from != *p) 12692 { 12693 /* past possible matching entries */ 12694 sp->ts_curi = gap->ga_len; 12695 break; 12696 } 12697 if (STRNCMP(ftp->ft_from, p, STRLEN(ftp->ft_from)) == 0 12698 && TRY_DEEPER(su, stack, depth, SCORE_REP)) 12699 { 12700 go_deeper(stack, depth, SCORE_REP); 12701 #ifdef DEBUG_TRIEWALK 12702 sprintf(changename[depth], "%.*s-%s: replace %s with %s", 12703 sp->ts_twordlen, tword, fword + sp->ts_fidx, 12704 ftp->ft_from, ftp->ft_to); 12705 #endif 12706 /* Need to undo this afterwards. 
*/ 12707 PROF_STORE(sp->ts_state) 12708 sp->ts_state = STATE_REP_UNDO; 12709 12710 /* Change the "from" to the "to" string. */ 12711 ++depth; 12712 fl = (int)STRLEN(ftp->ft_from); 12713 tl = (int)STRLEN(ftp->ft_to); 12714 if (fl != tl) 12715 { 12716 STRMOVE(p + tl, p + fl); 12717 repextra += tl - fl; 12718 } 12719 mch_memmove(p, ftp->ft_to, tl); 12720 stack[depth].ts_fidxtry = sp->ts_fidx + tl; 12721 #ifdef FEAT_MBYTE 12722 stack[depth].ts_tcharlen = 0; 12723 #endif 12724 break; 12725 } 12726 } 12727 12728 if (sp->ts_curi >= gap->ga_len && sp->ts_state == STATE_REP) 12729 { 12730 /* No (more) matches. */ 12731 PROF_STORE(sp->ts_state) 12732 sp->ts_state = STATE_FINAL; 12733 } 12734 12735 break; 12736 12737 case STATE_REP_UNDO: 12738 /* Undo a REP replacement and continue with the next one. */ 12739 if (soundfold) 12740 gap = &slang->sl_repsal; 12741 else 12742 gap = &lp->lp_replang->sl_rep; 12743 ftp = (fromto_T *)gap->ga_data + sp->ts_curi - 1; 12744 fl = (int)STRLEN(ftp->ft_from); 12745 tl = (int)STRLEN(ftp->ft_to); 12746 p = fword + sp->ts_fidx; 12747 if (fl != tl) 12748 { 12749 STRMOVE(p + fl, p + tl); 12750 repextra -= tl - fl; 12751 } 12752 mch_memmove(p, ftp->ft_from, fl); 12753 PROF_STORE(sp->ts_state) 12754 sp->ts_state = STATE_REP; 12755 break; 12756 12757 default: 12758 /* Did all possible states at this level, go up one level. */ 12759 --depth; 12760 12761 if (depth >= 0 && stack[depth].ts_prefixdepth == PFD_PREFIXTREE) 12762 { 12763 /* Continue in or go back to the prefix tree. */ 12764 byts = pbyts; 12765 idxs = pidxs; 12766 } 12767 12768 /* Don't check for CTRL-C too often, it takes time. */ 12769 if (--breakcheckcount == 0) 12770 { 12771 ui_breakcheck(); 12772 breakcheckcount = 1000; 12773 } 12774 } 12775 } 12776 } 12777 12778 12779 /* 12780 * Go one level deeper in the tree. 
12781 */ 12782 static void 12783 go_deeper(trystate_T *stack, int depth, int score_add) 12784 { 12785 stack[depth + 1] = stack[depth]; 12786 stack[depth + 1].ts_state = STATE_START; 12787 stack[depth + 1].ts_score = stack[depth].ts_score + score_add; 12788 stack[depth + 1].ts_curi = 1; /* start just after length byte */ 12789 stack[depth + 1].ts_flags = 0; 12790 } 12791 12792 #ifdef FEAT_MBYTE 12793 /* 12794 * Case-folding may change the number of bytes: Count nr of chars in 12795 * fword[flen] and return the byte length of that many chars in "word". 12796 */ 12797 static int 12798 nofold_len(char_u *fword, int flen, char_u *word) 12799 { 12800 char_u *p; 12801 int i = 0; 12802 12803 for (p = fword; p < fword + flen; mb_ptr_adv(p)) 12804 ++i; 12805 for (p = word; i > 0; mb_ptr_adv(p)) 12806 --i; 12807 return (int)(p - word); 12808 } 12809 #endif 12810 12811 /* 12812 * "fword" is a good word with case folded. Find the matching keep-case 12813 * words and put it in "kword". 12814 * Theoretically there could be several keep-case words that result in the 12815 * same case-folded word, but we only find one... 12816 */ 12817 static void 12818 find_keepcap_word(slang_T *slang, char_u *fword, char_u *kword) 12819 { 12820 char_u uword[MAXWLEN]; /* "fword" in upper-case */ 12821 int depth; 12822 idx_T tryidx; 12823 12824 /* The following arrays are used at each depth in the tree. */ 12825 idx_T arridx[MAXWLEN]; 12826 int round[MAXWLEN]; 12827 int fwordidx[MAXWLEN]; 12828 int uwordidx[MAXWLEN]; 12829 int kwordlen[MAXWLEN]; 12830 12831 int flen, ulen; 12832 int l; 12833 int len; 12834 int c; 12835 idx_T lo, hi, m; 12836 char_u *p; 12837 char_u *byts = slang->sl_kbyts; /* array with bytes of the words */ 12838 idx_T *idxs = slang->sl_kidxs; /* array with indexes */ 12839 12840 if (byts == NULL) 12841 { 12842 /* array is empty: "cannot happen" */ 12843 *kword = NUL; 12844 return; 12845 } 12846 12847 /* Make an all-cap version of "fword". 
*/ 12848 allcap_copy(fword, uword); 12849 12850 /* 12851 * Each character needs to be tried both case-folded and upper-case. 12852 * All this gets very complicated if we keep in mind that changing case 12853 * may change the byte length of a multi-byte character... 12854 */ 12855 depth = 0; 12856 arridx[0] = 0; 12857 round[0] = 0; 12858 fwordidx[0] = 0; 12859 uwordidx[0] = 0; 12860 kwordlen[0] = 0; 12861 while (depth >= 0) 12862 { 12863 if (fword[fwordidx[depth]] == NUL) 12864 { 12865 /* We are at the end of "fword". If the tree allows a word to end 12866 * here we have found a match. */ 12867 if (byts[arridx[depth] + 1] == 0) 12868 { 12869 kword[kwordlen[depth]] = NUL; 12870 return; 12871 } 12872 12873 /* kword is getting too long, continue one level up */ 12874 --depth; 12875 } 12876 else if (++round[depth] > 2) 12877 { 12878 /* tried both fold-case and upper-case character, continue one 12879 * level up */ 12880 --depth; 12881 } 12882 else 12883 { 12884 /* 12885 * round[depth] == 1: Try using the folded-case character. 12886 * round[depth] == 2: Try using the upper-case character. 12887 */ 12888 #ifdef FEAT_MBYTE 12889 if (has_mbyte) 12890 { 12891 flen = mb_cptr2len(fword + fwordidx[depth]); 12892 ulen = mb_cptr2len(uword + uwordidx[depth]); 12893 } 12894 else 12895 #endif 12896 ulen = flen = 1; 12897 if (round[depth] == 1) 12898 { 12899 p = fword + fwordidx[depth]; 12900 l = flen; 12901 } 12902 else 12903 { 12904 p = uword + uwordidx[depth]; 12905 l = ulen; 12906 } 12907 12908 for (tryidx = arridx[depth]; l > 0; --l) 12909 { 12910 /* Perform a binary search in the list of accepted bytes. */ 12911 len = byts[tryidx++]; 12912 c = *p++; 12913 lo = tryidx; 12914 hi = tryidx + len - 1; 12915 while (lo < hi) 12916 { 12917 m = (lo + hi) / 2; 12918 if (byts[m] > c) 12919 hi = m - 1; 12920 else if (byts[m] < c) 12921 lo = m + 1; 12922 else 12923 { 12924 lo = hi = m; 12925 break; 12926 } 12927 } 12928 12929 /* Stop if there is no matching byte. 
*/ 12930 if (hi < lo || byts[lo] != c) 12931 break; 12932 12933 /* Continue at the child (if there is one). */ 12934 tryidx = idxs[lo]; 12935 } 12936 12937 if (l == 0) 12938 { 12939 /* 12940 * Found the matching char. Copy it to "kword" and go a 12941 * level deeper. 12942 */ 12943 if (round[depth] == 1) 12944 { 12945 STRNCPY(kword + kwordlen[depth], fword + fwordidx[depth], 12946 flen); 12947 kwordlen[depth + 1] = kwordlen[depth] + flen; 12948 } 12949 else 12950 { 12951 STRNCPY(kword + kwordlen[depth], uword + uwordidx[depth], 12952 ulen); 12953 kwordlen[depth + 1] = kwordlen[depth] + ulen; 12954 } 12955 fwordidx[depth + 1] = fwordidx[depth] + flen; 12956 uwordidx[depth + 1] = uwordidx[depth] + ulen; 12957 12958 ++depth; 12959 arridx[depth] = tryidx; 12960 round[depth] = 0; 12961 } 12962 } 12963 } 12964 12965 /* Didn't find it: "cannot happen". */ 12966 *kword = NUL; 12967 } 12968 12969 /* 12970 * Compute the sound-a-like score for suggestions in su->su_ga and add them to 12971 * su->su_sga. 12972 */ 12973 static void 12974 score_comp_sal(suginfo_T *su) 12975 { 12976 langp_T *lp; 12977 char_u badsound[MAXWLEN]; 12978 int i; 12979 suggest_T *stp; 12980 suggest_T *sstp; 12981 int score; 12982 int lpi; 12983 12984 if (ga_grow(&su->su_sga, su->su_ga.ga_len) == FAIL) 12985 return; 12986 12987 /* Use the sound-folding of the first language that supports it. */ 12988 for (lpi = 0; lpi < curwin->w_s->b_langp.ga_len; ++lpi) 12989 { 12990 lp = LANGP_ENTRY(curwin->w_s->b_langp, lpi); 12991 if (lp->lp_slang->sl_sal.ga_len > 0) 12992 { 12993 /* soundfold the bad word */ 12994 spell_soundfold(lp->lp_slang, su->su_fbadword, TRUE, badsound); 12995 12996 for (i = 0; i < su->su_ga.ga_len; ++i) 12997 { 12998 stp = &SUG(su->su_ga, i); 12999 13000 /* Case-fold the suggested word, sound-fold it and compute the 13001 * sound-a-like score. */ 13002 score = stp_sal_score(stp, su, lp->lp_slang, badsound); 13003 if (score < SCORE_MAXMAX) 13004 { 13005 /* Add the suggestion. 
*/ 13006 sstp = &SUG(su->su_sga, su->su_sga.ga_len); 13007 sstp->st_word = vim_strsave(stp->st_word); 13008 if (sstp->st_word != NULL) 13009 { 13010 sstp->st_wordlen = stp->st_wordlen; 13011 sstp->st_score = score; 13012 sstp->st_altscore = 0; 13013 sstp->st_orglen = stp->st_orglen; 13014 ++su->su_sga.ga_len; 13015 } 13016 } 13017 } 13018 break; 13019 } 13020 } 13021 } 13022 13023 /* 13024 * Combine the list of suggestions in su->su_ga and su->su_sga. 13025 * They are entwined. 13026 */ 13027 static void 13028 score_combine(suginfo_T *su) 13029 { 13030 int i; 13031 int j; 13032 garray_T ga; 13033 garray_T *gap; 13034 langp_T *lp; 13035 suggest_T *stp; 13036 char_u *p; 13037 char_u badsound[MAXWLEN]; 13038 int round; 13039 int lpi; 13040 slang_T *slang = NULL; 13041 13042 /* Add the alternate score to su_ga. */ 13043 for (lpi = 0; lpi < curwin->w_s->b_langp.ga_len; ++lpi) 13044 { 13045 lp = LANGP_ENTRY(curwin->w_s->b_langp, lpi); 13046 if (lp->lp_slang->sl_sal.ga_len > 0) 13047 { 13048 /* soundfold the bad word */ 13049 slang = lp->lp_slang; 13050 spell_soundfold(slang, su->su_fbadword, TRUE, badsound); 13051 13052 for (i = 0; i < su->su_ga.ga_len; ++i) 13053 { 13054 stp = &SUG(su->su_ga, i); 13055 stp->st_altscore = stp_sal_score(stp, su, slang, badsound); 13056 if (stp->st_altscore == SCORE_MAXMAX) 13057 stp->st_score = (stp->st_score * 3 + SCORE_BIG) / 4; 13058 else 13059 stp->st_score = (stp->st_score * 3 13060 + stp->st_altscore) / 4; 13061 stp->st_salscore = FALSE; 13062 } 13063 break; 13064 } 13065 } 13066 13067 if (slang == NULL) /* Using "double" without sound folding. */ 13068 { 13069 (void)cleanup_suggestions(&su->su_ga, su->su_maxscore, 13070 su->su_maxcount); 13071 return; 13072 } 13073 13074 /* Add the alternate score to su_sga. 
*/ 13075 for (i = 0; i < su->su_sga.ga_len; ++i) 13076 { 13077 stp = &SUG(su->su_sga, i); 13078 stp->st_altscore = spell_edit_score(slang, 13079 su->su_badword, stp->st_word); 13080 if (stp->st_score == SCORE_MAXMAX) 13081 stp->st_score = (SCORE_BIG * 7 + stp->st_altscore) / 8; 13082 else 13083 stp->st_score = (stp->st_score * 7 + stp->st_altscore) / 8; 13084 stp->st_salscore = TRUE; 13085 } 13086 13087 /* Remove bad suggestions, sort the suggestions and truncate at "maxcount" 13088 * for both lists. */ 13089 check_suggestions(su, &su->su_ga); 13090 (void)cleanup_suggestions(&su->su_ga, su->su_maxscore, su->su_maxcount); 13091 check_suggestions(su, &su->su_sga); 13092 (void)cleanup_suggestions(&su->su_sga, su->su_maxscore, su->su_maxcount); 13093 13094 ga_init2(&ga, (int)sizeof(suginfo_T), 1); 13095 if (ga_grow(&ga, su->su_ga.ga_len + su->su_sga.ga_len) == FAIL) 13096 return; 13097 13098 stp = &SUG(ga, 0); 13099 for (i = 0; i < su->su_ga.ga_len || i < su->su_sga.ga_len; ++i) 13100 { 13101 /* round 1: get a suggestion from su_ga 13102 * round 2: get a suggestion from su_sga */ 13103 for (round = 1; round <= 2; ++round) 13104 { 13105 gap = round == 1 ? &su->su_ga : &su->su_sga; 13106 if (i < gap->ga_len) 13107 { 13108 /* Don't add a word if it's already there. */ 13109 p = SUG(*gap, i).st_word; 13110 for (j = 0; j < ga.ga_len; ++j) 13111 if (STRCMP(stp[j].st_word, p) == 0) 13112 break; 13113 if (j == ga.ga_len) 13114 stp[ga.ga_len++] = SUG(*gap, i); 13115 else 13116 vim_free(p); 13117 } 13118 } 13119 } 13120 13121 ga_clear(&su->su_ga); 13122 ga_clear(&su->su_sga); 13123 13124 /* Truncate the list to the number of suggestions that will be displayed. */ 13125 if (ga.ga_len > su->su_maxcount) 13126 { 13127 for (i = su->su_maxcount; i < ga.ga_len; ++i) 13128 vim_free(stp[i].st_word); 13129 ga.ga_len = su->su_maxcount; 13130 } 13131 13132 su->su_ga = ga; 13133 } 13134 13135 /* 13136 * For the goodword in "stp" compute the soundalike score compared to the 13137 * badword. 
13138 */ 13139 static int 13140 stp_sal_score( 13141 suggest_T *stp, 13142 suginfo_T *su, 13143 slang_T *slang, 13144 char_u *badsound) /* sound-folded badword */ 13145 { 13146 char_u *p; 13147 char_u *pbad; 13148 char_u *pgood; 13149 char_u badsound2[MAXWLEN]; 13150 char_u fword[MAXWLEN]; 13151 char_u goodsound[MAXWLEN]; 13152 char_u goodword[MAXWLEN]; 13153 int lendiff; 13154 13155 lendiff = (int)(su->su_badlen - stp->st_orglen); 13156 if (lendiff >= 0) 13157 pbad = badsound; 13158 else 13159 { 13160 /* soundfold the bad word with more characters following */ 13161 (void)spell_casefold(su->su_badptr, stp->st_orglen, fword, MAXWLEN); 13162 13163 /* When joining two words the sound often changes a lot. E.g., "t he" 13164 * sounds like "t h" while "the" sounds like "@". Avoid that by 13165 * removing the space. Don't do it when the good word also contains a 13166 * space. */ 13167 if (vim_iswhite(su->su_badptr[su->su_badlen]) 13168 && *skiptowhite(stp->st_word) == NUL) 13169 for (p = fword; *(p = skiptowhite(p)) != NUL; ) 13170 STRMOVE(p, p + 1); 13171 13172 spell_soundfold(slang, fword, TRUE, badsound2); 13173 pbad = badsound2; 13174 } 13175 13176 if (lendiff > 0 && stp->st_wordlen + lendiff < MAXWLEN) 13177 { 13178 /* Add part of the bad word to the good word, so that we soundfold 13179 * what replaces the bad word. */ 13180 STRCPY(goodword, stp->st_word); 13181 vim_strncpy(goodword + stp->st_wordlen, 13182 su->su_badptr + su->su_badlen - lendiff, lendiff); 13183 pgood = goodword; 13184 } 13185 else 13186 pgood = stp->st_word; 13187 13188 /* Sound-fold the word and compute the score for the difference. */ 13189 spell_soundfold(slang, pgood, FALSE, goodsound); 13190 13191 return soundalike_score(goodsound, pbad); 13192 } 13193 13194 /* structure used to store soundfolded words that add_sound_suggest() has 13195 * handled already. 
*/ 13196 typedef struct 13197 { 13198 short sft_score; /* lowest score used */ 13199 char_u sft_word[1]; /* soundfolded word, actually longer */ 13200 } sftword_T; 13201 13202 static sftword_T dumsft; 13203 #define HIKEY2SFT(p) ((sftword_T *)(p - (dumsft.sft_word - (char_u *)&dumsft))) 13204 #define HI2SFT(hi) HIKEY2SFT((hi)->hi_key) 13205 13206 /* 13207 * Prepare for calling suggest_try_soundalike(). 13208 */ 13209 static void 13210 suggest_try_soundalike_prep(void) 13211 { 13212 langp_T *lp; 13213 int lpi; 13214 slang_T *slang; 13215 13216 /* Do this for all languages that support sound folding and for which a 13217 * .sug file has been loaded. */ 13218 for (lpi = 0; lpi < curwin->w_s->b_langp.ga_len; ++lpi) 13219 { 13220 lp = LANGP_ENTRY(curwin->w_s->b_langp, lpi); 13221 slang = lp->lp_slang; 13222 if (slang->sl_sal.ga_len > 0 && slang->sl_sbyts != NULL) 13223 /* prepare the hashtable used by add_sound_suggest() */ 13224 hash_init(&slang->sl_sounddone); 13225 } 13226 } 13227 13228 /* 13229 * Find suggestions by comparing the word in a sound-a-like form. 13230 * Note: This doesn't support postponed prefixes. 13231 */ 13232 static void 13233 suggest_try_soundalike(suginfo_T *su) 13234 { 13235 char_u salword[MAXWLEN]; 13236 langp_T *lp; 13237 int lpi; 13238 slang_T *slang; 13239 13240 /* Do this for all languages that support sound folding and for which a 13241 * .sug file has been loaded. */ 13242 for (lpi = 0; lpi < curwin->w_s->b_langp.ga_len; ++lpi) 13243 { 13244 lp = LANGP_ENTRY(curwin->w_s->b_langp, lpi); 13245 slang = lp->lp_slang; 13246 if (slang->sl_sal.ga_len > 0 && slang->sl_sbyts != NULL) 13247 { 13248 /* soundfold the bad word */ 13249 spell_soundfold(slang, su->su_fbadword, TRUE, salword); 13250 13251 /* try all kinds of inserts/deletes/swaps/etc. 
*/ 13252 /* TODO: also soundfold the next words, so that we can try joining 13253 * and splitting */ 13254 #ifdef SUGGEST_PROFILE 13255 prof_init(); 13256 #endif 13257 suggest_trie_walk(su, lp, salword, TRUE); 13258 #ifdef SUGGEST_PROFILE 13259 prof_report("soundalike"); 13260 #endif 13261 } 13262 } 13263 } 13264 13265 /* 13266 * Finish up after calling suggest_try_soundalike(). 13267 */ 13268 static void 13269 suggest_try_soundalike_finish(void) 13270 { 13271 langp_T *lp; 13272 int lpi; 13273 slang_T *slang; 13274 int todo; 13275 hashitem_T *hi; 13276 13277 /* Do this for all languages that support sound folding and for which a 13278 * .sug file has been loaded. */ 13279 for (lpi = 0; lpi < curwin->w_s->b_langp.ga_len; ++lpi) 13280 { 13281 lp = LANGP_ENTRY(curwin->w_s->b_langp, lpi); 13282 slang = lp->lp_slang; 13283 if (slang->sl_sal.ga_len > 0 && slang->sl_sbyts != NULL) 13284 { 13285 /* Free the info about handled words. */ 13286 todo = (int)slang->sl_sounddone.ht_used; 13287 for (hi = slang->sl_sounddone.ht_array; todo > 0; ++hi) 13288 if (!HASHITEM_EMPTY(hi)) 13289 { 13290 vim_free(HI2SFT(hi)); 13291 --todo; 13292 } 13293 13294 /* Clear the hashtable, it may also be used by another region. */ 13295 hash_clear(&slang->sl_sounddone); 13296 hash_init(&slang->sl_sounddone); 13297 } 13298 } 13299 } 13300 13301 /* 13302 * A match with a soundfolded word is found. Add the good word(s) that 13303 * produce this soundfolded word. 
13304 */ 13305 static void 13306 add_sound_suggest( 13307 suginfo_T *su, 13308 char_u *goodword, 13309 int score, /* soundfold score */ 13310 langp_T *lp) 13311 { 13312 slang_T *slang = lp->lp_slang; /* language for sound folding */ 13313 int sfwordnr; 13314 char_u *nrline; 13315 int orgnr; 13316 char_u theword[MAXWLEN]; 13317 int i; 13318 int wlen; 13319 char_u *byts; 13320 idx_T *idxs; 13321 int n; 13322 int wordcount; 13323 int wc; 13324 int goodscore; 13325 hash_T hash; 13326 hashitem_T *hi; 13327 sftword_T *sft; 13328 int bc, gc; 13329 int limit; 13330 13331 /* 13332 * It's very well possible that the same soundfold word is found several 13333 * times with different scores. Since the following is quite slow only do 13334 * the words that have a better score than before. Use a hashtable to 13335 * remember the words that have been done. 13336 */ 13337 hash = hash_hash(goodword); 13338 hi = hash_lookup(&slang->sl_sounddone, goodword, hash); 13339 if (HASHITEM_EMPTY(hi)) 13340 { 13341 sft = (sftword_T *)alloc((unsigned)(sizeof(sftword_T) 13342 + STRLEN(goodword))); 13343 if (sft != NULL) 13344 { 13345 sft->sft_score = score; 13346 STRCPY(sft->sft_word, goodword); 13347 hash_add_item(&slang->sl_sounddone, hi, sft->sft_word, hash); 13348 } 13349 } 13350 else 13351 { 13352 sft = HI2SFT(hi); 13353 if (score >= sft->sft_score) 13354 return; 13355 sft->sft_score = score; 13356 } 13357 13358 /* 13359 * Find the word nr in the soundfold tree. 13360 */ 13361 sfwordnr = soundfold_find(slang, goodword); 13362 if (sfwordnr < 0) 13363 { 13364 EMSG2(_(e_intern2), "add_sound_suggest()"); 13365 return; 13366 } 13367 13368 /* 13369 * go over the list of good words that produce this soundfold word 13370 */ 13371 nrline = ml_get_buf(slang->sl_sugbuf, (linenr_T)(sfwordnr + 1), FALSE); 13372 orgnr = 0; 13373 while (*nrline != NUL) 13374 { 13375 /* The wordnr was stored in a minimal nr of bytes as an offset to the 13376 * previous wordnr. 
*/ 13377 orgnr += bytes2offset(&nrline); 13378 13379 byts = slang->sl_fbyts; 13380 idxs = slang->sl_fidxs; 13381 13382 /* Lookup the word "orgnr" one of the two tries. */ 13383 n = 0; 13384 wordcount = 0; 13385 for (wlen = 0; wlen < MAXWLEN - 3; ++wlen) 13386 { 13387 i = 1; 13388 if (wordcount == orgnr && byts[n + 1] == NUL) 13389 break; /* found end of word */ 13390 13391 if (byts[n + 1] == NUL) 13392 ++wordcount; 13393 13394 /* skip over the NUL bytes */ 13395 for ( ; byts[n + i] == NUL; ++i) 13396 if (i > byts[n]) /* safety check */ 13397 { 13398 STRCPY(theword + wlen, "BAD"); 13399 wlen += 3; 13400 goto badword; 13401 } 13402 13403 /* One of the siblings must have the word. */ 13404 for ( ; i < byts[n]; ++i) 13405 { 13406 wc = idxs[idxs[n + i]]; /* nr of words under this byte */ 13407 if (wordcount + wc > orgnr) 13408 break; 13409 wordcount += wc; 13410 } 13411 13412 theword[wlen] = byts[n + i]; 13413 n = idxs[n + i]; 13414 } 13415 badword: 13416 theword[wlen] = NUL; 13417 13418 /* Go over the possible flags and regions. */ 13419 for (; i <= byts[n] && byts[n + i] == NUL; ++i) 13420 { 13421 char_u cword[MAXWLEN]; 13422 char_u *p; 13423 int flags = (int)idxs[n + i]; 13424 13425 /* Skip words with the NOSUGGEST flag */ 13426 if (flags & WF_NOSUGGEST) 13427 continue; 13428 13429 if (flags & WF_KEEPCAP) 13430 { 13431 /* Must find the word in the keep-case tree. */ 13432 find_keepcap_word(slang, theword, cword); 13433 p = cword; 13434 } 13435 else 13436 { 13437 flags |= su->su_badflags; 13438 if ((flags & WF_CAPMASK) != 0) 13439 { 13440 /* Need to fix case according to "flags". */ 13441 make_case_word(theword, cword, flags); 13442 p = cword; 13443 } 13444 else 13445 p = theword; 13446 } 13447 13448 /* Add the suggestion. */ 13449 if (sps_flags & SPS_DOUBLE) 13450 { 13451 /* Add the suggestion if the score isn't too bad. 
*/ 13452 if (score <= su->su_maxscore) 13453 add_suggestion(su, &su->su_sga, p, su->su_badlen, 13454 score, 0, FALSE, slang, FALSE); 13455 } 13456 else 13457 { 13458 /* Add a penalty for words in another region. */ 13459 if ((flags & WF_REGION) 13460 && (((unsigned)flags >> 16) & lp->lp_region) == 0) 13461 goodscore = SCORE_REGION; 13462 else 13463 goodscore = 0; 13464 13465 /* Add a small penalty for changing the first letter from 13466 * lower to upper case. Helps for "tath" -> "Kath", which is 13467 * less common than "tath" -> "path". Don't do it when the 13468 * letter is the same, that has already been counted. */ 13469 gc = PTR2CHAR(p); 13470 if (SPELL_ISUPPER(gc)) 13471 { 13472 bc = PTR2CHAR(su->su_badword); 13473 if (!SPELL_ISUPPER(bc) 13474 && SPELL_TOFOLD(bc) != SPELL_TOFOLD(gc)) 13475 goodscore += SCORE_ICASE / 2; 13476 } 13477 13478 /* Compute the score for the good word. This only does letter 13479 * insert/delete/swap/replace. REP items are not considered, 13480 * which may make the score a bit higher. 13481 * Use a limit for the score to make it work faster. Use 13482 * MAXSCORE(), because RESCORE() will change the score. 13483 * If the limit is very high then the iterative method is 13484 * inefficient, using an array is quicker. */ 13485 limit = MAXSCORE(su->su_sfmaxscore - goodscore, score); 13486 if (limit > SCORE_LIMITMAX) 13487 goodscore += spell_edit_score(slang, su->su_badword, p); 13488 else 13489 goodscore += spell_edit_score_limit(slang, su->su_badword, 13490 p, limit); 13491 13492 /* When going over the limit don't bother to do the rest. */ 13493 if (goodscore < SCORE_MAXMAX) 13494 { 13495 /* Give a bonus to words seen before. */ 13496 goodscore = score_wordcount_adj(slang, goodscore, p, FALSE); 13497 13498 /* Add the suggestion if the score isn't too bad. 
*/ 13499 goodscore = RESCORE(goodscore, score); 13500 if (goodscore <= su->su_sfmaxscore) 13501 add_suggestion(su, &su->su_ga, p, su->su_badlen, 13502 goodscore, score, TRUE, slang, TRUE); 13503 } 13504 } 13505 } 13506 /* smsg("word %s (%d): %s (%d)", sftword, sftnr, theword, orgnr); */ 13507 } 13508 } 13509 13510 /* 13511 * Find word "word" in fold-case tree for "slang" and return the word number. 13512 */ 13513 static int 13514 soundfold_find(slang_T *slang, char_u *word) 13515 { 13516 idx_T arridx = 0; 13517 int len; 13518 int wlen = 0; 13519 int c; 13520 char_u *ptr = word; 13521 char_u *byts; 13522 idx_T *idxs; 13523 int wordnr = 0; 13524 13525 byts = slang->sl_sbyts; 13526 idxs = slang->sl_sidxs; 13527 13528 for (;;) 13529 { 13530 /* First byte is the number of possible bytes. */ 13531 len = byts[arridx++]; 13532 13533 /* If the first possible byte is a zero the word could end here. 13534 * If the word ends we found the word. If not skip the NUL bytes. */ 13535 c = ptr[wlen]; 13536 if (byts[arridx] == NUL) 13537 { 13538 if (c == NUL) 13539 break; 13540 13541 /* Skip over the zeros, there can be several. */ 13542 while (len > 0 && byts[arridx] == NUL) 13543 { 13544 ++arridx; 13545 --len; 13546 } 13547 if (len == 0) 13548 return -1; /* no children, word should have ended here */ 13549 ++wordnr; 13550 } 13551 13552 /* If the word ends we didn't find it. */ 13553 if (c == NUL) 13554 return -1; 13555 13556 /* Perform a binary search in the list of accepted bytes. */ 13557 if (c == TAB) /* <Tab> is handled like <Space> */ 13558 c = ' '; 13559 while (byts[arridx] < c) 13560 { 13561 /* The word count is in the first idxs[] entry of the child. */ 13562 wordnr += idxs[idxs[arridx]]; 13563 ++arridx; 13564 if (--len == 0) /* end of the bytes, didn't find it */ 13565 return -1; 13566 } 13567 if (byts[arridx] != c) /* didn't find the byte */ 13568 return -1; 13569 13570 /* Continue at the child (if there is one). 
*/ 13571 arridx = idxs[arridx]; 13572 ++wlen; 13573 13574 /* One space in the good word may stand for several spaces in the 13575 * checked word. */ 13576 if (c == ' ') 13577 while (ptr[wlen] == ' ' || ptr[wlen] == TAB) 13578 ++wlen; 13579 } 13580 13581 return wordnr; 13582 } 13583 13584 /* 13585 * Copy "fword" to "cword", fixing case according to "flags". 13586 */ 13587 static void 13588 make_case_word(char_u *fword, char_u *cword, int flags) 13589 { 13590 if (flags & WF_ALLCAP) 13591 /* Make it all upper-case */ 13592 allcap_copy(fword, cword); 13593 else if (flags & WF_ONECAP) 13594 /* Make the first letter upper-case */ 13595 onecap_copy(fword, cword, TRUE); 13596 else 13597 /* Use goodword as-is. */ 13598 STRCPY(cword, fword); 13599 } 13600 13601 /* 13602 * Use map string "map" for languages "lp". 13603 */ 13604 static void 13605 set_map_str(slang_T *lp, char_u *map) 13606 { 13607 char_u *p; 13608 int headc = 0; 13609 int c; 13610 int i; 13611 13612 if (*map == NUL) 13613 { 13614 lp->sl_has_map = FALSE; 13615 return; 13616 } 13617 lp->sl_has_map = TRUE; 13618 13619 /* Init the array and hash tables empty. */ 13620 for (i = 0; i < 256; ++i) 13621 lp->sl_map_array[i] = 0; 13622 #ifdef FEAT_MBYTE 13623 hash_init(&lp->sl_map_hash); 13624 #endif 13625 13626 /* 13627 * The similar characters are stored separated with slashes: 13628 * "aaa/bbb/ccc/". Fill sl_map_array[c] with the character before c and 13629 * before the same slash. For characters above 255 sl_map_hash is used. 13630 */ 13631 for (p = map; *p != NUL; ) 13632 { 13633 #ifdef FEAT_MBYTE 13634 c = mb_cptr2char_adv(&p); 13635 #else 13636 c = *p++; 13637 #endif 13638 if (c == '/') 13639 headc = 0; 13640 else 13641 { 13642 if (headc == 0) 13643 headc = c; 13644 13645 #ifdef FEAT_MBYTE 13646 /* Characters above 255 don't fit in sl_map_array[], put them in 13647 * the hash table. Each entry is the char, a NUL the headchar and 13648 * a NUL. 
*/ 13649 if (c >= 256) 13650 { 13651 int cl = mb_char2len(c); 13652 int headcl = mb_char2len(headc); 13653 char_u *b; 13654 hash_T hash; 13655 hashitem_T *hi; 13656 13657 b = alloc((unsigned)(cl + headcl + 2)); 13658 if (b == NULL) 13659 return; 13660 mb_char2bytes(c, b); 13661 b[cl] = NUL; 13662 mb_char2bytes(headc, b + cl + 1); 13663 b[cl + 1 + headcl] = NUL; 13664 hash = hash_hash(b); 13665 hi = hash_lookup(&lp->sl_map_hash, b, hash); 13666 if (HASHITEM_EMPTY(hi)) 13667 hash_add_item(&lp->sl_map_hash, hi, b, hash); 13668 else 13669 { 13670 /* This should have been checked when generating the .spl 13671 * file. */ 13672 EMSG(_("E783: duplicate char in MAP entry")); 13673 vim_free(b); 13674 } 13675 } 13676 else 13677 #endif 13678 lp->sl_map_array[c] = headc; 13679 } 13680 } 13681 } 13682 13683 /* 13684 * Return TRUE if "c1" and "c2" are similar characters according to the MAP 13685 * lines in the .aff file. 13686 */ 13687 static int 13688 similar_chars(slang_T *slang, int c1, int c2) 13689 { 13690 int m1, m2; 13691 #ifdef FEAT_MBYTE 13692 char_u buf[MB_MAXBYTES + 1]; 13693 hashitem_T *hi; 13694 13695 if (c1 >= 256) 13696 { 13697 buf[mb_char2bytes(c1, buf)] = 0; 13698 hi = hash_find(&slang->sl_map_hash, buf); 13699 if (HASHITEM_EMPTY(hi)) 13700 m1 = 0; 13701 else 13702 m1 = mb_ptr2char(hi->hi_key + STRLEN(hi->hi_key) + 1); 13703 } 13704 else 13705 #endif 13706 m1 = slang->sl_map_array[c1]; 13707 if (m1 == 0) 13708 return FALSE; 13709 13710 13711 #ifdef FEAT_MBYTE 13712 if (c2 >= 256) 13713 { 13714 buf[mb_char2bytes(c2, buf)] = 0; 13715 hi = hash_find(&slang->sl_map_hash, buf); 13716 if (HASHITEM_EMPTY(hi)) 13717 m2 = 0; 13718 else 13719 m2 = mb_ptr2char(hi->hi_key + STRLEN(hi->hi_key) + 1); 13720 } 13721 else 13722 #endif 13723 m2 = slang->sl_map_array[c2]; 13724 13725 return m1 == m2; 13726 } 13727 13728 /* 13729 * Add a suggestion to the list of suggestions. 13730 * For a suggestion that is already in the list the lowest score is remembered. 
13731 */ 13732 static void 13733 add_suggestion( 13734 suginfo_T *su, 13735 garray_T *gap, /* either su_ga or su_sga */ 13736 char_u *goodword, 13737 int badlenarg, /* len of bad word replaced with "goodword" */ 13738 int score, 13739 int altscore, 13740 int had_bonus, /* value for st_had_bonus */ 13741 slang_T *slang, /* language for sound folding */ 13742 int maxsf) /* su_maxscore applies to soundfold score, 13743 su_sfmaxscore to the total score. */ 13744 { 13745 int goodlen; /* len of goodword changed */ 13746 int badlen; /* len of bad word changed */ 13747 suggest_T *stp; 13748 suggest_T new_sug; 13749 int i; 13750 char_u *pgood, *pbad; 13751 13752 /* Minimize "badlen" for consistency. Avoids that changing "the the" to 13753 * "thee the" is added next to changing the first "the" the "thee". */ 13754 pgood = goodword + STRLEN(goodword); 13755 pbad = su->su_badptr + badlenarg; 13756 for (;;) 13757 { 13758 goodlen = (int)(pgood - goodword); 13759 badlen = (int)(pbad - su->su_badptr); 13760 if (goodlen <= 0 || badlen <= 0) 13761 break; 13762 mb_ptr_back(goodword, pgood); 13763 mb_ptr_back(su->su_badptr, pbad); 13764 #ifdef FEAT_MBYTE 13765 if (has_mbyte) 13766 { 13767 if (mb_ptr2char(pgood) != mb_ptr2char(pbad)) 13768 break; 13769 } 13770 else 13771 #endif 13772 if (*pgood != *pbad) 13773 break; 13774 } 13775 13776 if (badlen == 0 && goodlen == 0) 13777 /* goodword doesn't change anything; may happen for "the the" changing 13778 * the first "the" to itself. */ 13779 return; 13780 13781 if (gap->ga_len == 0) 13782 i = -1; 13783 else 13784 { 13785 /* Check if the word is already there. Also check the length that is 13786 * being replaced "thes," -> "these" is a different suggestion from 13787 * "thes" -> "these". */ 13788 stp = &SUG(*gap, 0); 13789 for (i = gap->ga_len; --i >= 0; ++stp) 13790 if (stp->st_wordlen == goodlen 13791 && stp->st_orglen == badlen 13792 && STRNCMP(stp->st_word, goodword, goodlen) == 0) 13793 { 13794 /* 13795 * Found it. 
Remember the word with the lowest score. 13796 */ 13797 if (stp->st_slang == NULL) 13798 stp->st_slang = slang; 13799 13800 new_sug.st_score = score; 13801 new_sug.st_altscore = altscore; 13802 new_sug.st_had_bonus = had_bonus; 13803 13804 if (stp->st_had_bonus != had_bonus) 13805 { 13806 /* Only one of the two had the soundalike score computed. 13807 * Need to do that for the other one now, otherwise the 13808 * scores can't be compared. This happens because 13809 * suggest_try_change() doesn't compute the soundalike 13810 * word to keep it fast, while some special methods set 13811 * the soundalike score to zero. */ 13812 if (had_bonus) 13813 rescore_one(su, stp); 13814 else 13815 { 13816 new_sug.st_word = stp->st_word; 13817 new_sug.st_wordlen = stp->st_wordlen; 13818 new_sug.st_slang = stp->st_slang; 13819 new_sug.st_orglen = badlen; 13820 rescore_one(su, &new_sug); 13821 } 13822 } 13823 13824 if (stp->st_score > new_sug.st_score) 13825 { 13826 stp->st_score = new_sug.st_score; 13827 stp->st_altscore = new_sug.st_altscore; 13828 stp->st_had_bonus = new_sug.st_had_bonus; 13829 } 13830 break; 13831 } 13832 } 13833 13834 if (i < 0 && ga_grow(gap, 1) == OK) 13835 { 13836 /* Add a suggestion. */ 13837 stp = &SUG(*gap, gap->ga_len); 13838 stp->st_word = vim_strnsave(goodword, goodlen); 13839 if (stp->st_word != NULL) 13840 { 13841 stp->st_wordlen = goodlen; 13842 stp->st_score = score; 13843 stp->st_altscore = altscore; 13844 stp->st_had_bonus = had_bonus; 13845 stp->st_orglen = badlen; 13846 stp->st_slang = slang; 13847 ++gap->ga_len; 13848 13849 /* If we have too many suggestions now, sort the list and keep 13850 * the best suggestions. 
*/ 13851 if (gap->ga_len > SUG_MAX_COUNT(su)) 13852 { 13853 if (maxsf) 13854 su->su_sfmaxscore = cleanup_suggestions(gap, 13855 su->su_sfmaxscore, SUG_CLEAN_COUNT(su)); 13856 else 13857 su->su_maxscore = cleanup_suggestions(gap, 13858 su->su_maxscore, SUG_CLEAN_COUNT(su)); 13859 } 13860 } 13861 } 13862 } 13863 13864 /* 13865 * Suggestions may in fact be flagged as errors. Esp. for banned words and 13866 * for split words, such as "the the". Remove these from the list here. 13867 */ 13868 static void 13869 check_suggestions( 13870 suginfo_T *su, 13871 garray_T *gap) /* either su_ga or su_sga */ 13872 { 13873 suggest_T *stp; 13874 int i; 13875 char_u longword[MAXWLEN + 1]; 13876 int len; 13877 hlf_T attr; 13878 13879 stp = &SUG(*gap, 0); 13880 for (i = gap->ga_len - 1; i >= 0; --i) 13881 { 13882 /* Need to append what follows to check for "the the". */ 13883 vim_strncpy(longword, stp[i].st_word, MAXWLEN); 13884 len = stp[i].st_wordlen; 13885 vim_strncpy(longword + len, su->su_badptr + stp[i].st_orglen, 13886 MAXWLEN - len); 13887 attr = HLF_COUNT; 13888 (void)spell_check(curwin, longword, &attr, NULL, FALSE); 13889 if (attr != HLF_COUNT) 13890 { 13891 /* Remove this entry. */ 13892 vim_free(stp[i].st_word); 13893 --gap->ga_len; 13894 if (i < gap->ga_len) 13895 mch_memmove(stp + i, stp + i + 1, 13896 sizeof(suggest_T) * (gap->ga_len - i)); 13897 } 13898 } 13899 } 13900 13901 13902 /* 13903 * Add a word to be banned. 13904 */ 13905 static void 13906 add_banned( 13907 suginfo_T *su, 13908 char_u *word) 13909 { 13910 char_u *s; 13911 hash_T hash; 13912 hashitem_T *hi; 13913 13914 hash = hash_hash(word); 13915 hi = hash_lookup(&su->su_banned, word, hash); 13916 if (HASHITEM_EMPTY(hi)) 13917 { 13918 s = vim_strsave(word); 13919 if (s != NULL) 13920 hash_add_item(&su->su_banned, hi, s, hash); 13921 } 13922 } 13923 13924 /* 13925 * Recompute the score for all suggestions if sound-folding is possible. This 13926 * is slow, thus only done for the final results. 
13927 */ 13928 static void 13929 rescore_suggestions(suginfo_T *su) 13930 { 13931 int i; 13932 13933 if (su->su_sallang != NULL) 13934 for (i = 0; i < su->su_ga.ga_len; ++i) 13935 rescore_one(su, &SUG(su->su_ga, i)); 13936 } 13937 13938 /* 13939 * Recompute the score for one suggestion if sound-folding is possible. 13940 */ 13941 static void 13942 rescore_one(suginfo_T *su, suggest_T *stp) 13943 { 13944 slang_T *slang = stp->st_slang; 13945 char_u sal_badword[MAXWLEN]; 13946 char_u *p; 13947 13948 /* Only rescore suggestions that have no sal score yet and do have a 13949 * language. */ 13950 if (slang != NULL && slang->sl_sal.ga_len > 0 && !stp->st_had_bonus) 13951 { 13952 if (slang == su->su_sallang) 13953 p = su->su_sal_badword; 13954 else 13955 { 13956 spell_soundfold(slang, su->su_fbadword, TRUE, sal_badword); 13957 p = sal_badword; 13958 } 13959 13960 stp->st_altscore = stp_sal_score(stp, su, slang, p); 13961 if (stp->st_altscore == SCORE_MAXMAX) 13962 stp->st_altscore = SCORE_BIG; 13963 stp->st_score = RESCORE(stp->st_score, stp->st_altscore); 13964 stp->st_had_bonus = TRUE; 13965 } 13966 } 13967 13968 static int 13969 #ifdef __BORLANDC__ 13970 _RTLENTRYF 13971 #endif 13972 sug_compare(const void *s1, const void *s2); 13973 13974 /* 13975 * Function given to qsort() to sort the suggestions on st_score. 13976 * First on "st_score", then "st_altscore" then alphabetically. 13977 */ 13978 static int 13979 #ifdef __BORLANDC__ 13980 _RTLENTRYF 13981 #endif 13982 sug_compare(const void *s1, const void *s2) 13983 { 13984 suggest_T *p1 = (suggest_T *)s1; 13985 suggest_T *p2 = (suggest_T *)s2; 13986 int n = p1->st_score - p2->st_score; 13987 13988 if (n == 0) 13989 { 13990 n = p1->st_altscore - p2->st_altscore; 13991 if (n == 0) 13992 n = STRICMP(p1->st_word, p2->st_word); 13993 } 13994 return n; 13995 } 13996 13997 /* 13998 * Cleanup the suggestions: 13999 * - Sort on score. 14000 * - Remove words that won't be displayed. 
14001 * Returns the maximum score in the list or "maxscore" unmodified. 14002 */ 14003 static int 14004 cleanup_suggestions( 14005 garray_T *gap, 14006 int maxscore, 14007 int keep) /* nr of suggestions to keep */ 14008 { 14009 suggest_T *stp = &SUG(*gap, 0); 14010 int i; 14011 14012 /* Sort the list. */ 14013 qsort(gap->ga_data, (size_t)gap->ga_len, sizeof(suggest_T), sug_compare); 14014 14015 /* Truncate the list to the number of suggestions that will be displayed. */ 14016 if (gap->ga_len > keep) 14017 { 14018 for (i = keep; i < gap->ga_len; ++i) 14019 vim_free(stp[i].st_word); 14020 gap->ga_len = keep; 14021 return stp[keep - 1].st_score; 14022 } 14023 return maxscore; 14024 } 14025 14026 #if defined(FEAT_EVAL) || defined(PROTO) 14027 /* 14028 * Soundfold a string, for soundfold(). 14029 * Result is in allocated memory, NULL for an error. 14030 */ 14031 char_u * 14032 eval_soundfold(char_u *word) 14033 { 14034 langp_T *lp; 14035 char_u sound[MAXWLEN]; 14036 int lpi; 14037 14038 if (curwin->w_p_spell && *curwin->w_s->b_p_spl != NUL) 14039 /* Use the sound-folding of the first language that supports it. */ 14040 for (lpi = 0; lpi < curwin->w_s->b_langp.ga_len; ++lpi) 14041 { 14042 lp = LANGP_ENTRY(curwin->w_s->b_langp, lpi); 14043 if (lp->lp_slang->sl_sal.ga_len > 0) 14044 { 14045 /* soundfold the word */ 14046 spell_soundfold(lp->lp_slang, word, FALSE, sound); 14047 return vim_strsave(sound); 14048 } 14049 } 14050 14051 /* No language with sound folding, return word as-is. */ 14052 return vim_strsave(word); 14053 } 14054 #endif 14055 14056 /* 14057 * Turn "inword" into its sound-a-like equivalent in "res[MAXWLEN]". 14058 * 14059 * There are many ways to turn a word into a sound-a-like representation. The 14060 * oldest is Soundex (1918!). A nice overview can be found in "Approximate 14061 * swedish name matching - survey and test of different algorithms" by Klas 14062 * Erikson. 14063 * 14064 * We support two methods: 14065 * 1. 
SOFOFROM/SOFOTO do a simple character mapping. 14066 * 2. SAL items define a more advanced sound-folding (and much slower). 14067 */ 14068 static void 14069 spell_soundfold( 14070 slang_T *slang, 14071 char_u *inword, 14072 int folded, /* "inword" is already case-folded */ 14073 char_u *res) 14074 { 14075 char_u fword[MAXWLEN]; 14076 char_u *word; 14077 14078 if (slang->sl_sofo) 14079 /* SOFOFROM and SOFOTO used */ 14080 spell_soundfold_sofo(slang, inword, res); 14081 else 14082 { 14083 /* SAL items used. Requires the word to be case-folded. */ 14084 if (folded) 14085 word = inword; 14086 else 14087 { 14088 (void)spell_casefold(inword, (int)STRLEN(inword), fword, MAXWLEN); 14089 word = fword; 14090 } 14091 14092 #ifdef FEAT_MBYTE 14093 if (has_mbyte) 14094 spell_soundfold_wsal(slang, word, res); 14095 else 14096 #endif 14097 spell_soundfold_sal(slang, word, res); 14098 } 14099 } 14100 14101 /* 14102 * Perform sound folding of "inword" into "res" according to SOFOFROM and 14103 * SOFOTO lines. 14104 */ 14105 static void 14106 spell_soundfold_sofo(slang_T *slang, char_u *inword, char_u *res) 14107 { 14108 char_u *s; 14109 int ri = 0; 14110 int c; 14111 14112 #ifdef FEAT_MBYTE 14113 if (has_mbyte) 14114 { 14115 int prevc = 0; 14116 int *ip; 14117 14118 /* The sl_sal_first[] table contains the translation for chars up to 14119 * 255, sl_sal the rest. */ 14120 for (s = inword; *s != NUL; ) 14121 { 14122 c = mb_cptr2char_adv(&s); 14123 if (enc_utf8 ? utf_class(c) == 0 : vim_iswhite(c)) 14124 c = ' '; 14125 else if (c < 256) 14126 c = slang->sl_sal_first[c]; 14127 else 14128 { 14129 ip = ((int **)slang->sl_sal.ga_data)[c & 0xff]; 14130 if (ip == NULL) /* empty list, can't match */ 14131 c = NUL; 14132 else 14133 for (;;) /* find "c" in the list */ 14134 { 14135 if (*ip == 0) /* not found */ 14136 { 14137 c = NUL; 14138 break; 14139 } 14140 if (*ip == c) /* match! 
*/ 14141 { 14142 c = ip[1]; 14143 break; 14144 } 14145 ip += 2; 14146 } 14147 } 14148 14149 if (c != NUL && c != prevc) 14150 { 14151 ri += mb_char2bytes(c, res + ri); 14152 if (ri + MB_MAXBYTES > MAXWLEN) 14153 break; 14154 prevc = c; 14155 } 14156 } 14157 } 14158 else 14159 #endif 14160 { 14161 /* The sl_sal_first[] table contains the translation. */ 14162 for (s = inword; (c = *s) != NUL; ++s) 14163 { 14164 if (vim_iswhite(c)) 14165 c = ' '; 14166 else 14167 c = slang->sl_sal_first[c]; 14168 if (c != NUL && (ri == 0 || res[ri - 1] != c)) 14169 res[ri++] = c; 14170 } 14171 } 14172 14173 res[ri] = NUL; 14174 } 14175 14176 static void 14177 spell_soundfold_sal(slang_T *slang, char_u *inword, char_u *res) 14178 { 14179 salitem_T *smp; 14180 char_u word[MAXWLEN]; 14181 char_u *s = inword; 14182 char_u *t; 14183 char_u *pf; 14184 int i, j, z; 14185 int reslen; 14186 int n, k = 0; 14187 int z0; 14188 int k0; 14189 int n0; 14190 int c; 14191 int pri; 14192 int p0 = -333; 14193 int c0; 14194 14195 /* Remove accents, if wanted. We actually remove all non-word characters. 14196 * But keep white space. We need a copy, the word may be changed here. */ 14197 if (slang->sl_rem_accents) 14198 { 14199 t = word; 14200 while (*s != NUL) 14201 { 14202 if (vim_iswhite(*s)) 14203 { 14204 *t++ = ' '; 14205 s = skipwhite(s); 14206 } 14207 else 14208 { 14209 if (spell_iswordp_nmw(s, curwin)) 14210 *t++ = *s; 14211 ++s; 14212 } 14213 } 14214 *t = NUL; 14215 } 14216 else 14217 vim_strncpy(word, s, MAXWLEN - 1); 14218 14219 smp = (salitem_T *)slang->sl_sal.ga_data; 14220 14221 /* 14222 * This comes from Aspell phonet.cpp. Converted from C++ to C. 14223 * Changed to keep spaces. 14224 */ 14225 i = reslen = z = 0; 14226 while ((c = word[i]) != NUL) 14227 { 14228 /* Start with the first rule that has the character in the word. 
*/ 14229 n = slang->sl_sal_first[c]; 14230 z0 = 0; 14231 14232 if (n >= 0) 14233 { 14234 /* check all rules for the same letter */ 14235 for (; (s = smp[n].sm_lead)[0] == c; ++n) 14236 { 14237 /* Quickly skip entries that don't match the word. Most 14238 * entries are less then three chars, optimize for that. */ 14239 k = smp[n].sm_leadlen; 14240 if (k > 1) 14241 { 14242 if (word[i + 1] != s[1]) 14243 continue; 14244 if (k > 2) 14245 { 14246 for (j = 2; j < k; ++j) 14247 if (word[i + j] != s[j]) 14248 break; 14249 if (j < k) 14250 continue; 14251 } 14252 } 14253 14254 if ((pf = smp[n].sm_oneof) != NULL) 14255 { 14256 /* Check for match with one of the chars in "sm_oneof". */ 14257 while (*pf != NUL && *pf != word[i + k]) 14258 ++pf; 14259 if (*pf == NUL) 14260 continue; 14261 ++k; 14262 } 14263 s = smp[n].sm_rules; 14264 pri = 5; /* default priority */ 14265 14266 p0 = *s; 14267 k0 = k; 14268 while (*s == '-' && k > 1) 14269 { 14270 k--; 14271 s++; 14272 } 14273 if (*s == '<') 14274 s++; 14275 if (VIM_ISDIGIT(*s)) 14276 { 14277 /* determine priority */ 14278 pri = *s - '0'; 14279 s++; 14280 } 14281 if (*s == '^' && *(s + 1) == '^') 14282 s++; 14283 14284 if (*s == NUL 14285 || (*s == '^' 14286 && (i == 0 || !(word[i - 1] == ' ' 14287 || spell_iswordp(word + i - 1, curwin))) 14288 && (*(s + 1) != '$' 14289 || (!spell_iswordp(word + i + k0, curwin)))) 14290 || (*s == '$' && i > 0 14291 && spell_iswordp(word + i - 1, curwin) 14292 && (!spell_iswordp(word + i + k0, curwin)))) 14293 { 14294 /* search for followup rules, if: */ 14295 /* followup and k > 1 and NO '-' in searchstring */ 14296 c0 = word[i + k - 1]; 14297 n0 = slang->sl_sal_first[c0]; 14298 14299 if (slang->sl_followup && k > 1 && n0 >= 0 14300 && p0 != '-' && word[i + k] != NUL) 14301 { 14302 /* test follow-up rule for "word[i + k]" */ 14303 for ( ; (s = smp[n0].sm_lead)[0] == c0; ++n0) 14304 { 14305 /* Quickly skip entries that don't match the word. 
14306 * */ 14307 k0 = smp[n0].sm_leadlen; 14308 if (k0 > 1) 14309 { 14310 if (word[i + k] != s[1]) 14311 continue; 14312 if (k0 > 2) 14313 { 14314 pf = word + i + k + 1; 14315 for (j = 2; j < k0; ++j) 14316 if (*pf++ != s[j]) 14317 break; 14318 if (j < k0) 14319 continue; 14320 } 14321 } 14322 k0 += k - 1; 14323 14324 if ((pf = smp[n0].sm_oneof) != NULL) 14325 { 14326 /* Check for match with one of the chars in 14327 * "sm_oneof". */ 14328 while (*pf != NUL && *pf != word[i + k0]) 14329 ++pf; 14330 if (*pf == NUL) 14331 continue; 14332 ++k0; 14333 } 14334 14335 p0 = 5; 14336 s = smp[n0].sm_rules; 14337 while (*s == '-') 14338 { 14339 /* "k0" gets NOT reduced because 14340 * "if (k0 == k)" */ 14341 s++; 14342 } 14343 if (*s == '<') 14344 s++; 14345 if (VIM_ISDIGIT(*s)) 14346 { 14347 p0 = *s - '0'; 14348 s++; 14349 } 14350 14351 if (*s == NUL 14352 /* *s == '^' cuts */ 14353 || (*s == '$' 14354 && !spell_iswordp(word + i + k0, 14355 curwin))) 14356 { 14357 if (k0 == k) 14358 /* this is just a piece of the string */ 14359 continue; 14360 14361 if (p0 < pri) 14362 /* priority too low */ 14363 continue; 14364 /* rule fits; stop search */ 14365 break; 14366 } 14367 } 14368 14369 if (p0 >= pri && smp[n0].sm_lead[0] == c0) 14370 continue; 14371 } 14372 14373 /* replace string */ 14374 s = smp[n].sm_to; 14375 if (s == NULL) 14376 s = (char_u *)""; 14377 pf = smp[n].sm_rules; 14378 p0 = (vim_strchr(pf, '<') != NULL) ? 
1 : 0; 14379 if (p0 == 1 && z == 0) 14380 { 14381 /* rule with '<' is used */ 14382 if (reslen > 0 && *s != NUL && (res[reslen - 1] == c 14383 || res[reslen - 1] == *s)) 14384 reslen--; 14385 z0 = 1; 14386 z = 1; 14387 k0 = 0; 14388 while (*s != NUL && word[i + k0] != NUL) 14389 { 14390 word[i + k0] = *s; 14391 k0++; 14392 s++; 14393 } 14394 if (k > k0) 14395 STRMOVE(word + i + k0, word + i + k); 14396 14397 /* new "actual letter" */ 14398 c = word[i]; 14399 } 14400 else 14401 { 14402 /* no '<' rule used */ 14403 i += k - 1; 14404 z = 0; 14405 while (*s != NUL && s[1] != NUL && reslen < MAXWLEN) 14406 { 14407 if (reslen == 0 || res[reslen - 1] != *s) 14408 res[reslen++] = *s; 14409 s++; 14410 } 14411 /* new "actual letter" */ 14412 c = *s; 14413 if (strstr((char *)pf, "^^") != NULL) 14414 { 14415 if (c != NUL) 14416 res[reslen++] = c; 14417 STRMOVE(word, word + i + 1); 14418 i = 0; 14419 z0 = 1; 14420 } 14421 } 14422 break; 14423 } 14424 } 14425 } 14426 else if (vim_iswhite(c)) 14427 { 14428 c = ' '; 14429 k = 1; 14430 } 14431 14432 if (z0 == 0) 14433 { 14434 if (k && !p0 && reslen < MAXWLEN && c != NUL 14435 && (!slang->sl_collapse || reslen == 0 14436 || res[reslen - 1] != c)) 14437 /* condense only double letters */ 14438 res[reslen++] = c; 14439 14440 i++; 14441 z = 0; 14442 k = 0; 14443 } 14444 } 14445 14446 res[reslen] = NUL; 14447 } 14448 14449 #ifdef FEAT_MBYTE 14450 /* 14451 * Turn "inword" into its sound-a-like equivalent in "res[MAXWLEN]". 14452 * Multi-byte version of spell_soundfold(). 
14453 */ 14454 static void 14455 spell_soundfold_wsal(slang_T *slang, char_u *inword, char_u *res) 14456 { 14457 salitem_T *smp = (salitem_T *)slang->sl_sal.ga_data; 14458 int word[MAXWLEN]; 14459 int wres[MAXWLEN]; 14460 int l; 14461 char_u *s; 14462 int *ws; 14463 char_u *t; 14464 int *pf; 14465 int i, j, z; 14466 int reslen; 14467 int n, k = 0; 14468 int z0; 14469 int k0; 14470 int n0; 14471 int c; 14472 int pri; 14473 int p0 = -333; 14474 int c0; 14475 int did_white = FALSE; 14476 int wordlen; 14477 14478 14479 /* 14480 * Convert the multi-byte string to a wide-character string. 14481 * Remove accents, if wanted. We actually remove all non-word characters. 14482 * But keep white space. 14483 */ 14484 wordlen = 0; 14485 for (s = inword; *s != NUL; ) 14486 { 14487 t = s; 14488 c = mb_cptr2char_adv(&s); 14489 if (slang->sl_rem_accents) 14490 { 14491 if (enc_utf8 ? utf_class(c) == 0 : vim_iswhite(c)) 14492 { 14493 if (did_white) 14494 continue; 14495 c = ' '; 14496 did_white = TRUE; 14497 } 14498 else 14499 { 14500 did_white = FALSE; 14501 if (!spell_iswordp_nmw(t, curwin)) 14502 continue; 14503 } 14504 } 14505 word[wordlen++] = c; 14506 } 14507 word[wordlen] = NUL; 14508 14509 /* 14510 * This algorithm comes from Aspell phonet.cpp. 14511 * Converted from C++ to C. Added support for multi-byte chars. 14512 * Changed to keep spaces. 14513 */ 14514 i = reslen = z = 0; 14515 while ((c = word[i]) != NUL) 14516 { 14517 /* Start with the first rule that has the character in the word. */ 14518 n = slang->sl_sal_first[c & 0xff]; 14519 z0 = 0; 14520 14521 if (n >= 0) 14522 { 14523 /* Check all rules for the same index byte. 14524 * If c is 0x300 need extra check for the end of the array, as 14525 * (c & 0xff) is NUL. */ 14526 for (; ((ws = smp[n].sm_lead_w)[0] & 0xff) == (c & 0xff) 14527 && ws[0] != NUL; ++n) 14528 { 14529 /* Quickly skip entries that don't match the word. Most 14530 * entries are less then three chars, optimize for that. 
*/ 14531 if (c != ws[0]) 14532 continue; 14533 k = smp[n].sm_leadlen; 14534 if (k > 1) 14535 { 14536 if (word[i + 1] != ws[1]) 14537 continue; 14538 if (k > 2) 14539 { 14540 for (j = 2; j < k; ++j) 14541 if (word[i + j] != ws[j]) 14542 break; 14543 if (j < k) 14544 continue; 14545 } 14546 } 14547 14548 if ((pf = smp[n].sm_oneof_w) != NULL) 14549 { 14550 /* Check for match with one of the chars in "sm_oneof". */ 14551 while (*pf != NUL && *pf != word[i + k]) 14552 ++pf; 14553 if (*pf == NUL) 14554 continue; 14555 ++k; 14556 } 14557 s = smp[n].sm_rules; 14558 pri = 5; /* default priority */ 14559 14560 p0 = *s; 14561 k0 = k; 14562 while (*s == '-' && k > 1) 14563 { 14564 k--; 14565 s++; 14566 } 14567 if (*s == '<') 14568 s++; 14569 if (VIM_ISDIGIT(*s)) 14570 { 14571 /* determine priority */ 14572 pri = *s - '0'; 14573 s++; 14574 } 14575 if (*s == '^' && *(s + 1) == '^') 14576 s++; 14577 14578 if (*s == NUL 14579 || (*s == '^' 14580 && (i == 0 || !(word[i - 1] == ' ' 14581 || spell_iswordp_w(word + i - 1, curwin))) 14582 && (*(s + 1) != '$' 14583 || (!spell_iswordp_w(word + i + k0, curwin)))) 14584 || (*s == '$' && i > 0 14585 && spell_iswordp_w(word + i - 1, curwin) 14586 && (!spell_iswordp_w(word + i + k0, curwin)))) 14587 { 14588 /* search for followup rules, if: */ 14589 /* followup and k > 1 and NO '-' in searchstring */ 14590 c0 = word[i + k - 1]; 14591 n0 = slang->sl_sal_first[c0 & 0xff]; 14592 14593 if (slang->sl_followup && k > 1 && n0 >= 0 14594 && p0 != '-' && word[i + k] != NUL) 14595 { 14596 /* Test follow-up rule for "word[i + k]"; loop over 14597 * all entries with the same index byte. */ 14598 for ( ; ((ws = smp[n0].sm_lead_w)[0] & 0xff) 14599 == (c0 & 0xff); ++n0) 14600 { 14601 /* Quickly skip entries that don't match the word. 
14602 */ 14603 if (c0 != ws[0]) 14604 continue; 14605 k0 = smp[n0].sm_leadlen; 14606 if (k0 > 1) 14607 { 14608 if (word[i + k] != ws[1]) 14609 continue; 14610 if (k0 > 2) 14611 { 14612 pf = word + i + k + 1; 14613 for (j = 2; j < k0; ++j) 14614 if (*pf++ != ws[j]) 14615 break; 14616 if (j < k0) 14617 continue; 14618 } 14619 } 14620 k0 += k - 1; 14621 14622 if ((pf = smp[n0].sm_oneof_w) != NULL) 14623 { 14624 /* Check for match with one of the chars in 14625 * "sm_oneof". */ 14626 while (*pf != NUL && *pf != word[i + k0]) 14627 ++pf; 14628 if (*pf == NUL) 14629 continue; 14630 ++k0; 14631 } 14632 14633 p0 = 5; 14634 s = smp[n0].sm_rules; 14635 while (*s == '-') 14636 { 14637 /* "k0" gets NOT reduced because 14638 * "if (k0 == k)" */ 14639 s++; 14640 } 14641 if (*s == '<') 14642 s++; 14643 if (VIM_ISDIGIT(*s)) 14644 { 14645 p0 = *s - '0'; 14646 s++; 14647 } 14648 14649 if (*s == NUL 14650 /* *s == '^' cuts */ 14651 || (*s == '$' 14652 && !spell_iswordp_w(word + i + k0, 14653 curwin))) 14654 { 14655 if (k0 == k) 14656 /* this is just a piece of the string */ 14657 continue; 14658 14659 if (p0 < pri) 14660 /* priority too low */ 14661 continue; 14662 /* rule fits; stop search */ 14663 break; 14664 } 14665 } 14666 14667 if (p0 >= pri && (smp[n0].sm_lead_w[0] & 0xff) 14668 == (c0 & 0xff)) 14669 continue; 14670 } 14671 14672 /* replace string */ 14673 ws = smp[n].sm_to_w; 14674 s = smp[n].sm_rules; 14675 p0 = (vim_strchr(s, '<') != NULL) ? 
1 : 0; 14676 if (p0 == 1 && z == 0) 14677 { 14678 /* rule with '<' is used */ 14679 if (reslen > 0 && ws != NULL && *ws != NUL 14680 && (wres[reslen - 1] == c 14681 || wres[reslen - 1] == *ws)) 14682 reslen--; 14683 z0 = 1; 14684 z = 1; 14685 k0 = 0; 14686 if (ws != NULL) 14687 while (*ws != NUL && word[i + k0] != NUL) 14688 { 14689 word[i + k0] = *ws; 14690 k0++; 14691 ws++; 14692 } 14693 if (k > k0) 14694 mch_memmove(word + i + k0, word + i + k, 14695 sizeof(int) * (wordlen - (i + k) + 1)); 14696 14697 /* new "actual letter" */ 14698 c = word[i]; 14699 } 14700 else 14701 { 14702 /* no '<' rule used */ 14703 i += k - 1; 14704 z = 0; 14705 if (ws != NULL) 14706 while (*ws != NUL && ws[1] != NUL 14707 && reslen < MAXWLEN) 14708 { 14709 if (reslen == 0 || wres[reslen - 1] != *ws) 14710 wres[reslen++] = *ws; 14711 ws++; 14712 } 14713 /* new "actual letter" */ 14714 if (ws == NULL) 14715 c = NUL; 14716 else 14717 c = *ws; 14718 if (strstr((char *)s, "^^") != NULL) 14719 { 14720 if (c != NUL) 14721 wres[reslen++] = c; 14722 mch_memmove(word, word + i + 1, 14723 sizeof(int) * (wordlen - (i + 1) + 1)); 14724 i = 0; 14725 z0 = 1; 14726 } 14727 } 14728 break; 14729 } 14730 } 14731 } 14732 else if (vim_iswhite(c)) 14733 { 14734 c = ' '; 14735 k = 1; 14736 } 14737 14738 if (z0 == 0) 14739 { 14740 if (k && !p0 && reslen < MAXWLEN && c != NUL 14741 && (!slang->sl_collapse || reslen == 0 14742 || wres[reslen - 1] != c)) 14743 /* condense only double letters */ 14744 wres[reslen++] = c; 14745 14746 i++; 14747 z = 0; 14748 k = 0; 14749 } 14750 } 14751 14752 /* Convert wide characters in "wres" to a multi-byte string in "res". */ 14753 l = 0; 14754 for (n = 0; n < reslen; ++n) 14755 { 14756 l += mb_char2bytes(wres[n], res + l); 14757 if (l + MB_MAXBYTES > MAXWLEN) 14758 break; 14759 } 14760 res[l] = NUL; 14761 } 14762 #endif 14763 14764 /* 14765 * Compute a score for two sound-a-like words. 14766 * This permits up to two inserts/deletes/swaps/etc. to keep things fast. 
* Instead of a generic loop we write out the code.  That keeps it fast by
 * avoiding checks that will not be possible.
 *
 * Returns the sum of the SCORE_ values for the changes that were found, or
 * SCORE_MAXMAX when the words cannot be made equal with the changes that are
 * tried here.
 */
    static int
soundalike_score(
    char_u      *goodstart,     /* sound-folded good word */
    char_u      *badstart)      /* sound-folded bad word */
{
    char_u      *goodsound = goodstart;
    char_u      *badsound = badstart;
    int         goodlen;
    int         badlen;
    int         n;
    char_u      *pl, *ps;       /* the longer and the shorter word */
    char_u      *pl2, *ps2;     /* scratch positions in "pl" and "ps" */
    int         score = 0;

    /* Adding/inserting "*" at the start (word starts with vowel) shouldn't be
     * counted so much, vowels halfway the word aren't counted at all. */
    if ((*badsound == '*' || *goodsound == '*') && *badsound != *goodsound)
    {
        if ((badsound[0] == NUL && goodsound[1] == NUL)
            || (goodsound[0] == NUL && badsound[1] == NUL))
            /* changing word with vowel to word without a sound */
            return SCORE_DEL;
        if (badsound[0] == NUL || goodsound[0] == NUL)
            /* more than two changes */
            return SCORE_MAXMAX;

        if (badsound[1] == goodsound[1]
                || (badsound[1] != NUL
                    && goodsound[1] != NUL
                    && badsound[2] == goodsound[2]))
        {
            /* handle like a substitute */
        }
        else
        {
            /* Count the "*" for less than a full delete and skip over it. */
            score = 2 * SCORE_DEL / 3;
            if (*badsound == '*')
                ++badsound;
            else
                ++goodsound;
        }
    }

    goodlen = (int)STRLEN(goodsound);
    badlen = (int)STRLEN(badsound);

    /* Return quickly if the lengths are too different to be fixed by two
     * changes. */
    n = goodlen - badlen;
    if (n < -2 || n > 2)
        return SCORE_MAXMAX;

    if (n > 0)
    {
        pl = goodsound;     /* goodsound is longest */
        ps = badsound;
    }
    else
    {
        pl = badsound;      /* badsound is longest */
        ps = goodsound;
    }

    /* Skip over the identical part. */
    while (*pl == *ps && *pl != NUL)
    {
        ++pl;
        ++ps;
    }

    /* "n" is the length difference, it decides which combinations of changes
     * can make the words equal. */
    switch (n)
    {
        case -2:
        case 2:
            /*
             * Must delete two characters from "pl".
             */
            ++pl;       /* first delete */
            while (*pl == *ps)
            {
                ++pl;
                ++ps;
            }
            /* strings must be equal after second delete */
            if (STRCMP(pl + 1, ps) == 0)
                return score + SCORE_DEL * 2;

            /* Failed to compare. */
            break;

        case -1:
        case 1:
            /*
             * Minimal one delete from "pl" required.
             */

            /* 1: delete */
            pl2 = pl + 1;
            ps2 = ps;
            while (*pl2 == *ps2)
            {
                if (*pl2 == NUL)        /* reached the end */
                    return score + SCORE_DEL;
                ++pl2;
                ++ps2;
            }

            /* 2: delete then swap, then rest must be equal */
            if (pl2[0] == ps2[1] && pl2[1] == ps2[0]
                                             && STRCMP(pl2 + 2, ps2 + 2) == 0)
                return score + SCORE_DEL + SCORE_SWAP;

            /* 3: delete then substitute, then the rest must be equal */
            if (STRCMP(pl2 + 1, ps2 + 1) == 0)
                return score + SCORE_DEL + SCORE_SUBST;

            /* 4: first swap then delete */
            if (pl[0] == ps[1] && pl[1] == ps[0])
            {
                pl2 = pl + 2;       /* swap, skip two chars */
                ps2 = ps + 2;
                while (*pl2 == *ps2)
                {
                    ++pl2;
                    ++ps2;
                }
                /* delete a char and then strings must be equal */
                if (STRCMP(pl2 + 1, ps2) == 0)
                    return score + SCORE_SWAP + SCORE_DEL;
            }

            /* 5: first substitute then delete */
            pl2 = pl + 1;           /* substitute, skip one char */
            ps2 = ps + 1;
            while (*pl2 == *ps2)
            {
                ++pl2;
                ++ps2;
            }
            /* delete a char and then strings must be equal */
            if (STRCMP(pl2 + 1, ps2) == 0)
                return score + SCORE_SUBST + SCORE_DEL;

            /* Failed to compare. */
            break;

        case 0:
            /*
             * Lengths are equal, thus changes must result in same length: An
             * insert is only possible in combination with a delete.
             * 1: check for identical strings
             */
            if (*pl == NUL)
                return score;

            /* 2: swap */
            if (pl[0] == ps[1] && pl[1] == ps[0])
            {
                pl2 = pl + 2;       /* swap, skip two chars */
                ps2 = ps + 2;
                while (*pl2 == *ps2)
                {
                    if (*pl2 == NUL)    /* reached the end */
                        return score + SCORE_SWAP;
                    ++pl2;
                    ++ps2;
                }
                /* 3: swap and swap again */
                if (pl2[0] == ps2[1] && pl2[1] == ps2[0]
                                             && STRCMP(pl2 + 2, ps2 + 2) == 0)
                    return score + SCORE_SWAP + SCORE_SWAP;

                /* 4: swap and substitute */
                if (STRCMP(pl2 + 1, ps2 + 1) == 0)
                    return score + SCORE_SWAP + SCORE_SUBST;
            }

            /* 5: substitute */
            pl2 = pl + 1;
            ps2 = ps + 1;
            while (*pl2 == *ps2)
            {
                if (*pl2 == NUL)        /* reached the end */
                    return score + SCORE_SUBST;
                ++pl2;
                ++ps2;
            }

            /* 6: substitute and swap */
            if (pl2[0] == ps2[1] && pl2[1] == ps2[0]
                                             && STRCMP(pl2 + 2, ps2 + 2) == 0)
                return score + SCORE_SUBST + SCORE_SWAP;

            /* 7: substitute and substitute */
            if (STRCMP(pl2 + 1, ps2 + 1) == 0)
                return score + SCORE_SUBST + SCORE_SUBST;

            /* 8: insert then delete */
            pl2 = pl;
            ps2 = ps + 1;
            while (*pl2 == *ps2)
            {
                ++pl2;
                ++ps2;
            }
            if (STRCMP(pl2 + 1, ps2) == 0)
                return score + SCORE_INS + SCORE_DEL;

            /* 9: delete then insert */
            pl2 = pl + 1;
            ps2 = ps;
            while (*pl2 == *ps2)
            {
                ++pl2;
                ++ps2;
            }
            if (STRCMP(pl2, ps2 + 1) == 0)
                return score + SCORE_INS + SCORE_DEL;

            /* Failed to compare. */
            break;
    }

    return SCORE_MAXMAX;
}

/*
 * Compute the "edit distance" to turn "badword" into "goodword".  The less
 * deletes/inserts/substitutes/swaps are required the lower the score.
 *
 * The algorithm is described by Du and Chang, 1992.
 * The implementation of the algorithm comes from Aspell editdist.cpp,
 * edit_distance().  It has been converted from C++ to C and modified to
 * support multi-byte characters.
 *
 * "slang" may be NULL; it is only used to give similar characters (MAP
 * entries) a lower score.  Returns zero when out of memory.
 */
    static int
spell_edit_score(
    slang_T     *slang,
    char_u      *badword,
    char_u      *goodword)
{
    int         *cnt;
    int         badlen, goodlen;        /* lengths including NUL */
    int         j, i;
    int         t;
    int         bc, gc;
    int         pbc, pgc;
#ifdef FEAT_MBYTE
    char_u      *p;
    int         wbadword[MAXWLEN];
    int         wgoodword[MAXWLEN];

    if (has_mbyte)
    {
        /* Get the characters from the multi-byte strings and put them in an
         * int array for easy access. */
        /* NOTE(review): no length check here; presumably both words are
         * shorter than MAXWLEN characters -- confirm against the callers. */
        for (p = badword, badlen = 0; *p != NUL; )
            wbadword[badlen++] = mb_cptr2char_adv(&p);
        wbadword[badlen++] = 0;
        for (p = goodword, goodlen = 0; *p != NUL; )
            wgoodword[goodlen++] = mb_cptr2char_adv(&p);
        wgoodword[goodlen++] = 0;
    }
    else
#endif
    {
        badlen = (int)STRLEN(badword) + 1;
        goodlen = (int)STRLEN(goodword) + 1;
    }

    /* We use "cnt" as an array: CNT(badword_idx, goodword_idx). */
#define CNT(a, b)   cnt[(a) + (b) * (badlen + 1)]
    cnt = (int *)lalloc((long_u)(sizeof(int) * (badlen + 1) * (goodlen + 1)),
                                                                        TRUE);
    if (cnt == NULL)
        return 0;       /* out of memory */

    /* First row: only inserts are possible. */
    CNT(0, 0) = 0;
    for (j = 1; j <= goodlen; ++j)
        CNT(0, j) = CNT(0, j - 1) + SCORE_INS;

    for (i = 1; i <= badlen; ++i)
    {
        /* First column: only deletes are possible. */
        CNT(i, 0) = CNT(i - 1, 0) + SCORE_DEL;
        for (j = 1; j <= goodlen; ++j)
        {
#ifdef FEAT_MBYTE
            if (has_mbyte)
            {
                bc = wbadword[i - 1];
                gc = wgoodword[j - 1];
            }
            else
#endif
            {
                bc = badword[i - 1];
                gc = goodword[j - 1];
            }
            if (bc == gc)
                /* Equal characters: no extra cost. */
                CNT(i, j) = CNT(i - 1, j - 1);
            else
            {
                /* Use a better score when there is only a case difference. */
                if (SPELL_TOFOLD(bc) == SPELL_TOFOLD(gc))
                    CNT(i, j) = SCORE_ICASE + CNT(i - 1, j - 1);
                else
                {
                    /* For a similar character use SCORE_SIMILAR. */
                    if (slang != NULL
                            && slang->sl_has_map
                            && similar_chars(slang, gc, bc))
                        CNT(i, j) = SCORE_SIMILAR + CNT(i - 1, j - 1);
                    else
                        CNT(i, j) = SCORE_SUBST + CNT(i - 1, j - 1);
                }

                /* Check if swapping the two previous characters gives a
                 * lower score. */
                if (i > 1 && j > 1)
                {
#ifdef FEAT_MBYTE
                    if (has_mbyte)
                    {
                        pbc = wbadword[i - 2];
                        pgc = wgoodword[j - 2];
                    }
                    else
#endif
                    {
                        pbc = badword[i - 2];
                        pgc = goodword[j - 2];
                    }
                    if (bc == pgc && pbc == gc)
                    {
                        t = SCORE_SWAP + CNT(i - 2, j - 2);
                        if (t < CNT(i, j))
                            CNT(i, j) = t;
                    }
                }
                /* Check if a delete or an insert gives a lower score. */
                t = SCORE_DEL + CNT(i - 1, j);
                if (t < CNT(i, j))
                    CNT(i, j) = t;
                t = SCORE_INS + CNT(i, j - 1);
                if (t < CNT(i, j))
                    CNT(i, j) = t;
            }
        }
    }

    /* The lengths include the NUL, thus this is the entry for the last
     * character of both words. */
    i = CNT(badlen - 1, goodlen - 1);
    vim_free(cnt);
    return i;
}

/* Item on the stack of alternatives in spell_edit_score_limit(). */
typedef struct
{
    int         badi;
    int         goodi;
    int         score;
} limitscore_T;

15129 /* 15130 * Like spell_edit_score(), but with a limit on the score to make it faster. 15131 * May return SCORE_MAXMAX when the score is higher than "limit". 15132 * 15133 * This uses a stack for the edits still to be tried. 15134 * The idea comes from Aspell leditdist.cpp. Rewritten in C and added support 15135 * for multi-byte characters. 15136 */ 15137 static int 15138 spell_edit_score_limit( 15139 slang_T *slang, 15140 char_u *badword, 15141 char_u *goodword, 15142 int limit) 15143 { 15144 limitscore_T stack[10]; /* allow for over 3 * 2 edits */ 15145 int stackidx; 15146 int bi, gi; 15147 int bi2, gi2; 15148 int bc, gc; 15149 int score; 15150 int score_off; 15151 int minscore; 15152 int round; 15153 15154 #ifdef FEAT_MBYTE 15155 /* Multi-byte characters require a bit more work, use a different function 15156 * to avoid testing "has_mbyte" quite often. */ 15157 if (has_mbyte) 15158 return spell_edit_score_limit_w(slang, badword, goodword, limit); 15159 #endif 15160 15161 /* 15162 * The idea is to go from start to end over the words. So long as 15163 * characters are equal just continue, this always gives the lowest score. 15164 * When there is a difference try several alternatives. Each alternative 15165 * increases "score" for the edit distance. Some of the alternatives are 15166 * pushed unto a stack and tried later, some are tried right away. At the 15167 * end of the word the score for one alternative is known. The lowest 15168 * possible score is stored in "minscore". 15169 */ 15170 stackidx = 0; 15171 bi = 0; 15172 gi = 0; 15173 score = 0; 15174 minscore = limit + 1; 15175 15176 for (;;) 15177 { 15178 /* Skip over an equal part, score remains the same. 
*/ 15179 for (;;) 15180 { 15181 bc = badword[bi]; 15182 gc = goodword[gi]; 15183 if (bc != gc) /* stop at a char that's different */ 15184 break; 15185 if (bc == NUL) /* both words end */ 15186 { 15187 if (score < minscore) 15188 minscore = score; 15189 goto pop; /* do next alternative */ 15190 } 15191 ++bi; 15192 ++gi; 15193 } 15194 15195 if (gc == NUL) /* goodword ends, delete badword chars */ 15196 { 15197 do 15198 { 15199 if ((score += SCORE_DEL) >= minscore) 15200 goto pop; /* do next alternative */ 15201 } while (badword[++bi] != NUL); 15202 minscore = score; 15203 } 15204 else if (bc == NUL) /* badword ends, insert badword chars */ 15205 { 15206 do 15207 { 15208 if ((score += SCORE_INS) >= minscore) 15209 goto pop; /* do next alternative */ 15210 } while (goodword[++gi] != NUL); 15211 minscore = score; 15212 } 15213 else /* both words continue */ 15214 { 15215 /* If not close to the limit, perform a change. Only try changes 15216 * that may lead to a lower score than "minscore". 15217 * round 0: try deleting a char from badword 15218 * round 1: try inserting a char in badword */ 15219 for (round = 0; round <= 1; ++round) 15220 { 15221 score_off = score + (round == 0 ? SCORE_DEL : SCORE_INS); 15222 if (score_off < minscore) 15223 { 15224 if (score_off + SCORE_EDIT_MIN >= minscore) 15225 { 15226 /* Near the limit, rest of the words must match. We 15227 * can check that right now, no need to push an item 15228 * onto the stack. 
*/ 15229 bi2 = bi + 1 - round; 15230 gi2 = gi + round; 15231 while (goodword[gi2] == badword[bi2]) 15232 { 15233 if (goodword[gi2] == NUL) 15234 { 15235 minscore = score_off; 15236 break; 15237 } 15238 ++bi2; 15239 ++gi2; 15240 } 15241 } 15242 else 15243 { 15244 /* try deleting/inserting a character later */ 15245 stack[stackidx].badi = bi + 1 - round; 15246 stack[stackidx].goodi = gi + round; 15247 stack[stackidx].score = score_off; 15248 ++stackidx; 15249 } 15250 } 15251 } 15252 15253 if (score + SCORE_SWAP < minscore) 15254 { 15255 /* If swapping two characters makes a match then the 15256 * substitution is more expensive, thus there is no need to 15257 * try both. */ 15258 if (gc == badword[bi + 1] && bc == goodword[gi + 1]) 15259 { 15260 /* Swap two characters, that is: skip them. */ 15261 gi += 2; 15262 bi += 2; 15263 score += SCORE_SWAP; 15264 continue; 15265 } 15266 } 15267 15268 /* Substitute one character for another which is the same 15269 * thing as deleting a character from both goodword and badword. 15270 * Use a better score when there is only a case difference. */ 15271 if (SPELL_TOFOLD(bc) == SPELL_TOFOLD(gc)) 15272 score += SCORE_ICASE; 15273 else 15274 { 15275 /* For a similar character use SCORE_SIMILAR. */ 15276 if (slang != NULL 15277 && slang->sl_has_map 15278 && similar_chars(slang, gc, bc)) 15279 score += SCORE_SIMILAR; 15280 else 15281 score += SCORE_SUBST; 15282 } 15283 15284 if (score < minscore) 15285 { 15286 /* Do the substitution. */ 15287 ++gi; 15288 ++bi; 15289 continue; 15290 } 15291 } 15292 pop: 15293 /* 15294 * Get here to try the next alternative, pop it from the stack. 15295 */ 15296 if (stackidx == 0) /* stack is empty, finished */ 15297 break; 15298 15299 /* pop an item from the stack */ 15300 --stackidx; 15301 gi = stack[stackidx].goodi; 15302 bi = stack[stackidx].badi; 15303 score = stack[stackidx].score; 15304 } 15305 15306 /* When the score goes over "limit" it may actually be much higher. 
15307 * Return a very large number to avoid going below the limit when giving a 15308 * bonus. */ 15309 if (minscore > limit) 15310 return SCORE_MAXMAX; 15311 return minscore; 15312 } 15313 15314 #ifdef FEAT_MBYTE 15315 /* 15316 * Multi-byte version of spell_edit_score_limit(). 15317 * Keep it in sync with the above! 15318 */ 15319 static int 15320 spell_edit_score_limit_w( 15321 slang_T *slang, 15322 char_u *badword, 15323 char_u *goodword, 15324 int limit) 15325 { 15326 limitscore_T stack[10]; /* allow for over 3 * 2 edits */ 15327 int stackidx; 15328 int bi, gi; 15329 int bi2, gi2; 15330 int bc, gc; 15331 int score; 15332 int score_off; 15333 int minscore; 15334 int round; 15335 char_u *p; 15336 int wbadword[MAXWLEN]; 15337 int wgoodword[MAXWLEN]; 15338 15339 /* Get the characters from the multi-byte strings and put them in an 15340 * int array for easy access. */ 15341 bi = 0; 15342 for (p = badword; *p != NUL; ) 15343 wbadword[bi++] = mb_cptr2char_adv(&p); 15344 wbadword[bi++] = 0; 15345 gi = 0; 15346 for (p = goodword; *p != NUL; ) 15347 wgoodword[gi++] = mb_cptr2char_adv(&p); 15348 wgoodword[gi++] = 0; 15349 15350 /* 15351 * The idea is to go from start to end over the words. So long as 15352 * characters are equal just continue, this always gives the lowest score. 15353 * When there is a difference try several alternatives. Each alternative 15354 * increases "score" for the edit distance. Some of the alternatives are 15355 * pushed unto a stack and tried later, some are tried right away. At the 15356 * end of the word the score for one alternative is known. The lowest 15357 * possible score is stored in "minscore". 15358 */ 15359 stackidx = 0; 15360 bi = 0; 15361 gi = 0; 15362 score = 0; 15363 minscore = limit + 1; 15364 15365 for (;;) 15366 { 15367 /* Skip over an equal part, score remains the same. 
*/ 15368 for (;;) 15369 { 15370 bc = wbadword[bi]; 15371 gc = wgoodword[gi]; 15372 15373 if (bc != gc) /* stop at a char that's different */ 15374 break; 15375 if (bc == NUL) /* both words end */ 15376 { 15377 if (score < minscore) 15378 minscore = score; 15379 goto pop; /* do next alternative */ 15380 } 15381 ++bi; 15382 ++gi; 15383 } 15384 15385 if (gc == NUL) /* goodword ends, delete badword chars */ 15386 { 15387 do 15388 { 15389 if ((score += SCORE_DEL) >= minscore) 15390 goto pop; /* do next alternative */ 15391 } while (wbadword[++bi] != NUL); 15392 minscore = score; 15393 } 15394 else if (bc == NUL) /* badword ends, insert badword chars */ 15395 { 15396 do 15397 { 15398 if ((score += SCORE_INS) >= minscore) 15399 goto pop; /* do next alternative */ 15400 } while (wgoodword[++gi] != NUL); 15401 minscore = score; 15402 } 15403 else /* both words continue */ 15404 { 15405 /* If not close to the limit, perform a change. Only try changes 15406 * that may lead to a lower score than "minscore". 15407 * round 0: try deleting a char from badword 15408 * round 1: try inserting a char in badword */ 15409 for (round = 0; round <= 1; ++round) 15410 { 15411 score_off = score + (round == 0 ? SCORE_DEL : SCORE_INS); 15412 if (score_off < minscore) 15413 { 15414 if (score_off + SCORE_EDIT_MIN >= minscore) 15415 { 15416 /* Near the limit, rest of the words must match. We 15417 * can check that right now, no need to push an item 15418 * onto the stack. 
*/ 15419 bi2 = bi + 1 - round; 15420 gi2 = gi + round; 15421 while (wgoodword[gi2] == wbadword[bi2]) 15422 { 15423 if (wgoodword[gi2] == NUL) 15424 { 15425 minscore = score_off; 15426 break; 15427 } 15428 ++bi2; 15429 ++gi2; 15430 } 15431 } 15432 else 15433 { 15434 /* try deleting a character from badword later */ 15435 stack[stackidx].badi = bi + 1 - round; 15436 stack[stackidx].goodi = gi + round; 15437 stack[stackidx].score = score_off; 15438 ++stackidx; 15439 } 15440 } 15441 } 15442 15443 if (score + SCORE_SWAP < minscore) 15444 { 15445 /* If swapping two characters makes a match then the 15446 * substitution is more expensive, thus there is no need to 15447 * try both. */ 15448 if (gc == wbadword[bi + 1] && bc == wgoodword[gi + 1]) 15449 { 15450 /* Swap two characters, that is: skip them. */ 15451 gi += 2; 15452 bi += 2; 15453 score += SCORE_SWAP; 15454 continue; 15455 } 15456 } 15457 15458 /* Substitute one character for another which is the same 15459 * thing as deleting a character from both goodword and badword. 15460 * Use a better score when there is only a case difference. */ 15461 if (SPELL_TOFOLD(bc) == SPELL_TOFOLD(gc)) 15462 score += SCORE_ICASE; 15463 else 15464 { 15465 /* For a similar character use SCORE_SIMILAR. */ 15466 if (slang != NULL 15467 && slang->sl_has_map 15468 && similar_chars(slang, gc, bc)) 15469 score += SCORE_SIMILAR; 15470 else 15471 score += SCORE_SUBST; 15472 } 15473 15474 if (score < minscore) 15475 { 15476 /* Do the substitution. */ 15477 ++gi; 15478 ++bi; 15479 continue; 15480 } 15481 } 15482 pop: 15483 /* 15484 * Get here to try the next alternative, pop it from the stack. 15485 */ 15486 if (stackidx == 0) /* stack is empty, finished */ 15487 break; 15488 15489 /* pop an item from the stack */ 15490 --stackidx; 15491 gi = stack[stackidx].goodi; 15492 bi = stack[stackidx].badi; 15493 score = stack[stackidx].score; 15494 } 15495 15496 /* When the score goes over "limit" it may actually be much higher. 
15497 * Return a very large number to avoid going below the limit when giving a 15498 * bonus. */ 15499 if (minscore > limit) 15500 return SCORE_MAXMAX; 15501 return minscore; 15502 } 15503 #endif 15504 15505 /* 15506 * ":spellinfo" 15507 */ 15508 void 15509 ex_spellinfo(exarg_T *eap UNUSED) 15510 { 15511 int lpi; 15512 langp_T *lp; 15513 char_u *p; 15514 15515 if (no_spell_checking(curwin)) 15516 return; 15517 15518 msg_start(); 15519 for (lpi = 0; lpi < curwin->w_s->b_langp.ga_len && !got_int; ++lpi) 15520 { 15521 lp = LANGP_ENTRY(curwin->w_s->b_langp, lpi); 15522 msg_puts((char_u *)"file: "); 15523 msg_puts(lp->lp_slang->sl_fname); 15524 msg_putchar('\n'); 15525 p = lp->lp_slang->sl_info; 15526 if (p != NULL) 15527 { 15528 msg_puts(p); 15529 msg_putchar('\n'); 15530 } 15531 } 15532 msg_end(); 15533 } 15534 15535 #define DUMPFLAG_KEEPCASE 1 /* round 2: keep-case tree */ 15536 #define DUMPFLAG_COUNT 2 /* include word count */ 15537 #define DUMPFLAG_ICASE 4 /* ignore case when finding matches */ 15538 #define DUMPFLAG_ONECAP 8 /* pattern starts with capital */ 15539 #define DUMPFLAG_ALLCAP 16 /* pattern is all capitals */ 15540 15541 /* 15542 * ":spelldump" 15543 */ 15544 void 15545 ex_spelldump(exarg_T *eap) 15546 { 15547 char_u *spl; 15548 long dummy; 15549 15550 if (no_spell_checking(curwin)) 15551 return; 15552 get_option_value((char_u*)"spl", &dummy, &spl, OPT_LOCAL); 15553 15554 /* Create a new empty buffer in a new window. */ 15555 do_cmdline_cmd((char_u *)"new"); 15556 15557 /* enable spelling locally in the new window */ 15558 set_option_value((char_u*)"spell", TRUE, (char_u*)"", OPT_LOCAL); 15559 set_option_value((char_u*)"spl", dummy, spl, OPT_LOCAL); 15560 vim_free(spl); 15561 15562 if (!bufempty() || !buf_valid(curbuf)) 15563 return; 15564 15565 spell_dump_compl(NULL, 0, NULL, eap->forceit ? DUMPFLAG_COUNT : 0); 15566 15567 /* Delete the empty line that we started with. 
*/ 15568 if (curbuf->b_ml.ml_line_count > 1) 15569 ml_delete(curbuf->b_ml.ml_line_count, FALSE); 15570 15571 redraw_later(NOT_VALID); 15572 } 15573 15574 /* 15575 * Go through all possible words and: 15576 * 1. When "pat" is NULL: dump a list of all words in the current buffer. 15577 * "ic" and "dir" are not used. 15578 * 2. When "pat" is not NULL: add matching words to insert mode completion. 15579 */ 15580 void 15581 spell_dump_compl( 15582 char_u *pat, /* leading part of the word */ 15583 int ic, /* ignore case */ 15584 int *dir, /* direction for adding matches */ 15585 int dumpflags_arg) /* DUMPFLAG_* */ 15586 { 15587 langp_T *lp; 15588 slang_T *slang; 15589 idx_T arridx[MAXWLEN]; 15590 int curi[MAXWLEN]; 15591 char_u word[MAXWLEN]; 15592 int c; 15593 char_u *byts; 15594 idx_T *idxs; 15595 linenr_T lnum = 0; 15596 int round; 15597 int depth; 15598 int n; 15599 int flags; 15600 char_u *region_names = NULL; /* region names being used */ 15601 int do_region = TRUE; /* dump region names and numbers */ 15602 char_u *p; 15603 int lpi; 15604 int dumpflags = dumpflags_arg; 15605 int patlen; 15606 15607 /* When ignoring case or when the pattern starts with capital pass this on 15608 * to dump_word(). */ 15609 if (pat != NULL) 15610 { 15611 if (ic) 15612 dumpflags |= DUMPFLAG_ICASE; 15613 else 15614 { 15615 n = captype(pat, NULL); 15616 if (n == WF_ONECAP) 15617 dumpflags |= DUMPFLAG_ONECAP; 15618 else if (n == WF_ALLCAP 15619 #ifdef FEAT_MBYTE 15620 && (int)STRLEN(pat) > mb_ptr2len(pat) 15621 #else 15622 && (int)STRLEN(pat) > 1 15623 #endif 15624 ) 15625 dumpflags |= DUMPFLAG_ALLCAP; 15626 } 15627 } 15628 15629 /* Find out if we can support regions: All languages must support the same 15630 * regions or none at all. 
*/ 15631 for (lpi = 0; lpi < curwin->w_s->b_langp.ga_len; ++lpi) 15632 { 15633 lp = LANGP_ENTRY(curwin->w_s->b_langp, lpi); 15634 p = lp->lp_slang->sl_regions; 15635 if (p[0] != 0) 15636 { 15637 if (region_names == NULL) /* first language with regions */ 15638 region_names = p; 15639 else if (STRCMP(region_names, p) != 0) 15640 { 15641 do_region = FALSE; /* region names are different */ 15642 break; 15643 } 15644 } 15645 } 15646 15647 if (do_region && region_names != NULL) 15648 { 15649 if (pat == NULL) 15650 { 15651 vim_snprintf((char *)IObuff, IOSIZE, "/regions=%s", region_names); 15652 ml_append(lnum++, IObuff, (colnr_T)0, FALSE); 15653 } 15654 } 15655 else 15656 do_region = FALSE; 15657 15658 /* 15659 * Loop over all files loaded for the entries in 'spelllang'. 15660 */ 15661 for (lpi = 0; lpi < curwin->w_s->b_langp.ga_len; ++lpi) 15662 { 15663 lp = LANGP_ENTRY(curwin->w_s->b_langp, lpi); 15664 slang = lp->lp_slang; 15665 if (slang->sl_fbyts == NULL) /* reloading failed */ 15666 continue; 15667 15668 if (pat == NULL) 15669 { 15670 vim_snprintf((char *)IObuff, IOSIZE, "# file: %s", slang->sl_fname); 15671 ml_append(lnum++, IObuff, (colnr_T)0, FALSE); 15672 } 15673 15674 /* When matching with a pattern and there are no prefixes only use 15675 * parts of the tree that match "pat". 
*/ 15676 if (pat != NULL && slang->sl_pbyts == NULL) 15677 patlen = (int)STRLEN(pat); 15678 else 15679 patlen = -1; 15680 15681 /* round 1: case-folded tree 15682 * round 2: keep-case tree */ 15683 for (round = 1; round <= 2; ++round) 15684 { 15685 if (round == 1) 15686 { 15687 dumpflags &= ~DUMPFLAG_KEEPCASE; 15688 byts = slang->sl_fbyts; 15689 idxs = slang->sl_fidxs; 15690 } 15691 else 15692 { 15693 dumpflags |= DUMPFLAG_KEEPCASE; 15694 byts = slang->sl_kbyts; 15695 idxs = slang->sl_kidxs; 15696 } 15697 if (byts == NULL) 15698 continue; /* array is empty */ 15699 15700 depth = 0; 15701 arridx[0] = 0; 15702 curi[0] = 1; 15703 while (depth >= 0 && !got_int 15704 && (pat == NULL || !compl_interrupted)) 15705 { 15706 if (curi[depth] > byts[arridx[depth]]) 15707 { 15708 /* Done all bytes at this node, go up one level. */ 15709 --depth; 15710 line_breakcheck(); 15711 ins_compl_check_keys(50); 15712 } 15713 else 15714 { 15715 /* Do one more byte at this node. */ 15716 n = arridx[depth] + curi[depth]; 15717 ++curi[depth]; 15718 c = byts[n]; 15719 if (c == 0) 15720 { 15721 /* End of word, deal with the word. 15722 * Don't use keep-case words in the fold-case tree, 15723 * they will appear in the keep-case tree. 15724 * Only use the word when the region matches. */ 15725 flags = (int)idxs[n]; 15726 if ((round == 2 || (flags & WF_KEEPCAP) == 0) 15727 && (flags & WF_NEEDCOMP) == 0 15728 && (do_region 15729 || (flags & WF_REGION) == 0 15730 || (((unsigned)flags >> 16) 15731 & lp->lp_region) != 0)) 15732 { 15733 word[depth] = NUL; 15734 if (!do_region) 15735 flags &= ~WF_REGION; 15736 15737 /* Dump the basic word if there is no prefix or 15738 * when it's the first one. */ 15739 c = (unsigned)flags >> 24; 15740 if (c == 0 || curi[depth] == 2) 15741 { 15742 dump_word(slang, word, pat, dir, 15743 dumpflags, flags, lnum); 15744 if (pat == NULL) 15745 ++lnum; 15746 } 15747 15748 /* Apply the prefix, if there is one. 
*/ 15749 if (c != 0) 15750 lnum = dump_prefixes(slang, word, pat, dir, 15751 dumpflags, flags, lnum); 15752 } 15753 } 15754 else 15755 { 15756 /* Normal char, go one level deeper. */ 15757 word[depth++] = c; 15758 arridx[depth] = idxs[n]; 15759 curi[depth] = 1; 15760 15761 /* Check if this characters matches with the pattern. 15762 * If not skip the whole tree below it. 15763 * Always ignore case here, dump_word() will check 15764 * proper case later. This isn't exactly right when 15765 * length changes for multi-byte characters with 15766 * ignore case... */ 15767 if (depth <= patlen 15768 && MB_STRNICMP(word, pat, depth) != 0) 15769 --depth; 15770 } 15771 } 15772 } 15773 } 15774 } 15775 } 15776 15777 /* 15778 * Dump one word: apply case modifications and append a line to the buffer. 15779 * When "lnum" is zero add insert mode completion. 15780 */ 15781 static void 15782 dump_word( 15783 slang_T *slang, 15784 char_u *word, 15785 char_u *pat, 15786 int *dir, 15787 int dumpflags, 15788 int wordflags, 15789 linenr_T lnum) 15790 { 15791 int keepcap = FALSE; 15792 char_u *p; 15793 char_u *tw; 15794 char_u cword[MAXWLEN]; 15795 char_u badword[MAXWLEN + 10]; 15796 int i; 15797 int flags = wordflags; 15798 15799 if (dumpflags & DUMPFLAG_ONECAP) 15800 flags |= WF_ONECAP; 15801 if (dumpflags & DUMPFLAG_ALLCAP) 15802 flags |= WF_ALLCAP; 15803 15804 if ((dumpflags & DUMPFLAG_KEEPCASE) == 0 && (flags & WF_CAPMASK) != 0) 15805 { 15806 /* Need to fix case according to "flags". */ 15807 make_case_word(word, cword, flags); 15808 p = cword; 15809 } 15810 else 15811 { 15812 p = word; 15813 if ((dumpflags & DUMPFLAG_KEEPCASE) 15814 && ((captype(word, NULL) & WF_KEEPCAP) == 0 15815 || (flags & WF_FIXCAP) != 0)) 15816 keepcap = TRUE; 15817 } 15818 tw = p; 15819 15820 if (pat == NULL) 15821 { 15822 /* Add flags and regions after a slash. 
*/ 15823 if ((flags & (WF_BANNED | WF_RARE | WF_REGION)) || keepcap) 15824 { 15825 STRCPY(badword, p); 15826 STRCAT(badword, "/"); 15827 if (keepcap) 15828 STRCAT(badword, "="); 15829 if (flags & WF_BANNED) 15830 STRCAT(badword, "!"); 15831 else if (flags & WF_RARE) 15832 STRCAT(badword, "?"); 15833 if (flags & WF_REGION) 15834 for (i = 0; i < 7; ++i) 15835 if (flags & (0x10000 << i)) 15836 sprintf((char *)badword + STRLEN(badword), "%d", i + 1); 15837 p = badword; 15838 } 15839 15840 if (dumpflags & DUMPFLAG_COUNT) 15841 { 15842 hashitem_T *hi; 15843 15844 /* Include the word count for ":spelldump!". */ 15845 hi = hash_find(&slang->sl_wordcount, tw); 15846 if (!HASHITEM_EMPTY(hi)) 15847 { 15848 vim_snprintf((char *)IObuff, IOSIZE, "%s\t%d", 15849 tw, HI2WC(hi)->wc_count); 15850 p = IObuff; 15851 } 15852 } 15853 15854 ml_append(lnum, p, (colnr_T)0, FALSE); 15855 } 15856 else if (((dumpflags & DUMPFLAG_ICASE) 15857 ? MB_STRNICMP(p, pat, STRLEN(pat)) == 0 15858 : STRNCMP(p, pat, STRLEN(pat)) == 0) 15859 && ins_compl_add_infercase(p, (int)STRLEN(p), 15860 p_ic, NULL, *dir, 0) == OK) 15861 /* if dir was BACKWARD then honor it just once */ 15862 *dir = FORWARD; 15863 } 15864 15865 /* 15866 * For ":spelldump": Find matching prefixes for "word". Prepend each to 15867 * "word" and append a line to the buffer. 15868 * When "lnum" is zero add insert mode completion. 15869 * Return the updated line number. 
15870 */ 15871 static linenr_T 15872 dump_prefixes( 15873 slang_T *slang, 15874 char_u *word, /* case-folded word */ 15875 char_u *pat, 15876 int *dir, 15877 int dumpflags, 15878 int flags, /* flags with prefix ID */ 15879 linenr_T startlnum) 15880 { 15881 idx_T arridx[MAXWLEN]; 15882 int curi[MAXWLEN]; 15883 char_u prefix[MAXWLEN]; 15884 char_u word_up[MAXWLEN]; 15885 int has_word_up = FALSE; 15886 int c; 15887 char_u *byts; 15888 idx_T *idxs; 15889 linenr_T lnum = startlnum; 15890 int depth; 15891 int n; 15892 int len; 15893 int i; 15894 15895 /* If the word starts with a lower-case letter make the word with an 15896 * upper-case letter in word_up[]. */ 15897 c = PTR2CHAR(word); 15898 if (SPELL_TOUPPER(c) != c) 15899 { 15900 onecap_copy(word, word_up, TRUE); 15901 has_word_up = TRUE; 15902 } 15903 15904 byts = slang->sl_pbyts; 15905 idxs = slang->sl_pidxs; 15906 if (byts != NULL) /* array not is empty */ 15907 { 15908 /* 15909 * Loop over all prefixes, building them byte-by-byte in prefix[]. 15910 * When at the end of a prefix check that it supports "flags". 15911 */ 15912 depth = 0; 15913 arridx[0] = 0; 15914 curi[0] = 1; 15915 while (depth >= 0 && !got_int) 15916 { 15917 n = arridx[depth]; 15918 len = byts[n]; 15919 if (curi[depth] > len) 15920 { 15921 /* Done all bytes at this node, go up one level. */ 15922 --depth; 15923 line_breakcheck(); 15924 } 15925 else 15926 { 15927 /* Do one more byte at this node. */ 15928 n += curi[depth]; 15929 ++curi[depth]; 15930 c = byts[n]; 15931 if (c == 0) 15932 { 15933 /* End of prefix, find out how many IDs there are. */ 15934 for (i = 1; i < len; ++i) 15935 if (byts[n + i] != 0) 15936 break; 15937 curi[depth] += i - 1; 15938 15939 c = valid_word_prefix(i, n, flags, word, slang, FALSE); 15940 if (c != 0) 15941 { 15942 vim_strncpy(prefix + depth, word, MAXWLEN - depth - 1); 15943 dump_word(slang, prefix, pat, dir, dumpflags, 15944 (c & WF_RAREPFX) ? 
(flags | WF_RARE) 15945 : flags, lnum); 15946 if (lnum != 0) 15947 ++lnum; 15948 } 15949 15950 /* Check for prefix that matches the word when the 15951 * first letter is upper-case, but only if the prefix has 15952 * a condition. */ 15953 if (has_word_up) 15954 { 15955 c = valid_word_prefix(i, n, flags, word_up, slang, 15956 TRUE); 15957 if (c != 0) 15958 { 15959 vim_strncpy(prefix + depth, word_up, 15960 MAXWLEN - depth - 1); 15961 dump_word(slang, prefix, pat, dir, dumpflags, 15962 (c & WF_RAREPFX) ? (flags | WF_RARE) 15963 : flags, lnum); 15964 if (lnum != 0) 15965 ++lnum; 15966 } 15967 } 15968 } 15969 else 15970 { 15971 /* Normal char, go one level deeper. */ 15972 prefix[depth++] = c; 15973 arridx[depth] = idxs[n]; 15974 curi[depth] = 1; 15975 } 15976 } 15977 } 15978 } 15979 15980 return lnum; 15981 } 15982 15983 /* 15984 * Move "p" to the end of word "start". 15985 * Uses the spell-checking word characters. 15986 */ 15987 char_u * 15988 spell_to_word_end(char_u *start, win_T *win) 15989 { 15990 char_u *p = start; 15991 15992 while (*p != NUL && spell_iswordp(p, win)) 15993 mb_ptr_adv(p); 15994 return p; 15995 } 15996 15997 #if defined(FEAT_INS_EXPAND) || defined(PROTO) 15998 /* 15999 * For Insert mode completion CTRL-X s: 16000 * Find start of the word in front of column "startcol". 16001 * We don't check if it is badly spelled, with completion we can only change 16002 * the word in front of the cursor. 16003 * Returns the column number of the word. 16004 */ 16005 int 16006 spell_word_start(int startcol) 16007 { 16008 char_u *line; 16009 char_u *p; 16010 int col = 0; 16011 16012 if (no_spell_checking(curwin)) 16013 return startcol; 16014 16015 /* Find a word character before "startcol". */ 16016 line = ml_get_curline(); 16017 for (p = line + startcol; p > line; ) 16018 { 16019 mb_ptr_back(line, p); 16020 if (spell_iswordp_nmw(p, curwin)) 16021 break; 16022 } 16023 16024 /* Go back to start of the word. 
*/ 16025 while (p > line) 16026 { 16027 col = (int)(p - line); 16028 mb_ptr_back(line, p); 16029 if (!spell_iswordp(p, curwin)) 16030 break; 16031 col = 0; 16032 } 16033 16034 return col; 16035 } 16036 16037 /* 16038 * Need to check for 'spellcapcheck' now, the word is removed before 16039 * expand_spelling() is called. Therefore the ugly global variable. 16040 */ 16041 static int spell_expand_need_cap; 16042 16043 void 16044 spell_expand_check_cap(colnr_T col) 16045 { 16046 spell_expand_need_cap = check_need_cap(curwin->w_cursor.lnum, col); 16047 } 16048 16049 /* 16050 * Get list of spelling suggestions. 16051 * Used for Insert mode completion CTRL-X ?. 16052 * Returns the number of matches. The matches are in "matchp[]", array of 16053 * allocated strings. 16054 */ 16055 int 16056 expand_spelling( 16057 linenr_T lnum UNUSED, 16058 char_u *pat, 16059 char_u ***matchp) 16060 { 16061 garray_T ga; 16062 16063 spell_suggest_list(&ga, pat, 100, spell_expand_need_cap, TRUE); 16064 *matchp = ga.ga_data; 16065 return ga.ga_len; 16066 } 16067 #endif 16068 16069 #endif /* FEAT_SPELL */ 16070