1 /* vi:set ts=8 sts=4 sw=4 noet: 2 * 3 * VIM - Vi IMproved by Bram Moolenaar 4 * 5 * Do ":help uganda" in Vim to read copying and usage conditions. 6 * Do ":help credits" in Vim to see a list of people who contributed. 7 * See README.txt for an overview of the Vim source code. 8 */ 9 10 /* 11 * spellfile.c: code for reading and writing spell files. 12 * 13 * See spell.c for information about spell checking. 14 */ 15 16 /* 17 * Vim spell file format: <HEADER> 18 * <SECTIONS> 19 * <LWORDTREE> 20 * <KWORDTREE> 21 * <PREFIXTREE> 22 * 23 * <HEADER>: <fileID> <versionnr> 24 * 25 * <fileID> 8 bytes "VIMspell" 26 * <versionnr> 1 byte VIMSPELLVERSION 27 * 28 * 29 * Sections make it possible to add information to the .spl file without 30 * making it incompatible with previous versions. There are two kinds of 31 * sections: 32 * 1. Not essential for correct spell checking. E.g. for making suggestions. 33 * These are skipped when not supported. 34 * 2. Optional information, but essential for spell checking when present. 35 * E.g. conditions for affixes. When this section is present but not 36 * supported an error message is given. 37 * 38 * <SECTIONS>: <section> ... <sectionend> 39 * 40 * <section>: <sectionID> <sectionflags> <sectionlen> (section contents) 41 * 42 * <sectionID> 1 byte number from 0 to 254 identifying the section 43 * 44 * <sectionflags> 1 byte SNF_REQUIRED: this section is required for correct 45 * spell checking 46 * 47 * <sectionlen> 4 bytes length of section contents, MSB first 48 * 49 * <sectionend> 1 byte SN_END 50 * 51 * 52 * sectionID == SN_INFO: <infotext> 53 * <infotext> N bytes free format text with spell file info (version, 54 * website, etc) 55 * 56 * sectionID == SN_REGION: <regionname> ... 57 * <regionname> 2 bytes Up to 8 region names: ca, au, etc. Lower case. 58 * First <regionname> is region 1. 
59 * 60 * sectionID == SN_CHARFLAGS: <charflagslen> <charflags> 61 * <folcharslen> <folchars> 62 * <charflagslen> 1 byte Number of bytes in <charflags> (should be 128). 63 * <charflags> N bytes List of flags (first one is for character 128): 64 * 0x01 word character CF_WORD 65 * 0x02 upper-case character CF_UPPER 66 * <folcharslen> 2 bytes Number of bytes in <folchars>. 67 * <folchars> N bytes Folded characters, first one is for character 128. 68 * 69 * sectionID == SN_MIDWORD: <midword> 70 * <midword> N bytes Characters that are word characters only when used 71 * in the middle of a word. 72 * 73 * sectionID == SN_PREFCOND: <prefcondcnt> <prefcond> ... 74 * <prefcondcnt> 2 bytes Number of <prefcond> items following. 75 * <prefcond> : <condlen> <condstr> 76 * <condlen> 1 byte Length of <condstr>. 77 * <condstr> N bytes Condition for the prefix. 78 * 79 * sectionID == SN_REP: <repcount> <rep> ... 80 * <repcount> 2 bytes number of <rep> items, MSB first. 81 * <rep> : <repfromlen> <repfrom> <reptolen> <repto> 82 * <repfromlen> 1 byte length of <repfrom> 83 * <repfrom> N bytes "from" part of replacement 84 * <reptolen> 1 byte length of <repto> 85 * <repto> N bytes "to" part of replacement 86 * 87 * sectionID == SN_REPSAL: <repcount> <rep> ... 88 * just like SN_REP but for soundfolded words 89 * 90 * sectionID == SN_SAL: <salflags> <salcount> <sal> ... 
91 * <salflags> 1 byte flags for soundsalike conversion: 92 * SAL_F0LLOWUP 93 * SAL_COLLAPSE 94 * SAL_REM_ACCENTS 95 * <salcount> 2 bytes number of <sal> items following 96 * <sal> : <salfromlen> <salfrom> <saltolen> <salto> 97 * <salfromlen> 1 byte length of <salfrom> 98 * <salfrom> N bytes "from" part of soundsalike 99 * <saltolen> 1 byte length of <salto> 100 * <salto> N bytes "to" part of soundsalike 101 * 102 * sectionID == SN_SOFO: <sofofromlen> <sofofrom> <sofotolen> <sofoto> 103 * <sofofromlen> 2 bytes length of <sofofrom> 104 * <sofofrom> N bytes "from" part of soundfold 105 * <sofotolen> 2 bytes length of <sofoto> 106 * <sofoto> N bytes "to" part of soundfold 107 * 108 * sectionID == SN_SUGFILE: <timestamp> 109 * <timestamp> 8 bytes time in seconds that must match with .sug file 110 * 111 * sectionID == SN_NOSPLITSUGS: nothing 112 * 113 * sectionID == SN_NOCOMPOUNDSUGS: nothing 114 * 115 * sectionID == SN_WORDS: <word> ... 116 * <word> N bytes NUL terminated common word 117 * 118 * sectionID == SN_MAP: <mapstr> 119 * <mapstr> N bytes String with sequences of similar characters, 120 * separated by slashes. 121 * 122 * sectionID == SN_COMPOUND: <compmax> <compminlen> <compsylmax> <compoptions> 123 * <comppatcount> <comppattern> ... <compflags> 124 * <compmax> 1 byte Maximum nr of words in compound word. 125 * <compminlen> 1 byte Minimal word length for compounding. 126 * <compsylmax> 1 byte Maximum nr of syllables in compound word. 127 * <compoptions> 2 bytes COMP_ flags. 128 * <comppatcount> 2 bytes number of <comppattern> following 129 * <compflags> N bytes Flags from COMPOUNDRULE items, separated by 130 * slashes. 
131 * 132 * <comppattern>: <comppatlen> <comppattext> 133 * <comppatlen> 1 byte length of <comppattext> 134 * <comppattext> N bytes end or begin chars from CHECKCOMPOUNDPATTERN 135 * 136 * sectionID == SN_NOBREAK: (empty, its presence is what matters) 137 * 138 * sectionID == SN_SYLLABLE: <syllable> 139 * <syllable> N bytes String from SYLLABLE item. 140 * 141 * <LWORDTREE>: <wordtree> 142 * 143 * <KWORDTREE>: <wordtree> 144 * 145 * <PREFIXTREE>: <wordtree> 146 * 147 * 148 * <wordtree>: <nodecount> <nodedata> ... 149 * 150 * <nodecount> 4 bytes Number of nodes following. MSB first. 151 * 152 * <nodedata>: <siblingcount> <sibling> ... 153 * 154 * <siblingcount> 1 byte Number of siblings in this node. The siblings 155 * follow in sorted order. 156 * 157 * <sibling>: <byte> [ <nodeidx> <xbyte> 158 * | <flags> [<flags2>] [<region>] [<affixID>] 159 * | [<pflags>] <affixID> <prefcondnr> ] 160 * 161 * <byte> 1 byte Byte value of the sibling. Special cases: 162 * BY_NOFLAGS: End of word without flags and for all 163 * regions. 164 * For PREFIXTREE <affixID> and 165 * <prefcondnr> follow. 166 * BY_FLAGS: End of word, <flags> follow. 167 * For PREFIXTREE <pflags>, <affixID> 168 * and <prefcondnr> follow. 169 * BY_FLAGS2: End of word, <flags> and <flags2> 170 * follow. Not used in PREFIXTREE. 171 * BY_INDEX: Child of sibling is shared, <nodeidx> 172 * and <xbyte> follow. 173 * 174 * <nodeidx> 3 bytes Index of child for this sibling, MSB first. 175 * 176 * <xbyte> 1 byte byte value of the sibling. 
177 * 178 * <flags> 1 byte bitmask of: 179 * WF_ALLCAP word must have only capitals 180 * WF_ONECAP first char of word must be capital 181 * WF_KEEPCAP keep-case word 182 * WF_FIXCAP keep-case word, all caps not allowed 183 * WF_RARE rare word 184 * WF_BANNED bad word 185 * WF_REGION <region> follows 186 * WF_AFX <affixID> follows 187 * 188 * <flags2> 1 byte Bitmask of: 189 * WF_HAS_AFF >> 8 word includes affix 190 * WF_NEEDCOMP >> 8 word only valid in compound 191 * WF_NOSUGGEST >> 8 word not used for suggestions 192 * WF_COMPROOT >> 8 word already a compound 193 * WF_NOCOMPBEF >> 8 no compounding before this word 194 * WF_NOCOMPAFT >> 8 no compounding after this word 195 * 196 * <pflags> 1 byte bitmask of: 197 * WFP_RARE rare prefix 198 * WFP_NC non-combining prefix 199 * WFP_UP letter after prefix made upper case 200 * 201 * <region> 1 byte Bitmask for regions in which word is valid. When 202 * omitted it's valid in all regions. 203 * Lowest bit is for region 1. 204 * 205 * <affixID> 1 byte ID of affix that can be used with this word. In 206 * PREFIXTREE used for the required prefix ID. 207 * 208 * <prefcondnr> 2 bytes Prefix condition number, index in <prefcond> list 209 * from HEADER. 210 * 211 * All text characters are in 'encoding', but stored as single bytes. 212 */ 213 214 /* 215 * Vim .sug file format: <SUGHEADER> 216 * <SUGWORDTREE> 217 * <SUGTABLE> 218 * 219 * <SUGHEADER>: <fileID> <versionnr> <timestamp> 220 * 221 * <fileID> 6 bytes "VIMsug" 222 * <versionnr> 1 byte VIMSUGVERSION 223 * <timestamp> 8 bytes timestamp that must match with .spl file 224 * 225 * 226 * <SUGWORDTREE>: <wordtree> (see above, no flags or region used) 227 * 228 * 229 * <SUGTABLE>: <sugwcount> <sugline> ... 230 * 231 * <sugwcount> 4 bytes number of <sugline> following 232 * 233 * <sugline>: <sugnr> ... 
NUL 234 * 235 * <sugnr>: X bytes word number that results in this soundfolded word, 236 * stored as an offset to the previous number in as 237 * few bytes as possible, see offset2bytes()) 238 */ 239 240 #include "vim.h" 241 242 #if defined(FEAT_SPELL) || defined(PROTO) 243 244 #ifndef UNIX /* it's in os_unix.h for Unix */ 245 # include <time.h> /* for time_t */ 246 #endif 247 248 #ifndef UNIX /* it's in os_unix.h for Unix */ 249 # include <time.h> /* for time_t */ 250 #endif 251 252 /* Special byte values for <byte>. Some are only used in the tree for 253 * postponed prefixes, some only in the other trees. This is a bit messy... */ 254 #define BY_NOFLAGS 0 /* end of word without flags or region; for 255 * postponed prefix: no <pflags> */ 256 #define BY_INDEX 1 /* child is shared, index follows */ 257 #define BY_FLAGS 2 /* end of word, <flags> byte follows; for 258 * postponed prefix: <pflags> follows */ 259 #define BY_FLAGS2 3 /* end of word, <flags> and <flags2> bytes 260 * follow; never used in prefix tree */ 261 #define BY_SPECIAL BY_FLAGS2 /* highest special byte value */ 262 263 /* Flags used in .spl file for soundsalike flags. */ 264 #define SAL_F0LLOWUP 1 265 #define SAL_COLLAPSE 2 266 #define SAL_REM_ACCENTS 4 267 268 #define VIMSPELLMAGIC "VIMspell" /* string at start of Vim spell file */ 269 #define VIMSPELLMAGICL 8 270 #define VIMSPELLVERSION 50 271 272 /* Section IDs. Only renumber them when VIMSPELLVERSION changes! 
*/ 273 #define SN_REGION 0 /* <regionname> section */ 274 #define SN_CHARFLAGS 1 /* charflags section */ 275 #define SN_MIDWORD 2 /* <midword> section */ 276 #define SN_PREFCOND 3 /* <prefcond> section */ 277 #define SN_REP 4 /* REP items section */ 278 #define SN_SAL 5 /* SAL items section */ 279 #define SN_SOFO 6 /* soundfolding section */ 280 #define SN_MAP 7 /* MAP items section */ 281 #define SN_COMPOUND 8 /* compound words section */ 282 #define SN_SYLLABLE 9 /* syllable section */ 283 #define SN_NOBREAK 10 /* NOBREAK section */ 284 #define SN_SUGFILE 11 /* timestamp for .sug file */ 285 #define SN_REPSAL 12 /* REPSAL items section */ 286 #define SN_WORDS 13 /* common words */ 287 #define SN_NOSPLITSUGS 14 /* don't split word for suggestions */ 288 #define SN_INFO 15 /* info section */ 289 #define SN_NOCOMPOUNDSUGS 16 /* don't compound for suggestions */ 290 #define SN_END 255 /* end of sections */ 291 292 #define SNF_REQUIRED 1 /* <sectionflags>: required section */ 293 294 #define CF_WORD 0x01 295 #define CF_UPPER 0x02 296 297 static int set_spell_finish(spelltab_T *new_st); 298 static int write_spell_prefcond(FILE *fd, garray_T *gap); 299 static char_u *read_cnt_string(FILE *fd, int cnt_bytes, int *lenp); 300 static int read_region_section(FILE *fd, slang_T *slang, int len); 301 static int read_charflags_section(FILE *fd); 302 static int read_prefcond_section(FILE *fd, slang_T *lp); 303 static int read_rep_section(FILE *fd, garray_T *gap, short *first); 304 static int read_sal_section(FILE *fd, slang_T *slang); 305 static int read_words_section(FILE *fd, slang_T *lp, int len); 306 static int read_sofo_section(FILE *fd, slang_T *slang); 307 static int read_compound(FILE *fd, slang_T *slang, int len); 308 static int set_sofo(slang_T *lp, char_u *from, char_u *to); 309 static void set_sal_first(slang_T *lp); 310 #ifdef FEAT_MBYTE 311 static int *mb_str2wide(char_u *s); 312 #endif 313 static int spell_read_tree(FILE *fd, char_u **bytsp, idx_T **idxsp, int 
prefixtree, int prefixcnt); 314 static idx_T read_tree_node(FILE *fd, char_u *byts, idx_T *idxs, int maxidx, idx_T startidx, int prefixtree, int maxprefcondnr); 315 static void spell_reload_one(char_u *fname, int added_word); 316 static void set_spell_charflags(char_u *flags, int cnt, char_u *upp); 317 static int set_spell_chartab(char_u *fol, char_u *low, char_u *upp); 318 static void set_map_str(slang_T *lp, char_u *map); 319 320 321 static char *e_spell_trunc = N_("E758: Truncated spell file"); 322 static char *e_afftrailing = N_("Trailing text in %s line %d: %s"); 323 static char *e_affname = N_("Affix name too long in %s line %d: %s"); 324 static char *e_affform = N_("E761: Format error in affix file FOL, LOW or UPP"); 325 static char *e_affrange = N_("E762: Character in FOL, LOW or UPP is out of range"); 326 static char *msg_compressing = N_("Compressing word tree..."); 327 328 /* 329 * Load one spell file and store the info into a slang_T. 330 * 331 * This is invoked in three ways: 332 * - From spell_load_cb() to load a spell file for the first time. "lang" is 333 * the language name, "old_lp" is NULL. Will allocate an slang_T. 334 * - To reload a spell file that was changed. "lang" is NULL and "old_lp" 335 * points to the existing slang_T. 336 * - Just after writing a .spl file; it's read back to produce the .sug file. 337 * "old_lp" is NULL and "lang" is NULL. Will allocate an slang_T. 338 * 339 * Returns the slang_T the spell file was loaded into. NULL for error. 
340 */ 341 slang_T * 342 spell_load_file( 343 char_u *fname, 344 char_u *lang, 345 slang_T *old_lp, 346 int silent) /* no error if file doesn't exist */ 347 { 348 FILE *fd; 349 char_u buf[VIMSPELLMAGICL]; 350 char_u *p; 351 int i; 352 int n; 353 int len; 354 char_u *save_sourcing_name = sourcing_name; 355 linenr_T save_sourcing_lnum = sourcing_lnum; 356 slang_T *lp = NULL; 357 int c = 0; 358 int res; 359 360 fd = mch_fopen((char *)fname, "r"); 361 if (fd == NULL) 362 { 363 if (!silent) 364 EMSG2(_(e_notopen), fname); 365 else if (p_verbose > 2) 366 { 367 verbose_enter(); 368 smsg((char_u *)e_notopen, fname); 369 verbose_leave(); 370 } 371 goto endFAIL; 372 } 373 if (p_verbose > 2) 374 { 375 verbose_enter(); 376 smsg((char_u *)_("Reading spell file \"%s\""), fname); 377 verbose_leave(); 378 } 379 380 if (old_lp == NULL) 381 { 382 lp = slang_alloc(lang); 383 if (lp == NULL) 384 goto endFAIL; 385 386 /* Remember the file name, used to reload the file when it's updated. */ 387 lp->sl_fname = vim_strsave(fname); 388 if (lp->sl_fname == NULL) 389 goto endFAIL; 390 391 /* Check for .add.spl (_add.spl for VMS). */ 392 lp->sl_add = strstr((char *)gettail(fname), SPL_FNAME_ADD) != NULL; 393 } 394 else 395 lp = old_lp; 396 397 /* Set sourcing_name, so that error messages mention the file name. */ 398 sourcing_name = fname; 399 sourcing_lnum = 0; 400 401 /* 402 * <HEADER>: <fileID> 403 */ 404 for (i = 0; i < VIMSPELLMAGICL; ++i) 405 buf[i] = getc(fd); /* <fileID> */ 406 if (STRNCMP(buf, VIMSPELLMAGIC, VIMSPELLMAGICL) != 0) 407 { 408 EMSG(_("E757: This does not look like a spell file")); 409 goto endFAIL; 410 } 411 c = getc(fd); /* <versionnr> */ 412 if (c < VIMSPELLVERSION) 413 { 414 EMSG(_("E771: Old spell file, needs to be updated")); 415 goto endFAIL; 416 } 417 else if (c > VIMSPELLVERSION) 418 { 419 EMSG(_("E772: Spell file is for newer version of Vim")); 420 goto endFAIL; 421 } 422 423 424 /* 425 * <SECTIONS>: <section> ... 
<sectionend> 426 * <section>: <sectionID> <sectionflags> <sectionlen> (section contents) 427 */ 428 for (;;) 429 { 430 n = getc(fd); /* <sectionID> or <sectionend> */ 431 if (n == SN_END) 432 break; 433 c = getc(fd); /* <sectionflags> */ 434 len = get4c(fd); /* <sectionlen> */ 435 if (len < 0) 436 goto truncerr; 437 438 res = 0; 439 switch (n) 440 { 441 case SN_INFO: 442 lp->sl_info = read_string(fd, len); /* <infotext> */ 443 if (lp->sl_info == NULL) 444 goto endFAIL; 445 break; 446 447 case SN_REGION: 448 res = read_region_section(fd, lp, len); 449 break; 450 451 case SN_CHARFLAGS: 452 res = read_charflags_section(fd); 453 break; 454 455 case SN_MIDWORD: 456 lp->sl_midword = read_string(fd, len); /* <midword> */ 457 if (lp->sl_midword == NULL) 458 goto endFAIL; 459 break; 460 461 case SN_PREFCOND: 462 res = read_prefcond_section(fd, lp); 463 break; 464 465 case SN_REP: 466 res = read_rep_section(fd, &lp->sl_rep, lp->sl_rep_first); 467 break; 468 469 case SN_REPSAL: 470 res = read_rep_section(fd, &lp->sl_repsal, lp->sl_repsal_first); 471 break; 472 473 case SN_SAL: 474 res = read_sal_section(fd, lp); 475 break; 476 477 case SN_SOFO: 478 res = read_sofo_section(fd, lp); 479 break; 480 481 case SN_MAP: 482 p = read_string(fd, len); /* <mapstr> */ 483 if (p == NULL) 484 goto endFAIL; 485 set_map_str(lp, p); 486 vim_free(p); 487 break; 488 489 case SN_WORDS: 490 res = read_words_section(fd, lp, len); 491 break; 492 493 case SN_SUGFILE: 494 lp->sl_sugtime = get8ctime(fd); /* <timestamp> */ 495 break; 496 497 case SN_NOSPLITSUGS: 498 lp->sl_nosplitsugs = TRUE; 499 break; 500 501 case SN_NOCOMPOUNDSUGS: 502 lp->sl_nocompoundsugs = TRUE; 503 break; 504 505 case SN_COMPOUND: 506 res = read_compound(fd, lp, len); 507 break; 508 509 case SN_NOBREAK: 510 lp->sl_nobreak = TRUE; 511 break; 512 513 case SN_SYLLABLE: 514 lp->sl_syllable = read_string(fd, len); /* <syllable> */ 515 if (lp->sl_syllable == NULL) 516 goto endFAIL; 517 if (init_syl_tab(lp) == FAIL) 518 goto endFAIL; 
519 break; 520 521 default: 522 /* Unsupported section. When it's required give an error 523 * message. When it's not required skip the contents. */ 524 if (c & SNF_REQUIRED) 525 { 526 EMSG(_("E770: Unsupported section in spell file")); 527 goto endFAIL; 528 } 529 while (--len >= 0) 530 if (getc(fd) < 0) 531 goto truncerr; 532 break; 533 } 534 someerror: 535 if (res == SP_FORMERROR) 536 { 537 EMSG(_(e_format)); 538 goto endFAIL; 539 } 540 if (res == SP_TRUNCERROR) 541 { 542 truncerr: 543 EMSG(_(e_spell_trunc)); 544 goto endFAIL; 545 } 546 if (res == SP_OTHERERROR) 547 goto endFAIL; 548 } 549 550 /* <LWORDTREE> */ 551 res = spell_read_tree(fd, &lp->sl_fbyts, &lp->sl_fidxs, FALSE, 0); 552 if (res != 0) 553 goto someerror; 554 555 /* <KWORDTREE> */ 556 res = spell_read_tree(fd, &lp->sl_kbyts, &lp->sl_kidxs, FALSE, 0); 557 if (res != 0) 558 goto someerror; 559 560 /* <PREFIXTREE> */ 561 res = spell_read_tree(fd, &lp->sl_pbyts, &lp->sl_pidxs, TRUE, 562 lp->sl_prefixcnt); 563 if (res != 0) 564 goto someerror; 565 566 /* For a new file link it in the list of spell files. */ 567 if (old_lp == NULL && lang != NULL) 568 { 569 lp->sl_next = first_lang; 570 first_lang = lp; 571 } 572 573 goto endOK; 574 575 endFAIL: 576 if (lang != NULL) 577 /* truncating the name signals the error to spell_load_lang() */ 578 *lang = NUL; 579 if (lp != NULL && old_lp == NULL) 580 slang_free(lp); 581 lp = NULL; 582 583 endOK: 584 if (fd != NULL) 585 fclose(fd); 586 sourcing_name = save_sourcing_name; 587 sourcing_lnum = save_sourcing_lnum; 588 589 return lp; 590 } 591 592 /* 593 * Fill in the wordcount fields for a trie. 594 * Returns the total number of words. 
595 */ 596 static void 597 tree_count_words(char_u *byts, idx_T *idxs) 598 { 599 int depth; 600 idx_T arridx[MAXWLEN]; 601 int curi[MAXWLEN]; 602 int c; 603 idx_T n; 604 int wordcount[MAXWLEN]; 605 606 arridx[0] = 0; 607 curi[0] = 1; 608 wordcount[0] = 0; 609 depth = 0; 610 while (depth >= 0 && !got_int) 611 { 612 if (curi[depth] > byts[arridx[depth]]) 613 { 614 /* Done all bytes at this node, go up one level. */ 615 idxs[arridx[depth]] = wordcount[depth]; 616 if (depth > 0) 617 wordcount[depth - 1] += wordcount[depth]; 618 619 --depth; 620 fast_breakcheck(); 621 } 622 else 623 { 624 /* Do one more byte at this node. */ 625 n = arridx[depth] + curi[depth]; 626 ++curi[depth]; 627 628 c = byts[n]; 629 if (c == 0) 630 { 631 /* End of word, count it. */ 632 ++wordcount[depth]; 633 634 /* Skip over any other NUL bytes (same word with different 635 * flags). */ 636 while (byts[n + 1] == 0) 637 { 638 ++n; 639 ++curi[depth]; 640 } 641 } 642 else 643 { 644 /* Normal char, go one level deeper to count the words. */ 645 ++depth; 646 arridx[depth] = idxs[n]; 647 curi[depth] = 1; 648 wordcount[depth] = 0; 649 } 650 } 651 } 652 } 653 654 /* 655 * Load the .sug files for languages that have one and weren't loaded yet. 656 */ 657 void 658 suggest_load_files(void) 659 { 660 langp_T *lp; 661 int lpi; 662 slang_T *slang; 663 char_u *dotp; 664 FILE *fd; 665 char_u buf[MAXWLEN]; 666 int i; 667 time_t timestamp; 668 int wcount; 669 int wordnr; 670 garray_T ga; 671 int c; 672 673 /* Do this for all languages that support sound folding. */ 674 for (lpi = 0; lpi < curwin->w_s->b_langp.ga_len; ++lpi) 675 { 676 lp = LANGP_ENTRY(curwin->w_s->b_langp, lpi); 677 slang = lp->lp_slang; 678 if (slang->sl_sugtime != 0 && !slang->sl_sugloaded) 679 { 680 /* Change ".spl" to ".sug" and open the file. When the file isn't 681 * found silently skip it. Do set "sl_sugloaded" so that we 682 * don't try again and again. 
*/ 683 slang->sl_sugloaded = TRUE; 684 685 dotp = vim_strrchr(slang->sl_fname, '.'); 686 if (dotp == NULL || fnamecmp(dotp, ".spl") != 0) 687 continue; 688 STRCPY(dotp, ".sug"); 689 fd = mch_fopen((char *)slang->sl_fname, "r"); 690 if (fd == NULL) 691 goto nextone; 692 693 /* 694 * <SUGHEADER>: <fileID> <versionnr> <timestamp> 695 */ 696 for (i = 0; i < VIMSUGMAGICL; ++i) 697 buf[i] = getc(fd); /* <fileID> */ 698 if (STRNCMP(buf, VIMSUGMAGIC, VIMSUGMAGICL) != 0) 699 { 700 EMSG2(_("E778: This does not look like a .sug file: %s"), 701 slang->sl_fname); 702 goto nextone; 703 } 704 c = getc(fd); /* <versionnr> */ 705 if (c < VIMSUGVERSION) 706 { 707 EMSG2(_("E779: Old .sug file, needs to be updated: %s"), 708 slang->sl_fname); 709 goto nextone; 710 } 711 else if (c > VIMSUGVERSION) 712 { 713 EMSG2(_("E780: .sug file is for newer version of Vim: %s"), 714 slang->sl_fname); 715 goto nextone; 716 } 717 718 /* Check the timestamp, it must be exactly the same as the one in 719 * the .spl file. Otherwise the word numbers won't match. */ 720 timestamp = get8ctime(fd); /* <timestamp> */ 721 if (timestamp != slang->sl_sugtime) 722 { 723 EMSG2(_("E781: .sug file doesn't match .spl file: %s"), 724 slang->sl_fname); 725 goto nextone; 726 } 727 728 /* 729 * <SUGWORDTREE>: <wordtree> 730 * Read the trie with the soundfolded words. 731 */ 732 if (spell_read_tree(fd, &slang->sl_sbyts, &slang->sl_sidxs, 733 FALSE, 0) != 0) 734 { 735 someerror: 736 EMSG2(_("E782: error while reading .sug file: %s"), 737 slang->sl_fname); 738 slang_clear_sug(slang); 739 goto nextone; 740 } 741 742 /* 743 * <SUGTABLE>: <sugwcount> <sugline> ... 744 * 745 * Read the table with word numbers. We use a file buffer for 746 * this, because it's so much like a file with lines. Makes it 747 * possible to swap the info and save on memory use. 
748 */ 749 slang->sl_sugbuf = open_spellbuf(); 750 if (slang->sl_sugbuf == NULL) 751 goto someerror; 752 /* <sugwcount> */ 753 wcount = get4c(fd); 754 if (wcount < 0) 755 goto someerror; 756 757 /* Read all the wordnr lists into the buffer, one NUL terminated 758 * list per line. */ 759 ga_init2(&ga, 1, 100); 760 for (wordnr = 0; wordnr < wcount; ++wordnr) 761 { 762 ga.ga_len = 0; 763 for (;;) 764 { 765 c = getc(fd); /* <sugline> */ 766 if (c < 0 || ga_grow(&ga, 1) == FAIL) 767 goto someerror; 768 ((char_u *)ga.ga_data)[ga.ga_len++] = c; 769 if (c == NUL) 770 break; 771 } 772 if (ml_append_buf(slang->sl_sugbuf, (linenr_T)wordnr, 773 ga.ga_data, ga.ga_len, TRUE) == FAIL) 774 goto someerror; 775 } 776 ga_clear(&ga); 777 778 /* 779 * Need to put word counts in the word tries, so that we can find 780 * a word by its number. 781 */ 782 tree_count_words(slang->sl_fbyts, slang->sl_fidxs); 783 tree_count_words(slang->sl_sbyts, slang->sl_sidxs); 784 785 nextone: 786 if (fd != NULL) 787 fclose(fd); 788 STRCPY(dotp, ".spl"); 789 } 790 } 791 } 792 793 794 /* 795 * Read a length field from "fd" in "cnt_bytes" bytes. 796 * Allocate memory, read the string into it and add a NUL at the end. 797 * Returns NULL when the count is zero. 798 * Sets "*cntp" to SP_*ERROR when there is an error, length of the result 799 * otherwise. 800 */ 801 static char_u * 802 read_cnt_string(FILE *fd, int cnt_bytes, int *cntp) 803 { 804 int cnt = 0; 805 int i; 806 char_u *str; 807 808 /* read the length bytes, MSB first */ 809 for (i = 0; i < cnt_bytes; ++i) 810 cnt = (cnt << 8) + getc(fd); 811 if (cnt < 0) 812 { 813 *cntp = SP_TRUNCERROR; 814 return NULL; 815 } 816 *cntp = cnt; 817 if (cnt == 0) 818 return NULL; /* nothing to read, return NULL */ 819 820 str = read_string(fd, cnt); 821 if (str == NULL) 822 *cntp = SP_OTHERERROR; 823 return str; 824 } 825 826 /* 827 * Read SN_REGION: <regionname> ... 828 * Return SP_*ERROR flags. 
829 */ 830 static int 831 read_region_section(FILE *fd, slang_T *lp, int len) 832 { 833 int i; 834 835 if (len > 16) 836 return SP_FORMERROR; 837 for (i = 0; i < len; ++i) 838 lp->sl_regions[i] = getc(fd); /* <regionname> */ 839 lp->sl_regions[len] = NUL; 840 return 0; 841 } 842 843 /* 844 * Read SN_CHARFLAGS section: <charflagslen> <charflags> 845 * <folcharslen> <folchars> 846 * Return SP_*ERROR flags. 847 */ 848 static int 849 read_charflags_section(FILE *fd) 850 { 851 char_u *flags; 852 char_u *fol; 853 int flagslen, follen; 854 855 /* <charflagslen> <charflags> */ 856 flags = read_cnt_string(fd, 1, &flagslen); 857 if (flagslen < 0) 858 return flagslen; 859 860 /* <folcharslen> <folchars> */ 861 fol = read_cnt_string(fd, 2, &follen); 862 if (follen < 0) 863 { 864 vim_free(flags); 865 return follen; 866 } 867 868 /* Set the word-char flags and fill SPELL_ISUPPER() table. */ 869 if (flags != NULL && fol != NULL) 870 set_spell_charflags(flags, flagslen, fol); 871 872 vim_free(flags); 873 vim_free(fol); 874 875 /* When <charflagslen> is zero then <fcharlen> must also be zero. */ 876 if ((flags == NULL) != (fol == NULL)) 877 return SP_FORMERROR; 878 return 0; 879 } 880 881 /* 882 * Read SN_PREFCOND section. 883 * Return SP_*ERROR flags. 884 */ 885 static int 886 read_prefcond_section(FILE *fd, slang_T *lp) 887 { 888 int cnt; 889 int i; 890 int n; 891 char_u *p; 892 char_u buf[MAXWLEN + 1]; 893 894 /* <prefcondcnt> <prefcond> ... */ 895 cnt = get2c(fd); /* <prefcondcnt> */ 896 if (cnt <= 0) 897 return SP_FORMERROR; 898 899 lp->sl_prefprog = (regprog_T **)alloc_clear( 900 (unsigned)sizeof(regprog_T *) * cnt); 901 if (lp->sl_prefprog == NULL) 902 return SP_OTHERERROR; 903 lp->sl_prefixcnt = cnt; 904 905 for (i = 0; i < cnt; ++i) 906 { 907 /* <prefcond> : <condlen> <condstr> */ 908 n = getc(fd); /* <condlen> */ 909 if (n < 0 || n >= MAXWLEN) 910 return SP_FORMERROR; 911 912 /* When <condlen> is zero we have an empty condition. 
Otherwise 913 * compile the regexp program used to check for the condition. */ 914 if (n > 0) 915 { 916 buf[0] = '^'; /* always match at one position only */ 917 p = buf + 1; 918 while (n-- > 0) 919 *p++ = getc(fd); /* <condstr> */ 920 *p = NUL; 921 lp->sl_prefprog[i] = vim_regcomp(buf, RE_MAGIC + RE_STRING); 922 } 923 } 924 return 0; 925 } 926 927 /* 928 * Read REP or REPSAL items section from "fd": <repcount> <rep> ... 929 * Return SP_*ERROR flags. 930 */ 931 static int 932 read_rep_section(FILE *fd, garray_T *gap, short *first) 933 { 934 int cnt; 935 fromto_T *ftp; 936 int i; 937 938 cnt = get2c(fd); /* <repcount> */ 939 if (cnt < 0) 940 return SP_TRUNCERROR; 941 942 if (ga_grow(gap, cnt) == FAIL) 943 return SP_OTHERERROR; 944 945 /* <rep> : <repfromlen> <repfrom> <reptolen> <repto> */ 946 for (; gap->ga_len < cnt; ++gap->ga_len) 947 { 948 ftp = &((fromto_T *)gap->ga_data)[gap->ga_len]; 949 ftp->ft_from = read_cnt_string(fd, 1, &i); 950 if (i < 0) 951 return i; 952 if (i == 0) 953 return SP_FORMERROR; 954 ftp->ft_to = read_cnt_string(fd, 1, &i); 955 if (i <= 0) 956 { 957 vim_free(ftp->ft_from); 958 if (i < 0) 959 return i; 960 return SP_FORMERROR; 961 } 962 } 963 964 /* Fill the first-index table. */ 965 for (i = 0; i < 256; ++i) 966 first[i] = -1; 967 for (i = 0; i < gap->ga_len; ++i) 968 { 969 ftp = &((fromto_T *)gap->ga_data)[i]; 970 if (first[*ftp->ft_from] == -1) 971 first[*ftp->ft_from] = i; 972 } 973 return 0; 974 } 975 976 /* 977 * Read SN_SAL section: <salflags> <salcount> <sal> ... 978 * Return SP_*ERROR flags. 
979 */ 980 static int 981 read_sal_section(FILE *fd, slang_T *slang) 982 { 983 int i; 984 int cnt; 985 garray_T *gap; 986 salitem_T *smp; 987 int ccnt; 988 char_u *p; 989 int c = NUL; 990 991 slang->sl_sofo = FALSE; 992 993 i = getc(fd); /* <salflags> */ 994 if (i & SAL_F0LLOWUP) 995 slang->sl_followup = TRUE; 996 if (i & SAL_COLLAPSE) 997 slang->sl_collapse = TRUE; 998 if (i & SAL_REM_ACCENTS) 999 slang->sl_rem_accents = TRUE; 1000 1001 cnt = get2c(fd); /* <salcount> */ 1002 if (cnt < 0) 1003 return SP_TRUNCERROR; 1004 1005 gap = &slang->sl_sal; 1006 ga_init2(gap, sizeof(salitem_T), 10); 1007 if (ga_grow(gap, cnt + 1) == FAIL) 1008 return SP_OTHERERROR; 1009 1010 /* <sal> : <salfromlen> <salfrom> <saltolen> <salto> */ 1011 for (; gap->ga_len < cnt; ++gap->ga_len) 1012 { 1013 smp = &((salitem_T *)gap->ga_data)[gap->ga_len]; 1014 ccnt = getc(fd); /* <salfromlen> */ 1015 if (ccnt < 0) 1016 return SP_TRUNCERROR; 1017 if ((p = alloc(ccnt + 2)) == NULL) 1018 return SP_OTHERERROR; 1019 smp->sm_lead = p; 1020 1021 /* Read up to the first special char into sm_lead. */ 1022 for (i = 0; i < ccnt; ++i) 1023 { 1024 c = getc(fd); /* <salfrom> */ 1025 if (vim_strchr((char_u *)"0123456789(-<^$", c) != NULL) 1026 break; 1027 *p++ = c; 1028 } 1029 smp->sm_leadlen = (int)(p - smp->sm_lead); 1030 *p++ = NUL; 1031 1032 /* Put (abc) chars in sm_oneof, if any. */ 1033 if (c == '(') 1034 { 1035 smp->sm_oneof = p; 1036 for (++i; i < ccnt; ++i) 1037 { 1038 c = getc(fd); /* <salfrom> */ 1039 if (c == ')') 1040 break; 1041 *p++ = c; 1042 } 1043 *p++ = NUL; 1044 if (++i < ccnt) 1045 c = getc(fd); 1046 } 1047 else 1048 smp->sm_oneof = NULL; 1049 1050 /* Any following chars go in sm_rules. 
*/ 1051 smp->sm_rules = p; 1052 if (i < ccnt) 1053 /* store the char we got while checking for end of sm_lead */ 1054 *p++ = c; 1055 for (++i; i < ccnt; ++i) 1056 *p++ = getc(fd); /* <salfrom> */ 1057 *p++ = NUL; 1058 1059 /* <saltolen> <salto> */ 1060 smp->sm_to = read_cnt_string(fd, 1, &ccnt); 1061 if (ccnt < 0) 1062 { 1063 vim_free(smp->sm_lead); 1064 return ccnt; 1065 } 1066 1067 #ifdef FEAT_MBYTE 1068 if (has_mbyte) 1069 { 1070 /* convert the multi-byte strings to wide char strings */ 1071 smp->sm_lead_w = mb_str2wide(smp->sm_lead); 1072 smp->sm_leadlen = mb_charlen(smp->sm_lead); 1073 if (smp->sm_oneof == NULL) 1074 smp->sm_oneof_w = NULL; 1075 else 1076 smp->sm_oneof_w = mb_str2wide(smp->sm_oneof); 1077 if (smp->sm_to == NULL) 1078 smp->sm_to_w = NULL; 1079 else 1080 smp->sm_to_w = mb_str2wide(smp->sm_to); 1081 if (smp->sm_lead_w == NULL 1082 || (smp->sm_oneof_w == NULL && smp->sm_oneof != NULL) 1083 || (smp->sm_to_w == NULL && smp->sm_to != NULL)) 1084 { 1085 vim_free(smp->sm_lead); 1086 vim_free(smp->sm_to); 1087 vim_free(smp->sm_lead_w); 1088 vim_free(smp->sm_oneof_w); 1089 vim_free(smp->sm_to_w); 1090 return SP_OTHERERROR; 1091 } 1092 } 1093 #endif 1094 } 1095 1096 if (gap->ga_len > 0) 1097 { 1098 /* Add one extra entry to mark the end with an empty sm_lead. Avoids 1099 * that we need to check the index every time. */ 1100 smp = &((salitem_T *)gap->ga_data)[gap->ga_len]; 1101 if ((p = alloc(1)) == NULL) 1102 return SP_OTHERERROR; 1103 p[0] = NUL; 1104 smp->sm_lead = p; 1105 smp->sm_leadlen = 0; 1106 smp->sm_oneof = NULL; 1107 smp->sm_rules = p; 1108 smp->sm_to = NULL; 1109 #ifdef FEAT_MBYTE 1110 if (has_mbyte) 1111 { 1112 smp->sm_lead_w = mb_str2wide(smp->sm_lead); 1113 smp->sm_leadlen = 0; 1114 smp->sm_oneof_w = NULL; 1115 smp->sm_to_w = NULL; 1116 } 1117 #endif 1118 ++gap->ga_len; 1119 } 1120 1121 /* Fill the first-index table. */ 1122 set_sal_first(slang); 1123 1124 return 0; 1125 } 1126 1127 /* 1128 * Read SN_WORDS: <word> ... 
1129 * Return SP_*ERROR flags. 1130 */ 1131 static int 1132 read_words_section(FILE *fd, slang_T *lp, int len) 1133 { 1134 int done = 0; 1135 int i; 1136 int c; 1137 char_u word[MAXWLEN]; 1138 1139 while (done < len) 1140 { 1141 /* Read one word at a time. */ 1142 for (i = 0; ; ++i) 1143 { 1144 c = getc(fd); 1145 if (c == EOF) 1146 return SP_TRUNCERROR; 1147 word[i] = c; 1148 if (word[i] == NUL) 1149 break; 1150 if (i == MAXWLEN - 1) 1151 return SP_FORMERROR; 1152 } 1153 1154 /* Init the count to 10. */ 1155 count_common_word(lp, word, -1, 10); 1156 done += i + 1; 1157 } 1158 return 0; 1159 } 1160 1161 /* 1162 * SN_SOFO: <sofofromlen> <sofofrom> <sofotolen> <sofoto> 1163 * Return SP_*ERROR flags. 1164 */ 1165 static int 1166 read_sofo_section(FILE *fd, slang_T *slang) 1167 { 1168 int cnt; 1169 char_u *from, *to; 1170 int res; 1171 1172 slang->sl_sofo = TRUE; 1173 1174 /* <sofofromlen> <sofofrom> */ 1175 from = read_cnt_string(fd, 2, &cnt); 1176 if (cnt < 0) 1177 return cnt; 1178 1179 /* <sofotolen> <sofoto> */ 1180 to = read_cnt_string(fd, 2, &cnt); 1181 if (cnt < 0) 1182 { 1183 vim_free(from); 1184 return cnt; 1185 } 1186 1187 /* Store the info in slang->sl_sal and/or slang->sl_sal_first. */ 1188 if (from != NULL && to != NULL) 1189 res = set_sofo(slang, from, to); 1190 else if (from != NULL || to != NULL) 1191 res = SP_FORMERROR; /* only one of two strings is an error */ 1192 else 1193 res = 0; 1194 1195 vim_free(from); 1196 vim_free(to); 1197 return res; 1198 } 1199 1200 /* 1201 * Read the compound section from the .spl file: 1202 * <compmax> <compminlen> <compsylmax> <compoptions> <compflags> 1203 * Returns SP_*ERROR flags. 
1204 */ 1205 static int 1206 read_compound(FILE *fd, slang_T *slang, int len) 1207 { 1208 int todo = len; 1209 int c; 1210 int atstart; 1211 char_u *pat; 1212 char_u *pp; 1213 char_u *cp; 1214 char_u *ap; 1215 char_u *crp; 1216 int cnt; 1217 garray_T *gap; 1218 1219 if (todo < 2) 1220 return SP_FORMERROR; /* need at least two bytes */ 1221 1222 --todo; 1223 c = getc(fd); /* <compmax> */ 1224 if (c < 2) 1225 c = MAXWLEN; 1226 slang->sl_compmax = c; 1227 1228 --todo; 1229 c = getc(fd); /* <compminlen> */ 1230 if (c < 1) 1231 c = 0; 1232 slang->sl_compminlen = c; 1233 1234 --todo; 1235 c = getc(fd); /* <compsylmax> */ 1236 if (c < 1) 1237 c = MAXWLEN; 1238 slang->sl_compsylmax = c; 1239 1240 c = getc(fd); /* <compoptions> */ 1241 if (c != 0) 1242 ungetc(c, fd); /* be backwards compatible with Vim 7.0b */ 1243 else 1244 { 1245 --todo; 1246 c = getc(fd); /* only use the lower byte for now */ 1247 --todo; 1248 slang->sl_compoptions = c; 1249 1250 gap = &slang->sl_comppat; 1251 c = get2c(fd); /* <comppatcount> */ 1252 todo -= 2; 1253 ga_init2(gap, sizeof(char_u *), c); 1254 if (ga_grow(gap, c) == OK) 1255 while (--c >= 0) 1256 { 1257 ((char_u **)(gap->ga_data))[gap->ga_len++] = 1258 read_cnt_string(fd, 1, &cnt); 1259 /* <comppatlen> <comppattext> */ 1260 if (cnt < 0) 1261 return cnt; 1262 todo -= cnt + 1; 1263 } 1264 } 1265 if (todo < 0) 1266 return SP_FORMERROR; 1267 1268 /* Turn the COMPOUNDRULE items into a regexp pattern: 1269 * "a[bc]/a*b+" -> "^\(a[bc]\|a*b\+\)$". 1270 * Inserting backslashes may double the length, "^\(\)$<Nul>" is 7 bytes. 1271 * Conversion to utf-8 may double the size. */ 1272 c = todo * 2 + 7; 1273 #ifdef FEAT_MBYTE 1274 if (enc_utf8) 1275 c += todo * 2; 1276 #endif 1277 pat = alloc((unsigned)c); 1278 if (pat == NULL) 1279 return SP_OTHERERROR; 1280 1281 /* We also need a list of all flags that can appear at the start and one 1282 * for all flags. 
*/ 1283 cp = alloc(todo + 1); 1284 if (cp == NULL) 1285 { 1286 vim_free(pat); 1287 return SP_OTHERERROR; 1288 } 1289 slang->sl_compstartflags = cp; 1290 *cp = NUL; 1291 1292 ap = alloc(todo + 1); 1293 if (ap == NULL) 1294 { 1295 vim_free(pat); 1296 return SP_OTHERERROR; 1297 } 1298 slang->sl_compallflags = ap; 1299 *ap = NUL; 1300 1301 /* And a list of all patterns in their original form, for checking whether 1302 * compounding may work in match_compoundrule(). This is freed when we 1303 * encounter a wildcard, the check doesn't work then. */ 1304 crp = alloc(todo + 1); 1305 slang->sl_comprules = crp; 1306 1307 pp = pat; 1308 *pp++ = '^'; 1309 *pp++ = '\\'; 1310 *pp++ = '('; 1311 1312 atstart = 1; 1313 while (todo-- > 0) 1314 { 1315 c = getc(fd); /* <compflags> */ 1316 if (c == EOF) 1317 { 1318 vim_free(pat); 1319 return SP_TRUNCERROR; 1320 } 1321 1322 /* Add all flags to "sl_compallflags". */ 1323 if (vim_strchr((char_u *)"?*+[]/", c) == NULL 1324 && !byte_in_str(slang->sl_compallflags, c)) 1325 { 1326 *ap++ = c; 1327 *ap = NUL; 1328 } 1329 1330 if (atstart != 0) 1331 { 1332 /* At start of item: copy flags to "sl_compstartflags". For a 1333 * [abc] item set "atstart" to 2 and copy up to the ']'. */ 1334 if (c == '[') 1335 atstart = 2; 1336 else if (c == ']') 1337 atstart = 0; 1338 else 1339 { 1340 if (!byte_in_str(slang->sl_compstartflags, c)) 1341 { 1342 *cp++ = c; 1343 *cp = NUL; 1344 } 1345 if (atstart == 1) 1346 atstart = 0; 1347 } 1348 } 1349 1350 /* Copy flag to "sl_comprules", unless we run into a wildcard. */ 1351 if (crp != NULL) 1352 { 1353 if (c == '?' || c == '+' || c == '*') 1354 { 1355 vim_free(slang->sl_comprules); 1356 slang->sl_comprules = NULL; 1357 crp = NULL; 1358 } 1359 else 1360 *crp++ = c; 1361 } 1362 1363 if (c == '/') /* slash separates two items */ 1364 { 1365 *pp++ = '\\'; 1366 *pp++ = '|'; 1367 atstart = 1; 1368 } 1369 else /* normal char, "[abc]" and '*' are copied as-is */ 1370 { 1371 if (c == '?' 
|| c == '+' || c == '~') 1372 *pp++ = '\\'; /* "a?" becomes "a\?", "a+" becomes "a\+" */ 1373 #ifdef FEAT_MBYTE 1374 if (enc_utf8) 1375 pp += mb_char2bytes(c, pp); 1376 else 1377 #endif 1378 *pp++ = c; 1379 } 1380 } 1381 1382 *pp++ = '\\'; 1383 *pp++ = ')'; 1384 *pp++ = '$'; 1385 *pp = NUL; 1386 1387 if (crp != NULL) 1388 *crp = NUL; 1389 1390 slang->sl_compprog = vim_regcomp(pat, RE_MAGIC + RE_STRING + RE_STRICT); 1391 vim_free(pat); 1392 if (slang->sl_compprog == NULL) 1393 return SP_FORMERROR; 1394 1395 return 0; 1396 } 1397 1398 /* 1399 * Set the SOFOFROM and SOFOTO items in language "lp". 1400 * Returns SP_*ERROR flags when there is something wrong. 1401 */ 1402 static int 1403 set_sofo(slang_T *lp, char_u *from, char_u *to) 1404 { 1405 int i; 1406 1407 #ifdef FEAT_MBYTE 1408 garray_T *gap; 1409 char_u *s; 1410 char_u *p; 1411 int c; 1412 int *inp; 1413 1414 if (has_mbyte) 1415 { 1416 /* Use "sl_sal" as an array with 256 pointers to a list of wide 1417 * characters. The index is the low byte of the character. 1418 * The list contains from-to pairs with a terminating NUL. 1419 * sl_sal_first[] is used for latin1 "from" characters. */ 1420 gap = &lp->sl_sal; 1421 ga_init2(gap, sizeof(int *), 1); 1422 if (ga_grow(gap, 256) == FAIL) 1423 return SP_OTHERERROR; 1424 vim_memset(gap->ga_data, 0, sizeof(int *) * 256); 1425 gap->ga_len = 256; 1426 1427 /* First count the number of items for each list. Temporarily use 1428 * sl_sal_first[] for this. */ 1429 for (p = from, s = to; *p != NUL && *s != NUL; ) 1430 { 1431 c = mb_cptr2char_adv(&p); 1432 MB_CPTR_ADV(s); 1433 if (c >= 256) 1434 ++lp->sl_sal_first[c & 0xff]; 1435 } 1436 if (*p != NUL || *s != NUL) /* lengths differ */ 1437 return SP_FORMERROR; 1438 1439 /* Allocate the lists. 
*/ 1440 for (i = 0; i < 256; ++i) 1441 if (lp->sl_sal_first[i] > 0) 1442 { 1443 p = alloc(sizeof(int) * (lp->sl_sal_first[i] * 2 + 1)); 1444 if (p == NULL) 1445 return SP_OTHERERROR; 1446 ((int **)gap->ga_data)[i] = (int *)p; 1447 *(int *)p = 0; 1448 } 1449 1450 /* Put the characters up to 255 in sl_sal_first[] the rest in a sl_sal 1451 * list. */ 1452 vim_memset(lp->sl_sal_first, 0, sizeof(salfirst_T) * 256); 1453 for (p = from, s = to; *p != NUL && *s != NUL; ) 1454 { 1455 c = mb_cptr2char_adv(&p); 1456 i = mb_cptr2char_adv(&s); 1457 if (c >= 256) 1458 { 1459 /* Append the from-to chars at the end of the list with 1460 * the low byte. */ 1461 inp = ((int **)gap->ga_data)[c & 0xff]; 1462 while (*inp != 0) 1463 ++inp; 1464 *inp++ = c; /* from char */ 1465 *inp++ = i; /* to char */ 1466 *inp++ = NUL; /* NUL at the end */ 1467 } 1468 else 1469 /* mapping byte to char is done in sl_sal_first[] */ 1470 lp->sl_sal_first[c] = i; 1471 } 1472 } 1473 else 1474 #endif 1475 { 1476 /* mapping bytes to bytes is done in sl_sal_first[] */ 1477 if (STRLEN(from) != STRLEN(to)) 1478 return SP_FORMERROR; 1479 1480 for (i = 0; to[i] != NUL; ++i) 1481 lp->sl_sal_first[from[i]] = to[i]; 1482 lp->sl_sal.ga_len = 1; /* indicates we have soundfolding */ 1483 } 1484 1485 return 0; 1486 } 1487 1488 /* 1489 * Fill the first-index table for "lp". 1490 */ 1491 static void 1492 set_sal_first(slang_T *lp) 1493 { 1494 salfirst_T *sfirst; 1495 int i; 1496 salitem_T *smp; 1497 int c; 1498 garray_T *gap = &lp->sl_sal; 1499 1500 sfirst = lp->sl_sal_first; 1501 for (i = 0; i < 256; ++i) 1502 sfirst[i] = -1; 1503 smp = (salitem_T *)gap->ga_data; 1504 for (i = 0; i < gap->ga_len; ++i) 1505 { 1506 #ifdef FEAT_MBYTE 1507 if (has_mbyte) 1508 /* Use the lowest byte of the first character. For latin1 it's 1509 * the character, for other encodings it should differ for most 1510 * characters. 
*/ 1511 c = *smp[i].sm_lead_w & 0xff; 1512 else 1513 #endif 1514 c = *smp[i].sm_lead; 1515 if (sfirst[c] == -1) 1516 { 1517 sfirst[c] = i; 1518 #ifdef FEAT_MBYTE 1519 if (has_mbyte) 1520 { 1521 int n; 1522 1523 /* Make sure all entries with this byte are following each 1524 * other. Move the ones that are in the wrong position. Do 1525 * keep the same ordering! */ 1526 while (i + 1 < gap->ga_len 1527 && (*smp[i + 1].sm_lead_w & 0xff) == c) 1528 /* Skip over entry with same index byte. */ 1529 ++i; 1530 1531 for (n = 1; i + n < gap->ga_len; ++n) 1532 if ((*smp[i + n].sm_lead_w & 0xff) == c) 1533 { 1534 salitem_T tsal; 1535 1536 /* Move entry with same index byte after the entries 1537 * we already found. */ 1538 ++i; 1539 --n; 1540 tsal = smp[i + n]; 1541 mch_memmove(smp + i + 1, smp + i, 1542 sizeof(salitem_T) * n); 1543 smp[i] = tsal; 1544 } 1545 } 1546 #endif 1547 } 1548 } 1549 } 1550 1551 #ifdef FEAT_MBYTE 1552 /* 1553 * Turn a multi-byte string into a wide character string. 1554 * Return it in allocated memory (NULL for out-of-memory) 1555 */ 1556 static int * 1557 mb_str2wide(char_u *s) 1558 { 1559 int *res; 1560 char_u *p; 1561 int i = 0; 1562 1563 res = (int *)alloc(sizeof(int) * (mb_charlen(s) + 1)); 1564 if (res != NULL) 1565 { 1566 for (p = s; *p != NUL; ) 1567 res[i++] = mb_ptr2char_adv(&p); 1568 res[i] = NUL; 1569 } 1570 return res; 1571 } 1572 #endif 1573 1574 /* 1575 * Read a tree from the .spl or .sug file. 1576 * Allocates the memory and stores pointers in "bytsp" and "idxsp". 1577 * This is skipped when the tree has zero length. 1578 * Returns zero when OK, SP_ value for an error. 
1579 */ 1580 static int 1581 spell_read_tree( 1582 FILE *fd, 1583 char_u **bytsp, 1584 idx_T **idxsp, 1585 int prefixtree, /* TRUE for the prefix tree */ 1586 int prefixcnt) /* when "prefixtree" is TRUE: prefix count */ 1587 { 1588 long len; 1589 int idx; 1590 char_u *bp; 1591 idx_T *ip; 1592 1593 /* The tree size was computed when writing the file, so that we can 1594 * allocate it as one long block. <nodecount> */ 1595 len = get4c(fd); 1596 if (len < 0) 1597 return SP_TRUNCERROR; 1598 if (len >= LONG_MAX / (long)sizeof(int)) 1599 /* Invalid length, multiply with sizeof(int) would overflow. */ 1600 return SP_FORMERROR; 1601 if (len > 0) 1602 { 1603 /* Allocate the byte array. */ 1604 bp = lalloc((long_u)len, TRUE); 1605 if (bp == NULL) 1606 return SP_OTHERERROR; 1607 *bytsp = bp; 1608 1609 /* Allocate the index array. */ 1610 ip = (idx_T *)lalloc_clear((long_u)(len * sizeof(int)), TRUE); 1611 if (ip == NULL) 1612 return SP_OTHERERROR; 1613 *idxsp = ip; 1614 1615 /* Recursively read the tree and store it in the array. */ 1616 idx = read_tree_node(fd, bp, ip, len, 0, prefixtree, prefixcnt); 1617 if (idx < 0) 1618 return idx; 1619 } 1620 return 0; 1621 } 1622 1623 /* 1624 * Read one row of siblings from the spell file and store it in the byte array 1625 * "byts" and index array "idxs". Recursively read the children. 1626 * 1627 * NOTE: The code here must match put_node()! 1628 * 1629 * Returns the index (>= 0) following the siblings. 1630 * Returns SP_TRUNCERROR if the file is shorter than expected. 1631 * Returns SP_FORMERROR if there is a format error. 
1632 */ 1633 static idx_T 1634 read_tree_node( 1635 FILE *fd, 1636 char_u *byts, 1637 idx_T *idxs, 1638 int maxidx, /* size of arrays */ 1639 idx_T startidx, /* current index in "byts" and "idxs" */ 1640 int prefixtree, /* TRUE for reading PREFIXTREE */ 1641 int maxprefcondnr) /* maximum for <prefcondnr> */ 1642 { 1643 int len; 1644 int i; 1645 int n; 1646 idx_T idx = startidx; 1647 int c; 1648 int c2; 1649 #define SHARED_MASK 0x8000000 1650 1651 len = getc(fd); /* <siblingcount> */ 1652 if (len <= 0) 1653 return SP_TRUNCERROR; 1654 1655 if (startidx + len >= maxidx) 1656 return SP_FORMERROR; 1657 byts[idx++] = len; 1658 1659 /* Read the byte values, flag/region bytes and shared indexes. */ 1660 for (i = 1; i <= len; ++i) 1661 { 1662 c = getc(fd); /* <byte> */ 1663 if (c < 0) 1664 return SP_TRUNCERROR; 1665 if (c <= BY_SPECIAL) 1666 { 1667 if (c == BY_NOFLAGS && !prefixtree) 1668 { 1669 /* No flags, all regions. */ 1670 idxs[idx] = 0; 1671 c = 0; 1672 } 1673 else if (c != BY_INDEX) 1674 { 1675 if (prefixtree) 1676 { 1677 /* Read the optional pflags byte, the prefix ID and the 1678 * condition nr. In idxs[] store the prefix ID in the low 1679 * byte, the condition index shifted up 8 bits, the flags 1680 * shifted up 24 bits. */ 1681 if (c == BY_FLAGS) 1682 c = getc(fd) << 24; /* <pflags> */ 1683 else 1684 c = 0; 1685 1686 c |= getc(fd); /* <affixID> */ 1687 1688 n = get2c(fd); /* <prefcondnr> */ 1689 if (n >= maxprefcondnr) 1690 return SP_FORMERROR; 1691 c |= (n << 8); 1692 } 1693 else /* c must be BY_FLAGS or BY_FLAGS2 */ 1694 { 1695 /* Read flags and optional region and prefix ID. In 1696 * idxs[] the flags go in the low two bytes, region above 1697 * that and prefix ID above the region. 
*/ 1698 c2 = c; 1699 c = getc(fd); /* <flags> */ 1700 if (c2 == BY_FLAGS2) 1701 c = (getc(fd) << 8) + c; /* <flags2> */ 1702 if (c & WF_REGION) 1703 c = (getc(fd) << 16) + c; /* <region> */ 1704 if (c & WF_AFX) 1705 c = (getc(fd) << 24) + c; /* <affixID> */ 1706 } 1707 1708 idxs[idx] = c; 1709 c = 0; 1710 } 1711 else /* c == BY_INDEX */ 1712 { 1713 /* <nodeidx> */ 1714 n = get3c(fd); 1715 if (n < 0 || n >= maxidx) 1716 return SP_FORMERROR; 1717 idxs[idx] = n + SHARED_MASK; 1718 c = getc(fd); /* <xbyte> */ 1719 } 1720 } 1721 byts[idx++] = c; 1722 } 1723 1724 /* Recursively read the children for non-shared siblings. 1725 * Skip the end-of-word ones (zero byte value) and the shared ones (and 1726 * remove SHARED_MASK) */ 1727 for (i = 1; i <= len; ++i) 1728 if (byts[startidx + i] != 0) 1729 { 1730 if (idxs[startidx + i] & SHARED_MASK) 1731 idxs[startidx + i] &= ~SHARED_MASK; 1732 else 1733 { 1734 idxs[startidx + i] = idx; 1735 idx = read_tree_node(fd, byts, idxs, maxidx, idx, 1736 prefixtree, maxprefcondnr); 1737 if (idx < 0) 1738 break; 1739 } 1740 } 1741 1742 return idx; 1743 } 1744 1745 /* 1746 * Reload the spell file "fname" if it's loaded. 1747 */ 1748 static void 1749 spell_reload_one( 1750 char_u *fname, 1751 int added_word) /* invoked through "zg" */ 1752 { 1753 slang_T *slang; 1754 int didit = FALSE; 1755 1756 for (slang = first_lang; slang != NULL; slang = slang->sl_next) 1757 { 1758 if (fullpathcmp(fname, slang->sl_fname, FALSE) == FPC_SAME) 1759 { 1760 slang_clear(slang); 1761 if (spell_load_file(fname, NULL, slang, FALSE) == NULL) 1762 /* reloading failed, clear the language */ 1763 slang_clear(slang); 1764 redraw_all_later(SOME_VALID); 1765 didit = TRUE; 1766 } 1767 } 1768 1769 /* When "zg" was used and the file wasn't loaded yet, should redo 1770 * 'spelllang' to load it now. */ 1771 if (added_word && !didit) 1772 did_set_spelllang(curwin); 1773 } 1774 1775 1776 /* 1777 * Functions for ":mkspell". 
 */

#define MAXLINELEN  500         /* Maximum length in bytes of a line in a .aff
                                   and .dic file. */
/*
 * Main structure to store the contents of a ".aff" file.
 * The "unsigned" members hold the flag ID given for the item named in
 * their comment.
 */
typedef struct afffile_S
{
    char_u      *af_enc;        /* "SET", normalized, alloc'ed string or NULL */
    int         af_flagtype;    /* AFT_CHAR, AFT_LONG, AFT_NUM or AFT_CAPLONG */
    unsigned    af_rare;        /* RARE ID for rare word */
    unsigned    af_keepcase;    /* KEEPCASE ID for keep-case word */
    unsigned    af_bad;         /* BAD ID for banned word */
    unsigned    af_needaffix;   /* NEEDAFFIX ID */
    unsigned    af_circumfix;   /* CIRCUMFIX ID */
    unsigned    af_needcomp;    /* NEEDCOMPOUND ID */
    unsigned    af_comproot;    /* COMPOUNDROOT ID */
    unsigned    af_compforbid;  /* COMPOUNDFORBIDFLAG ID */
    unsigned    af_comppermit;  /* COMPOUNDPERMITFLAG ID */
    unsigned    af_nosuggest;   /* NOSUGGEST ID */
    int         af_pfxpostpone; /* postpone prefixes without chop string and
                                   without flags */
    int         af_ignoreextra; /* IGNOREEXTRA present */
    hashtab_T   af_pref;        /* hashtable for prefixes, affheader_T */
    hashtab_T   af_suff;        /* hashtable for suffixes, affheader_T */
    hashtab_T   af_comp;        /* hashtable for compound flags, compitem_T */
} afffile_T;

/* Values for "af_flagtype". */
#define AFT_CHAR        0       /* flags are one character */
#define AFT_LONG        1       /* flags are two characters */
#define AFT_CAPLONG     2       /* flags are one or two characters */
#define AFT_NUM         3       /* flags are numbers, comma separated */

typedef struct affentry_S affentry_T;
/* Affix entry from ".aff" file.  Used for prefixes and suffixes.
 */
struct affentry_S
{
    affentry_T  *ae_next;       /* next affix with same name/number */
    char_u      *ae_chop;       /* text to chop off basic word (can be NULL) */
    char_u      *ae_add;        /* text to add to basic word (can be NULL) */
    char_u      *ae_flags;      /* flags on the affix (can be NULL) */
    char_u      *ae_cond;       /* condition (NULL for ".") */
    regprog_T   *ae_prog;       /* regexp program for ae_cond or NULL */
    char        ae_compforbid;  /* COMPOUNDFORBIDFLAG found */
    char        ae_comppermit;  /* COMPOUNDPERMITFLAG found */
};

/* Size of the hashtable key buffers "ah_key" and "ci_key". */
#ifdef FEAT_MBYTE
# define AH_KEY_LEN 17          /* 2 x 8 bytes + NUL */
#else
# define AH_KEY_LEN 7           /* 6 digits + NUL */
#endif

/* Affix header from ".aff" file.  Used for af_pref and af_suff. */
typedef struct affheader_S
{
    char_u      ah_key[AH_KEY_LEN]; /* key for hashtab == name of affix */
    unsigned    ah_flag;        /* affix name as number, uses "af_flagtype" */
    int         ah_newID;       /* prefix ID after renumbering; 0 if not used */
    int         ah_combine;     /* suffix may combine with prefix */
    int         ah_follows;     /* another affix block should be following */
    affentry_T  *ah_first;      /* first affix entry */
} affheader_T;

/* Get the affheader_T from a hashitem_T; the cast relies on "ah_key" being
 * the first member. */
#define HI2AH(hi)   ((affheader_T *)(hi)->hi_key)

/* Flag used in compound items. */
typedef struct compitem_S
{
    char_u      ci_key[AH_KEY_LEN]; /* key for hashtab == name of compound */
    unsigned    ci_flag;        /* affix name as number, uses "af_flagtype" */
    int         ci_newID;       /* affix ID after renumbering. */
} compitem_T;

/* Get the compitem_T from a hashitem_T; the cast relies on "ci_key" being
 * the first member. */
#define HI2CI(hi)   ((compitem_T *)(hi)->hi_key)

/*
 * Structure that is used to store the items in the word tree.  This avoids
 * the need to keep track of each allocated thing, everything is freed all at
 * once after ":mkspell" is done.
 * Note: "sb_next" must be just before "sb_data" to make sure the alignment of
 * "sb_data" is correct for systems where pointers must be aligned on
 * pointer-size boundaries and sizeof(pointer) > sizeof(int) (e.g., Sparc).
 */
#define  SBLOCKSIZE 16000       /* size of sb_data */
typedef struct sblock_S sblock_T;
struct sblock_S
{
    int         sb_used;        /* nr of bytes already in use */
    sblock_T    *sb_next;       /* next block in list */
    char_u      sb_data[1];     /* data, actually longer */
};

/*
 * A node in the tree.
 */
typedef struct wordnode_S wordnode_T;
struct wordnode_S
{
    union   /* shared to save space */
    {
        char_u  hashkey[6];     /* the hash key, only used while compressing */
        int     index;          /* index in written nodes (valid after first
                                   round) */
    } wn_u1;
    union   /* shared to save space */
    {
        wordnode_T *next;       /* next node with same hash key */
        wordnode_T *wnode;      /* parent node that will write this node */
    } wn_u2;
    wordnode_T  *wn_child;      /* child (next byte in word) */
    wordnode_T  *wn_sibling;    /* next sibling (alternate byte in word,
                                   always sorted) */
    int         wn_refs;        /* Nr. of references to this node.  Only
                                   relevant for first node in a list of
                                   siblings, in following siblings it is
                                   always one. */
    char_u      wn_byte;        /* Byte for this node. NUL for word end */

    /* Info for when "wn_byte" is NUL.
     * In PREFIXTREE "wn_region" is used for the prefcondnr.
     * In the soundfolded word tree "wn_flags" has the MSW of the wordnr and
     * "wn_region" the LSW of the wordnr. */
    char_u      wn_affixID;     /* supported/required prefix ID or 0 */
    short_u     wn_flags;       /* WF_ flags */
    short       wn_region;      /* region mask */

#ifdef SPELL_PRINTTREE
    int         wn_nr;          /* sequence nr for printing */
#endif
};

#define WN_MASK  0xffff         /* mask relevant bits of "wn_flags" */

/* Get the wordnode_T from a hashitem_T. */
#define HI2WN(hi)    (wordnode_T *)((hi)->hi_key)

/*
 * Info used while reading the spell files.
 */
typedef struct spellinfo_S
{
    wordnode_T  *si_foldroot;   /* tree with case-folded words */
    long        si_foldwcount;  /* nr of words in si_foldroot */

    wordnode_T  *si_keeproot;   /* tree with keep-case words */
    long        si_keepwcount;  /* nr of words in si_keeproot */

    wordnode_T  *si_prefroot;   /* tree with postponed prefixes */

    long        si_sugtree;     /* creating the soundfolding trie */

    sblock_T    *si_blocks;     /* memory blocks used */
    long        si_blocks_cnt;  /* memory blocks allocated */
    int         si_did_emsg;    /* TRUE when ran out of memory */

    long        si_compress_cnt;    /* words to add before lowering
                                       compression limit */
    wordnode_T  *si_first_free; /* List of nodes that have been freed during
                                   compression, linked by "wn_child" field. */
    long        si_free_count;  /* number of nodes in si_first_free */
#ifdef SPELL_PRINTTREE
    int         si_wordnode_nr; /* sequence nr for nodes */
#endif
    buf_T       *si_spellbuf;   /* buffer used to store soundfold word table */

    int         si_ascii;       /* handling only ASCII words */
    int         si_add;         /* addition file */
    int         si_clear_chartab;   /* when TRUE clear char tables */
    int         si_region;      /* region mask */
    vimconv_T   si_conv;        /* for conversion to 'encoding' */
    int         si_memtot;      /* runtime memory used */
    int         si_verbose;     /* verbose messages */
    int         si_msg_count;   /* number of words added since last message */
    char_u      *si_info;       /* info text chars or NULL */
    int         si_region_count; /* number of regions supported (1 when there
                                    are no regions) */
    char_u      si_region_name[17]; /* region names; used only if
                                     * si_region_count > 1) */

    garray_T    si_rep;         /* list of fromto_T entries from REP lines */
    garray_T    si_repsal;      /* list of fromto_T entries from REPSAL lines */
    garray_T    si_sal;         /* list of fromto_T entries from SAL lines */
    char_u      *si_sofofr;     /* SOFOFROM text */
    char_u      *si_sofoto;     /* SOFOTO text */
    int         si_nosugfile;   /* NOSUGFILE item found */
    int         si_nosplitsugs; /* NOSPLITSUGS item found */
    int         si_nocompoundsugs; /* NOCOMPOUNDSUGS item found */
    int         si_followup;    /* soundsalike: ? */
    int         si_collapse;    /* soundsalike: ? */
    hashtab_T   si_commonwords; /* hashtable for common words */
    time_t      si_sugtime;     /* timestamp for .sug file */
    int         si_rem_accents; /* soundsalike: remove accents */
    garray_T    si_map;         /* MAP info concatenated */
    char_u      *si_midword;    /* MIDWORD chars or NULL  */
    int         si_compmax;     /* max nr of words for compounding */
    int         si_compminlen;  /* minimal length for compounding */
    int         si_compsylmax;  /* max nr of syllables for compounding */
    int         si_compoptions; /* COMP_ flags */
    garray_T    si_comppat;     /* CHECKCOMPOUNDPATTERN items, each stored as
                                   a string */
    char_u      *si_compflags;  /* flags used for compounding */
    char_u      si_nobreak;     /* NOBREAK */
    char_u      *si_syllable;   /* syllable string */
    garray_T    si_prefcond;    /* table with conditions for postponed
                                 * prefixes, each stored as a string */
    int         si_newprefID;   /* current value for ah_newID */
    int         si_newcompID;   /* current value for compound ID */
} spellinfo_T;

/* Forward declarations for the ":mkspell" functions in this file. */
static afffile_T *spell_read_aff(spellinfo_T *spin, char_u *fname);
static int is_aff_rule(char_u **items, int itemcnt, char *rulename, int mincount);
static void aff_process_flags(afffile_T *affile, affentry_T *entry);
static int spell_info_item(char_u *s);
static unsigned affitem2flag(int flagtype, char_u *item, char_u *fname, int lnum);
static unsigned get_affitem(int flagtype, char_u **pp);
static void process_compflags(spellinfo_T *spin, afffile_T *aff, char_u *compflags);
static void check_renumber(spellinfo_T *spin);
static int flag_in_afflist(int flagtype, char_u *afflist, unsigned flag);
static void aff_check_number(int spinval, int affval, char *name);
static void aff_check_string(char_u *spinval, char_u *affval, char *name);
static int str_equal(char_u *s1, char_u *s2);
static void add_fromto(spellinfo_T *spin, garray_T *gap, char_u *from, char_u *to);
static int sal_to_bool(char_u *s);
static void spell_free_aff(afffile_T *aff);
static int spell_read_dic(spellinfo_T *spin, char_u *fname, afffile_T *affile);
static int get_affix_flags(afffile_T *affile, char_u *afflist);
static int get_pfxlist(afffile_T *affile, char_u *afflist, char_u *store_afflist);
static void get_compflags(afffile_T *affile, char_u *afflist, char_u *store_afflist);
static int store_aff_word(spellinfo_T *spin, char_u *word, char_u *afflist, afffile_T *affile, hashtab_T *ht, hashtab_T *xht, int condit, int flags, char_u *pfxlist, int pfxlen);
static int spell_read_wordfile(spellinfo_T *spin, char_u *fname);
static void *getroom(spellinfo_T *spin, size_t len, int align);
static char_u *getroom_save(spellinfo_T *spin, char_u *s);
static void free_blocks(sblock_T *bl);
static wordnode_T *wordtree_alloc(spellinfo_T *spin);
static int store_word(spellinfo_T *spin, char_u *word, int flags, int region, char_u *pfxlist, int need_affix);
static int tree_add_word(spellinfo_T *spin, char_u *word, wordnode_T *tree, int flags, int region, int affixID);
static wordnode_T *get_wordnode(spellinfo_T *spin);
static int deref_wordnode(spellinfo_T *spin, wordnode_T *node);
static void free_wordnode(spellinfo_T *spin, wordnode_T *n);
static void wordtree_compress(spellinfo_T *spin, wordnode_T *root);
static int node_compress(spellinfo_T *spin, wordnode_T *node, hashtab_T *ht, int *tot);
static int node_equal(wordnode_T *n1, wordnode_T *n2);
static int write_vim_spell(spellinfo_T *spin, char_u *fname);
static void clear_node(wordnode_T *node);
static int put_node(FILE *fd, wordnode_T *node, int idx, int regionmask, int prefixtree);
static void spell_make_sugfile(spellinfo_T *spin, char_u *wfname);
static int sug_filltree(spellinfo_T *spin, slang_T *slang);
static int sug_maketable(spellinfo_T *spin);
static int sug_filltable(spellinfo_T *spin, wordnode_T *node, int startwordnr, garray_T *gap);
static int offset2bytes(int nr, char_u *buf);
static void sug_write(spellinfo_T *spin, char_u *fname);
static void spell_message(spellinfo_T *spin, char_u *str);
static void init_spellfile(void);

/* In the postponed prefixes tree wn_flags is used to store the WFP_ flags,
 * but it must be negative to indicate the prefix tree to tree_add_word().
 * Use a negative number with the lower 8 bits zero. */
#define PFX_FLAGS       -256

/* flags for "condit" argument of store_aff_word() */
#define CONDIT_COMB     1       /* affix must combine */
#define CONDIT_CFIX     2       /* affix must have CIRCUMFIX flag */
#define CONDIT_SUF      4       /* add a suffix for matching flags */
#define CONDIT_AFF      8       /* word already has an affix */

/*
 * Tunable parameters for when the tree is compressed.  See 'mkspellmem'.
 */
static long compress_start = 30000;     /* memory / SBLOCKSIZE */
static long compress_inc = 100;         /* memory / SBLOCKSIZE */
static long compress_added = 500000;    /* word count */

/*
 * Check the 'mkspellmem' option.  Return FAIL if it's wrong.
 * Sets "compress_start", "compress_inc" and "compress_added".
2054 */ 2055 int 2056 spell_check_msm(void) 2057 { 2058 char_u *p = p_msm; 2059 long start = 0; 2060 long incr = 0; 2061 long added = 0; 2062 2063 if (!VIM_ISDIGIT(*p)) 2064 return FAIL; 2065 /* block count = (value * 1024) / SBLOCKSIZE (but avoid overflow)*/ 2066 start = (getdigits(&p) * 10) / (SBLOCKSIZE / 102); 2067 if (*p != ',') 2068 return FAIL; 2069 ++p; 2070 if (!VIM_ISDIGIT(*p)) 2071 return FAIL; 2072 incr = (getdigits(&p) * 102) / (SBLOCKSIZE / 10); 2073 if (*p != ',') 2074 return FAIL; 2075 ++p; 2076 if (!VIM_ISDIGIT(*p)) 2077 return FAIL; 2078 added = getdigits(&p) * 1024; 2079 if (*p != NUL) 2080 return FAIL; 2081 2082 if (start == 0 || incr == 0 || added == 0 || incr > start) 2083 return FAIL; 2084 2085 compress_start = start; 2086 compress_inc = incr; 2087 compress_added = added; 2088 return OK; 2089 } 2090 2091 #ifdef SPELL_PRINTTREE 2092 /* 2093 * For debugging the tree code: print the current tree in a (more or less) 2094 * readable format, so that we can see what happens when adding a word and/or 2095 * compressing the tree. 2096 * Based on code from Olaf Seibert. 2097 */ 2098 #define PRINTLINESIZE 1000 2099 #define PRINTWIDTH 6 2100 2101 #define PRINTSOME(l, depth, fmt, a1, a2) vim_snprintf(l + depth * PRINTWIDTH, \ 2102 PRINTLINESIZE - PRINTWIDTH * depth, fmt, a1, a2) 2103 2104 static char line1[PRINTLINESIZE]; 2105 static char line2[PRINTLINESIZE]; 2106 static char line3[PRINTLINESIZE]; 2107 2108 static void 2109 spell_clear_flags(wordnode_T *node) 2110 { 2111 wordnode_T *np; 2112 2113 for (np = node; np != NULL; np = np->wn_sibling) 2114 { 2115 np->wn_u1.index = FALSE; 2116 spell_clear_flags(np->wn_child); 2117 } 2118 } 2119 2120 static void 2121 spell_print_node(wordnode_T *node, int depth) 2122 { 2123 if (node->wn_u1.index) 2124 { 2125 /* Done this node before, print the reference. 
*/ 2126 PRINTSOME(line1, depth, "(%d)", node->wn_nr, 0); 2127 PRINTSOME(line2, depth, " ", 0, 0); 2128 PRINTSOME(line3, depth, " ", 0, 0); 2129 msg((char_u *)line1); 2130 msg((char_u *)line2); 2131 msg((char_u *)line3); 2132 } 2133 else 2134 { 2135 node->wn_u1.index = TRUE; 2136 2137 if (node->wn_byte != NUL) 2138 { 2139 if (node->wn_child != NULL) 2140 PRINTSOME(line1, depth, " %c -> ", node->wn_byte, 0); 2141 else 2142 /* Cannot happen? */ 2143 PRINTSOME(line1, depth, " %c ???", node->wn_byte, 0); 2144 } 2145 else 2146 PRINTSOME(line1, depth, " $ ", 0, 0); 2147 2148 PRINTSOME(line2, depth, "%d/%d ", node->wn_nr, node->wn_refs); 2149 2150 if (node->wn_sibling != NULL) 2151 PRINTSOME(line3, depth, " | ", 0, 0); 2152 else 2153 PRINTSOME(line3, depth, " ", 0, 0); 2154 2155 if (node->wn_byte == NUL) 2156 { 2157 msg((char_u *)line1); 2158 msg((char_u *)line2); 2159 msg((char_u *)line3); 2160 } 2161 2162 /* do the children */ 2163 if (node->wn_byte != NUL && node->wn_child != NULL) 2164 spell_print_node(node->wn_child, depth + 1); 2165 2166 /* do the siblings */ 2167 if (node->wn_sibling != NULL) 2168 { 2169 /* get rid of all parent details except | */ 2170 STRCPY(line1, line3); 2171 STRCPY(line2, line3); 2172 spell_print_node(node->wn_sibling, depth); 2173 } 2174 } 2175 } 2176 2177 static void 2178 spell_print_tree(wordnode_T *root) 2179 { 2180 if (root != NULL) 2181 { 2182 /* Clear the "wn_u1.index" fields, used to remember what has been 2183 * done. */ 2184 spell_clear_flags(root); 2185 2186 /* Recursively print the tree. */ 2187 spell_print_node(root, 0); 2188 } 2189 } 2190 #endif /* SPELL_PRINTTREE */ 2191 2192 /* 2193 * Read the affix file "fname". 2194 * Returns an afffile_T, NULL for complete failure. 
2195 */ 2196 static afffile_T * 2197 spell_read_aff(spellinfo_T *spin, char_u *fname) 2198 { 2199 FILE *fd; 2200 afffile_T *aff; 2201 char_u rline[MAXLINELEN]; 2202 char_u *line; 2203 char_u *pc = NULL; 2204 #define MAXITEMCNT 30 2205 char_u *(items[MAXITEMCNT]); 2206 int itemcnt; 2207 char_u *p; 2208 int lnum = 0; 2209 affheader_T *cur_aff = NULL; 2210 int did_postpone_prefix = FALSE; 2211 int aff_todo = 0; 2212 hashtab_T *tp; 2213 char_u *low = NULL; 2214 char_u *fol = NULL; 2215 char_u *upp = NULL; 2216 int do_rep; 2217 int do_repsal; 2218 int do_sal; 2219 int do_mapline; 2220 int found_map = FALSE; 2221 hashitem_T *hi; 2222 int l; 2223 int compminlen = 0; /* COMPOUNDMIN value */ 2224 int compsylmax = 0; /* COMPOUNDSYLMAX value */ 2225 int compoptions = 0; /* COMP_ flags */ 2226 int compmax = 0; /* COMPOUNDWORDMAX value */ 2227 char_u *compflags = NULL; /* COMPOUNDFLAG and COMPOUNDRULE 2228 concatenated */ 2229 char_u *midword = NULL; /* MIDWORD value */ 2230 char_u *syllable = NULL; /* SYLLABLE value */ 2231 char_u *sofofrom = NULL; /* SOFOFROM value */ 2232 char_u *sofoto = NULL; /* SOFOTO value */ 2233 2234 /* 2235 * Open the file. 2236 */ 2237 fd = mch_fopen((char *)fname, "r"); 2238 if (fd == NULL) 2239 { 2240 EMSG2(_(e_notopen), fname); 2241 return NULL; 2242 } 2243 2244 vim_snprintf((char *)IObuff, IOSIZE, _("Reading affix file %s ..."), fname); 2245 spell_message(spin, IObuff); 2246 2247 /* Only do REP lines when not done in another .aff file already. */ 2248 do_rep = spin->si_rep.ga_len == 0; 2249 2250 /* Only do REPSAL lines when not done in another .aff file already. */ 2251 do_repsal = spin->si_repsal.ga_len == 0; 2252 2253 /* Only do SAL lines when not done in another .aff file already. */ 2254 do_sal = spin->si_sal.ga_len == 0; 2255 2256 /* Only do MAP lines when not done in another .aff file already. */ 2257 do_mapline = spin->si_map.ga_len == 0; 2258 2259 /* 2260 * Allocate and init the afffile_T structure. 
2261 */ 2262 aff = (afffile_T *)getroom(spin, sizeof(afffile_T), TRUE); 2263 if (aff == NULL) 2264 { 2265 fclose(fd); 2266 return NULL; 2267 } 2268 hash_init(&aff->af_pref); 2269 hash_init(&aff->af_suff); 2270 hash_init(&aff->af_comp); 2271 2272 /* 2273 * Read all the lines in the file one by one. 2274 */ 2275 while (!vim_fgets(rline, MAXLINELEN, fd) && !got_int) 2276 { 2277 line_breakcheck(); 2278 ++lnum; 2279 2280 /* Skip comment lines. */ 2281 if (*rline == '#') 2282 continue; 2283 2284 /* Convert from "SET" to 'encoding' when needed. */ 2285 vim_free(pc); 2286 #ifdef FEAT_MBYTE 2287 if (spin->si_conv.vc_type != CONV_NONE) 2288 { 2289 pc = string_convert(&spin->si_conv, rline, NULL); 2290 if (pc == NULL) 2291 { 2292 smsg((char_u *)_("Conversion failure for word in %s line %d: %s"), 2293 fname, lnum, rline); 2294 continue; 2295 } 2296 line = pc; 2297 } 2298 else 2299 #endif 2300 { 2301 pc = NULL; 2302 line = rline; 2303 } 2304 2305 /* Split the line up in white separated items. Put a NUL after each 2306 * item. */ 2307 itemcnt = 0; 2308 for (p = line; ; ) 2309 { 2310 while (*p != NUL && *p <= ' ') /* skip white space and CR/NL */ 2311 ++p; 2312 if (*p == NUL) 2313 break; 2314 if (itemcnt == MAXITEMCNT) /* too many items */ 2315 break; 2316 items[itemcnt++] = p; 2317 /* A few items have arbitrary text argument, don't split them. */ 2318 if (itemcnt == 2 && spell_info_item(items[0])) 2319 while (*p >= ' ' || *p == TAB) /* skip until CR/NL */ 2320 ++p; 2321 else 2322 while (*p > ' ') /* skip until white space or CR/NL */ 2323 ++p; 2324 if (*p == NUL) 2325 break; 2326 *p++ = NUL; 2327 } 2328 2329 /* Handle non-empty lines. */ 2330 if (itemcnt > 0) 2331 { 2332 if (is_aff_rule(items, itemcnt, "SET", 2) && aff->af_enc == NULL) 2333 { 2334 #ifdef FEAT_MBYTE 2335 /* Setup for conversion from "ENC" to 'encoding'. 
*/ 2336 aff->af_enc = enc_canonize(items[1]); 2337 if (aff->af_enc != NULL && !spin->si_ascii 2338 && convert_setup(&spin->si_conv, aff->af_enc, 2339 p_enc) == FAIL) 2340 smsg((char_u *)_("Conversion in %s not supported: from %s to %s"), 2341 fname, aff->af_enc, p_enc); 2342 spin->si_conv.vc_fail = TRUE; 2343 #else 2344 smsg((char_u *)_("Conversion in %s not supported"), fname); 2345 #endif 2346 } 2347 else if (is_aff_rule(items, itemcnt, "FLAG", 2) 2348 && aff->af_flagtype == AFT_CHAR) 2349 { 2350 if (STRCMP(items[1], "long") == 0) 2351 aff->af_flagtype = AFT_LONG; 2352 else if (STRCMP(items[1], "num") == 0) 2353 aff->af_flagtype = AFT_NUM; 2354 else if (STRCMP(items[1], "caplong") == 0) 2355 aff->af_flagtype = AFT_CAPLONG; 2356 else 2357 smsg((char_u *)_("Invalid value for FLAG in %s line %d: %s"), 2358 fname, lnum, items[1]); 2359 if (aff->af_rare != 0 2360 || aff->af_keepcase != 0 2361 || aff->af_bad != 0 2362 || aff->af_needaffix != 0 2363 || aff->af_circumfix != 0 2364 || aff->af_needcomp != 0 2365 || aff->af_comproot != 0 2366 || aff->af_nosuggest != 0 2367 || compflags != NULL 2368 || aff->af_suff.ht_used > 0 2369 || aff->af_pref.ht_used > 0) 2370 smsg((char_u *)_("FLAG after using flags in %s line %d: %s"), 2371 fname, lnum, items[1]); 2372 } 2373 else if (spell_info_item(items[0])) 2374 { 2375 p = (char_u *)getroom(spin, 2376 (spin->si_info == NULL ? 
0 : STRLEN(spin->si_info)) 2377 + STRLEN(items[0]) 2378 + STRLEN(items[1]) + 3, FALSE); 2379 if (p != NULL) 2380 { 2381 if (spin->si_info != NULL) 2382 { 2383 STRCPY(p, spin->si_info); 2384 STRCAT(p, "\n"); 2385 } 2386 STRCAT(p, items[0]); 2387 STRCAT(p, " "); 2388 STRCAT(p, items[1]); 2389 spin->si_info = p; 2390 } 2391 } 2392 else if (is_aff_rule(items, itemcnt, "MIDWORD", 2) 2393 && midword == NULL) 2394 { 2395 midword = getroom_save(spin, items[1]); 2396 } 2397 else if (is_aff_rule(items, itemcnt, "TRY", 2)) 2398 { 2399 /* ignored, we look in the tree for what chars may appear */ 2400 } 2401 /* TODO: remove "RAR" later */ 2402 else if ((is_aff_rule(items, itemcnt, "RAR", 2) 2403 || is_aff_rule(items, itemcnt, "RARE", 2)) 2404 && aff->af_rare == 0) 2405 { 2406 aff->af_rare = affitem2flag(aff->af_flagtype, items[1], 2407 fname, lnum); 2408 } 2409 /* TODO: remove "KEP" later */ 2410 else if ((is_aff_rule(items, itemcnt, "KEP", 2) 2411 || is_aff_rule(items, itemcnt, "KEEPCASE", 2)) 2412 && aff->af_keepcase == 0) 2413 { 2414 aff->af_keepcase = affitem2flag(aff->af_flagtype, items[1], 2415 fname, lnum); 2416 } 2417 else if ((is_aff_rule(items, itemcnt, "BAD", 2) 2418 || is_aff_rule(items, itemcnt, "FORBIDDENWORD", 2)) 2419 && aff->af_bad == 0) 2420 { 2421 aff->af_bad = affitem2flag(aff->af_flagtype, items[1], 2422 fname, lnum); 2423 } 2424 else if (is_aff_rule(items, itemcnt, "NEEDAFFIX", 2) 2425 && aff->af_needaffix == 0) 2426 { 2427 aff->af_needaffix = affitem2flag(aff->af_flagtype, items[1], 2428 fname, lnum); 2429 } 2430 else if (is_aff_rule(items, itemcnt, "CIRCUMFIX", 2) 2431 && aff->af_circumfix == 0) 2432 { 2433 aff->af_circumfix = affitem2flag(aff->af_flagtype, items[1], 2434 fname, lnum); 2435 } 2436 else if (is_aff_rule(items, itemcnt, "NOSUGGEST", 2) 2437 && aff->af_nosuggest == 0) 2438 { 2439 aff->af_nosuggest = affitem2flag(aff->af_flagtype, items[1], 2440 fname, lnum); 2441 } 2442 else if ((is_aff_rule(items, itemcnt, "NEEDCOMPOUND", 2) 2443 || 
is_aff_rule(items, itemcnt, "ONLYINCOMPOUND", 2)) 2444 && aff->af_needcomp == 0) 2445 { 2446 aff->af_needcomp = affitem2flag(aff->af_flagtype, items[1], 2447 fname, lnum); 2448 } 2449 else if (is_aff_rule(items, itemcnt, "COMPOUNDROOT", 2) 2450 && aff->af_comproot == 0) 2451 { 2452 aff->af_comproot = affitem2flag(aff->af_flagtype, items[1], 2453 fname, lnum); 2454 } 2455 else if (is_aff_rule(items, itemcnt, "COMPOUNDFORBIDFLAG", 2) 2456 && aff->af_compforbid == 0) 2457 { 2458 aff->af_compforbid = affitem2flag(aff->af_flagtype, items[1], 2459 fname, lnum); 2460 if (aff->af_pref.ht_used > 0) 2461 smsg((char_u *)_("Defining COMPOUNDFORBIDFLAG after PFX item may give wrong results in %s line %d"), 2462 fname, lnum); 2463 } 2464 else if (is_aff_rule(items, itemcnt, "COMPOUNDPERMITFLAG", 2) 2465 && aff->af_comppermit == 0) 2466 { 2467 aff->af_comppermit = affitem2flag(aff->af_flagtype, items[1], 2468 fname, lnum); 2469 if (aff->af_pref.ht_used > 0) 2470 smsg((char_u *)_("Defining COMPOUNDPERMITFLAG after PFX item may give wrong results in %s line %d"), 2471 fname, lnum); 2472 } 2473 else if (is_aff_rule(items, itemcnt, "COMPOUNDFLAG", 2) 2474 && compflags == NULL) 2475 { 2476 /* Turn flag "c" into COMPOUNDRULE compatible string "c+", 2477 * "Na" into "Na+", "1234" into "1234+". */ 2478 p = getroom(spin, STRLEN(items[1]) + 2, FALSE); 2479 if (p != NULL) 2480 { 2481 STRCPY(p, items[1]); 2482 STRCAT(p, "+"); 2483 compflags = p; 2484 } 2485 } 2486 else if (is_aff_rule(items, itemcnt, "COMPOUNDRULES", 2)) 2487 { 2488 /* We don't use the count, but do check that it's a number and 2489 * not COMPOUNDRULE mistyped. */ 2490 if (atoi((char *)items[1]) == 0) 2491 smsg((char_u *)_("Wrong COMPOUNDRULES value in %s line %d: %s"), 2492 fname, lnum, items[1]); 2493 } 2494 else if (is_aff_rule(items, itemcnt, "COMPOUNDRULE", 2)) 2495 { 2496 /* Don't use the first rule if it is a number. 
*/ 2497 if (compflags != NULL || *skipdigits(items[1]) != NUL) 2498 { 2499 /* Concatenate this string to previously defined ones, 2500 * using a slash to separate them. */ 2501 l = (int)STRLEN(items[1]) + 1; 2502 if (compflags != NULL) 2503 l += (int)STRLEN(compflags) + 1; 2504 p = getroom(spin, l, FALSE); 2505 if (p != NULL) 2506 { 2507 if (compflags != NULL) 2508 { 2509 STRCPY(p, compflags); 2510 STRCAT(p, "/"); 2511 } 2512 STRCAT(p, items[1]); 2513 compflags = p; 2514 } 2515 } 2516 } 2517 else if (is_aff_rule(items, itemcnt, "COMPOUNDWORDMAX", 2) 2518 && compmax == 0) 2519 { 2520 compmax = atoi((char *)items[1]); 2521 if (compmax == 0) 2522 smsg((char_u *)_("Wrong COMPOUNDWORDMAX value in %s line %d: %s"), 2523 fname, lnum, items[1]); 2524 } 2525 else if (is_aff_rule(items, itemcnt, "COMPOUNDMIN", 2) 2526 && compminlen == 0) 2527 { 2528 compminlen = atoi((char *)items[1]); 2529 if (compminlen == 0) 2530 smsg((char_u *)_("Wrong COMPOUNDMIN value in %s line %d: %s"), 2531 fname, lnum, items[1]); 2532 } 2533 else if (is_aff_rule(items, itemcnt, "COMPOUNDSYLMAX", 2) 2534 && compsylmax == 0) 2535 { 2536 compsylmax = atoi((char *)items[1]); 2537 if (compsylmax == 0) 2538 smsg((char_u *)_("Wrong COMPOUNDSYLMAX value in %s line %d: %s"), 2539 fname, lnum, items[1]); 2540 } 2541 else if (is_aff_rule(items, itemcnt, "CHECKCOMPOUNDDUP", 1)) 2542 { 2543 compoptions |= COMP_CHECKDUP; 2544 } 2545 else if (is_aff_rule(items, itemcnt, "CHECKCOMPOUNDREP", 1)) 2546 { 2547 compoptions |= COMP_CHECKREP; 2548 } 2549 else if (is_aff_rule(items, itemcnt, "CHECKCOMPOUNDCASE", 1)) 2550 { 2551 compoptions |= COMP_CHECKCASE; 2552 } 2553 else if (is_aff_rule(items, itemcnt, "CHECKCOMPOUNDTRIPLE", 1)) 2554 { 2555 compoptions |= COMP_CHECKTRIPLE; 2556 } 2557 else if (is_aff_rule(items, itemcnt, "CHECKCOMPOUNDPATTERN", 2)) 2558 { 2559 if (atoi((char *)items[1]) == 0) 2560 smsg((char_u *)_("Wrong CHECKCOMPOUNDPATTERN value in %s line %d: %s"), 2561 fname, lnum, items[1]); 2562 } 2563 else if 
(is_aff_rule(items, itemcnt, "CHECKCOMPOUNDPATTERN", 3)) 2564 { 2565 garray_T *gap = &spin->si_comppat; 2566 int i; 2567 2568 /* Only add the couple if it isn't already there. */ 2569 for (i = 0; i < gap->ga_len - 1; i += 2) 2570 if (STRCMP(((char_u **)(gap->ga_data))[i], items[1]) == 0 2571 && STRCMP(((char_u **)(gap->ga_data))[i + 1], 2572 items[2]) == 0) 2573 break; 2574 if (i >= gap->ga_len && ga_grow(gap, 2) == OK) 2575 { 2576 ((char_u **)(gap->ga_data))[gap->ga_len++] 2577 = getroom_save(spin, items[1]); 2578 ((char_u **)(gap->ga_data))[gap->ga_len++] 2579 = getroom_save(spin, items[2]); 2580 } 2581 } 2582 else if (is_aff_rule(items, itemcnt, "SYLLABLE", 2) 2583 && syllable == NULL) 2584 { 2585 syllable = getroom_save(spin, items[1]); 2586 } 2587 else if (is_aff_rule(items, itemcnt, "NOBREAK", 1)) 2588 { 2589 spin->si_nobreak = TRUE; 2590 } 2591 else if (is_aff_rule(items, itemcnt, "NOSPLITSUGS", 1)) 2592 { 2593 spin->si_nosplitsugs = TRUE; 2594 } 2595 else if (is_aff_rule(items, itemcnt, "NOCOMPOUNDSUGS", 1)) 2596 { 2597 spin->si_nocompoundsugs = TRUE; 2598 } 2599 else if (is_aff_rule(items, itemcnt, "NOSUGFILE", 1)) 2600 { 2601 spin->si_nosugfile = TRUE; 2602 } 2603 else if (is_aff_rule(items, itemcnt, "PFXPOSTPONE", 1)) 2604 { 2605 aff->af_pfxpostpone = TRUE; 2606 } 2607 else if (is_aff_rule(items, itemcnt, "IGNOREEXTRA", 1)) 2608 { 2609 aff->af_ignoreextra = TRUE; 2610 } 2611 else if ((STRCMP(items[0], "PFX") == 0 2612 || STRCMP(items[0], "SFX") == 0) 2613 && aff_todo == 0 2614 && itemcnt >= 4) 2615 { 2616 int lasti = 4; 2617 char_u key[AH_KEY_LEN]; 2618 2619 if (*items[0] == 'P') 2620 tp = &aff->af_pref; 2621 else 2622 tp = &aff->af_suff; 2623 2624 /* Myspell allows the same affix name to be used multiple 2625 * times. The affix files that do this have an undocumented 2626 * "S" flag on all but the last block, thus we check for that 2627 * and store it in ah_follows. 
*/ 2628 vim_strncpy(key, items[1], AH_KEY_LEN - 1); 2629 hi = hash_find(tp, key); 2630 if (!HASHITEM_EMPTY(hi)) 2631 { 2632 cur_aff = HI2AH(hi); 2633 if (cur_aff->ah_combine != (*items[2] == 'Y')) 2634 smsg((char_u *)_("Different combining flag in continued affix block in %s line %d: %s"), 2635 fname, lnum, items[1]); 2636 if (!cur_aff->ah_follows) 2637 smsg((char_u *)_("Duplicate affix in %s line %d: %s"), 2638 fname, lnum, items[1]); 2639 } 2640 else 2641 { 2642 /* New affix letter. */ 2643 cur_aff = (affheader_T *)getroom(spin, 2644 sizeof(affheader_T), TRUE); 2645 if (cur_aff == NULL) 2646 break; 2647 cur_aff->ah_flag = affitem2flag(aff->af_flagtype, items[1], 2648 fname, lnum); 2649 if (cur_aff->ah_flag == 0 || STRLEN(items[1]) >= AH_KEY_LEN) 2650 break; 2651 if (cur_aff->ah_flag == aff->af_bad 2652 || cur_aff->ah_flag == aff->af_rare 2653 || cur_aff->ah_flag == aff->af_keepcase 2654 || cur_aff->ah_flag == aff->af_needaffix 2655 || cur_aff->ah_flag == aff->af_circumfix 2656 || cur_aff->ah_flag == aff->af_nosuggest 2657 || cur_aff->ah_flag == aff->af_needcomp 2658 || cur_aff->ah_flag == aff->af_comproot) 2659 smsg((char_u *)_("Affix also used for BAD/RARE/KEEPCASE/NEEDAFFIX/NEEDCOMPOUND/NOSUGGEST in %s line %d: %s"), 2660 fname, lnum, items[1]); 2661 STRCPY(cur_aff->ah_key, items[1]); 2662 hash_add(tp, cur_aff->ah_key); 2663 2664 cur_aff->ah_combine = (*items[2] == 'Y'); 2665 } 2666 2667 /* Check for the "S" flag, which apparently means that another 2668 * block with the same affix name is following. */ 2669 if (itemcnt > lasti && STRCMP(items[lasti], "S") == 0) 2670 { 2671 ++lasti; 2672 cur_aff->ah_follows = TRUE; 2673 } 2674 else 2675 cur_aff->ah_follows = FALSE; 2676 2677 /* Myspell allows extra text after the item, but that might 2678 * mean mistakes go unnoticed. Require a comment-starter. 
*/ 2679 if (itemcnt > lasti && *items[lasti] != '#') 2680 smsg((char_u *)_(e_afftrailing), fname, lnum, items[lasti]); 2681 2682 if (STRCMP(items[2], "Y") != 0 && STRCMP(items[2], "N") != 0) 2683 smsg((char_u *)_("Expected Y or N in %s line %d: %s"), 2684 fname, lnum, items[2]); 2685 2686 if (*items[0] == 'P' && aff->af_pfxpostpone) 2687 { 2688 if (cur_aff->ah_newID == 0) 2689 { 2690 /* Use a new number in the .spl file later, to be able 2691 * to handle multiple .aff files. */ 2692 check_renumber(spin); 2693 cur_aff->ah_newID = ++spin->si_newprefID; 2694 2695 /* We only really use ah_newID if the prefix is 2696 * postponed. We know that only after handling all 2697 * the items. */ 2698 did_postpone_prefix = FALSE; 2699 } 2700 else 2701 /* Did use the ID in a previous block. */ 2702 did_postpone_prefix = TRUE; 2703 } 2704 2705 aff_todo = atoi((char *)items[3]); 2706 } 2707 else if ((STRCMP(items[0], "PFX") == 0 2708 || STRCMP(items[0], "SFX") == 0) 2709 && aff_todo > 0 2710 && STRCMP(cur_aff->ah_key, items[1]) == 0 2711 && itemcnt >= 5) 2712 { 2713 affentry_T *aff_entry; 2714 int upper = FALSE; 2715 int lasti = 5; 2716 2717 /* Myspell allows extra text after the item, but that might 2718 * mean mistakes go unnoticed. Require a comment-starter, 2719 * unless IGNOREEXTRA is used. Hunspell uses a "-" item. */ 2720 if (itemcnt > lasti 2721 && !aff->af_ignoreextra 2722 && *items[lasti] != '#' 2723 && (STRCMP(items[lasti], "-") != 0 2724 || itemcnt != lasti + 1)) 2725 smsg((char_u *)_(e_afftrailing), fname, lnum, items[lasti]); 2726 2727 /* New item for an affix letter. 
*/ 2728 --aff_todo; 2729 aff_entry = (affentry_T *)getroom(spin, 2730 sizeof(affentry_T), TRUE); 2731 if (aff_entry == NULL) 2732 break; 2733 2734 if (STRCMP(items[2], "0") != 0) 2735 aff_entry->ae_chop = getroom_save(spin, items[2]); 2736 if (STRCMP(items[3], "0") != 0) 2737 { 2738 aff_entry->ae_add = getroom_save(spin, items[3]); 2739 2740 /* Recognize flags on the affix: abcd/XYZ */ 2741 aff_entry->ae_flags = vim_strchr(aff_entry->ae_add, '/'); 2742 if (aff_entry->ae_flags != NULL) 2743 { 2744 *aff_entry->ae_flags++ = NUL; 2745 aff_process_flags(aff, aff_entry); 2746 } 2747 } 2748 2749 /* Don't use an affix entry with non-ASCII characters when 2750 * "spin->si_ascii" is TRUE. */ 2751 if (!spin->si_ascii || !(has_non_ascii(aff_entry->ae_chop) 2752 || has_non_ascii(aff_entry->ae_add))) 2753 { 2754 aff_entry->ae_next = cur_aff->ah_first; 2755 cur_aff->ah_first = aff_entry; 2756 2757 if (STRCMP(items[4], ".") != 0) 2758 { 2759 char_u buf[MAXLINELEN]; 2760 2761 aff_entry->ae_cond = getroom_save(spin, items[4]); 2762 if (*items[0] == 'P') 2763 sprintf((char *)buf, "^%s", items[4]); 2764 else 2765 sprintf((char *)buf, "%s$", items[4]); 2766 aff_entry->ae_prog = vim_regcomp(buf, 2767 RE_MAGIC + RE_STRING + RE_STRICT); 2768 if (aff_entry->ae_prog == NULL) 2769 smsg((char_u *)_("Broken condition in %s line %d: %s"), 2770 fname, lnum, items[4]); 2771 } 2772 2773 /* For postponed prefixes we need an entry in si_prefcond 2774 * for the condition. Use an existing one if possible. 2775 * Can't be done for an affix with flags, ignoring 2776 * COMPOUNDFORBIDFLAG and COMPOUNDPERMITFLAG. */ 2777 if (*items[0] == 'P' && aff->af_pfxpostpone 2778 && aff_entry->ae_flags == NULL) 2779 { 2780 /* When the chop string is one lower-case letter and 2781 * the add string ends in the upper-case letter we set 2782 * the "upper" flag, clear "ae_chop" and remove the 2783 * letters from "ae_add". The condition must either 2784 * be empty or start with the same letter. 
*/ 2785 if (aff_entry->ae_chop != NULL 2786 && aff_entry->ae_add != NULL 2787 #ifdef FEAT_MBYTE 2788 && aff_entry->ae_chop[(*mb_ptr2len)( 2789 aff_entry->ae_chop)] == NUL 2790 #else 2791 && aff_entry->ae_chop[1] == NUL 2792 #endif 2793 ) 2794 { 2795 int c, c_up; 2796 2797 c = PTR2CHAR(aff_entry->ae_chop); 2798 c_up = SPELL_TOUPPER(c); 2799 if (c_up != c 2800 && (aff_entry->ae_cond == NULL 2801 || PTR2CHAR(aff_entry->ae_cond) == c)) 2802 { 2803 p = aff_entry->ae_add 2804 + STRLEN(aff_entry->ae_add); 2805 MB_PTR_BACK(aff_entry->ae_add, p); 2806 if (PTR2CHAR(p) == c_up) 2807 { 2808 upper = TRUE; 2809 aff_entry->ae_chop = NULL; 2810 *p = NUL; 2811 2812 /* The condition is matched with the 2813 * actual word, thus must check for the 2814 * upper-case letter. */ 2815 if (aff_entry->ae_cond != NULL) 2816 { 2817 char_u buf[MAXLINELEN]; 2818 #ifdef FEAT_MBYTE 2819 if (has_mbyte) 2820 { 2821 onecap_copy(items[4], buf, TRUE); 2822 aff_entry->ae_cond = getroom_save( 2823 spin, buf); 2824 } 2825 else 2826 #endif 2827 *aff_entry->ae_cond = c_up; 2828 if (aff_entry->ae_cond != NULL) 2829 { 2830 sprintf((char *)buf, "^%s", 2831 aff_entry->ae_cond); 2832 vim_regfree(aff_entry->ae_prog); 2833 aff_entry->ae_prog = vim_regcomp( 2834 buf, RE_MAGIC + RE_STRING); 2835 } 2836 } 2837 } 2838 } 2839 } 2840 2841 if (aff_entry->ae_chop == NULL 2842 && aff_entry->ae_flags == NULL) 2843 { 2844 int idx; 2845 char_u **pp; 2846 int n; 2847 2848 /* Find a previously used condition. */ 2849 for (idx = spin->si_prefcond.ga_len - 1; idx >= 0; 2850 --idx) 2851 { 2852 p = ((char_u **)spin->si_prefcond.ga_data)[idx]; 2853 if (str_equal(p, aff_entry->ae_cond)) 2854 break; 2855 } 2856 if (idx < 0 && ga_grow(&spin->si_prefcond, 1) == OK) 2857 { 2858 /* Not found, add a new condition. 
*/ 2859 idx = spin->si_prefcond.ga_len++; 2860 pp = ((char_u **)spin->si_prefcond.ga_data) 2861 + idx; 2862 if (aff_entry->ae_cond == NULL) 2863 *pp = NULL; 2864 else 2865 *pp = getroom_save(spin, 2866 aff_entry->ae_cond); 2867 } 2868 2869 /* Add the prefix to the prefix tree. */ 2870 if (aff_entry->ae_add == NULL) 2871 p = (char_u *)""; 2872 else 2873 p = aff_entry->ae_add; 2874 2875 /* PFX_FLAGS is a negative number, so that 2876 * tree_add_word() knows this is the prefix tree. */ 2877 n = PFX_FLAGS; 2878 if (!cur_aff->ah_combine) 2879 n |= WFP_NC; 2880 if (upper) 2881 n |= WFP_UP; 2882 if (aff_entry->ae_comppermit) 2883 n |= WFP_COMPPERMIT; 2884 if (aff_entry->ae_compforbid) 2885 n |= WFP_COMPFORBID; 2886 tree_add_word(spin, p, spin->si_prefroot, n, 2887 idx, cur_aff->ah_newID); 2888 did_postpone_prefix = TRUE; 2889 } 2890 2891 /* Didn't actually use ah_newID, backup si_newprefID. */ 2892 if (aff_todo == 0 && !did_postpone_prefix) 2893 { 2894 --spin->si_newprefID; 2895 cur_aff->ah_newID = 0; 2896 } 2897 } 2898 } 2899 } 2900 else if (is_aff_rule(items, itemcnt, "FOL", 2) && fol == NULL) 2901 { 2902 fol = vim_strsave(items[1]); 2903 } 2904 else if (is_aff_rule(items, itemcnt, "LOW", 2) && low == NULL) 2905 { 2906 low = vim_strsave(items[1]); 2907 } 2908 else if (is_aff_rule(items, itemcnt, "UPP", 2) && upp == NULL) 2909 { 2910 upp = vim_strsave(items[1]); 2911 } 2912 else if (is_aff_rule(items, itemcnt, "REP", 2) 2913 || is_aff_rule(items, itemcnt, "REPSAL", 2)) 2914 { 2915 /* Ignore REP/REPSAL count */; 2916 if (!isdigit(*items[1])) 2917 smsg((char_u *)_("Expected REP(SAL) count in %s line %d"), 2918 fname, lnum); 2919 } 2920 else if ((STRCMP(items[0], "REP") == 0 2921 || STRCMP(items[0], "REPSAL") == 0) 2922 && itemcnt >= 3) 2923 { 2924 /* REP/REPSAL item */ 2925 /* Myspell ignores extra arguments, we require it starts with 2926 * # to detect mistakes. 
*/ 2927 if (itemcnt > 3 && items[3][0] != '#') 2928 smsg((char_u *)_(e_afftrailing), fname, lnum, items[3]); 2929 if (items[0][3] == 'S' ? do_repsal : do_rep) 2930 { 2931 /* Replace underscore with space (can't include a space 2932 * directly). */ 2933 for (p = items[1]; *p != NUL; MB_PTR_ADV(p)) 2934 if (*p == '_') 2935 *p = ' '; 2936 for (p = items[2]; *p != NUL; MB_PTR_ADV(p)) 2937 if (*p == '_') 2938 *p = ' '; 2939 add_fromto(spin, items[0][3] == 'S' 2940 ? &spin->si_repsal 2941 : &spin->si_rep, items[1], items[2]); 2942 } 2943 } 2944 else if (is_aff_rule(items, itemcnt, "MAP", 2)) 2945 { 2946 /* MAP item or count */ 2947 if (!found_map) 2948 { 2949 /* First line contains the count. */ 2950 found_map = TRUE; 2951 if (!isdigit(*items[1])) 2952 smsg((char_u *)_("Expected MAP count in %s line %d"), 2953 fname, lnum); 2954 } 2955 else if (do_mapline) 2956 { 2957 int c; 2958 2959 /* Check that every character appears only once. */ 2960 for (p = items[1]; *p != NUL; ) 2961 { 2962 #ifdef FEAT_MBYTE 2963 c = mb_ptr2char_adv(&p); 2964 #else 2965 c = *p++; 2966 #endif 2967 if ((spin->si_map.ga_len > 0 2968 && vim_strchr(spin->si_map.ga_data, c) 2969 != NULL) 2970 || vim_strchr(p, c) != NULL) 2971 smsg((char_u *)_("Duplicate character in MAP in %s line %d"), 2972 fname, lnum); 2973 } 2974 2975 /* We simply concatenate all the MAP strings, separated by 2976 * slashes. */ 2977 ga_concat(&spin->si_map, items[1]); 2978 ga_append(&spin->si_map, '/'); 2979 } 2980 } 2981 /* Accept "SAL from to" and "SAL from to #comment". */ 2982 else if (is_aff_rule(items, itemcnt, "SAL", 3)) 2983 { 2984 if (do_sal) 2985 { 2986 /* SAL item (sounds-a-like) 2987 * Either one of the known keys or a from-to pair. 
*/ 2988 if (STRCMP(items[1], "followup") == 0) 2989 spin->si_followup = sal_to_bool(items[2]); 2990 else if (STRCMP(items[1], "collapse_result") == 0) 2991 spin->si_collapse = sal_to_bool(items[2]); 2992 else if (STRCMP(items[1], "remove_accents") == 0) 2993 spin->si_rem_accents = sal_to_bool(items[2]); 2994 else 2995 /* when "to" is "_" it means empty */ 2996 add_fromto(spin, &spin->si_sal, items[1], 2997 STRCMP(items[2], "_") == 0 ? (char_u *)"" 2998 : items[2]); 2999 } 3000 } 3001 else if (is_aff_rule(items, itemcnt, "SOFOFROM", 2) 3002 && sofofrom == NULL) 3003 { 3004 sofofrom = getroom_save(spin, items[1]); 3005 } 3006 else if (is_aff_rule(items, itemcnt, "SOFOTO", 2) 3007 && sofoto == NULL) 3008 { 3009 sofoto = getroom_save(spin, items[1]); 3010 } 3011 else if (STRCMP(items[0], "COMMON") == 0) 3012 { 3013 int i; 3014 3015 for (i = 1; i < itemcnt; ++i) 3016 { 3017 if (HASHITEM_EMPTY(hash_find(&spin->si_commonwords, 3018 items[i]))) 3019 { 3020 p = vim_strsave(items[i]); 3021 if (p == NULL) 3022 break; 3023 hash_add(&spin->si_commonwords, p); 3024 } 3025 } 3026 } 3027 else 3028 smsg((char_u *)_("Unrecognized or duplicate item in %s line %d: %s"), 3029 fname, lnum, items[0]); 3030 } 3031 } 3032 3033 if (fol != NULL || low != NULL || upp != NULL) 3034 { 3035 if (spin->si_clear_chartab) 3036 { 3037 /* Clear the char type tables, don't want to use any of the 3038 * currently used spell properties. */ 3039 init_spell_chartab(); 3040 spin->si_clear_chartab = FALSE; 3041 } 3042 3043 /* 3044 * Don't write a word table for an ASCII file, so that we don't check 3045 * for conflicts with a word table that matches 'encoding'. 3046 * Don't write one for utf-8 either, we use utf_*() and 3047 * mb_get_class(), the list of chars in the file will be incomplete. 
3048 */ 3049 if (!spin->si_ascii 3050 #ifdef FEAT_MBYTE 3051 && !enc_utf8 3052 #endif 3053 ) 3054 { 3055 if (fol == NULL || low == NULL || upp == NULL) 3056 smsg((char_u *)_("Missing FOL/LOW/UPP line in %s"), fname); 3057 else 3058 (void)set_spell_chartab(fol, low, upp); 3059 } 3060 3061 vim_free(fol); 3062 vim_free(low); 3063 vim_free(upp); 3064 } 3065 3066 /* Use compound specifications of the .aff file for the spell info. */ 3067 if (compmax != 0) 3068 { 3069 aff_check_number(spin->si_compmax, compmax, "COMPOUNDWORDMAX"); 3070 spin->si_compmax = compmax; 3071 } 3072 3073 if (compminlen != 0) 3074 { 3075 aff_check_number(spin->si_compminlen, compminlen, "COMPOUNDMIN"); 3076 spin->si_compminlen = compminlen; 3077 } 3078 3079 if (compsylmax != 0) 3080 { 3081 if (syllable == NULL) 3082 smsg((char_u *)_("COMPOUNDSYLMAX used without SYLLABLE")); 3083 aff_check_number(spin->si_compsylmax, compsylmax, "COMPOUNDSYLMAX"); 3084 spin->si_compsylmax = compsylmax; 3085 } 3086 3087 if (compoptions != 0) 3088 { 3089 aff_check_number(spin->si_compoptions, compoptions, "COMPOUND options"); 3090 spin->si_compoptions |= compoptions; 3091 } 3092 3093 if (compflags != NULL) 3094 process_compflags(spin, aff, compflags); 3095 3096 /* Check that we didn't use too many renumbered flags. */ 3097 if (spin->si_newcompID < spin->si_newprefID) 3098 { 3099 if (spin->si_newcompID == 127 || spin->si_newcompID == 255) 3100 MSG(_("Too many postponed prefixes")); 3101 else if (spin->si_newprefID == 0 || spin->si_newprefID == 127) 3102 MSG(_("Too many compound flags")); 3103 else 3104 MSG(_("Too many postponed prefixes and/or compound flags")); 3105 } 3106 3107 if (syllable != NULL) 3108 { 3109 aff_check_string(spin->si_syllable, syllable, "SYLLABLE"); 3110 spin->si_syllable = syllable; 3111 } 3112 3113 if (sofofrom != NULL || sofoto != NULL) 3114 { 3115 if (sofofrom == NULL || sofoto == NULL) 3116 smsg((char_u *)_("Missing SOFO%s line in %s"), 3117 sofofrom == NULL ? 
"FROM" : "TO", fname); 3118 else if (spin->si_sal.ga_len > 0) 3119 smsg((char_u *)_("Both SAL and SOFO lines in %s"), fname); 3120 else 3121 { 3122 aff_check_string(spin->si_sofofr, sofofrom, "SOFOFROM"); 3123 aff_check_string(spin->si_sofoto, sofoto, "SOFOTO"); 3124 spin->si_sofofr = sofofrom; 3125 spin->si_sofoto = sofoto; 3126 } 3127 } 3128 3129 if (midword != NULL) 3130 { 3131 aff_check_string(spin->si_midword, midword, "MIDWORD"); 3132 spin->si_midword = midword; 3133 } 3134 3135 vim_free(pc); 3136 fclose(fd); 3137 return aff; 3138 } 3139 3140 /* 3141 * Return TRUE when items[0] equals "rulename", there are "mincount" items or 3142 * a comment is following after item "mincount". 3143 */ 3144 static int 3145 is_aff_rule( 3146 char_u **items, 3147 int itemcnt, 3148 char *rulename, 3149 int mincount) 3150 { 3151 return (STRCMP(items[0], rulename) == 0 3152 && (itemcnt == mincount 3153 || (itemcnt > mincount && items[mincount][0] == '#'))); 3154 } 3155 3156 /* 3157 * For affix "entry" move COMPOUNDFORBIDFLAG and COMPOUNDPERMITFLAG from 3158 * ae_flags to ae_comppermit and ae_compforbid. 3159 */ 3160 static void 3161 aff_process_flags(afffile_T *affile, affentry_T *entry) 3162 { 3163 char_u *p; 3164 char_u *prevp; 3165 unsigned flag; 3166 3167 if (entry->ae_flags != NULL 3168 && (affile->af_compforbid != 0 || affile->af_comppermit != 0)) 3169 { 3170 for (p = entry->ae_flags; *p != NUL; ) 3171 { 3172 prevp = p; 3173 flag = get_affitem(affile->af_flagtype, &p); 3174 if (flag == affile->af_comppermit || flag == affile->af_compforbid) 3175 { 3176 STRMOVE(prevp, p); 3177 p = prevp; 3178 if (flag == affile->af_comppermit) 3179 entry->ae_comppermit = TRUE; 3180 else 3181 entry->ae_compforbid = TRUE; 3182 } 3183 if (affile->af_flagtype == AFT_NUM && *p == ',') 3184 ++p; 3185 } 3186 if (*entry->ae_flags == NUL) 3187 entry->ae_flags = NULL; /* nothing left */ 3188 } 3189 } 3190 3191 /* 3192 * Return TRUE if "s" is the name of an info item in the affix file. 
3193 */ 3194 static int 3195 spell_info_item(char_u *s) 3196 { 3197 return STRCMP(s, "NAME") == 0 3198 || STRCMP(s, "HOME") == 0 3199 || STRCMP(s, "VERSION") == 0 3200 || STRCMP(s, "AUTHOR") == 0 3201 || STRCMP(s, "EMAIL") == 0 3202 || STRCMP(s, "COPYRIGHT") == 0; 3203 } 3204 3205 /* 3206 * Turn an affix flag name into a number, according to the FLAG type. 3207 * returns zero for failure. 3208 */ 3209 static unsigned 3210 affitem2flag( 3211 int flagtype, 3212 char_u *item, 3213 char_u *fname, 3214 int lnum) 3215 { 3216 unsigned res; 3217 char_u *p = item; 3218 3219 res = get_affitem(flagtype, &p); 3220 if (res == 0) 3221 { 3222 if (flagtype == AFT_NUM) 3223 smsg((char_u *)_("Flag is not a number in %s line %d: %s"), 3224 fname, lnum, item); 3225 else 3226 smsg((char_u *)_("Illegal flag in %s line %d: %s"), 3227 fname, lnum, item); 3228 } 3229 if (*p != NUL) 3230 { 3231 smsg((char_u *)_(e_affname), fname, lnum, item); 3232 return 0; 3233 } 3234 3235 return res; 3236 } 3237 3238 /* 3239 * Get one affix name from "*pp" and advance the pointer. 3240 * Returns zero for an error, still advances the pointer then. 3241 */ 3242 static unsigned 3243 get_affitem(int flagtype, char_u **pp) 3244 { 3245 int res; 3246 3247 if (flagtype == AFT_NUM) 3248 { 3249 if (!VIM_ISDIGIT(**pp)) 3250 { 3251 ++*pp; /* always advance, avoid getting stuck */ 3252 return 0; 3253 } 3254 res = getdigits(pp); 3255 } 3256 else 3257 { 3258 #ifdef FEAT_MBYTE 3259 res = mb_ptr2char_adv(pp); 3260 #else 3261 res = *(*pp)++; 3262 #endif 3263 if (flagtype == AFT_LONG || (flagtype == AFT_CAPLONG 3264 && res >= 'A' && res <= 'Z')) 3265 { 3266 if (**pp == NUL) 3267 return 0; 3268 #ifdef FEAT_MBYTE 3269 res = mb_ptr2char_adv(pp) + (res << 16); 3270 #else 3271 res = *(*pp)++ + (res << 16); 3272 #endif 3273 } 3274 } 3275 return res; 3276 } 3277 3278 /* 3279 * Process the "compflags" string used in an affix file and append it to 3280 * spin->si_compflags. 
3281 * The processing involves changing the affix names to ID numbers, so that 3282 * they fit in one byte. 3283 */ 3284 static void 3285 process_compflags( 3286 spellinfo_T *spin, 3287 afffile_T *aff, 3288 char_u *compflags) 3289 { 3290 char_u *p; 3291 char_u *prevp; 3292 unsigned flag; 3293 compitem_T *ci; 3294 int id; 3295 int len; 3296 char_u *tp; 3297 char_u key[AH_KEY_LEN]; 3298 hashitem_T *hi; 3299 3300 /* Make room for the old and the new compflags, concatenated with a / in 3301 * between. Processing it makes it shorter, but we don't know by how 3302 * much, thus allocate the maximum. */ 3303 len = (int)STRLEN(compflags) + 1; 3304 if (spin->si_compflags != NULL) 3305 len += (int)STRLEN(spin->si_compflags) + 1; 3306 p = getroom(spin, len, FALSE); 3307 if (p == NULL) 3308 return; 3309 if (spin->si_compflags != NULL) 3310 { 3311 STRCPY(p, spin->si_compflags); 3312 STRCAT(p, "/"); 3313 } 3314 spin->si_compflags = p; 3315 tp = p + STRLEN(p); 3316 3317 for (p = compflags; *p != NUL; ) 3318 { 3319 if (vim_strchr((char_u *)"/?*+[]", *p) != NULL) 3320 /* Copy non-flag characters directly. */ 3321 *tp++ = *p++; 3322 else 3323 { 3324 /* First get the flag number, also checks validity. */ 3325 prevp = p; 3326 flag = get_affitem(aff->af_flagtype, &p); 3327 if (flag != 0) 3328 { 3329 /* Find the flag in the hashtable. If it was used before, use 3330 * the existing ID. Otherwise add a new entry. */ 3331 vim_strncpy(key, prevp, p - prevp); 3332 hi = hash_find(&aff->af_comp, key); 3333 if (!HASHITEM_EMPTY(hi)) 3334 id = HI2CI(hi)->ci_newID; 3335 else 3336 { 3337 ci = (compitem_T *)getroom(spin, sizeof(compitem_T), TRUE); 3338 if (ci == NULL) 3339 break; 3340 STRCPY(ci->ci_key, key); 3341 ci->ci_flag = flag; 3342 /* Avoid using a flag ID that has a special meaning in a 3343 * regexp (also inside []). 
*/ 3344 do 3345 { 3346 check_renumber(spin); 3347 id = spin->si_newcompID--; 3348 } while (vim_strchr((char_u *)"/?*+[]\\-^", id) != NULL); 3349 ci->ci_newID = id; 3350 hash_add(&aff->af_comp, ci->ci_key); 3351 } 3352 *tp++ = id; 3353 } 3354 if (aff->af_flagtype == AFT_NUM && *p == ',') 3355 ++p; 3356 } 3357 } 3358 3359 *tp = NUL; 3360 } 3361 3362 /* 3363 * Check that the new IDs for postponed affixes and compounding don't overrun 3364 * each other. We have almost 255 available, but start at 0-127 to avoid 3365 * using two bytes for utf-8. When the 0-127 range is used up go to 128-255. 3366 * When that is used up an error message is given. 3367 */ 3368 static void 3369 check_renumber(spellinfo_T *spin) 3370 { 3371 if (spin->si_newprefID == spin->si_newcompID && spin->si_newcompID < 128) 3372 { 3373 spin->si_newprefID = 127; 3374 spin->si_newcompID = 255; 3375 } 3376 } 3377 3378 /* 3379 * Return TRUE if flag "flag" appears in affix list "afflist". 3380 */ 3381 static int 3382 flag_in_afflist(int flagtype, char_u *afflist, unsigned flag) 3383 { 3384 char_u *p; 3385 unsigned n; 3386 3387 switch (flagtype) 3388 { 3389 case AFT_CHAR: 3390 return vim_strchr(afflist, flag) != NULL; 3391 3392 case AFT_CAPLONG: 3393 case AFT_LONG: 3394 for (p = afflist; *p != NUL; ) 3395 { 3396 #ifdef FEAT_MBYTE 3397 n = mb_ptr2char_adv(&p); 3398 #else 3399 n = *p++; 3400 #endif 3401 if ((flagtype == AFT_LONG || (n >= 'A' && n <= 'Z')) 3402 && *p != NUL) 3403 #ifdef FEAT_MBYTE 3404 n = mb_ptr2char_adv(&p) + (n << 16); 3405 #else 3406 n = *p++ + (n << 16); 3407 #endif 3408 if (n == flag) 3409 return TRUE; 3410 } 3411 break; 3412 3413 case AFT_NUM: 3414 for (p = afflist; *p != NUL; ) 3415 { 3416 n = getdigits(&p); 3417 if (n == flag) 3418 return TRUE; 3419 if (*p != NUL) /* skip over comma */ 3420 ++p; 3421 } 3422 break; 3423 } 3424 return FALSE; 3425 } 3426 3427 /* 3428 * Give a warning when "spinval" and "affval" numbers are set and not the same. 
3429 */ 3430 static void 3431 aff_check_number(int spinval, int affval, char *name) 3432 { 3433 if (spinval != 0 && spinval != affval) 3434 smsg((char_u *)_("%s value differs from what is used in another .aff file"), name); 3435 } 3436 3437 /* 3438 * Give a warning when "spinval" and "affval" strings are set and not the same. 3439 */ 3440 static void 3441 aff_check_string(char_u *spinval, char_u *affval, char *name) 3442 { 3443 if (spinval != NULL && STRCMP(spinval, affval) != 0) 3444 smsg((char_u *)_("%s value differs from what is used in another .aff file"), name); 3445 } 3446 3447 /* 3448 * Return TRUE if strings "s1" and "s2" are equal. Also consider both being 3449 * NULL as equal. 3450 */ 3451 static int 3452 str_equal(char_u *s1, char_u *s2) 3453 { 3454 if (s1 == NULL || s2 == NULL) 3455 return s1 == s2; 3456 return STRCMP(s1, s2) == 0; 3457 } 3458 3459 /* 3460 * Add a from-to item to "gap". Used for REP and SAL items. 3461 * They are stored case-folded. 3462 */ 3463 static void 3464 add_fromto( 3465 spellinfo_T *spin, 3466 garray_T *gap, 3467 char_u *from, 3468 char_u *to) 3469 { 3470 fromto_T *ftp; 3471 char_u word[MAXWLEN]; 3472 3473 if (ga_grow(gap, 1) == OK) 3474 { 3475 ftp = ((fromto_T *)gap->ga_data) + gap->ga_len; 3476 (void)spell_casefold(from, (int)STRLEN(from), word, MAXWLEN); 3477 ftp->ft_from = getroom_save(spin, word); 3478 (void)spell_casefold(to, (int)STRLEN(to), word, MAXWLEN); 3479 ftp->ft_to = getroom_save(spin, word); 3480 ++gap->ga_len; 3481 } 3482 } 3483 3484 /* 3485 * Convert a boolean argument in a SAL line to TRUE or FALSE; 3486 */ 3487 static int 3488 sal_to_bool(char_u *s) 3489 { 3490 return STRCMP(s, "1") == 0 || STRCMP(s, "true") == 0; 3491 } 3492 3493 /* 3494 * Free the structure filled by spell_read_aff(). 
3495 */ 3496 static void 3497 spell_free_aff(afffile_T *aff) 3498 { 3499 hashtab_T *ht; 3500 hashitem_T *hi; 3501 int todo; 3502 affheader_T *ah; 3503 affentry_T *ae; 3504 3505 vim_free(aff->af_enc); 3506 3507 /* All this trouble to free the "ae_prog" items... */ 3508 for (ht = &aff->af_pref; ; ht = &aff->af_suff) 3509 { 3510 todo = (int)ht->ht_used; 3511 for (hi = ht->ht_array; todo > 0; ++hi) 3512 { 3513 if (!HASHITEM_EMPTY(hi)) 3514 { 3515 --todo; 3516 ah = HI2AH(hi); 3517 for (ae = ah->ah_first; ae != NULL; ae = ae->ae_next) 3518 vim_regfree(ae->ae_prog); 3519 } 3520 } 3521 if (ht == &aff->af_suff) 3522 break; 3523 } 3524 3525 hash_clear(&aff->af_pref); 3526 hash_clear(&aff->af_suff); 3527 hash_clear(&aff->af_comp); 3528 } 3529 3530 /* 3531 * Read dictionary file "fname". 3532 * Returns OK or FAIL; 3533 */ 3534 static int 3535 spell_read_dic(spellinfo_T *spin, char_u *fname, afffile_T *affile) 3536 { 3537 hashtab_T ht; 3538 char_u line[MAXLINELEN]; 3539 char_u *p; 3540 char_u *afflist; 3541 char_u store_afflist[MAXWLEN]; 3542 int pfxlen; 3543 int need_affix; 3544 char_u *dw; 3545 char_u *pc; 3546 char_u *w; 3547 int l; 3548 hash_T hash; 3549 hashitem_T *hi; 3550 FILE *fd; 3551 int lnum = 1; 3552 int non_ascii = 0; 3553 int retval = OK; 3554 char_u message[MAXLINELEN + MAXWLEN]; 3555 int flags; 3556 int duplicate = 0; 3557 3558 /* 3559 * Open the file. 3560 */ 3561 fd = mch_fopen((char *)fname, "r"); 3562 if (fd == NULL) 3563 { 3564 EMSG2(_(e_notopen), fname); 3565 return FAIL; 3566 } 3567 3568 /* The hashtable is only used to detect duplicated words. */ 3569 hash_init(&ht); 3570 3571 vim_snprintf((char *)IObuff, IOSIZE, 3572 _("Reading dictionary file %s ..."), fname); 3573 spell_message(spin, IObuff); 3574 3575 /* start with a message for the first line */ 3576 spin->si_msg_count = 999999; 3577 3578 /* Read and ignore the first line: word count. 
*/ 3579 (void)vim_fgets(line, MAXLINELEN, fd); 3580 if (!vim_isdigit(*skipwhite(line))) 3581 EMSG2(_("E760: No word count in %s"), fname); 3582 3583 /* 3584 * Read all the lines in the file one by one. 3585 * The words are converted to 'encoding' here, before being added to 3586 * the hashtable. 3587 */ 3588 while (!vim_fgets(line, MAXLINELEN, fd) && !got_int) 3589 { 3590 line_breakcheck(); 3591 ++lnum; 3592 if (line[0] == '#' || line[0] == '/') 3593 continue; /* comment line */ 3594 3595 /* Remove CR, LF and white space from the end. White space halfway 3596 * the word is kept to allow e.g., "et al.". */ 3597 l = (int)STRLEN(line); 3598 while (l > 0 && line[l - 1] <= ' ') 3599 --l; 3600 if (l == 0) 3601 continue; /* empty line */ 3602 line[l] = NUL; 3603 3604 #ifdef FEAT_MBYTE 3605 /* Convert from "SET" to 'encoding' when needed. */ 3606 if (spin->si_conv.vc_type != CONV_NONE) 3607 { 3608 pc = string_convert(&spin->si_conv, line, NULL); 3609 if (pc == NULL) 3610 { 3611 smsg((char_u *)_("Conversion failure for word in %s line %d: %s"), 3612 fname, lnum, line); 3613 continue; 3614 } 3615 w = pc; 3616 } 3617 else 3618 #endif 3619 { 3620 pc = NULL; 3621 w = line; 3622 } 3623 3624 /* Truncate the word at the "/", set "afflist" to what follows. 3625 * Replace "\/" by "/" and "\\" by "\". */ 3626 afflist = NULL; 3627 for (p = w; *p != NUL; MB_PTR_ADV(p)) 3628 { 3629 if (*p == '\\' && (p[1] == '\\' || p[1] == '/')) 3630 STRMOVE(p, p + 1); 3631 else if (*p == '/') 3632 { 3633 *p = NUL; 3634 afflist = p + 1; 3635 break; 3636 } 3637 } 3638 3639 /* Skip non-ASCII words when "spin->si_ascii" is TRUE. */ 3640 if (spin->si_ascii && has_non_ascii(w)) 3641 { 3642 ++non_ascii; 3643 vim_free(pc); 3644 continue; 3645 } 3646 3647 /* This takes time, print a message every 10000 words. 
*/ 3648 if (spin->si_verbose && spin->si_msg_count > 10000) 3649 { 3650 spin->si_msg_count = 0; 3651 vim_snprintf((char *)message, sizeof(message), 3652 _("line %6d, word %6d - %s"), 3653 lnum, spin->si_foldwcount + spin->si_keepwcount, w); 3654 msg_start(); 3655 msg_puts_long_attr(message, 0); 3656 msg_clr_eos(); 3657 msg_didout = FALSE; 3658 msg_col = 0; 3659 out_flush(); 3660 } 3661 3662 /* Store the word in the hashtable to be able to find duplicates. */ 3663 dw = (char_u *)getroom_save(spin, w); 3664 if (dw == NULL) 3665 { 3666 retval = FAIL; 3667 vim_free(pc); 3668 break; 3669 } 3670 3671 hash = hash_hash(dw); 3672 hi = hash_lookup(&ht, dw, hash); 3673 if (!HASHITEM_EMPTY(hi)) 3674 { 3675 if (p_verbose > 0) 3676 smsg((char_u *)_("Duplicate word in %s line %d: %s"), 3677 fname, lnum, dw); 3678 else if (duplicate == 0) 3679 smsg((char_u *)_("First duplicate word in %s line %d: %s"), 3680 fname, lnum, dw); 3681 ++duplicate; 3682 } 3683 else 3684 hash_add_item(&ht, hi, dw, hash); 3685 3686 flags = 0; 3687 store_afflist[0] = NUL; 3688 pfxlen = 0; 3689 need_affix = FALSE; 3690 if (afflist != NULL) 3691 { 3692 /* Extract flags from the affix list. */ 3693 flags |= get_affix_flags(affile, afflist); 3694 3695 if (affile->af_needaffix != 0 && flag_in_afflist( 3696 affile->af_flagtype, afflist, affile->af_needaffix)) 3697 need_affix = TRUE; 3698 3699 if (affile->af_pfxpostpone) 3700 /* Need to store the list of prefix IDs with the word. */ 3701 pfxlen = get_pfxlist(affile, afflist, store_afflist); 3702 3703 if (spin->si_compflags != NULL) 3704 /* Need to store the list of compound flags with the word. 3705 * Concatenate them to the list of prefix IDs. */ 3706 get_compflags(affile, afflist, store_afflist + pfxlen); 3707 } 3708 3709 /* Add the word to the word tree(s). 
*/ 3710 if (store_word(spin, dw, flags, spin->si_region, 3711 store_afflist, need_affix) == FAIL) 3712 retval = FAIL; 3713 3714 if (afflist != NULL) 3715 { 3716 /* Find all matching suffixes and add the resulting words. 3717 * Additionally do matching prefixes that combine. */ 3718 if (store_aff_word(spin, dw, afflist, affile, 3719 &affile->af_suff, &affile->af_pref, 3720 CONDIT_SUF, flags, store_afflist, pfxlen) == FAIL) 3721 retval = FAIL; 3722 3723 /* Find all matching prefixes and add the resulting words. */ 3724 if (store_aff_word(spin, dw, afflist, affile, 3725 &affile->af_pref, NULL, 3726 CONDIT_SUF, flags, store_afflist, pfxlen) == FAIL) 3727 retval = FAIL; 3728 } 3729 3730 vim_free(pc); 3731 } 3732 3733 if (duplicate > 0) 3734 smsg((char_u *)_("%d duplicate word(s) in %s"), duplicate, fname); 3735 if (spin->si_ascii && non_ascii > 0) 3736 smsg((char_u *)_("Ignored %d word(s) with non-ASCII characters in %s"), 3737 non_ascii, fname); 3738 hash_clear(&ht); 3739 3740 fclose(fd); 3741 return retval; 3742 } 3743 3744 /* 3745 * Check for affix flags in "afflist" that are turned into word flags. 3746 * Return WF_ flags. 
3747 */ 3748 static int 3749 get_affix_flags(afffile_T *affile, char_u *afflist) 3750 { 3751 int flags = 0; 3752 3753 if (affile->af_keepcase != 0 && flag_in_afflist( 3754 affile->af_flagtype, afflist, affile->af_keepcase)) 3755 flags |= WF_KEEPCAP | WF_FIXCAP; 3756 if (affile->af_rare != 0 && flag_in_afflist( 3757 affile->af_flagtype, afflist, affile->af_rare)) 3758 flags |= WF_RARE; 3759 if (affile->af_bad != 0 && flag_in_afflist( 3760 affile->af_flagtype, afflist, affile->af_bad)) 3761 flags |= WF_BANNED; 3762 if (affile->af_needcomp != 0 && flag_in_afflist( 3763 affile->af_flagtype, afflist, affile->af_needcomp)) 3764 flags |= WF_NEEDCOMP; 3765 if (affile->af_comproot != 0 && flag_in_afflist( 3766 affile->af_flagtype, afflist, affile->af_comproot)) 3767 flags |= WF_COMPROOT; 3768 if (affile->af_nosuggest != 0 && flag_in_afflist( 3769 affile->af_flagtype, afflist, affile->af_nosuggest)) 3770 flags |= WF_NOSUGGEST; 3771 return flags; 3772 } 3773 3774 /* 3775 * Get the list of prefix IDs from the affix list "afflist". 3776 * Used for PFXPOSTPONE. 3777 * Put the resulting flags in "store_afflist[MAXWLEN]" with a terminating NUL 3778 * and return the number of affixes. 3779 */ 3780 static int 3781 get_pfxlist( 3782 afffile_T *affile, 3783 char_u *afflist, 3784 char_u *store_afflist) 3785 { 3786 char_u *p; 3787 char_u *prevp; 3788 int cnt = 0; 3789 int id; 3790 char_u key[AH_KEY_LEN]; 3791 hashitem_T *hi; 3792 3793 for (p = afflist; *p != NUL; ) 3794 { 3795 prevp = p; 3796 if (get_affitem(affile->af_flagtype, &p) != 0) 3797 { 3798 /* A flag is a postponed prefix flag if it appears in "af_pref" 3799 * and it's ID is not zero. 
*/ 3800 vim_strncpy(key, prevp, p - prevp); 3801 hi = hash_find(&affile->af_pref, key); 3802 if (!HASHITEM_EMPTY(hi)) 3803 { 3804 id = HI2AH(hi)->ah_newID; 3805 if (id != 0) 3806 store_afflist[cnt++] = id; 3807 } 3808 } 3809 if (affile->af_flagtype == AFT_NUM && *p == ',') 3810 ++p; 3811 } 3812 3813 store_afflist[cnt] = NUL; 3814 return cnt; 3815 } 3816 3817 /* 3818 * Get the list of compound IDs from the affix list "afflist" that are used 3819 * for compound words. 3820 * Puts the flags in "store_afflist[]". 3821 */ 3822 static void 3823 get_compflags( 3824 afffile_T *affile, 3825 char_u *afflist, 3826 char_u *store_afflist) 3827 { 3828 char_u *p; 3829 char_u *prevp; 3830 int cnt = 0; 3831 char_u key[AH_KEY_LEN]; 3832 hashitem_T *hi; 3833 3834 for (p = afflist; *p != NUL; ) 3835 { 3836 prevp = p; 3837 if (get_affitem(affile->af_flagtype, &p) != 0) 3838 { 3839 /* A flag is a compound flag if it appears in "af_comp". */ 3840 vim_strncpy(key, prevp, p - prevp); 3841 hi = hash_find(&affile->af_comp, key); 3842 if (!HASHITEM_EMPTY(hi)) 3843 store_afflist[cnt++] = HI2CI(hi)->ci_newID; 3844 } 3845 if (affile->af_flagtype == AFT_NUM && *p == ',') 3846 ++p; 3847 } 3848 3849 store_afflist[cnt] = NUL; 3850 } 3851 3852 /* 3853 * Apply affixes to a word and store the resulting words. 3854 * "ht" is the hashtable with affentry_T that need to be applied, either 3855 * prefixes or suffixes. 3856 * "xht", when not NULL, is the prefix hashtable, to be used additionally on 3857 * the resulting words for combining affixes. 3858 * 3859 * Returns FAIL when out of memory. 3860 */ 3861 static int 3862 store_aff_word( 3863 spellinfo_T *spin, /* spell info */ 3864 char_u *word, /* basic word start */ 3865 char_u *afflist, /* list of names of supported affixes */ 3866 afffile_T *affile, 3867 hashtab_T *ht, 3868 hashtab_T *xht, 3869 int condit, /* CONDIT_SUF et al. 
*/ 3870 int flags, /* flags for the word */ 3871 char_u *pfxlist, /* list of prefix IDs */ 3872 int pfxlen) /* nr of flags in "pfxlist" for prefixes, rest 3873 * is compound flags */ 3874 { 3875 int todo; 3876 hashitem_T *hi; 3877 affheader_T *ah; 3878 affentry_T *ae; 3879 char_u newword[MAXWLEN]; 3880 int retval = OK; 3881 int i, j; 3882 char_u *p; 3883 int use_flags; 3884 char_u *use_pfxlist; 3885 int use_pfxlen; 3886 int need_affix; 3887 char_u store_afflist[MAXWLEN]; 3888 char_u pfx_pfxlist[MAXWLEN]; 3889 size_t wordlen = STRLEN(word); 3890 int use_condit; 3891 3892 todo = (int)ht->ht_used; 3893 for (hi = ht->ht_array; todo > 0 && retval == OK; ++hi) 3894 { 3895 if (!HASHITEM_EMPTY(hi)) 3896 { 3897 --todo; 3898 ah = HI2AH(hi); 3899 3900 /* Check that the affix combines, if required, and that the word 3901 * supports this affix. */ 3902 if (((condit & CONDIT_COMB) == 0 || ah->ah_combine) 3903 && flag_in_afflist(affile->af_flagtype, afflist, 3904 ah->ah_flag)) 3905 { 3906 /* Loop over all affix entries with this name. */ 3907 for (ae = ah->ah_first; ae != NULL; ae = ae->ae_next) 3908 { 3909 /* Check the condition. It's not logical to match case 3910 * here, but it is required for compatibility with 3911 * Myspell. 3912 * Another requirement from Myspell is that the chop 3913 * string is shorter than the word itself. 3914 * For prefixes, when "PFXPOSTPONE" was used, only do 3915 * prefixes with a chop string and/or flags. 3916 * When a previously added affix had CIRCUMFIX this one 3917 * must have it too, if it had not then this one must not 3918 * have one either. 
*/ 3919 if ((xht != NULL || !affile->af_pfxpostpone 3920 || ae->ae_chop != NULL 3921 || ae->ae_flags != NULL) 3922 && (ae->ae_chop == NULL 3923 || STRLEN(ae->ae_chop) < wordlen) 3924 && (ae->ae_prog == NULL 3925 || vim_regexec_prog(&ae->ae_prog, FALSE, 3926 word, (colnr_T)0)) 3927 && (((condit & CONDIT_CFIX) == 0) 3928 == ((condit & CONDIT_AFF) == 0 3929 || ae->ae_flags == NULL 3930 || !flag_in_afflist(affile->af_flagtype, 3931 ae->ae_flags, affile->af_circumfix)))) 3932 { 3933 /* Match. Remove the chop and add the affix. */ 3934 if (xht == NULL) 3935 { 3936 /* prefix: chop/add at the start of the word */ 3937 if (ae->ae_add == NULL) 3938 *newword = NUL; 3939 else 3940 vim_strncpy(newword, ae->ae_add, MAXWLEN - 1); 3941 p = word; 3942 if (ae->ae_chop != NULL) 3943 { 3944 /* Skip chop string. */ 3945 #ifdef FEAT_MBYTE 3946 if (has_mbyte) 3947 { 3948 i = mb_charlen(ae->ae_chop); 3949 for ( ; i > 0; --i) 3950 MB_PTR_ADV(p); 3951 } 3952 else 3953 #endif 3954 p += STRLEN(ae->ae_chop); 3955 } 3956 STRCAT(newword, p); 3957 } 3958 else 3959 { 3960 /* suffix: chop/add at the end of the word */ 3961 vim_strncpy(newword, word, MAXWLEN - 1); 3962 if (ae->ae_chop != NULL) 3963 { 3964 /* Remove chop string. */ 3965 p = newword + STRLEN(newword); 3966 i = (int)MB_CHARLEN(ae->ae_chop); 3967 for ( ; i > 0; --i) 3968 MB_PTR_BACK(newword, p); 3969 *p = NUL; 3970 } 3971 if (ae->ae_add != NULL) 3972 STRCAT(newword, ae->ae_add); 3973 } 3974 3975 use_flags = flags; 3976 use_pfxlist = pfxlist; 3977 use_pfxlen = pfxlen; 3978 need_affix = FALSE; 3979 use_condit = condit | CONDIT_COMB | CONDIT_AFF; 3980 if (ae->ae_flags != NULL) 3981 { 3982 /* Extract flags from the affix list. 
*/ 3983 use_flags |= get_affix_flags(affile, ae->ae_flags); 3984 3985 if (affile->af_needaffix != 0 && flag_in_afflist( 3986 affile->af_flagtype, ae->ae_flags, 3987 affile->af_needaffix)) 3988 need_affix = TRUE; 3989 3990 /* When there is a CIRCUMFIX flag the other affix 3991 * must also have it and we don't add the word 3992 * with one affix. */ 3993 if (affile->af_circumfix != 0 && flag_in_afflist( 3994 affile->af_flagtype, ae->ae_flags, 3995 affile->af_circumfix)) 3996 { 3997 use_condit |= CONDIT_CFIX; 3998 if ((condit & CONDIT_CFIX) == 0) 3999 need_affix = TRUE; 4000 } 4001 4002 if (affile->af_pfxpostpone 4003 || spin->si_compflags != NULL) 4004 { 4005 if (affile->af_pfxpostpone) 4006 /* Get prefix IDS from the affix list. */ 4007 use_pfxlen = get_pfxlist(affile, 4008 ae->ae_flags, store_afflist); 4009 else 4010 use_pfxlen = 0; 4011 use_pfxlist = store_afflist; 4012 4013 /* Combine the prefix IDs. Avoid adding the 4014 * same ID twice. */ 4015 for (i = 0; i < pfxlen; ++i) 4016 { 4017 for (j = 0; j < use_pfxlen; ++j) 4018 if (pfxlist[i] == use_pfxlist[j]) 4019 break; 4020 if (j == use_pfxlen) 4021 use_pfxlist[use_pfxlen++] = pfxlist[i]; 4022 } 4023 4024 if (spin->si_compflags != NULL) 4025 /* Get compound IDS from the affix list. */ 4026 get_compflags(affile, ae->ae_flags, 4027 use_pfxlist + use_pfxlen); 4028 4029 /* Combine the list of compound flags. 4030 * Concatenate them to the prefix IDs list. 4031 * Avoid adding the same ID twice. */ 4032 for (i = pfxlen; pfxlist[i] != NUL; ++i) 4033 { 4034 for (j = use_pfxlen; 4035 use_pfxlist[j] != NUL; ++j) 4036 if (pfxlist[i] == use_pfxlist[j]) 4037 break; 4038 if (use_pfxlist[j] == NUL) 4039 { 4040 use_pfxlist[j++] = pfxlist[i]; 4041 use_pfxlist[j] = NUL; 4042 } 4043 } 4044 } 4045 } 4046 4047 /* Obey a "COMPOUNDFORBIDFLAG" of the affix: don't 4048 * use the compound flags. 
*/ 4049 if (use_pfxlist != NULL && ae->ae_compforbid) 4050 { 4051 vim_strncpy(pfx_pfxlist, use_pfxlist, use_pfxlen); 4052 use_pfxlist = pfx_pfxlist; 4053 } 4054 4055 /* When there are postponed prefixes... */ 4056 if (spin->si_prefroot != NULL 4057 && spin->si_prefroot->wn_sibling != NULL) 4058 { 4059 /* ... add a flag to indicate an affix was used. */ 4060 use_flags |= WF_HAS_AFF; 4061 4062 /* ... don't use a prefix list if combining 4063 * affixes is not allowed. But do use the 4064 * compound flags after them. */ 4065 if (!ah->ah_combine && use_pfxlist != NULL) 4066 use_pfxlist += use_pfxlen; 4067 } 4068 4069 /* When compounding is supported and there is no 4070 * "COMPOUNDPERMITFLAG" then forbid compounding on the 4071 * side where the affix is applied. */ 4072 if (spin->si_compflags != NULL && !ae->ae_comppermit) 4073 { 4074 if (xht != NULL) 4075 use_flags |= WF_NOCOMPAFT; 4076 else 4077 use_flags |= WF_NOCOMPBEF; 4078 } 4079 4080 /* Store the modified word. */ 4081 if (store_word(spin, newword, use_flags, 4082 spin->si_region, use_pfxlist, 4083 need_affix) == FAIL) 4084 retval = FAIL; 4085 4086 /* When added a prefix or a first suffix and the affix 4087 * has flags may add a(nother) suffix. RECURSIVE! */ 4088 if ((condit & CONDIT_SUF) && ae->ae_flags != NULL) 4089 if (store_aff_word(spin, newword, ae->ae_flags, 4090 affile, &affile->af_suff, xht, 4091 use_condit & (xht == NULL 4092 ? ~0 : ~CONDIT_SUF), 4093 use_flags, use_pfxlist, pfxlen) == FAIL) 4094 retval = FAIL; 4095 4096 /* When added a suffix and combining is allowed also 4097 * try adding a prefix additionally. Both for the 4098 * word flags and for the affix flags. RECURSIVE! 
*/ 4099 if (xht != NULL && ah->ah_combine) 4100 { 4101 if (store_aff_word(spin, newword, 4102 afflist, affile, 4103 xht, NULL, use_condit, 4104 use_flags, use_pfxlist, 4105 pfxlen) == FAIL 4106 || (ae->ae_flags != NULL 4107 && store_aff_word(spin, newword, 4108 ae->ae_flags, affile, 4109 xht, NULL, use_condit, 4110 use_flags, use_pfxlist, 4111 pfxlen) == FAIL)) 4112 retval = FAIL; 4113 } 4114 } 4115 } 4116 } 4117 } 4118 } 4119 4120 return retval; 4121 } 4122 4123 /* 4124 * Read a file with a list of words. 4125 */ 4126 static int 4127 spell_read_wordfile(spellinfo_T *spin, char_u *fname) 4128 { 4129 FILE *fd; 4130 long lnum = 0; 4131 char_u rline[MAXLINELEN]; 4132 char_u *line; 4133 char_u *pc = NULL; 4134 char_u *p; 4135 int l; 4136 int retval = OK; 4137 int did_word = FALSE; 4138 int non_ascii = 0; 4139 int flags; 4140 int regionmask; 4141 4142 /* 4143 * Open the file. 4144 */ 4145 fd = mch_fopen((char *)fname, "r"); 4146 if (fd == NULL) 4147 { 4148 EMSG2(_(e_notopen), fname); 4149 return FAIL; 4150 } 4151 4152 vim_snprintf((char *)IObuff, IOSIZE, _("Reading word file %s ..."), fname); 4153 spell_message(spin, IObuff); 4154 4155 /* 4156 * Read all the lines in the file one by one. 4157 */ 4158 while (!vim_fgets(rline, MAXLINELEN, fd) && !got_int) 4159 { 4160 line_breakcheck(); 4161 ++lnum; 4162 4163 /* Skip comment lines. */ 4164 if (*rline == '#') 4165 continue; 4166 4167 /* Remove CR, LF and white space from the end. */ 4168 l = (int)STRLEN(rline); 4169 while (l > 0 && rline[l - 1] <= ' ') 4170 --l; 4171 if (l == 0) 4172 continue; /* empty or blank line */ 4173 rline[l] = NUL; 4174 4175 /* Convert from "/encoding={encoding}" to 'encoding' when needed. 
*/ 4176 vim_free(pc); 4177 #ifdef FEAT_MBYTE 4178 if (spin->si_conv.vc_type != CONV_NONE) 4179 { 4180 pc = string_convert(&spin->si_conv, rline, NULL); 4181 if (pc == NULL) 4182 { 4183 smsg((char_u *)_("Conversion failure for word in %s line %d: %s"), 4184 fname, lnum, rline); 4185 continue; 4186 } 4187 line = pc; 4188 } 4189 else 4190 #endif 4191 { 4192 pc = NULL; 4193 line = rline; 4194 } 4195 4196 if (*line == '/') 4197 { 4198 ++line; 4199 if (STRNCMP(line, "encoding=", 9) == 0) 4200 { 4201 if (spin->si_conv.vc_type != CONV_NONE) 4202 smsg((char_u *)_("Duplicate /encoding= line ignored in %s line %d: %s"), 4203 fname, lnum, line - 1); 4204 else if (did_word) 4205 smsg((char_u *)_("/encoding= line after word ignored in %s line %d: %s"), 4206 fname, lnum, line - 1); 4207 else 4208 { 4209 #ifdef FEAT_MBYTE 4210 char_u *enc; 4211 4212 /* Setup for conversion to 'encoding'. */ 4213 line += 9; 4214 enc = enc_canonize(line); 4215 if (enc != NULL && !spin->si_ascii 4216 && convert_setup(&spin->si_conv, enc, 4217 p_enc) == FAIL) 4218 smsg((char_u *)_("Conversion in %s not supported: from %s to %s"), 4219 fname, line, p_enc); 4220 vim_free(enc); 4221 spin->si_conv.vc_fail = TRUE; 4222 #else 4223 smsg((char_u *)_("Conversion in %s not supported"), fname); 4224 #endif 4225 } 4226 continue; 4227 } 4228 4229 if (STRNCMP(line, "regions=", 8) == 0) 4230 { 4231 if (spin->si_region_count > 1) 4232 smsg((char_u *)_("Duplicate /regions= line ignored in %s line %d: %s"), 4233 fname, lnum, line); 4234 else 4235 { 4236 line += 8; 4237 if (STRLEN(line) > 16) 4238 smsg((char_u *)_("Too many regions in %s line %d: %s"), 4239 fname, lnum, line); 4240 else 4241 { 4242 spin->si_region_count = (int)STRLEN(line) / 2; 4243 STRCPY(spin->si_region_name, line); 4244 4245 /* Adjust the mask for a word valid in all regions. 
*/ 4246 spin->si_region = (1 << spin->si_region_count) - 1; 4247 } 4248 } 4249 continue; 4250 } 4251 4252 smsg((char_u *)_("/ line ignored in %s line %d: %s"), 4253 fname, lnum, line - 1); 4254 continue; 4255 } 4256 4257 flags = 0; 4258 regionmask = spin->si_region; 4259 4260 /* Check for flags and region after a slash. */ 4261 p = vim_strchr(line, '/'); 4262 if (p != NULL) 4263 { 4264 *p++ = NUL; 4265 while (*p != NUL) 4266 { 4267 if (*p == '=') /* keep-case word */ 4268 flags |= WF_KEEPCAP | WF_FIXCAP; 4269 else if (*p == '!') /* Bad, bad, wicked word. */ 4270 flags |= WF_BANNED; 4271 else if (*p == '?') /* Rare word. */ 4272 flags |= WF_RARE; 4273 else if (VIM_ISDIGIT(*p)) /* region number(s) */ 4274 { 4275 if ((flags & WF_REGION) == 0) /* first one */ 4276 regionmask = 0; 4277 flags |= WF_REGION; 4278 4279 l = *p - '0'; 4280 if (l == 0 || l > spin->si_region_count) 4281 { 4282 smsg((char_u *)_("Invalid region nr in %s line %d: %s"), 4283 fname, lnum, p); 4284 break; 4285 } 4286 regionmask |= 1 << (l - 1); 4287 } 4288 else 4289 { 4290 smsg((char_u *)_("Unrecognized flags in %s line %d: %s"), 4291 fname, lnum, p); 4292 break; 4293 } 4294 ++p; 4295 } 4296 } 4297 4298 /* Skip non-ASCII words when "spin->si_ascii" is TRUE. */ 4299 if (spin->si_ascii && has_non_ascii(line)) 4300 { 4301 ++non_ascii; 4302 continue; 4303 } 4304 4305 /* Normal word: store it. */ 4306 if (store_word(spin, line, flags, regionmask, NULL, FALSE) == FAIL) 4307 { 4308 retval = FAIL; 4309 break; 4310 } 4311 did_word = TRUE; 4312 } 4313 4314 vim_free(pc); 4315 fclose(fd); 4316 4317 if (spin->si_ascii && non_ascii > 0) 4318 { 4319 vim_snprintf((char *)IObuff, IOSIZE, 4320 _("Ignored %d words with non-ASCII characters"), non_ascii); 4321 spell_message(spin, IObuff); 4322 } 4323 4324 return retval; 4325 } 4326 4327 /* 4328 * Get part of an sblock_T, "len" bytes long. 4329 * This avoids calling free() for every little struct we use (and keeping 4330 * track of them). 
4331 * The memory is cleared to all zeros. 4332 * Returns NULL when out of memory. 4333 */ 4334 static void * 4335 getroom( 4336 spellinfo_T *spin, 4337 size_t len, /* length needed */ 4338 int align) /* align for pointer */ 4339 { 4340 char_u *p; 4341 sblock_T *bl = spin->si_blocks; 4342 4343 if (align && bl != NULL) 4344 /* Round size up for alignment. On some systems structures need to be 4345 * aligned to the size of a pointer (e.g., SPARC). */ 4346 bl->sb_used = (bl->sb_used + sizeof(char *) - 1) 4347 & ~(sizeof(char *) - 1); 4348 4349 if (bl == NULL || bl->sb_used + len > SBLOCKSIZE) 4350 { 4351 if (len >= SBLOCKSIZE) 4352 bl = NULL; 4353 else 4354 /* Allocate a block of memory. It is not freed until much later. */ 4355 bl = (sblock_T *)alloc_clear( 4356 (unsigned)(sizeof(sblock_T) + SBLOCKSIZE)); 4357 if (bl == NULL) 4358 { 4359 if (!spin->si_did_emsg) 4360 { 4361 EMSG(_("E845: Insufficient memory, word list will be incomplete")); 4362 spin->si_did_emsg = TRUE; 4363 } 4364 return NULL; 4365 } 4366 bl->sb_next = spin->si_blocks; 4367 spin->si_blocks = bl; 4368 bl->sb_used = 0; 4369 ++spin->si_blocks_cnt; 4370 } 4371 4372 p = bl->sb_data + bl->sb_used; 4373 bl->sb_used += (int)len; 4374 4375 return p; 4376 } 4377 4378 /* 4379 * Make a copy of a string into memory allocated with getroom(). 4380 * Returns NULL when out of memory. 4381 */ 4382 static char_u * 4383 getroom_save(spellinfo_T *spin, char_u *s) 4384 { 4385 char_u *sc; 4386 4387 sc = (char_u *)getroom(spin, STRLEN(s) + 1, FALSE); 4388 if (sc != NULL) 4389 STRCPY(sc, s); 4390 return sc; 4391 } 4392 4393 4394 /* 4395 * Free the list of allocated sblock_T. 4396 */ 4397 static void 4398 free_blocks(sblock_T *bl) 4399 { 4400 sblock_T *next; 4401 4402 while (bl != NULL) 4403 { 4404 next = bl->sb_next; 4405 vim_free(bl); 4406 bl = next; 4407 } 4408 } 4409 4410 /* 4411 * Allocate the root of a word tree. 4412 * Returns NULL when out of memory. 
4413 */ 4414 static wordnode_T * 4415 wordtree_alloc(spellinfo_T *spin) 4416 { 4417 return (wordnode_T *)getroom(spin, sizeof(wordnode_T), TRUE); 4418 } 4419 4420 /* 4421 * Store a word in the tree(s). 4422 * Always store it in the case-folded tree. For a keep-case word this is 4423 * useful when the word can also be used with all caps (no WF_FIXCAP flag) and 4424 * used to find suggestions. 4425 * For a keep-case word also store it in the keep-case tree. 4426 * When "pfxlist" is not NULL store the word for each postponed prefix ID and 4427 * compound flag. 4428 */ 4429 static int 4430 store_word( 4431 spellinfo_T *spin, 4432 char_u *word, 4433 int flags, /* extra flags, WF_BANNED */ 4434 int region, /* supported region(s) */ 4435 char_u *pfxlist, /* list of prefix IDs or NULL */ 4436 int need_affix) /* only store word with affix ID */ 4437 { 4438 int len = (int)STRLEN(word); 4439 int ct = captype(word, word + len); 4440 char_u foldword[MAXWLEN]; 4441 int res = OK; 4442 char_u *p; 4443 4444 (void)spell_casefold(word, len, foldword, MAXWLEN); 4445 for (p = pfxlist; res == OK; ++p) 4446 { 4447 if (!need_affix || (p != NULL && *p != NUL)) 4448 res = tree_add_word(spin, foldword, spin->si_foldroot, ct | flags, 4449 region, p == NULL ? 0 : *p); 4450 if (p == NULL || *p == NUL) 4451 break; 4452 } 4453 ++spin->si_foldwcount; 4454 4455 if (res == OK && (ct == WF_KEEPCAP || (flags & WF_KEEPCAP))) 4456 { 4457 for (p = pfxlist; res == OK; ++p) 4458 { 4459 if (!need_affix || (p != NULL && *p != NUL)) 4460 res = tree_add_word(spin, word, spin->si_keeproot, flags, 4461 region, p == NULL ? 0 : *p); 4462 if (p == NULL || *p == NUL) 4463 break; 4464 } 4465 ++spin->si_keepwcount; 4466 } 4467 return res; 4468 } 4469 4470 /* 4471 * Add word "word" to a word tree at "root". 4472 * When "flags" < 0 we are adding to the prefix tree where "flags" is used for 4473 * "rare" and "region" is the condition nr. 4474 * Returns FAIL when out of memory. 
4475 */ 4476 static int 4477 tree_add_word( 4478 spellinfo_T *spin, 4479 char_u *word, 4480 wordnode_T *root, 4481 int flags, 4482 int region, 4483 int affixID) 4484 { 4485 wordnode_T *node = root; 4486 wordnode_T *np; 4487 wordnode_T *copyp, **copyprev; 4488 wordnode_T **prev = NULL; 4489 int i; 4490 4491 /* Add each byte of the word to the tree, including the NUL at the end. */ 4492 for (i = 0; ; ++i) 4493 { 4494 /* When there is more than one reference to this node we need to make 4495 * a copy, so that we can modify it. Copy the whole list of siblings 4496 * (we don't optimize for a partly shared list of siblings). */ 4497 if (node != NULL && node->wn_refs > 1) 4498 { 4499 --node->wn_refs; 4500 copyprev = prev; 4501 for (copyp = node; copyp != NULL; copyp = copyp->wn_sibling) 4502 { 4503 /* Allocate a new node and copy the info. */ 4504 np = get_wordnode(spin); 4505 if (np == NULL) 4506 return FAIL; 4507 np->wn_child = copyp->wn_child; 4508 if (np->wn_child != NULL) 4509 ++np->wn_child->wn_refs; /* child gets extra ref */ 4510 np->wn_byte = copyp->wn_byte; 4511 if (np->wn_byte == NUL) 4512 { 4513 np->wn_flags = copyp->wn_flags; 4514 np->wn_region = copyp->wn_region; 4515 np->wn_affixID = copyp->wn_affixID; 4516 } 4517 4518 /* Link the new node in the list, there will be one ref. */ 4519 np->wn_refs = 1; 4520 if (copyprev != NULL) 4521 *copyprev = np; 4522 copyprev = &np->wn_sibling; 4523 4524 /* Let "node" point to the head of the copied list. */ 4525 if (copyp == node) 4526 node = np; 4527 } 4528 } 4529 4530 /* Look for the sibling that has the same character. They are sorted 4531 * on byte value, thus stop searching when a sibling is found with a 4532 * higher byte value. For zero bytes (end of word) the sorting is 4533 * done on flags and then on affixID. */ 4534 while (node != NULL 4535 && (node->wn_byte < word[i] 4536 || (node->wn_byte == NUL 4537 && (flags < 0 4538 ? 
node->wn_affixID < (unsigned)affixID 4539 : (node->wn_flags < (unsigned)(flags & WN_MASK) 4540 || (node->wn_flags == (flags & WN_MASK) 4541 && (spin->si_sugtree 4542 ? (node->wn_region & 0xffff) < region 4543 : node->wn_affixID 4544 < (unsigned)affixID))))))) 4545 { 4546 prev = &node->wn_sibling; 4547 node = *prev; 4548 } 4549 if (node == NULL 4550 || node->wn_byte != word[i] 4551 || (word[i] == NUL 4552 && (flags < 0 4553 || spin->si_sugtree 4554 || node->wn_flags != (flags & WN_MASK) 4555 || node->wn_affixID != affixID))) 4556 { 4557 /* Allocate a new node. */ 4558 np = get_wordnode(spin); 4559 if (np == NULL) 4560 return FAIL; 4561 np->wn_byte = word[i]; 4562 4563 /* If "node" is NULL this is a new child or the end of the sibling 4564 * list: ref count is one. Otherwise use ref count of sibling and 4565 * make ref count of sibling one (matters when inserting in front 4566 * of the list of siblings). */ 4567 if (node == NULL) 4568 np->wn_refs = 1; 4569 else 4570 { 4571 np->wn_refs = node->wn_refs; 4572 node->wn_refs = 1; 4573 } 4574 if (prev != NULL) 4575 *prev = np; 4576 np->wn_sibling = node; 4577 node = np; 4578 } 4579 4580 if (word[i] == NUL) 4581 { 4582 node->wn_flags = flags; 4583 node->wn_region |= region; 4584 node->wn_affixID = affixID; 4585 break; 4586 } 4587 prev = &node->wn_child; 4588 node = *prev; 4589 } 4590 #ifdef SPELL_PRINTTREE 4591 smsg((char_u *)"Added \"%s\"", word); 4592 spell_print_tree(root->wn_sibling); 4593 #endif 4594 4595 /* count nr of words added since last message */ 4596 ++spin->si_msg_count; 4597 4598 if (spin->si_compress_cnt > 1) 4599 { 4600 if (--spin->si_compress_cnt == 1) 4601 /* Did enough words to lower the block count limit. */ 4602 spin->si_blocks_cnt += compress_inc; 4603 } 4604 4605 /* 4606 * When we have allocated lots of memory we need to compress the word tree 4607 * to free up some room. But compression is slow, and we might actually 4608 * need that room, thus only compress in the following situations: 4609 * 1. 
When not compressed before (si_compress_cnt == 0): when using 4610 * "compress_start" blocks. 4611 * 2. When compressed before and used "compress_inc" blocks before 4612 * adding "compress_added" words (si_compress_cnt > 1). 4613 * 3. When compressed before, added "compress_added" words 4614 * (si_compress_cnt == 1) and the number of free nodes drops below the 4615 * maximum word length. 4616 */ 4617 #ifndef SPELL_COMPRESS_ALLWAYS 4618 if (spin->si_compress_cnt == 1 4619 ? spin->si_free_count < MAXWLEN 4620 : spin->si_blocks_cnt >= compress_start) 4621 #endif 4622 { 4623 /* Decrement the block counter. The effect is that we compress again 4624 * when the freed up room has been used and another "compress_inc" 4625 * blocks have been allocated. Unless "compress_added" words have 4626 * been added, then the limit is put back again. */ 4627 spin->si_blocks_cnt -= compress_inc; 4628 spin->si_compress_cnt = compress_added; 4629 4630 if (spin->si_verbose) 4631 { 4632 msg_start(); 4633 msg_puts((char_u *)_(msg_compressing)); 4634 msg_clr_eos(); 4635 msg_didout = FALSE; 4636 msg_col = 0; 4637 out_flush(); 4638 } 4639 4640 /* Compress both trees. Either they both have many nodes, which makes 4641 * compression useful, or one of them is small, which means 4642 * compression goes fast. But when filling the soundfold word tree 4643 * there is no keep-case tree. */ 4644 wordtree_compress(spin, spin->si_foldroot); 4645 if (affixID >= 0) 4646 wordtree_compress(spin, spin->si_keeproot); 4647 } 4648 4649 return OK; 4650 } 4651 4652 /* 4653 * Get a wordnode_T, either from the list of previously freed nodes or 4654 * allocate a new one. 4655 * Returns NULL when out of memory. 
4656 */ 4657 static wordnode_T * 4658 get_wordnode(spellinfo_T *spin) 4659 { 4660 wordnode_T *n; 4661 4662 if (spin->si_first_free == NULL) 4663 n = (wordnode_T *)getroom(spin, sizeof(wordnode_T), TRUE); 4664 else 4665 { 4666 n = spin->si_first_free; 4667 spin->si_first_free = n->wn_child; 4668 vim_memset(n, 0, sizeof(wordnode_T)); 4669 --spin->si_free_count; 4670 } 4671 #ifdef SPELL_PRINTTREE 4672 if (n != NULL) 4673 n->wn_nr = ++spin->si_wordnode_nr; 4674 #endif 4675 return n; 4676 } 4677 4678 /* 4679 * Decrement the reference count on a node (which is the head of a list of 4680 * siblings). If the reference count becomes zero free the node and its 4681 * siblings. 4682 * Returns the number of nodes actually freed. 4683 */ 4684 static int 4685 deref_wordnode(spellinfo_T *spin, wordnode_T *node) 4686 { 4687 wordnode_T *np; 4688 int cnt = 0; 4689 4690 if (--node->wn_refs == 0) 4691 { 4692 for (np = node; np != NULL; np = np->wn_sibling) 4693 { 4694 if (np->wn_child != NULL) 4695 cnt += deref_wordnode(spin, np->wn_child); 4696 free_wordnode(spin, np); 4697 ++cnt; 4698 } 4699 ++cnt; /* length field */ 4700 } 4701 return cnt; 4702 } 4703 4704 /* 4705 * Free a wordnode_T for re-use later. 4706 * Only the "wn_child" field becomes invalid. 4707 */ 4708 static void 4709 free_wordnode(spellinfo_T *spin, wordnode_T *n) 4710 { 4711 n->wn_child = spin->si_first_free; 4712 spin->si_first_free = n; 4713 ++spin->si_free_count; 4714 } 4715 4716 /* 4717 * Compress a tree: find tails that are identical and can be shared. 4718 */ 4719 static void 4720 wordtree_compress(spellinfo_T *spin, wordnode_T *root) 4721 { 4722 hashtab_T ht; 4723 int n; 4724 int tot = 0; 4725 int perc; 4726 4727 /* Skip the root itself, it's not actually used. The first sibling is the 4728 * start of the tree. 
*/ 4729 if (root->wn_sibling != NULL) 4730 { 4731 hash_init(&ht); 4732 n = node_compress(spin, root->wn_sibling, &ht, &tot); 4733 4734 #ifndef SPELL_PRINTTREE 4735 if (spin->si_verbose || p_verbose > 2) 4736 #endif 4737 { 4738 if (tot > 1000000) 4739 perc = (tot - n) / (tot / 100); 4740 else if (tot == 0) 4741 perc = 0; 4742 else 4743 perc = (tot - n) * 100 / tot; 4744 vim_snprintf((char *)IObuff, IOSIZE, 4745 _("Compressed %d of %d nodes; %d (%d%%) remaining"), 4746 n, tot, tot - n, perc); 4747 spell_message(spin, IObuff); 4748 } 4749 #ifdef SPELL_PRINTTREE 4750 spell_print_tree(root->wn_sibling); 4751 #endif 4752 hash_clear(&ht); 4753 } 4754 } 4755 4756 /* 4757 * Compress a node, its siblings and its children, depth first. 4758 * Returns the number of compressed nodes. 4759 */ 4760 static int 4761 node_compress( 4762 spellinfo_T *spin, 4763 wordnode_T *node, 4764 hashtab_T *ht, 4765 int *tot) /* total count of nodes before compressing, 4766 incremented while going through the tree */ 4767 { 4768 wordnode_T *np; 4769 wordnode_T *tp; 4770 wordnode_T *child; 4771 hash_T hash; 4772 hashitem_T *hi; 4773 int len = 0; 4774 unsigned nr, n; 4775 int compressed = 0; 4776 4777 /* 4778 * Go through the list of siblings. Compress each child and then try 4779 * finding an identical child to replace it. 4780 * Note that with "child" we mean not just the node that is pointed to, 4781 * but the whole list of siblings of which the child node is the first. 4782 */ 4783 for (np = node; np != NULL && !got_int; np = np->wn_sibling) 4784 { 4785 ++len; 4786 if ((child = np->wn_child) != NULL) 4787 { 4788 /* Compress the child first. This fills hashkey. */ 4789 compressed += node_compress(spin, child, ht, tot); 4790 4791 /* Try to find an identical child. 
*/ 4792 hash = hash_hash(child->wn_u1.hashkey); 4793 hi = hash_lookup(ht, child->wn_u1.hashkey, hash); 4794 if (!HASHITEM_EMPTY(hi)) 4795 { 4796 /* There are children we encountered before with a hash value 4797 * identical to the current child. Now check if there is one 4798 * that is really identical. */ 4799 for (tp = HI2WN(hi); tp != NULL; tp = tp->wn_u2.next) 4800 if (node_equal(child, tp)) 4801 { 4802 /* Found one! Now use that child in place of the 4803 * current one. This means the current child and all 4804 * its siblings is unlinked from the tree. */ 4805 ++tp->wn_refs; 4806 compressed += deref_wordnode(spin, child); 4807 np->wn_child = tp; 4808 break; 4809 } 4810 if (tp == NULL) 4811 { 4812 /* No other child with this hash value equals the child of 4813 * the node, add it to the linked list after the first 4814 * item. */ 4815 tp = HI2WN(hi); 4816 child->wn_u2.next = tp->wn_u2.next; 4817 tp->wn_u2.next = child; 4818 } 4819 } 4820 else 4821 /* No other child has this hash value, add it to the 4822 * hashtable. */ 4823 hash_add_item(ht, hi, child->wn_u1.hashkey, hash); 4824 } 4825 } 4826 *tot += len + 1; /* add one for the node that stores the length */ 4827 4828 /* 4829 * Make a hash key for the node and its siblings, so that we can quickly 4830 * find a lookalike node. This must be done after compressing the sibling 4831 * list, otherwise the hash key would become invalid by the compression. 4832 */ 4833 node->wn_u1.hashkey[0] = len; 4834 nr = 0; 4835 for (np = node; np != NULL; np = np->wn_sibling) 4836 { 4837 if (np->wn_byte == NUL) 4838 /* end node: use wn_flags, wn_region and wn_affixID */ 4839 n = np->wn_flags + (np->wn_region << 8) + (np->wn_affixID << 16); 4840 else 4841 /* byte node: use the byte value and the child pointer */ 4842 n = (unsigned)(np->wn_byte + ((long_u)np->wn_child << 8)); 4843 nr = nr * 101 + n; 4844 } 4845 4846 /* Avoid NUL bytes, it terminates the hash key. */ 4847 n = nr & 0xff; 4848 node->wn_u1.hashkey[1] = n == 0 ? 
1 : n; 4849 n = (nr >> 8) & 0xff; 4850 node->wn_u1.hashkey[2] = n == 0 ? 1 : n; 4851 n = (nr >> 16) & 0xff; 4852 node->wn_u1.hashkey[3] = n == 0 ? 1 : n; 4853 n = (nr >> 24) & 0xff; 4854 node->wn_u1.hashkey[4] = n == 0 ? 1 : n; 4855 node->wn_u1.hashkey[5] = NUL; 4856 4857 /* Check for CTRL-C pressed now and then. */ 4858 fast_breakcheck(); 4859 4860 return compressed; 4861 } 4862 4863 /* 4864 * Return TRUE when two nodes have identical siblings and children. 4865 */ 4866 static int 4867 node_equal(wordnode_T *n1, wordnode_T *n2) 4868 { 4869 wordnode_T *p1; 4870 wordnode_T *p2; 4871 4872 for (p1 = n1, p2 = n2; p1 != NULL && p2 != NULL; 4873 p1 = p1->wn_sibling, p2 = p2->wn_sibling) 4874 if (p1->wn_byte != p2->wn_byte 4875 || (p1->wn_byte == NUL 4876 ? (p1->wn_flags != p2->wn_flags 4877 || p1->wn_region != p2->wn_region 4878 || p1->wn_affixID != p2->wn_affixID) 4879 : (p1->wn_child != p2->wn_child))) 4880 break; 4881 4882 return p1 == NULL && p2 == NULL; 4883 } 4884 4885 static int 4886 #ifdef __BORLANDC__ 4887 _RTLENTRYF 4888 #endif 4889 rep_compare(const void *s1, const void *s2); 4890 4891 /* 4892 * Function given to qsort() to sort the REP items on "from" string. 4893 */ 4894 static int 4895 #ifdef __BORLANDC__ 4896 _RTLENTRYF 4897 #endif 4898 rep_compare(const void *s1, const void *s2) 4899 { 4900 fromto_T *p1 = (fromto_T *)s1; 4901 fromto_T *p2 = (fromto_T *)s2; 4902 4903 return STRCMP(p1->ft_from, p2->ft_from); 4904 } 4905 4906 /* 4907 * Write the Vim .spl file "fname". 
* The header is written first, then each section for which "spin" holds
 * information, the section end marker and finally the three word trees.
 * Return FAIL or OK.  FAIL is returned when the file cannot be opened or when
 * writing failed; an error message has been given then.
 */
static int
write_vim_spell(spellinfo_T *spin, char_u *fname)
{
    FILE        *fd;
    int         regionmask;
    int         round;
    wordnode_T  *tree;
    int         nodecount;
    int         i;
    int         l;
    garray_T    *gap;
    fromto_T    *ftp;
    char_u      *p;
    int         rr;
    int         retval = OK;
    size_t      fwv = 1;  /* collect return value of fwrite() to avoid
                             warnings from picky compiler.  Every fwrite()
                             call writes one item and its result is ANDed
                             in, thus "fwv" only stays 1 when all of them
                             succeeded; it is checked at the end. */

    fd = mch_fopen((char *)fname, "w");
    if (fd == NULL)
    {
        EMSG2(_(e_notopen), fname);
        return FAIL;
    }

    /* <HEADER>: <fileID> <versionnr> */
                                                            /* <fileID> */
    fwv &= fwrite(VIMSPELLMAGIC, VIMSPELLMAGICL, (size_t)1, fd);
    if (fwv != (size_t)1)
        /* Catch first write error, don't try writing more. */
        goto theend;

    putc(VIMSPELLVERSION, fd);                              /* <versionnr> */

    /*
     * <SECTIONS>: <section> ... <sectionend>
     * Each section starts with <sectionID> <sectionflags> <sectionlen>.
     */

    /* SN_INFO: <infotext> */
    if (spin->si_info != NULL)
    {
        putc(SN_INFO, fd);                              /* <sectionID> */
        putc(0, fd);                                    /* <sectionflags> */

        i = (int)STRLEN(spin->si_info);
        put_bytes(fd, (long_u)i, 4);                    /* <sectionlen> */
        fwv &= fwrite(spin->si_info, (size_t)i, (size_t)1, fd); /* <infotext> */
    }

    /* SN_REGION: <regionname> ...
     * Write the region names only if there is more than one. */
    if (spin->si_region_count > 1)
    {
        putc(SN_REGION, fd);                            /* <sectionID> */
        putc(SNF_REQUIRED, fd);                         /* <sectionflags> */
        l = spin->si_region_count * 2;      /* two bytes for each name */
        put_bytes(fd, (long_u)l, 4);                    /* <sectionlen> */
        fwv &= fwrite(spin->si_region_name, (size_t)l, (size_t)1, fd);
                                                        /* <regionname> ... */
        /* One bit for each region; passed to put_node() below. */
        regionmask = (1 << spin->si_region_count) - 1;
    }
    else
        regionmask = 0;

    /* SN_CHARFLAGS: <charflagslen> <charflags> <folcharslen> <folchars>
     *
     * The table with character flags and the table for case folding.
     * This makes sure the same characters are recognized as word characters
     * when generating and when using a spell file.
     * Skip this for ASCII, the table may conflict with the one used for
     * 'encoding'.
     * Also skip this for an .add.spl file, the main spell file must contain
     * the table (avoids that it conflicts).  File is shorter too.
     */
    if (!spin->si_ascii && !spin->si_add)
    {
        char_u  folchars[128 * 8];
        int     flags;

        putc(SN_CHARFLAGS, fd);                         /* <sectionID> */
        putc(SNF_REQUIRED, fd);                         /* <sectionflags> */

        /* Form the <folchars> string first, we need to know its length. */
        l = 0;
        for (i = 128; i < 256; ++i)
        {
#ifdef FEAT_MBYTE
            if (has_mbyte)
                l += mb_char2bytes(spelltab.st_fold[i], folchars + l);
            else
#endif
                folchars[l++] = spelltab.st_fold[i];
        }
        /* Length: <charflagslen> (1) + <charflags> (128)
         * + <folcharslen> (2) + <folchars> (l). */
        put_bytes(fd, (long_u)(1 + 128 + 2 + l), 4);    /* <sectionlen> */

        fputc(128, fd);                                 /* <charflagslen> */
        for (i = 128; i < 256; ++i)
        {
            flags = 0;
            if (spelltab.st_isw[i])
                flags |= CF_WORD;
            if (spelltab.st_isu[i])
                flags |= CF_UPPER;
            fputc(flags, fd);                           /* <charflags> */
        }

        put_bytes(fd, (long_u)l, 2);                    /* <folcharslen> */
        fwv &= fwrite(folchars, (size_t)l, (size_t)1, fd); /* <folchars> */
    }

    /* SN_MIDWORD: <midword> */
    if (spin->si_midword != NULL)
    {
        putc(SN_MIDWORD, fd);                           /* <sectionID> */
        putc(SNF_REQUIRED, fd);                         /* <sectionflags> */

        i = (int)STRLEN(spin->si_midword);
        put_bytes(fd, (long_u)i, 4);                    /* <sectionlen> */
        fwv &= fwrite(spin->si_midword, (size_t)i, (size_t)1, fd);
                                                        /* <midword> */
    }

    /* SN_PREFCOND: <prefcondcnt> <prefcond> ... */
    if (spin->si_prefcond.ga_len > 0)
    {
        putc(SN_PREFCOND, fd);                          /* <sectionID> */
        putc(SNF_REQUIRED, fd);                         /* <sectionflags> */

        /* First call with a NULL file: the result is used as the length. */
        l = write_spell_prefcond(NULL, &spin->si_prefcond);
        put_bytes(fd, (long_u)l, 4);                    /* <sectionlen> */

        write_spell_prefcond(fd, &spin->si_prefcond);
    }

    /* SN_REP: <repcount> <rep> ...
     * SN_SAL: <salflags> <salcount> <sal> ...
     * SN_REPSAL: <repcount> <rep> ... */

    /* round 1: SN_REP section
     * round 2: SN_SAL section (unless SN_SOFO is used)
     * round 3: SN_REPSAL section */
    for (round = 1; round <= 3; ++round)
    {
        if (round == 1)
            gap = &spin->si_rep;
        else if (round == 2)
        {
            /* Don't write SN_SAL when using a SN_SOFO section */
            if (spin->si_sofofr != NULL && spin->si_sofoto != NULL)
                continue;
            gap = &spin->si_sal;
        }
        else
            gap = &spin->si_repsal;

        /* Don't write the section if there are no items. */
        if (gap->ga_len == 0)
            continue;

        /* Sort the REP/REPSAL items. */
        if (round != 2)
            qsort(gap->ga_data, (size_t)gap->ga_len,
                                               sizeof(fromto_T), rep_compare);

        i = round == 1 ? SN_REP : (round == 2 ? SN_SAL : SN_REPSAL);
        putc(i, fd);                                    /* <sectionID> */

        /* This is for making suggestions, section is not required. */
        putc(0, fd);                                    /* <sectionflags> */

        /* Compute the length of what follows. */
        l = 2;      /* count <repcount> or <salcount> */
        for (i = 0; i < gap->ga_len; ++i)
        {
            ftp = &((fromto_T *)gap->ga_data)[i];
            l += 1 + (int)STRLEN(ftp->ft_from);  /* count <*fromlen> and <*from> */
            l += 1 + (int)STRLEN(ftp->ft_to);    /* count <*tolen> and <*to> */
        }
        if (round == 2)
            ++l;        /* count <salflags> */
        put_bytes(fd, (long_u)l, 4);                    /* <sectionlen> */

        if (round == 2)
        {
            i = 0;
            if (spin->si_followup)
                i |= SAL_F0LLOWUP;
            if (spin->si_collapse)
                i |= SAL_COLLAPSE;
            if (spin->si_rem_accents)
                i |= SAL_REM_ACCENTS;
            putc(i, fd);                        /* <salflags> */
        }

        put_bytes(fd, (long_u)gap->ga_len, 2);  /* <repcount> or <salcount> */
        for (i = 0; i < gap->ga_len; ++i)
        {
            /* <rep> : <repfromlen> <repfrom> <reptolen> <repto> */
            /* <sal> : <salfromlen> <salfrom> <saltolen> <salto> */
            ftp = &((fromto_T *)gap->ga_data)[i];
            /* rr 1: the "from" string, rr 2: the "to" string */
            for (rr = 1; rr <= 2; ++rr)
            {
                p = rr == 1 ? ftp->ft_from : ftp->ft_to;
                l = (int)STRLEN(p);
                putc(l, fd);
                /* Skip fwrite() for an empty string: writing an item of
                 * zero size returns zero, which would look like an error
                 * in "fwv". */
                if (l > 0)
                    fwv &= fwrite(p, l, (size_t)1, fd);
            }
        }

    }

    /* SN_SOFO: <sofofromlen> <sofofrom> <sofotolen> <sofoto>
     * This is for making suggestions, section is not required. */
    if (spin->si_sofofr != NULL && spin->si_sofoto != NULL)
    {
        putc(SN_SOFO, fd);                              /* <sectionID> */
        putc(0, fd);                                    /* <sectionflags> */

        l = (int)STRLEN(spin->si_sofofr);
        /* The 4 is for the two length fields of two bytes each. */
        put_bytes(fd, (long_u)(l + STRLEN(spin->si_sofoto) + 4), 4);
                                                        /* <sectionlen> */

        put_bytes(fd, (long_u)l, 2);                    /* <sofofromlen> */
        fwv &= fwrite(spin->si_sofofr, l, (size_t)1, fd); /* <sofofrom> */

        l = (int)STRLEN(spin->si_sofoto);
        put_bytes(fd, (long_u)l, 2);                    /* <sofotolen> */
        fwv &= fwrite(spin->si_sofoto, l, (size_t)1, fd); /* <sofoto> */
    }

    /* SN_WORDS: <word> ...
     * This is for making suggestions, section is not required. */
    if (spin->si_commonwords.ht_used > 0)
    {
        putc(SN_WORDS, fd);                             /* <sectionID> */
        putc(0, fd);                                    /* <sectionflags> */

        /* round 1: count the bytes
         * round 2: write the bytes */
        for (round = 1; round <= 2; ++round)
        {
            int         todo;
            int         len = 0;
            hashitem_T  *hi;

            /* Go through the hashtable array until all used items have
             * been seen. */
            todo = (int)spin->si_commonwords.ht_used;
            for (hi = spin->si_commonwords.ht_array; todo > 0; ++hi)
                if (!HASHITEM_EMPTY(hi))
                {
                    /* the NUL that terminates the word is included */
                    l = (int)STRLEN(hi->hi_key) + 1;
                    len += l;
                    if (round == 2)                     /* <word> */
                        fwv &= fwrite(hi->hi_key, (size_t)l, (size_t)1, fd);
                    --todo;
                }
            if (round == 1)
                put_bytes(fd, (long_u)len, 4);          /* <sectionlen> */
        }
    }

    /* SN_MAP: <mapstr>
     * This is for making suggestions, section is not required. */
    if (spin->si_map.ga_len > 0)
    {
        putc(SN_MAP, fd);                               /* <sectionID> */
        putc(0, fd);                                    /* <sectionflags> */
        l = spin->si_map.ga_len;
        put_bytes(fd, (long_u)l, 4);                    /* <sectionlen> */
        fwv &= fwrite(spin->si_map.ga_data, (size_t)l, (size_t)1, fd);
                                                        /* <mapstr> */
    }

    /* SN_SUGFILE: <timestamp>
     * This is used to notify that a .sug file may be available and at the
     * same time allows for checking that a .sug file that is found matches
     * with this .spl file.  That's because the word numbers must be exactly
     * right. */
    if (!spin->si_nosugfile
            && (spin->si_sal.ga_len > 0
                     || (spin->si_sofofr != NULL && spin->si_sofoto != NULL)))
    {
        putc(SN_SUGFILE, fd);                           /* <sectionID> */
        putc(0, fd);                                    /* <sectionflags> */
        put_bytes(fd, (long_u)8, 4);                    /* <sectionlen> */

        /* Set si_sugtime and write it to the file. */
        spin->si_sugtime = time(NULL);
        put_time(fd, spin->si_sugtime);                 /* <timestamp> */
    }

    /* SN_NOSPLITSUGS: nothing
     * This is used to notify that no suggestions with word splits are to be
     * made. */
    if (spin->si_nosplitsugs)
    {
        putc(SN_NOSPLITSUGS, fd);                       /* <sectionID> */
        putc(0, fd);                                    /* <sectionflags> */
        put_bytes(fd, (long_u)0, 4);                    /* <sectionlen> */
    }

    /* SN_NOCOMPOUNDSUGS: nothing
     * This is used to notify that no suggestions with compounds are to be
     * made. */
    if (spin->si_nocompoundsugs)
    {
        putc(SN_NOCOMPOUNDSUGS, fd);                    /* <sectionID> */
        putc(0, fd);                                    /* <sectionflags> */
        put_bytes(fd, (long_u)0, 4);                    /* <sectionlen> */
    }

    /* SN_COMPOUND: compound info.
     * We don't mark it required, when not supported all compound words will
     * be bad words. */
    if (spin->si_compflags != NULL)
    {
        putc(SN_COMPOUND, fd);                          /* <sectionID> */
        putc(0, fd);                                    /* <sectionflags> */

        /* Length: <compflags>, for each pattern the length byte plus the
         * text, and 7 for the five single bytes and the two bytes of
         * <comppatcount> written below. */
        l = (int)STRLEN(spin->si_compflags);
        for (i = 0; i < spin->si_comppat.ga_len; ++i)
            l += (int)STRLEN(((char_u **)(spin->si_comppat.ga_data))[i]) + 1;
        put_bytes(fd, (long_u)(l + 7), 4);              /* <sectionlen> */

        putc(spin->si_compmax, fd);                     /* <compmax> */
        putc(spin->si_compminlen, fd);                  /* <compminlen> */
        putc(spin->si_compsylmax, fd);                  /* <compsylmax> */
        putc(0, fd);            /* for Vim 7.0b compatibility */
        putc(spin->si_compoptions, fd);                 /* <compoptions> */
        put_bytes(fd, (long_u)spin->si_comppat.ga_len, 2);
                                                        /* <comppatcount> */
        for (i = 0; i < spin->si_comppat.ga_len; ++i)
        {
            p = ((char_u **)(spin->si_comppat.ga_data))[i];
            putc((int)STRLEN(p), fd);                   /* <comppatlen> */
            fwv &= fwrite(p, (size_t)STRLEN(p), (size_t)1, fd);
                                                        /* <comppattext> */
        }
                                                        /* <compflags> */
        fwv &= fwrite(spin->si_compflags, (size_t)STRLEN(spin->si_compflags),
                                                               (size_t)1, fd);
    }

    /* SN_NOBREAK: NOBREAK flag */
    if (spin->si_nobreak)
    {
        putc(SN_NOBREAK, fd);                           /* <sectionID> */
        putc(0, fd);                                    /* <sectionflags> */

        /* It's empty, the presence of the section flags the feature. */
        put_bytes(fd, (long_u)0, 4);                    /* <sectionlen> */
    }

    /* SN_SYLLABLE: syllable info.
     * We don't mark it required, when not supported syllables will not be
     * counted. */
    if (spin->si_syllable != NULL)
    {
        putc(SN_SYLLABLE, fd);                          /* <sectionID> */
        putc(0, fd);                                    /* <sectionflags> */

        l = (int)STRLEN(spin->si_syllable);
        put_bytes(fd, (long_u)l, 4);                    /* <sectionlen> */
        fwv &= fwrite(spin->si_syllable, (size_t)l, (size_t)1, fd);
                                                        /* <syllable> */
    }

    /* end of <SECTIONS> */
    putc(SN_END, fd);                                   /* <sectionend> */


    /*
     * <LWORDTREE>  <KWORDTREE>  <PREFIXTREE>
     * round 1: case-folded words, round 2: keep-case words, round 3:
     * prefixes.
     */
    spin->si_memtot = 0;
    for (round = 1; round <= 3; ++round)
    {
        if (round == 1)
            tree = spin->si_foldroot->wn_sibling;
        else if (round == 2)
            tree = spin->si_keeproot->wn_sibling;
        else
            tree = spin->si_prefroot->wn_sibling;

        /* Clear the index and wnode fields in the tree. */
        clear_node(tree);

        /* Count the number of nodes.  Needed to be able to allocate the
         * memory when reading the nodes.  Also fills in index for shared
         * nodes. */
        nodecount = put_node(NULL, tree, 0, regionmask, round == 3);

        /* number of nodes in 4 bytes */
        put_bytes(fd, (long_u)nodecount, 4);    /* <nodecount> */
        spin->si_memtot += nodecount + nodecount * sizeof(int);

        /* Write the nodes. */
        (void)put_node(fd, tree, 0, regionmask, round == 3);
    }

    /* Write another byte to check for errors (file system full). */
    if (putc(0, fd) == EOF)
        retval = FAIL;
theend:
    /* The file is closed on every path that opened it; a failing close is
     * treated as a write error as well. */
    if (fclose(fd) == EOF)
        retval = FAIL;

    if (fwv != (size_t)1)
        retval = FAIL;
    /* Give the error message only once, whatever write failed. */
    if (retval == FAIL)
        EMSG(_(e_write));

    return retval;
}

/*
 * Clear the index and wnode fields of "node", its siblings and its
 * children.  This is needed because they are a union with other items to save
 * space.
*/
static void
clear_node(wordnode_T *node)
{
    wordnode_T  *np;

    if (node != NULL)
        for (np = node; np != NULL; np = np->wn_sibling)
        {
            np->wn_u1.index = 0;
            np->wn_u2.wnode = NULL;

            /* Only a node with a byte has a child to recurse into; an
             * end-of-word node (NUL byte) is not followed. */
            if (np->wn_byte != NUL)
                clear_node(np->wn_child);
        }
}


/*
 * Dump a word tree at node "node".
 *
 * This first writes the list of possible bytes (siblings).  Then for each
 * byte recursively write the children.
 *
 * NOTE: The code here must match the code in read_tree_node(), since
 * assumptions are made about the indexes (so that we don't have to write them
 * in the file).
 *
 * "idx" is the index that is assigned to "node".  When "fd" is NULL nothing
 * is written, but the index and wnode fields in the tree are still set.
 *
 * Returns the number of nodes used.  Returns zero when "node" is NULL and
 * when writing a byte failed (an error message was given then).
 */
static int
put_node(
    FILE        *fd,            /* NULL when only counting */
    wordnode_T  *node,
    int         idx,
    int         regionmask,
    int         prefixtree)     /* TRUE for PREFIXTREE */
{
    int         newindex = idx;
    int         siblingcount = 0;
    wordnode_T  *np;
    int         flags;

    /* If "node" is zero the tree is empty. */
    if (node == NULL)
        return 0;

    /* Store the index where this node is written. */
    node->wn_u1.index = idx;

    /* Count the number of siblings. */
    for (np = node; np != NULL; np = np->wn_sibling)
        ++siblingcount;

    /* Write the sibling count. */
    if (fd != NULL)
        putc(siblingcount, fd);                         /* <siblingcount> */

    /* Write each sibling byte and optionally extra info. */
    for (np = node; np != NULL; np = np->wn_sibling)
    {
        if (np->wn_byte == 0)
        {
            if (fd != NULL)
            {
                /* For a NUL byte (end of word) write the flags etc. */
                if (prefixtree)
                {
                    /* In PREFIXTREE write the required affixID and the
                     * associated condition nr (stored in wn_region).  The
                     * byte value is misused to store the "rare" and "not
                     * combining" flags */
                    if (np->wn_flags == (short_u)PFX_FLAGS)
                        putc(BY_NOFLAGS, fd);           /* <byte> */
                    else
                    {
                        putc(BY_FLAGS, fd);             /* <byte> */
                        putc(np->wn_flags, fd);         /* <pflags> */
                    }
                    putc(np->wn_affixID, fd);           /* <affixID> */
                    put_bytes(fd, (long_u)np->wn_region, 2); /* <prefcondnr> */
                }
                else
                {
                    /* For word trees we write the flag/region items. */
                    flags = np->wn_flags;
                    /* The region is only written when regions are used and
                     * the word is not valid in all of them. */
                    if (regionmask != 0 && np->wn_region != regionmask)
                        flags |= WF_REGION;
                    if (np->wn_affixID != 0)
                        flags |= WF_AFX;
                    if (flags == 0)
                    {
                        /* word without flags or region */
                        putc(BY_NOFLAGS, fd);                   /* <byte> */
                    }
                    else
                    {
                        /* Flags that don't fit in one byte need the
                         * two-byte form. */
                        if (np->wn_flags >= 0x100)
                        {
                            putc(BY_FLAGS2, fd);                /* <byte> */
                            putc(flags, fd);                    /* <flags> */
                            putc((unsigned)flags >> 8, fd);     /* <flags2> */
                        }
                        else
                        {
                            putc(BY_FLAGS, fd);                 /* <byte> */
                            putc(flags, fd);                    /* <flags> */
                        }
                        if (flags & WF_REGION)
                            putc(np->wn_region, fd);            /* <region> */
                        if (flags & WF_AFX)
                            putc(np->wn_affixID, fd);           /* <affixID> */
                    }
                }
            }
        }
        else
        {
            /* "wn_u2.wnode" of a child remembers the sibling list that is
             * going to write it.  A child that already has an index and
             * belongs to another list is only referred to. */
            if (np->wn_child->wn_u1.index != 0
                                       && np->wn_child->wn_u2.wnode != node)
            {
                /* The child is written elsewhere, write the reference. */
                if (fd != NULL)
                {
                    putc(BY_INDEX, fd);                 /* <byte> */
                                                        /* <nodeidx> */
                    put_bytes(fd, (long_u)np->wn_child->wn_u1.index, 3);
                }
            }
            else if (np->wn_child->wn_u2.wnode == NULL)
                /* We will write the child below and give it an index. */
                np->wn_child->wn_u2.wnode = node;

            if (fd != NULL)
                if (putc(np->wn_byte, fd) == EOF) /* <byte> or <xbyte> */
                {
                    EMSG(_(e_write));
                    return 0;
                }
        }
    }

    /* Space used in the array when reading: one for each sibling and one for
     * the count. */
    newindex += siblingcount + 1;

    /* Recursively dump the children of each sibling.  Only the children
     * that were claimed for this list above are written here. */
    for (np = node; np != NULL; np = np->wn_sibling)
        if (np->wn_byte != 0 && np->wn_child->wn_u2.wnode == node)
            newindex = put_node(fd, np->wn_child, newindex, regionmask,
                                                                  prefixtree);

    return newindex;
}


/*
 * ":mkspell [-ascii] outfile  infile ..."
 * ":mkspell [-ascii] addfile"
 */
void
ex_mkspell(exarg_T *eap)
{
    int         fcount;
    char_u      **fnames;
    char_u      *arg = eap->arg;
    int         ascii = FALSE;

    /* An argument that starts with "-ascii" is an option, the file names
     * come after it. */
    if (STRNCMP(arg, "-ascii", 6) == 0)
    {
        ascii = TRUE;
        arg = skipwhite(arg + 6);
    }

    /* Expand all the remaining arguments (e.g., $VIMRUNTIME). */
    if (get_arglist_exp(arg, &fcount, &fnames, FALSE) == OK)
    {
        mkspell(fcount, fnames, ascii, eap->forceit, FALSE);
        FreeWild(fcount, fnames);
    }
}

/*
 * Create the .sug file.
 * Uses the soundfold info in "spin".
 * Writes the file with the name "wfname", with ".spl" changed to ".sug".
 */
static void
spell_make_sugfile(spellinfo_T *spin, char_u *wfname)
{
    char_u      *fname = NULL;
    int         len;
    slang_T     *slang;
    int         free_slang = FALSE;  /* TRUE when "slang" was loaded here
                                        and must be freed at the end */

    /*
     * Read back the .spl file that was written.  This fills the required
     * info for soundfolding.  This also uses less memory than the
     * pointer-linked version of the trie.  And it avoids having two versions
     * of the code for the soundfolding stuff.
     * It might have been done already by spell_reload_one().
     */
    for (slang = first_lang; slang != NULL; slang = slang->sl_next)
        if (fullpathcmp(wfname, slang->sl_fname, FALSE) == FPC_SAME)
            break;
    if (slang == NULL)
    {
        spell_message(spin, (char_u *)_("Reading back spell file..."));
        slang = spell_load_file(wfname, NULL, NULL, FALSE);
        if (slang == NULL)
            return;
        free_slang = TRUE;
    }

    /*
     * Clear the info in "spin" that is used.
     */
    spin->si_blocks = NULL;
    spin->si_blocks_cnt = 0;
    spin->si_compress_cnt = 0;      /* will stay at 0 all the time*/
    spin->si_free_count = 0;
    spin->si_first_free = NULL;
    spin->si_foldwcount = 0;

    /*
     * Go through the trie of good words, soundfold each word and add it to
     * the soundfold trie.
     */
    spell_message(spin, (char_u *)_("Performing soundfolding..."));
    if (sug_filltree(spin, slang) == FAIL)
        goto theend;

    /*
     * Create the table which links each soundfold word with a list of the
     * good words it may come from.  Creates buffer "spin->si_spellbuf".
     * This also removes the wordnr from the NUL byte entries to make
     * compression possible.
     */
    if (sug_maketable(spin) == FAIL)
        goto theend;

    smsg((char_u *)_("Number of words after soundfolding: %ld"),
                                 (long)spin->si_spellbuf->b_ml.ml_line_count);

    /*
     * Compress the soundfold trie.
     */
    spell_message(spin, (char_u *)_(msg_compressing));
    wordtree_compress(spin, spin->si_foldroot);

    /*
     * Write the .sug file.
     * Make the file name by changing ".spl" to ".sug".
     */
    fname = alloc(MAXPATHL);
    if (fname == NULL)
        goto theend;
    vim_strncpy(fname, wfname, MAXPATHL - 1);
    len = (int)STRLEN(fname);
    /* Overwrite the last two characters: "pl" becomes "ug". */
    fname[len - 2] = 'u';
    fname[len - 1] = 'g';
    sug_write(spin, fname);

theend:
    /* Common cleanup, also used when one of the steps above failed. */
    vim_free(fname);
    if (free_slang)
        slang_free(slang);
    free_blocks(spin->si_blocks);
    close_spellbuf(spin->si_spellbuf);
}

/*
 * Build the soundfold trie for language "slang".
 * The result is stored in "spin->si_foldroot".
 * Returns FAIL when the root node could not be allocated or adding a word
 * failed, OK otherwise.
 */
static int
sug_filltree(spellinfo_T *spin, slang_T *slang)
{
    char_u      *byts;
    idx_T       *idxs;
    int         depth;
    idx_T       arridx[MAXWLEN];    /* per depth: index of the node */
    int         curi[MAXWLEN];      /* per depth: next byte to handle */
    char_u      tword[MAXWLEN];     /* the word collected so far */
    char_u      tsalword[MAXWLEN];  /* soundfolded "tword" */
    int         c;
    idx_T       n;
    unsigned    words_done = 0;
    int         wordcount[MAXWLEN]; /* per depth: words found below the
                                       node */

    /* We use si_foldroot for the soundfolded trie. */
    spin->si_foldroot = wordtree_alloc(spin);
    if (spin->si_foldroot == NULL)
        return FAIL;

    /* let tree_add_word() know we're adding to the soundfolded tree */
    spin->si_sugtree = TRUE;

    /*
     * Go through the whole case-folded tree, soundfold each word and put it
     * in the trie.
     */
    byts = slang->sl_fbyts;
    idxs = slang->sl_fidxs;

    arridx[0] = 0;
    curi[0] = 1;
    wordcount[0] = 0;

    depth = 0;
    while (depth >= 0 && !got_int)
    {
        /* The first entry of a node in "byts" is the number of bytes in
         * that node, "curi" counts from one. */
        if (curi[depth] > byts[arridx[depth]])
        {
            /* Done all bytes at this node, go up one level. */
            /* The word count is stored in "idxs" of the node and added to
             * the count of the node above it. */
            idxs[arridx[depth]] = wordcount[depth];
            if (depth > 0)
                wordcount[depth - 1] += wordcount[depth];

            --depth;
            line_breakcheck();
        }
        else
        {

            /* Do one more byte at this node. */
            n = arridx[depth] + curi[depth];
            ++curi[depth];

            c = byts[n];
            if (c == 0)
            {
                /* Sound-fold the word. */
                tword[depth] = NUL;
                spell_soundfold(slang, tword, TRUE, tsalword);

                /* We use the "flags" field for the MSB of the wordnr,
                 * "region" for the LSB of the wordnr. */
                if (tree_add_word(spin, tsalword, spin->si_foldroot,
                                words_done >> 16, words_done & 0xffff,
                                                           0) == FAIL)
                    return FAIL;

                ++words_done;
                ++wordcount[depth];

                /* Reset the block count each time to avoid compression
                 * kicking in. */
                spin->si_blocks_cnt = 0;

                /* Skip over any other NUL bytes (same word with different
                 * flags). */
                while (byts[n + 1] == 0)
                {
                    ++n;
                    ++curi[depth];
                }
            }
            else
            {
                /* Normal char, go one level deeper. */
                tword[depth++] = c;
                arridx[depth] = idxs[n];
                curi[depth] = 1;
                wordcount[depth] = 0;
            }
        }
    }

    smsg((char_u *)_("Total number of words: %d"), words_done);

    return OK;
}

/*
 * Make the table that links each word in the soundfold trie to the words it
 * can be produced from.
 * This is not unlike lines in a file, thus use a memfile to be able to access
 * the table efficiently.
 * Returns FAIL when out of memory.
 */
static int
sug_maketable(spellinfo_T *spin)
{
    garray_T    ga;
    int         res = OK;

    /* Allocate a buffer, open a memline for it and create the swap file
     * (uses a temp file, not a .swp file). */
    spin->si_spellbuf = open_spellbuf();
    if (spin->si_spellbuf == NULL)
        return FAIL;

    /* Use a buffer to store the line info, avoids allocating many small
     * pieces of memory. */
    ga_init2(&ga, 1, 100);

    /* recursively go through the tree */
    if (sug_filltable(spin, spin->si_foldroot->wn_sibling, 0, &ga) == -1)
        res = FAIL;

    /* The growarray was only needed while filling the table. */
    ga_clear(&ga);
    return res;
}

/*
 * Fill the table for one node and its children.
 * Returns the wordnr at the start of the node.
5736 * Returns -1 when out of memory. 5737 */ 5738 static int 5739 sug_filltable( 5740 spellinfo_T *spin, 5741 wordnode_T *node, 5742 int startwordnr, 5743 garray_T *gap) /* place to store line of numbers */ 5744 { 5745 wordnode_T *p, *np; 5746 int wordnr = startwordnr; 5747 int nr; 5748 int prev_nr; 5749 5750 for (p = node; p != NULL; p = p->wn_sibling) 5751 { 5752 if (p->wn_byte == NUL) 5753 { 5754 gap->ga_len = 0; 5755 prev_nr = 0; 5756 for (np = p; np != NULL && np->wn_byte == NUL; np = np->wn_sibling) 5757 { 5758 if (ga_grow(gap, 10) == FAIL) 5759 return -1; 5760 5761 nr = (np->wn_flags << 16) + (np->wn_region & 0xffff); 5762 /* Compute the offset from the previous nr and store the 5763 * offset in a way that it takes a minimum number of bytes. 5764 * It's a bit like utf-8, but without the need to mark 5765 * following bytes. */ 5766 nr -= prev_nr; 5767 prev_nr += nr; 5768 gap->ga_len += offset2bytes(nr, 5769 (char_u *)gap->ga_data + gap->ga_len); 5770 } 5771 5772 /* add the NUL byte */ 5773 ((char_u *)gap->ga_data)[gap->ga_len++] = NUL; 5774 5775 if (ml_append_buf(spin->si_spellbuf, (linenr_T)wordnr, 5776 gap->ga_data, gap->ga_len, TRUE) == FAIL) 5777 return -1; 5778 ++wordnr; 5779 5780 /* Remove extra NUL entries, we no longer need them. We don't 5781 * bother freeing the nodes, the won't be reused anyway. */ 5782 while (p->wn_sibling != NULL && p->wn_sibling->wn_byte == NUL) 5783 p->wn_sibling = p->wn_sibling->wn_sibling; 5784 5785 /* Clear the flags on the remaining NUL node, so that compression 5786 * works a lot better. */ 5787 p->wn_flags = 0; 5788 p->wn_region = 0; 5789 } 5790 else 5791 { 5792 wordnr = sug_filltable(spin, p->wn_child, wordnr, gap); 5793 if (wordnr == -1) 5794 return -1; 5795 } 5796 } 5797 return wordnr; 5798 } 5799 5800 /* 5801 * Convert an offset into a minimal number of bytes. 5802 * Similar to utf_char2byters, but use 8 bits in followup bytes and avoid NUL 5803 * bytes. 
5804 */ 5805 static int 5806 offset2bytes(int nr, char_u *buf) 5807 { 5808 int rem; 5809 int b1, b2, b3, b4; 5810 5811 /* Split the number in parts of base 255. We need to avoid NUL bytes. */ 5812 b1 = nr % 255 + 1; 5813 rem = nr / 255; 5814 b2 = rem % 255 + 1; 5815 rem = rem / 255; 5816 b3 = rem % 255 + 1; 5817 b4 = rem / 255 + 1; 5818 5819 if (b4 > 1 || b3 > 0x1f) /* 4 bytes */ 5820 { 5821 buf[0] = 0xe0 + b4; 5822 buf[1] = b3; 5823 buf[2] = b2; 5824 buf[3] = b1; 5825 return 4; 5826 } 5827 if (b3 > 1 || b2 > 0x3f ) /* 3 bytes */ 5828 { 5829 buf[0] = 0xc0 + b3; 5830 buf[1] = b2; 5831 buf[2] = b1; 5832 return 3; 5833 } 5834 if (b2 > 1 || b1 > 0x7f ) /* 2 bytes */ 5835 { 5836 buf[0] = 0x80 + b2; 5837 buf[1] = b1; 5838 return 2; 5839 } 5840 /* 1 byte */ 5841 buf[0] = b1; 5842 return 1; 5843 } 5844 5845 /* 5846 * Write the .sug file in "fname". 5847 */ 5848 static void 5849 sug_write(spellinfo_T *spin, char_u *fname) 5850 { 5851 FILE *fd; 5852 wordnode_T *tree; 5853 int nodecount; 5854 int wcount; 5855 char_u *line; 5856 linenr_T lnum; 5857 int len; 5858 5859 /* Create the file. Note that an existing file is silently overwritten! */ 5860 fd = mch_fopen((char *)fname, "w"); 5861 if (fd == NULL) 5862 { 5863 EMSG2(_(e_notopen), fname); 5864 return; 5865 } 5866 5867 vim_snprintf((char *)IObuff, IOSIZE, 5868 _("Writing suggestion file %s ..."), fname); 5869 spell_message(spin, IObuff); 5870 5871 /* 5872 * <SUGHEADER>: <fileID> <versionnr> <timestamp> 5873 */ 5874 if (fwrite(VIMSUGMAGIC, VIMSUGMAGICL, (size_t)1, fd) != 1) /* <fileID> */ 5875 { 5876 EMSG(_(e_write)); 5877 goto theend; 5878 } 5879 putc(VIMSUGVERSION, fd); /* <versionnr> */ 5880 5881 /* Write si_sugtime to the file. */ 5882 put_time(fd, spin->si_sugtime); /* <timestamp> */ 5883 5884 /* 5885 * <SUGWORDTREE> 5886 */ 5887 spin->si_memtot = 0; 5888 tree = spin->si_foldroot->wn_sibling; 5889 5890 /* Clear the index and wnode fields in the tree. */ 5891 clear_node(tree); 5892 5893 /* Count the number of nodes. 
Needed to be able to allocate the 5894 * memory when reading the nodes. Also fills in index for shared 5895 * nodes. */ 5896 nodecount = put_node(NULL, tree, 0, 0, FALSE); 5897 5898 /* number of nodes in 4 bytes */ 5899 put_bytes(fd, (long_u)nodecount, 4); /* <nodecount> */ 5900 spin->si_memtot += nodecount + nodecount * sizeof(int); 5901 5902 /* Write the nodes. */ 5903 (void)put_node(fd, tree, 0, 0, FALSE); 5904 5905 /* 5906 * <SUGTABLE>: <sugwcount> <sugline> ... 5907 */ 5908 wcount = spin->si_spellbuf->b_ml.ml_line_count; 5909 put_bytes(fd, (long_u)wcount, 4); /* <sugwcount> */ 5910 5911 for (lnum = 1; lnum <= (linenr_T)wcount; ++lnum) 5912 { 5913 /* <sugline>: <sugnr> ... NUL */ 5914 line = ml_get_buf(spin->si_spellbuf, lnum, FALSE); 5915 len = (int)STRLEN(line) + 1; 5916 if (fwrite(line, (size_t)len, (size_t)1, fd) == 0) 5917 { 5918 EMSG(_(e_write)); 5919 goto theend; 5920 } 5921 spin->si_memtot += len; 5922 } 5923 5924 /* Write another byte to check for errors. */ 5925 if (putc(0, fd) == EOF) 5926 EMSG(_(e_write)); 5927 5928 vim_snprintf((char *)IObuff, IOSIZE, 5929 _("Estimated runtime memory use: %d bytes"), spin->si_memtot); 5930 spell_message(spin, IObuff); 5931 5932 theend: 5933 /* close the file */ 5934 fclose(fd); 5935 } 5936 5937 5938 /* 5939 * Create a Vim spell file from one or more word lists. 5940 * "fnames[0]" is the output file name. 5941 * "fnames[fcount - 1]" is the last input file name. 5942 * Exception: when "fnames[0]" ends in ".add" it's used as the input file name 5943 * and ".spl" is appended to make the output file name. 
5944 */ 5945 void 5946 mkspell( 5947 int fcount, 5948 char_u **fnames, 5949 int ascii, /* -ascii argument given */ 5950 int over_write, /* overwrite existing output file */ 5951 int added_word) /* invoked through "zg" */ 5952 { 5953 char_u *fname = NULL; 5954 char_u *wfname; 5955 char_u **innames; 5956 int incount; 5957 afffile_T *(afile[8]); 5958 int i; 5959 int len; 5960 stat_T st; 5961 int error = FALSE; 5962 spellinfo_T spin; 5963 5964 vim_memset(&spin, 0, sizeof(spin)); 5965 spin.si_verbose = !added_word; 5966 spin.si_ascii = ascii; 5967 spin.si_followup = TRUE; 5968 spin.si_rem_accents = TRUE; 5969 ga_init2(&spin.si_rep, (int)sizeof(fromto_T), 20); 5970 ga_init2(&spin.si_repsal, (int)sizeof(fromto_T), 20); 5971 ga_init2(&spin.si_sal, (int)sizeof(fromto_T), 20); 5972 ga_init2(&spin.si_map, (int)sizeof(char_u), 100); 5973 ga_init2(&spin.si_comppat, (int)sizeof(char_u *), 20); 5974 ga_init2(&spin.si_prefcond, (int)sizeof(char_u *), 50); 5975 hash_init(&spin.si_commonwords); 5976 spin.si_newcompID = 127; /* start compound ID at first maximum */ 5977 5978 /* default: fnames[0] is output file, following are input files */ 5979 innames = &fnames[1]; 5980 incount = fcount - 1; 5981 5982 wfname = alloc(MAXPATHL); 5983 if (wfname == NULL) 5984 return; 5985 5986 if (fcount >= 1) 5987 { 5988 len = (int)STRLEN(fnames[0]); 5989 if (fcount == 1 && len > 4 && STRCMP(fnames[0] + len - 4, ".add") == 0) 5990 { 5991 /* For ":mkspell path/en.latin1.add" output file is 5992 * "path/en.latin1.add.spl". */ 5993 innames = &fnames[0]; 5994 incount = 1; 5995 vim_snprintf((char *)wfname, MAXPATHL, "%s.spl", fnames[0]); 5996 } 5997 else if (fcount == 1) 5998 { 5999 /* For ":mkspell path/vim" output file is "path/vim.latin1.spl". */ 6000 innames = &fnames[0]; 6001 incount = 1; 6002 vim_snprintf((char *)wfname, MAXPATHL, SPL_FNAME_TMPL, 6003 fnames[0], spin.si_ascii ? 
(char_u *)"ascii" : spell_enc()); 6004 } 6005 else if (len > 4 && STRCMP(fnames[0] + len - 4, ".spl") == 0) 6006 { 6007 /* Name ends in ".spl", use as the file name. */ 6008 vim_strncpy(wfname, fnames[0], MAXPATHL - 1); 6009 } 6010 else 6011 /* Name should be language, make the file name from it. */ 6012 vim_snprintf((char *)wfname, MAXPATHL, SPL_FNAME_TMPL, 6013 fnames[0], spin.si_ascii ? (char_u *)"ascii" : spell_enc()); 6014 6015 /* Check for .ascii.spl. */ 6016 if (strstr((char *)gettail(wfname), SPL_FNAME_ASCII) != NULL) 6017 spin.si_ascii = TRUE; 6018 6019 /* Check for .add.spl. */ 6020 if (strstr((char *)gettail(wfname), SPL_FNAME_ADD) != NULL) 6021 spin.si_add = TRUE; 6022 } 6023 6024 if (incount <= 0) 6025 EMSG(_(e_invarg)); /* need at least output and input names */ 6026 else if (vim_strchr(gettail(wfname), '_') != NULL) 6027 EMSG(_("E751: Output file name must not have region name")); 6028 else if (incount > 8) 6029 EMSG(_("E754: Only up to 8 regions supported")); 6030 else 6031 { 6032 /* Check for overwriting before doing things that may take a lot of 6033 * time. */ 6034 if (!over_write && mch_stat((char *)wfname, &st) >= 0) 6035 { 6036 EMSG(_(e_exists)); 6037 goto theend; 6038 } 6039 if (mch_isdir(wfname)) 6040 { 6041 EMSG2(_(e_isadir2), wfname); 6042 goto theend; 6043 } 6044 6045 fname = alloc(MAXPATHL); 6046 if (fname == NULL) 6047 goto theend; 6048 6049 /* 6050 * Init the aff and dic pointers. 6051 * Get the region names if there are more than 2 arguments. 
6052 */ 6053 for (i = 0; i < incount; ++i) 6054 { 6055 afile[i] = NULL; 6056 6057 if (incount > 1) 6058 { 6059 len = (int)STRLEN(innames[i]); 6060 if (STRLEN(gettail(innames[i])) < 5 6061 || innames[i][len - 3] != '_') 6062 { 6063 EMSG2(_("E755: Invalid region in %s"), innames[i]); 6064 goto theend; 6065 } 6066 spin.si_region_name[i * 2] = TOLOWER_ASC(innames[i][len - 2]); 6067 spin.si_region_name[i * 2 + 1] = 6068 TOLOWER_ASC(innames[i][len - 1]); 6069 } 6070 } 6071 spin.si_region_count = incount; 6072 6073 spin.si_foldroot = wordtree_alloc(&spin); 6074 spin.si_keeproot = wordtree_alloc(&spin); 6075 spin.si_prefroot = wordtree_alloc(&spin); 6076 if (spin.si_foldroot == NULL 6077 || spin.si_keeproot == NULL 6078 || spin.si_prefroot == NULL) 6079 { 6080 free_blocks(spin.si_blocks); 6081 goto theend; 6082 } 6083 6084 /* When not producing a .add.spl file clear the character table when 6085 * we encounter one in the .aff file. This means we dump the current 6086 * one in the .spl file if the .aff file doesn't define one. That's 6087 * better than guessing the contents, the table will match a 6088 * previously loaded spell file. */ 6089 if (!spin.si_add) 6090 spin.si_clear_chartab = TRUE; 6091 6092 /* 6093 * Read all the .aff and .dic files. 6094 * Text is converted to 'encoding'. 6095 * Words are stored in the case-folded and keep-case trees. 6096 */ 6097 for (i = 0; i < incount && !error; ++i) 6098 { 6099 spin.si_conv.vc_type = CONV_NONE; 6100 spin.si_region = 1 << i; 6101 6102 vim_snprintf((char *)fname, MAXPATHL, "%s.aff", innames[i]); 6103 if (mch_stat((char *)fname, &st) >= 0) 6104 { 6105 /* Read the .aff file. Will init "spin->si_conv" based on the 6106 * "SET" line. */ 6107 afile[i] = spell_read_aff(&spin, fname); 6108 if (afile[i] == NULL) 6109 error = TRUE; 6110 else 6111 { 6112 /* Read the .dic file and store the words in the trees. 
*/ 6113 vim_snprintf((char *)fname, MAXPATHL, "%s.dic", 6114 innames[i]); 6115 if (spell_read_dic(&spin, fname, afile[i]) == FAIL) 6116 error = TRUE; 6117 } 6118 } 6119 else 6120 { 6121 /* No .aff file, try reading the file as a word list. Store 6122 * the words in the trees. */ 6123 if (spell_read_wordfile(&spin, innames[i]) == FAIL) 6124 error = TRUE; 6125 } 6126 6127 #ifdef FEAT_MBYTE 6128 /* Free any conversion stuff. */ 6129 convert_setup(&spin.si_conv, NULL, NULL); 6130 #endif 6131 } 6132 6133 if (spin.si_compflags != NULL && spin.si_nobreak) 6134 MSG(_("Warning: both compounding and NOBREAK specified")); 6135 6136 if (!error && !got_int) 6137 { 6138 /* 6139 * Combine tails in the tree. 6140 */ 6141 spell_message(&spin, (char_u *)_(msg_compressing)); 6142 wordtree_compress(&spin, spin.si_foldroot); 6143 wordtree_compress(&spin, spin.si_keeproot); 6144 wordtree_compress(&spin, spin.si_prefroot); 6145 } 6146 6147 if (!error && !got_int) 6148 { 6149 /* 6150 * Write the info in the spell file. 6151 */ 6152 vim_snprintf((char *)IObuff, IOSIZE, 6153 _("Writing spell file %s ..."), wfname); 6154 spell_message(&spin, IObuff); 6155 6156 error = write_vim_spell(&spin, wfname) == FAIL; 6157 6158 spell_message(&spin, (char_u *)_("Done!")); 6159 vim_snprintf((char *)IObuff, IOSIZE, 6160 _("Estimated runtime memory use: %d bytes"), spin.si_memtot); 6161 spell_message(&spin, IObuff); 6162 6163 /* 6164 * If the file is loaded need to reload it. 6165 */ 6166 if (!error) 6167 spell_reload_one(wfname, added_word); 6168 } 6169 6170 /* Free the allocated memory. */ 6171 ga_clear(&spin.si_rep); 6172 ga_clear(&spin.si_repsal); 6173 ga_clear(&spin.si_sal); 6174 ga_clear(&spin.si_map); 6175 ga_clear(&spin.si_comppat); 6176 ga_clear(&spin.si_prefcond); 6177 hash_clear_all(&spin.si_commonwords, 0); 6178 6179 /* Free the .aff file structures. */ 6180 for (i = 0; i < incount; ++i) 6181 if (afile[i] != NULL) 6182 spell_free_aff(afile[i]); 6183 6184 /* Free all the bits and pieces at once. 
*/ 6185 free_blocks(spin.si_blocks); 6186 6187 /* 6188 * If there is soundfolding info and no NOSUGFILE item create the 6189 * .sug file with the soundfolded word trie. 6190 */ 6191 if (spin.si_sugtime != 0 && !error && !got_int) 6192 spell_make_sugfile(&spin, wfname); 6193 6194 } 6195 6196 theend: 6197 vim_free(fname); 6198 vim_free(wfname); 6199 } 6200 6201 /* 6202 * Display a message for spell file processing when 'verbose' is set or using 6203 * ":mkspell". "str" can be IObuff. 6204 */ 6205 static void 6206 spell_message(spellinfo_T *spin, char_u *str) 6207 { 6208 if (spin->si_verbose || p_verbose > 2) 6209 { 6210 if (!spin->si_verbose) 6211 verbose_enter(); 6212 MSG(str); 6213 out_flush(); 6214 if (!spin->si_verbose) 6215 verbose_leave(); 6216 } 6217 } 6218 6219 /* 6220 * ":[count]spellgood {word}" 6221 * ":[count]spellwrong {word}" 6222 * ":[count]spellundo {word}" 6223 */ 6224 void 6225 ex_spell(exarg_T *eap) 6226 { 6227 spell_add_word(eap->arg, (int)STRLEN(eap->arg), eap->cmdidx == CMD_spellwrong, 6228 eap->forceit ? 0 : (int)eap->line2, 6229 eap->cmdidx == CMD_spellundo); 6230 } 6231 6232 /* 6233 * Add "word[len]" to 'spellfile' as a good or bad word. 6234 */ 6235 void 6236 spell_add_word( 6237 char_u *word, 6238 int len, 6239 int bad, 6240 int idx, /* "zG" and "zW": zero, otherwise index in 6241 'spellfile' */ 6242 int undo) /* TRUE for "zug", "zuG", "zuw" and "zuW" */ 6243 { 6244 FILE *fd = NULL; 6245 buf_T *buf = NULL; 6246 int new_spf = FALSE; 6247 char_u *fname; 6248 char_u *fnamebuf = NULL; 6249 char_u line[MAXWLEN * 2]; 6250 long fpos, fpos_next = 0; 6251 int i; 6252 char_u *spf; 6253 6254 if (idx == 0) /* use internal wordlist */ 6255 { 6256 if (int_wordlist == NULL) 6257 { 6258 int_wordlist = vim_tempname('s', FALSE); 6259 if (int_wordlist == NULL) 6260 return; 6261 } 6262 fname = int_wordlist; 6263 } 6264 else 6265 { 6266 /* If 'spellfile' isn't set figure out a good default value. 
*/ 6267 if (*curwin->w_s->b_p_spf == NUL) 6268 { 6269 init_spellfile(); 6270 new_spf = TRUE; 6271 } 6272 6273 if (*curwin->w_s->b_p_spf == NUL) 6274 { 6275 EMSG2(_(e_notset), "spellfile"); 6276 return; 6277 } 6278 fnamebuf = alloc(MAXPATHL); 6279 if (fnamebuf == NULL) 6280 return; 6281 6282 for (spf = curwin->w_s->b_p_spf, i = 1; *spf != NUL; ++i) 6283 { 6284 copy_option_part(&spf, fnamebuf, MAXPATHL, ","); 6285 if (i == idx) 6286 break; 6287 if (*spf == NUL) 6288 { 6289 EMSGN(_("E765: 'spellfile' does not have %ld entries"), idx); 6290 vim_free(fnamebuf); 6291 return; 6292 } 6293 } 6294 6295 /* Check that the user isn't editing the .add file somewhere. */ 6296 buf = buflist_findname_exp(fnamebuf); 6297 if (buf != NULL && buf->b_ml.ml_mfp == NULL) 6298 buf = NULL; 6299 if (buf != NULL && bufIsChanged(buf)) 6300 { 6301 EMSG(_(e_bufloaded)); 6302 vim_free(fnamebuf); 6303 return; 6304 } 6305 6306 fname = fnamebuf; 6307 } 6308 6309 if (bad || undo) 6310 { 6311 /* When the word appears as good word we need to remove that one, 6312 * since its flags sort before the one with WF_BANNED. */ 6313 fd = mch_fopen((char *)fname, "r"); 6314 if (fd != NULL) 6315 { 6316 while (!vim_fgets(line, MAXWLEN * 2, fd)) 6317 { 6318 fpos = fpos_next; 6319 fpos_next = ftell(fd); 6320 if (STRNCMP(word, line, len) == 0 6321 && (line[len] == '/' || line[len] < ' ')) 6322 { 6323 /* Found duplicate word. Remove it by writing a '#' at 6324 * the start of the line. Mixing reading and writing 6325 * doesn't work for all systems, close the file first. 
*/ 6326 fclose(fd); 6327 fd = mch_fopen((char *)fname, "r+"); 6328 if (fd == NULL) 6329 break; 6330 if (fseek(fd, fpos, SEEK_SET) == 0) 6331 { 6332 fputc('#', fd); 6333 if (undo) 6334 { 6335 home_replace(NULL, fname, NameBuff, MAXPATHL, TRUE); 6336 smsg((char_u *)_("Word '%.*s' removed from %s"), 6337 len, word, NameBuff); 6338 } 6339 } 6340 fseek(fd, fpos_next, SEEK_SET); 6341 } 6342 } 6343 if (fd != NULL) 6344 fclose(fd); 6345 } 6346 } 6347 6348 if (!undo) 6349 { 6350 fd = mch_fopen((char *)fname, "a"); 6351 if (fd == NULL && new_spf) 6352 { 6353 char_u *p; 6354 6355 /* We just initialized the 'spellfile' option and can't open the 6356 * file. We may need to create the "spell" directory first. We 6357 * already checked the runtime directory is writable in 6358 * init_spellfile(). */ 6359 if (!dir_of_file_exists(fname) && (p = gettail_sep(fname)) != fname) 6360 { 6361 int c = *p; 6362 6363 /* The directory doesn't exist. Try creating it and opening 6364 * the file again. */ 6365 *p = NUL; 6366 vim_mkdir(fname, 0755); 6367 *p = c; 6368 fd = mch_fopen((char *)fname, "a"); 6369 } 6370 } 6371 6372 if (fd == NULL) 6373 EMSG2(_(e_notopen), fname); 6374 else 6375 { 6376 if (bad) 6377 fprintf(fd, "%.*s/!\n", len, word); 6378 else 6379 fprintf(fd, "%.*s\n", len, word); 6380 fclose(fd); 6381 6382 home_replace(NULL, fname, NameBuff, MAXPATHL, TRUE); 6383 smsg((char_u *)_("Word '%.*s' added to %s"), len, word, NameBuff); 6384 } 6385 } 6386 6387 if (fd != NULL) 6388 { 6389 /* Update the .add.spl file. */ 6390 mkspell(1, &fname, FALSE, TRUE, TRUE); 6391 6392 /* If the .add file is edited somewhere, reload it. */ 6393 if (buf != NULL) 6394 buf_reload(buf, buf->b_orig_mode); 6395 6396 redraw_all_later(SOME_VALID); 6397 } 6398 vim_free(fnamebuf); 6399 } 6400 6401 /* 6402 * Initialize 'spellfile' for the current buffer. 
6403 */ 6404 static void 6405 init_spellfile(void) 6406 { 6407 char_u *buf; 6408 int l; 6409 char_u *fname; 6410 char_u *rtp; 6411 char_u *lend; 6412 int aspath = FALSE; 6413 char_u *lstart = curbuf->b_s.b_p_spl; 6414 6415 if (*curwin->w_s->b_p_spl != NUL && curwin->w_s->b_langp.ga_len > 0) 6416 { 6417 buf = alloc(MAXPATHL); 6418 if (buf == NULL) 6419 return; 6420 6421 /* Find the end of the language name. Exclude the region. If there 6422 * is a path separator remember the start of the tail. */ 6423 for (lend = curwin->w_s->b_p_spl; *lend != NUL 6424 && vim_strchr((char_u *)",._", *lend) == NULL; ++lend) 6425 if (vim_ispathsep(*lend)) 6426 { 6427 aspath = TRUE; 6428 lstart = lend + 1; 6429 } 6430 6431 /* Loop over all entries in 'runtimepath'. Use the first one where we 6432 * are allowed to write. */ 6433 rtp = p_rtp; 6434 while (*rtp != NUL) 6435 { 6436 if (aspath) 6437 /* Use directory of an entry with path, e.g., for 6438 * "/dir/lg.utf-8.spl" use "/dir". */ 6439 vim_strncpy(buf, curbuf->b_s.b_p_spl, 6440 lstart - curbuf->b_s.b_p_spl - 1); 6441 else 6442 /* Copy the path from 'runtimepath' to buf[]. */ 6443 copy_option_part(&rtp, buf, MAXPATHL, ","); 6444 if (filewritable(buf) == 2) 6445 { 6446 /* Use the first language name from 'spelllang' and the 6447 * encoding used in the first loaded .spl file. */ 6448 if (aspath) 6449 vim_strncpy(buf, curbuf->b_s.b_p_spl, 6450 lend - curbuf->b_s.b_p_spl); 6451 else 6452 { 6453 /* Create the "spell" directory if it doesn't exist yet. 
*/ 6454 l = (int)STRLEN(buf); 6455 vim_snprintf((char *)buf + l, MAXPATHL - l, "/spell"); 6456 if (filewritable(buf) != 2) 6457 vim_mkdir(buf, 0755); 6458 6459 l = (int)STRLEN(buf); 6460 vim_snprintf((char *)buf + l, MAXPATHL - l, 6461 "/%.*s", (int)(lend - lstart), lstart); 6462 } 6463 l = (int)STRLEN(buf); 6464 fname = LANGP_ENTRY(curwin->w_s->b_langp, 0) 6465 ->lp_slang->sl_fname; 6466 vim_snprintf((char *)buf + l, MAXPATHL - l, ".%s.add", 6467 fname != NULL 6468 && strstr((char *)gettail(fname), ".ascii.") != NULL 6469 ? (char_u *)"ascii" : spell_enc()); 6470 set_option_value((char_u *)"spellfile", 0L, buf, OPT_LOCAL); 6471 break; 6472 } 6473 aspath = FALSE; 6474 } 6475 6476 vim_free(buf); 6477 } 6478 } 6479 6480 6481 6482 /* 6483 * Set the spell character tables from strings in the affix file. 6484 */ 6485 static int 6486 set_spell_chartab(char_u *fol, char_u *low, char_u *upp) 6487 { 6488 /* We build the new tables here first, so that we can compare with the 6489 * previous one. */ 6490 spelltab_T new_st; 6491 char_u *pf = fol, *pl = low, *pu = upp; 6492 int f, l, u; 6493 6494 clear_spell_chartab(&new_st); 6495 6496 while (*pf != NUL) 6497 { 6498 if (*pl == NUL || *pu == NUL) 6499 { 6500 EMSG(_(e_affform)); 6501 return FAIL; 6502 } 6503 #ifdef FEAT_MBYTE 6504 f = mb_ptr2char_adv(&pf); 6505 l = mb_ptr2char_adv(&pl); 6506 u = mb_ptr2char_adv(&pu); 6507 #else 6508 f = *pf++; 6509 l = *pl++; 6510 u = *pu++; 6511 #endif 6512 /* Every character that appears is a word character. 
*/ 6513 if (f < 256) 6514 new_st.st_isw[f] = TRUE; 6515 if (l < 256) 6516 new_st.st_isw[l] = TRUE; 6517 if (u < 256) 6518 new_st.st_isw[u] = TRUE; 6519 6520 /* if "LOW" and "FOL" are not the same the "LOW" char needs 6521 * case-folding */ 6522 if (l < 256 && l != f) 6523 { 6524 if (f >= 256) 6525 { 6526 EMSG(_(e_affrange)); 6527 return FAIL; 6528 } 6529 new_st.st_fold[l] = f; 6530 } 6531 6532 /* if "UPP" and "FOL" are not the same the "UPP" char needs 6533 * case-folding, it's upper case and the "UPP" is the upper case of 6534 * "FOL" . */ 6535 if (u < 256 && u != f) 6536 { 6537 if (f >= 256) 6538 { 6539 EMSG(_(e_affrange)); 6540 return FAIL; 6541 } 6542 new_st.st_fold[u] = f; 6543 new_st.st_isu[u] = TRUE; 6544 new_st.st_upper[f] = u; 6545 } 6546 } 6547 6548 if (*pl != NUL || *pu != NUL) 6549 { 6550 EMSG(_(e_affform)); 6551 return FAIL; 6552 } 6553 6554 return set_spell_finish(&new_st); 6555 } 6556 6557 /* 6558 * Set the spell character tables from strings in the .spl file. 6559 */ 6560 static void 6561 set_spell_charflags( 6562 char_u *flags, 6563 int cnt, /* length of "flags" */ 6564 char_u *fol) 6565 { 6566 /* We build the new tables here first, so that we can compare with the 6567 * previous one. 
*/ 6568 spelltab_T new_st; 6569 int i; 6570 char_u *p = fol; 6571 int c; 6572 6573 clear_spell_chartab(&new_st); 6574 6575 for (i = 0; i < 128; ++i) 6576 { 6577 if (i < cnt) 6578 { 6579 new_st.st_isw[i + 128] = (flags[i] & CF_WORD) != 0; 6580 new_st.st_isu[i + 128] = (flags[i] & CF_UPPER) != 0; 6581 } 6582 6583 if (*p != NUL) 6584 { 6585 #ifdef FEAT_MBYTE 6586 c = mb_ptr2char_adv(&p); 6587 #else 6588 c = *p++; 6589 #endif 6590 new_st.st_fold[i + 128] = c; 6591 if (i + 128 != c && new_st.st_isu[i + 128] && c < 256) 6592 new_st.st_upper[c] = i + 128; 6593 } 6594 } 6595 6596 (void)set_spell_finish(&new_st); 6597 } 6598 6599 static int 6600 set_spell_finish(spelltab_T *new_st) 6601 { 6602 int i; 6603 6604 if (did_set_spelltab) 6605 { 6606 /* check that it's the same table */ 6607 for (i = 0; i < 256; ++i) 6608 { 6609 if (spelltab.st_isw[i] != new_st->st_isw[i] 6610 || spelltab.st_isu[i] != new_st->st_isu[i] 6611 || spelltab.st_fold[i] != new_st->st_fold[i] 6612 || spelltab.st_upper[i] != new_st->st_upper[i]) 6613 { 6614 EMSG(_("E763: Word characters differ between spell files")); 6615 return FAIL; 6616 } 6617 } 6618 } 6619 else 6620 { 6621 /* copy the new spelltab into the one being used */ 6622 spelltab = *new_st; 6623 did_set_spelltab = TRUE; 6624 } 6625 6626 return OK; 6627 } 6628 6629 /* 6630 * Write the table with prefix conditions to the .spl file. 6631 * When "fd" is NULL only count the length of what is written. 
6632 */ 6633 static int 6634 write_spell_prefcond(FILE *fd, garray_T *gap) 6635 { 6636 int i; 6637 char_u *p; 6638 int len; 6639 int totlen; 6640 size_t x = 1; /* collect return value of fwrite() */ 6641 6642 if (fd != NULL) 6643 put_bytes(fd, (long_u)gap->ga_len, 2); /* <prefcondcnt> */ 6644 6645 totlen = 2 + gap->ga_len; /* length of <prefcondcnt> and <condlen> bytes */ 6646 6647 for (i = 0; i < gap->ga_len; ++i) 6648 { 6649 /* <prefcond> : <condlen> <condstr> */ 6650 p = ((char_u **)gap->ga_data)[i]; 6651 if (p != NULL) 6652 { 6653 len = (int)STRLEN(p); 6654 if (fd != NULL) 6655 { 6656 fputc(len, fd); 6657 x &= fwrite(p, (size_t)len, (size_t)1, fd); 6658 } 6659 totlen += len; 6660 } 6661 else if (fd != NULL) 6662 fputc(0, fd); 6663 } 6664 6665 return totlen; 6666 } 6667 6668 6669 /* 6670 * Use map string "map" for languages "lp". 6671 */ 6672 static void 6673 set_map_str(slang_T *lp, char_u *map) 6674 { 6675 char_u *p; 6676 int headc = 0; 6677 int c; 6678 int i; 6679 6680 if (*map == NUL) 6681 { 6682 lp->sl_has_map = FALSE; 6683 return; 6684 } 6685 lp->sl_has_map = TRUE; 6686 6687 /* Init the array and hash tables empty. */ 6688 for (i = 0; i < 256; ++i) 6689 lp->sl_map_array[i] = 0; 6690 #ifdef FEAT_MBYTE 6691 hash_init(&lp->sl_map_hash); 6692 #endif 6693 6694 /* 6695 * The similar characters are stored separated with slashes: 6696 * "aaa/bbb/ccc/". Fill sl_map_array[c] with the character before c and 6697 * before the same slash. For characters above 255 sl_map_hash is used. 6698 */ 6699 for (p = map; *p != NUL; ) 6700 { 6701 #ifdef FEAT_MBYTE 6702 c = mb_cptr2char_adv(&p); 6703 #else 6704 c = *p++; 6705 #endif 6706 if (c == '/') 6707 headc = 0; 6708 else 6709 { 6710 if (headc == 0) 6711 headc = c; 6712 6713 #ifdef FEAT_MBYTE 6714 /* Characters above 255 don't fit in sl_map_array[], put them in 6715 * the hash table. Each entry is the char, a NUL the headchar and 6716 * a NUL. 
*/ 6717 if (c >= 256) 6718 { 6719 int cl = mb_char2len(c); 6720 int headcl = mb_char2len(headc); 6721 char_u *b; 6722 hash_T hash; 6723 hashitem_T *hi; 6724 6725 b = alloc((unsigned)(cl + headcl + 2)); 6726 if (b == NULL) 6727 return; 6728 mb_char2bytes(c, b); 6729 b[cl] = NUL; 6730 mb_char2bytes(headc, b + cl + 1); 6731 b[cl + 1 + headcl] = NUL; 6732 hash = hash_hash(b); 6733 hi = hash_lookup(&lp->sl_map_hash, b, hash); 6734 if (HASHITEM_EMPTY(hi)) 6735 hash_add_item(&lp->sl_map_hash, hi, b, hash); 6736 else 6737 { 6738 /* This should have been checked when generating the .spl 6739 * file. */ 6740 EMSG(_("E783: duplicate char in MAP entry")); 6741 vim_free(b); 6742 } 6743 } 6744 else 6745 #endif 6746 lp->sl_map_array[c] = headc; 6747 } 6748 } 6749 } 6750 6751 6752 #endif /* FEAT_SPELL */ 6753