1 /* vi:set ts=8 sts=4 sw=4 noet: 2 * 3 * VIM - Vi IMproved by Bram Moolenaar 4 * 5 * Do ":help uganda" in Vim to read copying and usage conditions. 6 * Do ":help credits" in Vim to see a list of people who contributed. 7 * See README.txt for an overview of the Vim source code. 8 */ 9 10 /* 11 * spellfile.c: code for reading and writing spell files. 12 * 13 * See spell.c for information about spell checking. 14 */ 15 16 /* 17 * Vim spell file format: <HEADER> 18 * <SECTIONS> 19 * <LWORDTREE> 20 * <KWORDTREE> 21 * <PREFIXTREE> 22 * 23 * <HEADER>: <fileID> <versionnr> 24 * 25 * <fileID> 8 bytes "VIMspell" 26 * <versionnr> 1 byte VIMSPELLVERSION 27 * 28 * 29 * Sections make it possible to add information to the .spl file without 30 * making it incompatible with previous versions. There are two kinds of 31 * sections: 32 * 1. Not essential for correct spell checking. E.g. for making suggestions. 33 * These are skipped when not supported. 34 * 2. Optional information, but essential for spell checking when present. 35 * E.g. conditions for affixes. When this section is present but not 36 * supported an error message is given. 37 * 38 * <SECTIONS>: <section> ... <sectionend> 39 * 40 * <section>: <sectionID> <sectionflags> <sectionlen> (section contents) 41 * 42 * <sectionID> 1 byte number from 0 to 254 identifying the section 43 * 44 * <sectionflags> 1 byte SNF_REQUIRED: this section is required for correct 45 * spell checking 46 * 47 * <sectionlen> 4 bytes length of section contents, MSB first 48 * 49 * <sectionend> 1 byte SN_END 50 * 51 * 52 * sectionID == SN_INFO: <infotext> 53 * <infotext> N bytes free format text with spell file info (version, 54 * website, etc) 55 * 56 * sectionID == SN_REGION: <regionname> ... 57 * <regionname> 2 bytes Up to MAXREGIONS region names: ca, au, etc. Lower 58 * case. First <regionname> is region 1. 
59 * 60 * sectionID == SN_CHARFLAGS: <charflagslen> <charflags> 61 * <folcharslen> <folchars> 62 * <charflagslen> 1 byte Number of bytes in <charflags> (should be 128). 63 * <charflags> N bytes List of flags (first one is for character 128): 64 * 0x01 word character CF_WORD 65 * 0x02 upper-case character CF_UPPER 66 * <folcharslen> 2 bytes Number of bytes in <folchars>. 67 * <folchars> N bytes Folded characters, first one is for character 128. 68 * 69 * sectionID == SN_MIDWORD: <midword> 70 * <midword> N bytes Characters that are word characters only when used 71 * in the middle of a word. 72 * 73 * sectionID == SN_PREFCOND: <prefcondcnt> <prefcond> ... 74 * <prefcondcnt> 2 bytes Number of <prefcond> items following. 75 * <prefcond> : <condlen> <condstr> 76 * <condlen> 1 byte Length of <condstr>. 77 * <condstr> N bytes Condition for the prefix. 78 * 79 * sectionID == SN_REP: <repcount> <rep> ... 80 * <repcount> 2 bytes number of <rep> items, MSB first. 81 * <rep> : <repfromlen> <repfrom> <reptolen> <repto> 82 * <repfromlen> 1 byte length of <repfrom> 83 * <repfrom> N bytes "from" part of replacement 84 * <reptolen> 1 byte length of <repto> 85 * <repto> N bytes "to" part of replacement 86 * 87 * sectionID == SN_REPSAL: <repcount> <rep> ... 88 * just like SN_REP but for soundfolded words 89 * 90 * sectionID == SN_SAL: <salflags> <salcount> <sal> ... 
91 * <salflags> 1 byte flags for soundsalike conversion: 92 * SAL_F0LLOWUP 93 * SAL_COLLAPSE 94 * SAL_REM_ACCENTS 95 * <salcount> 2 bytes number of <sal> items following 96 * <sal> : <salfromlen> <salfrom> <saltolen> <salto> 97 * <salfromlen> 1 byte length of <salfrom> 98 * <salfrom> N bytes "from" part of soundsalike 99 * <saltolen> 1 byte length of <salto> 100 * <salto> N bytes "to" part of soundsalike 101 * 102 * sectionID == SN_SOFO: <sofofromlen> <sofofrom> <sofotolen> <sofoto> 103 * <sofofromlen> 2 bytes length of <sofofrom> 104 * <sofofrom> N bytes "from" part of soundfold 105 * <sofotolen> 2 bytes length of <sofoto> 106 * <sofoto> N bytes "to" part of soundfold 107 * 108 * sectionID == SN_SUGFILE: <timestamp> 109 * <timestamp> 8 bytes time in seconds that must match with .sug file 110 * 111 * sectionID == SN_NOSPLITSUGS: nothing 112 * 113 * sectionID == SN_NOCOMPOUNDSUGS: nothing 114 * 115 * sectionID == SN_WORDS: <word> ... 116 * <word> N bytes NUL terminated common word 117 * 118 * sectionID == SN_MAP: <mapstr> 119 * <mapstr> N bytes String with sequences of similar characters, 120 * separated by slashes. 121 * 122 * sectionID == SN_COMPOUND: <compmax> <compminlen> <compsylmax> <compoptions> 123 * <comppatcount> <comppattern> ... <compflags> 124 * <compmax> 1 byte Maximum nr of words in compound word. 125 * <compminlen> 1 byte Minimal word length for compounding. 126 * <compsylmax> 1 byte Maximum nr of syllables in compound word. 127 * <compoptions> 2 bytes COMP_ flags. 128 * <comppatcount> 2 bytes number of <comppattern> following 129 * <compflags> N bytes Flags from COMPOUNDRULE items, separated by 130 * slashes. 
131 * 132 * <comppattern>: <comppatlen> <comppattext> 133 * <comppatlen> 1 byte length of <comppattext> 134 * <comppattext> N bytes end or begin chars from CHECKCOMPOUNDPATTERN 135 * 136 * sectionID == SN_NOBREAK: (empty, its presence is what matters) 137 * 138 * sectionID == SN_SYLLABLE: <syllable> 139 * <syllable> N bytes String from SYLLABLE item. 140 * 141 * <LWORDTREE>: <wordtree> 142 * 143 * <KWORDTREE>: <wordtree> 144 * 145 * <PREFIXTREE>: <wordtree> 146 * 147 * 148 * <wordtree>: <nodecount> <nodedata> ... 149 * 150 * <nodecount> 4 bytes Number of nodes following. MSB first. 151 * 152 * <nodedata>: <siblingcount> <sibling> ... 153 * 154 * <siblingcount> 1 byte Number of siblings in this node. The siblings 155 * follow in sorted order. 156 * 157 * <sibling>: <byte> [ <nodeidx> <xbyte> 158 * | <flags> [<flags2>] [<region>] [<affixID>] 159 * | [<pflags>] <affixID> <prefcondnr> ] 160 * 161 * <byte> 1 byte Byte value of the sibling. Special cases: 162 * BY_NOFLAGS: End of word without flags and for all 163 * regions. 164 * For PREFIXTREE <affixID> and 165 * <prefcondnr> follow. 166 * BY_FLAGS: End of word, <flags> follow. 167 * For PREFIXTREE <pflags>, <affixID> 168 * and <prefcondnr> follow. 169 * BY_FLAGS2: End of word, <flags> and <flags2> 170 * follow. Not used in PREFIXTREE. 171 * BY_INDEX: Child of sibling is shared, <nodeidx> 172 * and <xbyte> follow. 173 * 174 * <nodeidx> 3 bytes Index of child for this sibling, MSB first. 175 * 176 * <xbyte> 1 byte byte value of the sibling. 
177 * 178 * <flags> 1 byte bitmask of: 179 * WF_ALLCAP word must have only capitals 180 * WF_ONECAP first char of word must be capital 181 * WF_KEEPCAP keep-case word 182 * WF_FIXCAP keep-case word, all caps not allowed 183 * WF_RARE rare word 184 * WF_BANNED bad word 185 * WF_REGION <region> follows 186 * WF_AFX <affixID> follows 187 * 188 * <flags2> 1 byte Bitmask of: 189 * WF_HAS_AFF >> 8 word includes affix 190 * WF_NEEDCOMP >> 8 word only valid in compound 191 * WF_NOSUGGEST >> 8 word not used for suggestions 192 * WF_COMPROOT >> 8 word already a compound 193 * WF_NOCOMPBEF >> 8 no compounding before this word 194 * WF_NOCOMPAFT >> 8 no compounding after this word 195 * 196 * <pflags> 1 byte bitmask of: 197 * WFP_RARE rare prefix 198 * WFP_NC non-combining prefix 199 * WFP_UP letter after prefix made upper case 200 * 201 * <region> 1 byte Bitmask for regions in which word is valid. When 202 * omitted it's valid in all regions. 203 * Lowest bit is for region 1. 204 * 205 * <affixID> 1 byte ID of affix that can be used with this word. In 206 * PREFIXTREE used for the required prefix ID. 207 * 208 * <prefcondnr> 2 bytes Prefix condition number, index in <prefcond> list 209 * from HEADER. 210 * 211 * All text characters are in 'encoding', but stored as single bytes. 212 */ 213 214 /* 215 * Vim .sug file format: <SUGHEADER> 216 * <SUGWORDTREE> 217 * <SUGTABLE> 218 * 219 * <SUGHEADER>: <fileID> <versionnr> <timestamp> 220 * 221 * <fileID> 6 bytes "VIMsug" 222 * <versionnr> 1 byte VIMSUGVERSION 223 * <timestamp> 8 bytes timestamp that must match with .spl file 224 * 225 * 226 * <SUGWORDTREE>: <wordtree> (see above, no flags or region used) 227 * 228 * 229 * <SUGTABLE>: <sugwcount> <sugline> ... 230 * 231 * <sugwcount> 4 bytes number of <sugline> following 232 * 233 * <sugline>: <sugnr> ... 
NUL 234 * 235 * <sugnr>: X bytes word number that results in this soundfolded word, 236 * stored as an offset to the previous number in as 237 * few bytes as possible, see offset2bytes()) 238 */ 239 240 #include "vim.h" 241 242 #if defined(FEAT_SPELL) || defined(PROTO) 243 244 #ifndef UNIX // it's in os_unix.h for Unix 245 # include <time.h> // for time_t 246 #endif 247 248 #ifndef UNIX // it's in os_unix.h for Unix 249 # include <time.h> // for time_t 250 #endif 251 252 // Special byte values for <byte>. Some are only used in the tree for 253 // postponed prefixes, some only in the other trees. This is a bit messy... 254 #define BY_NOFLAGS 0 // end of word without flags or region; for 255 // postponed prefix: no <pflags> 256 #define BY_INDEX 1 // child is shared, index follows 257 #define BY_FLAGS 2 // end of word, <flags> byte follows; for 258 // postponed prefix: <pflags> follows 259 #define BY_FLAGS2 3 // end of word, <flags> and <flags2> bytes 260 // follow; never used in prefix tree 261 #define BY_SPECIAL BY_FLAGS2 // highest special byte value 262 263 #define ZERO_FLAG 65009 // used when flag is zero: "0" 264 265 // Flags used in .spl file for soundsalike flags. 266 #define SAL_F0LLOWUP 1 267 #define SAL_COLLAPSE 2 268 #define SAL_REM_ACCENTS 4 269 270 #define VIMSPELLMAGIC "VIMspell" // string at start of Vim spell file 271 #define VIMSPELLMAGICL 8 272 #define VIMSPELLVERSION 50 273 274 // Section IDs. Only renumber them when VIMSPELLVERSION changes! 
275 #define SN_REGION 0 // <regionname> section 276 #define SN_CHARFLAGS 1 // charflags section 277 #define SN_MIDWORD 2 // <midword> section 278 #define SN_PREFCOND 3 // <prefcond> section 279 #define SN_REP 4 // REP items section 280 #define SN_SAL 5 // SAL items section 281 #define SN_SOFO 6 // soundfolding section 282 #define SN_MAP 7 // MAP items section 283 #define SN_COMPOUND 8 // compound words section 284 #define SN_SYLLABLE 9 // syllable section 285 #define SN_NOBREAK 10 // NOBREAK section 286 #define SN_SUGFILE 11 // timestamp for .sug file 287 #define SN_REPSAL 12 // REPSAL items section 288 #define SN_WORDS 13 // common words 289 #define SN_NOSPLITSUGS 14 // don't split word for suggestions 290 #define SN_INFO 15 // info section 291 #define SN_NOCOMPOUNDSUGS 16 // don't compound for suggestions 292 #define SN_END 255 // end of sections 293 294 #define SNF_REQUIRED 1 // <sectionflags>: required section 295 296 #define CF_WORD 0x01 297 #define CF_UPPER 0x02 298 299 /* 300 * Loop through all the siblings of a node (including the node) 301 */ 302 #define FOR_ALL_NODE_SIBLINGS(node, np) \ 303 for ((np) = (node); (np) != NULL; (np) = (np)->wn_sibling) 304 305 static int set_spell_finish(spelltab_T *new_st); 306 static int write_spell_prefcond(FILE *fd, garray_T *gap); 307 static int read_region_section(FILE *fd, slang_T *slang, int len); 308 static int read_charflags_section(FILE *fd); 309 static int read_prefcond_section(FILE *fd, slang_T *lp); 310 static int read_rep_section(FILE *fd, garray_T *gap, short *first); 311 static int read_sal_section(FILE *fd, slang_T *slang); 312 static int read_words_section(FILE *fd, slang_T *lp, int len); 313 static int read_sofo_section(FILE *fd, slang_T *slang); 314 static int read_compound(FILE *fd, slang_T *slang, int len); 315 static int set_sofo(slang_T *lp, char_u *from, char_u *to); 316 static void set_sal_first(slang_T *lp); 317 static int *mb_str2wide(char_u *s); 318 static int spell_read_tree(FILE *fd, char_u 
**bytsp, idx_T **idxsp, int prefixtree, int prefixcnt); 319 static idx_T read_tree_node(FILE *fd, char_u *byts, idx_T *idxs, int maxidx, idx_T startidx, int prefixtree, int maxprefcondnr); 320 static void set_spell_charflags(char_u *flags, int cnt, char_u *upp); 321 static int set_spell_chartab(char_u *fol, char_u *low, char_u *upp); 322 static void set_map_str(slang_T *lp, char_u *map); 323 324 325 static char *e_spell_trunc = N_("E758: Truncated spell file"); 326 static char *e_afftrailing = N_("Trailing text in %s line %d: %s"); 327 static char *e_affname = N_("Affix name too long in %s line %d: %s"); 328 static char *e_affform = N_("E761: Format error in affix file FOL, LOW or UPP"); 329 static char *e_affrange = N_("E762: Character in FOL, LOW or UPP is out of range"); 330 static char *msg_compressing = N_("Compressing word tree..."); 331 332 /* 333 * Load one spell file and store the info into a slang_T. 334 * 335 * This is invoked in three ways: 336 * - From spell_load_cb() to load a spell file for the first time. "lang" is 337 * the language name, "old_lp" is NULL. Will allocate an slang_T. 338 * - To reload a spell file that was changed. "lang" is NULL and "old_lp" 339 * points to the existing slang_T. 340 * - Just after writing a .spl file; it's read back to produce the .sug file. 341 * "old_lp" is NULL and "lang" is NULL. Will allocate an slang_T. 342 * 343 * Returns the slang_T the spell file was loaded into. NULL for error. 
*/
slang_T *
spell_load_file(
    char_u  *fname,
    char_u  *lang,
    slang_T *old_lp,
    int     silent)     // no error if file doesn't exist
{
    FILE    *fd;
    char_u  buf[VIMSPELLMAGICL];
    char_u  *p;
    int     i;
    int     n;
    int     len;
    slang_T *lp = NULL;
    int     c = 0;
    int     res;
    int     did_estack_push = FALSE;
    ESTACK_CHECK_DECLARATION

    fd = mch_fopen((char *)fname, "r");
    if (fd == NULL)
    {
        // With "silent" set the failure is only reported when 'verbose' is
        // above 2, and then as a plain message instead of an error.
        if (!silent)
            semsg(_(e_notopen), fname);
        else if (p_verbose > 2)
        {
            verbose_enter();
            smsg((const char *)e_notopen, fname);
            verbose_leave();
        }
        goto endFAIL;
    }
    if (p_verbose > 2)
    {
        verbose_enter();
        smsg(_("Reading spell file \"%s\""), fname);
        verbose_leave();
    }

    if (old_lp == NULL)
    {
        // First load: allocate a fresh slang_T.  It is freed again at
        // "endFAIL" when anything below goes wrong.
        lp = slang_alloc(lang);
        if (lp == NULL)
            goto endFAIL;

        // Remember the file name, used to reload the file when it's updated.
        lp->sl_fname = vim_strsave(fname);
        if (lp->sl_fname == NULL)
            goto endFAIL;

        // Check for .add.spl (_add.spl for VMS).
        lp->sl_add = strstr((char *)gettail(fname), SPL_FNAME_ADD) != NULL;
    }
    else
        // Reload: fill the caller's existing slang_T in place.
        lp = old_lp;

    // Set sourcing_name, so that error messages mention the file name.
    estack_push(ETYPE_SPELL, fname, 0);
    ESTACK_CHECK_SETUP
    did_estack_push = TRUE;

    /*
     * <HEADER>: <fileID>
     */
    for (i = 0; i < VIMSPELLMAGICL; ++i)
        buf[i] = getc(fd);                              // <fileID>
    if (STRNCMP(buf, VIMSPELLMAGIC, VIMSPELLMAGICL) != 0)
    {
        emsg(_("E757: This does not look like a spell file"));
        goto endFAIL;
    }
    // Only a file whose version is exactly VIMSPELLVERSION is accepted.
    c = getc(fd);                                       // <versionnr>
    if (c < VIMSPELLVERSION)
    {
        emsg(_("E771: Old spell file, needs to be updated"));
        goto endFAIL;
    }
    else if (c > VIMSPELLVERSION)
    {
        emsg(_("E772: Spell file is for newer version of Vim"));
        goto endFAIL;
    }


    /*
     * <SECTIONS>: <section> ... <sectionend>
     * <section>: <sectionID> <sectionflags> <sectionlen> (section contents)
     */
    for (;;)
    {
        n = getc(fd);                       // <sectionID> or <sectionend>
        if (n == SN_END)
            break;
        c = getc(fd);                                   // <sectionflags>
        len = get4c(fd);                                // <sectionlen>
        // A negative length is reported as a truncated file.
        if (len < 0)
            goto truncerr;

        // "res" collects the SP_*ERROR result of the section readers; it is
        // inspected once below the switch.  Sections that fail on a NULL
        // read_string() result jump to "endFAIL" directly.
        res = 0;
        switch (n)
        {
            case SN_INFO:
                lp->sl_info = read_string(fd, len);     // <infotext>
                if (lp->sl_info == NULL)
                    goto endFAIL;
                break;

            case SN_REGION:
                res = read_region_section(fd, lp, len);
                break;

            case SN_CHARFLAGS:
                res = read_charflags_section(fd);
                break;

            case SN_MIDWORD:
                lp->sl_midword = read_string(fd, len);  // <midword>
                if (lp->sl_midword == NULL)
                    goto endFAIL;
                break;

            case SN_PREFCOND:
                res = read_prefcond_section(fd, lp);
                break;

            case SN_REP:
                res = read_rep_section(fd, &lp->sl_rep, lp->sl_rep_first);
                break;

            case SN_REPSAL:
                res = read_rep_section(fd, &lp->sl_repsal, lp->sl_repsal_first);
                break;

            case SN_SAL:
                res = read_sal_section(fd, lp);
                break;

            case SN_SOFO:
                res = read_sofo_section(fd, lp);
                break;

            case SN_MAP:
                // The map string is only needed while set_map_str() runs.
                p = read_string(fd, len);               // <mapstr>
                if (p == NULL)
                    goto endFAIL;
                set_map_str(lp, p);
                vim_free(p);
                break;

            case SN_WORDS:
                res = read_words_section(fd, lp, len);
                break;

            case SN_SUGFILE:
                lp->sl_sugtime = get8ctime(fd);         // <timestamp>
                break;

            case SN_NOSPLITSUGS:
                // No contents are read; the presence of the section sets
                // the flag.
                lp->sl_nosplitsugs = TRUE;
                break;

            case SN_NOCOMPOUNDSUGS:
                lp->sl_nocompoundsugs = TRUE;
                break;

            case SN_COMPOUND:
                res = read_compound(fd, lp, len);
                break;

            case SN_NOBREAK:
                lp->sl_nobreak = TRUE;
                break;

            case SN_SYLLABLE:
                lp->sl_syllable = read_string(fd, len); // <syllable>
                if (lp->sl_syllable == NULL)
                    goto endFAIL;
                if (init_syl_tab(lp) == FAIL)
                    goto endFAIL;
                break;

            default:
                // Unsupported section.  When it's required give an error
                // message.  When it's not required skip the contents.
                if (c & SNF_REQUIRED)
                {
                    emsg(_("E770: Unsupported section in spell file"));
                    goto endFAIL;
                }
                while (--len >= 0)
                    if (getc(fd) < 0)
                        goto truncerr;
                break;
        }
        // "someerror" is also the target of the gotos that follow the three
        // spell_read_tree() calls below this loop, and "truncerr" is jumped
        // to from above the switch and from the default case.  Both labels
        // therefore have to stay inside the loop body exactly where they are.
someerror:
        if (res == SP_FORMERROR)
        {
            emsg(_(e_format));
            goto endFAIL;
        }
        if (res == SP_TRUNCERROR)
        {
truncerr:
            emsg(_(e_spell_trunc));
            goto endFAIL;
        }
        if (res == SP_OTHERERROR)
            goto endFAIL;
    }

    // <LWORDTREE>
    res = spell_read_tree(fd, &lp->sl_fbyts, &lp->sl_fidxs, FALSE, 0);
    if (res != 0)
        goto someerror;

    // <KWORDTREE>
    res = spell_read_tree(fd, &lp->sl_kbyts, &lp->sl_kidxs, FALSE, 0);
    if (res != 0)
        goto someerror;

    // <PREFIXTREE>
    res = spell_read_tree(fd, &lp->sl_pbyts, &lp->sl_pidxs, TRUE,
                                                            lp->sl_prefixcnt);
    if (res != 0)
        goto someerror;

    // For a new file link it in the list of spell files.
    // This is skipped when "lang" is NULL.
    if (old_lp == NULL && lang != NULL)
    {
        lp->sl_next = first_lang;
        first_lang = lp;
    }

    goto endOK;

endFAIL:
    if (lang != NULL)
        // truncating the name signals the error to spell_load_lang()
        *lang = NUL;
    // Only free what was allocated here; a caller-supplied "old_lp" is left
    // alone.  NULL is returned in both cases.
    if (lp != NULL && old_lp == NULL)
        slang_free(lp);
    lp = NULL;

endOK:
    // Cleanup shared by the success and the failure path.
    if (fd != NULL)
        fclose(fd);
    if (did_estack_push)
    {
        ESTACK_CHECK_NOW
        estack_pop();
    }

    return lp;
}

/*
 * Fill in the wordcount fields for a trie: the number of words found below
 * each node is stored in "idxs" at the index of that node.
 * Nothing is returned.
603 */ 604 static void 605 tree_count_words(char_u *byts, idx_T *idxs) 606 { 607 int depth; 608 idx_T arridx[MAXWLEN]; 609 int curi[MAXWLEN]; 610 int c; 611 idx_T n; 612 int wordcount[MAXWLEN]; 613 614 arridx[0] = 0; 615 curi[0] = 1; 616 wordcount[0] = 0; 617 depth = 0; 618 while (depth >= 0 && !got_int) 619 { 620 if (curi[depth] > byts[arridx[depth]]) 621 { 622 // Done all bytes at this node, go up one level. 623 idxs[arridx[depth]] = wordcount[depth]; 624 if (depth > 0) 625 wordcount[depth - 1] += wordcount[depth]; 626 627 --depth; 628 fast_breakcheck(); 629 } 630 else 631 { 632 // Do one more byte at this node. 633 n = arridx[depth] + curi[depth]; 634 ++curi[depth]; 635 636 c = byts[n]; 637 if (c == 0) 638 { 639 // End of word, count it. 640 ++wordcount[depth]; 641 642 // Skip over any other NUL bytes (same word with different 643 // flags). 644 while (byts[n + 1] == 0) 645 { 646 ++n; 647 ++curi[depth]; 648 } 649 } 650 else 651 { 652 // Normal char, go one level deeper to count the words. 653 ++depth; 654 arridx[depth] = idxs[n]; 655 curi[depth] = 1; 656 wordcount[depth] = 0; 657 } 658 } 659 } 660 } 661 662 /* 663 * Load the .sug files for languages that have one and weren't loaded yet. 664 */ 665 void 666 suggest_load_files(void) 667 { 668 langp_T *lp; 669 int lpi; 670 slang_T *slang; 671 char_u *dotp; 672 FILE *fd; 673 char_u buf[MAXWLEN]; 674 int i; 675 time_t timestamp; 676 int wcount; 677 int wordnr; 678 garray_T ga; 679 int c; 680 681 // Do this for all languages that support sound folding. 682 for (lpi = 0; lpi < curwin->w_s->b_langp.ga_len; ++lpi) 683 { 684 lp = LANGP_ENTRY(curwin->w_s->b_langp, lpi); 685 slang = lp->lp_slang; 686 if (slang->sl_sugtime != 0 && !slang->sl_sugloaded) 687 { 688 // Change ".spl" to ".sug" and open the file. When the file isn't 689 // found silently skip it. Do set "sl_sugloaded" so that we 690 // don't try again and again. 
691 slang->sl_sugloaded = TRUE; 692 693 dotp = vim_strrchr(slang->sl_fname, '.'); 694 if (dotp == NULL || fnamecmp(dotp, ".spl") != 0) 695 continue; 696 STRCPY(dotp, ".sug"); 697 fd = mch_fopen((char *)slang->sl_fname, "r"); 698 if (fd == NULL) 699 goto nextone; 700 701 /* 702 * <SUGHEADER>: <fileID> <versionnr> <timestamp> 703 */ 704 for (i = 0; i < VIMSUGMAGICL; ++i) 705 buf[i] = getc(fd); // <fileID> 706 if (STRNCMP(buf, VIMSUGMAGIC, VIMSUGMAGICL) != 0) 707 { 708 semsg(_("E778: This does not look like a .sug file: %s"), 709 slang->sl_fname); 710 goto nextone; 711 } 712 c = getc(fd); // <versionnr> 713 if (c < VIMSUGVERSION) 714 { 715 semsg(_("E779: Old .sug file, needs to be updated: %s"), 716 slang->sl_fname); 717 goto nextone; 718 } 719 else if (c > VIMSUGVERSION) 720 { 721 semsg(_("E780: .sug file is for newer version of Vim: %s"), 722 slang->sl_fname); 723 goto nextone; 724 } 725 726 // Check the timestamp, it must be exactly the same as the one in 727 // the .spl file. Otherwise the word numbers won't match. 728 timestamp = get8ctime(fd); // <timestamp> 729 if (timestamp != slang->sl_sugtime) 730 { 731 semsg(_("E781: .sug file doesn't match .spl file: %s"), 732 slang->sl_fname); 733 goto nextone; 734 } 735 736 /* 737 * <SUGWORDTREE>: <wordtree> 738 * Read the trie with the soundfolded words. 739 */ 740 if (spell_read_tree(fd, &slang->sl_sbyts, &slang->sl_sidxs, 741 FALSE, 0) != 0) 742 { 743 someerror: 744 semsg(_("E782: error while reading .sug file: %s"), 745 slang->sl_fname); 746 slang_clear_sug(slang); 747 goto nextone; 748 } 749 750 /* 751 * <SUGTABLE>: <sugwcount> <sugline> ... 752 * 753 * Read the table with word numbers. We use a file buffer for 754 * this, because it's so much like a file with lines. Makes it 755 * possible to swap the info and save on memory use. 
756 */ 757 slang->sl_sugbuf = open_spellbuf(); 758 if (slang->sl_sugbuf == NULL) 759 goto someerror; 760 // <sugwcount> 761 wcount = get4c(fd); 762 if (wcount < 0) 763 goto someerror; 764 765 // Read all the wordnr lists into the buffer, one NUL terminated 766 // list per line. 767 ga_init2(&ga, 1, 100); 768 for (wordnr = 0; wordnr < wcount; ++wordnr) 769 { 770 ga.ga_len = 0; 771 for (;;) 772 { 773 c = getc(fd); // <sugline> 774 if (c < 0 || ga_grow(&ga, 1) == FAIL) 775 goto someerror; 776 ((char_u *)ga.ga_data)[ga.ga_len++] = c; 777 if (c == NUL) 778 break; 779 } 780 if (ml_append_buf(slang->sl_sugbuf, (linenr_T)wordnr, 781 ga.ga_data, ga.ga_len, TRUE) == FAIL) 782 goto someerror; 783 } 784 ga_clear(&ga); 785 786 /* 787 * Need to put word counts in the word tries, so that we can find 788 * a word by its number. 789 */ 790 tree_count_words(slang->sl_fbyts, slang->sl_fidxs); 791 tree_count_words(slang->sl_sbyts, slang->sl_sidxs); 792 793 nextone: 794 if (fd != NULL) 795 fclose(fd); 796 STRCPY(dotp, ".spl"); 797 } 798 } 799 } 800 801 802 /* 803 * Read a length field from "fd" in "cnt_bytes" bytes. 804 * Allocate memory, read the string into it and add a NUL at the end. 805 * Returns NULL when the count is zero. 806 * Sets "*cntp" to SP_*ERROR when there is an error, length of the result 807 * otherwise. 808 */ 809 static char_u * 810 read_cnt_string(FILE *fd, int cnt_bytes, int *cntp) 811 { 812 int cnt = 0; 813 int i; 814 char_u *str; 815 816 // read the length bytes, MSB first 817 for (i = 0; i < cnt_bytes; ++i) 818 cnt = (cnt << 8) + getc(fd); 819 if (cnt < 0) 820 { 821 *cntp = SP_TRUNCERROR; 822 return NULL; 823 } 824 *cntp = cnt; 825 if (cnt == 0) 826 return NULL; // nothing to read, return NULL 827 828 str = read_string(fd, cnt); 829 if (str == NULL) 830 *cntp = SP_OTHERERROR; 831 return str; 832 } 833 834 /* 835 * Read SN_REGION: <regionname> ... 836 * Return SP_*ERROR flags. 
837 */ 838 static int 839 read_region_section(FILE *fd, slang_T *lp, int len) 840 { 841 int i; 842 843 if (len > MAXREGIONS * 2) 844 return SP_FORMERROR; 845 for (i = 0; i < len; ++i) 846 lp->sl_regions[i] = getc(fd); // <regionname> 847 lp->sl_regions[len] = NUL; 848 return 0; 849 } 850 851 /* 852 * Read SN_CHARFLAGS section: <charflagslen> <charflags> 853 * <folcharslen> <folchars> 854 * Return SP_*ERROR flags. 855 */ 856 static int 857 read_charflags_section(FILE *fd) 858 { 859 char_u *flags; 860 char_u *fol; 861 int flagslen, follen; 862 863 // <charflagslen> <charflags> 864 flags = read_cnt_string(fd, 1, &flagslen); 865 if (flagslen < 0) 866 return flagslen; 867 868 // <folcharslen> <folchars> 869 fol = read_cnt_string(fd, 2, &follen); 870 if (follen < 0) 871 { 872 vim_free(flags); 873 return follen; 874 } 875 876 // Set the word-char flags and fill SPELL_ISUPPER() table. 877 if (flags != NULL && fol != NULL) 878 set_spell_charflags(flags, flagslen, fol); 879 880 vim_free(flags); 881 vim_free(fol); 882 883 // When <charflagslen> is zero then <fcharlen> must also be zero. 884 if ((flags == NULL) != (fol == NULL)) 885 return SP_FORMERROR; 886 return 0; 887 } 888 889 /* 890 * Read SN_PREFCOND section. 891 * Return SP_*ERROR flags. 892 */ 893 static int 894 read_prefcond_section(FILE *fd, slang_T *lp) 895 { 896 int cnt; 897 int i; 898 int n; 899 char_u *p; 900 char_u buf[MAXWLEN + 1]; 901 902 // <prefcondcnt> <prefcond> ... 903 cnt = get2c(fd); // <prefcondcnt> 904 if (cnt <= 0) 905 return SP_FORMERROR; 906 907 lp->sl_prefprog = ALLOC_CLEAR_MULT(regprog_T *, cnt); 908 if (lp->sl_prefprog == NULL) 909 return SP_OTHERERROR; 910 lp->sl_prefixcnt = cnt; 911 912 for (i = 0; i < cnt; ++i) 913 { 914 // <prefcond> : <condlen> <condstr> 915 n = getc(fd); // <condlen> 916 if (n < 0 || n >= MAXWLEN) 917 return SP_FORMERROR; 918 919 // When <condlen> is zero we have an empty condition. Otherwise 920 // compile the regexp program used to check for the condition. 
921 if (n > 0) 922 { 923 buf[0] = '^'; // always match at one position only 924 p = buf + 1; 925 while (n-- > 0) 926 *p++ = getc(fd); // <condstr> 927 *p = NUL; 928 lp->sl_prefprog[i] = vim_regcomp(buf, RE_MAGIC + RE_STRING); 929 } 930 } 931 return 0; 932 } 933 934 /* 935 * Read REP or REPSAL items section from "fd": <repcount> <rep> ... 936 * Return SP_*ERROR flags. 937 */ 938 static int 939 read_rep_section(FILE *fd, garray_T *gap, short *first) 940 { 941 int cnt; 942 fromto_T *ftp; 943 int i; 944 945 cnt = get2c(fd); // <repcount> 946 if (cnt < 0) 947 return SP_TRUNCERROR; 948 949 if (ga_grow(gap, cnt) == FAIL) 950 return SP_OTHERERROR; 951 952 // <rep> : <repfromlen> <repfrom> <reptolen> <repto> 953 for (; gap->ga_len < cnt; ++gap->ga_len) 954 { 955 ftp = &((fromto_T *)gap->ga_data)[gap->ga_len]; 956 ftp->ft_from = read_cnt_string(fd, 1, &i); 957 if (i < 0) 958 return i; 959 if (i == 0) 960 return SP_FORMERROR; 961 ftp->ft_to = read_cnt_string(fd, 1, &i); 962 if (i <= 0) 963 { 964 vim_free(ftp->ft_from); 965 if (i < 0) 966 return i; 967 return SP_FORMERROR; 968 } 969 } 970 971 // Fill the first-index table. 972 for (i = 0; i < 256; ++i) 973 first[i] = -1; 974 for (i = 0; i < gap->ga_len; ++i) 975 { 976 ftp = &((fromto_T *)gap->ga_data)[i]; 977 if (first[*ftp->ft_from] == -1) 978 first[*ftp->ft_from] = i; 979 } 980 return 0; 981 } 982 983 /* 984 * Read SN_SAL section: <salflags> <salcount> <sal> ... 985 * Return SP_*ERROR flags. 
*/
static int
read_sal_section(FILE *fd, slang_T *slang)
{
    int         i;
    int         cnt;
    garray_T    *gap;
    salitem_T   *smp;
    int         ccnt;
    char_u      *p;

    slang->sl_sofo = FALSE;

    // The flags are only ever switched on here, never cleared.
    i = getc(fd);                               // <salflags>
    if (i & SAL_F0LLOWUP)
        slang->sl_followup = TRUE;
    if (i & SAL_COLLAPSE)
        slang->sl_collapse = TRUE;
    if (i & SAL_REM_ACCENTS)
        slang->sl_rem_accents = TRUE;

    cnt = get2c(fd);                            // <salcount>
    if (cnt < 0)
        return SP_TRUNCERROR;

    // Room for one item more than <salcount>: the terminating entry that is
    // added after the loop.
    gap = &slang->sl_sal;
    ga_init2(gap, sizeof(salitem_T), 10);
    if (ga_grow(gap, cnt + 1) == FAIL)
        return SP_OTHERERROR;

    // <sal> : <salfromlen> <salfrom> <saltolen> <salto>
    // "ga_len" is only incremented when an item was read completely; the
    // error returns below leave a partly read item uncounted.
    for (; gap->ga_len < cnt; ++gap->ga_len)
    {
        int     c = NUL;

        smp = &((salitem_T *)gap->ga_data)[gap->ga_len];
        ccnt = getc(fd);                        // <salfromlen>
        if (ccnt < 0)
            return SP_TRUNCERROR;
        // One allocation holds sm_lead, sm_oneof and sm_rules back to back,
        // each NUL terminated; "p" is the write position in it.
        if ((p = alloc(ccnt + 2)) == NULL)
            return SP_OTHERERROR;
        smp->sm_lead = p;

        // Read up to the first special char into sm_lead.
        // "i" counts the <salfrom> bytes consumed so far, "c" keeps the byte
        // that ended the loop.
        for (i = 0; i < ccnt; ++i)
        {
            c = getc(fd);                       // <salfrom>
            if (vim_strchr((char_u *)"0123456789(-<^$", c) != NULL)
                break;
            *p++ = c;
        }
        // Length in bytes; replaced by the character count further down
        // when "has_mbyte" is set.
        smp->sm_leadlen = (int)(p - smp->sm_lead);
        *p++ = NUL;

        // Put (abc) chars in sm_oneof, if any.
        if (c == '(')
        {
            smp->sm_oneof = p;
            for (++i; i < ccnt; ++i)
            {
                c = getc(fd);                   // <salfrom>
                if (c == ')')
                    break;
                *p++ = c;
            }
            *p++ = NUL;
            // Fetch the byte after the ')', if there is one, so that "c"
            // holds the first byte of the rules.
            if (++i < ccnt)
                c = getc(fd);
        }
        else
            smp->sm_oneof = NULL;

        // Any following chars go in sm_rules.
        smp->sm_rules = p;
        if (i < ccnt)
            // store the char we got while checking for end of sm_lead
            *p++ = c;
        for (++i; i < ccnt; ++i)
            *p++ = getc(fd);                    // <salfrom>
        *p++ = NUL;

        // <saltolen> <salto>
        // "ccnt" is reused for the result of read_cnt_string(); sm_to is
        // NULL when <saltolen> is zero.
        smp->sm_to = read_cnt_string(fd, 1, &ccnt);
        if (ccnt < 0)
        {
            vim_free(smp->sm_lead);
            return ccnt;
        }

        if (has_mbyte)
        {
            // convert the multi-byte strings to wide char strings
            smp->sm_lead_w = mb_str2wide(smp->sm_lead);
            smp->sm_leadlen = mb_charlen(smp->sm_lead);
            if (smp->sm_oneof == NULL)
                smp->sm_oneof_w = NULL;
            else
                smp->sm_oneof_w = mb_str2wide(smp->sm_oneof);
            if (smp->sm_to == NULL)
                smp->sm_to_w = NULL;
            else
                smp->sm_to_w = mb_str2wide(smp->sm_to);
            // A NULL wide string where the byte string exists is handled as
            // a failure: free everything that belongs to this item.
            if (smp->sm_lead_w == NULL
                    || (smp->sm_oneof_w == NULL && smp->sm_oneof != NULL)
                    || (smp->sm_to_w == NULL && smp->sm_to != NULL))
            {
                vim_free(smp->sm_lead);
                vim_free(smp->sm_to);
                vim_free(smp->sm_lead_w);
                vim_free(smp->sm_oneof_w);
                vim_free(smp->sm_to_w);
                return SP_OTHERERROR;
            }
        }
    }

    if (gap->ga_len > 0)
    {
        // Add one extra entry to mark the end with an empty sm_lead.  Avoids
        // that we need to check the index every time.
        smp = &((salitem_T *)gap->ga_data)[gap->ga_len];
        if ((p = alloc(1)) == NULL)
            return SP_OTHERERROR;
        p[0] = NUL;
        smp->sm_lead = p;
        smp->sm_leadlen = 0;
        smp->sm_oneof = NULL;
        smp->sm_rules = p;
        smp->sm_to = NULL;
        if (has_mbyte)
        {
            smp->sm_lead_w = mb_str2wide(smp->sm_lead);
            smp->sm_leadlen = 0;
            smp->sm_oneof_w = NULL;
            smp->sm_to_w = NULL;
        }
        ++gap->ga_len;
    }

    // Fill the first-index table.
    set_sal_first(slang);

    return 0;
}

/*
 * Read SN_WORDS: <word> ...
 * Return SP_*ERROR flags.
1134 */ 1135 static int 1136 read_words_section(FILE *fd, slang_T *lp, int len) 1137 { 1138 int done = 0; 1139 int i; 1140 int c; 1141 char_u word[MAXWLEN]; 1142 1143 while (done < len) 1144 { 1145 // Read one word at a time. 1146 for (i = 0; ; ++i) 1147 { 1148 c = getc(fd); 1149 if (c == EOF) 1150 return SP_TRUNCERROR; 1151 word[i] = c; 1152 if (word[i] == NUL) 1153 break; 1154 if (i == MAXWLEN - 1) 1155 return SP_FORMERROR; 1156 } 1157 1158 // Init the count to 10. 1159 count_common_word(lp, word, -1, 10); 1160 done += i + 1; 1161 } 1162 return 0; 1163 } 1164 1165 /* 1166 * SN_SOFO: <sofofromlen> <sofofrom> <sofotolen> <sofoto> 1167 * Return SP_*ERROR flags. 1168 */ 1169 static int 1170 read_sofo_section(FILE *fd, slang_T *slang) 1171 { 1172 int cnt; 1173 char_u *from, *to; 1174 int res; 1175 1176 slang->sl_sofo = TRUE; 1177 1178 // <sofofromlen> <sofofrom> 1179 from = read_cnt_string(fd, 2, &cnt); 1180 if (cnt < 0) 1181 return cnt; 1182 1183 // <sofotolen> <sofoto> 1184 to = read_cnt_string(fd, 2, &cnt); 1185 if (cnt < 0) 1186 { 1187 vim_free(from); 1188 return cnt; 1189 } 1190 1191 // Store the info in slang->sl_sal and/or slang->sl_sal_first. 1192 if (from != NULL && to != NULL) 1193 res = set_sofo(slang, from, to); 1194 else if (from != NULL || to != NULL) 1195 res = SP_FORMERROR; // only one of two strings is an error 1196 else 1197 res = 0; 1198 1199 vim_free(from); 1200 vim_free(to); 1201 return res; 1202 } 1203 1204 /* 1205 * Read the compound section from the .spl file: 1206 * <compmax> <compminlen> <compsylmax> <compoptions> <compflags> 1207 * Returns SP_*ERROR flags. 
1208 */ 1209 static int 1210 read_compound(FILE *fd, slang_T *slang, int len) 1211 { 1212 int todo = len; 1213 int c; 1214 int atstart; 1215 char_u *pat; 1216 char_u *pp; 1217 char_u *cp; 1218 char_u *ap; 1219 char_u *crp; 1220 int cnt; 1221 garray_T *gap; 1222 1223 if (todo < 2) 1224 return SP_FORMERROR; // need at least two bytes 1225 1226 --todo; 1227 c = getc(fd); // <compmax> 1228 if (c < 2) 1229 c = MAXWLEN; 1230 slang->sl_compmax = c; 1231 1232 --todo; 1233 c = getc(fd); // <compminlen> 1234 if (c < 1) 1235 c = 0; 1236 slang->sl_compminlen = c; 1237 1238 --todo; 1239 c = getc(fd); // <compsylmax> 1240 if (c < 1) 1241 c = MAXWLEN; 1242 slang->sl_compsylmax = c; 1243 1244 c = getc(fd); // <compoptions> 1245 if (c != 0) 1246 ungetc(c, fd); // be backwards compatible with Vim 7.0b 1247 else 1248 { 1249 --todo; 1250 c = getc(fd); // only use the lower byte for now 1251 --todo; 1252 slang->sl_compoptions = c; 1253 1254 gap = &slang->sl_comppat; 1255 c = get2c(fd); // <comppatcount> 1256 todo -= 2; 1257 ga_init2(gap, sizeof(char_u *), c); 1258 if (ga_grow(gap, c) == OK) 1259 while (--c >= 0) 1260 { 1261 ((char_u **)(gap->ga_data))[gap->ga_len++] = 1262 read_cnt_string(fd, 1, &cnt); 1263 // <comppatlen> <comppattext> 1264 if (cnt < 0) 1265 return cnt; 1266 todo -= cnt + 1; 1267 } 1268 } 1269 if (todo < 0) 1270 return SP_FORMERROR; 1271 1272 // Turn the COMPOUNDRULE items into a regexp pattern: 1273 // "a[bc]/a*b+" -> "^\(a[bc]\|a*b\+\)$". 1274 // Inserting backslashes may double the length, "^\(\)$<Nul>" is 7 bytes. 1275 // Conversion to utf-8 may double the size. 1276 c = todo * 2 + 7; 1277 if (enc_utf8) 1278 c += todo * 2; 1279 pat = alloc(c); 1280 if (pat == NULL) 1281 return SP_OTHERERROR; 1282 1283 // We also need a list of all flags that can appear at the start and one 1284 // for all flags. 
1285 cp = alloc(todo + 1); 1286 if (cp == NULL) 1287 { 1288 vim_free(pat); 1289 return SP_OTHERERROR; 1290 } 1291 slang->sl_compstartflags = cp; 1292 *cp = NUL; 1293 1294 ap = alloc(todo + 1); 1295 if (ap == NULL) 1296 { 1297 vim_free(pat); 1298 return SP_OTHERERROR; 1299 } 1300 slang->sl_compallflags = ap; 1301 *ap = NUL; 1302 1303 // And a list of all patterns in their original form, for checking whether 1304 // compounding may work in match_compoundrule(). This is freed when we 1305 // encounter a wildcard, the check doesn't work then. 1306 crp = alloc(todo + 1); 1307 slang->sl_comprules = crp; 1308 1309 pp = pat; 1310 *pp++ = '^'; 1311 *pp++ = '\\'; 1312 *pp++ = '('; 1313 1314 atstart = 1; 1315 while (todo-- > 0) 1316 { 1317 c = getc(fd); // <compflags> 1318 if (c == EOF) 1319 { 1320 vim_free(pat); 1321 return SP_TRUNCERROR; 1322 } 1323 1324 // Add all flags to "sl_compallflags". 1325 if (vim_strchr((char_u *)"?*+[]/", c) == NULL 1326 && !byte_in_str(slang->sl_compallflags, c)) 1327 { 1328 *ap++ = c; 1329 *ap = NUL; 1330 } 1331 1332 if (atstart != 0) 1333 { 1334 // At start of item: copy flags to "sl_compstartflags". For a 1335 // [abc] item set "atstart" to 2 and copy up to the ']'. 1336 if (c == '[') 1337 atstart = 2; 1338 else if (c == ']') 1339 atstart = 0; 1340 else 1341 { 1342 if (!byte_in_str(slang->sl_compstartflags, c)) 1343 { 1344 *cp++ = c; 1345 *cp = NUL; 1346 } 1347 if (atstart == 1) 1348 atstart = 0; 1349 } 1350 } 1351 1352 // Copy flag to "sl_comprules", unless we run into a wildcard. 1353 if (crp != NULL) 1354 { 1355 if (c == '?' || c == '+' || c == '*') 1356 { 1357 VIM_CLEAR(slang->sl_comprules); 1358 crp = NULL; 1359 } 1360 else 1361 *crp++ = c; 1362 } 1363 1364 if (c == '/') // slash separates two items 1365 { 1366 *pp++ = '\\'; 1367 *pp++ = '|'; 1368 atstart = 1; 1369 } 1370 else // normal char, "[abc]" and '*' are copied as-is 1371 { 1372 if (c == '?' || c == '+' || c == '~') 1373 *pp++ = '\\'; // "a?" 
becomes "a\?", "a+" becomes "a\+" 1374 if (enc_utf8) 1375 pp += mb_char2bytes(c, pp); 1376 else 1377 *pp++ = c; 1378 } 1379 } 1380 1381 *pp++ = '\\'; 1382 *pp++ = ')'; 1383 *pp++ = '$'; 1384 *pp = NUL; 1385 1386 if (crp != NULL) 1387 *crp = NUL; 1388 1389 slang->sl_compprog = vim_regcomp(pat, RE_MAGIC + RE_STRING + RE_STRICT); 1390 vim_free(pat); 1391 if (slang->sl_compprog == NULL) 1392 return SP_FORMERROR; 1393 1394 return 0; 1395 } 1396 1397 /* 1398 * Set the SOFOFROM and SOFOTO items in language "lp". 1399 * Returns SP_*ERROR flags when there is something wrong. 1400 */ 1401 static int 1402 set_sofo(slang_T *lp, char_u *from, char_u *to) 1403 { 1404 int i; 1405 1406 garray_T *gap; 1407 char_u *s; 1408 char_u *p; 1409 int c; 1410 int *inp; 1411 1412 if (has_mbyte) 1413 { 1414 // Use "sl_sal" as an array with 256 pointers to a list of wide 1415 // characters. The index is the low byte of the character. 1416 // The list contains from-to pairs with a terminating NUL. 1417 // sl_sal_first[] is used for latin1 "from" characters. 1418 gap = &lp->sl_sal; 1419 ga_init2(gap, sizeof(int *), 1); 1420 if (ga_grow(gap, 256) == FAIL) 1421 return SP_OTHERERROR; 1422 vim_memset(gap->ga_data, 0, sizeof(int *) * 256); 1423 gap->ga_len = 256; 1424 1425 // First count the number of items for each list. Temporarily use 1426 // sl_sal_first[] for this. 1427 for (p = from, s = to; *p != NUL && *s != NUL; ) 1428 { 1429 c = mb_cptr2char_adv(&p); 1430 MB_CPTR_ADV(s); 1431 if (c >= 256) 1432 ++lp->sl_sal_first[c & 0xff]; 1433 } 1434 if (*p != NUL || *s != NUL) // lengths differ 1435 return SP_FORMERROR; 1436 1437 // Allocate the lists. 1438 for (i = 0; i < 256; ++i) 1439 if (lp->sl_sal_first[i] > 0) 1440 { 1441 p = alloc(sizeof(int) * (lp->sl_sal_first[i] * 2 + 1)); 1442 if (p == NULL) 1443 return SP_OTHERERROR; 1444 ((int **)gap->ga_data)[i] = (int *)p; 1445 *(int *)p = 0; 1446 } 1447 1448 // Put the characters up to 255 in sl_sal_first[] the rest in a sl_sal 1449 // list. 
1450 vim_memset(lp->sl_sal_first, 0, sizeof(salfirst_T) * 256); 1451 for (p = from, s = to; *p != NUL && *s != NUL; ) 1452 { 1453 c = mb_cptr2char_adv(&p); 1454 i = mb_cptr2char_adv(&s); 1455 if (c >= 256) 1456 { 1457 // Append the from-to chars at the end of the list with 1458 // the low byte. 1459 inp = ((int **)gap->ga_data)[c & 0xff]; 1460 while (*inp != 0) 1461 ++inp; 1462 *inp++ = c; // from char 1463 *inp++ = i; // to char 1464 *inp++ = NUL; // NUL at the end 1465 } 1466 else 1467 // mapping byte to char is done in sl_sal_first[] 1468 lp->sl_sal_first[c] = i; 1469 } 1470 } 1471 else 1472 { 1473 // mapping bytes to bytes is done in sl_sal_first[] 1474 if (STRLEN(from) != STRLEN(to)) 1475 return SP_FORMERROR; 1476 1477 for (i = 0; to[i] != NUL; ++i) 1478 lp->sl_sal_first[from[i]] = to[i]; 1479 lp->sl_sal.ga_len = 1; // indicates we have soundfolding 1480 } 1481 1482 return 0; 1483 } 1484 1485 /* 1486 * Fill the first-index table for "lp". 1487 */ 1488 static void 1489 set_sal_first(slang_T *lp) 1490 { 1491 salfirst_T *sfirst; 1492 int i; 1493 salitem_T *smp; 1494 int c; 1495 garray_T *gap = &lp->sl_sal; 1496 1497 sfirst = lp->sl_sal_first; 1498 for (i = 0; i < 256; ++i) 1499 sfirst[i] = -1; 1500 smp = (salitem_T *)gap->ga_data; 1501 for (i = 0; i < gap->ga_len; ++i) 1502 { 1503 if (has_mbyte) 1504 // Use the lowest byte of the first character. For latin1 it's 1505 // the character, for other encodings it should differ for most 1506 // characters. 1507 c = *smp[i].sm_lead_w & 0xff; 1508 else 1509 c = *smp[i].sm_lead; 1510 if (sfirst[c] == -1) 1511 { 1512 sfirst[c] = i; 1513 if (has_mbyte) 1514 { 1515 int n; 1516 1517 // Make sure all entries with this byte are following each 1518 // other. Move the ones that are in the wrong position. Do 1519 // keep the same ordering! 1520 while (i + 1 < gap->ga_len 1521 && (*smp[i + 1].sm_lead_w & 0xff) == c) 1522 // Skip over entry with same index byte. 
1523 ++i; 1524 1525 for (n = 1; i + n < gap->ga_len; ++n) 1526 if ((*smp[i + n].sm_lead_w & 0xff) == c) 1527 { 1528 salitem_T tsal; 1529 1530 // Move entry with same index byte after the entries 1531 // we already found. 1532 ++i; 1533 --n; 1534 tsal = smp[i + n]; 1535 mch_memmove(smp + i + 1, smp + i, 1536 sizeof(salitem_T) * n); 1537 smp[i] = tsal; 1538 } 1539 } 1540 } 1541 } 1542 } 1543 1544 /* 1545 * Turn a multi-byte string into a wide character string. 1546 * Return it in allocated memory (NULL for out-of-memory) 1547 */ 1548 static int * 1549 mb_str2wide(char_u *s) 1550 { 1551 int *res; 1552 char_u *p; 1553 int i = 0; 1554 1555 res = ALLOC_MULT(int, mb_charlen(s) + 1); 1556 if (res != NULL) 1557 { 1558 for (p = s; *p != NUL; ) 1559 res[i++] = mb_ptr2char_adv(&p); 1560 res[i] = NUL; 1561 } 1562 return res; 1563 } 1564 1565 /* 1566 * Read a tree from the .spl or .sug file. 1567 * Allocates the memory and stores pointers in "bytsp" and "idxsp". 1568 * This is skipped when the tree has zero length. 1569 * Returns zero when OK, SP_ value for an error. 1570 */ 1571 static int 1572 spell_read_tree( 1573 FILE *fd, 1574 char_u **bytsp, 1575 idx_T **idxsp, 1576 int prefixtree, // TRUE for the prefix tree 1577 int prefixcnt) // when "prefixtree" is TRUE: prefix count 1578 { 1579 long len; 1580 int idx; 1581 char_u *bp; 1582 idx_T *ip; 1583 1584 // The tree size was computed when writing the file, so that we can 1585 // allocate it as one long block. <nodecount> 1586 len = get4c(fd); 1587 if (len < 0) 1588 return SP_TRUNCERROR; 1589 if (len >= LONG_MAX / (long)sizeof(int)) 1590 // Invalid length, multiply with sizeof(int) would overflow. 1591 return SP_FORMERROR; 1592 if (len > 0) 1593 { 1594 // Allocate the byte array. 1595 bp = alloc(len); 1596 if (bp == NULL) 1597 return SP_OTHERERROR; 1598 *bytsp = bp; 1599 1600 // Allocate the index array. 
1601 ip = lalloc_clear(len * sizeof(int), TRUE); 1602 if (ip == NULL) 1603 return SP_OTHERERROR; 1604 *idxsp = ip; 1605 1606 // Recursively read the tree and store it in the array. 1607 idx = read_tree_node(fd, bp, ip, len, 0, prefixtree, prefixcnt); 1608 if (idx < 0) 1609 return idx; 1610 } 1611 return 0; 1612 } 1613 1614 /* 1615 * Read one row of siblings from the spell file and store it in the byte array 1616 * "byts" and index array "idxs". Recursively read the children. 1617 * 1618 * NOTE: The code here must match put_node()! 1619 * 1620 * Returns the index (>= 0) following the siblings. 1621 * Returns SP_TRUNCERROR if the file is shorter than expected. 1622 * Returns SP_FORMERROR if there is a format error. 1623 */ 1624 static idx_T 1625 read_tree_node( 1626 FILE *fd, 1627 char_u *byts, 1628 idx_T *idxs, 1629 int maxidx, // size of arrays 1630 idx_T startidx, // current index in "byts" and "idxs" 1631 int prefixtree, // TRUE for reading PREFIXTREE 1632 int maxprefcondnr) // maximum for <prefcondnr> 1633 { 1634 int len; 1635 int i; 1636 int n; 1637 idx_T idx = startidx; 1638 int c; 1639 int c2; 1640 #define SHARED_MASK 0x8000000 1641 1642 len = getc(fd); // <siblingcount> 1643 if (len <= 0) 1644 return SP_TRUNCERROR; 1645 1646 if (startidx + len >= maxidx) 1647 return SP_FORMERROR; 1648 byts[idx++] = len; 1649 1650 // Read the byte values, flag/region bytes and shared indexes. 1651 for (i = 1; i <= len; ++i) 1652 { 1653 c = getc(fd); // <byte> 1654 if (c < 0) 1655 return SP_TRUNCERROR; 1656 if (c <= BY_SPECIAL) 1657 { 1658 if (c == BY_NOFLAGS && !prefixtree) 1659 { 1660 // No flags, all regions. 1661 idxs[idx] = 0; 1662 c = 0; 1663 } 1664 else if (c != BY_INDEX) 1665 { 1666 if (prefixtree) 1667 { 1668 // Read the optional pflags byte, the prefix ID and the 1669 // condition nr. In idxs[] store the prefix ID in the low 1670 // byte, the condition index shifted up 8 bits, the flags 1671 // shifted up 24 bits. 
1672 if (c == BY_FLAGS) 1673 c = getc(fd) << 24; // <pflags> 1674 else 1675 c = 0; 1676 1677 c |= getc(fd); // <affixID> 1678 1679 n = get2c(fd); // <prefcondnr> 1680 if (n >= maxprefcondnr) 1681 return SP_FORMERROR; 1682 c |= (n << 8); 1683 } 1684 else // c must be BY_FLAGS or BY_FLAGS2 1685 { 1686 // Read flags and optional region and prefix ID. In 1687 // idxs[] the flags go in the low two bytes, region above 1688 // that and prefix ID above the region. 1689 c2 = c; 1690 c = getc(fd); // <flags> 1691 if (c2 == BY_FLAGS2) 1692 c = (getc(fd) << 8) + c; // <flags2> 1693 if (c & WF_REGION) 1694 c = (getc(fd) << 16) + c; // <region> 1695 if (c & WF_AFX) 1696 c = (getc(fd) << 24) + c; // <affixID> 1697 } 1698 1699 idxs[idx] = c; 1700 c = 0; 1701 } 1702 else // c == BY_INDEX 1703 { 1704 // <nodeidx> 1705 n = get3c(fd); 1706 if (n < 0 || n >= maxidx) 1707 return SP_FORMERROR; 1708 idxs[idx] = n + SHARED_MASK; 1709 c = getc(fd); // <xbyte> 1710 } 1711 } 1712 byts[idx++] = c; 1713 } 1714 1715 // Recursively read the children for non-shared siblings. 1716 // Skip the end-of-word ones (zero byte value) and the shared ones (and 1717 // remove SHARED_MASK) 1718 for (i = 1; i <= len; ++i) 1719 if (byts[startidx + i] != 0) 1720 { 1721 if (idxs[startidx + i] & SHARED_MASK) 1722 idxs[startidx + i] &= ~SHARED_MASK; 1723 else 1724 { 1725 idxs[startidx + i] = idx; 1726 idx = read_tree_node(fd, byts, idxs, maxidx, idx, 1727 prefixtree, maxprefcondnr); 1728 if (idx < 0) 1729 break; 1730 } 1731 } 1732 1733 return idx; 1734 } 1735 1736 /* 1737 * Reload the spell file "fname" if it's loaded. 
1738 */ 1739 static void 1740 spell_reload_one( 1741 char_u *fname, 1742 int added_word) // invoked through "zg" 1743 { 1744 slang_T *slang; 1745 int didit = FALSE; 1746 1747 FOR_ALL_SPELL_LANGS(slang) 1748 { 1749 if (fullpathcmp(fname, slang->sl_fname, FALSE, TRUE) == FPC_SAME) 1750 { 1751 slang_clear(slang); 1752 if (spell_load_file(fname, NULL, slang, FALSE) == NULL) 1753 // reloading failed, clear the language 1754 slang_clear(slang); 1755 redraw_all_later(SOME_VALID); 1756 didit = TRUE; 1757 } 1758 } 1759 1760 // When "zg" was used and the file wasn't loaded yet, should redo 1761 // 'spelllang' to load it now. 1762 if (added_word && !didit) 1763 did_set_spelllang(curwin); 1764 } 1765 1766 1767 /* 1768 * Functions for ":mkspell". 1769 */ 1770 1771 #define MAXLINELEN 500 // Maximum length in bytes of a line in a .aff 1772 // and .dic file. 1773 /* 1774 * Main structure to store the contents of a ".aff" file. 1775 */ 1776 typedef struct afffile_S 1777 { 1778 char_u *af_enc; // "SET", normalized, alloc'ed string or NULL 1779 int af_flagtype; // AFT_CHAR, AFT_LONG, AFT_NUM or AFT_CAPLONG 1780 unsigned af_rare; // RARE ID for rare word 1781 unsigned af_keepcase; // KEEPCASE ID for keep-case word 1782 unsigned af_bad; // BAD ID for banned word 1783 unsigned af_needaffix; // NEEDAFFIX ID 1784 unsigned af_circumfix; // CIRCUMFIX ID 1785 unsigned af_needcomp; // NEEDCOMPOUND ID 1786 unsigned af_comproot; // COMPOUNDROOT ID 1787 unsigned af_compforbid; // COMPOUNDFORBIDFLAG ID 1788 unsigned af_comppermit; // COMPOUNDPERMITFLAG ID 1789 unsigned af_nosuggest; // NOSUGGEST ID 1790 int af_pfxpostpone; // postpone prefixes without chop string and 1791 // without flags 1792 int af_ignoreextra; // IGNOREEXTRA present 1793 hashtab_T af_pref; // hashtable for prefixes, affheader_T 1794 hashtab_T af_suff; // hashtable for suffixes, affheader_T 1795 hashtab_T af_comp; // hashtable for compound flags, compitem_T 1796 } afffile_T; 1797 1798 #define AFT_CHAR 0 // flags are one 
character 1799 #define AFT_LONG 1 // flags are two characters 1800 #define AFT_CAPLONG 2 // flags are one or two characters 1801 #define AFT_NUM 3 // flags are numbers, comma separated 1802 1803 typedef struct affentry_S affentry_T; 1804 // Affix entry from ".aff" file. Used for prefixes and suffixes. 1805 struct affentry_S 1806 { 1807 affentry_T *ae_next; // next affix with same name/number 1808 char_u *ae_chop; // text to chop off basic word (can be NULL) 1809 char_u *ae_add; // text to add to basic word (can be NULL) 1810 char_u *ae_flags; // flags on the affix (can be NULL) 1811 char_u *ae_cond; // condition (NULL for ".") 1812 regprog_T *ae_prog; // regexp program for ae_cond or NULL 1813 char ae_compforbid; // COMPOUNDFORBIDFLAG found 1814 char ae_comppermit; // COMPOUNDPERMITFLAG found 1815 }; 1816 1817 #define AH_KEY_LEN 17 // 2 x 8 bytes + NUL 1818 1819 // Affix header from ".aff" file. Used for af_pref and af_suff. 1820 typedef struct affheader_S 1821 { 1822 char_u ah_key[AH_KEY_LEN]; // key for hashtab == name of affix 1823 unsigned ah_flag; // affix name as number, uses "af_flagtype" 1824 int ah_newID; // prefix ID after renumbering; 0 if not used 1825 int ah_combine; // suffix may combine with prefix 1826 int ah_follows; // another affix block should be following 1827 affentry_T *ah_first; // first affix entry 1828 } affheader_T; 1829 1830 #define HI2AH(hi) ((affheader_T *)(hi)->hi_key) 1831 1832 // Flag used in compound items. 1833 typedef struct compitem_S 1834 { 1835 char_u ci_key[AH_KEY_LEN]; // key for hashtab == name of compound 1836 unsigned ci_flag; // affix name as number, uses "af_flagtype" 1837 int ci_newID; // affix ID after renumbering. 1838 } compitem_T; 1839 1840 #define HI2CI(hi) ((compitem_T *)(hi)->hi_key) 1841 1842 /* 1843 * Structure that is used to store the items in the word tree. This avoids 1844 * the need to keep track of each allocated thing, everything is freed all at 1845 * once after ":mkspell" is done. 
1846 * Note: "sb_next" must be just before "sb_data" to make sure the alignment of 1847 * "sb_data" is correct for systems where pointers must be aligned on 1848 * pointer-size boundaries and sizeof(pointer) > sizeof(int) (e.g., Sparc). 1849 */ 1850 #define SBLOCKSIZE 16000 // size of sb_data 1851 typedef struct sblock_S sblock_T; 1852 struct sblock_S 1853 { 1854 int sb_used; // nr of bytes already in use 1855 sblock_T *sb_next; // next block in list 1856 char_u sb_data[1]; // data, actually longer 1857 }; 1858 1859 /* 1860 * A node in the tree. 1861 */ 1862 typedef struct wordnode_S wordnode_T; 1863 struct wordnode_S 1864 { 1865 union // shared to save space 1866 { 1867 char_u hashkey[6]; // the hash key, only used while compressing 1868 int index; // index in written nodes (valid after first 1869 // round) 1870 } wn_u1; 1871 union // shared to save space 1872 { 1873 wordnode_T *next; // next node with same hash key 1874 wordnode_T *wnode; // parent node that will write this node 1875 } wn_u2; 1876 wordnode_T *wn_child; // child (next byte in word) 1877 wordnode_T *wn_sibling; // next sibling (alternate byte in word, 1878 // always sorted) 1879 int wn_refs; // Nr. of references to this node. Only 1880 // relevant for first node in a list of 1881 // siblings, in following siblings it is 1882 // always one. 1883 char_u wn_byte; // Byte for this node. NUL for word end 1884 1885 // Info for when "wn_byte" is NUL. 1886 // In PREFIXTREE "wn_region" is used for the prefcondnr. 1887 // In the soundfolded word tree "wn_flags" has the MSW of the wordnr and 1888 // "wn_region" the LSW of the wordnr. 
1889 char_u wn_affixID; // supported/required prefix ID or 0 1890 short_u wn_flags; // WF_ flags 1891 short wn_region; // region mask 1892 1893 #ifdef SPELL_PRINTTREE 1894 int wn_nr; // sequence nr for printing 1895 #endif 1896 }; 1897 1898 #define WN_MASK 0xffff // mask relevant bits of "wn_flags" 1899 1900 #define HI2WN(hi) (wordnode_T *)((hi)->hi_key) 1901 1902 /* 1903 * Info used while reading the spell files. 1904 */ 1905 typedef struct spellinfo_S 1906 { 1907 wordnode_T *si_foldroot; // tree with case-folded words 1908 long si_foldwcount; // nr of words in si_foldroot 1909 1910 wordnode_T *si_keeproot; // tree with keep-case words 1911 long si_keepwcount; // nr of words in si_keeproot 1912 1913 wordnode_T *si_prefroot; // tree with postponed prefixes 1914 1915 long si_sugtree; // creating the soundfolding trie 1916 1917 sblock_T *si_blocks; // memory blocks used 1918 long si_blocks_cnt; // memory blocks allocated 1919 int si_did_emsg; // TRUE when ran out of memory 1920 1921 long si_compress_cnt; // words to add before lowering 1922 // compression limit 1923 wordnode_T *si_first_free; // List of nodes that have been freed during 1924 // compression, linked by "wn_child" field. 
1925 long si_free_count; // number of nodes in si_first_free 1926 #ifdef SPELL_PRINTTREE 1927 int si_wordnode_nr; // sequence nr for nodes 1928 #endif 1929 buf_T *si_spellbuf; // buffer used to store soundfold word table 1930 1931 int si_ascii; // handling only ASCII words 1932 int si_add; // addition file 1933 int si_clear_chartab; // when TRUE clear char tables 1934 int si_region; // region mask 1935 vimconv_T si_conv; // for conversion to 'encoding' 1936 int si_memtot; // runtime memory used 1937 int si_verbose; // verbose messages 1938 int si_msg_count; // number of words added since last message 1939 char_u *si_info; // info text chars or NULL 1940 int si_region_count; // number of regions supported (1 when there 1941 // are no regions) 1942 char_u si_region_name[MAXREGIONS * 2 + 1]; 1943 // region names; used only if 1944 // si_region_count > 1) 1945 1946 garray_T si_rep; // list of fromto_T entries from REP lines 1947 garray_T si_repsal; // list of fromto_T entries from REPSAL lines 1948 garray_T si_sal; // list of fromto_T entries from SAL lines 1949 char_u *si_sofofr; // SOFOFROM text 1950 char_u *si_sofoto; // SOFOTO text 1951 int si_nosugfile; // NOSUGFILE item found 1952 int si_nosplitsugs; // NOSPLITSUGS item found 1953 int si_nocompoundsugs; // NOCOMPOUNDSUGS item found 1954 int si_followup; // soundsalike: ? 1955 int si_collapse; // soundsalike: ? 
1956 hashtab_T si_commonwords; // hashtable for common words 1957 time_t si_sugtime; // timestamp for .sug file 1958 int si_rem_accents; // soundsalike: remove accents 1959 garray_T si_map; // MAP info concatenated 1960 char_u *si_midword; // MIDWORD chars or NULL 1961 int si_compmax; // max nr of words for compounding 1962 int si_compminlen; // minimal length for compounding 1963 int si_compsylmax; // max nr of syllables for compounding 1964 int si_compoptions; // COMP_ flags 1965 garray_T si_comppat; // CHECKCOMPOUNDPATTERN items, each stored as 1966 // a string 1967 char_u *si_compflags; // flags used for compounding 1968 char_u si_nobreak; // NOBREAK 1969 char_u *si_syllable; // syllable string 1970 garray_T si_prefcond; // table with conditions for postponed 1971 // prefixes, each stored as a string 1972 int si_newprefID; // current value for ah_newID 1973 int si_newcompID; // current value for compound ID 1974 } spellinfo_T; 1975 1976 static int is_aff_rule(char_u **items, int itemcnt, char *rulename, int mincount); 1977 static void aff_process_flags(afffile_T *affile, affentry_T *entry); 1978 static int spell_info_item(char_u *s); 1979 static unsigned affitem2flag(int flagtype, char_u *item, char_u *fname, int lnum); 1980 static unsigned get_affitem(int flagtype, char_u **pp); 1981 static void process_compflags(spellinfo_T *spin, afffile_T *aff, char_u *compflags); 1982 static void check_renumber(spellinfo_T *spin); 1983 static void aff_check_number(int spinval, int affval, char *name); 1984 static void aff_check_string(char_u *spinval, char_u *affval, char *name); 1985 static int str_equal(char_u *s1, char_u *s2); 1986 static void add_fromto(spellinfo_T *spin, garray_T *gap, char_u *from, char_u *to); 1987 static int sal_to_bool(char_u *s); 1988 static int get_affix_flags(afffile_T *affile, char_u *afflist); 1989 static int get_pfxlist(afffile_T *affile, char_u *afflist, char_u *store_afflist); 1990 static void get_compflags(afffile_T *affile, char_u 
*afflist, char_u *store_afflist); 1991 static int store_aff_word(spellinfo_T *spin, char_u *word, char_u *afflist, afffile_T *affile, hashtab_T *ht, hashtab_T *xht, int condit, int flags, char_u *pfxlist, int pfxlen); 1992 static void *getroom(spellinfo_T *spin, size_t len, int align); 1993 static char_u *getroom_save(spellinfo_T *spin, char_u *s); 1994 static int store_word(spellinfo_T *spin, char_u *word, int flags, int region, char_u *pfxlist, int need_affix); 1995 static int tree_add_word(spellinfo_T *spin, char_u *word, wordnode_T *tree, int flags, int region, int affixID); 1996 static wordnode_T *get_wordnode(spellinfo_T *spin); 1997 static void free_wordnode(spellinfo_T *spin, wordnode_T *n); 1998 static void wordtree_compress(spellinfo_T *spin, wordnode_T *root, char *name); 1999 static long node_compress(spellinfo_T *spin, wordnode_T *node, hashtab_T *ht, long *tot); 2000 static int node_equal(wordnode_T *n1, wordnode_T *n2); 2001 static void clear_node(wordnode_T *node); 2002 static int put_node(FILE *fd, wordnode_T *node, int idx, int regionmask, int prefixtree); 2003 static int sug_filltree(spellinfo_T *spin, slang_T *slang); 2004 static int sug_maketable(spellinfo_T *spin); 2005 static int sug_filltable(spellinfo_T *spin, wordnode_T *node, int startwordnr, garray_T *gap); 2006 static int offset2bytes(int nr, char_u *buf); 2007 static void sug_write(spellinfo_T *spin, char_u *fname); 2008 static void spell_message(spellinfo_T *spin, char_u *str); 2009 static void init_spellfile(void); 2010 2011 // In the postponed prefixes tree wn_flags is used to store the WFP_ flags, 2012 // but it must be negative to indicate the prefix tree to tree_add_word(). 2013 // Use a negative number with the lower 8 bits zero. 
2014 #define PFX_FLAGS -256 2015 2016 // flags for "condit" argument of store_aff_word() 2017 #define CONDIT_COMB 1 // affix must combine 2018 #define CONDIT_CFIX 2 // affix must have CIRCUMFIX flag 2019 #define CONDIT_SUF 4 // add a suffix for matching flags 2020 #define CONDIT_AFF 8 // word already has an affix 2021 2022 /* 2023 * Tunable parameters for when the tree is compressed. Filled from the 2024 * 'mkspellmem' option. 2025 */ 2026 static long compress_start = 30000; // memory / SBLOCKSIZE 2027 static long compress_inc = 100; // memory / SBLOCKSIZE 2028 static long compress_added = 500000; // word count 2029 2030 /* 2031 * Check the 'mkspellmem' option. Return FAIL if it's wrong. 2032 * Sets "sps_flags". 2033 */ 2034 int 2035 spell_check_msm(void) 2036 { 2037 char_u *p = p_msm; 2038 long start = 0; 2039 long incr = 0; 2040 long added = 0; 2041 2042 if (!VIM_ISDIGIT(*p)) 2043 return FAIL; 2044 // block count = (value * 1024) / SBLOCKSIZE (but avoid overflow) 2045 start = (getdigits(&p) * 10) / (SBLOCKSIZE / 102); 2046 if (*p != ',') 2047 return FAIL; 2048 ++p; 2049 if (!VIM_ISDIGIT(*p)) 2050 return FAIL; 2051 incr = (getdigits(&p) * 102) / (SBLOCKSIZE / 10); 2052 if (*p != ',') 2053 return FAIL; 2054 ++p; 2055 if (!VIM_ISDIGIT(*p)) 2056 return FAIL; 2057 added = getdigits(&p) * 1024; 2058 if (*p != NUL) 2059 return FAIL; 2060 2061 if (start == 0 || incr == 0 || added == 0 || incr > start) 2062 return FAIL; 2063 2064 compress_start = start; 2065 compress_inc = incr; 2066 compress_added = added; 2067 return OK; 2068 } 2069 2070 #ifdef SPELL_PRINTTREE 2071 /* 2072 * For debugging the tree code: print the current tree in a (more or less) 2073 * readable format, so that we can see what happens when adding a word and/or 2074 * compressing the tree. 2075 * Based on code from Olaf Seibert. 
2076 */ 2077 #define PRINTLINESIZE 1000 2078 #define PRINTWIDTH 6 2079 2080 #define PRINTSOME(l, depth, fmt, a1, a2) vim_snprintf(l + depth * PRINTWIDTH, \ 2081 PRINTLINESIZE - PRINTWIDTH * depth, fmt, a1, a2) 2082 2083 static char line1[PRINTLINESIZE]; 2084 static char line2[PRINTLINESIZE]; 2085 static char line3[PRINTLINESIZE]; 2086 2087 static void 2088 spell_clear_flags(wordnode_T *node) 2089 { 2090 wordnode_T *np; 2091 2092 FOR_ALL_NODE_SIBLINGS(node, np) 2093 { 2094 np->wn_u1.index = FALSE; 2095 spell_clear_flags(np->wn_child); 2096 } 2097 } 2098 2099 static void 2100 spell_print_node(wordnode_T *node, int depth) 2101 { 2102 if (node->wn_u1.index) 2103 { 2104 // Done this node before, print the reference. 2105 PRINTSOME(line1, depth, "(%d)", node->wn_nr, 0); 2106 PRINTSOME(line2, depth, " ", 0, 0); 2107 PRINTSOME(line3, depth, " ", 0, 0); 2108 msg(line1); 2109 msg(line2); 2110 msg(line3); 2111 } 2112 else 2113 { 2114 node->wn_u1.index = TRUE; 2115 2116 if (node->wn_byte != NUL) 2117 { 2118 if (node->wn_child != NULL) 2119 PRINTSOME(line1, depth, " %c -> ", node->wn_byte, 0); 2120 else 2121 // Cannot happen? 
2122 PRINTSOME(line1, depth, " %c ???", node->wn_byte, 0); 2123 } 2124 else 2125 PRINTSOME(line1, depth, " $ ", 0, 0); 2126 2127 PRINTSOME(line2, depth, "%d/%d ", node->wn_nr, node->wn_refs); 2128 2129 if (node->wn_sibling != NULL) 2130 PRINTSOME(line3, depth, " | ", 0, 0); 2131 else 2132 PRINTSOME(line3, depth, " ", 0, 0); 2133 2134 if (node->wn_byte == NUL) 2135 { 2136 msg(line1); 2137 msg(line2); 2138 msg(line3); 2139 } 2140 2141 // do the children 2142 if (node->wn_byte != NUL && node->wn_child != NULL) 2143 spell_print_node(node->wn_child, depth + 1); 2144 2145 // do the siblings 2146 if (node->wn_sibling != NULL) 2147 { 2148 // get rid of all parent details except | 2149 STRCPY(line1, line3); 2150 STRCPY(line2, line3); 2151 spell_print_node(node->wn_sibling, depth); 2152 } 2153 } 2154 } 2155 2156 static void 2157 spell_print_tree(wordnode_T *root) 2158 { 2159 if (root != NULL) 2160 { 2161 // Clear the "wn_u1.index" fields, used to remember what has been 2162 // done. 2163 spell_clear_flags(root); 2164 2165 // Recursively print the tree. 2166 spell_print_node(root, 0); 2167 } 2168 } 2169 #endif // SPELL_PRINTTREE 2170 2171 /* 2172 * Read the affix file "fname". 2173 * Returns an afffile_T, NULL for complete failure. 
2174 */ 2175 static afffile_T * 2176 spell_read_aff(spellinfo_T *spin, char_u *fname) 2177 { 2178 FILE *fd; 2179 afffile_T *aff; 2180 char_u rline[MAXLINELEN]; 2181 char_u *line; 2182 char_u *pc = NULL; 2183 #define MAXITEMCNT 30 2184 char_u *(items[MAXITEMCNT]); 2185 int itemcnt; 2186 char_u *p; 2187 int lnum = 0; 2188 affheader_T *cur_aff = NULL; 2189 int did_postpone_prefix = FALSE; 2190 int aff_todo = 0; 2191 hashtab_T *tp; 2192 char_u *low = NULL; 2193 char_u *fol = NULL; 2194 char_u *upp = NULL; 2195 int do_rep; 2196 int do_repsal; 2197 int do_sal; 2198 int do_mapline; 2199 int found_map = FALSE; 2200 hashitem_T *hi; 2201 int l; 2202 int compminlen = 0; // COMPOUNDMIN value 2203 int compsylmax = 0; // COMPOUNDSYLMAX value 2204 int compoptions = 0; // COMP_ flags 2205 int compmax = 0; // COMPOUNDWORDMAX value 2206 char_u *compflags = NULL; // COMPOUNDFLAG and COMPOUNDRULE 2207 // concatenated 2208 char_u *midword = NULL; // MIDWORD value 2209 char_u *syllable = NULL; // SYLLABLE value 2210 char_u *sofofrom = NULL; // SOFOFROM value 2211 char_u *sofoto = NULL; // SOFOTO value 2212 2213 /* 2214 * Open the file. 2215 */ 2216 fd = mch_fopen((char *)fname, "r"); 2217 if (fd == NULL) 2218 { 2219 semsg(_(e_notopen), fname); 2220 return NULL; 2221 } 2222 2223 vim_snprintf((char *)IObuff, IOSIZE, _("Reading affix file %s..."), fname); 2224 spell_message(spin, IObuff); 2225 2226 // Only do REP lines when not done in another .aff file already. 2227 do_rep = spin->si_rep.ga_len == 0; 2228 2229 // Only do REPSAL lines when not done in another .aff file already. 2230 do_repsal = spin->si_repsal.ga_len == 0; 2231 2232 // Only do SAL lines when not done in another .aff file already. 2233 do_sal = spin->si_sal.ga_len == 0; 2234 2235 // Only do MAP lines when not done in another .aff file already. 2236 do_mapline = spin->si_map.ga_len == 0; 2237 2238 /* 2239 * Allocate and init the afffile_T structure. 
2240 */ 2241 aff = (afffile_T *)getroom(spin, sizeof(afffile_T), TRUE); 2242 if (aff == NULL) 2243 { 2244 fclose(fd); 2245 return NULL; 2246 } 2247 hash_init(&aff->af_pref); 2248 hash_init(&aff->af_suff); 2249 hash_init(&aff->af_comp); 2250 2251 /* 2252 * Read all the lines in the file one by one. 2253 */ 2254 while (!vim_fgets(rline, MAXLINELEN, fd) && !got_int) 2255 { 2256 line_breakcheck(); 2257 ++lnum; 2258 2259 // Skip comment lines. 2260 if (*rline == '#') 2261 continue; 2262 2263 // Convert from "SET" to 'encoding' when needed. 2264 vim_free(pc); 2265 if (spin->si_conv.vc_type != CONV_NONE) 2266 { 2267 pc = string_convert(&spin->si_conv, rline, NULL); 2268 if (pc == NULL) 2269 { 2270 smsg(_("Conversion failure for word in %s line %d: %s"), 2271 fname, lnum, rline); 2272 continue; 2273 } 2274 line = pc; 2275 } 2276 else 2277 { 2278 pc = NULL; 2279 line = rline; 2280 } 2281 2282 // Split the line up in white separated items. Put a NUL after each 2283 // item. 2284 itemcnt = 0; 2285 for (p = line; ; ) 2286 { 2287 while (*p != NUL && *p <= ' ') // skip white space and CR/NL 2288 ++p; 2289 if (*p == NUL) 2290 break; 2291 if (itemcnt == MAXITEMCNT) // too many items 2292 break; 2293 items[itemcnt++] = p; 2294 // A few items have arbitrary text argument, don't split them. 2295 if (itemcnt == 2 && spell_info_item(items[0])) 2296 while (*p >= ' ' || *p == TAB) // skip until CR/NL 2297 ++p; 2298 else 2299 while (*p > ' ') // skip until white space or CR/NL 2300 ++p; 2301 if (*p == NUL) 2302 break; 2303 *p++ = NUL; 2304 } 2305 2306 // Handle non-empty lines. 2307 if (itemcnt > 0) 2308 { 2309 if (is_aff_rule(items, itemcnt, "SET", 2) && aff->af_enc == NULL) 2310 { 2311 // Setup for conversion from "ENC" to 'encoding'. 
2312 aff->af_enc = enc_canonize(items[1]); 2313 if (aff->af_enc != NULL && !spin->si_ascii 2314 && convert_setup(&spin->si_conv, aff->af_enc, 2315 p_enc) == FAIL) 2316 smsg(_("Conversion in %s not supported: from %s to %s"), 2317 fname, aff->af_enc, p_enc); 2318 spin->si_conv.vc_fail = TRUE; 2319 } 2320 else if (is_aff_rule(items, itemcnt, "FLAG", 2) 2321 && aff->af_flagtype == AFT_CHAR) 2322 { 2323 if (STRCMP(items[1], "long") == 0) 2324 aff->af_flagtype = AFT_LONG; 2325 else if (STRCMP(items[1], "num") == 0) 2326 aff->af_flagtype = AFT_NUM; 2327 else if (STRCMP(items[1], "caplong") == 0) 2328 aff->af_flagtype = AFT_CAPLONG; 2329 else 2330 smsg(_("Invalid value for FLAG in %s line %d: %s"), 2331 fname, lnum, items[1]); 2332 if (aff->af_rare != 0 2333 || aff->af_keepcase != 0 2334 || aff->af_bad != 0 2335 || aff->af_needaffix != 0 2336 || aff->af_circumfix != 0 2337 || aff->af_needcomp != 0 2338 || aff->af_comproot != 0 2339 || aff->af_nosuggest != 0 2340 || compflags != NULL 2341 || aff->af_suff.ht_used > 0 2342 || aff->af_pref.ht_used > 0) 2343 smsg(_("FLAG after using flags in %s line %d: %s"), 2344 fname, lnum, items[1]); 2345 } 2346 else if (spell_info_item(items[0])) 2347 { 2348 p = (char_u *)getroom(spin, 2349 (spin->si_info == NULL ? 
0 : STRLEN(spin->si_info)) 2350 + STRLEN(items[0]) 2351 + STRLEN(items[1]) + 3, FALSE); 2352 if (p != NULL) 2353 { 2354 if (spin->si_info != NULL) 2355 { 2356 STRCPY(p, spin->si_info); 2357 STRCAT(p, "\n"); 2358 } 2359 STRCAT(p, items[0]); 2360 STRCAT(p, " "); 2361 STRCAT(p, items[1]); 2362 spin->si_info = p; 2363 } 2364 } 2365 else if (is_aff_rule(items, itemcnt, "MIDWORD", 2) 2366 && midword == NULL) 2367 { 2368 midword = getroom_save(spin, items[1]); 2369 } 2370 else if (is_aff_rule(items, itemcnt, "TRY", 2)) 2371 { 2372 // ignored, we look in the tree for what chars may appear 2373 } 2374 // TODO: remove "RAR" later 2375 else if ((is_aff_rule(items, itemcnt, "RAR", 2) 2376 || is_aff_rule(items, itemcnt, "RARE", 2)) 2377 && aff->af_rare == 0) 2378 { 2379 aff->af_rare = affitem2flag(aff->af_flagtype, items[1], 2380 fname, lnum); 2381 } 2382 // TODO: remove "KEP" later 2383 else if ((is_aff_rule(items, itemcnt, "KEP", 2) 2384 || is_aff_rule(items, itemcnt, "KEEPCASE", 2)) 2385 && aff->af_keepcase == 0) 2386 { 2387 aff->af_keepcase = affitem2flag(aff->af_flagtype, items[1], 2388 fname, lnum); 2389 } 2390 else if ((is_aff_rule(items, itemcnt, "BAD", 2) 2391 || is_aff_rule(items, itemcnt, "FORBIDDENWORD", 2)) 2392 && aff->af_bad == 0) 2393 { 2394 aff->af_bad = affitem2flag(aff->af_flagtype, items[1], 2395 fname, lnum); 2396 } 2397 else if (is_aff_rule(items, itemcnt, "NEEDAFFIX", 2) 2398 && aff->af_needaffix == 0) 2399 { 2400 aff->af_needaffix = affitem2flag(aff->af_flagtype, items[1], 2401 fname, lnum); 2402 } 2403 else if (is_aff_rule(items, itemcnt, "CIRCUMFIX", 2) 2404 && aff->af_circumfix == 0) 2405 { 2406 aff->af_circumfix = affitem2flag(aff->af_flagtype, items[1], 2407 fname, lnum); 2408 } 2409 else if (is_aff_rule(items, itemcnt, "NOSUGGEST", 2) 2410 && aff->af_nosuggest == 0) 2411 { 2412 aff->af_nosuggest = affitem2flag(aff->af_flagtype, items[1], 2413 fname, lnum); 2414 } 2415 else if ((is_aff_rule(items, itemcnt, "NEEDCOMPOUND", 2) 2416 || 
is_aff_rule(items, itemcnt, "ONLYINCOMPOUND", 2)) 2417 && aff->af_needcomp == 0) 2418 { 2419 aff->af_needcomp = affitem2flag(aff->af_flagtype, items[1], 2420 fname, lnum); 2421 } 2422 else if (is_aff_rule(items, itemcnt, "COMPOUNDROOT", 2) 2423 && aff->af_comproot == 0) 2424 { 2425 aff->af_comproot = affitem2flag(aff->af_flagtype, items[1], 2426 fname, lnum); 2427 } 2428 else if (is_aff_rule(items, itemcnt, "COMPOUNDFORBIDFLAG", 2) 2429 && aff->af_compforbid == 0) 2430 { 2431 aff->af_compforbid = affitem2flag(aff->af_flagtype, items[1], 2432 fname, lnum); 2433 if (aff->af_pref.ht_used > 0) 2434 smsg(_("Defining COMPOUNDFORBIDFLAG after PFX item may give wrong results in %s line %d"), 2435 fname, lnum); 2436 } 2437 else if (is_aff_rule(items, itemcnt, "COMPOUNDPERMITFLAG", 2) 2438 && aff->af_comppermit == 0) 2439 { 2440 aff->af_comppermit = affitem2flag(aff->af_flagtype, items[1], 2441 fname, lnum); 2442 if (aff->af_pref.ht_used > 0) 2443 smsg(_("Defining COMPOUNDPERMITFLAG after PFX item may give wrong results in %s line %d"), 2444 fname, lnum); 2445 } 2446 else if (is_aff_rule(items, itemcnt, "COMPOUNDFLAG", 2) 2447 && compflags == NULL) 2448 { 2449 // Turn flag "c" into COMPOUNDRULE compatible string "c+", 2450 // "Na" into "Na+", "1234" into "1234+". 2451 p = getroom(spin, STRLEN(items[1]) + 2, FALSE); 2452 if (p != NULL) 2453 { 2454 STRCPY(p, items[1]); 2455 STRCAT(p, "+"); 2456 compflags = p; 2457 } 2458 } 2459 else if (is_aff_rule(items, itemcnt, "COMPOUNDRULES", 2)) 2460 { 2461 // We don't use the count, but do check that it's a number and 2462 // not COMPOUNDRULE mistyped. 2463 if (atoi((char *)items[1]) == 0) 2464 smsg(_("Wrong COMPOUNDRULES value in %s line %d: %s"), 2465 fname, lnum, items[1]); 2466 } 2467 else if (is_aff_rule(items, itemcnt, "COMPOUNDRULE", 2)) 2468 { 2469 // Don't use the first rule if it is a number. 
2470 if (compflags != NULL || *skipdigits(items[1]) != NUL) 2471 { 2472 // Concatenate this string to previously defined ones, 2473 // using a slash to separate them. 2474 l = (int)STRLEN(items[1]) + 1; 2475 if (compflags != NULL) 2476 l += (int)STRLEN(compflags) + 1; 2477 p = getroom(spin, l, FALSE); 2478 if (p != NULL) 2479 { 2480 if (compflags != NULL) 2481 { 2482 STRCPY(p, compflags); 2483 STRCAT(p, "/"); 2484 } 2485 STRCAT(p, items[1]); 2486 compflags = p; 2487 } 2488 } 2489 } 2490 else if (is_aff_rule(items, itemcnt, "COMPOUNDWORDMAX", 2) 2491 && compmax == 0) 2492 { 2493 compmax = atoi((char *)items[1]); 2494 if (compmax == 0) 2495 smsg(_("Wrong COMPOUNDWORDMAX value in %s line %d: %s"), 2496 fname, lnum, items[1]); 2497 } 2498 else if (is_aff_rule(items, itemcnt, "COMPOUNDMIN", 2) 2499 && compminlen == 0) 2500 { 2501 compminlen = atoi((char *)items[1]); 2502 if (compminlen == 0) 2503 smsg(_("Wrong COMPOUNDMIN value in %s line %d: %s"), 2504 fname, lnum, items[1]); 2505 } 2506 else if (is_aff_rule(items, itemcnt, "COMPOUNDSYLMAX", 2) 2507 && compsylmax == 0) 2508 { 2509 compsylmax = atoi((char *)items[1]); 2510 if (compsylmax == 0) 2511 smsg(_("Wrong COMPOUNDSYLMAX value in %s line %d: %s"), 2512 fname, lnum, items[1]); 2513 } 2514 else if (is_aff_rule(items, itemcnt, "CHECKCOMPOUNDDUP", 1)) 2515 { 2516 compoptions |= COMP_CHECKDUP; 2517 } 2518 else if (is_aff_rule(items, itemcnt, "CHECKCOMPOUNDREP", 1)) 2519 { 2520 compoptions |= COMP_CHECKREP; 2521 } 2522 else if (is_aff_rule(items, itemcnt, "CHECKCOMPOUNDCASE", 1)) 2523 { 2524 compoptions |= COMP_CHECKCASE; 2525 } 2526 else if (is_aff_rule(items, itemcnt, "CHECKCOMPOUNDTRIPLE", 1)) 2527 { 2528 compoptions |= COMP_CHECKTRIPLE; 2529 } 2530 else if (is_aff_rule(items, itemcnt, "CHECKCOMPOUNDPATTERN", 2)) 2531 { 2532 if (atoi((char *)items[1]) == 0) 2533 smsg(_("Wrong CHECKCOMPOUNDPATTERN value in %s line %d: %s"), 2534 fname, lnum, items[1]); 2535 } 2536 else if (is_aff_rule(items, itemcnt, 
"CHECKCOMPOUNDPATTERN", 3)) 2537 { 2538 garray_T *gap = &spin->si_comppat; 2539 int i; 2540 2541 // Only add the couple if it isn't already there. 2542 for (i = 0; i < gap->ga_len - 1; i += 2) 2543 if (STRCMP(((char_u **)(gap->ga_data))[i], items[1]) == 0 2544 && STRCMP(((char_u **)(gap->ga_data))[i + 1], 2545 items[2]) == 0) 2546 break; 2547 if (i >= gap->ga_len && ga_grow(gap, 2) == OK) 2548 { 2549 ((char_u **)(gap->ga_data))[gap->ga_len++] 2550 = getroom_save(spin, items[1]); 2551 ((char_u **)(gap->ga_data))[gap->ga_len++] 2552 = getroom_save(spin, items[2]); 2553 } 2554 } 2555 else if (is_aff_rule(items, itemcnt, "SYLLABLE", 2) 2556 && syllable == NULL) 2557 { 2558 syllable = getroom_save(spin, items[1]); 2559 } 2560 else if (is_aff_rule(items, itemcnt, "NOBREAK", 1)) 2561 { 2562 spin->si_nobreak = TRUE; 2563 } 2564 else if (is_aff_rule(items, itemcnt, "NOSPLITSUGS", 1)) 2565 { 2566 spin->si_nosplitsugs = TRUE; 2567 } 2568 else if (is_aff_rule(items, itemcnt, "NOCOMPOUNDSUGS", 1)) 2569 { 2570 spin->si_nocompoundsugs = TRUE; 2571 } 2572 else if (is_aff_rule(items, itemcnt, "NOSUGFILE", 1)) 2573 { 2574 spin->si_nosugfile = TRUE; 2575 } 2576 else if (is_aff_rule(items, itemcnt, "PFXPOSTPONE", 1)) 2577 { 2578 aff->af_pfxpostpone = TRUE; 2579 } 2580 else if (is_aff_rule(items, itemcnt, "IGNOREEXTRA", 1)) 2581 { 2582 aff->af_ignoreextra = TRUE; 2583 } 2584 else if ((STRCMP(items[0], "PFX") == 0 2585 || STRCMP(items[0], "SFX") == 0) 2586 && aff_todo == 0 2587 && itemcnt >= 4) 2588 { 2589 int lasti = 4; 2590 char_u key[AH_KEY_LEN]; 2591 2592 if (*items[0] == 'P') 2593 tp = &aff->af_pref; 2594 else 2595 tp = &aff->af_suff; 2596 2597 // Myspell allows the same affix name to be used multiple 2598 // times. The affix files that do this have an undocumented 2599 // "S" flag on all but the last block, thus we check for that 2600 // and store it in ah_follows. 
2601 vim_strncpy(key, items[1], AH_KEY_LEN - 1); 2602 hi = hash_find(tp, key); 2603 if (!HASHITEM_EMPTY(hi)) 2604 { 2605 cur_aff = HI2AH(hi); 2606 if (cur_aff->ah_combine != (*items[2] == 'Y')) 2607 smsg(_("Different combining flag in continued affix block in %s line %d: %s"), 2608 fname, lnum, items[1]); 2609 if (!cur_aff->ah_follows) 2610 smsg(_("Duplicate affix in %s line %d: %s"), 2611 fname, lnum, items[1]); 2612 } 2613 else 2614 { 2615 // New affix letter. 2616 cur_aff = (affheader_T *)getroom(spin, 2617 sizeof(affheader_T), TRUE); 2618 if (cur_aff == NULL) 2619 break; 2620 cur_aff->ah_flag = affitem2flag(aff->af_flagtype, items[1], 2621 fname, lnum); 2622 if (cur_aff->ah_flag == 0 || STRLEN(items[1]) >= AH_KEY_LEN) 2623 break; 2624 if (cur_aff->ah_flag == aff->af_bad 2625 || cur_aff->ah_flag == aff->af_rare 2626 || cur_aff->ah_flag == aff->af_keepcase 2627 || cur_aff->ah_flag == aff->af_needaffix 2628 || cur_aff->ah_flag == aff->af_circumfix 2629 || cur_aff->ah_flag == aff->af_nosuggest 2630 || cur_aff->ah_flag == aff->af_needcomp 2631 || cur_aff->ah_flag == aff->af_comproot) 2632 smsg(_("Affix also used for BAD/RARE/KEEPCASE/NEEDAFFIX/NEEDCOMPOUND/NOSUGGEST in %s line %d: %s"), 2633 fname, lnum, items[1]); 2634 STRCPY(cur_aff->ah_key, items[1]); 2635 hash_add(tp, cur_aff->ah_key); 2636 2637 cur_aff->ah_combine = (*items[2] == 'Y'); 2638 } 2639 2640 // Check for the "S" flag, which apparently means that another 2641 // block with the same affix name is following. 2642 if (itemcnt > lasti && STRCMP(items[lasti], "S") == 0) 2643 { 2644 ++lasti; 2645 cur_aff->ah_follows = TRUE; 2646 } 2647 else 2648 cur_aff->ah_follows = FALSE; 2649 2650 // Myspell allows extra text after the item, but that might 2651 // mean mistakes go unnoticed. Require a comment-starter. 
2652 if (itemcnt > lasti && *items[lasti] != '#') 2653 smsg(_(e_afftrailing), fname, lnum, items[lasti]); 2654 2655 if (STRCMP(items[2], "Y") != 0 && STRCMP(items[2], "N") != 0) 2656 smsg(_("Expected Y or N in %s line %d: %s"), 2657 fname, lnum, items[2]); 2658 2659 if (*items[0] == 'P' && aff->af_pfxpostpone) 2660 { 2661 if (cur_aff->ah_newID == 0) 2662 { 2663 // Use a new number in the .spl file later, to be able 2664 // to handle multiple .aff files. 2665 check_renumber(spin); 2666 cur_aff->ah_newID = ++spin->si_newprefID; 2667 2668 // We only really use ah_newID if the prefix is 2669 // postponed. We know that only after handling all 2670 // the items. 2671 did_postpone_prefix = FALSE; 2672 } 2673 else 2674 // Did use the ID in a previous block. 2675 did_postpone_prefix = TRUE; 2676 } 2677 2678 aff_todo = atoi((char *)items[3]); 2679 } 2680 else if ((STRCMP(items[0], "PFX") == 0 2681 || STRCMP(items[0], "SFX") == 0) 2682 && aff_todo > 0 2683 && STRCMP(cur_aff->ah_key, items[1]) == 0 2684 && itemcnt >= 5) 2685 { 2686 affentry_T *aff_entry; 2687 int upper = FALSE; 2688 int lasti = 5; 2689 2690 // Myspell allows extra text after the item, but that might 2691 // mean mistakes go unnoticed. Require a comment-starter, 2692 // unless IGNOREEXTRA is used. Hunspell uses a "-" item. 2693 if (itemcnt > lasti 2694 && !aff->af_ignoreextra 2695 && *items[lasti] != '#' 2696 && (STRCMP(items[lasti], "-") != 0 2697 || itemcnt != lasti + 1)) 2698 smsg(_(e_afftrailing), fname, lnum, items[lasti]); 2699 2700 // New item for an affix letter. 
2701 --aff_todo; 2702 aff_entry = (affentry_T *)getroom(spin, 2703 sizeof(affentry_T), TRUE); 2704 if (aff_entry == NULL) 2705 break; 2706 2707 if (STRCMP(items[2], "0") != 0) 2708 aff_entry->ae_chop = getroom_save(spin, items[2]); 2709 if (STRCMP(items[3], "0") != 0) 2710 { 2711 aff_entry->ae_add = getroom_save(spin, items[3]); 2712 2713 // Recognize flags on the affix: abcd/XYZ 2714 aff_entry->ae_flags = vim_strchr(aff_entry->ae_add, '/'); 2715 if (aff_entry->ae_flags != NULL) 2716 { 2717 *aff_entry->ae_flags++ = NUL; 2718 aff_process_flags(aff, aff_entry); 2719 } 2720 } 2721 2722 // Don't use an affix entry with non-ASCII characters when 2723 // "spin->si_ascii" is TRUE. 2724 if (!spin->si_ascii || !(has_non_ascii(aff_entry->ae_chop) 2725 || has_non_ascii(aff_entry->ae_add))) 2726 { 2727 aff_entry->ae_next = cur_aff->ah_first; 2728 cur_aff->ah_first = aff_entry; 2729 2730 if (STRCMP(items[4], ".") != 0) 2731 { 2732 char_u buf[MAXLINELEN]; 2733 2734 aff_entry->ae_cond = getroom_save(spin, items[4]); 2735 if (*items[0] == 'P') 2736 sprintf((char *)buf, "^%s", items[4]); 2737 else 2738 sprintf((char *)buf, "%s$", items[4]); 2739 aff_entry->ae_prog = vim_regcomp(buf, 2740 RE_MAGIC + RE_STRING + RE_STRICT); 2741 if (aff_entry->ae_prog == NULL) 2742 smsg(_("Broken condition in %s line %d: %s"), 2743 fname, lnum, items[4]); 2744 } 2745 2746 // For postponed prefixes we need an entry in si_prefcond 2747 // for the condition. Use an existing one if possible. 2748 // Can't be done for an affix with flags, ignoring 2749 // COMPOUNDFORBIDFLAG and COMPOUNDPERMITFLAG. 2750 if (*items[0] == 'P' && aff->af_pfxpostpone 2751 && aff_entry->ae_flags == NULL) 2752 { 2753 // When the chop string is one lower-case letter and 2754 // the add string ends in the upper-case letter we set 2755 // the "upper" flag, clear "ae_chop" and remove the 2756 // letters from "ae_add". The condition must either 2757 // be empty or start with the same letter. 
2758 if (aff_entry->ae_chop != NULL 2759 && aff_entry->ae_add != NULL 2760 && aff_entry->ae_chop[(*mb_ptr2len)( 2761 aff_entry->ae_chop)] == NUL) 2762 { 2763 int c, c_up; 2764 2765 c = PTR2CHAR(aff_entry->ae_chop); 2766 c_up = SPELL_TOUPPER(c); 2767 if (c_up != c 2768 && (aff_entry->ae_cond == NULL 2769 || PTR2CHAR(aff_entry->ae_cond) == c)) 2770 { 2771 p = aff_entry->ae_add 2772 + STRLEN(aff_entry->ae_add); 2773 MB_PTR_BACK(aff_entry->ae_add, p); 2774 if (PTR2CHAR(p) == c_up) 2775 { 2776 upper = TRUE; 2777 aff_entry->ae_chop = NULL; 2778 *p = NUL; 2779 2780 // The condition is matched with the 2781 // actual word, thus must check for the 2782 // upper-case letter. 2783 if (aff_entry->ae_cond != NULL) 2784 { 2785 char_u buf[MAXLINELEN]; 2786 2787 if (has_mbyte) 2788 { 2789 onecap_copy(items[4], buf, TRUE); 2790 aff_entry->ae_cond = getroom_save( 2791 spin, buf); 2792 } 2793 else 2794 *aff_entry->ae_cond = c_up; 2795 if (aff_entry->ae_cond != NULL) 2796 { 2797 sprintf((char *)buf, "^%s", 2798 aff_entry->ae_cond); 2799 vim_regfree(aff_entry->ae_prog); 2800 aff_entry->ae_prog = vim_regcomp( 2801 buf, RE_MAGIC + RE_STRING); 2802 } 2803 } 2804 } 2805 } 2806 } 2807 2808 if (aff_entry->ae_chop == NULL 2809 && aff_entry->ae_flags == NULL) 2810 { 2811 int idx; 2812 char_u **pp; 2813 int n; 2814 2815 // Find a previously used condition. 2816 for (idx = spin->si_prefcond.ga_len - 1; idx >= 0; 2817 --idx) 2818 { 2819 p = ((char_u **)spin->si_prefcond.ga_data)[idx]; 2820 if (str_equal(p, aff_entry->ae_cond)) 2821 break; 2822 } 2823 if (idx < 0 && ga_grow(&spin->si_prefcond, 1) == OK) 2824 { 2825 // Not found, add a new condition. 2826 idx = spin->si_prefcond.ga_len++; 2827 pp = ((char_u **)spin->si_prefcond.ga_data) 2828 + idx; 2829 if (aff_entry->ae_cond == NULL) 2830 *pp = NULL; 2831 else 2832 *pp = getroom_save(spin, 2833 aff_entry->ae_cond); 2834 } 2835 2836 // Add the prefix to the prefix tree. 
2837 if (aff_entry->ae_add == NULL) 2838 p = (char_u *)""; 2839 else 2840 p = aff_entry->ae_add; 2841 2842 // PFX_FLAGS is a negative number, so that 2843 // tree_add_word() knows this is the prefix tree. 2844 n = PFX_FLAGS; 2845 if (!cur_aff->ah_combine) 2846 n |= WFP_NC; 2847 if (upper) 2848 n |= WFP_UP; 2849 if (aff_entry->ae_comppermit) 2850 n |= WFP_COMPPERMIT; 2851 if (aff_entry->ae_compforbid) 2852 n |= WFP_COMPFORBID; 2853 tree_add_word(spin, p, spin->si_prefroot, n, 2854 idx, cur_aff->ah_newID); 2855 did_postpone_prefix = TRUE; 2856 } 2857 2858 // Didn't actually use ah_newID, backup si_newprefID. 2859 if (aff_todo == 0 && !did_postpone_prefix) 2860 { 2861 --spin->si_newprefID; 2862 cur_aff->ah_newID = 0; 2863 } 2864 } 2865 } 2866 } 2867 else if (is_aff_rule(items, itemcnt, "FOL", 2) && fol == NULL) 2868 { 2869 fol = vim_strsave(items[1]); 2870 } 2871 else if (is_aff_rule(items, itemcnt, "LOW", 2) && low == NULL) 2872 { 2873 low = vim_strsave(items[1]); 2874 } 2875 else if (is_aff_rule(items, itemcnt, "UPP", 2) && upp == NULL) 2876 { 2877 upp = vim_strsave(items[1]); 2878 } 2879 else if (is_aff_rule(items, itemcnt, "REP", 2) 2880 || is_aff_rule(items, itemcnt, "REPSAL", 2)) 2881 { 2882 // Ignore REP/REPSAL count 2883 if (!isdigit(*items[1])) 2884 smsg(_("Expected REP(SAL) count in %s line %d"), 2885 fname, lnum); 2886 } 2887 else if ((STRCMP(items[0], "REP") == 0 2888 || STRCMP(items[0], "REPSAL") == 0) 2889 && itemcnt >= 3) 2890 { 2891 // REP/REPSAL item 2892 // Myspell ignores extra arguments, we require it starts with 2893 // # to detect mistakes. 2894 if (itemcnt > 3 && items[3][0] != '#') 2895 smsg(_(e_afftrailing), fname, lnum, items[3]); 2896 if (items[0][3] == 'S' ? do_repsal : do_rep) 2897 { 2898 // Replace underscore with space (can't include a space 2899 // directly). 
2900 for (p = items[1]; *p != NUL; MB_PTR_ADV(p)) 2901 if (*p == '_') 2902 *p = ' '; 2903 for (p = items[2]; *p != NUL; MB_PTR_ADV(p)) 2904 if (*p == '_') 2905 *p = ' '; 2906 add_fromto(spin, items[0][3] == 'S' 2907 ? &spin->si_repsal 2908 : &spin->si_rep, items[1], items[2]); 2909 } 2910 } 2911 else if (is_aff_rule(items, itemcnt, "MAP", 2)) 2912 { 2913 // MAP item or count 2914 if (!found_map) 2915 { 2916 // First line contains the count. 2917 found_map = TRUE; 2918 if (!isdigit(*items[1])) 2919 smsg(_("Expected MAP count in %s line %d"), 2920 fname, lnum); 2921 } 2922 else if (do_mapline) 2923 { 2924 int c; 2925 2926 // Check that every character appears only once. 2927 for (p = items[1]; *p != NUL; ) 2928 { 2929 c = mb_ptr2char_adv(&p); 2930 if ((spin->si_map.ga_len > 0 2931 && vim_strchr(spin->si_map.ga_data, c) 2932 != NULL) 2933 || vim_strchr(p, c) != NULL) 2934 smsg(_("Duplicate character in MAP in %s line %d"), 2935 fname, lnum); 2936 } 2937 2938 // We simply concatenate all the MAP strings, separated by 2939 // slashes. 2940 ga_concat(&spin->si_map, items[1]); 2941 ga_append(&spin->si_map, '/'); 2942 } 2943 } 2944 // Accept "SAL from to" and "SAL from to #comment". 2945 else if (is_aff_rule(items, itemcnt, "SAL", 3)) 2946 { 2947 if (do_sal) 2948 { 2949 // SAL item (sounds-a-like) 2950 // Either one of the known keys or a from-to pair. 2951 if (STRCMP(items[1], "followup") == 0) 2952 spin->si_followup = sal_to_bool(items[2]); 2953 else if (STRCMP(items[1], "collapse_result") == 0) 2954 spin->si_collapse = sal_to_bool(items[2]); 2955 else if (STRCMP(items[1], "remove_accents") == 0) 2956 spin->si_rem_accents = sal_to_bool(items[2]); 2957 else 2958 // when "to" is "_" it means empty 2959 add_fromto(spin, &spin->si_sal, items[1], 2960 STRCMP(items[2], "_") == 0 ? 
(char_u *)"" 2961 : items[2]); 2962 } 2963 } 2964 else if (is_aff_rule(items, itemcnt, "SOFOFROM", 2) 2965 && sofofrom == NULL) 2966 { 2967 sofofrom = getroom_save(spin, items[1]); 2968 } 2969 else if (is_aff_rule(items, itemcnt, "SOFOTO", 2) 2970 && sofoto == NULL) 2971 { 2972 sofoto = getroom_save(spin, items[1]); 2973 } 2974 else if (STRCMP(items[0], "COMMON") == 0) 2975 { 2976 int i; 2977 2978 for (i = 1; i < itemcnt; ++i) 2979 { 2980 if (HASHITEM_EMPTY(hash_find(&spin->si_commonwords, 2981 items[i]))) 2982 { 2983 p = vim_strsave(items[i]); 2984 if (p == NULL) 2985 break; 2986 hash_add(&spin->si_commonwords, p); 2987 } 2988 } 2989 } 2990 else 2991 smsg(_("Unrecognized or duplicate item in %s line %d: %s"), 2992 fname, lnum, items[0]); 2993 } 2994 } 2995 2996 if (fol != NULL || low != NULL || upp != NULL) 2997 { 2998 if (spin->si_clear_chartab) 2999 { 3000 // Clear the char type tables, don't want to use any of the 3001 // currently used spell properties. 3002 init_spell_chartab(); 3003 spin->si_clear_chartab = FALSE; 3004 } 3005 3006 /* 3007 * Don't write a word table for an ASCII file, so that we don't check 3008 * for conflicts with a word table that matches 'encoding'. 3009 * Don't write one for utf-8 either, we use utf_*() and 3010 * mb_get_class(), the list of chars in the file will be incomplete. 3011 */ 3012 if (!spin->si_ascii && !enc_utf8) 3013 { 3014 if (fol == NULL || low == NULL || upp == NULL) 3015 smsg(_("Missing FOL/LOW/UPP line in %s"), fname); 3016 else 3017 (void)set_spell_chartab(fol, low, upp); 3018 } 3019 3020 vim_free(fol); 3021 vim_free(low); 3022 vim_free(upp); 3023 } 3024 3025 // Use compound specifications of the .aff file for the spell info. 
3026 if (compmax != 0) 3027 { 3028 aff_check_number(spin->si_compmax, compmax, "COMPOUNDWORDMAX"); 3029 spin->si_compmax = compmax; 3030 } 3031 3032 if (compminlen != 0) 3033 { 3034 aff_check_number(spin->si_compminlen, compminlen, "COMPOUNDMIN"); 3035 spin->si_compminlen = compminlen; 3036 } 3037 3038 if (compsylmax != 0) 3039 { 3040 if (syllable == NULL) 3041 smsg(_("COMPOUNDSYLMAX used without SYLLABLE")); 3042 aff_check_number(spin->si_compsylmax, compsylmax, "COMPOUNDSYLMAX"); 3043 spin->si_compsylmax = compsylmax; 3044 } 3045 3046 if (compoptions != 0) 3047 { 3048 aff_check_number(spin->si_compoptions, compoptions, "COMPOUND options"); 3049 spin->si_compoptions |= compoptions; 3050 } 3051 3052 if (compflags != NULL) 3053 process_compflags(spin, aff, compflags); 3054 3055 // Check that we didn't use too many renumbered flags. 3056 if (spin->si_newcompID < spin->si_newprefID) 3057 { 3058 if (spin->si_newcompID == 127 || spin->si_newcompID == 255) 3059 msg(_("Too many postponed prefixes")); 3060 else if (spin->si_newprefID == 0 || spin->si_newprefID == 127) 3061 msg(_("Too many compound flags")); 3062 else 3063 msg(_("Too many postponed prefixes and/or compound flags")); 3064 } 3065 3066 if (syllable != NULL) 3067 { 3068 aff_check_string(spin->si_syllable, syllable, "SYLLABLE"); 3069 spin->si_syllable = syllable; 3070 } 3071 3072 if (sofofrom != NULL || sofoto != NULL) 3073 { 3074 if (sofofrom == NULL || sofoto == NULL) 3075 smsg(_("Missing SOFO%s line in %s"), 3076 sofofrom == NULL ? 
"FROM" : "TO", fname); 3077 else if (spin->si_sal.ga_len > 0) 3078 smsg(_("Both SAL and SOFO lines in %s"), fname); 3079 else 3080 { 3081 aff_check_string(spin->si_sofofr, sofofrom, "SOFOFROM"); 3082 aff_check_string(spin->si_sofoto, sofoto, "SOFOTO"); 3083 spin->si_sofofr = sofofrom; 3084 spin->si_sofoto = sofoto; 3085 } 3086 } 3087 3088 if (midword != NULL) 3089 { 3090 aff_check_string(spin->si_midword, midword, "MIDWORD"); 3091 spin->si_midword = midword; 3092 } 3093 3094 vim_free(pc); 3095 fclose(fd); 3096 return aff; 3097 } 3098 3099 /* 3100 * Return TRUE when items[0] equals "rulename", there are "mincount" items or 3101 * a comment is following after item "mincount". 3102 */ 3103 static int 3104 is_aff_rule( 3105 char_u **items, 3106 int itemcnt, 3107 char *rulename, 3108 int mincount) 3109 { 3110 return (STRCMP(items[0], rulename) == 0 3111 && (itemcnt == mincount 3112 || (itemcnt > mincount && items[mincount][0] == '#'))); 3113 } 3114 3115 /* 3116 * For affix "entry" move COMPOUNDFORBIDFLAG and COMPOUNDPERMITFLAG from 3117 * ae_flags to ae_comppermit and ae_compforbid. 3118 */ 3119 static void 3120 aff_process_flags(afffile_T *affile, affentry_T *entry) 3121 { 3122 char_u *p; 3123 char_u *prevp; 3124 unsigned flag; 3125 3126 if (entry->ae_flags != NULL 3127 && (affile->af_compforbid != 0 || affile->af_comppermit != 0)) 3128 { 3129 for (p = entry->ae_flags; *p != NUL; ) 3130 { 3131 prevp = p; 3132 flag = get_affitem(affile->af_flagtype, &p); 3133 if (flag == affile->af_comppermit || flag == affile->af_compforbid) 3134 { 3135 STRMOVE(prevp, p); 3136 p = prevp; 3137 if (flag == affile->af_comppermit) 3138 entry->ae_comppermit = TRUE; 3139 else 3140 entry->ae_compforbid = TRUE; 3141 } 3142 if (affile->af_flagtype == AFT_NUM && *p == ',') 3143 ++p; 3144 } 3145 if (*entry->ae_flags == NUL) 3146 entry->ae_flags = NULL; // nothing left 3147 } 3148 } 3149 3150 /* 3151 * Return TRUE if "s" is the name of an info item in the affix file. 
3152 */ 3153 static int 3154 spell_info_item(char_u *s) 3155 { 3156 return STRCMP(s, "NAME") == 0 3157 || STRCMP(s, "HOME") == 0 3158 || STRCMP(s, "VERSION") == 0 3159 || STRCMP(s, "AUTHOR") == 0 3160 || STRCMP(s, "EMAIL") == 0 3161 || STRCMP(s, "COPYRIGHT") == 0; 3162 } 3163 3164 /* 3165 * Turn an affix flag name into a number, according to the FLAG type. 3166 * returns zero for failure. 3167 */ 3168 static unsigned 3169 affitem2flag( 3170 int flagtype, 3171 char_u *item, 3172 char_u *fname, 3173 int lnum) 3174 { 3175 unsigned res; 3176 char_u *p = item; 3177 3178 res = get_affitem(flagtype, &p); 3179 if (res == 0) 3180 { 3181 if (flagtype == AFT_NUM) 3182 smsg(_("Flag is not a number in %s line %d: %s"), 3183 fname, lnum, item); 3184 else 3185 smsg(_("Illegal flag in %s line %d: %s"), 3186 fname, lnum, item); 3187 } 3188 if (*p != NUL) 3189 { 3190 smsg(_(e_affname), fname, lnum, item); 3191 return 0; 3192 } 3193 3194 return res; 3195 } 3196 3197 /* 3198 * Get one affix name from "*pp" and advance the pointer. 3199 * Returns ZERO_FLAG for "0". 3200 * Returns zero for an error, still advances the pointer then. 3201 */ 3202 static unsigned 3203 get_affitem(int flagtype, char_u **pp) 3204 { 3205 int res; 3206 3207 if (flagtype == AFT_NUM) 3208 { 3209 if (!VIM_ISDIGIT(**pp)) 3210 { 3211 ++*pp; // always advance, avoid getting stuck 3212 return 0; 3213 } 3214 res = getdigits(pp); 3215 if (res == 0) 3216 res = ZERO_FLAG; 3217 } 3218 else 3219 { 3220 res = mb_ptr2char_adv(pp); 3221 if (flagtype == AFT_LONG || (flagtype == AFT_CAPLONG 3222 && res >= 'A' && res <= 'Z')) 3223 { 3224 if (**pp == NUL) 3225 return 0; 3226 res = mb_ptr2char_adv(pp) + (res << 16); 3227 } 3228 } 3229 return res; 3230 } 3231 3232 /* 3233 * Process the "compflags" string used in an affix file and append it to 3234 * spin->si_compflags. 3235 * The processing involves changing the affix names to ID numbers, so that 3236 * they fit in one byte. 
3237 */ 3238 static void 3239 process_compflags( 3240 spellinfo_T *spin, 3241 afffile_T *aff, 3242 char_u *compflags) 3243 { 3244 char_u *p; 3245 char_u *prevp; 3246 unsigned flag; 3247 compitem_T *ci; 3248 int id; 3249 int len; 3250 char_u *tp; 3251 char_u key[AH_KEY_LEN]; 3252 hashitem_T *hi; 3253 3254 // Make room for the old and the new compflags, concatenated with a / in 3255 // between. Processing it makes it shorter, but we don't know by how 3256 // much, thus allocate the maximum. 3257 len = (int)STRLEN(compflags) + 1; 3258 if (spin->si_compflags != NULL) 3259 len += (int)STRLEN(spin->si_compflags) + 1; 3260 p = getroom(spin, len, FALSE); 3261 if (p == NULL) 3262 return; 3263 if (spin->si_compflags != NULL) 3264 { 3265 STRCPY(p, spin->si_compflags); 3266 STRCAT(p, "/"); 3267 } 3268 spin->si_compflags = p; 3269 tp = p + STRLEN(p); 3270 3271 for (p = compflags; *p != NUL; ) 3272 { 3273 if (vim_strchr((char_u *)"/?*+[]", *p) != NULL) 3274 // Copy non-flag characters directly. 3275 *tp++ = *p++; 3276 else 3277 { 3278 // First get the flag number, also checks validity. 3279 prevp = p; 3280 flag = get_affitem(aff->af_flagtype, &p); 3281 if (flag != 0) 3282 { 3283 // Find the flag in the hashtable. If it was used before, use 3284 // the existing ID. Otherwise add a new entry. 3285 vim_strncpy(key, prevp, p - prevp); 3286 hi = hash_find(&aff->af_comp, key); 3287 if (!HASHITEM_EMPTY(hi)) 3288 id = HI2CI(hi)->ci_newID; 3289 else 3290 { 3291 ci = (compitem_T *)getroom(spin, sizeof(compitem_T), TRUE); 3292 if (ci == NULL) 3293 break; 3294 STRCPY(ci->ci_key, key); 3295 ci->ci_flag = flag; 3296 // Avoid using a flag ID that has a special meaning in a 3297 // regexp (also inside []). 
3298 do 3299 { 3300 check_renumber(spin); 3301 id = spin->si_newcompID--; 3302 } while (vim_strchr((char_u *)"/?*+[]\\-^", id) != NULL); 3303 ci->ci_newID = id; 3304 hash_add(&aff->af_comp, ci->ci_key); 3305 } 3306 *tp++ = id; 3307 } 3308 if (aff->af_flagtype == AFT_NUM && *p == ',') 3309 ++p; 3310 } 3311 } 3312 3313 *tp = NUL; 3314 } 3315 3316 /* 3317 * Check that the new IDs for postponed affixes and compounding don't overrun 3318 * each other. We have almost 255 available, but start at 0-127 to avoid 3319 * using two bytes for utf-8. When the 0-127 range is used up go to 128-255. 3320 * When that is used up an error message is given. 3321 */ 3322 static void 3323 check_renumber(spellinfo_T *spin) 3324 { 3325 if (spin->si_newprefID == spin->si_newcompID && spin->si_newcompID < 128) 3326 { 3327 spin->si_newprefID = 127; 3328 spin->si_newcompID = 255; 3329 } 3330 } 3331 3332 /* 3333 * Return TRUE if flag "flag" appears in affix list "afflist". 3334 */ 3335 static int 3336 flag_in_afflist(int flagtype, char_u *afflist, unsigned flag) 3337 { 3338 char_u *p; 3339 unsigned n; 3340 3341 switch (flagtype) 3342 { 3343 case AFT_CHAR: 3344 return vim_strchr(afflist, flag) != NULL; 3345 3346 case AFT_CAPLONG: 3347 case AFT_LONG: 3348 for (p = afflist; *p != NUL; ) 3349 { 3350 n = mb_ptr2char_adv(&p); 3351 if ((flagtype == AFT_LONG || (n >= 'A' && n <= 'Z')) 3352 && *p != NUL) 3353 n = mb_ptr2char_adv(&p) + (n << 16); 3354 if (n == flag) 3355 return TRUE; 3356 } 3357 break; 3358 3359 case AFT_NUM: 3360 for (p = afflist; *p != NUL; ) 3361 { 3362 n = getdigits(&p); 3363 if (n == 0) 3364 n = ZERO_FLAG; 3365 if (n == flag) 3366 return TRUE; 3367 if (*p != NUL) // skip over comma 3368 ++p; 3369 } 3370 break; 3371 } 3372 return FALSE; 3373 } 3374 3375 /* 3376 * Give a warning when "spinval" and "affval" numbers are set and not the same. 
3377 */ 3378 static void 3379 aff_check_number(int spinval, int affval, char *name) 3380 { 3381 if (spinval != 0 && spinval != affval) 3382 smsg(_("%s value differs from what is used in another .aff file"), name); 3383 } 3384 3385 /* 3386 * Give a warning when "spinval" and "affval" strings are set and not the same. 3387 */ 3388 static void 3389 aff_check_string(char_u *spinval, char_u *affval, char *name) 3390 { 3391 if (spinval != NULL && STRCMP(spinval, affval) != 0) 3392 smsg(_("%s value differs from what is used in another .aff file"), name); 3393 } 3394 3395 /* 3396 * Return TRUE if strings "s1" and "s2" are equal. Also consider both being 3397 * NULL as equal. 3398 */ 3399 static int 3400 str_equal(char_u *s1, char_u *s2) 3401 { 3402 if (s1 == NULL || s2 == NULL) 3403 return s1 == s2; 3404 return STRCMP(s1, s2) == 0; 3405 } 3406 3407 /* 3408 * Add a from-to item to "gap". Used for REP and SAL items. 3409 * They are stored case-folded. 3410 */ 3411 static void 3412 add_fromto( 3413 spellinfo_T *spin, 3414 garray_T *gap, 3415 char_u *from, 3416 char_u *to) 3417 { 3418 fromto_T *ftp; 3419 char_u word[MAXWLEN]; 3420 3421 if (ga_grow(gap, 1) == OK) 3422 { 3423 ftp = ((fromto_T *)gap->ga_data) + gap->ga_len; 3424 (void)spell_casefold(from, (int)STRLEN(from), word, MAXWLEN); 3425 ftp->ft_from = getroom_save(spin, word); 3426 (void)spell_casefold(to, (int)STRLEN(to), word, MAXWLEN); 3427 ftp->ft_to = getroom_save(spin, word); 3428 ++gap->ga_len; 3429 } 3430 } 3431 3432 /* 3433 * Convert a boolean argument in a SAL line to TRUE or FALSE; 3434 */ 3435 static int 3436 sal_to_bool(char_u *s) 3437 { 3438 return STRCMP(s, "1") == 0 || STRCMP(s, "true") == 0; 3439 } 3440 3441 /* 3442 * Free the structure filled by spell_read_aff(). 
3443 */ 3444 static void 3445 spell_free_aff(afffile_T *aff) 3446 { 3447 hashtab_T *ht; 3448 hashitem_T *hi; 3449 int todo; 3450 affheader_T *ah; 3451 affentry_T *ae; 3452 3453 vim_free(aff->af_enc); 3454 3455 // All this trouble to free the "ae_prog" items... 3456 for (ht = &aff->af_pref; ; ht = &aff->af_suff) 3457 { 3458 todo = (int)ht->ht_used; 3459 for (hi = ht->ht_array; todo > 0; ++hi) 3460 { 3461 if (!HASHITEM_EMPTY(hi)) 3462 { 3463 --todo; 3464 ah = HI2AH(hi); 3465 for (ae = ah->ah_first; ae != NULL; ae = ae->ae_next) 3466 vim_regfree(ae->ae_prog); 3467 } 3468 } 3469 if (ht == &aff->af_suff) 3470 break; 3471 } 3472 3473 hash_clear(&aff->af_pref); 3474 hash_clear(&aff->af_suff); 3475 hash_clear(&aff->af_comp); 3476 } 3477 3478 /* 3479 * Read dictionary file "fname". 3480 * Returns OK or FAIL; 3481 */ 3482 static int 3483 spell_read_dic(spellinfo_T *spin, char_u *fname, afffile_T *affile) 3484 { 3485 hashtab_T ht; 3486 char_u line[MAXLINELEN]; 3487 char_u *p; 3488 char_u *afflist; 3489 char_u store_afflist[MAXWLEN]; 3490 int pfxlen; 3491 int need_affix; 3492 char_u *dw; 3493 char_u *pc; 3494 char_u *w; 3495 int l; 3496 hash_T hash; 3497 hashitem_T *hi; 3498 FILE *fd; 3499 int lnum = 1; 3500 int non_ascii = 0; 3501 int retval = OK; 3502 char_u message[MAXLINELEN + MAXWLEN]; 3503 int flags; 3504 int duplicate = 0; 3505 time_T last_msg_time = 0; 3506 3507 /* 3508 * Open the file. 3509 */ 3510 fd = mch_fopen((char *)fname, "r"); 3511 if (fd == NULL) 3512 { 3513 semsg(_(e_notopen), fname); 3514 return FAIL; 3515 } 3516 3517 // The hashtable is only used to detect duplicated words. 3518 hash_init(&ht); 3519 3520 vim_snprintf((char *)IObuff, IOSIZE, 3521 _("Reading dictionary file %s..."), fname); 3522 spell_message(spin, IObuff); 3523 3524 // start with a message for the first line 3525 spin->si_msg_count = 999999; 3526 3527 // Read and ignore the first line: word count. 
3528 (void)vim_fgets(line, MAXLINELEN, fd); 3529 if (!vim_isdigit(*skipwhite(line))) 3530 semsg(_("E760: No word count in %s"), fname); 3531 3532 /* 3533 * Read all the lines in the file one by one. 3534 * The words are converted to 'encoding' here, before being added to 3535 * the hashtable. 3536 */ 3537 while (!vim_fgets(line, MAXLINELEN, fd) && !got_int) 3538 { 3539 line_breakcheck(); 3540 ++lnum; 3541 if (line[0] == '#' || line[0] == '/') 3542 continue; // comment line 3543 3544 // Remove CR, LF and white space from the end. White space halfway 3545 // the word is kept to allow e.g., "et al.". 3546 l = (int)STRLEN(line); 3547 while (l > 0 && line[l - 1] <= ' ') 3548 --l; 3549 if (l == 0) 3550 continue; // empty line 3551 line[l] = NUL; 3552 3553 // Convert from "SET" to 'encoding' when needed. 3554 if (spin->si_conv.vc_type != CONV_NONE) 3555 { 3556 pc = string_convert(&spin->si_conv, line, NULL); 3557 if (pc == NULL) 3558 { 3559 smsg(_("Conversion failure for word in %s line %d: %s"), 3560 fname, lnum, line); 3561 continue; 3562 } 3563 w = pc; 3564 } 3565 else 3566 { 3567 pc = NULL; 3568 w = line; 3569 } 3570 3571 // Truncate the word at the "/", set "afflist" to what follows. 3572 // Replace "\/" by "/" and "\\" by "\". 3573 afflist = NULL; 3574 for (p = w; *p != NUL; MB_PTR_ADV(p)) 3575 { 3576 if (*p == '\\' && (p[1] == '\\' || p[1] == '/')) 3577 STRMOVE(p, p + 1); 3578 else if (*p == '/') 3579 { 3580 *p = NUL; 3581 afflist = p + 1; 3582 break; 3583 } 3584 } 3585 3586 // Skip non-ASCII words when "spin->si_ascii" is TRUE. 3587 if (spin->si_ascii && has_non_ascii(w)) 3588 { 3589 ++non_ascii; 3590 vim_free(pc); 3591 continue; 3592 } 3593 3594 // This takes time, print a message every 10000 words, but not more 3595 // often than once per second. 
3596 if (spin->si_verbose && spin->si_msg_count > 10000) 3597 { 3598 spin->si_msg_count = 0; 3599 if (vim_time() > last_msg_time) 3600 { 3601 last_msg_time = vim_time(); 3602 vim_snprintf((char *)message, sizeof(message), 3603 _("line %6d, word %6ld - %s"), 3604 lnum, spin->si_foldwcount + spin->si_keepwcount, w); 3605 msg_start(); 3606 msg_outtrans_long_attr(message, 0); 3607 msg_clr_eos(); 3608 msg_didout = FALSE; 3609 msg_col = 0; 3610 out_flush(); 3611 } 3612 } 3613 3614 // Store the word in the hashtable to be able to find duplicates. 3615 dw = (char_u *)getroom_save(spin, w); 3616 if (dw == NULL) 3617 { 3618 retval = FAIL; 3619 vim_free(pc); 3620 break; 3621 } 3622 3623 hash = hash_hash(dw); 3624 hi = hash_lookup(&ht, dw, hash); 3625 if (!HASHITEM_EMPTY(hi)) 3626 { 3627 if (p_verbose > 0) 3628 smsg(_("Duplicate word in %s line %d: %s"), 3629 fname, lnum, dw); 3630 else if (duplicate == 0) 3631 smsg(_("First duplicate word in %s line %d: %s"), 3632 fname, lnum, dw); 3633 ++duplicate; 3634 } 3635 else 3636 hash_add_item(&ht, hi, dw, hash); 3637 3638 flags = 0; 3639 store_afflist[0] = NUL; 3640 pfxlen = 0; 3641 need_affix = FALSE; 3642 if (afflist != NULL) 3643 { 3644 // Extract flags from the affix list. 3645 flags |= get_affix_flags(affile, afflist); 3646 3647 if (affile->af_needaffix != 0 && flag_in_afflist( 3648 affile->af_flagtype, afflist, affile->af_needaffix)) 3649 need_affix = TRUE; 3650 3651 if (affile->af_pfxpostpone) 3652 // Need to store the list of prefix IDs with the word. 3653 pfxlen = get_pfxlist(affile, afflist, store_afflist); 3654 3655 if (spin->si_compflags != NULL) 3656 // Need to store the list of compound flags with the word. 3657 // Concatenate them to the list of prefix IDs. 3658 get_compflags(affile, afflist, store_afflist + pfxlen); 3659 } 3660 3661 // Add the word to the word tree(s). 
3662 if (store_word(spin, dw, flags, spin->si_region, 3663 store_afflist, need_affix) == FAIL) 3664 retval = FAIL; 3665 3666 if (afflist != NULL) 3667 { 3668 // Find all matching suffixes and add the resulting words. 3669 // Additionally do matching prefixes that combine. 3670 if (store_aff_word(spin, dw, afflist, affile, 3671 &affile->af_suff, &affile->af_pref, 3672 CONDIT_SUF, flags, store_afflist, pfxlen) == FAIL) 3673 retval = FAIL; 3674 3675 // Find all matching prefixes and add the resulting words. 3676 if (store_aff_word(spin, dw, afflist, affile, 3677 &affile->af_pref, NULL, 3678 CONDIT_SUF, flags, store_afflist, pfxlen) == FAIL) 3679 retval = FAIL; 3680 } 3681 3682 vim_free(pc); 3683 } 3684 3685 if (duplicate > 0) 3686 smsg(_("%d duplicate word(s) in %s"), duplicate, fname); 3687 if (spin->si_ascii && non_ascii > 0) 3688 smsg(_("Ignored %d word(s) with non-ASCII characters in %s"), 3689 non_ascii, fname); 3690 hash_clear(&ht); 3691 3692 fclose(fd); 3693 return retval; 3694 } 3695 3696 /* 3697 * Check for affix flags in "afflist" that are turned into word flags. 3698 * Return WF_ flags. 
3699 */ 3700 static int 3701 get_affix_flags(afffile_T *affile, char_u *afflist) 3702 { 3703 int flags = 0; 3704 3705 if (affile->af_keepcase != 0 && flag_in_afflist( 3706 affile->af_flagtype, afflist, affile->af_keepcase)) 3707 flags |= WF_KEEPCAP | WF_FIXCAP; 3708 if (affile->af_rare != 0 && flag_in_afflist( 3709 affile->af_flagtype, afflist, affile->af_rare)) 3710 flags |= WF_RARE; 3711 if (affile->af_bad != 0 && flag_in_afflist( 3712 affile->af_flagtype, afflist, affile->af_bad)) 3713 flags |= WF_BANNED; 3714 if (affile->af_needcomp != 0 && flag_in_afflist( 3715 affile->af_flagtype, afflist, affile->af_needcomp)) 3716 flags |= WF_NEEDCOMP; 3717 if (affile->af_comproot != 0 && flag_in_afflist( 3718 affile->af_flagtype, afflist, affile->af_comproot)) 3719 flags |= WF_COMPROOT; 3720 if (affile->af_nosuggest != 0 && flag_in_afflist( 3721 affile->af_flagtype, afflist, affile->af_nosuggest)) 3722 flags |= WF_NOSUGGEST; 3723 return flags; 3724 } 3725 3726 /* 3727 * Get the list of prefix IDs from the affix list "afflist". 3728 * Used for PFXPOSTPONE. 3729 * Put the resulting flags in "store_afflist[MAXWLEN]" with a terminating NUL 3730 * and return the number of affixes. 3731 */ 3732 static int 3733 get_pfxlist( 3734 afffile_T *affile, 3735 char_u *afflist, 3736 char_u *store_afflist) 3737 { 3738 char_u *p; 3739 char_u *prevp; 3740 int cnt = 0; 3741 int id; 3742 char_u key[AH_KEY_LEN]; 3743 hashitem_T *hi; 3744 3745 for (p = afflist; *p != NUL; ) 3746 { 3747 prevp = p; 3748 if (get_affitem(affile->af_flagtype, &p) != 0) 3749 { 3750 // A flag is a postponed prefix flag if it appears in "af_pref" 3751 // and its ID is not zero. 
3752 vim_strncpy(key, prevp, p - prevp); 3753 hi = hash_find(&affile->af_pref, key); 3754 if (!HASHITEM_EMPTY(hi)) 3755 { 3756 id = HI2AH(hi)->ah_newID; 3757 if (id != 0) 3758 store_afflist[cnt++] = id; 3759 } 3760 } 3761 if (affile->af_flagtype == AFT_NUM && *p == ',') 3762 ++p; 3763 } 3764 3765 store_afflist[cnt] = NUL; 3766 return cnt; 3767 } 3768 3769 /* 3770 * Get the list of compound IDs from the affix list "afflist" that are used 3771 * for compound words. 3772 * Puts the flags in "store_afflist[]". 3773 */ 3774 static void 3775 get_compflags( 3776 afffile_T *affile, 3777 char_u *afflist, 3778 char_u *store_afflist) 3779 { 3780 char_u *p; 3781 char_u *prevp; 3782 int cnt = 0; 3783 char_u key[AH_KEY_LEN]; 3784 hashitem_T *hi; 3785 3786 for (p = afflist; *p != NUL; ) 3787 { 3788 prevp = p; 3789 if (get_affitem(affile->af_flagtype, &p) != 0) 3790 { 3791 // A flag is a compound flag if it appears in "af_comp". 3792 vim_strncpy(key, prevp, p - prevp); 3793 hi = hash_find(&affile->af_comp, key); 3794 if (!HASHITEM_EMPTY(hi)) 3795 store_afflist[cnt++] = HI2CI(hi)->ci_newID; 3796 } 3797 if (affile->af_flagtype == AFT_NUM && *p == ',') 3798 ++p; 3799 } 3800 3801 store_afflist[cnt] = NUL; 3802 } 3803 3804 /* 3805 * Apply affixes to a word and store the resulting words. 3806 * "ht" is the hashtable with affentry_T that need to be applied, either 3807 * prefixes or suffixes. 3808 * "xht", when not NULL, is the prefix hashtable, to be used additionally on 3809 * the resulting words for combining affixes. 3810 * 3811 * Returns FAIL when out of memory. 3812 */ 3813 static int 3814 store_aff_word( 3815 spellinfo_T *spin, // spell info 3816 char_u *word, // basic word start 3817 char_u *afflist, // list of names of supported affixes 3818 afffile_T *affile, 3819 hashtab_T *ht, 3820 hashtab_T *xht, 3821 int condit, // CONDIT_SUF et al. 
3822 int flags, // flags for the word 3823 char_u *pfxlist, // list of prefix IDs 3824 int pfxlen) // nr of flags in "pfxlist" for prefixes, rest 3825 // is compound flags 3826 { 3827 int todo; 3828 hashitem_T *hi; 3829 affheader_T *ah; 3830 affentry_T *ae; 3831 char_u newword[MAXWLEN]; 3832 int retval = OK; 3833 int i, j; 3834 char_u *p; 3835 int use_flags; 3836 char_u *use_pfxlist; 3837 int use_pfxlen; 3838 int need_affix; 3839 char_u store_afflist[MAXWLEN]; 3840 char_u pfx_pfxlist[MAXWLEN]; 3841 size_t wordlen = STRLEN(word); 3842 int use_condit; 3843 3844 todo = (int)ht->ht_used; 3845 for (hi = ht->ht_array; todo > 0 && retval == OK; ++hi) 3846 { 3847 if (!HASHITEM_EMPTY(hi)) 3848 { 3849 --todo; 3850 ah = HI2AH(hi); 3851 3852 // Check that the affix combines, if required, and that the word 3853 // supports this affix. 3854 if (((condit & CONDIT_COMB) == 0 || ah->ah_combine) 3855 && flag_in_afflist(affile->af_flagtype, afflist, 3856 ah->ah_flag)) 3857 { 3858 // Loop over all affix entries with this name. 3859 for (ae = ah->ah_first; ae != NULL; ae = ae->ae_next) 3860 { 3861 // Check the condition. It's not logical to match case 3862 // here, but it is required for compatibility with 3863 // Myspell. 3864 // Another requirement from Myspell is that the chop 3865 // string is shorter than the word itself. 3866 // For prefixes, when "PFXPOSTPONE" was used, only do 3867 // prefixes with a chop string and/or flags. 3868 // When a previously added affix had CIRCUMFIX this one 3869 // must have it too, if it had not then this one must not 3870 // have one either. 
3871 if ((xht != NULL || !affile->af_pfxpostpone 3872 || ae->ae_chop != NULL 3873 || ae->ae_flags != NULL) 3874 && (ae->ae_chop == NULL 3875 || STRLEN(ae->ae_chop) < wordlen) 3876 && (ae->ae_prog == NULL 3877 || vim_regexec_prog(&ae->ae_prog, FALSE, 3878 word, (colnr_T)0)) 3879 && (((condit & CONDIT_CFIX) == 0) 3880 == ((condit & CONDIT_AFF) == 0 3881 || ae->ae_flags == NULL 3882 || !flag_in_afflist(affile->af_flagtype, 3883 ae->ae_flags, affile->af_circumfix)))) 3884 { 3885 // Match. Remove the chop and add the affix. 3886 if (xht == NULL) 3887 { 3888 // prefix: chop/add at the start of the word 3889 if (ae->ae_add == NULL) 3890 *newword = NUL; 3891 else 3892 vim_strncpy(newword, ae->ae_add, MAXWLEN - 1); 3893 p = word; 3894 if (ae->ae_chop != NULL) 3895 { 3896 // Skip chop string. 3897 if (has_mbyte) 3898 { 3899 i = mb_charlen(ae->ae_chop); 3900 for ( ; i > 0; --i) 3901 MB_PTR_ADV(p); 3902 } 3903 else 3904 p += STRLEN(ae->ae_chop); 3905 } 3906 STRCAT(newword, p); 3907 } 3908 else 3909 { 3910 // suffix: chop/add at the end of the word 3911 vim_strncpy(newword, word, MAXWLEN - 1); 3912 if (ae->ae_chop != NULL) 3913 { 3914 // Remove chop string. 3915 p = newword + STRLEN(newword); 3916 i = (int)MB_CHARLEN(ae->ae_chop); 3917 for ( ; i > 0; --i) 3918 MB_PTR_BACK(newword, p); 3919 *p = NUL; 3920 } 3921 if (ae->ae_add != NULL) 3922 STRCAT(newword, ae->ae_add); 3923 } 3924 3925 use_flags = flags; 3926 use_pfxlist = pfxlist; 3927 use_pfxlen = pfxlen; 3928 need_affix = FALSE; 3929 use_condit = condit | CONDIT_COMB | CONDIT_AFF; 3930 if (ae->ae_flags != NULL) 3931 { 3932 // Extract flags from the affix list. 3933 use_flags |= get_affix_flags(affile, ae->ae_flags); 3934 3935 if (affile->af_needaffix != 0 && flag_in_afflist( 3936 affile->af_flagtype, ae->ae_flags, 3937 affile->af_needaffix)) 3938 need_affix = TRUE; 3939 3940 // When there is a CIRCUMFIX flag the other affix 3941 // must also have it and we don't add the word 3942 // with one affix. 
3943 if (affile->af_circumfix != 0 && flag_in_afflist( 3944 affile->af_flagtype, ae->ae_flags, 3945 affile->af_circumfix)) 3946 { 3947 use_condit |= CONDIT_CFIX; 3948 if ((condit & CONDIT_CFIX) == 0) 3949 need_affix = TRUE; 3950 } 3951 3952 if (affile->af_pfxpostpone 3953 || spin->si_compflags != NULL) 3954 { 3955 if (affile->af_pfxpostpone) 3956 // Get prefix IDS from the affix list. 3957 use_pfxlen = get_pfxlist(affile, 3958 ae->ae_flags, store_afflist); 3959 else 3960 use_pfxlen = 0; 3961 use_pfxlist = store_afflist; 3962 3963 // Combine the prefix IDs. Avoid adding the 3964 // same ID twice. 3965 for (i = 0; i < pfxlen; ++i) 3966 { 3967 for (j = 0; j < use_pfxlen; ++j) 3968 if (pfxlist[i] == use_pfxlist[j]) 3969 break; 3970 if (j == use_pfxlen) 3971 use_pfxlist[use_pfxlen++] = pfxlist[i]; 3972 } 3973 3974 if (spin->si_compflags != NULL) 3975 // Get compound IDS from the affix list. 3976 get_compflags(affile, ae->ae_flags, 3977 use_pfxlist + use_pfxlen); 3978 3979 // Combine the list of compound flags. 3980 // Concatenate them to the prefix IDs list. 3981 // Avoid adding the same ID twice. 3982 for (i = pfxlen; pfxlist[i] != NUL; ++i) 3983 { 3984 for (j = use_pfxlen; 3985 use_pfxlist[j] != NUL; ++j) 3986 if (pfxlist[i] == use_pfxlist[j]) 3987 break; 3988 if (use_pfxlist[j] == NUL) 3989 { 3990 use_pfxlist[j++] = pfxlist[i]; 3991 use_pfxlist[j] = NUL; 3992 } 3993 } 3994 } 3995 } 3996 3997 // Obey a "COMPOUNDFORBIDFLAG" of the affix: don't 3998 // use the compound flags. 3999 if (use_pfxlist != NULL && ae->ae_compforbid) 4000 { 4001 vim_strncpy(pfx_pfxlist, use_pfxlist, use_pfxlen); 4002 use_pfxlist = pfx_pfxlist; 4003 } 4004 4005 // When there are postponed prefixes... 4006 if (spin->si_prefroot != NULL 4007 && spin->si_prefroot->wn_sibling != NULL) 4008 { 4009 // ... add a flag to indicate an affix was used. 4010 use_flags |= WF_HAS_AFF; 4011 4012 // ... don't use a prefix list if combining 4013 // affixes is not allowed. 
But do use the 4014 // compound flags after them. 4015 if (!ah->ah_combine && use_pfxlist != NULL) 4016 use_pfxlist += use_pfxlen; 4017 } 4018 4019 // When compounding is supported and there is no 4020 // "COMPOUNDPERMITFLAG" then forbid compounding on the 4021 // side where the affix is applied. 4022 if (spin->si_compflags != NULL && !ae->ae_comppermit) 4023 { 4024 if (xht != NULL) 4025 use_flags |= WF_NOCOMPAFT; 4026 else 4027 use_flags |= WF_NOCOMPBEF; 4028 } 4029 4030 // Store the modified word. 4031 if (store_word(spin, newword, use_flags, 4032 spin->si_region, use_pfxlist, 4033 need_affix) == FAIL) 4034 retval = FAIL; 4035 4036 // When added a prefix or a first suffix and the affix 4037 // has flags may add a(nother) suffix. RECURSIVE! 4038 if ((condit & CONDIT_SUF) && ae->ae_flags != NULL) 4039 if (store_aff_word(spin, newword, ae->ae_flags, 4040 affile, &affile->af_suff, xht, 4041 use_condit & (xht == NULL 4042 ? ~0 : ~CONDIT_SUF), 4043 use_flags, use_pfxlist, pfxlen) == FAIL) 4044 retval = FAIL; 4045 4046 // When added a suffix and combining is allowed also 4047 // try adding a prefix additionally. Both for the 4048 // word flags and for the affix flags. RECURSIVE! 4049 if (xht != NULL && ah->ah_combine) 4050 { 4051 if (store_aff_word(spin, newword, 4052 afflist, affile, 4053 xht, NULL, use_condit, 4054 use_flags, use_pfxlist, 4055 pfxlen) == FAIL 4056 || (ae->ae_flags != NULL 4057 && store_aff_word(spin, newword, 4058 ae->ae_flags, affile, 4059 xht, NULL, use_condit, 4060 use_flags, use_pfxlist, 4061 pfxlen) == FAIL)) 4062 retval = FAIL; 4063 } 4064 } 4065 } 4066 } 4067 } 4068 } 4069 4070 return retval; 4071 } 4072 4073 /* 4074 * Read a file with a list of words. 
4075 */ 4076 static int 4077 spell_read_wordfile(spellinfo_T *spin, char_u *fname) 4078 { 4079 FILE *fd; 4080 long lnum = 0; 4081 char_u rline[MAXLINELEN]; 4082 char_u *line; 4083 char_u *pc = NULL; 4084 char_u *p; 4085 int l; 4086 int retval = OK; 4087 int did_word = FALSE; 4088 int non_ascii = 0; 4089 int flags; 4090 int regionmask; 4091 4092 /* 4093 * Open the file. 4094 */ 4095 fd = mch_fopen((char *)fname, "r"); 4096 if (fd == NULL) 4097 { 4098 semsg(_(e_notopen), fname); 4099 return FAIL; 4100 } 4101 4102 vim_snprintf((char *)IObuff, IOSIZE, _("Reading word file %s..."), fname); 4103 spell_message(spin, IObuff); 4104 4105 /* 4106 * Read all the lines in the file one by one. 4107 */ 4108 while (!vim_fgets(rline, MAXLINELEN, fd) && !got_int) 4109 { 4110 line_breakcheck(); 4111 ++lnum; 4112 4113 // Skip comment lines. 4114 if (*rline == '#') 4115 continue; 4116 4117 // Remove CR, LF and white space from the end. 4118 l = (int)STRLEN(rline); 4119 while (l > 0 && rline[l - 1] <= ' ') 4120 --l; 4121 if (l == 0) 4122 continue; // empty or blank line 4123 rline[l] = NUL; 4124 4125 // Convert from "/encoding={encoding}" to 'encoding' when needed. 4126 vim_free(pc); 4127 if (spin->si_conv.vc_type != CONV_NONE) 4128 { 4129 pc = string_convert(&spin->si_conv, rline, NULL); 4130 if (pc == NULL) 4131 { 4132 smsg(_("Conversion failure for word in %s line %ld: %s"), 4133 fname, lnum, rline); 4134 continue; 4135 } 4136 line = pc; 4137 } 4138 else 4139 { 4140 pc = NULL; 4141 line = rline; 4142 } 4143 4144 if (*line == '/') 4145 { 4146 ++line; 4147 if (STRNCMP(line, "encoding=", 9) == 0) 4148 { 4149 if (spin->si_conv.vc_type != CONV_NONE) 4150 smsg(_("Duplicate /encoding= line ignored in %s line %ld: %s"), 4151 fname, lnum, line - 1); 4152 else if (did_word) 4153 smsg(_("/encoding= line after word ignored in %s line %ld: %s"), 4154 fname, lnum, line - 1); 4155 else 4156 { 4157 char_u *enc; 4158 4159 // Setup for conversion to 'encoding'. 
4160 line += 9; 4161 enc = enc_canonize(line); 4162 if (enc != NULL && !spin->si_ascii 4163 && convert_setup(&spin->si_conv, enc, 4164 p_enc) == FAIL) 4165 smsg(_("Conversion in %s not supported: from %s to %s"), 4166 fname, line, p_enc); 4167 vim_free(enc); 4168 spin->si_conv.vc_fail = TRUE; 4169 } 4170 continue; 4171 } 4172 4173 if (STRNCMP(line, "regions=", 8) == 0) 4174 { 4175 if (spin->si_region_count > 1) 4176 smsg(_("Duplicate /regions= line ignored in %s line %ld: %s"), 4177 fname, lnum, line); 4178 else 4179 { 4180 line += 8; 4181 if (STRLEN(line) > MAXREGIONS * 2) 4182 smsg(_("Too many regions in %s line %ld: %s"), 4183 fname, lnum, line); 4184 else 4185 { 4186 spin->si_region_count = (int)STRLEN(line) / 2; 4187 STRCPY(spin->si_region_name, line); 4188 4189 // Adjust the mask for a word valid in all regions. 4190 spin->si_region = (1 << spin->si_region_count) - 1; 4191 } 4192 } 4193 continue; 4194 } 4195 4196 smsg(_("/ line ignored in %s line %ld: %s"), 4197 fname, lnum, line - 1); 4198 continue; 4199 } 4200 4201 flags = 0; 4202 regionmask = spin->si_region; 4203 4204 // Check for flags and region after a slash. 4205 p = vim_strchr(line, '/'); 4206 if (p != NULL) 4207 { 4208 *p++ = NUL; 4209 while (*p != NUL) 4210 { 4211 if (*p == '=') // keep-case word 4212 flags |= WF_KEEPCAP | WF_FIXCAP; 4213 else if (*p == '!') // Bad, bad, wicked word. 4214 flags |= WF_BANNED; 4215 else if (*p == '?') // Rare word. 
4216 flags |= WF_RARE; 4217 else if (VIM_ISDIGIT(*p)) // region number(s) 4218 { 4219 if ((flags & WF_REGION) == 0) // first one 4220 regionmask = 0; 4221 flags |= WF_REGION; 4222 4223 l = *p - '0'; 4224 if (l == 0 || l > spin->si_region_count) 4225 { 4226 smsg(_("Invalid region nr in %s line %ld: %s"), 4227 fname, lnum, p); 4228 break; 4229 } 4230 regionmask |= 1 << (l - 1); 4231 } 4232 else 4233 { 4234 smsg(_("Unrecognized flags in %s line %ld: %s"), 4235 fname, lnum, p); 4236 break; 4237 } 4238 ++p; 4239 } 4240 } 4241 4242 // Skip non-ASCII words when "spin->si_ascii" is TRUE. 4243 if (spin->si_ascii && has_non_ascii(line)) 4244 { 4245 ++non_ascii; 4246 continue; 4247 } 4248 4249 // Normal word: store it. 4250 if (store_word(spin, line, flags, regionmask, NULL, FALSE) == FAIL) 4251 { 4252 retval = FAIL; 4253 break; 4254 } 4255 did_word = TRUE; 4256 } 4257 4258 vim_free(pc); 4259 fclose(fd); 4260 4261 if (spin->si_ascii && non_ascii > 0) 4262 { 4263 vim_snprintf((char *)IObuff, IOSIZE, 4264 _("Ignored %d words with non-ASCII characters"), non_ascii); 4265 spell_message(spin, IObuff); 4266 } 4267 4268 return retval; 4269 } 4270 4271 /* 4272 * Get part of an sblock_T, "len" bytes long. 4273 * This avoids calling free() for every little struct we use (and keeping 4274 * track of them). 4275 * The memory is cleared to all zeros. 4276 * Returns NULL when out of memory. 4277 */ 4278 static void * 4279 getroom( 4280 spellinfo_T *spin, 4281 size_t len, // length needed 4282 int align) // align for pointer 4283 { 4284 char_u *p; 4285 sblock_T *bl = spin->si_blocks; 4286 4287 if (align && bl != NULL) 4288 // Round size up for alignment. On some systems structures need to be 4289 // aligned to the size of a pointer (e.g., SPARC). 4290 bl->sb_used = (bl->sb_used + sizeof(char *) - 1) 4291 & ~(sizeof(char *) - 1); 4292 4293 if (bl == NULL || bl->sb_used + len > SBLOCKSIZE) 4294 { 4295 if (len >= SBLOCKSIZE) 4296 bl = NULL; 4297 else 4298 // Allocate a block of memory. 
It is not freed until much later. 4299 bl = alloc_clear(sizeof(sblock_T) + SBLOCKSIZE); 4300 if (bl == NULL) 4301 { 4302 if (!spin->si_did_emsg) 4303 { 4304 emsg(_("E845: Insufficient memory, word list will be incomplete")); 4305 spin->si_did_emsg = TRUE; 4306 } 4307 return NULL; 4308 } 4309 bl->sb_next = spin->si_blocks; 4310 spin->si_blocks = bl; 4311 bl->sb_used = 0; 4312 ++spin->si_blocks_cnt; 4313 } 4314 4315 p = bl->sb_data + bl->sb_used; 4316 bl->sb_used += (int)len; 4317 4318 return p; 4319 } 4320 4321 /* 4322 * Make a copy of a string into memory allocated with getroom(). 4323 * Returns NULL when out of memory. 4324 */ 4325 static char_u * 4326 getroom_save(spellinfo_T *spin, char_u *s) 4327 { 4328 char_u *sc; 4329 4330 sc = (char_u *)getroom(spin, STRLEN(s) + 1, FALSE); 4331 if (sc != NULL) 4332 STRCPY(sc, s); 4333 return sc; 4334 } 4335 4336 4337 /* 4338 * Free the list of allocated sblock_T. 4339 */ 4340 static void 4341 free_blocks(sblock_T *bl) 4342 { 4343 sblock_T *next; 4344 4345 while (bl != NULL) 4346 { 4347 next = bl->sb_next; 4348 vim_free(bl); 4349 bl = next; 4350 } 4351 } 4352 4353 /* 4354 * Allocate the root of a word tree. 4355 * Returns NULL when out of memory. 4356 */ 4357 static wordnode_T * 4358 wordtree_alloc(spellinfo_T *spin) 4359 { 4360 return (wordnode_T *)getroom(spin, sizeof(wordnode_T), TRUE); 4361 } 4362 4363 /* 4364 * Store a word in the tree(s). 4365 * Always store it in the case-folded tree. For a keep-case word this is 4366 * useful when the word can also be used with all caps (no WF_FIXCAP flag) and 4367 * used to find suggestions. 4368 * For a keep-case word also store it in the keep-case tree. 4369 * When "pfxlist" is not NULL store the word for each postponed prefix ID and 4370 * compound flag. 
*/
    static int
store_word(
    spellinfo_T *spin,
    char_u      *word,
    int         flags,          // extra flags, WF_BANNED
    int         region,         // supported region(s)
    char_u      *pfxlist,       // list of prefix IDs or NULL
    int         need_affix)     // only store word with affix ID
{
    int         len = (int)STRLEN(word);
    int         ct = captype(word, word + len);
    char_u      foldword[MAXWLEN];
    int         res = OK;
    char_u      *p;

    // The result of spell_casefold() is not checked here.
    (void)spell_casefold(word, len, foldword, MAXWLEN);

    // Add the case-folded word once for every byte in "pfxlist", using that
    // byte as the affix ID.  The pass for the terminating NUL (or the single
    // pass when "pfxlist" is NULL) adds the word with affix ID zero, unless
    // "need_affix" is set.
    for (p = pfxlist; res == OK; ++p)
    {
        if (!need_affix || (p != NULL && *p != NUL))
            res = tree_add_word(spin, foldword, spin->si_foldroot, ct | flags,
                                                  region, p == NULL ? 0 : *p);
        if (p == NULL || *p == NUL)
            break;
    }
    ++spin->si_foldwcount;

    // A keep-case word is added to the keep-case tree as well, as typed and
    // in the same way as above.
    if (res == OK && (ct == WF_KEEPCAP || (flags & WF_KEEPCAP)))
    {
        for (p = pfxlist; res == OK; ++p)
        {
            if (!need_affix || (p != NULL && *p != NUL))
                res = tree_add_word(spin, word, spin->si_keeproot, flags,
                                                  region, p == NULL ? 0 : *p);
            if (p == NULL || *p == NUL)
                break;
        }
        ++spin->si_keepwcount;
    }
    return res;
}

/*
 * Add word "word" to a word tree at "root".
 * When "flags" < 0 we are adding to the prefix tree where "flags" is used for
 * "rare" and "region" is the condition nr.
 * "affixID" is stored in the node for the end of the word.
 * After adding the word the trees may be compressed, depending on the block
 * and word counters in "spin".
 * Returns FAIL when out of memory.
 */
    static int
tree_add_word(
    spellinfo_T *spin,
    char_u      *word,
    wordnode_T  *root,
    int         flags,
    int         region,
    int         affixID)
{
    wordnode_T  *node = root;
    wordnode_T  *np;
    wordnode_T  *copyp, **copyprev;
    wordnode_T  **prev = NULL;  // the pointer through which "node" was
                                // reached, NULL for the root
    int         i;

    // Add each byte of the word to the tree, including the NUL at the end.
    for (i = 0; ; ++i)
    {
        // When there is more than one reference to this node we need to make
        // a copy, so that we can modify it.  Copy the whole list of siblings
        // (we don't optimize for a partly shared list of siblings).
        if (node != NULL && node->wn_refs > 1)
        {
            --node->wn_refs;
            copyprev = prev;
            FOR_ALL_NODE_SIBLINGS(node, copyp)
            {
                // Allocate a new node and copy the info.
                np = get_wordnode(spin);
                if (np == NULL)
                    return FAIL;
                np->wn_child = copyp->wn_child;
                if (np->wn_child != NULL)
                    ++np->wn_child->wn_refs;    // child gets extra ref
                np->wn_byte = copyp->wn_byte;
                if (np->wn_byte == NUL)
                {
                    np->wn_flags = copyp->wn_flags;
                    np->wn_region = copyp->wn_region;
                    np->wn_affixID = copyp->wn_affixID;
                }

                // Link the new node in the list, there will be one ref.
                np->wn_refs = 1;
                if (copyprev != NULL)
                    *copyprev = np;
                copyprev = &np->wn_sibling;

                // Let "node" point to the head of the copied list.
                if (copyp == node)
                    node = np;
            }
        }

        // Look for the sibling that has the same character.  They are sorted
        // on byte value, thus stop searching when a sibling is found with a
        // higher byte value.  For zero bytes (end of word) the sorting is
        // done on flags and then on affixID.
        // (In the prefix tree, "flags" < 0, only affixID is compared; when
        // "si_sugtree" is set the region is compared instead of affixID.)
        while (node != NULL
                && (node->wn_byte < word[i]
                    || (node->wn_byte == NUL
                        && (flags < 0
                            ? node->wn_affixID < (unsigned)affixID
                            : (node->wn_flags < (unsigned)(flags & WN_MASK)
                                || (node->wn_flags == (flags & WN_MASK)
                                    && (spin->si_sugtree
                                        ? (node->wn_region & 0xffff) < region
                                        : node->wn_affixID
                                                    < (unsigned)affixID)))))))
        {
            prev = &node->wn_sibling;
            node = *prev;
        }

        // A new node is needed when no sibling has this byte.  For the end
        // of the word one is also needed when adding to the prefix tree or
        // the suggestion tree, or when flags or affixID differ.
        if (node == NULL
                || node->wn_byte != word[i]
                || (word[i] == NUL
                    && (flags < 0
                        || spin->si_sugtree
                        || node->wn_flags != (flags & WN_MASK)
                        || node->wn_affixID != affixID)))
        {
            // Allocate a new node.
            np = get_wordnode(spin);
            if (np == NULL)
                return FAIL;
            np->wn_byte = word[i];

            // If "node" is NULL this is a new child or the end of the sibling
            // list: ref count is one.  Otherwise use ref count of sibling and
            // make ref count of sibling one (matters when inserting in front
            // of the list of siblings).
            if (node == NULL)
                np->wn_refs = 1;
            else
            {
                np->wn_refs = node->wn_refs;
                node->wn_refs = 1;
            }
            if (prev != NULL)
                *prev = np;
            np->wn_sibling = node;
            node = np;
        }

        if (word[i] == NUL)
        {
            // End of the word: store the word info.  The region is added to
            // what was already there.
            node->wn_flags = flags;
            node->wn_region |= region;
            node->wn_affixID = affixID;
            break;
        }

        // Continue with the children of this node for the next byte.
        prev = &node->wn_child;
        node = *prev;
    }
#ifdef SPELL_PRINTTREE
    smsg("Added \"%s\"", word);
    spell_print_tree(root->wn_sibling);
#endif

    // count nr of words added since last message
    ++spin->si_msg_count;

    if (spin->si_compress_cnt > 1)
    {
        if (--spin->si_compress_cnt == 1)
            // Did enough words to lower the block count limit.
            spin->si_blocks_cnt += compress_inc;
    }

    /*
     * When we have allocated lots of memory we need to compress the word tree
     * to free up some room.  But compression is slow, and we might actually
     * need that room, thus only compress in the following situations:
     * 1. When not compressed before (si_compress_cnt == 0): when using
     *    "compress_start" blocks.
     * 2. When compressed before and used "compress_inc" blocks before
     *    adding "compress_added" words (si_compress_cnt > 1).
     * 3. When compressed before, added "compress_added" words
     *    (si_compress_cnt == 1) and the number of free nodes drops below the
     *    maximum word length.
     */
#ifndef SPELL_COMPRESS_ALLWAYS
    if (spin->si_compress_cnt == 1
            ? spin->si_free_count < MAXWLEN
            : spin->si_blocks_cnt >= compress_start)
#endif
    {
        // Decrement the block counter.  The effect is that we compress again
        // when the freed up room has been used and another "compress_inc"
        // blocks have been allocated.  Unless "compress_added" words have
        // been added, then the limit is put back again.
        spin->si_blocks_cnt -= compress_inc;
        spin->si_compress_cnt = compress_added;

        if (spin->si_verbose)
        {
            msg_start();
            msg_puts(_(msg_compressing));
            msg_clr_eos();
            msg_didout = FALSE;
            msg_col = 0;
            out_flush();
        }

        // Compress both trees.  Either they both have many nodes, which makes
        // compression useful, or one of them is small, which means
        // compression goes fast.  But when filling the soundfold word tree
        // there is no keep-case tree.
        // (A negative "affixID" is what makes the keep-case tree be skipped.)
        wordtree_compress(spin, spin->si_foldroot, "case-folded");
        if (affixID >= 0)
            wordtree_compress(spin, spin->si_keeproot, "keep-case");
    }

    return OK;
}

/*
 * Get a wordnode_T, either from the list of previously freed nodes or
 * allocate a new one.
 * A node taken from the free list is cleared before it is returned.
 * Returns NULL when out of memory.
 */
    static wordnode_T *
get_wordnode(spellinfo_T *spin)
{
    wordnode_T *n;

    if (spin->si_first_free == NULL)
        n = (wordnode_T *)getroom(spin, sizeof(wordnode_T), TRUE);
    else
    {
        // The free list is linked through "wn_child".
        n = spin->si_first_free;
        spin->si_first_free = n->wn_child;
        CLEAR_POINTER(n);
        --spin->si_free_count;
    }
#ifdef SPELL_PRINTTREE
    if (n != NULL)
        n->wn_nr = ++spin->si_wordnode_nr;
#endif
    return n;
}

/*
 * Decrement the reference count on a node (which is the head of a list of
 * siblings).  If the reference count becomes zero free the node and its
 * siblings.
 * Returns the number of nodes actually freed.
4626 */ 4627 static int 4628 deref_wordnode(spellinfo_T *spin, wordnode_T *node) 4629 { 4630 wordnode_T *np; 4631 int cnt = 0; 4632 4633 if (--node->wn_refs == 0) 4634 { 4635 FOR_ALL_NODE_SIBLINGS(node, np) 4636 { 4637 if (np->wn_child != NULL) 4638 cnt += deref_wordnode(spin, np->wn_child); 4639 free_wordnode(spin, np); 4640 ++cnt; 4641 } 4642 ++cnt; // length field 4643 } 4644 return cnt; 4645 } 4646 4647 /* 4648 * Free a wordnode_T for re-use later. 4649 * Only the "wn_child" field becomes invalid. 4650 */ 4651 static void 4652 free_wordnode(spellinfo_T *spin, wordnode_T *n) 4653 { 4654 n->wn_child = spin->si_first_free; 4655 spin->si_first_free = n; 4656 ++spin->si_free_count; 4657 } 4658 4659 /* 4660 * Compress a tree: find tails that are identical and can be shared. 4661 */ 4662 static void 4663 wordtree_compress(spellinfo_T *spin, wordnode_T *root, char *name) 4664 { 4665 hashtab_T ht; 4666 long n; 4667 long tot = 0; 4668 long perc; 4669 4670 // Skip the root itself, it's not actually used. The first sibling is the 4671 // start of the tree. 4672 if (root->wn_sibling != NULL) 4673 { 4674 hash_init(&ht); 4675 n = node_compress(spin, root->wn_sibling, &ht, &tot); 4676 4677 #ifndef SPELL_PRINTTREE 4678 if (spin->si_verbose || p_verbose > 2) 4679 #endif 4680 { 4681 if (tot > 1000000) 4682 perc = (tot - n) / (tot / 100); 4683 else if (tot == 0) 4684 perc = 0; 4685 else 4686 perc = (tot - n) * 100 / tot; 4687 vim_snprintf((char *)IObuff, IOSIZE, 4688 _("Compressed %s: %ld of %ld nodes; %ld (%ld%%) remaining"), 4689 name, n, tot, tot - n, perc); 4690 spell_message(spin, IObuff); 4691 } 4692 #ifdef SPELL_PRINTTREE 4693 spell_print_tree(root->wn_sibling); 4694 #endif 4695 hash_clear(&ht); 4696 } 4697 } 4698 4699 /* 4700 * Compress a node, its siblings and its children, depth first. 4701 * Returns the number of compressed nodes. 
4702 */ 4703 static long 4704 node_compress( 4705 spellinfo_T *spin, 4706 wordnode_T *node, 4707 hashtab_T *ht, 4708 long *tot) // total count of nodes before compressing, 4709 // incremented while going through the tree 4710 { 4711 wordnode_T *np; 4712 wordnode_T *tp; 4713 wordnode_T *child; 4714 hash_T hash; 4715 hashitem_T *hi; 4716 long len = 0; 4717 unsigned nr, n; 4718 long compressed = 0; 4719 4720 /* 4721 * Go through the list of siblings. Compress each child and then try 4722 * finding an identical child to replace it. 4723 * Note that with "child" we mean not just the node that is pointed to, 4724 * but the whole list of siblings of which the child node is the first. 4725 */ 4726 for (np = node; np != NULL && !got_int; np = np->wn_sibling) 4727 { 4728 ++len; 4729 if ((child = np->wn_child) != NULL) 4730 { 4731 // Compress the child first. This fills hashkey. 4732 compressed += node_compress(spin, child, ht, tot); 4733 4734 // Try to find an identical child. 4735 hash = hash_hash(child->wn_u1.hashkey); 4736 hi = hash_lookup(ht, child->wn_u1.hashkey, hash); 4737 if (!HASHITEM_EMPTY(hi)) 4738 { 4739 // There are children we encountered before with a hash value 4740 // identical to the current child. Now check if there is one 4741 // that is really identical. 4742 for (tp = HI2WN(hi); tp != NULL; tp = tp->wn_u2.next) 4743 if (node_equal(child, tp)) 4744 { 4745 // Found one! Now use that child in place of the 4746 // current one. This means the current child and all 4747 // its siblings is unlinked from the tree. 4748 ++tp->wn_refs; 4749 compressed += deref_wordnode(spin, child); 4750 np->wn_child = tp; 4751 break; 4752 } 4753 if (tp == NULL) 4754 { 4755 // No other child with this hash value equals the child of 4756 // the node, add it to the linked list after the first 4757 // item. 
4758 tp = HI2WN(hi); 4759 child->wn_u2.next = tp->wn_u2.next; 4760 tp->wn_u2.next = child; 4761 } 4762 } 4763 else 4764 // No other child has this hash value, add it to the 4765 // hashtable. 4766 hash_add_item(ht, hi, child->wn_u1.hashkey, hash); 4767 } 4768 } 4769 *tot += len + 1; // add one for the node that stores the length 4770 4771 /* 4772 * Make a hash key for the node and its siblings, so that we can quickly 4773 * find a lookalike node. This must be done after compressing the sibling 4774 * list, otherwise the hash key would become invalid by the compression. 4775 */ 4776 node->wn_u1.hashkey[0] = len; 4777 nr = 0; 4778 FOR_ALL_NODE_SIBLINGS(node, np) 4779 { 4780 if (np->wn_byte == NUL) 4781 // end node: use wn_flags, wn_region and wn_affixID 4782 n = np->wn_flags + (np->wn_region << 8) + (np->wn_affixID << 16); 4783 else 4784 // byte node: use the byte value and the child pointer 4785 n = (unsigned)(np->wn_byte + ((long_u)np->wn_child << 8)); 4786 nr = nr * 101 + n; 4787 } 4788 4789 // Avoid NUL bytes, it terminates the hash key. 4790 n = nr & 0xff; 4791 node->wn_u1.hashkey[1] = n == 0 ? 1 : n; 4792 n = (nr >> 8) & 0xff; 4793 node->wn_u1.hashkey[2] = n == 0 ? 1 : n; 4794 n = (nr >> 16) & 0xff; 4795 node->wn_u1.hashkey[3] = n == 0 ? 1 : n; 4796 n = (nr >> 24) & 0xff; 4797 node->wn_u1.hashkey[4] = n == 0 ? 1 : n; 4798 node->wn_u1.hashkey[5] = NUL; 4799 4800 // Check for CTRL-C pressed now and then. 4801 veryfast_breakcheck(); 4802 4803 return compressed; 4804 } 4805 4806 /* 4807 * Return TRUE when two nodes have identical siblings and children. 4808 */ 4809 static int 4810 node_equal(wordnode_T *n1, wordnode_T *n2) 4811 { 4812 wordnode_T *p1; 4813 wordnode_T *p2; 4814 4815 for (p1 = n1, p2 = n2; p1 != NULL && p2 != NULL; 4816 p1 = p1->wn_sibling, p2 = p2->wn_sibling) 4817 if (p1->wn_byte != p2->wn_byte 4818 || (p1->wn_byte == NUL 4819 ? 
(p1->wn_flags != p2->wn_flags 4820 || p1->wn_region != p2->wn_region 4821 || p1->wn_affixID != p2->wn_affixID) 4822 : (p1->wn_child != p2->wn_child))) 4823 break; 4824 4825 return p1 == NULL && p2 == NULL; 4826 } 4827 4828 static int rep_compare(const void *s1, const void *s2); 4829 4830 /* 4831 * Function given to qsort() to sort the REP items on "from" string. 4832 */ 4833 static int 4834 rep_compare(const void *s1, const void *s2) 4835 { 4836 fromto_T *p1 = (fromto_T *)s1; 4837 fromto_T *p2 = (fromto_T *)s2; 4838 4839 return STRCMP(p1->ft_from, p2->ft_from); 4840 } 4841 4842 /* 4843 * Write the Vim .spl file "fname". 4844 * Return FAIL or OK; 4845 */ 4846 static int 4847 write_vim_spell(spellinfo_T *spin, char_u *fname) 4848 { 4849 FILE *fd; 4850 int regionmask; 4851 int round; 4852 wordnode_T *tree; 4853 int nodecount; 4854 int i; 4855 int l; 4856 garray_T *gap; 4857 fromto_T *ftp; 4858 char_u *p; 4859 int rr; 4860 int retval = OK; 4861 size_t fwv = 1; // collect return value of fwrite() to avoid 4862 // warnings from picky compiler 4863 4864 fd = mch_fopen((char *)fname, "w"); 4865 if (fd == NULL) 4866 { 4867 semsg(_(e_notopen), fname); 4868 return FAIL; 4869 } 4870 4871 // <HEADER>: <fileID> <versionnr> 4872 // <fileID> 4873 fwv &= fwrite(VIMSPELLMAGIC, VIMSPELLMAGICL, (size_t)1, fd); 4874 if (fwv != (size_t)1) 4875 // Catch first write error, don't try writing more. 4876 goto theend; 4877 4878 putc(VIMSPELLVERSION, fd); // <versionnr> 4879 4880 /* 4881 * <SECTIONS>: <section> ... <sectionend> 4882 */ 4883 4884 // SN_INFO: <infotext> 4885 if (spin->si_info != NULL) 4886 { 4887 putc(SN_INFO, fd); // <sectionID> 4888 putc(0, fd); // <sectionflags> 4889 4890 i = (int)STRLEN(spin->si_info); 4891 put_bytes(fd, (long_u)i, 4); // <sectionlen> 4892 fwv &= fwrite(spin->si_info, (size_t)i, (size_t)1, fd); // <infotext> 4893 } 4894 4895 // SN_REGION: <regionname> ... 4896 // Write the region names only if there is more than one. 
4897 if (spin->si_region_count > 1) 4898 { 4899 putc(SN_REGION, fd); // <sectionID> 4900 putc(SNF_REQUIRED, fd); // <sectionflags> 4901 l = spin->si_region_count * 2; 4902 put_bytes(fd, (long_u)l, 4); // <sectionlen> 4903 fwv &= fwrite(spin->si_region_name, (size_t)l, (size_t)1, fd); 4904 // <regionname> ... 4905 regionmask = (1 << spin->si_region_count) - 1; 4906 } 4907 else 4908 regionmask = 0; 4909 4910 // SN_CHARFLAGS: <charflagslen> <charflags> <folcharslen> <folchars> 4911 // 4912 // The table with character flags and the table for case folding. 4913 // This makes sure the same characters are recognized as word characters 4914 // when generating an when using a spell file. 4915 // Skip this for ASCII, the table may conflict with the one used for 4916 // 'encoding'. 4917 // Also skip this for an .add.spl file, the main spell file must contain 4918 // the table (avoids that it conflicts). File is shorter too. 4919 if (!spin->si_ascii && !spin->si_add) 4920 { 4921 char_u folchars[128 * 8]; 4922 int flags; 4923 4924 putc(SN_CHARFLAGS, fd); // <sectionID> 4925 putc(SNF_REQUIRED, fd); // <sectionflags> 4926 4927 // Form the <folchars> string first, we need to know its length. 
4928 l = 0; 4929 for (i = 128; i < 256; ++i) 4930 { 4931 if (has_mbyte) 4932 l += mb_char2bytes(spelltab.st_fold[i], folchars + l); 4933 else 4934 folchars[l++] = spelltab.st_fold[i]; 4935 } 4936 put_bytes(fd, (long_u)(1 + 128 + 2 + l), 4); // <sectionlen> 4937 4938 fputc(128, fd); // <charflagslen> 4939 for (i = 128; i < 256; ++i) 4940 { 4941 flags = 0; 4942 if (spelltab.st_isw[i]) 4943 flags |= CF_WORD; 4944 if (spelltab.st_isu[i]) 4945 flags |= CF_UPPER; 4946 fputc(flags, fd); // <charflags> 4947 } 4948 4949 put_bytes(fd, (long_u)l, 2); // <folcharslen> 4950 fwv &= fwrite(folchars, (size_t)l, (size_t)1, fd); // <folchars> 4951 } 4952 4953 // SN_MIDWORD: <midword> 4954 if (spin->si_midword != NULL) 4955 { 4956 putc(SN_MIDWORD, fd); // <sectionID> 4957 putc(SNF_REQUIRED, fd); // <sectionflags> 4958 4959 i = (int)STRLEN(spin->si_midword); 4960 put_bytes(fd, (long_u)i, 4); // <sectionlen> 4961 fwv &= fwrite(spin->si_midword, (size_t)i, (size_t)1, fd); 4962 // <midword> 4963 } 4964 4965 // SN_PREFCOND: <prefcondcnt> <prefcond> ... 4966 if (spin->si_prefcond.ga_len > 0) 4967 { 4968 putc(SN_PREFCOND, fd); // <sectionID> 4969 putc(SNF_REQUIRED, fd); // <sectionflags> 4970 4971 l = write_spell_prefcond(NULL, &spin->si_prefcond); 4972 put_bytes(fd, (long_u)l, 4); // <sectionlen> 4973 4974 write_spell_prefcond(fd, &spin->si_prefcond); 4975 } 4976 4977 // SN_REP: <repcount> <rep> ... 4978 // SN_SAL: <salflags> <salcount> <sal> ... 4979 // SN_REPSAL: <repcount> <rep> ... 
4980 4981 // round 1: SN_REP section 4982 // round 2: SN_SAL section (unless SN_SOFO is used) 4983 // round 3: SN_REPSAL section 4984 for (round = 1; round <= 3; ++round) 4985 { 4986 if (round == 1) 4987 gap = &spin->si_rep; 4988 else if (round == 2) 4989 { 4990 // Don't write SN_SAL when using a SN_SOFO section 4991 if (spin->si_sofofr != NULL && spin->si_sofoto != NULL) 4992 continue; 4993 gap = &spin->si_sal; 4994 } 4995 else 4996 gap = &spin->si_repsal; 4997 4998 // Don't write the section if there are no items. 4999 if (gap->ga_len == 0) 5000 continue; 5001 5002 // Sort the REP/REPSAL items. 5003 if (round != 2) 5004 qsort(gap->ga_data, (size_t)gap->ga_len, 5005 sizeof(fromto_T), rep_compare); 5006 5007 i = round == 1 ? SN_REP : (round == 2 ? SN_SAL : SN_REPSAL); 5008 putc(i, fd); // <sectionID> 5009 5010 // This is for making suggestions, section is not required. 5011 putc(0, fd); // <sectionflags> 5012 5013 // Compute the length of what follows. 5014 l = 2; // count <repcount> or <salcount> 5015 for (i = 0; i < gap->ga_len; ++i) 5016 { 5017 ftp = &((fromto_T *)gap->ga_data)[i]; 5018 l += 1 + (int)STRLEN(ftp->ft_from); // count <*fromlen> and <*from> 5019 l += 1 + (int)STRLEN(ftp->ft_to); // count <*tolen> and <*to> 5020 } 5021 if (round == 2) 5022 ++l; // count <salflags> 5023 put_bytes(fd, (long_u)l, 4); // <sectionlen> 5024 5025 if (round == 2) 5026 { 5027 i = 0; 5028 if (spin->si_followup) 5029 i |= SAL_F0LLOWUP; 5030 if (spin->si_collapse) 5031 i |= SAL_COLLAPSE; 5032 if (spin->si_rem_accents) 5033 i |= SAL_REM_ACCENTS; 5034 putc(i, fd); // <salflags> 5035 } 5036 5037 put_bytes(fd, (long_u)gap->ga_len, 2); // <repcount> or <salcount> 5038 for (i = 0; i < gap->ga_len; ++i) 5039 { 5040 // <rep> : <repfromlen> <repfrom> <reptolen> <repto> 5041 // <sal> : <salfromlen> <salfrom> <saltolen> <salto> 5042 ftp = &((fromto_T *)gap->ga_data)[i]; 5043 for (rr = 1; rr <= 2; ++rr) 5044 { 5045 p = rr == 1 ? 
ftp->ft_from : ftp->ft_to; 5046 l = (int)STRLEN(p); 5047 putc(l, fd); 5048 if (l > 0) 5049 fwv &= fwrite(p, l, (size_t)1, fd); 5050 } 5051 } 5052 5053 } 5054 5055 // SN_SOFO: <sofofromlen> <sofofrom> <sofotolen> <sofoto> 5056 // This is for making suggestions, section is not required. 5057 if (spin->si_sofofr != NULL && spin->si_sofoto != NULL) 5058 { 5059 putc(SN_SOFO, fd); // <sectionID> 5060 putc(0, fd); // <sectionflags> 5061 5062 l = (int)STRLEN(spin->si_sofofr); 5063 put_bytes(fd, (long_u)(l + STRLEN(spin->si_sofoto) + 4), 4); 5064 // <sectionlen> 5065 5066 put_bytes(fd, (long_u)l, 2); // <sofofromlen> 5067 fwv &= fwrite(spin->si_sofofr, l, (size_t)1, fd); // <sofofrom> 5068 5069 l = (int)STRLEN(spin->si_sofoto); 5070 put_bytes(fd, (long_u)l, 2); // <sofotolen> 5071 fwv &= fwrite(spin->si_sofoto, l, (size_t)1, fd); // <sofoto> 5072 } 5073 5074 // SN_WORDS: <word> ... 5075 // This is for making suggestions, section is not required. 5076 if (spin->si_commonwords.ht_used > 0) 5077 { 5078 putc(SN_WORDS, fd); // <sectionID> 5079 putc(0, fd); // <sectionflags> 5080 5081 // round 1: count the bytes 5082 // round 2: write the bytes 5083 for (round = 1; round <= 2; ++round) 5084 { 5085 int todo; 5086 int len = 0; 5087 hashitem_T *hi; 5088 5089 todo = (int)spin->si_commonwords.ht_used; 5090 for (hi = spin->si_commonwords.ht_array; todo > 0; ++hi) 5091 if (!HASHITEM_EMPTY(hi)) 5092 { 5093 l = (int)STRLEN(hi->hi_key) + 1; 5094 len += l; 5095 if (round == 2) // <word> 5096 fwv &= fwrite(hi->hi_key, (size_t)l, (size_t)1, fd); 5097 --todo; 5098 } 5099 if (round == 1) 5100 put_bytes(fd, (long_u)len, 4); // <sectionlen> 5101 } 5102 } 5103 5104 // SN_MAP: <mapstr> 5105 // This is for making suggestions, section is not required. 
5106 if (spin->si_map.ga_len > 0) 5107 { 5108 putc(SN_MAP, fd); // <sectionID> 5109 putc(0, fd); // <sectionflags> 5110 l = spin->si_map.ga_len; 5111 put_bytes(fd, (long_u)l, 4); // <sectionlen> 5112 fwv &= fwrite(spin->si_map.ga_data, (size_t)l, (size_t)1, fd); 5113 // <mapstr> 5114 } 5115 5116 // SN_SUGFILE: <timestamp> 5117 // This is used to notify that a .sug file may be available and at the 5118 // same time allows for checking that a .sug file that is found matches 5119 // with this .spl file. That's because the word numbers must be exactly 5120 // right. 5121 if (!spin->si_nosugfile 5122 && (spin->si_sal.ga_len > 0 5123 || (spin->si_sofofr != NULL && spin->si_sofoto != NULL))) 5124 { 5125 putc(SN_SUGFILE, fd); // <sectionID> 5126 putc(0, fd); // <sectionflags> 5127 put_bytes(fd, (long_u)8, 4); // <sectionlen> 5128 5129 // Set si_sugtime and write it to the file. 5130 spin->si_sugtime = time(NULL); 5131 put_time(fd, spin->si_sugtime); // <timestamp> 5132 } 5133 5134 // SN_NOSPLITSUGS: nothing 5135 // This is used to notify that no suggestions with word splits are to be 5136 // made. 5137 if (spin->si_nosplitsugs) 5138 { 5139 putc(SN_NOSPLITSUGS, fd); // <sectionID> 5140 putc(0, fd); // <sectionflags> 5141 put_bytes(fd, (long_u)0, 4); // <sectionlen> 5142 } 5143 5144 // SN_NOCOMPUNDSUGS: nothing 5145 // This is used to notify that no suggestions with compounds are to be 5146 // made. 5147 if (spin->si_nocompoundsugs) 5148 { 5149 putc(SN_NOCOMPOUNDSUGS, fd); // <sectionID> 5150 putc(0, fd); // <sectionflags> 5151 put_bytes(fd, (long_u)0, 4); // <sectionlen> 5152 } 5153 5154 // SN_COMPOUND: compound info. 5155 // We don't mark it required, when not supported all compound words will 5156 // be bad words. 
5157 if (spin->si_compflags != NULL) 5158 { 5159 putc(SN_COMPOUND, fd); // <sectionID> 5160 putc(0, fd); // <sectionflags> 5161 5162 l = (int)STRLEN(spin->si_compflags); 5163 for (i = 0; i < spin->si_comppat.ga_len; ++i) 5164 l += (int)STRLEN(((char_u **)(spin->si_comppat.ga_data))[i]) + 1; 5165 put_bytes(fd, (long_u)(l + 7), 4); // <sectionlen> 5166 5167 putc(spin->si_compmax, fd); // <compmax> 5168 putc(spin->si_compminlen, fd); // <compminlen> 5169 putc(spin->si_compsylmax, fd); // <compsylmax> 5170 putc(0, fd); // for Vim 7.0b compatibility 5171 putc(spin->si_compoptions, fd); // <compoptions> 5172 put_bytes(fd, (long_u)spin->si_comppat.ga_len, 2); 5173 // <comppatcount> 5174 for (i = 0; i < spin->si_comppat.ga_len; ++i) 5175 { 5176 p = ((char_u **)(spin->si_comppat.ga_data))[i]; 5177 putc((int)STRLEN(p), fd); // <comppatlen> 5178 fwv &= fwrite(p, (size_t)STRLEN(p), (size_t)1, fd); 5179 // <comppattext> 5180 } 5181 // <compflags> 5182 fwv &= fwrite(spin->si_compflags, (size_t)STRLEN(spin->si_compflags), 5183 (size_t)1, fd); 5184 } 5185 5186 // SN_NOBREAK: NOBREAK flag 5187 if (spin->si_nobreak) 5188 { 5189 putc(SN_NOBREAK, fd); // <sectionID> 5190 putc(0, fd); // <sectionflags> 5191 5192 // It's empty, the presence of the section flags the feature. 5193 put_bytes(fd, (long_u)0, 4); // <sectionlen> 5194 } 5195 5196 // SN_SYLLABLE: syllable info. 5197 // We don't mark it required, when not supported syllables will not be 5198 // counted. 
5199 if (spin->si_syllable != NULL) 5200 { 5201 putc(SN_SYLLABLE, fd); // <sectionID> 5202 putc(0, fd); // <sectionflags> 5203 5204 l = (int)STRLEN(spin->si_syllable); 5205 put_bytes(fd, (long_u)l, 4); // <sectionlen> 5206 fwv &= fwrite(spin->si_syllable, (size_t)l, (size_t)1, fd); 5207 // <syllable> 5208 } 5209 5210 // end of <SECTIONS> 5211 putc(SN_END, fd); // <sectionend> 5212 5213 5214 /* 5215 * <LWORDTREE> <KWORDTREE> <PREFIXTREE> 5216 */ 5217 spin->si_memtot = 0; 5218 for (round = 1; round <= 3; ++round) 5219 { 5220 if (round == 1) 5221 tree = spin->si_foldroot->wn_sibling; 5222 else if (round == 2) 5223 tree = spin->si_keeproot->wn_sibling; 5224 else 5225 tree = spin->si_prefroot->wn_sibling; 5226 5227 // Clear the index and wnode fields in the tree. 5228 clear_node(tree); 5229 5230 // Count the number of nodes. Needed to be able to allocate the 5231 // memory when reading the nodes. Also fills in index for shared 5232 // nodes. 5233 nodecount = put_node(NULL, tree, 0, regionmask, round == 3); 5234 5235 // number of nodes in 4 bytes 5236 put_bytes(fd, (long_u)nodecount, 4); // <nodecount> 5237 spin->si_memtot += nodecount + nodecount * sizeof(int); 5238 5239 // Write the nodes. 5240 (void)put_node(fd, tree, 0, regionmask, round == 3); 5241 } 5242 5243 // Write another byte to check for errors (file system full). 5244 if (putc(0, fd) == EOF) 5245 retval = FAIL; 5246 theend: 5247 if (fclose(fd) == EOF) 5248 retval = FAIL; 5249 5250 if (fwv != (size_t)1) 5251 retval = FAIL; 5252 if (retval == FAIL) 5253 emsg(_(e_write)); 5254 5255 return retval; 5256 } 5257 5258 /* 5259 * Clear the index and wnode fields of "node", it siblings and its 5260 * children. This is needed because they are a union with other items to save 5261 * space. 
5262 */ 5263 static void 5264 clear_node(wordnode_T *node) 5265 { 5266 wordnode_T *np; 5267 5268 if (node != NULL) 5269 FOR_ALL_NODE_SIBLINGS(node, np) 5270 { 5271 np->wn_u1.index = 0; 5272 np->wn_u2.wnode = NULL; 5273 5274 if (np->wn_byte != NUL) 5275 clear_node(np->wn_child); 5276 } 5277 } 5278 5279 5280 /* 5281 * Dump a word tree at node "node". 5282 * 5283 * This first writes the list of possible bytes (siblings). Then for each 5284 * byte recursively write the children. 5285 * 5286 * NOTE: The code here must match the code in read_tree_node(), since 5287 * assumptions are made about the indexes (so that we don't have to write them 5288 * in the file). 5289 * 5290 * Returns the number of nodes used. 5291 */ 5292 static int 5293 put_node( 5294 FILE *fd, // NULL when only counting 5295 wordnode_T *node, 5296 int idx, 5297 int regionmask, 5298 int prefixtree) // TRUE for PREFIXTREE 5299 { 5300 int newindex = idx; 5301 int siblingcount = 0; 5302 wordnode_T *np; 5303 int flags; 5304 5305 // If "node" is zero the tree is empty. 5306 if (node == NULL) 5307 return 0; 5308 5309 // Store the index where this node is written. 5310 node->wn_u1.index = idx; 5311 5312 // Count the number of siblings. 5313 FOR_ALL_NODE_SIBLINGS(node, np) 5314 ++siblingcount; 5315 5316 // Write the sibling count. 5317 if (fd != NULL) 5318 putc(siblingcount, fd); // <siblingcount> 5319 5320 // Write each sibling byte and optionally extra info. 5321 FOR_ALL_NODE_SIBLINGS(node, np) 5322 { 5323 if (np->wn_byte == 0) 5324 { 5325 if (fd != NULL) 5326 { 5327 // For a NUL byte (end of word) write the flags etc. 5328 if (prefixtree) 5329 { 5330 // In PREFIXTREE write the required affixID and the 5331 // associated condition nr (stored in wn_region). 
The 5332 // byte value is misused to store the "rare" and "not 5333 // combining" flags 5334 if (np->wn_flags == (short_u)PFX_FLAGS) 5335 putc(BY_NOFLAGS, fd); // <byte> 5336 else 5337 { 5338 putc(BY_FLAGS, fd); // <byte> 5339 putc(np->wn_flags, fd); // <pflags> 5340 } 5341 putc(np->wn_affixID, fd); // <affixID> 5342 put_bytes(fd, (long_u)np->wn_region, 2); // <prefcondnr> 5343 } 5344 else 5345 { 5346 // For word trees we write the flag/region items. 5347 flags = np->wn_flags; 5348 if (regionmask != 0 && np->wn_region != regionmask) 5349 flags |= WF_REGION; 5350 if (np->wn_affixID != 0) 5351 flags |= WF_AFX; 5352 if (flags == 0) 5353 { 5354 // word without flags or region 5355 putc(BY_NOFLAGS, fd); // <byte> 5356 } 5357 else 5358 { 5359 if (np->wn_flags >= 0x100) 5360 { 5361 putc(BY_FLAGS2, fd); // <byte> 5362 putc(flags, fd); // <flags> 5363 putc((unsigned)flags >> 8, fd); // <flags2> 5364 } 5365 else 5366 { 5367 putc(BY_FLAGS, fd); // <byte> 5368 putc(flags, fd); // <flags> 5369 } 5370 if (flags & WF_REGION) 5371 putc(np->wn_region, fd); // <region> 5372 if (flags & WF_AFX) 5373 putc(np->wn_affixID, fd); // <affixID> 5374 } 5375 } 5376 } 5377 } 5378 else 5379 { 5380 if (np->wn_child->wn_u1.index != 0 5381 && np->wn_child->wn_u2.wnode != node) 5382 { 5383 // The child is written elsewhere, write the reference. 5384 if (fd != NULL) 5385 { 5386 putc(BY_INDEX, fd); // <byte> 5387 // <nodeidx> 5388 put_bytes(fd, (long_u)np->wn_child->wn_u1.index, 3); 5389 } 5390 } 5391 else if (np->wn_child->wn_u2.wnode == NULL) 5392 // We will write the child below and give it an index. 5393 np->wn_child->wn_u2.wnode = node; 5394 5395 if (fd != NULL) 5396 if (putc(np->wn_byte, fd) == EOF) // <byte> or <xbyte> 5397 { 5398 emsg(_(e_write)); 5399 return 0; 5400 } 5401 } 5402 } 5403 5404 // Space used in the array when reading: one for each sibling and one for 5405 // the count. 5406 newindex += siblingcount + 1; 5407 5408 // Recursively dump the children of each sibling. 
5409 FOR_ALL_NODE_SIBLINGS(node, np) 5410 if (np->wn_byte != 0 && np->wn_child->wn_u2.wnode == node) 5411 newindex = put_node(fd, np->wn_child, newindex, regionmask, 5412 prefixtree); 5413 5414 return newindex; 5415 } 5416 5417 5418 /* 5419 * ":mkspell [-ascii] outfile infile ..." 5420 * ":mkspell [-ascii] addfile" 5421 */ 5422 void 5423 ex_mkspell(exarg_T *eap) 5424 { 5425 int fcount; 5426 char_u **fnames; 5427 char_u *arg = eap->arg; 5428 int ascii = FALSE; 5429 5430 if (STRNCMP(arg, "-ascii", 6) == 0) 5431 { 5432 ascii = TRUE; 5433 arg = skipwhite(arg + 6); 5434 } 5435 5436 // Expand all the remaining arguments (e.g., $VIMRUNTIME). 5437 if (get_arglist_exp(arg, &fcount, &fnames, FALSE) == OK) 5438 { 5439 mkspell(fcount, fnames, ascii, eap->forceit, FALSE); 5440 FreeWild(fcount, fnames); 5441 } 5442 } 5443 5444 /* 5445 * Create the .sug file. 5446 * Uses the soundfold info in "spin". 5447 * Writes the file with the name "wfname", with ".spl" changed to ".sug". 5448 */ 5449 static void 5450 spell_make_sugfile(spellinfo_T *spin, char_u *wfname) 5451 { 5452 char_u *fname = NULL; 5453 int len; 5454 slang_T *slang; 5455 int free_slang = FALSE; 5456 5457 /* 5458 * Read back the .spl file that was written. This fills the required 5459 * info for soundfolding. This also uses less memory than the 5460 * pointer-linked version of the trie. And it avoids having two versions 5461 * of the code for the soundfolding stuff. 5462 * It might have been done already by spell_reload_one(). 5463 */ 5464 FOR_ALL_SPELL_LANGS(slang) 5465 if (fullpathcmp(wfname, slang->sl_fname, FALSE, TRUE) == FPC_SAME) 5466 break; 5467 if (slang == NULL) 5468 { 5469 spell_message(spin, (char_u *)_("Reading back spell file...")); 5470 slang = spell_load_file(wfname, NULL, NULL, FALSE); 5471 if (slang == NULL) 5472 return; 5473 free_slang = TRUE; 5474 } 5475 5476 /* 5477 * Clear the info in "spin" that is used. 
5478 */ 5479 spin->si_blocks = NULL; 5480 spin->si_blocks_cnt = 0; 5481 spin->si_compress_cnt = 0; // will stay at 0 all the time 5482 spin->si_free_count = 0; 5483 spin->si_first_free = NULL; 5484 spin->si_foldwcount = 0; 5485 5486 /* 5487 * Go through the trie of good words, soundfold each word and add it to 5488 * the soundfold trie. 5489 */ 5490 spell_message(spin, (char_u *)_("Performing soundfolding...")); 5491 if (sug_filltree(spin, slang) == FAIL) 5492 goto theend; 5493 5494 /* 5495 * Create the table which links each soundfold word with a list of the 5496 * good words it may come from. Creates buffer "spin->si_spellbuf". 5497 * This also removes the wordnr from the NUL byte entries to make 5498 * compression possible. 5499 */ 5500 if (sug_maketable(spin) == FAIL) 5501 goto theend; 5502 5503 smsg(_("Number of words after soundfolding: %ld"), 5504 (long)spin->si_spellbuf->b_ml.ml_line_count); 5505 5506 /* 5507 * Compress the soundfold trie. 5508 */ 5509 spell_message(spin, (char_u *)_(msg_compressing)); 5510 wordtree_compress(spin, spin->si_foldroot, "case-folded"); 5511 5512 /* 5513 * Write the .sug file. 5514 * Make the file name by changing ".spl" to ".sug". 5515 */ 5516 fname = alloc(MAXPATHL); 5517 if (fname == NULL) 5518 goto theend; 5519 vim_strncpy(fname, wfname, MAXPATHL - 1); 5520 len = (int)STRLEN(fname); 5521 fname[len - 2] = 'u'; 5522 fname[len - 1] = 'g'; 5523 sug_write(spin, fname); 5524 5525 theend: 5526 vim_free(fname); 5527 if (free_slang) 5528 slang_free(slang); 5529 free_blocks(spin->si_blocks); 5530 close_spellbuf(spin->si_spellbuf); 5531 } 5532 5533 /* 5534 * Build the soundfold trie for language "slang". 
5535 */ 5536 static int 5537 sug_filltree(spellinfo_T *spin, slang_T *slang) 5538 { 5539 char_u *byts; 5540 idx_T *idxs; 5541 int depth; 5542 idx_T arridx[MAXWLEN]; 5543 int curi[MAXWLEN]; 5544 char_u tword[MAXWLEN]; 5545 char_u tsalword[MAXWLEN]; 5546 int c; 5547 idx_T n; 5548 unsigned words_done = 0; 5549 int wordcount[MAXWLEN]; 5550 5551 // We use si_foldroot for the soundfolded trie. 5552 spin->si_foldroot = wordtree_alloc(spin); 5553 if (spin->si_foldroot == NULL) 5554 return FAIL; 5555 5556 // let tree_add_word() know we're adding to the soundfolded tree 5557 spin->si_sugtree = TRUE; 5558 5559 /* 5560 * Go through the whole case-folded tree, soundfold each word and put it 5561 * in the trie. 5562 */ 5563 byts = slang->sl_fbyts; 5564 idxs = slang->sl_fidxs; 5565 5566 arridx[0] = 0; 5567 curi[0] = 1; 5568 wordcount[0] = 0; 5569 5570 depth = 0; 5571 while (depth >= 0 && !got_int) 5572 { 5573 if (curi[depth] > byts[arridx[depth]]) 5574 { 5575 // Done all bytes at this node, go up one level. 5576 idxs[arridx[depth]] = wordcount[depth]; 5577 if (depth > 0) 5578 wordcount[depth - 1] += wordcount[depth]; 5579 5580 --depth; 5581 line_breakcheck(); 5582 } 5583 else 5584 { 5585 5586 // Do one more byte at this node. 5587 n = arridx[depth] + curi[depth]; 5588 ++curi[depth]; 5589 5590 c = byts[n]; 5591 if (c == 0) 5592 { 5593 // Sound-fold the word. 5594 tword[depth] = NUL; 5595 spell_soundfold(slang, tword, TRUE, tsalword); 5596 5597 // We use the "flags" field for the MSB of the wordnr, 5598 // "region" for the LSB of the wordnr. 5599 if (tree_add_word(spin, tsalword, spin->si_foldroot, 5600 words_done >> 16, words_done & 0xffff, 5601 0) == FAIL) 5602 return FAIL; 5603 5604 ++words_done; 5605 ++wordcount[depth]; 5606 5607 // Reset the block count each time to avoid compression 5608 // kicking in. 5609 spin->si_blocks_cnt = 0; 5610 5611 // Skip over any other NUL bytes (same word with different 5612 // flags). 
5613 while (byts[n + 1] == 0) 5614 { 5615 ++n; 5616 ++curi[depth]; 5617 } 5618 } 5619 else 5620 { 5621 // Normal char, go one level deeper. 5622 tword[depth++] = c; 5623 arridx[depth] = idxs[n]; 5624 curi[depth] = 1; 5625 wordcount[depth] = 0; 5626 } 5627 } 5628 } 5629 5630 smsg(_("Total number of words: %d"), words_done); 5631 5632 return OK; 5633 } 5634 5635 /* 5636 * Make the table that links each word in the soundfold trie to the words it 5637 * can be produced from. 5638 * This is not unlike lines in a file, thus use a memfile to be able to access 5639 * the table efficiently. 5640 * Returns FAIL when out of memory. 5641 */ 5642 static int 5643 sug_maketable(spellinfo_T *spin) 5644 { 5645 garray_T ga; 5646 int res = OK; 5647 5648 // Allocate a buffer, open a memline for it and create the swap file 5649 // (uses a temp file, not a .swp file). 5650 spin->si_spellbuf = open_spellbuf(); 5651 if (spin->si_spellbuf == NULL) 5652 return FAIL; 5653 5654 // Use a buffer to store the line info, avoids allocating many small 5655 // pieces of memory. 5656 ga_init2(&ga, 1, 100); 5657 5658 // recursively go through the tree 5659 if (sug_filltable(spin, spin->si_foldroot->wn_sibling, 0, &ga) == -1) 5660 res = FAIL; 5661 5662 ga_clear(&ga); 5663 return res; 5664 } 5665 5666 /* 5667 * Fill the table for one node and its children. 5668 * Returns the wordnr at the start of the node. 5669 * Returns -1 when out of memory. 
5670 */ 5671 static int 5672 sug_filltable( 5673 spellinfo_T *spin, 5674 wordnode_T *node, 5675 int startwordnr, 5676 garray_T *gap) // place to store line of numbers 5677 { 5678 wordnode_T *p, *np; 5679 int wordnr = startwordnr; 5680 int nr; 5681 int prev_nr; 5682 5683 FOR_ALL_NODE_SIBLINGS(node, p) 5684 { 5685 if (p->wn_byte == NUL) 5686 { 5687 gap->ga_len = 0; 5688 prev_nr = 0; 5689 for (np = p; np != NULL && np->wn_byte == NUL; np = np->wn_sibling) 5690 { 5691 if (ga_grow(gap, 10) == FAIL) 5692 return -1; 5693 5694 nr = (np->wn_flags << 16) + (np->wn_region & 0xffff); 5695 // Compute the offset from the previous nr and store the 5696 // offset in a way that it takes a minimum number of bytes. 5697 // It's a bit like utf-8, but without the need to mark 5698 // following bytes. 5699 nr -= prev_nr; 5700 prev_nr += nr; 5701 gap->ga_len += offset2bytes(nr, 5702 (char_u *)gap->ga_data + gap->ga_len); 5703 } 5704 5705 // add the NUL byte 5706 ((char_u *)gap->ga_data)[gap->ga_len++] = NUL; 5707 5708 if (ml_append_buf(spin->si_spellbuf, (linenr_T)wordnr, 5709 gap->ga_data, gap->ga_len, TRUE) == FAIL) 5710 return -1; 5711 ++wordnr; 5712 5713 // Remove extra NUL entries, we no longer need them. We don't 5714 // bother freeing the nodes, the won't be reused anyway. 5715 while (p->wn_sibling != NULL && p->wn_sibling->wn_byte == NUL) 5716 p->wn_sibling = p->wn_sibling->wn_sibling; 5717 5718 // Clear the flags on the remaining NUL node, so that compression 5719 // works a lot better. 5720 p->wn_flags = 0; 5721 p->wn_region = 0; 5722 } 5723 else 5724 { 5725 wordnr = sug_filltable(spin, p->wn_child, wordnr, gap); 5726 if (wordnr == -1) 5727 return -1; 5728 } 5729 } 5730 return wordnr; 5731 } 5732 5733 /* 5734 * Convert an offset into a minimal number of bytes. 5735 * Similar to utf_char2byters, but use 8 bits in followup bytes and avoid NUL 5736 * bytes. 
5737 */ 5738 static int 5739 offset2bytes(int nr, char_u *buf) 5740 { 5741 int rem; 5742 int b1, b2, b3, b4; 5743 5744 // Split the number in parts of base 255. We need to avoid NUL bytes. 5745 b1 = nr % 255 + 1; 5746 rem = nr / 255; 5747 b2 = rem % 255 + 1; 5748 rem = rem / 255; 5749 b3 = rem % 255 + 1; 5750 b4 = rem / 255 + 1; 5751 5752 if (b4 > 1 || b3 > 0x1f) // 4 bytes 5753 { 5754 buf[0] = 0xe0 + b4; 5755 buf[1] = b3; 5756 buf[2] = b2; 5757 buf[3] = b1; 5758 return 4; 5759 } 5760 if (b3 > 1 || b2 > 0x3f ) // 3 bytes 5761 { 5762 buf[0] = 0xc0 + b3; 5763 buf[1] = b2; 5764 buf[2] = b1; 5765 return 3; 5766 } 5767 if (b2 > 1 || b1 > 0x7f ) // 2 bytes 5768 { 5769 buf[0] = 0x80 + b2; 5770 buf[1] = b1; 5771 return 2; 5772 } 5773 // 1 byte 5774 buf[0] = b1; 5775 return 1; 5776 } 5777 5778 /* 5779 * Write the .sug file in "fname". 5780 */ 5781 static void 5782 sug_write(spellinfo_T *spin, char_u *fname) 5783 { 5784 FILE *fd; 5785 wordnode_T *tree; 5786 int nodecount; 5787 int wcount; 5788 char_u *line; 5789 linenr_T lnum; 5790 int len; 5791 5792 // Create the file. Note that an existing file is silently overwritten! 5793 fd = mch_fopen((char *)fname, "w"); 5794 if (fd == NULL) 5795 { 5796 semsg(_(e_notopen), fname); 5797 return; 5798 } 5799 5800 vim_snprintf((char *)IObuff, IOSIZE, 5801 _("Writing suggestion file %s..."), fname); 5802 spell_message(spin, IObuff); 5803 5804 /* 5805 * <SUGHEADER>: <fileID> <versionnr> <timestamp> 5806 */ 5807 if (fwrite(VIMSUGMAGIC, VIMSUGMAGICL, (size_t)1, fd) != 1) // <fileID> 5808 { 5809 emsg(_(e_write)); 5810 goto theend; 5811 } 5812 putc(VIMSUGVERSION, fd); // <versionnr> 5813 5814 // Write si_sugtime to the file. 5815 put_time(fd, spin->si_sugtime); // <timestamp> 5816 5817 /* 5818 * <SUGWORDTREE> 5819 */ 5820 spin->si_memtot = 0; 5821 tree = spin->si_foldroot->wn_sibling; 5822 5823 // Clear the index and wnode fields in the tree. 5824 clear_node(tree); 5825 5826 // Count the number of nodes. 
Needed to be able to allocate the 5827 // memory when reading the nodes. Also fills in index for shared 5828 // nodes. 5829 nodecount = put_node(NULL, tree, 0, 0, FALSE); 5830 5831 // number of nodes in 4 bytes 5832 put_bytes(fd, (long_u)nodecount, 4); // <nodecount> 5833 spin->si_memtot += nodecount + nodecount * sizeof(int); 5834 5835 // Write the nodes. 5836 (void)put_node(fd, tree, 0, 0, FALSE); 5837 5838 /* 5839 * <SUGTABLE>: <sugwcount> <sugline> ... 5840 */ 5841 wcount = spin->si_spellbuf->b_ml.ml_line_count; 5842 put_bytes(fd, (long_u)wcount, 4); // <sugwcount> 5843 5844 for (lnum = 1; lnum <= (linenr_T)wcount; ++lnum) 5845 { 5846 // <sugline>: <sugnr> ... NUL 5847 line = ml_get_buf(spin->si_spellbuf, lnum, FALSE); 5848 len = (int)STRLEN(line) + 1; 5849 if (fwrite(line, (size_t)len, (size_t)1, fd) == 0) 5850 { 5851 emsg(_(e_write)); 5852 goto theend; 5853 } 5854 spin->si_memtot += len; 5855 } 5856 5857 // Write another byte to check for errors. 5858 if (putc(0, fd) == EOF) 5859 emsg(_(e_write)); 5860 5861 vim_snprintf((char *)IObuff, IOSIZE, 5862 _("Estimated runtime memory use: %d bytes"), spin->si_memtot); 5863 spell_message(spin, IObuff); 5864 5865 theend: 5866 // close the file 5867 fclose(fd); 5868 } 5869 5870 5871 /* 5872 * Create a Vim spell file from one or more word lists. 5873 * "fnames[0]" is the output file name. 5874 * "fnames[fcount - 1]" is the last input file name. 5875 * Exception: when "fnames[0]" ends in ".add" it's used as the input file name 5876 * and ".spl" is appended to make the output file name. 
*/
    void
mkspell(
    int         fcount,
    char_u      **fnames,
    int         ascii,              // -ascii argument given
    int         over_write,         // overwrite existing output file
    int         added_word)         // invoked through "zg"
{
    char_u      *fname = NULL;      // buffer for the .aff/.dic file names
    char_u      *wfname;            // name of the file that is written
    char_u      **innames;          // first input name in "fnames"
    int         incount;            // number of input names
    afffile_T   *(afile[MAXREGIONS]);
    int         i;
    int         len;
    stat_T      st;
    int         error = FALSE;
    spellinfo_T spin;

    CLEAR_FIELD(spin);
    // Messages are given by spell_message() when "si_verbose" is set, thus
    // not when invoked for an added word (unless 'verbose' is high enough).
    spin.si_verbose = !added_word;
    spin.si_ascii = ascii;
    spin.si_followup = TRUE;
    spin.si_rem_accents = TRUE;
    ga_init2(&spin.si_rep, (int)sizeof(fromto_T), 20);
    ga_init2(&spin.si_repsal, (int)sizeof(fromto_T), 20);
    ga_init2(&spin.si_sal, (int)sizeof(fromto_T), 20);
    ga_init2(&spin.si_map, (int)sizeof(char_u), 100);
    ga_init2(&spin.si_comppat, (int)sizeof(char_u *), 20);
    ga_init2(&spin.si_prefcond, (int)sizeof(char_u *), 50);
    hash_init(&spin.si_commonwords);
    spin.si_newcompID = 127;    // start compound ID at first maximum

    // default: fnames[0] is output file, following are input files
    // When "fcount" is 1 there is only one file.
    innames = &fnames[fcount == 1 ? 0 : 1];
    incount = fcount - 1;

    wfname = alloc(MAXPATHL);
    if (wfname == NULL)
        return;

    // Work out the name of the output file.  When "fcount" is zero "wfname"
    // is not filled in; "incount" is then negative and the check below
    // gives an error before "wfname" is used.
    if (fcount >= 1)
    {
        len = (int)STRLEN(fnames[0]);
        if (fcount == 1 && len > 4 && STRCMP(fnames[0] + len - 4, ".add") == 0)
        {
            // For ":mkspell path/en.latin1.add" output file is
            // "path/en.latin1.add.spl".
            incount = 1;
            vim_snprintf((char *)wfname, MAXPATHL, "%s.spl", fnames[0]);
        }
        else if (fcount == 1)
        {
            // For ":mkspell path/vim" output file is "path/vim.latin1.spl".
            incount = 1;
            vim_snprintf((char *)wfname, MAXPATHL, SPL_FNAME_TMPL,
                  fnames[0], spin.si_ascii ? (char_u *)"ascii" : spell_enc());
        }
        else if (len > 4 && STRCMP(fnames[0] + len - 4, ".spl") == 0)
        {
            // Name ends in ".spl", use as the file name.
            vim_strncpy(wfname, fnames[0], MAXPATHL - 1);
        }
        else
            // Name should be language, make the file name from it.
            vim_snprintf((char *)wfname, MAXPATHL, SPL_FNAME_TMPL,
                  fnames[0], spin.si_ascii ? (char_u *)"ascii" : spell_enc());

        // Check for .ascii.spl.  The file name overrules the "ascii"
        // argument when it contains SPL_FNAME_ASCII.
        if (strstr((char *)gettail(wfname), SPL_FNAME_ASCII) != NULL)
            spin.si_ascii = TRUE;

        // Check for .add.spl.
        if (strstr((char *)gettail(wfname), SPL_FNAME_ADD) != NULL)
            spin.si_add = TRUE;
    }

    if (incount <= 0)
        emsg(_(e_invarg));      // need at least output and input names
    else if (vim_strchr(gettail(wfname), '_') != NULL)
        emsg(_("E751: Output file name must not have region name"));
    else if (incount > MAXREGIONS)
        semsg(_("E754: Only up to %d regions supported"), MAXREGIONS);
    else
    {
        // Check for overwriting before doing things that may take a lot of
        // time.
        if (!over_write && mch_stat((char *)wfname, &st) >= 0)
        {
            emsg(_(e_exists));
            goto theend;
        }
        if (mch_isdir(wfname))
        {
            semsg(_(e_isadir2), wfname);
            goto theend;
        }

        fname = alloc(MAXPATHL);
        if (fname == NULL)
            goto theend;

        /*
         * Init the aff and dic pointers.
         * Get the region names if there are more than 2 arguments.
         */
        for (i = 0; i < incount; ++i)
        {
            afile[i] = NULL;

            if (incount > 1)
            {
                // With more than one input file every name must end in
                // "_xx": the tail is at least five bytes long and the third
                // byte from the end is an underscore.  The last two bytes,
                // in lower case, are the region name.
                len = (int)STRLEN(innames[i]);
                if (STRLEN(gettail(innames[i])) < 5
                                           || innames[i][len - 3] != '_')
                {
                    semsg(_("E755: Invalid region in %s"), innames[i]);
                    goto theend;
                }
                spin.si_region_name[i * 2] = TOLOWER_ASC(innames[i][len - 2]);
                spin.si_region_name[i * 2 + 1] =
                                             TOLOWER_ASC(innames[i][len - 1]);
            }
        }
        spin.si_region_count = incount;

        spin.si_foldroot = wordtree_alloc(&spin);
        spin.si_keeproot = wordtree_alloc(&spin);
        spin.si_prefroot = wordtree_alloc(&spin);
        if (spin.si_foldroot == NULL
                || spin.si_keeproot == NULL
                || spin.si_prefroot == NULL)
        {
            // Release whatever was allocated for the roots that succeeded.
            free_blocks(spin.si_blocks);
            goto theend;
        }

        // When not producing a .add.spl file clear the character table when
        // we encounter one in the .aff file.  This means we dump the current
        // one in the .spl file if the .aff file doesn't define one.  That's
        // better than guessing the contents, the table will match a
        // previously loaded spell file.
        if (!spin.si_add)
            spin.si_clear_chartab = TRUE;

        /*
         * Read all the .aff and .dic files.
         * Text is converted to 'encoding'.
         * Words are stored in the case-folded and keep-case trees.
         * The loop stops at the first input file that fails.
         */
        for (i = 0; i < incount && !error; ++i)
        {
            spin.si_conv.vc_type = CONV_NONE;
            // Input file "i" gets bit "i" as its region.
            spin.si_region = 1 << i;

            vim_snprintf((char *)fname, MAXPATHL, "%s.aff", innames[i]);
            if (mch_stat((char *)fname, &st) >= 0)
            {
                // Read the .aff file.  Will init "spin->si_conv" based on the
                // "SET" line.
                afile[i] = spell_read_aff(&spin, fname);
                if (afile[i] == NULL)
                    error = TRUE;
                else
                {
                    // Read the .dic file and store the words in the trees.
                    vim_snprintf((char *)fname, MAXPATHL, "%s.dic",
                                                                  innames[i]);
                    if (spell_read_dic(&spin, fname, afile[i]) == FAIL)
                        error = TRUE;
                }
            }
            else
            {
                // No .aff file, try reading the file as a word list.  Store
                // the words in the trees.
                if (spell_read_wordfile(&spin, innames[i]) == FAIL)
                    error = TRUE;
            }

            // Free any conversion stuff.
            convert_setup(&spin.si_conv, NULL, NULL);
        }

        if (spin.si_compflags != NULL && spin.si_nobreak)
            msg(_("Warning: both compounding and NOBREAK specified"));

        if (!error && !got_int)
        {
            /*
             * Combine tails in the tree.
             */
            spell_message(&spin, (char_u *)_(msg_compressing));
            wordtree_compress(&spin, spin.si_foldroot, "case-folded");
            wordtree_compress(&spin, spin.si_keeproot, "keep-case");
            wordtree_compress(&spin, spin.si_prefroot, "prefixes");
        }

        // Checked again: "got_int" may have been set in the meantime.
        if (!error && !got_int)
        {
            /*
             * Write the info in the spell file.
             */
            vim_snprintf((char *)IObuff, IOSIZE,
                                      _("Writing spell file %s..."), wfname);
            spell_message(&spin, IObuff);

            error = write_vim_spell(&spin, wfname) == FAIL;

            // Note: these messages are given also when writing failed.
            spell_message(&spin, (char_u *)_("Done!"));
            vim_snprintf((char *)IObuff, IOSIZE,
                 _("Estimated runtime memory use: %d bytes"), spin.si_memtot);
            spell_message(&spin, IObuff);

            /*
             * If the file is loaded need to reload it.
             */
            if (!error)
                spell_reload_one(wfname, added_word);
        }

        // Free the allocated memory.
        ga_clear(&spin.si_rep);
        ga_clear(&spin.si_repsal);
        ga_clear(&spin.si_sal);
        ga_clear(&spin.si_map);
        ga_clear(&spin.si_comppat);
        ga_clear(&spin.si_prefcond);
        hash_clear_all(&spin.si_commonwords, 0);

        // Free the .aff file structures.
        for (i = 0; i < incount; ++i)
            if (afile[i] != NULL)
                spell_free_aff(afile[i]);

        // Free all the bits and pieces at once.
        free_blocks(spin.si_blocks);

        /*
         * If there is soundfolding info and no NOSUGFILE item create the
         * .sug file with the soundfolded word trie.
         * NOTE(review): this is called after free_blocks(); presumably
         * spell_make_sugfile() does not use the freed blocks - confirm.
         */
        if (spin.si_sugtime != 0 && !error && !got_int)
            spell_make_sugfile(&spin, wfname);

    }

theend:
    // "fname" is still NULL when it was never allocated.
    vim_free(fname);
    vim_free(wfname);
}

/*
 * Display a message for spell file processing when 'verbose' is set or using
 * ":mkspell".  "str" can be IObuff.
 * The message is given when "spin->si_verbose" is set or when "p_verbose" is
 * above 2.  In the latter case only, it is wrapped in verbose_enter() and
 * verbose_leave().
 */
    static void
spell_message(spellinfo_T *spin, char_u *str)
{
    if (spin->si_verbose || p_verbose > 2)
    {
        if (!spin->si_verbose)
            verbose_enter();
        msg((char *)str);
        out_flush();
        if (!spin->si_verbose)
            verbose_leave();
    }
}

/*
 * ":[count]spellgood  {word}"
 * ":[count]spellwrong {word}"
 * ":[count]spellundo  {word}"
 * ":[count]spellrare  {word}"
 * The whole argument is passed on as the word.
 */
    void
ex_spell(exarg_T *eap)
{
    spell_add_word(eap->arg, (int)STRLEN(eap->arg),
                // ":spellwrong" adds a bad word, ":spellrare" a rare word,
                // anything else is handled as a good word.
                eap->cmdidx == CMD_spellwrong ? SPELL_ADD_BAD :
                eap->cmdidx == CMD_spellrare ? SPELL_ADD_RARE : SPELL_ADD_GOOD,
                // With "!" index zero is passed, which makes
                // spell_add_word() use the internal word list.  Otherwise
                // "line2" is used as the index.
                                   eap->forceit ? 0 : (int)eap->line2,
                                   eap->cmdidx == CMD_spellundo);
}

/*
 * Add "word[len]" to 'spellfile' as a good, rare or bad word.
6167 */ 6168 void 6169 spell_add_word( 6170 char_u *word, 6171 int len, 6172 int what, // SPELL_ADD_ values 6173 int idx, // "zG" and "zW": zero, otherwise index in 6174 // 'spellfile' 6175 int undo) // TRUE for "zug", "zuG", "zuw" and "zuW" 6176 { 6177 FILE *fd = NULL; 6178 buf_T *buf = NULL; 6179 int new_spf = FALSE; 6180 char_u *fname; 6181 char_u *fnamebuf = NULL; 6182 char_u line[MAXWLEN * 2]; 6183 long fpos, fpos_next = 0; 6184 int i; 6185 char_u *spf; 6186 6187 if (idx == 0) // use internal wordlist 6188 { 6189 if (int_wordlist == NULL) 6190 { 6191 int_wordlist = vim_tempname('s', FALSE); 6192 if (int_wordlist == NULL) 6193 return; 6194 } 6195 fname = int_wordlist; 6196 } 6197 else 6198 { 6199 // If 'spellfile' isn't set figure out a good default value. 6200 if (*curwin->w_s->b_p_spf == NUL) 6201 { 6202 init_spellfile(); 6203 new_spf = TRUE; 6204 } 6205 6206 if (*curwin->w_s->b_p_spf == NUL) 6207 { 6208 semsg(_(e_notset), "spellfile"); 6209 return; 6210 } 6211 fnamebuf = alloc(MAXPATHL); 6212 if (fnamebuf == NULL) 6213 return; 6214 6215 for (spf = curwin->w_s->b_p_spf, i = 1; *spf != NUL; ++i) 6216 { 6217 copy_option_part(&spf, fnamebuf, MAXPATHL, ","); 6218 if (i == idx) 6219 break; 6220 if (*spf == NUL) 6221 { 6222 semsg(_("E765: 'spellfile' does not have %d entries"), idx); 6223 vim_free(fnamebuf); 6224 return; 6225 } 6226 } 6227 6228 // Check that the user isn't editing the .add file somewhere. 6229 buf = buflist_findname_exp(fnamebuf); 6230 if (buf != NULL && buf->b_ml.ml_mfp == NULL) 6231 buf = NULL; 6232 if (buf != NULL && bufIsChanged(buf)) 6233 { 6234 emsg(_(e_bufloaded)); 6235 vim_free(fnamebuf); 6236 return; 6237 } 6238 6239 fname = fnamebuf; 6240 } 6241 6242 if (what == SPELL_ADD_BAD || undo) 6243 { 6244 // When the word appears as good word we need to remove that one, 6245 // since its flags sort before the one with WF_BANNED. 
6246 fd = mch_fopen((char *)fname, "r"); 6247 if (fd != NULL) 6248 { 6249 while (!vim_fgets(line, MAXWLEN * 2, fd)) 6250 { 6251 fpos = fpos_next; 6252 fpos_next = ftell(fd); 6253 if (STRNCMP(word, line, len) == 0 6254 && (line[len] == '/' || line[len] < ' ')) 6255 { 6256 // Found duplicate word. Remove it by writing a '#' at 6257 // the start of the line. Mixing reading and writing 6258 // doesn't work for all systems, close the file first. 6259 fclose(fd); 6260 fd = mch_fopen((char *)fname, "r+"); 6261 if (fd == NULL) 6262 break; 6263 if (fseek(fd, fpos, SEEK_SET) == 0) 6264 { 6265 fputc('#', fd); 6266 if (undo) 6267 { 6268 home_replace(NULL, fname, NameBuff, MAXPATHL, TRUE); 6269 smsg(_("Word '%.*s' removed from %s"), 6270 len, word, NameBuff); 6271 } 6272 } 6273 fseek(fd, fpos_next, SEEK_SET); 6274 } 6275 } 6276 if (fd != NULL) 6277 fclose(fd); 6278 } 6279 } 6280 6281 if (!undo) 6282 { 6283 fd = mch_fopen((char *)fname, "a"); 6284 if (fd == NULL && new_spf) 6285 { 6286 char_u *p; 6287 6288 // We just initialized the 'spellfile' option and can't open the 6289 // file. We may need to create the "spell" directory first. We 6290 // already checked the runtime directory is writable in 6291 // init_spellfile(). 6292 if (!dir_of_file_exists(fname) && (p = gettail_sep(fname)) != fname) 6293 { 6294 int c = *p; 6295 6296 // The directory doesn't exist. Try creating it and opening 6297 // the file again. 
6298 *p = NUL; 6299 vim_mkdir(fname, 0755); 6300 *p = c; 6301 fd = mch_fopen((char *)fname, "a"); 6302 } 6303 } 6304 6305 if (fd == NULL) 6306 semsg(_(e_notopen), fname); 6307 else 6308 { 6309 if (what == SPELL_ADD_BAD) 6310 fprintf(fd, "%.*s/!\n", len, word); 6311 else if (what == SPELL_ADD_RARE) 6312 fprintf(fd, "%.*s/?\n", len, word); 6313 else 6314 fprintf(fd, "%.*s\n", len, word); 6315 fclose(fd); 6316 6317 home_replace(NULL, fname, NameBuff, MAXPATHL, TRUE); 6318 smsg(_("Word '%.*s' added to %s"), len, word, NameBuff); 6319 } 6320 } 6321 6322 if (fd != NULL) 6323 { 6324 // Update the .add.spl file. 6325 mkspell(1, &fname, FALSE, TRUE, TRUE); 6326 6327 // If the .add file is edited somewhere, reload it. 6328 if (buf != NULL) 6329 buf_reload(buf, buf->b_orig_mode); 6330 6331 redraw_all_later(SOME_VALID); 6332 } 6333 vim_free(fnamebuf); 6334 } 6335 6336 /* 6337 * Initialize 'spellfile' for the current buffer. 6338 */ 6339 static void 6340 init_spellfile(void) 6341 { 6342 char_u *buf; 6343 int l; 6344 char_u *fname; 6345 char_u *rtp; 6346 char_u *lend; 6347 int aspath = FALSE; 6348 char_u *lstart = curbuf->b_s.b_p_spl; 6349 6350 if (*curwin->w_s->b_p_spl != NUL && curwin->w_s->b_langp.ga_len > 0) 6351 { 6352 buf = alloc(MAXPATHL); 6353 if (buf == NULL) 6354 return; 6355 6356 // Find the end of the language name. Exclude the region. If there 6357 // is a path separator remember the start of the tail. 6358 for (lend = curwin->w_s->b_p_spl; *lend != NUL 6359 && vim_strchr((char_u *)",._", *lend) == NULL; ++lend) 6360 if (vim_ispathsep(*lend)) 6361 { 6362 aspath = TRUE; 6363 lstart = lend + 1; 6364 } 6365 6366 // Loop over all entries in 'runtimepath'. Use the first one where we 6367 // are allowed to write. 6368 rtp = p_rtp; 6369 while (*rtp != NUL) 6370 { 6371 if (aspath) 6372 // Use directory of an entry with path, e.g., for 6373 // "/dir/lg.utf-8.spl" use "/dir". 
6374 vim_strncpy(buf, curbuf->b_s.b_p_spl, 6375 lstart - curbuf->b_s.b_p_spl - 1); 6376 else 6377 // Copy the path from 'runtimepath' to buf[]. 6378 copy_option_part(&rtp, buf, MAXPATHL, ","); 6379 if (filewritable(buf) == 2) 6380 { 6381 // Use the first language name from 'spelllang' and the 6382 // encoding used in the first loaded .spl file. 6383 if (aspath) 6384 vim_strncpy(buf, curbuf->b_s.b_p_spl, 6385 lend - curbuf->b_s.b_p_spl); 6386 else 6387 { 6388 // Create the "spell" directory if it doesn't exist yet. 6389 l = (int)STRLEN(buf); 6390 vim_snprintf((char *)buf + l, MAXPATHL - l, "/spell"); 6391 if (filewritable(buf) != 2) 6392 vim_mkdir(buf, 0755); 6393 6394 l = (int)STRLEN(buf); 6395 vim_snprintf((char *)buf + l, MAXPATHL - l, 6396 "/%.*s", (int)(lend - lstart), lstart); 6397 } 6398 l = (int)STRLEN(buf); 6399 fname = LANGP_ENTRY(curwin->w_s->b_langp, 0) 6400 ->lp_slang->sl_fname; 6401 vim_snprintf((char *)buf + l, MAXPATHL - l, ".%s.add", 6402 fname != NULL 6403 && strstr((char *)gettail(fname), ".ascii.") != NULL 6404 ? (char_u *)"ascii" : spell_enc()); 6405 set_option_value((char_u *)"spellfile", 0L, buf, OPT_LOCAL); 6406 break; 6407 } 6408 aspath = FALSE; 6409 } 6410 6411 vim_free(buf); 6412 } 6413 } 6414 6415 6416 6417 /* 6418 * Set the spell character tables from strings in the affix file. 6419 */ 6420 static int 6421 set_spell_chartab(char_u *fol, char_u *low, char_u *upp) 6422 { 6423 // We build the new tables here first, so that we can compare with the 6424 // previous one. 6425 spelltab_T new_st; 6426 char_u *pf = fol, *pl = low, *pu = upp; 6427 int f, l, u; 6428 6429 clear_spell_chartab(&new_st); 6430 6431 while (*pf != NUL) 6432 { 6433 if (*pl == NUL || *pu == NUL) 6434 { 6435 emsg(_(e_affform)); 6436 return FAIL; 6437 } 6438 f = mb_ptr2char_adv(&pf); 6439 l = mb_ptr2char_adv(&pl); 6440 u = mb_ptr2char_adv(&pu); 6441 6442 // Every character that appears is a word character. 
6443 if (f < 256) 6444 new_st.st_isw[f] = TRUE; 6445 if (l < 256) 6446 new_st.st_isw[l] = TRUE; 6447 if (u < 256) 6448 new_st.st_isw[u] = TRUE; 6449 6450 // if "LOW" and "FOL" are not the same the "LOW" char needs 6451 // case-folding 6452 if (l < 256 && l != f) 6453 { 6454 if (f >= 256) 6455 { 6456 emsg(_(e_affrange)); 6457 return FAIL; 6458 } 6459 new_st.st_fold[l] = f; 6460 } 6461 6462 // if "UPP" and "FOL" are not the same the "UPP" char needs 6463 // case-folding, it's upper case and the "UPP" is the upper case of 6464 // "FOL" . 6465 if (u < 256 && u != f) 6466 { 6467 if (f >= 256) 6468 { 6469 emsg(_(e_affrange)); 6470 return FAIL; 6471 } 6472 new_st.st_fold[u] = f; 6473 new_st.st_isu[u] = TRUE; 6474 new_st.st_upper[f] = u; 6475 } 6476 } 6477 6478 if (*pl != NUL || *pu != NUL) 6479 { 6480 emsg(_(e_affform)); 6481 return FAIL; 6482 } 6483 6484 return set_spell_finish(&new_st); 6485 } 6486 6487 /* 6488 * Set the spell character tables from strings in the .spl file. 6489 */ 6490 static void 6491 set_spell_charflags( 6492 char_u *flags, 6493 int cnt, // length of "flags" 6494 char_u *fol) 6495 { 6496 // We build the new tables here first, so that we can compare with the 6497 // previous one. 
6498 spelltab_T new_st; 6499 int i; 6500 char_u *p = fol; 6501 int c; 6502 6503 clear_spell_chartab(&new_st); 6504 6505 for (i = 0; i < 128; ++i) 6506 { 6507 if (i < cnt) 6508 { 6509 new_st.st_isw[i + 128] = (flags[i] & CF_WORD) != 0; 6510 new_st.st_isu[i + 128] = (flags[i] & CF_UPPER) != 0; 6511 } 6512 6513 if (*p != NUL) 6514 { 6515 c = mb_ptr2char_adv(&p); 6516 new_st.st_fold[i + 128] = c; 6517 if (i + 128 != c && new_st.st_isu[i + 128] && c < 256) 6518 new_st.st_upper[c] = i + 128; 6519 } 6520 } 6521 6522 (void)set_spell_finish(&new_st); 6523 } 6524 6525 static int 6526 set_spell_finish(spelltab_T *new_st) 6527 { 6528 int i; 6529 6530 if (did_set_spelltab) 6531 { 6532 // check that it's the same table 6533 for (i = 0; i < 256; ++i) 6534 { 6535 if (spelltab.st_isw[i] != new_st->st_isw[i] 6536 || spelltab.st_isu[i] != new_st->st_isu[i] 6537 || spelltab.st_fold[i] != new_st->st_fold[i] 6538 || spelltab.st_upper[i] != new_st->st_upper[i]) 6539 { 6540 emsg(_("E763: Word characters differ between spell files")); 6541 return FAIL; 6542 } 6543 } 6544 } 6545 else 6546 { 6547 // copy the new spelltab into the one being used 6548 spelltab = *new_st; 6549 did_set_spelltab = TRUE; 6550 } 6551 6552 return OK; 6553 } 6554 6555 /* 6556 * Write the table with prefix conditions to the .spl file. 6557 * When "fd" is NULL only count the length of what is written. 
6558 */ 6559 static int 6560 write_spell_prefcond(FILE *fd, garray_T *gap) 6561 { 6562 int i; 6563 char_u *p; 6564 int len; 6565 int totlen; 6566 size_t x = 1; // collect return value of fwrite() 6567 6568 if (fd != NULL) 6569 put_bytes(fd, (long_u)gap->ga_len, 2); // <prefcondcnt> 6570 6571 totlen = 2 + gap->ga_len; // length of <prefcondcnt> and <condlen> bytes 6572 6573 for (i = 0; i < gap->ga_len; ++i) 6574 { 6575 // <prefcond> : <condlen> <condstr> 6576 p = ((char_u **)gap->ga_data)[i]; 6577 if (p != NULL) 6578 { 6579 len = (int)STRLEN(p); 6580 if (fd != NULL) 6581 { 6582 fputc(len, fd); 6583 x &= fwrite(p, (size_t)len, (size_t)1, fd); 6584 } 6585 totlen += len; 6586 } 6587 else if (fd != NULL) 6588 fputc(0, fd); 6589 } 6590 6591 return totlen; 6592 } 6593 6594 6595 /* 6596 * Use map string "map" for languages "lp". 6597 */ 6598 static void 6599 set_map_str(slang_T *lp, char_u *map) 6600 { 6601 char_u *p; 6602 int headc = 0; 6603 int c; 6604 int i; 6605 6606 if (*map == NUL) 6607 { 6608 lp->sl_has_map = FALSE; 6609 return; 6610 } 6611 lp->sl_has_map = TRUE; 6612 6613 // Init the array and hash tables empty. 6614 for (i = 0; i < 256; ++i) 6615 lp->sl_map_array[i] = 0; 6616 hash_init(&lp->sl_map_hash); 6617 6618 /* 6619 * The similar characters are stored separated with slashes: 6620 * "aaa/bbb/ccc/". Fill sl_map_array[c] with the character before c and 6621 * before the same slash. For characters above 255 sl_map_hash is used. 6622 */ 6623 for (p = map; *p != NUL; ) 6624 { 6625 c = mb_cptr2char_adv(&p); 6626 if (c == '/') 6627 headc = 0; 6628 else 6629 { 6630 if (headc == 0) 6631 headc = c; 6632 6633 // Characters above 255 don't fit in sl_map_array[], put them in 6634 // the hash table. Each entry is the char, a NUL the headchar and 6635 // a NUL. 
6636 if (c >= 256) 6637 { 6638 int cl = mb_char2len(c); 6639 int headcl = mb_char2len(headc); 6640 char_u *b; 6641 hash_T hash; 6642 hashitem_T *hi; 6643 6644 b = alloc(cl + headcl + 2); 6645 if (b == NULL) 6646 return; 6647 mb_char2bytes(c, b); 6648 b[cl] = NUL; 6649 mb_char2bytes(headc, b + cl + 1); 6650 b[cl + 1 + headcl] = NUL; 6651 hash = hash_hash(b); 6652 hi = hash_lookup(&lp->sl_map_hash, b, hash); 6653 if (HASHITEM_EMPTY(hi)) 6654 hash_add_item(&lp->sl_map_hash, hi, b, hash); 6655 else 6656 { 6657 // This should have been checked when generating the .spl 6658 // file. 6659 emsg(_("E783: duplicate char in MAP entry")); 6660 vim_free(b); 6661 } 6662 } 6663 else 6664 lp->sl_map_array[c] = headc; 6665 } 6666 } 6667 } 6668 6669 #endif // FEAT_SPELL 6670