1 /* vi:set ts=8 sts=4 sw=4 noet: 2 * 3 * VIM - Vi IMproved by Bram Moolenaar 4 * 5 * Do ":help uganda" in Vim to read copying and usage conditions. 6 * Do ":help credits" in Vim to see a list of people who contributed. 7 * See README.txt for an overview of the Vim source code. 8 */ 9 10 /* 11 * spellfile.c: code for reading and writing spell files. 12 * 13 * See spell.c for information about spell checking. 14 */ 15 16 /* 17 * Vim spell file format: <HEADER> 18 * <SECTIONS> 19 * <LWORDTREE> 20 * <KWORDTREE> 21 * <PREFIXTREE> 22 * 23 * <HEADER>: <fileID> <versionnr> 24 * 25 * <fileID> 8 bytes "VIMspell" 26 * <versionnr> 1 byte VIMSPELLVERSION 27 * 28 * 29 * Sections make it possible to add information to the .spl file without 30 * making it incompatible with previous versions. There are two kinds of 31 * sections: 32 * 1. Not essential for correct spell checking. E.g. for making suggestions. 33 * These are skipped when not supported. 34 * 2. Optional information, but essential for spell checking when present. 35 * E.g. conditions for affixes. When this section is present but not 36 * supported an error message is given. 37 * 38 * <SECTIONS>: <section> ... <sectionend> 39 * 40 * <section>: <sectionID> <sectionflags> <sectionlen> (section contents) 41 * 42 * <sectionID> 1 byte number from 0 to 254 identifying the section 43 * 44 * <sectionflags> 1 byte SNF_REQUIRED: this section is required for correct 45 * spell checking 46 * 47 * <sectionlen> 4 bytes length of section contents, MSB first 48 * 49 * <sectionend> 1 byte SN_END 50 * 51 * 52 * sectionID == SN_INFO: <infotext> 53 * <infotext> N bytes free format text with spell file info (version, 54 * website, etc) 55 * 56 * sectionID == SN_REGION: <regionname> ... 57 * <regionname> 2 bytes Up to MAXREGIONS region names: ca, au, etc. Lower 58 * case. First <regionname> is region 1. 
59 * 60 * sectionID == SN_CHARFLAGS: <charflagslen> <charflags> 61 * <folcharslen> <folchars> 62 * <charflagslen> 1 byte Number of bytes in <charflags> (should be 128). 63 * <charflags> N bytes List of flags (first one is for character 128): 64 * 0x01 word character CF_WORD 65 * 0x02 upper-case character CF_UPPER 66 * <folcharslen> 2 bytes Number of bytes in <folchars>. 67 * <folchars> N bytes Folded characters, first one is for character 128. 68 * 69 * sectionID == SN_MIDWORD: <midword> 70 * <midword> N bytes Characters that are word characters only when used 71 * in the middle of a word. 72 * 73 * sectionID == SN_PREFCOND: <prefcondcnt> <prefcond> ... 74 * <prefcondcnt> 2 bytes Number of <prefcond> items following. 75 * <prefcond> : <condlen> <condstr> 76 * <condlen> 1 byte Length of <condstr>. 77 * <condstr> N bytes Condition for the prefix. 78 * 79 * sectionID == SN_REP: <repcount> <rep> ... 80 * <repcount> 2 bytes number of <rep> items, MSB first. 81 * <rep> : <repfromlen> <repfrom> <reptolen> <repto> 82 * <repfromlen> 1 byte length of <repfrom> 83 * <repfrom> N bytes "from" part of replacement 84 * <reptolen> 1 byte length of <repto> 85 * <repto> N bytes "to" part of replacement 86 * 87 * sectionID == SN_REPSAL: <repcount> <rep> ... 88 * just like SN_REP but for soundfolded words 89 * 90 * sectionID == SN_SAL: <salflags> <salcount> <sal> ... 
91 * <salflags> 1 byte flags for soundsalike conversion: 92 * SAL_F0LLOWUP 93 * SAL_COLLAPSE 94 * SAL_REM_ACCENTS 95 * <salcount> 2 bytes number of <sal> items following 96 * <sal> : <salfromlen> <salfrom> <saltolen> <salto> 97 * <salfromlen> 1 byte length of <salfrom> 98 * <salfrom> N bytes "from" part of soundsalike 99 * <saltolen> 1 byte length of <salto> 100 * <salto> N bytes "to" part of soundsalike 101 * 102 * sectionID == SN_SOFO: <sofofromlen> <sofofrom> <sofotolen> <sofoto> 103 * <sofofromlen> 2 bytes length of <sofofrom> 104 * <sofofrom> N bytes "from" part of soundfold 105 * <sofotolen> 2 bytes length of <sofoto> 106 * <sofoto> N bytes "to" part of soundfold 107 * 108 * sectionID == SN_SUGFILE: <timestamp> 109 * <timestamp> 8 bytes time in seconds that must match with .sug file 110 * 111 * sectionID == SN_NOSPLITSUGS: nothing 112 * 113 * sectionID == SN_NOCOMPOUNDSUGS: nothing 114 * 115 * sectionID == SN_WORDS: <word> ... 116 * <word> N bytes NUL terminated common word 117 * 118 * sectionID == SN_MAP: <mapstr> 119 * <mapstr> N bytes String with sequences of similar characters, 120 * separated by slashes. 121 * 122 * sectionID == SN_COMPOUND: <compmax> <compminlen> <compsylmax> <compoptions> 123 * <comppatcount> <comppattern> ... <compflags> 124 * <compmax> 1 byte Maximum nr of words in compound word. 125 * <compminlen> 1 byte Minimal word length for compounding. 126 * <compsylmax> 1 byte Maximum nr of syllables in compound word. 127 * <compoptions> 2 bytes COMP_ flags. 128 * <comppatcount> 2 bytes number of <comppattern> following 129 * <compflags> N bytes Flags from COMPOUNDRULE items, separated by 130 * slashes. 
131 * 132 * <comppattern>: <comppatlen> <comppattext> 133 * <comppatlen> 1 byte length of <comppattext> 134 * <comppattext> N bytes end or begin chars from CHECKCOMPOUNDPATTERN 135 * 136 * sectionID == SN_NOBREAK: (empty, its presence is what matters) 137 * 138 * sectionID == SN_SYLLABLE: <syllable> 139 * <syllable> N bytes String from SYLLABLE item. 140 * 141 * <LWORDTREE>: <wordtree> 142 * 143 * <KWORDTREE>: <wordtree> 144 * 145 * <PREFIXTREE>: <wordtree> 146 * 147 * 148 * <wordtree>: <nodecount> <nodedata> ... 149 * 150 * <nodecount> 4 bytes Number of nodes following. MSB first. 151 * 152 * <nodedata>: <siblingcount> <sibling> ... 153 * 154 * <siblingcount> 1 byte Number of siblings in this node. The siblings 155 * follow in sorted order. 156 * 157 * <sibling>: <byte> [ <nodeidx> <xbyte> 158 * | <flags> [<flags2>] [<region>] [<affixID>] 159 * | [<pflags>] <affixID> <prefcondnr> ] 160 * 161 * <byte> 1 byte Byte value of the sibling. Special cases: 162 * BY_NOFLAGS: End of word without flags and for all 163 * regions. 164 * For PREFIXTREE <affixID> and 165 * <prefcondnr> follow. 166 * BY_FLAGS: End of word, <flags> follow. 167 * For PREFIXTREE <pflags>, <affixID> 168 * and <prefcondnr> follow. 169 * BY_FLAGS2: End of word, <flags> and <flags2> 170 * follow. Not used in PREFIXTREE. 171 * BY_INDEX: Child of sibling is shared, <nodeidx> 172 * and <xbyte> follow. 173 * 174 * <nodeidx> 3 bytes Index of child for this sibling, MSB first. 175 * 176 * <xbyte> 1 byte byte value of the sibling. 
177 * 178 * <flags> 1 byte bitmask of: 179 * WF_ALLCAP word must have only capitals 180 * WF_ONECAP first char of word must be capital 181 * WF_KEEPCAP keep-case word 182 * WF_FIXCAP keep-case word, all caps not allowed 183 * WF_RARE rare word 184 * WF_BANNED bad word 185 * WF_REGION <region> follows 186 * WF_AFX <affixID> follows 187 * 188 * <flags2> 1 byte Bitmask of: 189 * WF_HAS_AFF >> 8 word includes affix 190 * WF_NEEDCOMP >> 8 word only valid in compound 191 * WF_NOSUGGEST >> 8 word not used for suggestions 192 * WF_COMPROOT >> 8 word already a compound 193 * WF_NOCOMPBEF >> 8 no compounding before this word 194 * WF_NOCOMPAFT >> 8 no compounding after this word 195 * 196 * <pflags> 1 byte bitmask of: 197 * WFP_RARE rare prefix 198 * WFP_NC non-combining prefix 199 * WFP_UP letter after prefix made upper case 200 * 201 * <region> 1 byte Bitmask for regions in which word is valid. When 202 * omitted it's valid in all regions. 203 * Lowest bit is for region 1. 204 * 205 * <affixID> 1 byte ID of affix that can be used with this word. In 206 * PREFIXTREE used for the required prefix ID. 207 * 208 * <prefcondnr> 2 bytes Prefix condition number, index in <prefcond> list 209 * from HEADER. 210 * 211 * All text characters are in 'encoding', but stored as single bytes. 212 */ 213 214 /* 215 * Vim .sug file format: <SUGHEADER> 216 * <SUGWORDTREE> 217 * <SUGTABLE> 218 * 219 * <SUGHEADER>: <fileID> <versionnr> <timestamp> 220 * 221 * <fileID> 6 bytes "VIMsug" 222 * <versionnr> 1 byte VIMSUGVERSION 223 * <timestamp> 8 bytes timestamp that must match with .spl file 224 * 225 * 226 * <SUGWORDTREE>: <wordtree> (see above, no flags or region used) 227 * 228 * 229 * <SUGTABLE>: <sugwcount> <sugline> ... 230 * 231 * <sugwcount> 4 bytes number of <sugline> following 232 * 233 * <sugline>: <sugnr> ... 
NUL 234 * 235 * <sugnr>: X bytes word number that results in this soundfolded word, 236 * stored as an offset to the previous number in as 237 * few bytes as possible, see offset2bytes()) 238 */ 239 240 #include "vim.h" 241 242 #if defined(FEAT_SPELL) || defined(PROTO) 243 244 #ifndef UNIX // it's in os_unix.h for Unix 245 # include <time.h> // for time_t 246 #endif 247 248 #ifndef UNIX // it's in os_unix.h for Unix 249 # include <time.h> // for time_t 250 #endif 251 252 // Special byte values for <byte>. Some are only used in the tree for 253 // postponed prefixes, some only in the other trees. This is a bit messy... 254 #define BY_NOFLAGS 0 // end of word without flags or region; for 255 // postponed prefix: no <pflags> 256 #define BY_INDEX 1 // child is shared, index follows 257 #define BY_FLAGS 2 // end of word, <flags> byte follows; for 258 // postponed prefix: <pflags> follows 259 #define BY_FLAGS2 3 // end of word, <flags> and <flags2> bytes 260 // follow; never used in prefix tree 261 #define BY_SPECIAL BY_FLAGS2 // highest special byte value 262 263 #define ZERO_FLAG 65009 // used when flag is zero: "0" 264 265 // Flags used in .spl file for soundsalike flags. 266 #define SAL_F0LLOWUP 1 267 #define SAL_COLLAPSE 2 268 #define SAL_REM_ACCENTS 4 269 270 #define VIMSPELLMAGIC "VIMspell" // string at start of Vim spell file 271 #define VIMSPELLMAGICL 8 272 #define VIMSPELLVERSION 50 273 274 // Section IDs. Only renumber them when VIMSPELLVERSION changes! 
275 #define SN_REGION 0 // <regionname> section 276 #define SN_CHARFLAGS 1 // charflags section 277 #define SN_MIDWORD 2 // <midword> section 278 #define SN_PREFCOND 3 // <prefcond> section 279 #define SN_REP 4 // REP items section 280 #define SN_SAL 5 // SAL items section 281 #define SN_SOFO 6 // soundfolding section 282 #define SN_MAP 7 // MAP items section 283 #define SN_COMPOUND 8 // compound words section 284 #define SN_SYLLABLE 9 // syllable section 285 #define SN_NOBREAK 10 // NOBREAK section 286 #define SN_SUGFILE 11 // timestamp for .sug file 287 #define SN_REPSAL 12 // REPSAL items section 288 #define SN_WORDS 13 // common words 289 #define SN_NOSPLITSUGS 14 // don't split word for suggestions 290 #define SN_INFO 15 // info section 291 #define SN_NOCOMPOUNDSUGS 16 // don't compound for suggestions 292 #define SN_END 255 // end of sections 293 294 #define SNF_REQUIRED 1 // <sectionflags>: required section 295 296 #define CF_WORD 0x01 297 #define CF_UPPER 0x02 298 299 static int set_spell_finish(spelltab_T *new_st); 300 static int write_spell_prefcond(FILE *fd, garray_T *gap); 301 static int read_region_section(FILE *fd, slang_T *slang, int len); 302 static int read_charflags_section(FILE *fd); 303 static int read_prefcond_section(FILE *fd, slang_T *lp); 304 static int read_rep_section(FILE *fd, garray_T *gap, short *first); 305 static int read_sal_section(FILE *fd, slang_T *slang); 306 static int read_words_section(FILE *fd, slang_T *lp, int len); 307 static int read_sofo_section(FILE *fd, slang_T *slang); 308 static int read_compound(FILE *fd, slang_T *slang, int len); 309 static int set_sofo(slang_T *lp, char_u *from, char_u *to); 310 static void set_sal_first(slang_T *lp); 311 static int *mb_str2wide(char_u *s); 312 static int spell_read_tree(FILE *fd, char_u **bytsp, idx_T **idxsp, int prefixtree, int prefixcnt); 313 static idx_T read_tree_node(FILE *fd, char_u *byts, idx_T *idxs, int maxidx, idx_T startidx, int prefixtree, int maxprefcondnr); 314 
static void set_spell_charflags(char_u *flags, int cnt, char_u *upp); 315 static int set_spell_chartab(char_u *fol, char_u *low, char_u *upp); 316 static void set_map_str(slang_T *lp, char_u *map); 317 318 319 static char *e_spell_trunc = N_("E758: Truncated spell file"); 320 static char *e_afftrailing = N_("Trailing text in %s line %d: %s"); 321 static char *e_affname = N_("Affix name too long in %s line %d: %s"); 322 static char *e_affform = N_("E761: Format error in affix file FOL, LOW or UPP"); 323 static char *e_affrange = N_("E762: Character in FOL, LOW or UPP is out of range"); 324 static char *msg_compressing = N_("Compressing word tree..."); 325 326 /* 327 * Load one spell file and store the info into a slang_T. 328 * 329 * This is invoked in three ways: 330 * - From spell_load_cb() to load a spell file for the first time. "lang" is 331 * the language name, "old_lp" is NULL. Will allocate an slang_T. 332 * - To reload a spell file that was changed. "lang" is NULL and "old_lp" 333 * points to the existing slang_T. 334 * - Just after writing a .spl file; it's read back to produce the .sug file. 335 * "old_lp" is NULL and "lang" is NULL. Will allocate an slang_T. 336 * 337 * Returns the slang_T the spell file was loaded into. NULL for error. 
338 */ 339 slang_T * 340 spell_load_file( 341 char_u *fname, 342 char_u *lang, 343 slang_T *old_lp, 344 int silent) // no error if file doesn't exist 345 { 346 FILE *fd; 347 char_u buf[VIMSPELLMAGICL]; 348 char_u *p; 349 int i; 350 int n; 351 int len; 352 slang_T *lp = NULL; 353 int c = 0; 354 int res; 355 int did_estack_push = FALSE; 356 ESTACK_CHECK_DECLARATION 357 358 fd = mch_fopen((char *)fname, "r"); 359 if (fd == NULL) 360 { 361 if (!silent) 362 semsg(_(e_notopen), fname); 363 else if (p_verbose > 2) 364 { 365 verbose_enter(); 366 smsg((const char *)e_notopen, fname); 367 verbose_leave(); 368 } 369 goto endFAIL; 370 } 371 if (p_verbose > 2) 372 { 373 verbose_enter(); 374 smsg(_("Reading spell file \"%s\""), fname); 375 verbose_leave(); 376 } 377 378 if (old_lp == NULL) 379 { 380 lp = slang_alloc(lang); 381 if (lp == NULL) 382 goto endFAIL; 383 384 // Remember the file name, used to reload the file when it's updated. 385 lp->sl_fname = vim_strsave(fname); 386 if (lp->sl_fname == NULL) 387 goto endFAIL; 388 389 // Check for .add.spl (_add.spl for VMS). 390 lp->sl_add = strstr((char *)gettail(fname), SPL_FNAME_ADD) != NULL; 391 } 392 else 393 lp = old_lp; 394 395 // Set sourcing_name, so that error messages mention the file name. 396 estack_push(ETYPE_SPELL, fname, 0); 397 ESTACK_CHECK_SETUP 398 did_estack_push = TRUE; 399 400 /* 401 * <HEADER>: <fileID> 402 */ 403 for (i = 0; i < VIMSPELLMAGICL; ++i) 404 buf[i] = getc(fd); // <fileID> 405 if (STRNCMP(buf, VIMSPELLMAGIC, VIMSPELLMAGICL) != 0) 406 { 407 emsg(_("E757: This does not look like a spell file")); 408 goto endFAIL; 409 } 410 c = getc(fd); // <versionnr> 411 if (c < VIMSPELLVERSION) 412 { 413 emsg(_("E771: Old spell file, needs to be updated")); 414 goto endFAIL; 415 } 416 else if (c > VIMSPELLVERSION) 417 { 418 emsg(_("E772: Spell file is for newer version of Vim")); 419 goto endFAIL; 420 } 421 422 423 /* 424 * <SECTIONS>: <section> ... 
<sectionend> 425 * <section>: <sectionID> <sectionflags> <sectionlen> (section contents) 426 */ 427 for (;;) 428 { 429 n = getc(fd); // <sectionID> or <sectionend> 430 if (n == SN_END) 431 break; 432 c = getc(fd); // <sectionflags> 433 len = get4c(fd); // <sectionlen> 434 if (len < 0) 435 goto truncerr; 436 437 res = 0; 438 switch (n) 439 { 440 case SN_INFO: 441 lp->sl_info = read_string(fd, len); // <infotext> 442 if (lp->sl_info == NULL) 443 goto endFAIL; 444 break; 445 446 case SN_REGION: 447 res = read_region_section(fd, lp, len); 448 break; 449 450 case SN_CHARFLAGS: 451 res = read_charflags_section(fd); 452 break; 453 454 case SN_MIDWORD: 455 lp->sl_midword = read_string(fd, len); // <midword> 456 if (lp->sl_midword == NULL) 457 goto endFAIL; 458 break; 459 460 case SN_PREFCOND: 461 res = read_prefcond_section(fd, lp); 462 break; 463 464 case SN_REP: 465 res = read_rep_section(fd, &lp->sl_rep, lp->sl_rep_first); 466 break; 467 468 case SN_REPSAL: 469 res = read_rep_section(fd, &lp->sl_repsal, lp->sl_repsal_first); 470 break; 471 472 case SN_SAL: 473 res = read_sal_section(fd, lp); 474 break; 475 476 case SN_SOFO: 477 res = read_sofo_section(fd, lp); 478 break; 479 480 case SN_MAP: 481 p = read_string(fd, len); // <mapstr> 482 if (p == NULL) 483 goto endFAIL; 484 set_map_str(lp, p); 485 vim_free(p); 486 break; 487 488 case SN_WORDS: 489 res = read_words_section(fd, lp, len); 490 break; 491 492 case SN_SUGFILE: 493 lp->sl_sugtime = get8ctime(fd); // <timestamp> 494 break; 495 496 case SN_NOSPLITSUGS: 497 lp->sl_nosplitsugs = TRUE; 498 break; 499 500 case SN_NOCOMPOUNDSUGS: 501 lp->sl_nocompoundsugs = TRUE; 502 break; 503 504 case SN_COMPOUND: 505 res = read_compound(fd, lp, len); 506 break; 507 508 case SN_NOBREAK: 509 lp->sl_nobreak = TRUE; 510 break; 511 512 case SN_SYLLABLE: 513 lp->sl_syllable = read_string(fd, len); // <syllable> 514 if (lp->sl_syllable == NULL) 515 goto endFAIL; 516 if (init_syl_tab(lp) == FAIL) 517 goto endFAIL; 518 break; 519 520 
default: 521 // Unsupported section. When it's required give an error 522 // message. When it's not required skip the contents. 523 if (c & SNF_REQUIRED) 524 { 525 emsg(_("E770: Unsupported section in spell file")); 526 goto endFAIL; 527 } 528 while (--len >= 0) 529 if (getc(fd) < 0) 530 goto truncerr; 531 break; 532 } 533 someerror: 534 if (res == SP_FORMERROR) 535 { 536 emsg(_(e_format)); 537 goto endFAIL; 538 } 539 if (res == SP_TRUNCERROR) 540 { 541 truncerr: 542 emsg(_(e_spell_trunc)); 543 goto endFAIL; 544 } 545 if (res == SP_OTHERERROR) 546 goto endFAIL; 547 } 548 549 // <LWORDTREE> 550 res = spell_read_tree(fd, &lp->sl_fbyts, &lp->sl_fidxs, FALSE, 0); 551 if (res != 0) 552 goto someerror; 553 554 // <KWORDTREE> 555 res = spell_read_tree(fd, &lp->sl_kbyts, &lp->sl_kidxs, FALSE, 0); 556 if (res != 0) 557 goto someerror; 558 559 // <PREFIXTREE> 560 res = spell_read_tree(fd, &lp->sl_pbyts, &lp->sl_pidxs, TRUE, 561 lp->sl_prefixcnt); 562 if (res != 0) 563 goto someerror; 564 565 // For a new file link it in the list of spell files. 566 if (old_lp == NULL && lang != NULL) 567 { 568 lp->sl_next = first_lang; 569 first_lang = lp; 570 } 571 572 goto endOK; 573 574 endFAIL: 575 if (lang != NULL) 576 // truncating the name signals the error to spell_load_lang() 577 *lang = NUL; 578 if (lp != NULL && old_lp == NULL) 579 slang_free(lp); 580 lp = NULL; 581 582 endOK: 583 if (fd != NULL) 584 fclose(fd); 585 if (did_estack_push) 586 { 587 ESTACK_CHECK_NOW 588 estack_pop(); 589 } 590 591 return lp; 592 } 593 594 /* 595 * Fill in the wordcount fields for a trie. 596 * Returns the total number of words. 
597 */ 598 static void 599 tree_count_words(char_u *byts, idx_T *idxs) 600 { 601 int depth; 602 idx_T arridx[MAXWLEN]; 603 int curi[MAXWLEN]; 604 int c; 605 idx_T n; 606 int wordcount[MAXWLEN]; 607 608 arridx[0] = 0; 609 curi[0] = 1; 610 wordcount[0] = 0; 611 depth = 0; 612 while (depth >= 0 && !got_int) 613 { 614 if (curi[depth] > byts[arridx[depth]]) 615 { 616 // Done all bytes at this node, go up one level. 617 idxs[arridx[depth]] = wordcount[depth]; 618 if (depth > 0) 619 wordcount[depth - 1] += wordcount[depth]; 620 621 --depth; 622 fast_breakcheck(); 623 } 624 else 625 { 626 // Do one more byte at this node. 627 n = arridx[depth] + curi[depth]; 628 ++curi[depth]; 629 630 c = byts[n]; 631 if (c == 0) 632 { 633 // End of word, count it. 634 ++wordcount[depth]; 635 636 // Skip over any other NUL bytes (same word with different 637 // flags). 638 while (byts[n + 1] == 0) 639 { 640 ++n; 641 ++curi[depth]; 642 } 643 } 644 else 645 { 646 // Normal char, go one level deeper to count the words. 647 ++depth; 648 arridx[depth] = idxs[n]; 649 curi[depth] = 1; 650 wordcount[depth] = 0; 651 } 652 } 653 } 654 } 655 656 /* 657 * Load the .sug files for languages that have one and weren't loaded yet. 658 */ 659 void 660 suggest_load_files(void) 661 { 662 langp_T *lp; 663 int lpi; 664 slang_T *slang; 665 char_u *dotp; 666 FILE *fd; 667 char_u buf[MAXWLEN]; 668 int i; 669 time_t timestamp; 670 int wcount; 671 int wordnr; 672 garray_T ga; 673 int c; 674 675 // Do this for all languages that support sound folding. 676 for (lpi = 0; lpi < curwin->w_s->b_langp.ga_len; ++lpi) 677 { 678 lp = LANGP_ENTRY(curwin->w_s->b_langp, lpi); 679 slang = lp->lp_slang; 680 if (slang->sl_sugtime != 0 && !slang->sl_sugloaded) 681 { 682 // Change ".spl" to ".sug" and open the file. When the file isn't 683 // found silently skip it. Do set "sl_sugloaded" so that we 684 // don't try again and again. 
685 slang->sl_sugloaded = TRUE; 686 687 dotp = vim_strrchr(slang->sl_fname, '.'); 688 if (dotp == NULL || fnamecmp(dotp, ".spl") != 0) 689 continue; 690 STRCPY(dotp, ".sug"); 691 fd = mch_fopen((char *)slang->sl_fname, "r"); 692 if (fd == NULL) 693 goto nextone; 694 695 /* 696 * <SUGHEADER>: <fileID> <versionnr> <timestamp> 697 */ 698 for (i = 0; i < VIMSUGMAGICL; ++i) 699 buf[i] = getc(fd); // <fileID> 700 if (STRNCMP(buf, VIMSUGMAGIC, VIMSUGMAGICL) != 0) 701 { 702 semsg(_("E778: This does not look like a .sug file: %s"), 703 slang->sl_fname); 704 goto nextone; 705 } 706 c = getc(fd); // <versionnr> 707 if (c < VIMSUGVERSION) 708 { 709 semsg(_("E779: Old .sug file, needs to be updated: %s"), 710 slang->sl_fname); 711 goto nextone; 712 } 713 else if (c > VIMSUGVERSION) 714 { 715 semsg(_("E780: .sug file is for newer version of Vim: %s"), 716 slang->sl_fname); 717 goto nextone; 718 } 719 720 // Check the timestamp, it must be exactly the same as the one in 721 // the .spl file. Otherwise the word numbers won't match. 722 timestamp = get8ctime(fd); // <timestamp> 723 if (timestamp != slang->sl_sugtime) 724 { 725 semsg(_("E781: .sug file doesn't match .spl file: %s"), 726 slang->sl_fname); 727 goto nextone; 728 } 729 730 /* 731 * <SUGWORDTREE>: <wordtree> 732 * Read the trie with the soundfolded words. 733 */ 734 if (spell_read_tree(fd, &slang->sl_sbyts, &slang->sl_sidxs, 735 FALSE, 0) != 0) 736 { 737 someerror: 738 semsg(_("E782: error while reading .sug file: %s"), 739 slang->sl_fname); 740 slang_clear_sug(slang); 741 goto nextone; 742 } 743 744 /* 745 * <SUGTABLE>: <sugwcount> <sugline> ... 746 * 747 * Read the table with word numbers. We use a file buffer for 748 * this, because it's so much like a file with lines. Makes it 749 * possible to swap the info and save on memory use. 
750 */ 751 slang->sl_sugbuf = open_spellbuf(); 752 if (slang->sl_sugbuf == NULL) 753 goto someerror; 754 // <sugwcount> 755 wcount = get4c(fd); 756 if (wcount < 0) 757 goto someerror; 758 759 // Read all the wordnr lists into the buffer, one NUL terminated 760 // list per line. 761 ga_init2(&ga, 1, 100); 762 for (wordnr = 0; wordnr < wcount; ++wordnr) 763 { 764 ga.ga_len = 0; 765 for (;;) 766 { 767 c = getc(fd); // <sugline> 768 if (c < 0 || ga_grow(&ga, 1) == FAIL) 769 goto someerror; 770 ((char_u *)ga.ga_data)[ga.ga_len++] = c; 771 if (c == NUL) 772 break; 773 } 774 if (ml_append_buf(slang->sl_sugbuf, (linenr_T)wordnr, 775 ga.ga_data, ga.ga_len, TRUE) == FAIL) 776 goto someerror; 777 } 778 ga_clear(&ga); 779 780 /* 781 * Need to put word counts in the word tries, so that we can find 782 * a word by its number. 783 */ 784 tree_count_words(slang->sl_fbyts, slang->sl_fidxs); 785 tree_count_words(slang->sl_sbyts, slang->sl_sidxs); 786 787 nextone: 788 if (fd != NULL) 789 fclose(fd); 790 STRCPY(dotp, ".spl"); 791 } 792 } 793 } 794 795 796 /* 797 * Read a length field from "fd" in "cnt_bytes" bytes. 798 * Allocate memory, read the string into it and add a NUL at the end. 799 * Returns NULL when the count is zero. 800 * Sets "*cntp" to SP_*ERROR when there is an error, length of the result 801 * otherwise. 802 */ 803 static char_u * 804 read_cnt_string(FILE *fd, int cnt_bytes, int *cntp) 805 { 806 int cnt = 0; 807 int i; 808 char_u *str; 809 810 // read the length bytes, MSB first 811 for (i = 0; i < cnt_bytes; ++i) 812 cnt = (cnt << 8) + getc(fd); 813 if (cnt < 0) 814 { 815 *cntp = SP_TRUNCERROR; 816 return NULL; 817 } 818 *cntp = cnt; 819 if (cnt == 0) 820 return NULL; // nothing to read, return NULL 821 822 str = read_string(fd, cnt); 823 if (str == NULL) 824 *cntp = SP_OTHERERROR; 825 return str; 826 } 827 828 /* 829 * Read SN_REGION: <regionname> ... 830 * Return SP_*ERROR flags. 
831 */ 832 static int 833 read_region_section(FILE *fd, slang_T *lp, int len) 834 { 835 int i; 836 837 if (len > MAXREGIONS * 2) 838 return SP_FORMERROR; 839 for (i = 0; i < len; ++i) 840 lp->sl_regions[i] = getc(fd); // <regionname> 841 lp->sl_regions[len] = NUL; 842 return 0; 843 } 844 845 /* 846 * Read SN_CHARFLAGS section: <charflagslen> <charflags> 847 * <folcharslen> <folchars> 848 * Return SP_*ERROR flags. 849 */ 850 static int 851 read_charflags_section(FILE *fd) 852 { 853 char_u *flags; 854 char_u *fol; 855 int flagslen, follen; 856 857 // <charflagslen> <charflags> 858 flags = read_cnt_string(fd, 1, &flagslen); 859 if (flagslen < 0) 860 return flagslen; 861 862 // <folcharslen> <folchars> 863 fol = read_cnt_string(fd, 2, &follen); 864 if (follen < 0) 865 { 866 vim_free(flags); 867 return follen; 868 } 869 870 // Set the word-char flags and fill SPELL_ISUPPER() table. 871 if (flags != NULL && fol != NULL) 872 set_spell_charflags(flags, flagslen, fol); 873 874 vim_free(flags); 875 vim_free(fol); 876 877 // When <charflagslen> is zero then <fcharlen> must also be zero. 878 if ((flags == NULL) != (fol == NULL)) 879 return SP_FORMERROR; 880 return 0; 881 } 882 883 /* 884 * Read SN_PREFCOND section. 885 * Return SP_*ERROR flags. 886 */ 887 static int 888 read_prefcond_section(FILE *fd, slang_T *lp) 889 { 890 int cnt; 891 int i; 892 int n; 893 char_u *p; 894 char_u buf[MAXWLEN + 1]; 895 896 // <prefcondcnt> <prefcond> ... 897 cnt = get2c(fd); // <prefcondcnt> 898 if (cnt <= 0) 899 return SP_FORMERROR; 900 901 lp->sl_prefprog = ALLOC_CLEAR_MULT(regprog_T *, cnt); 902 if (lp->sl_prefprog == NULL) 903 return SP_OTHERERROR; 904 lp->sl_prefixcnt = cnt; 905 906 for (i = 0; i < cnt; ++i) 907 { 908 // <prefcond> : <condlen> <condstr> 909 n = getc(fd); // <condlen> 910 if (n < 0 || n >= MAXWLEN) 911 return SP_FORMERROR; 912 913 // When <condlen> is zero we have an empty condition. Otherwise 914 // compile the regexp program used to check for the condition. 
915 if (n > 0) 916 { 917 buf[0] = '^'; // always match at one position only 918 p = buf + 1; 919 while (n-- > 0) 920 *p++ = getc(fd); // <condstr> 921 *p = NUL; 922 lp->sl_prefprog[i] = vim_regcomp(buf, RE_MAGIC + RE_STRING); 923 } 924 } 925 return 0; 926 } 927 928 /* 929 * Read REP or REPSAL items section from "fd": <repcount> <rep> ... 930 * Return SP_*ERROR flags. 931 */ 932 static int 933 read_rep_section(FILE *fd, garray_T *gap, short *first) 934 { 935 int cnt; 936 fromto_T *ftp; 937 int i; 938 939 cnt = get2c(fd); // <repcount> 940 if (cnt < 0) 941 return SP_TRUNCERROR; 942 943 if (ga_grow(gap, cnt) == FAIL) 944 return SP_OTHERERROR; 945 946 // <rep> : <repfromlen> <repfrom> <reptolen> <repto> 947 for (; gap->ga_len < cnt; ++gap->ga_len) 948 { 949 ftp = &((fromto_T *)gap->ga_data)[gap->ga_len]; 950 ftp->ft_from = read_cnt_string(fd, 1, &i); 951 if (i < 0) 952 return i; 953 if (i == 0) 954 return SP_FORMERROR; 955 ftp->ft_to = read_cnt_string(fd, 1, &i); 956 if (i <= 0) 957 { 958 vim_free(ftp->ft_from); 959 if (i < 0) 960 return i; 961 return SP_FORMERROR; 962 } 963 } 964 965 // Fill the first-index table. 966 for (i = 0; i < 256; ++i) 967 first[i] = -1; 968 for (i = 0; i < gap->ga_len; ++i) 969 { 970 ftp = &((fromto_T *)gap->ga_data)[i]; 971 if (first[*ftp->ft_from] == -1) 972 first[*ftp->ft_from] = i; 973 } 974 return 0; 975 } 976 977 /* 978 * Read SN_SAL section: <salflags> <salcount> <sal> ... 979 * Return SP_*ERROR flags. 
980 */ 981 static int 982 read_sal_section(FILE *fd, slang_T *slang) 983 { 984 int i; 985 int cnt; 986 garray_T *gap; 987 salitem_T *smp; 988 int ccnt; 989 char_u *p; 990 int c = NUL; 991 992 slang->sl_sofo = FALSE; 993 994 i = getc(fd); // <salflags> 995 if (i & SAL_F0LLOWUP) 996 slang->sl_followup = TRUE; 997 if (i & SAL_COLLAPSE) 998 slang->sl_collapse = TRUE; 999 if (i & SAL_REM_ACCENTS) 1000 slang->sl_rem_accents = TRUE; 1001 1002 cnt = get2c(fd); // <salcount> 1003 if (cnt < 0) 1004 return SP_TRUNCERROR; 1005 1006 gap = &slang->sl_sal; 1007 ga_init2(gap, sizeof(salitem_T), 10); 1008 if (ga_grow(gap, cnt + 1) == FAIL) 1009 return SP_OTHERERROR; 1010 1011 // <sal> : <salfromlen> <salfrom> <saltolen> <salto> 1012 for (; gap->ga_len < cnt; ++gap->ga_len) 1013 { 1014 smp = &((salitem_T *)gap->ga_data)[gap->ga_len]; 1015 ccnt = getc(fd); // <salfromlen> 1016 if (ccnt < 0) 1017 return SP_TRUNCERROR; 1018 if ((p = alloc(ccnt + 2)) == NULL) 1019 return SP_OTHERERROR; 1020 smp->sm_lead = p; 1021 1022 // Read up to the first special char into sm_lead. 1023 for (i = 0; i < ccnt; ++i) 1024 { 1025 c = getc(fd); // <salfrom> 1026 if (vim_strchr((char_u *)"0123456789(-<^$", c) != NULL) 1027 break; 1028 *p++ = c; 1029 } 1030 smp->sm_leadlen = (int)(p - smp->sm_lead); 1031 *p++ = NUL; 1032 1033 // Put (abc) chars in sm_oneof, if any. 1034 if (c == '(') 1035 { 1036 smp->sm_oneof = p; 1037 for (++i; i < ccnt; ++i) 1038 { 1039 c = getc(fd); // <salfrom> 1040 if (c == ')') 1041 break; 1042 *p++ = c; 1043 } 1044 *p++ = NUL; 1045 if (++i < ccnt) 1046 c = getc(fd); 1047 } 1048 else 1049 smp->sm_oneof = NULL; 1050 1051 // Any following chars go in sm_rules. 
1052 smp->sm_rules = p; 1053 if (i < ccnt) 1054 // store the char we got while checking for end of sm_lead 1055 *p++ = c; 1056 for (++i; i < ccnt; ++i) 1057 *p++ = getc(fd); // <salfrom> 1058 *p++ = NUL; 1059 1060 // <saltolen> <salto> 1061 smp->sm_to = read_cnt_string(fd, 1, &ccnt); 1062 if (ccnt < 0) 1063 { 1064 vim_free(smp->sm_lead); 1065 return ccnt; 1066 } 1067 1068 if (has_mbyte) 1069 { 1070 // convert the multi-byte strings to wide char strings 1071 smp->sm_lead_w = mb_str2wide(smp->sm_lead); 1072 smp->sm_leadlen = mb_charlen(smp->sm_lead); 1073 if (smp->sm_oneof == NULL) 1074 smp->sm_oneof_w = NULL; 1075 else 1076 smp->sm_oneof_w = mb_str2wide(smp->sm_oneof); 1077 if (smp->sm_to == NULL) 1078 smp->sm_to_w = NULL; 1079 else 1080 smp->sm_to_w = mb_str2wide(smp->sm_to); 1081 if (smp->sm_lead_w == NULL 1082 || (smp->sm_oneof_w == NULL && smp->sm_oneof != NULL) 1083 || (smp->sm_to_w == NULL && smp->sm_to != NULL)) 1084 { 1085 vim_free(smp->sm_lead); 1086 vim_free(smp->sm_to); 1087 vim_free(smp->sm_lead_w); 1088 vim_free(smp->sm_oneof_w); 1089 vim_free(smp->sm_to_w); 1090 return SP_OTHERERROR; 1091 } 1092 } 1093 } 1094 1095 if (gap->ga_len > 0) 1096 { 1097 // Add one extra entry to mark the end with an empty sm_lead. Avoids 1098 // that we need to check the index every time. 1099 smp = &((salitem_T *)gap->ga_data)[gap->ga_len]; 1100 if ((p = alloc(1)) == NULL) 1101 return SP_OTHERERROR; 1102 p[0] = NUL; 1103 smp->sm_lead = p; 1104 smp->sm_leadlen = 0; 1105 smp->sm_oneof = NULL; 1106 smp->sm_rules = p; 1107 smp->sm_to = NULL; 1108 if (has_mbyte) 1109 { 1110 smp->sm_lead_w = mb_str2wide(smp->sm_lead); 1111 smp->sm_leadlen = 0; 1112 smp->sm_oneof_w = NULL; 1113 smp->sm_to_w = NULL; 1114 } 1115 ++gap->ga_len; 1116 } 1117 1118 // Fill the first-index table. 1119 set_sal_first(slang); 1120 1121 return 0; 1122 } 1123 1124 /* 1125 * Read SN_WORDS: <word> ... 1126 * Return SP_*ERROR flags. 
1127 */ 1128 static int 1129 read_words_section(FILE *fd, slang_T *lp, int len) 1130 { 1131 int done = 0; 1132 int i; 1133 int c; 1134 char_u word[MAXWLEN]; 1135 1136 while (done < len) 1137 { 1138 // Read one word at a time. 1139 for (i = 0; ; ++i) 1140 { 1141 c = getc(fd); 1142 if (c == EOF) 1143 return SP_TRUNCERROR; 1144 word[i] = c; 1145 if (word[i] == NUL) 1146 break; 1147 if (i == MAXWLEN - 1) 1148 return SP_FORMERROR; 1149 } 1150 1151 // Init the count to 10. 1152 count_common_word(lp, word, -1, 10); 1153 done += i + 1; 1154 } 1155 return 0; 1156 } 1157 1158 /* 1159 * SN_SOFO: <sofofromlen> <sofofrom> <sofotolen> <sofoto> 1160 * Return SP_*ERROR flags. 1161 */ 1162 static int 1163 read_sofo_section(FILE *fd, slang_T *slang) 1164 { 1165 int cnt; 1166 char_u *from, *to; 1167 int res; 1168 1169 slang->sl_sofo = TRUE; 1170 1171 // <sofofromlen> <sofofrom> 1172 from = read_cnt_string(fd, 2, &cnt); 1173 if (cnt < 0) 1174 return cnt; 1175 1176 // <sofotolen> <sofoto> 1177 to = read_cnt_string(fd, 2, &cnt); 1178 if (cnt < 0) 1179 { 1180 vim_free(from); 1181 return cnt; 1182 } 1183 1184 // Store the info in slang->sl_sal and/or slang->sl_sal_first. 1185 if (from != NULL && to != NULL) 1186 res = set_sofo(slang, from, to); 1187 else if (from != NULL || to != NULL) 1188 res = SP_FORMERROR; // only one of two strings is an error 1189 else 1190 res = 0; 1191 1192 vim_free(from); 1193 vim_free(to); 1194 return res; 1195 } 1196 1197 /* 1198 * Read the compound section from the .spl file: 1199 * <compmax> <compminlen> <compsylmax> <compoptions> <compflags> 1200 * Returns SP_*ERROR flags. 
1201 */ 1202 static int 1203 read_compound(FILE *fd, slang_T *slang, int len) 1204 { 1205 int todo = len; 1206 int c; 1207 int atstart; 1208 char_u *pat; 1209 char_u *pp; 1210 char_u *cp; 1211 char_u *ap; 1212 char_u *crp; 1213 int cnt; 1214 garray_T *gap; 1215 1216 if (todo < 2) 1217 return SP_FORMERROR; // need at least two bytes 1218 1219 --todo; 1220 c = getc(fd); // <compmax> 1221 if (c < 2) 1222 c = MAXWLEN; 1223 slang->sl_compmax = c; 1224 1225 --todo; 1226 c = getc(fd); // <compminlen> 1227 if (c < 1) 1228 c = 0; 1229 slang->sl_compminlen = c; 1230 1231 --todo; 1232 c = getc(fd); // <compsylmax> 1233 if (c < 1) 1234 c = MAXWLEN; 1235 slang->sl_compsylmax = c; 1236 1237 c = getc(fd); // <compoptions> 1238 if (c != 0) 1239 ungetc(c, fd); // be backwards compatible with Vim 7.0b 1240 else 1241 { 1242 --todo; 1243 c = getc(fd); // only use the lower byte for now 1244 --todo; 1245 slang->sl_compoptions = c; 1246 1247 gap = &slang->sl_comppat; 1248 c = get2c(fd); // <comppatcount> 1249 todo -= 2; 1250 ga_init2(gap, sizeof(char_u *), c); 1251 if (ga_grow(gap, c) == OK) 1252 while (--c >= 0) 1253 { 1254 ((char_u **)(gap->ga_data))[gap->ga_len++] = 1255 read_cnt_string(fd, 1, &cnt); 1256 // <comppatlen> <comppattext> 1257 if (cnt < 0) 1258 return cnt; 1259 todo -= cnt + 1; 1260 } 1261 } 1262 if (todo < 0) 1263 return SP_FORMERROR; 1264 1265 // Turn the COMPOUNDRULE items into a regexp pattern: 1266 // "a[bc]/a*b+" -> "^\(a[bc]\|a*b\+\)$". 1267 // Inserting backslashes may double the length, "^\(\)$<Nul>" is 7 bytes. 1268 // Conversion to utf-8 may double the size. 1269 c = todo * 2 + 7; 1270 if (enc_utf8) 1271 c += todo * 2; 1272 pat = alloc(c); 1273 if (pat == NULL) 1274 return SP_OTHERERROR; 1275 1276 // We also need a list of all flags that can appear at the start and one 1277 // for all flags. 
1278 cp = alloc(todo + 1); 1279 if (cp == NULL) 1280 { 1281 vim_free(pat); 1282 return SP_OTHERERROR; 1283 } 1284 slang->sl_compstartflags = cp; 1285 *cp = NUL; 1286 1287 ap = alloc(todo + 1); 1288 if (ap == NULL) 1289 { 1290 vim_free(pat); 1291 return SP_OTHERERROR; 1292 } 1293 slang->sl_compallflags = ap; 1294 *ap = NUL; 1295 1296 // And a list of all patterns in their original form, for checking whether 1297 // compounding may work in match_compoundrule(). This is freed when we 1298 // encounter a wildcard, the check doesn't work then. 1299 crp = alloc(todo + 1); 1300 slang->sl_comprules = crp; 1301 1302 pp = pat; 1303 *pp++ = '^'; 1304 *pp++ = '\\'; 1305 *pp++ = '('; 1306 1307 atstart = 1; 1308 while (todo-- > 0) 1309 { 1310 c = getc(fd); // <compflags> 1311 if (c == EOF) 1312 { 1313 vim_free(pat); 1314 return SP_TRUNCERROR; 1315 } 1316 1317 // Add all flags to "sl_compallflags". 1318 if (vim_strchr((char_u *)"?*+[]/", c) == NULL 1319 && !byte_in_str(slang->sl_compallflags, c)) 1320 { 1321 *ap++ = c; 1322 *ap = NUL; 1323 } 1324 1325 if (atstart != 0) 1326 { 1327 // At start of item: copy flags to "sl_compstartflags". For a 1328 // [abc] item set "atstart" to 2 and copy up to the ']'. 1329 if (c == '[') 1330 atstart = 2; 1331 else if (c == ']') 1332 atstart = 0; 1333 else 1334 { 1335 if (!byte_in_str(slang->sl_compstartflags, c)) 1336 { 1337 *cp++ = c; 1338 *cp = NUL; 1339 } 1340 if (atstart == 1) 1341 atstart = 0; 1342 } 1343 } 1344 1345 // Copy flag to "sl_comprules", unless we run into a wildcard. 1346 if (crp != NULL) 1347 { 1348 if (c == '?' || c == '+' || c == '*') 1349 { 1350 VIM_CLEAR(slang->sl_comprules); 1351 crp = NULL; 1352 } 1353 else 1354 *crp++ = c; 1355 } 1356 1357 if (c == '/') // slash separates two items 1358 { 1359 *pp++ = '\\'; 1360 *pp++ = '|'; 1361 atstart = 1; 1362 } 1363 else // normal char, "[abc]" and '*' are copied as-is 1364 { 1365 if (c == '?' || c == '+' || c == '~') 1366 *pp++ = '\\'; // "a?" 
becomes "a\?", "a+" becomes "a\+" 1367 if (enc_utf8) 1368 pp += mb_char2bytes(c, pp); 1369 else 1370 *pp++ = c; 1371 } 1372 } 1373 1374 *pp++ = '\\'; 1375 *pp++ = ')'; 1376 *pp++ = '$'; 1377 *pp = NUL; 1378 1379 if (crp != NULL) 1380 *crp = NUL; 1381 1382 slang->sl_compprog = vim_regcomp(pat, RE_MAGIC + RE_STRING + RE_STRICT); 1383 vim_free(pat); 1384 if (slang->sl_compprog == NULL) 1385 return SP_FORMERROR; 1386 1387 return 0; 1388 } 1389 1390 /* 1391 * Set the SOFOFROM and SOFOTO items in language "lp". 1392 * Returns SP_*ERROR flags when there is something wrong. 1393 */ 1394 static int 1395 set_sofo(slang_T *lp, char_u *from, char_u *to) 1396 { 1397 int i; 1398 1399 garray_T *gap; 1400 char_u *s; 1401 char_u *p; 1402 int c; 1403 int *inp; 1404 1405 if (has_mbyte) 1406 { 1407 // Use "sl_sal" as an array with 256 pointers to a list of wide 1408 // characters. The index is the low byte of the character. 1409 // The list contains from-to pairs with a terminating NUL. 1410 // sl_sal_first[] is used for latin1 "from" characters. 1411 gap = &lp->sl_sal; 1412 ga_init2(gap, sizeof(int *), 1); 1413 if (ga_grow(gap, 256) == FAIL) 1414 return SP_OTHERERROR; 1415 vim_memset(gap->ga_data, 0, sizeof(int *) * 256); 1416 gap->ga_len = 256; 1417 1418 // First count the number of items for each list. Temporarily use 1419 // sl_sal_first[] for this. 1420 for (p = from, s = to; *p != NUL && *s != NUL; ) 1421 { 1422 c = mb_cptr2char_adv(&p); 1423 MB_CPTR_ADV(s); 1424 if (c >= 256) 1425 ++lp->sl_sal_first[c & 0xff]; 1426 } 1427 if (*p != NUL || *s != NUL) // lengths differ 1428 return SP_FORMERROR; 1429 1430 // Allocate the lists. 1431 for (i = 0; i < 256; ++i) 1432 if (lp->sl_sal_first[i] > 0) 1433 { 1434 p = alloc(sizeof(int) * (lp->sl_sal_first[i] * 2 + 1)); 1435 if (p == NULL) 1436 return SP_OTHERERROR; 1437 ((int **)gap->ga_data)[i] = (int *)p; 1438 *(int *)p = 0; 1439 } 1440 1441 // Put the characters up to 255 in sl_sal_first[] the rest in a sl_sal 1442 // list. 
1443 vim_memset(lp->sl_sal_first, 0, sizeof(salfirst_T) * 256); 1444 for (p = from, s = to; *p != NUL && *s != NUL; ) 1445 { 1446 c = mb_cptr2char_adv(&p); 1447 i = mb_cptr2char_adv(&s); 1448 if (c >= 256) 1449 { 1450 // Append the from-to chars at the end of the list with 1451 // the low byte. 1452 inp = ((int **)gap->ga_data)[c & 0xff]; 1453 while (*inp != 0) 1454 ++inp; 1455 *inp++ = c; // from char 1456 *inp++ = i; // to char 1457 *inp++ = NUL; // NUL at the end 1458 } 1459 else 1460 // mapping byte to char is done in sl_sal_first[] 1461 lp->sl_sal_first[c] = i; 1462 } 1463 } 1464 else 1465 { 1466 // mapping bytes to bytes is done in sl_sal_first[] 1467 if (STRLEN(from) != STRLEN(to)) 1468 return SP_FORMERROR; 1469 1470 for (i = 0; to[i] != NUL; ++i) 1471 lp->sl_sal_first[from[i]] = to[i]; 1472 lp->sl_sal.ga_len = 1; // indicates we have soundfolding 1473 } 1474 1475 return 0; 1476 } 1477 1478 /* 1479 * Fill the first-index table for "lp". 1480 */ 1481 static void 1482 set_sal_first(slang_T *lp) 1483 { 1484 salfirst_T *sfirst; 1485 int i; 1486 salitem_T *smp; 1487 int c; 1488 garray_T *gap = &lp->sl_sal; 1489 1490 sfirst = lp->sl_sal_first; 1491 for (i = 0; i < 256; ++i) 1492 sfirst[i] = -1; 1493 smp = (salitem_T *)gap->ga_data; 1494 for (i = 0; i < gap->ga_len; ++i) 1495 { 1496 if (has_mbyte) 1497 // Use the lowest byte of the first character. For latin1 it's 1498 // the character, for other encodings it should differ for most 1499 // characters. 1500 c = *smp[i].sm_lead_w & 0xff; 1501 else 1502 c = *smp[i].sm_lead; 1503 if (sfirst[c] == -1) 1504 { 1505 sfirst[c] = i; 1506 if (has_mbyte) 1507 { 1508 int n; 1509 1510 // Make sure all entries with this byte are following each 1511 // other. Move the ones that are in the wrong position. Do 1512 // keep the same ordering! 1513 while (i + 1 < gap->ga_len 1514 && (*smp[i + 1].sm_lead_w & 0xff) == c) 1515 // Skip over entry with same index byte. 
1516 ++i; 1517 1518 for (n = 1; i + n < gap->ga_len; ++n) 1519 if ((*smp[i + n].sm_lead_w & 0xff) == c) 1520 { 1521 salitem_T tsal; 1522 1523 // Move entry with same index byte after the entries 1524 // we already found. 1525 ++i; 1526 --n; 1527 tsal = smp[i + n]; 1528 mch_memmove(smp + i + 1, smp + i, 1529 sizeof(salitem_T) * n); 1530 smp[i] = tsal; 1531 } 1532 } 1533 } 1534 } 1535 } 1536 1537 /* 1538 * Turn a multi-byte string into a wide character string. 1539 * Return it in allocated memory (NULL for out-of-memory) 1540 */ 1541 static int * 1542 mb_str2wide(char_u *s) 1543 { 1544 int *res; 1545 char_u *p; 1546 int i = 0; 1547 1548 res = ALLOC_MULT(int, mb_charlen(s) + 1); 1549 if (res != NULL) 1550 { 1551 for (p = s; *p != NUL; ) 1552 res[i++] = mb_ptr2char_adv(&p); 1553 res[i] = NUL; 1554 } 1555 return res; 1556 } 1557 1558 /* 1559 * Read a tree from the .spl or .sug file. 1560 * Allocates the memory and stores pointers in "bytsp" and "idxsp". 1561 * This is skipped when the tree has zero length. 1562 * Returns zero when OK, SP_ value for an error. 1563 */ 1564 static int 1565 spell_read_tree( 1566 FILE *fd, 1567 char_u **bytsp, 1568 idx_T **idxsp, 1569 int prefixtree, // TRUE for the prefix tree 1570 int prefixcnt) // when "prefixtree" is TRUE: prefix count 1571 { 1572 long len; 1573 int idx; 1574 char_u *bp; 1575 idx_T *ip; 1576 1577 // The tree size was computed when writing the file, so that we can 1578 // allocate it as one long block. <nodecount> 1579 len = get4c(fd); 1580 if (len < 0) 1581 return SP_TRUNCERROR; 1582 if (len >= LONG_MAX / (long)sizeof(int)) 1583 // Invalid length, multiply with sizeof(int) would overflow. 1584 return SP_FORMERROR; 1585 if (len > 0) 1586 { 1587 // Allocate the byte array. 1588 bp = alloc(len); 1589 if (bp == NULL) 1590 return SP_OTHERERROR; 1591 *bytsp = bp; 1592 1593 // Allocate the index array. 
1594 ip = lalloc_clear(len * sizeof(int), TRUE); 1595 if (ip == NULL) 1596 return SP_OTHERERROR; 1597 *idxsp = ip; 1598 1599 // Recursively read the tree and store it in the array. 1600 idx = read_tree_node(fd, bp, ip, len, 0, prefixtree, prefixcnt); 1601 if (idx < 0) 1602 return idx; 1603 } 1604 return 0; 1605 } 1606 1607 /* 1608 * Read one row of siblings from the spell file and store it in the byte array 1609 * "byts" and index array "idxs". Recursively read the children. 1610 * 1611 * NOTE: The code here must match put_node()! 1612 * 1613 * Returns the index (>= 0) following the siblings. 1614 * Returns SP_TRUNCERROR if the file is shorter than expected. 1615 * Returns SP_FORMERROR if there is a format error. 1616 */ 1617 static idx_T 1618 read_tree_node( 1619 FILE *fd, 1620 char_u *byts, 1621 idx_T *idxs, 1622 int maxidx, // size of arrays 1623 idx_T startidx, // current index in "byts" and "idxs" 1624 int prefixtree, // TRUE for reading PREFIXTREE 1625 int maxprefcondnr) // maximum for <prefcondnr> 1626 { 1627 int len; 1628 int i; 1629 int n; 1630 idx_T idx = startidx; 1631 int c; 1632 int c2; 1633 #define SHARED_MASK 0x8000000 1634 1635 len = getc(fd); // <siblingcount> 1636 if (len <= 0) 1637 return SP_TRUNCERROR; 1638 1639 if (startidx + len >= maxidx) 1640 return SP_FORMERROR; 1641 byts[idx++] = len; 1642 1643 // Read the byte values, flag/region bytes and shared indexes. 1644 for (i = 1; i <= len; ++i) 1645 { 1646 c = getc(fd); // <byte> 1647 if (c < 0) 1648 return SP_TRUNCERROR; 1649 if (c <= BY_SPECIAL) 1650 { 1651 if (c == BY_NOFLAGS && !prefixtree) 1652 { 1653 // No flags, all regions. 1654 idxs[idx] = 0; 1655 c = 0; 1656 } 1657 else if (c != BY_INDEX) 1658 { 1659 if (prefixtree) 1660 { 1661 // Read the optional pflags byte, the prefix ID and the 1662 // condition nr. In idxs[] store the prefix ID in the low 1663 // byte, the condition index shifted up 8 bits, the flags 1664 // shifted up 24 bits. 
1665 if (c == BY_FLAGS) 1666 c = getc(fd) << 24; // <pflags> 1667 else 1668 c = 0; 1669 1670 c |= getc(fd); // <affixID> 1671 1672 n = get2c(fd); // <prefcondnr> 1673 if (n >= maxprefcondnr) 1674 return SP_FORMERROR; 1675 c |= (n << 8); 1676 } 1677 else // c must be BY_FLAGS or BY_FLAGS2 1678 { 1679 // Read flags and optional region and prefix ID. In 1680 // idxs[] the flags go in the low two bytes, region above 1681 // that and prefix ID above the region. 1682 c2 = c; 1683 c = getc(fd); // <flags> 1684 if (c2 == BY_FLAGS2) 1685 c = (getc(fd) << 8) + c; // <flags2> 1686 if (c & WF_REGION) 1687 c = (getc(fd) << 16) + c; // <region> 1688 if (c & WF_AFX) 1689 c = (getc(fd) << 24) + c; // <affixID> 1690 } 1691 1692 idxs[idx] = c; 1693 c = 0; 1694 } 1695 else // c == BY_INDEX 1696 { 1697 // <nodeidx> 1698 n = get3c(fd); 1699 if (n < 0 || n >= maxidx) 1700 return SP_FORMERROR; 1701 idxs[idx] = n + SHARED_MASK; 1702 c = getc(fd); // <xbyte> 1703 } 1704 } 1705 byts[idx++] = c; 1706 } 1707 1708 // Recursively read the children for non-shared siblings. 1709 // Skip the end-of-word ones (zero byte value) and the shared ones (and 1710 // remove SHARED_MASK) 1711 for (i = 1; i <= len; ++i) 1712 if (byts[startidx + i] != 0) 1713 { 1714 if (idxs[startidx + i] & SHARED_MASK) 1715 idxs[startidx + i] &= ~SHARED_MASK; 1716 else 1717 { 1718 idxs[startidx + i] = idx; 1719 idx = read_tree_node(fd, byts, idxs, maxidx, idx, 1720 prefixtree, maxprefcondnr); 1721 if (idx < 0) 1722 break; 1723 } 1724 } 1725 1726 return idx; 1727 } 1728 1729 /* 1730 * Reload the spell file "fname" if it's loaded. 
1731 */ 1732 static void 1733 spell_reload_one( 1734 char_u *fname, 1735 int added_word) // invoked through "zg" 1736 { 1737 slang_T *slang; 1738 int didit = FALSE; 1739 1740 for (slang = first_lang; slang != NULL; slang = slang->sl_next) 1741 { 1742 if (fullpathcmp(fname, slang->sl_fname, FALSE, TRUE) == FPC_SAME) 1743 { 1744 slang_clear(slang); 1745 if (spell_load_file(fname, NULL, slang, FALSE) == NULL) 1746 // reloading failed, clear the language 1747 slang_clear(slang); 1748 redraw_all_later(SOME_VALID); 1749 didit = TRUE; 1750 } 1751 } 1752 1753 // When "zg" was used and the file wasn't loaded yet, should redo 1754 // 'spelllang' to load it now. 1755 if (added_word && !didit) 1756 did_set_spelllang(curwin); 1757 } 1758 1759 1760 /* 1761 * Functions for ":mkspell". 1762 */ 1763 1764 #define MAXLINELEN 500 // Maximum length in bytes of a line in a .aff 1765 // and .dic file. 1766 /* 1767 * Main structure to store the contents of a ".aff" file. 1768 */ 1769 typedef struct afffile_S 1770 { 1771 char_u *af_enc; // "SET", normalized, alloc'ed string or NULL 1772 int af_flagtype; // AFT_CHAR, AFT_LONG, AFT_NUM or AFT_CAPLONG 1773 unsigned af_rare; // RARE ID for rare word 1774 unsigned af_keepcase; // KEEPCASE ID for keep-case word 1775 unsigned af_bad; // BAD ID for banned word 1776 unsigned af_needaffix; // NEEDAFFIX ID 1777 unsigned af_circumfix; // CIRCUMFIX ID 1778 unsigned af_needcomp; // NEEDCOMPOUND ID 1779 unsigned af_comproot; // COMPOUNDROOT ID 1780 unsigned af_compforbid; // COMPOUNDFORBIDFLAG ID 1781 unsigned af_comppermit; // COMPOUNDPERMITFLAG ID 1782 unsigned af_nosuggest; // NOSUGGEST ID 1783 int af_pfxpostpone; // postpone prefixes without chop string and 1784 // without flags 1785 int af_ignoreextra; // IGNOREEXTRA present 1786 hashtab_T af_pref; // hashtable for prefixes, affheader_T 1787 hashtab_T af_suff; // hashtable for suffixes, affheader_T 1788 hashtab_T af_comp; // hashtable for compound flags, compitem_T 1789 } afffile_T; 1790 1791 
#define AFT_CHAR 0 // flags are one character 1792 #define AFT_LONG 1 // flags are two characters 1793 #define AFT_CAPLONG 2 // flags are one or two characters 1794 #define AFT_NUM 3 // flags are numbers, comma separated 1795 1796 typedef struct affentry_S affentry_T; 1797 // Affix entry from ".aff" file. Used for prefixes and suffixes. 1798 struct affentry_S 1799 { 1800 affentry_T *ae_next; // next affix with same name/number 1801 char_u *ae_chop; // text to chop off basic word (can be NULL) 1802 char_u *ae_add; // text to add to basic word (can be NULL) 1803 char_u *ae_flags; // flags on the affix (can be NULL) 1804 char_u *ae_cond; // condition (NULL for ".") 1805 regprog_T *ae_prog; // regexp program for ae_cond or NULL 1806 char ae_compforbid; // COMPOUNDFORBIDFLAG found 1807 char ae_comppermit; // COMPOUNDPERMITFLAG found 1808 }; 1809 1810 #define AH_KEY_LEN 17 // 2 x 8 bytes + NUL 1811 1812 // Affix header from ".aff" file. Used for af_pref and af_suff. 1813 typedef struct affheader_S 1814 { 1815 char_u ah_key[AH_KEY_LEN]; // key for hashtab == name of affix 1816 unsigned ah_flag; // affix name as number, uses "af_flagtype" 1817 int ah_newID; // prefix ID after renumbering; 0 if not used 1818 int ah_combine; // suffix may combine with prefix 1819 int ah_follows; // another affix block should be following 1820 affentry_T *ah_first; // first affix entry 1821 } affheader_T; 1822 1823 #define HI2AH(hi) ((affheader_T *)(hi)->hi_key) 1824 1825 // Flag used in compound items. 1826 typedef struct compitem_S 1827 { 1828 char_u ci_key[AH_KEY_LEN]; // key for hashtab == name of compound 1829 unsigned ci_flag; // affix name as number, uses "af_flagtype" 1830 int ci_newID; // affix ID after renumbering. 1831 } compitem_T; 1832 1833 #define HI2CI(hi) ((compitem_T *)(hi)->hi_key) 1834 1835 /* 1836 * Structure that is used to store the items in the word tree. 
This avoids 1837 * the need to keep track of each allocated thing, everything is freed all at 1838 * once after ":mkspell" is done. 1839 * Note: "sb_next" must be just before "sb_data" to make sure the alignment of 1840 * "sb_data" is correct for systems where pointers must be aligned on 1841 * pointer-size boundaries and sizeof(pointer) > sizeof(int) (e.g., Sparc). 1842 */ 1843 #define SBLOCKSIZE 16000 // size of sb_data 1844 typedef struct sblock_S sblock_T; 1845 struct sblock_S 1846 { 1847 int sb_used; // nr of bytes already in use 1848 sblock_T *sb_next; // next block in list 1849 char_u sb_data[1]; // data, actually longer 1850 }; 1851 1852 /* 1853 * A node in the tree. 1854 */ 1855 typedef struct wordnode_S wordnode_T; 1856 struct wordnode_S 1857 { 1858 union // shared to save space 1859 { 1860 char_u hashkey[6]; // the hash key, only used while compressing 1861 int index; // index in written nodes (valid after first 1862 // round) 1863 } wn_u1; 1864 union // shared to save space 1865 { 1866 wordnode_T *next; // next node with same hash key 1867 wordnode_T *wnode; // parent node that will write this node 1868 } wn_u2; 1869 wordnode_T *wn_child; // child (next byte in word) 1870 wordnode_T *wn_sibling; // next sibling (alternate byte in word, 1871 // always sorted) 1872 int wn_refs; // Nr. of references to this node. Only 1873 // relevant for first node in a list of 1874 // siblings, in following siblings it is 1875 // always one. 1876 char_u wn_byte; // Byte for this node. NUL for word end 1877 1878 // Info for when "wn_byte" is NUL. 1879 // In PREFIXTREE "wn_region" is used for the prefcondnr. 1880 // In the soundfolded word tree "wn_flags" has the MSW of the wordnr and 1881 // "wn_region" the LSW of the wordnr. 
1882 char_u wn_affixID; // supported/required prefix ID or 0 1883 short_u wn_flags; // WF_ flags 1884 short wn_region; // region mask 1885 1886 #ifdef SPELL_PRINTTREE 1887 int wn_nr; // sequence nr for printing 1888 #endif 1889 }; 1890 1891 #define WN_MASK 0xffff // mask relevant bits of "wn_flags" 1892 1893 #define HI2WN(hi) (wordnode_T *)((hi)->hi_key) 1894 1895 /* 1896 * Info used while reading the spell files. 1897 */ 1898 typedef struct spellinfo_S 1899 { 1900 wordnode_T *si_foldroot; // tree with case-folded words 1901 long si_foldwcount; // nr of words in si_foldroot 1902 1903 wordnode_T *si_keeproot; // tree with keep-case words 1904 long si_keepwcount; // nr of words in si_keeproot 1905 1906 wordnode_T *si_prefroot; // tree with postponed prefixes 1907 1908 long si_sugtree; // creating the soundfolding trie 1909 1910 sblock_T *si_blocks; // memory blocks used 1911 long si_blocks_cnt; // memory blocks allocated 1912 int si_did_emsg; // TRUE when ran out of memory 1913 1914 long si_compress_cnt; // words to add before lowering 1915 // compression limit 1916 wordnode_T *si_first_free; // List of nodes that have been freed during 1917 // compression, linked by "wn_child" field. 
1918 long si_free_count; // number of nodes in si_first_free 1919 #ifdef SPELL_PRINTTREE 1920 int si_wordnode_nr; // sequence nr for nodes 1921 #endif 1922 buf_T *si_spellbuf; // buffer used to store soundfold word table 1923 1924 int si_ascii; // handling only ASCII words 1925 int si_add; // addition file 1926 int si_clear_chartab; // when TRUE clear char tables 1927 int si_region; // region mask 1928 vimconv_T si_conv; // for conversion to 'encoding' 1929 int si_memtot; // runtime memory used 1930 int si_verbose; // verbose messages 1931 int si_msg_count; // number of words added since last message 1932 char_u *si_info; // info text chars or NULL 1933 int si_region_count; // number of regions supported (1 when there 1934 // are no regions) 1935 char_u si_region_name[MAXREGIONS * 2 + 1]; 1936 // region names; used only if 1937 // si_region_count > 1) 1938 1939 garray_T si_rep; // list of fromto_T entries from REP lines 1940 garray_T si_repsal; // list of fromto_T entries from REPSAL lines 1941 garray_T si_sal; // list of fromto_T entries from SAL lines 1942 char_u *si_sofofr; // SOFOFROM text 1943 char_u *si_sofoto; // SOFOTO text 1944 int si_nosugfile; // NOSUGFILE item found 1945 int si_nosplitsugs; // NOSPLITSUGS item found 1946 int si_nocompoundsugs; // NOCOMPOUNDSUGS item found 1947 int si_followup; // soundsalike: ? 1948 int si_collapse; // soundsalike: ? 
1949 hashtab_T si_commonwords; // hashtable for common words 1950 time_t si_sugtime; // timestamp for .sug file 1951 int si_rem_accents; // soundsalike: remove accents 1952 garray_T si_map; // MAP info concatenated 1953 char_u *si_midword; // MIDWORD chars or NULL 1954 int si_compmax; // max nr of words for compounding 1955 int si_compminlen; // minimal length for compounding 1956 int si_compsylmax; // max nr of syllables for compounding 1957 int si_compoptions; // COMP_ flags 1958 garray_T si_comppat; // CHECKCOMPOUNDPATTERN items, each stored as 1959 // a string 1960 char_u *si_compflags; // flags used for compounding 1961 char_u si_nobreak; // NOBREAK 1962 char_u *si_syllable; // syllable string 1963 garray_T si_prefcond; // table with conditions for postponed 1964 // prefixes, each stored as a string 1965 int si_newprefID; // current value for ah_newID 1966 int si_newcompID; // current value for compound ID 1967 } spellinfo_T; 1968 1969 static int is_aff_rule(char_u **items, int itemcnt, char *rulename, int mincount); 1970 static void aff_process_flags(afffile_T *affile, affentry_T *entry); 1971 static int spell_info_item(char_u *s); 1972 static unsigned affitem2flag(int flagtype, char_u *item, char_u *fname, int lnum); 1973 static unsigned get_affitem(int flagtype, char_u **pp); 1974 static void process_compflags(spellinfo_T *spin, afffile_T *aff, char_u *compflags); 1975 static void check_renumber(spellinfo_T *spin); 1976 static void aff_check_number(int spinval, int affval, char *name); 1977 static void aff_check_string(char_u *spinval, char_u *affval, char *name); 1978 static int str_equal(char_u *s1, char_u *s2); 1979 static void add_fromto(spellinfo_T *spin, garray_T *gap, char_u *from, char_u *to); 1980 static int sal_to_bool(char_u *s); 1981 static int get_affix_flags(afffile_T *affile, char_u *afflist); 1982 static int get_pfxlist(afffile_T *affile, char_u *afflist, char_u *store_afflist); 1983 static void get_compflags(afffile_T *affile, char_u 
*afflist, char_u *store_afflist); 1984 static int store_aff_word(spellinfo_T *spin, char_u *word, char_u *afflist, afffile_T *affile, hashtab_T *ht, hashtab_T *xht, int condit, int flags, char_u *pfxlist, int pfxlen); 1985 static void *getroom(spellinfo_T *spin, size_t len, int align); 1986 static char_u *getroom_save(spellinfo_T *spin, char_u *s); 1987 static int store_word(spellinfo_T *spin, char_u *word, int flags, int region, char_u *pfxlist, int need_affix); 1988 static int tree_add_word(spellinfo_T *spin, char_u *word, wordnode_T *tree, int flags, int region, int affixID); 1989 static wordnode_T *get_wordnode(spellinfo_T *spin); 1990 static void free_wordnode(spellinfo_T *spin, wordnode_T *n); 1991 static void wordtree_compress(spellinfo_T *spin, wordnode_T *root); 1992 static int node_compress(spellinfo_T *spin, wordnode_T *node, hashtab_T *ht, int *tot); 1993 static int node_equal(wordnode_T *n1, wordnode_T *n2); 1994 static void clear_node(wordnode_T *node); 1995 static int put_node(FILE *fd, wordnode_T *node, int idx, int regionmask, int prefixtree); 1996 static int sug_filltree(spellinfo_T *spin, slang_T *slang); 1997 static int sug_maketable(spellinfo_T *spin); 1998 static int sug_filltable(spellinfo_T *spin, wordnode_T *node, int startwordnr, garray_T *gap); 1999 static int offset2bytes(int nr, char_u *buf); 2000 static void sug_write(spellinfo_T *spin, char_u *fname); 2001 static void spell_message(spellinfo_T *spin, char_u *str); 2002 static void init_spellfile(void); 2003 2004 // In the postponed prefixes tree wn_flags is used to store the WFP_ flags, 2005 // but it must be negative to indicate the prefix tree to tree_add_word(). 2006 // Use a negative number with the lower 8 bits zero. 
2007 #define PFX_FLAGS -256 2008 2009 // flags for "condit" argument of store_aff_word() 2010 #define CONDIT_COMB 1 // affix must combine 2011 #define CONDIT_CFIX 2 // affix must have CIRCUMFIX flag 2012 #define CONDIT_SUF 4 // add a suffix for matching flags 2013 #define CONDIT_AFF 8 // word already has an affix 2014 2015 /* 2016 * Tunable parameters for when the tree is compressed. See 'mkspellmem'. 2017 */ 2018 static long compress_start = 30000; // memory / SBLOCKSIZE 2019 static long compress_inc = 100; // memory / SBLOCKSIZE 2020 static long compress_added = 500000; // word count 2021 2022 /* 2023 * Check the 'mkspellmem' option. Return FAIL if it's wrong. 2024 * Sets "sps_flags". 2025 */ 2026 int 2027 spell_check_msm(void) 2028 { 2029 char_u *p = p_msm; 2030 long start = 0; 2031 long incr = 0; 2032 long added = 0; 2033 2034 if (!VIM_ISDIGIT(*p)) 2035 return FAIL; 2036 // block count = (value * 1024) / SBLOCKSIZE (but avoid overflow) 2037 start = (getdigits(&p) * 10) / (SBLOCKSIZE / 102); 2038 if (*p != ',') 2039 return FAIL; 2040 ++p; 2041 if (!VIM_ISDIGIT(*p)) 2042 return FAIL; 2043 incr = (getdigits(&p) * 102) / (SBLOCKSIZE / 10); 2044 if (*p != ',') 2045 return FAIL; 2046 ++p; 2047 if (!VIM_ISDIGIT(*p)) 2048 return FAIL; 2049 added = getdigits(&p) * 1024; 2050 if (*p != NUL) 2051 return FAIL; 2052 2053 if (start == 0 || incr == 0 || added == 0 || incr > start) 2054 return FAIL; 2055 2056 compress_start = start; 2057 compress_inc = incr; 2058 compress_added = added; 2059 return OK; 2060 } 2061 2062 #ifdef SPELL_PRINTTREE 2063 /* 2064 * For debugging the tree code: print the current tree in a (more or less) 2065 * readable format, so that we can see what happens when adding a word and/or 2066 * compressing the tree. 2067 * Based on code from Olaf Seibert. 
2068 */ 2069 #define PRINTLINESIZE 1000 2070 #define PRINTWIDTH 6 2071 2072 #define PRINTSOME(l, depth, fmt, a1, a2) vim_snprintf(l + depth * PRINTWIDTH, \ 2073 PRINTLINESIZE - PRINTWIDTH * depth, fmt, a1, a2) 2074 2075 static char line1[PRINTLINESIZE]; 2076 static char line2[PRINTLINESIZE]; 2077 static char line3[PRINTLINESIZE]; 2078 2079 static void 2080 spell_clear_flags(wordnode_T *node) 2081 { 2082 wordnode_T *np; 2083 2084 for (np = node; np != NULL; np = np->wn_sibling) 2085 { 2086 np->wn_u1.index = FALSE; 2087 spell_clear_flags(np->wn_child); 2088 } 2089 } 2090 2091 static void 2092 spell_print_node(wordnode_T *node, int depth) 2093 { 2094 if (node->wn_u1.index) 2095 { 2096 // Done this node before, print the reference. 2097 PRINTSOME(line1, depth, "(%d)", node->wn_nr, 0); 2098 PRINTSOME(line2, depth, " ", 0, 0); 2099 PRINTSOME(line3, depth, " ", 0, 0); 2100 msg(line1); 2101 msg(line2); 2102 msg(line3); 2103 } 2104 else 2105 { 2106 node->wn_u1.index = TRUE; 2107 2108 if (node->wn_byte != NUL) 2109 { 2110 if (node->wn_child != NULL) 2111 PRINTSOME(line1, depth, " %c -> ", node->wn_byte, 0); 2112 else 2113 // Cannot happen? 
2114 PRINTSOME(line1, depth, " %c ???", node->wn_byte, 0); 2115 } 2116 else 2117 PRINTSOME(line1, depth, " $ ", 0, 0); 2118 2119 PRINTSOME(line2, depth, "%d/%d ", node->wn_nr, node->wn_refs); 2120 2121 if (node->wn_sibling != NULL) 2122 PRINTSOME(line3, depth, " | ", 0, 0); 2123 else 2124 PRINTSOME(line3, depth, " ", 0, 0); 2125 2126 if (node->wn_byte == NUL) 2127 { 2128 msg(line1); 2129 msg(line2); 2130 msg(line3); 2131 } 2132 2133 // do the children 2134 if (node->wn_byte != NUL && node->wn_child != NULL) 2135 spell_print_node(node->wn_child, depth + 1); 2136 2137 // do the siblings 2138 if (node->wn_sibling != NULL) 2139 { 2140 // get rid of all parent details except | 2141 STRCPY(line1, line3); 2142 STRCPY(line2, line3); 2143 spell_print_node(node->wn_sibling, depth); 2144 } 2145 } 2146 } 2147 2148 static void 2149 spell_print_tree(wordnode_T *root) 2150 { 2151 if (root != NULL) 2152 { 2153 // Clear the "wn_u1.index" fields, used to remember what has been 2154 // done. 2155 spell_clear_flags(root); 2156 2157 // Recursively print the tree. 2158 spell_print_node(root, 0); 2159 } 2160 } 2161 #endif // SPELL_PRINTTREE 2162 2163 /* 2164 * Read the affix file "fname". 2165 * Returns an afffile_T, NULL for complete failure. 
2166 */ 2167 static afffile_T * 2168 spell_read_aff(spellinfo_T *spin, char_u *fname) 2169 { 2170 FILE *fd; 2171 afffile_T *aff; 2172 char_u rline[MAXLINELEN]; 2173 char_u *line; 2174 char_u *pc = NULL; 2175 #define MAXITEMCNT 30 2176 char_u *(items[MAXITEMCNT]); 2177 int itemcnt; 2178 char_u *p; 2179 int lnum = 0; 2180 affheader_T *cur_aff = NULL; 2181 int did_postpone_prefix = FALSE; 2182 int aff_todo = 0; 2183 hashtab_T *tp; 2184 char_u *low = NULL; 2185 char_u *fol = NULL; 2186 char_u *upp = NULL; 2187 int do_rep; 2188 int do_repsal; 2189 int do_sal; 2190 int do_mapline; 2191 int found_map = FALSE; 2192 hashitem_T *hi; 2193 int l; 2194 int compminlen = 0; // COMPOUNDMIN value 2195 int compsylmax = 0; // COMPOUNDSYLMAX value 2196 int compoptions = 0; // COMP_ flags 2197 int compmax = 0; // COMPOUNDWORDMAX value 2198 char_u *compflags = NULL; // COMPOUNDFLAG and COMPOUNDRULE 2199 // concatenated 2200 char_u *midword = NULL; // MIDWORD value 2201 char_u *syllable = NULL; // SYLLABLE value 2202 char_u *sofofrom = NULL; // SOFOFROM value 2203 char_u *sofoto = NULL; // SOFOTO value 2204 2205 /* 2206 * Open the file. 2207 */ 2208 fd = mch_fopen((char *)fname, "r"); 2209 if (fd == NULL) 2210 { 2211 semsg(_(e_notopen), fname); 2212 return NULL; 2213 } 2214 2215 vim_snprintf((char *)IObuff, IOSIZE, _("Reading affix file %s..."), fname); 2216 spell_message(spin, IObuff); 2217 2218 // Only do REP lines when not done in another .aff file already. 2219 do_rep = spin->si_rep.ga_len == 0; 2220 2221 // Only do REPSAL lines when not done in another .aff file already. 2222 do_repsal = spin->si_repsal.ga_len == 0; 2223 2224 // Only do SAL lines when not done in another .aff file already. 2225 do_sal = spin->si_sal.ga_len == 0; 2226 2227 // Only do MAP lines when not done in another .aff file already. 2228 do_mapline = spin->si_map.ga_len == 0; 2229 2230 /* 2231 * Allocate and init the afffile_T structure. 
2232 */ 2233 aff = (afffile_T *)getroom(spin, sizeof(afffile_T), TRUE); 2234 if (aff == NULL) 2235 { 2236 fclose(fd); 2237 return NULL; 2238 } 2239 hash_init(&aff->af_pref); 2240 hash_init(&aff->af_suff); 2241 hash_init(&aff->af_comp); 2242 2243 /* 2244 * Read all the lines in the file one by one. 2245 */ 2246 while (!vim_fgets(rline, MAXLINELEN, fd) && !got_int) 2247 { 2248 line_breakcheck(); 2249 ++lnum; 2250 2251 // Skip comment lines. 2252 if (*rline == '#') 2253 continue; 2254 2255 // Convert from "SET" to 'encoding' when needed. 2256 vim_free(pc); 2257 if (spin->si_conv.vc_type != CONV_NONE) 2258 { 2259 pc = string_convert(&spin->si_conv, rline, NULL); 2260 if (pc == NULL) 2261 { 2262 smsg(_("Conversion failure for word in %s line %d: %s"), 2263 fname, lnum, rline); 2264 continue; 2265 } 2266 line = pc; 2267 } 2268 else 2269 { 2270 pc = NULL; 2271 line = rline; 2272 } 2273 2274 // Split the line up in white separated items. Put a NUL after each 2275 // item. 2276 itemcnt = 0; 2277 for (p = line; ; ) 2278 { 2279 while (*p != NUL && *p <= ' ') // skip white space and CR/NL 2280 ++p; 2281 if (*p == NUL) 2282 break; 2283 if (itemcnt == MAXITEMCNT) // too many items 2284 break; 2285 items[itemcnt++] = p; 2286 // A few items have arbitrary text argument, don't split them. 2287 if (itemcnt == 2 && spell_info_item(items[0])) 2288 while (*p >= ' ' || *p == TAB) // skip until CR/NL 2289 ++p; 2290 else 2291 while (*p > ' ') // skip until white space or CR/NL 2292 ++p; 2293 if (*p == NUL) 2294 break; 2295 *p++ = NUL; 2296 } 2297 2298 // Handle non-empty lines. 2299 if (itemcnt > 0) 2300 { 2301 if (is_aff_rule(items, itemcnt, "SET", 2) && aff->af_enc == NULL) 2302 { 2303 // Setup for conversion from "ENC" to 'encoding'. 
2304 aff->af_enc = enc_canonize(items[1]); 2305 if (aff->af_enc != NULL && !spin->si_ascii 2306 && convert_setup(&spin->si_conv, aff->af_enc, 2307 p_enc) == FAIL) 2308 smsg(_("Conversion in %s not supported: from %s to %s"), 2309 fname, aff->af_enc, p_enc); 2310 spin->si_conv.vc_fail = TRUE; 2311 } 2312 else if (is_aff_rule(items, itemcnt, "FLAG", 2) 2313 && aff->af_flagtype == AFT_CHAR) 2314 { 2315 if (STRCMP(items[1], "long") == 0) 2316 aff->af_flagtype = AFT_LONG; 2317 else if (STRCMP(items[1], "num") == 0) 2318 aff->af_flagtype = AFT_NUM; 2319 else if (STRCMP(items[1], "caplong") == 0) 2320 aff->af_flagtype = AFT_CAPLONG; 2321 else 2322 smsg(_("Invalid value for FLAG in %s line %d: %s"), 2323 fname, lnum, items[1]); 2324 if (aff->af_rare != 0 2325 || aff->af_keepcase != 0 2326 || aff->af_bad != 0 2327 || aff->af_needaffix != 0 2328 || aff->af_circumfix != 0 2329 || aff->af_needcomp != 0 2330 || aff->af_comproot != 0 2331 || aff->af_nosuggest != 0 2332 || compflags != NULL 2333 || aff->af_suff.ht_used > 0 2334 || aff->af_pref.ht_used > 0) 2335 smsg(_("FLAG after using flags in %s line %d: %s"), 2336 fname, lnum, items[1]); 2337 } 2338 else if (spell_info_item(items[0])) 2339 { 2340 p = (char_u *)getroom(spin, 2341 (spin->si_info == NULL ? 
0 : STRLEN(spin->si_info)) 2342 + STRLEN(items[0]) 2343 + STRLEN(items[1]) + 3, FALSE); 2344 if (p != NULL) 2345 { 2346 if (spin->si_info != NULL) 2347 { 2348 STRCPY(p, spin->si_info); 2349 STRCAT(p, "\n"); 2350 } 2351 STRCAT(p, items[0]); 2352 STRCAT(p, " "); 2353 STRCAT(p, items[1]); 2354 spin->si_info = p; 2355 } 2356 } 2357 else if (is_aff_rule(items, itemcnt, "MIDWORD", 2) 2358 && midword == NULL) 2359 { 2360 midword = getroom_save(spin, items[1]); 2361 } 2362 else if (is_aff_rule(items, itemcnt, "TRY", 2)) 2363 { 2364 // ignored, we look in the tree for what chars may appear 2365 } 2366 // TODO: remove "RAR" later 2367 else if ((is_aff_rule(items, itemcnt, "RAR", 2) 2368 || is_aff_rule(items, itemcnt, "RARE", 2)) 2369 && aff->af_rare == 0) 2370 { 2371 aff->af_rare = affitem2flag(aff->af_flagtype, items[1], 2372 fname, lnum); 2373 } 2374 // TODO: remove "KEP" later 2375 else if ((is_aff_rule(items, itemcnt, "KEP", 2) 2376 || is_aff_rule(items, itemcnt, "KEEPCASE", 2)) 2377 && aff->af_keepcase == 0) 2378 { 2379 aff->af_keepcase = affitem2flag(aff->af_flagtype, items[1], 2380 fname, lnum); 2381 } 2382 else if ((is_aff_rule(items, itemcnt, "BAD", 2) 2383 || is_aff_rule(items, itemcnt, "FORBIDDENWORD", 2)) 2384 && aff->af_bad == 0) 2385 { 2386 aff->af_bad = affitem2flag(aff->af_flagtype, items[1], 2387 fname, lnum); 2388 } 2389 else if (is_aff_rule(items, itemcnt, "NEEDAFFIX", 2) 2390 && aff->af_needaffix == 0) 2391 { 2392 aff->af_needaffix = affitem2flag(aff->af_flagtype, items[1], 2393 fname, lnum); 2394 } 2395 else if (is_aff_rule(items, itemcnt, "CIRCUMFIX", 2) 2396 && aff->af_circumfix == 0) 2397 { 2398 aff->af_circumfix = affitem2flag(aff->af_flagtype, items[1], 2399 fname, lnum); 2400 } 2401 else if (is_aff_rule(items, itemcnt, "NOSUGGEST", 2) 2402 && aff->af_nosuggest == 0) 2403 { 2404 aff->af_nosuggest = affitem2flag(aff->af_flagtype, items[1], 2405 fname, lnum); 2406 } 2407 else if ((is_aff_rule(items, itemcnt, "NEEDCOMPOUND", 2) 2408 || 
is_aff_rule(items, itemcnt, "ONLYINCOMPOUND", 2)) 2409 && aff->af_needcomp == 0) 2410 { 2411 aff->af_needcomp = affitem2flag(aff->af_flagtype, items[1], 2412 fname, lnum); 2413 } 2414 else if (is_aff_rule(items, itemcnt, "COMPOUNDROOT", 2) 2415 && aff->af_comproot == 0) 2416 { 2417 aff->af_comproot = affitem2flag(aff->af_flagtype, items[1], 2418 fname, lnum); 2419 } 2420 else if (is_aff_rule(items, itemcnt, "COMPOUNDFORBIDFLAG", 2) 2421 && aff->af_compforbid == 0) 2422 { 2423 aff->af_compforbid = affitem2flag(aff->af_flagtype, items[1], 2424 fname, lnum); 2425 if (aff->af_pref.ht_used > 0) 2426 smsg(_("Defining COMPOUNDFORBIDFLAG after PFX item may give wrong results in %s line %d"), 2427 fname, lnum); 2428 } 2429 else if (is_aff_rule(items, itemcnt, "COMPOUNDPERMITFLAG", 2) 2430 && aff->af_comppermit == 0) 2431 { 2432 aff->af_comppermit = affitem2flag(aff->af_flagtype, items[1], 2433 fname, lnum); 2434 if (aff->af_pref.ht_used > 0) 2435 smsg(_("Defining COMPOUNDPERMITFLAG after PFX item may give wrong results in %s line %d"), 2436 fname, lnum); 2437 } 2438 else if (is_aff_rule(items, itemcnt, "COMPOUNDFLAG", 2) 2439 && compflags == NULL) 2440 { 2441 // Turn flag "c" into COMPOUNDRULE compatible string "c+", 2442 // "Na" into "Na+", "1234" into "1234+". 2443 p = getroom(spin, STRLEN(items[1]) + 2, FALSE); 2444 if (p != NULL) 2445 { 2446 STRCPY(p, items[1]); 2447 STRCAT(p, "+"); 2448 compflags = p; 2449 } 2450 } 2451 else if (is_aff_rule(items, itemcnt, "COMPOUNDRULES", 2)) 2452 { 2453 // We don't use the count, but do check that it's a number and 2454 // not COMPOUNDRULE mistyped. 2455 if (atoi((char *)items[1]) == 0) 2456 smsg(_("Wrong COMPOUNDRULES value in %s line %d: %s"), 2457 fname, lnum, items[1]); 2458 } 2459 else if (is_aff_rule(items, itemcnt, "COMPOUNDRULE", 2)) 2460 { 2461 // Don't use the first rule if it is a number. 
2462 if (compflags != NULL || *skipdigits(items[1]) != NUL) 2463 { 2464 // Concatenate this string to previously defined ones, 2465 // using a slash to separate them. 2466 l = (int)STRLEN(items[1]) + 1; 2467 if (compflags != NULL) 2468 l += (int)STRLEN(compflags) + 1; 2469 p = getroom(spin, l, FALSE); 2470 if (p != NULL) 2471 { 2472 if (compflags != NULL) 2473 { 2474 STRCPY(p, compflags); 2475 STRCAT(p, "/"); 2476 } 2477 STRCAT(p, items[1]); 2478 compflags = p; 2479 } 2480 } 2481 } 2482 else if (is_aff_rule(items, itemcnt, "COMPOUNDWORDMAX", 2) 2483 && compmax == 0) 2484 { 2485 compmax = atoi((char *)items[1]); 2486 if (compmax == 0) 2487 smsg(_("Wrong COMPOUNDWORDMAX value in %s line %d: %s"), 2488 fname, lnum, items[1]); 2489 } 2490 else if (is_aff_rule(items, itemcnt, "COMPOUNDMIN", 2) 2491 && compminlen == 0) 2492 { 2493 compminlen = atoi((char *)items[1]); 2494 if (compminlen == 0) 2495 smsg(_("Wrong COMPOUNDMIN value in %s line %d: %s"), 2496 fname, lnum, items[1]); 2497 } 2498 else if (is_aff_rule(items, itemcnt, "COMPOUNDSYLMAX", 2) 2499 && compsylmax == 0) 2500 { 2501 compsylmax = atoi((char *)items[1]); 2502 if (compsylmax == 0) 2503 smsg(_("Wrong COMPOUNDSYLMAX value in %s line %d: %s"), 2504 fname, lnum, items[1]); 2505 } 2506 else if (is_aff_rule(items, itemcnt, "CHECKCOMPOUNDDUP", 1)) 2507 { 2508 compoptions |= COMP_CHECKDUP; 2509 } 2510 else if (is_aff_rule(items, itemcnt, "CHECKCOMPOUNDREP", 1)) 2511 { 2512 compoptions |= COMP_CHECKREP; 2513 } 2514 else if (is_aff_rule(items, itemcnt, "CHECKCOMPOUNDCASE", 1)) 2515 { 2516 compoptions |= COMP_CHECKCASE; 2517 } 2518 else if (is_aff_rule(items, itemcnt, "CHECKCOMPOUNDTRIPLE", 1)) 2519 { 2520 compoptions |= COMP_CHECKTRIPLE; 2521 } 2522 else if (is_aff_rule(items, itemcnt, "CHECKCOMPOUNDPATTERN", 2)) 2523 { 2524 if (atoi((char *)items[1]) == 0) 2525 smsg(_("Wrong CHECKCOMPOUNDPATTERN value in %s line %d: %s"), 2526 fname, lnum, items[1]); 2527 } 2528 else if (is_aff_rule(items, itemcnt, 
"CHECKCOMPOUNDPATTERN", 3)) 2529 { 2530 garray_T *gap = &spin->si_comppat; 2531 int i; 2532 2533 // Only add the couple if it isn't already there. 2534 for (i = 0; i < gap->ga_len - 1; i += 2) 2535 if (STRCMP(((char_u **)(gap->ga_data))[i], items[1]) == 0 2536 && STRCMP(((char_u **)(gap->ga_data))[i + 1], 2537 items[2]) == 0) 2538 break; 2539 if (i >= gap->ga_len && ga_grow(gap, 2) == OK) 2540 { 2541 ((char_u **)(gap->ga_data))[gap->ga_len++] 2542 = getroom_save(spin, items[1]); 2543 ((char_u **)(gap->ga_data))[gap->ga_len++] 2544 = getroom_save(spin, items[2]); 2545 } 2546 } 2547 else if (is_aff_rule(items, itemcnt, "SYLLABLE", 2) 2548 && syllable == NULL) 2549 { 2550 syllable = getroom_save(spin, items[1]); 2551 } 2552 else if (is_aff_rule(items, itemcnt, "NOBREAK", 1)) 2553 { 2554 spin->si_nobreak = TRUE; 2555 } 2556 else if (is_aff_rule(items, itemcnt, "NOSPLITSUGS", 1)) 2557 { 2558 spin->si_nosplitsugs = TRUE; 2559 } 2560 else if (is_aff_rule(items, itemcnt, "NOCOMPOUNDSUGS", 1)) 2561 { 2562 spin->si_nocompoundsugs = TRUE; 2563 } 2564 else if (is_aff_rule(items, itemcnt, "NOSUGFILE", 1)) 2565 { 2566 spin->si_nosugfile = TRUE; 2567 } 2568 else if (is_aff_rule(items, itemcnt, "PFXPOSTPONE", 1)) 2569 { 2570 aff->af_pfxpostpone = TRUE; 2571 } 2572 else if (is_aff_rule(items, itemcnt, "IGNOREEXTRA", 1)) 2573 { 2574 aff->af_ignoreextra = TRUE; 2575 } 2576 else if ((STRCMP(items[0], "PFX") == 0 2577 || STRCMP(items[0], "SFX") == 0) 2578 && aff_todo == 0 2579 && itemcnt >= 4) 2580 { 2581 int lasti = 4; 2582 char_u key[AH_KEY_LEN]; 2583 2584 if (*items[0] == 'P') 2585 tp = &aff->af_pref; 2586 else 2587 tp = &aff->af_suff; 2588 2589 // Myspell allows the same affix name to be used multiple 2590 // times. The affix files that do this have an undocumented 2591 // "S" flag on all but the last block, thus we check for that 2592 // and store it in ah_follows. 
2593 vim_strncpy(key, items[1], AH_KEY_LEN - 1); 2594 hi = hash_find(tp, key); 2595 if (!HASHITEM_EMPTY(hi)) 2596 { 2597 cur_aff = HI2AH(hi); 2598 if (cur_aff->ah_combine != (*items[2] == 'Y')) 2599 smsg(_("Different combining flag in continued affix block in %s line %d: %s"), 2600 fname, lnum, items[1]); 2601 if (!cur_aff->ah_follows) 2602 smsg(_("Duplicate affix in %s line %d: %s"), 2603 fname, lnum, items[1]); 2604 } 2605 else 2606 { 2607 // New affix letter. 2608 cur_aff = (affheader_T *)getroom(spin, 2609 sizeof(affheader_T), TRUE); 2610 if (cur_aff == NULL) 2611 break; 2612 cur_aff->ah_flag = affitem2flag(aff->af_flagtype, items[1], 2613 fname, lnum); 2614 if (cur_aff->ah_flag == 0 || STRLEN(items[1]) >= AH_KEY_LEN) 2615 break; 2616 if (cur_aff->ah_flag == aff->af_bad 2617 || cur_aff->ah_flag == aff->af_rare 2618 || cur_aff->ah_flag == aff->af_keepcase 2619 || cur_aff->ah_flag == aff->af_needaffix 2620 || cur_aff->ah_flag == aff->af_circumfix 2621 || cur_aff->ah_flag == aff->af_nosuggest 2622 || cur_aff->ah_flag == aff->af_needcomp 2623 || cur_aff->ah_flag == aff->af_comproot) 2624 smsg(_("Affix also used for BAD/RARE/KEEPCASE/NEEDAFFIX/NEEDCOMPOUND/NOSUGGEST in %s line %d: %s"), 2625 fname, lnum, items[1]); 2626 STRCPY(cur_aff->ah_key, items[1]); 2627 hash_add(tp, cur_aff->ah_key); 2628 2629 cur_aff->ah_combine = (*items[2] == 'Y'); 2630 } 2631 2632 // Check for the "S" flag, which apparently means that another 2633 // block with the same affix name is following. 2634 if (itemcnt > lasti && STRCMP(items[lasti], "S") == 0) 2635 { 2636 ++lasti; 2637 cur_aff->ah_follows = TRUE; 2638 } 2639 else 2640 cur_aff->ah_follows = FALSE; 2641 2642 // Myspell allows extra text after the item, but that might 2643 // mean mistakes go unnoticed. Require a comment-starter. 
2644 if (itemcnt > lasti && *items[lasti] != '#') 2645 smsg(_(e_afftrailing), fname, lnum, items[lasti]); 2646 2647 if (STRCMP(items[2], "Y") != 0 && STRCMP(items[2], "N") != 0) 2648 smsg(_("Expected Y or N in %s line %d: %s"), 2649 fname, lnum, items[2]); 2650 2651 if (*items[0] == 'P' && aff->af_pfxpostpone) 2652 { 2653 if (cur_aff->ah_newID == 0) 2654 { 2655 // Use a new number in the .spl file later, to be able 2656 // to handle multiple .aff files. 2657 check_renumber(spin); 2658 cur_aff->ah_newID = ++spin->si_newprefID; 2659 2660 // We only really use ah_newID if the prefix is 2661 // postponed. We know that only after handling all 2662 // the items. 2663 did_postpone_prefix = FALSE; 2664 } 2665 else 2666 // Did use the ID in a previous block. 2667 did_postpone_prefix = TRUE; 2668 } 2669 2670 aff_todo = atoi((char *)items[3]); 2671 } 2672 else if ((STRCMP(items[0], "PFX") == 0 2673 || STRCMP(items[0], "SFX") == 0) 2674 && aff_todo > 0 2675 && STRCMP(cur_aff->ah_key, items[1]) == 0 2676 && itemcnt >= 5) 2677 { 2678 affentry_T *aff_entry; 2679 int upper = FALSE; 2680 int lasti = 5; 2681 2682 // Myspell allows extra text after the item, but that might 2683 // mean mistakes go unnoticed. Require a comment-starter, 2684 // unless IGNOREEXTRA is used. Hunspell uses a "-" item. 2685 if (itemcnt > lasti 2686 && !aff->af_ignoreextra 2687 && *items[lasti] != '#' 2688 && (STRCMP(items[lasti], "-") != 0 2689 || itemcnt != lasti + 1)) 2690 smsg(_(e_afftrailing), fname, lnum, items[lasti]); 2691 2692 // New item for an affix letter. 
2693 --aff_todo; 2694 aff_entry = (affentry_T *)getroom(spin, 2695 sizeof(affentry_T), TRUE); 2696 if (aff_entry == NULL) 2697 break; 2698 2699 if (STRCMP(items[2], "0") != 0) 2700 aff_entry->ae_chop = getroom_save(spin, items[2]); 2701 if (STRCMP(items[3], "0") != 0) 2702 { 2703 aff_entry->ae_add = getroom_save(spin, items[3]); 2704 2705 // Recognize flags on the affix: abcd/XYZ 2706 aff_entry->ae_flags = vim_strchr(aff_entry->ae_add, '/'); 2707 if (aff_entry->ae_flags != NULL) 2708 { 2709 *aff_entry->ae_flags++ = NUL; 2710 aff_process_flags(aff, aff_entry); 2711 } 2712 } 2713 2714 // Don't use an affix entry with non-ASCII characters when 2715 // "spin->si_ascii" is TRUE. 2716 if (!spin->si_ascii || !(has_non_ascii(aff_entry->ae_chop) 2717 || has_non_ascii(aff_entry->ae_add))) 2718 { 2719 aff_entry->ae_next = cur_aff->ah_first; 2720 cur_aff->ah_first = aff_entry; 2721 2722 if (STRCMP(items[4], ".") != 0) 2723 { 2724 char_u buf[MAXLINELEN]; 2725 2726 aff_entry->ae_cond = getroom_save(spin, items[4]); 2727 if (*items[0] == 'P') 2728 sprintf((char *)buf, "^%s", items[4]); 2729 else 2730 sprintf((char *)buf, "%s$", items[4]); 2731 aff_entry->ae_prog = vim_regcomp(buf, 2732 RE_MAGIC + RE_STRING + RE_STRICT); 2733 if (aff_entry->ae_prog == NULL) 2734 smsg(_("Broken condition in %s line %d: %s"), 2735 fname, lnum, items[4]); 2736 } 2737 2738 // For postponed prefixes we need an entry in si_prefcond 2739 // for the condition. Use an existing one if possible. 2740 // Can't be done for an affix with flags, ignoring 2741 // COMPOUNDFORBIDFLAG and COMPOUNDPERMITFLAG. 2742 if (*items[0] == 'P' && aff->af_pfxpostpone 2743 && aff_entry->ae_flags == NULL) 2744 { 2745 // When the chop string is one lower-case letter and 2746 // the add string ends in the upper-case letter we set 2747 // the "upper" flag, clear "ae_chop" and remove the 2748 // letters from "ae_add". The condition must either 2749 // be empty or start with the same letter. 
2750 if (aff_entry->ae_chop != NULL 2751 && aff_entry->ae_add != NULL 2752 && aff_entry->ae_chop[(*mb_ptr2len)( 2753 aff_entry->ae_chop)] == NUL) 2754 { 2755 int c, c_up; 2756 2757 c = PTR2CHAR(aff_entry->ae_chop); 2758 c_up = SPELL_TOUPPER(c); 2759 if (c_up != c 2760 && (aff_entry->ae_cond == NULL 2761 || PTR2CHAR(aff_entry->ae_cond) == c)) 2762 { 2763 p = aff_entry->ae_add 2764 + STRLEN(aff_entry->ae_add); 2765 MB_PTR_BACK(aff_entry->ae_add, p); 2766 if (PTR2CHAR(p) == c_up) 2767 { 2768 upper = TRUE; 2769 aff_entry->ae_chop = NULL; 2770 *p = NUL; 2771 2772 // The condition is matched with the 2773 // actual word, thus must check for the 2774 // upper-case letter. 2775 if (aff_entry->ae_cond != NULL) 2776 { 2777 char_u buf[MAXLINELEN]; 2778 2779 if (has_mbyte) 2780 { 2781 onecap_copy(items[4], buf, TRUE); 2782 aff_entry->ae_cond = getroom_save( 2783 spin, buf); 2784 } 2785 else 2786 *aff_entry->ae_cond = c_up; 2787 if (aff_entry->ae_cond != NULL) 2788 { 2789 sprintf((char *)buf, "^%s", 2790 aff_entry->ae_cond); 2791 vim_regfree(aff_entry->ae_prog); 2792 aff_entry->ae_prog = vim_regcomp( 2793 buf, RE_MAGIC + RE_STRING); 2794 } 2795 } 2796 } 2797 } 2798 } 2799 2800 if (aff_entry->ae_chop == NULL 2801 && aff_entry->ae_flags == NULL) 2802 { 2803 int idx; 2804 char_u **pp; 2805 int n; 2806 2807 // Find a previously used condition. 2808 for (idx = spin->si_prefcond.ga_len - 1; idx >= 0; 2809 --idx) 2810 { 2811 p = ((char_u **)spin->si_prefcond.ga_data)[idx]; 2812 if (str_equal(p, aff_entry->ae_cond)) 2813 break; 2814 } 2815 if (idx < 0 && ga_grow(&spin->si_prefcond, 1) == OK) 2816 { 2817 // Not found, add a new condition. 2818 idx = spin->si_prefcond.ga_len++; 2819 pp = ((char_u **)spin->si_prefcond.ga_data) 2820 + idx; 2821 if (aff_entry->ae_cond == NULL) 2822 *pp = NULL; 2823 else 2824 *pp = getroom_save(spin, 2825 aff_entry->ae_cond); 2826 } 2827 2828 // Add the prefix to the prefix tree. 
2829 if (aff_entry->ae_add == NULL) 2830 p = (char_u *)""; 2831 else 2832 p = aff_entry->ae_add; 2833 2834 // PFX_FLAGS is a negative number, so that 2835 // tree_add_word() knows this is the prefix tree. 2836 n = PFX_FLAGS; 2837 if (!cur_aff->ah_combine) 2838 n |= WFP_NC; 2839 if (upper) 2840 n |= WFP_UP; 2841 if (aff_entry->ae_comppermit) 2842 n |= WFP_COMPPERMIT; 2843 if (aff_entry->ae_compforbid) 2844 n |= WFP_COMPFORBID; 2845 tree_add_word(spin, p, spin->si_prefroot, n, 2846 idx, cur_aff->ah_newID); 2847 did_postpone_prefix = TRUE; 2848 } 2849 2850 // Didn't actually use ah_newID, backup si_newprefID. 2851 if (aff_todo == 0 && !did_postpone_prefix) 2852 { 2853 --spin->si_newprefID; 2854 cur_aff->ah_newID = 0; 2855 } 2856 } 2857 } 2858 } 2859 else if (is_aff_rule(items, itemcnt, "FOL", 2) && fol == NULL) 2860 { 2861 fol = vim_strsave(items[1]); 2862 } 2863 else if (is_aff_rule(items, itemcnt, "LOW", 2) && low == NULL) 2864 { 2865 low = vim_strsave(items[1]); 2866 } 2867 else if (is_aff_rule(items, itemcnt, "UPP", 2) && upp == NULL) 2868 { 2869 upp = vim_strsave(items[1]); 2870 } 2871 else if (is_aff_rule(items, itemcnt, "REP", 2) 2872 || is_aff_rule(items, itemcnt, "REPSAL", 2)) 2873 { 2874 // Ignore REP/REPSAL count 2875 if (!isdigit(*items[1])) 2876 smsg(_("Expected REP(SAL) count in %s line %d"), 2877 fname, lnum); 2878 } 2879 else if ((STRCMP(items[0], "REP") == 0 2880 || STRCMP(items[0], "REPSAL") == 0) 2881 && itemcnt >= 3) 2882 { 2883 // REP/REPSAL item 2884 // Myspell ignores extra arguments, we require it starts with 2885 // # to detect mistakes. 2886 if (itemcnt > 3 && items[3][0] != '#') 2887 smsg(_(e_afftrailing), fname, lnum, items[3]); 2888 if (items[0][3] == 'S' ? do_repsal : do_rep) 2889 { 2890 // Replace underscore with space (can't include a space 2891 // directly). 
2892 for (p = items[1]; *p != NUL; MB_PTR_ADV(p)) 2893 if (*p == '_') 2894 *p = ' '; 2895 for (p = items[2]; *p != NUL; MB_PTR_ADV(p)) 2896 if (*p == '_') 2897 *p = ' '; 2898 add_fromto(spin, items[0][3] == 'S' 2899 ? &spin->si_repsal 2900 : &spin->si_rep, items[1], items[2]); 2901 } 2902 } 2903 else if (is_aff_rule(items, itemcnt, "MAP", 2)) 2904 { 2905 // MAP item or count 2906 if (!found_map) 2907 { 2908 // First line contains the count. 2909 found_map = TRUE; 2910 if (!isdigit(*items[1])) 2911 smsg(_("Expected MAP count in %s line %d"), 2912 fname, lnum); 2913 } 2914 else if (do_mapline) 2915 { 2916 int c; 2917 2918 // Check that every character appears only once. 2919 for (p = items[1]; *p != NUL; ) 2920 { 2921 c = mb_ptr2char_adv(&p); 2922 if ((spin->si_map.ga_len > 0 2923 && vim_strchr(spin->si_map.ga_data, c) 2924 != NULL) 2925 || vim_strchr(p, c) != NULL) 2926 smsg(_("Duplicate character in MAP in %s line %d"), 2927 fname, lnum); 2928 } 2929 2930 // We simply concatenate all the MAP strings, separated by 2931 // slashes. 2932 ga_concat(&spin->si_map, items[1]); 2933 ga_append(&spin->si_map, '/'); 2934 } 2935 } 2936 // Accept "SAL from to" and "SAL from to #comment". 2937 else if (is_aff_rule(items, itemcnt, "SAL", 3)) 2938 { 2939 if (do_sal) 2940 { 2941 // SAL item (sounds-a-like) 2942 // Either one of the known keys or a from-to pair. 2943 if (STRCMP(items[1], "followup") == 0) 2944 spin->si_followup = sal_to_bool(items[2]); 2945 else if (STRCMP(items[1], "collapse_result") == 0) 2946 spin->si_collapse = sal_to_bool(items[2]); 2947 else if (STRCMP(items[1], "remove_accents") == 0) 2948 spin->si_rem_accents = sal_to_bool(items[2]); 2949 else 2950 // when "to" is "_" it means empty 2951 add_fromto(spin, &spin->si_sal, items[1], 2952 STRCMP(items[2], "_") == 0 ? 
(char_u *)"" 2953 : items[2]); 2954 } 2955 } 2956 else if (is_aff_rule(items, itemcnt, "SOFOFROM", 2) 2957 && sofofrom == NULL) 2958 { 2959 sofofrom = getroom_save(spin, items[1]); 2960 } 2961 else if (is_aff_rule(items, itemcnt, "SOFOTO", 2) 2962 && sofoto == NULL) 2963 { 2964 sofoto = getroom_save(spin, items[1]); 2965 } 2966 else if (STRCMP(items[0], "COMMON") == 0) 2967 { 2968 int i; 2969 2970 for (i = 1; i < itemcnt; ++i) 2971 { 2972 if (HASHITEM_EMPTY(hash_find(&spin->si_commonwords, 2973 items[i]))) 2974 { 2975 p = vim_strsave(items[i]); 2976 if (p == NULL) 2977 break; 2978 hash_add(&spin->si_commonwords, p); 2979 } 2980 } 2981 } 2982 else 2983 smsg(_("Unrecognized or duplicate item in %s line %d: %s"), 2984 fname, lnum, items[0]); 2985 } 2986 } 2987 2988 if (fol != NULL || low != NULL || upp != NULL) 2989 { 2990 if (spin->si_clear_chartab) 2991 { 2992 // Clear the char type tables, don't want to use any of the 2993 // currently used spell properties. 2994 init_spell_chartab(); 2995 spin->si_clear_chartab = FALSE; 2996 } 2997 2998 /* 2999 * Don't write a word table for an ASCII file, so that we don't check 3000 * for conflicts with a word table that matches 'encoding'. 3001 * Don't write one for utf-8 either, we use utf_*() and 3002 * mb_get_class(), the list of chars in the file will be incomplete. 3003 */ 3004 if (!spin->si_ascii && !enc_utf8) 3005 { 3006 if (fol == NULL || low == NULL || upp == NULL) 3007 smsg(_("Missing FOL/LOW/UPP line in %s"), fname); 3008 else 3009 (void)set_spell_chartab(fol, low, upp); 3010 } 3011 3012 vim_free(fol); 3013 vim_free(low); 3014 vim_free(upp); 3015 } 3016 3017 // Use compound specifications of the .aff file for the spell info. 
3018 if (compmax != 0) 3019 { 3020 aff_check_number(spin->si_compmax, compmax, "COMPOUNDWORDMAX"); 3021 spin->si_compmax = compmax; 3022 } 3023 3024 if (compminlen != 0) 3025 { 3026 aff_check_number(spin->si_compminlen, compminlen, "COMPOUNDMIN"); 3027 spin->si_compminlen = compminlen; 3028 } 3029 3030 if (compsylmax != 0) 3031 { 3032 if (syllable == NULL) 3033 smsg(_("COMPOUNDSYLMAX used without SYLLABLE")); 3034 aff_check_number(spin->si_compsylmax, compsylmax, "COMPOUNDSYLMAX"); 3035 spin->si_compsylmax = compsylmax; 3036 } 3037 3038 if (compoptions != 0) 3039 { 3040 aff_check_number(spin->si_compoptions, compoptions, "COMPOUND options"); 3041 spin->si_compoptions |= compoptions; 3042 } 3043 3044 if (compflags != NULL) 3045 process_compflags(spin, aff, compflags); 3046 3047 // Check that we didn't use too many renumbered flags. 3048 if (spin->si_newcompID < spin->si_newprefID) 3049 { 3050 if (spin->si_newcompID == 127 || spin->si_newcompID == 255) 3051 msg(_("Too many postponed prefixes")); 3052 else if (spin->si_newprefID == 0 || spin->si_newprefID == 127) 3053 msg(_("Too many compound flags")); 3054 else 3055 msg(_("Too many postponed prefixes and/or compound flags")); 3056 } 3057 3058 if (syllable != NULL) 3059 { 3060 aff_check_string(spin->si_syllable, syllable, "SYLLABLE"); 3061 spin->si_syllable = syllable; 3062 } 3063 3064 if (sofofrom != NULL || sofoto != NULL) 3065 { 3066 if (sofofrom == NULL || sofoto == NULL) 3067 smsg(_("Missing SOFO%s line in %s"), 3068 sofofrom == NULL ? 
"FROM" : "TO", fname); 3069 else if (spin->si_sal.ga_len > 0) 3070 smsg(_("Both SAL and SOFO lines in %s"), fname); 3071 else 3072 { 3073 aff_check_string(spin->si_sofofr, sofofrom, "SOFOFROM"); 3074 aff_check_string(spin->si_sofoto, sofoto, "SOFOTO"); 3075 spin->si_sofofr = sofofrom; 3076 spin->si_sofoto = sofoto; 3077 } 3078 } 3079 3080 if (midword != NULL) 3081 { 3082 aff_check_string(spin->si_midword, midword, "MIDWORD"); 3083 spin->si_midword = midword; 3084 } 3085 3086 vim_free(pc); 3087 fclose(fd); 3088 return aff; 3089 } 3090 3091 /* 3092 * Return TRUE when items[0] equals "rulename", there are "mincount" items or 3093 * a comment is following after item "mincount". 3094 */ 3095 static int 3096 is_aff_rule( 3097 char_u **items, 3098 int itemcnt, 3099 char *rulename, 3100 int mincount) 3101 { 3102 return (STRCMP(items[0], rulename) == 0 3103 && (itemcnt == mincount 3104 || (itemcnt > mincount && items[mincount][0] == '#'))); 3105 } 3106 3107 /* 3108 * For affix "entry" move COMPOUNDFORBIDFLAG and COMPOUNDPERMITFLAG from 3109 * ae_flags to ae_comppermit and ae_compforbid. 3110 */ 3111 static void 3112 aff_process_flags(afffile_T *affile, affentry_T *entry) 3113 { 3114 char_u *p; 3115 char_u *prevp; 3116 unsigned flag; 3117 3118 if (entry->ae_flags != NULL 3119 && (affile->af_compforbid != 0 || affile->af_comppermit != 0)) 3120 { 3121 for (p = entry->ae_flags; *p != NUL; ) 3122 { 3123 prevp = p; 3124 flag = get_affitem(affile->af_flagtype, &p); 3125 if (flag == affile->af_comppermit || flag == affile->af_compforbid) 3126 { 3127 STRMOVE(prevp, p); 3128 p = prevp; 3129 if (flag == affile->af_comppermit) 3130 entry->ae_comppermit = TRUE; 3131 else 3132 entry->ae_compforbid = TRUE; 3133 } 3134 if (affile->af_flagtype == AFT_NUM && *p == ',') 3135 ++p; 3136 } 3137 if (*entry->ae_flags == NUL) 3138 entry->ae_flags = NULL; // nothing left 3139 } 3140 } 3141 3142 /* 3143 * Return TRUE if "s" is the name of an info item in the affix file. 
3144 */ 3145 static int 3146 spell_info_item(char_u *s) 3147 { 3148 return STRCMP(s, "NAME") == 0 3149 || STRCMP(s, "HOME") == 0 3150 || STRCMP(s, "VERSION") == 0 3151 || STRCMP(s, "AUTHOR") == 0 3152 || STRCMP(s, "EMAIL") == 0 3153 || STRCMP(s, "COPYRIGHT") == 0; 3154 } 3155 3156 /* 3157 * Turn an affix flag name into a number, according to the FLAG type. 3158 * returns zero for failure. 3159 */ 3160 static unsigned 3161 affitem2flag( 3162 int flagtype, 3163 char_u *item, 3164 char_u *fname, 3165 int lnum) 3166 { 3167 unsigned res; 3168 char_u *p = item; 3169 3170 res = get_affitem(flagtype, &p); 3171 if (res == 0) 3172 { 3173 if (flagtype == AFT_NUM) 3174 smsg(_("Flag is not a number in %s line %d: %s"), 3175 fname, lnum, item); 3176 else 3177 smsg(_("Illegal flag in %s line %d: %s"), 3178 fname, lnum, item); 3179 } 3180 if (*p != NUL) 3181 { 3182 smsg(_(e_affname), fname, lnum, item); 3183 return 0; 3184 } 3185 3186 return res; 3187 } 3188 3189 /* 3190 * Get one affix name from "*pp" and advance the pointer. 3191 * Returns ZERO_FLAG for "0". 3192 * Returns zero for an error, still advances the pointer then. 3193 */ 3194 static unsigned 3195 get_affitem(int flagtype, char_u **pp) 3196 { 3197 int res; 3198 3199 if (flagtype == AFT_NUM) 3200 { 3201 if (!VIM_ISDIGIT(**pp)) 3202 { 3203 ++*pp; // always advance, avoid getting stuck 3204 return 0; 3205 } 3206 res = getdigits(pp); 3207 if (res == 0) 3208 res = ZERO_FLAG; 3209 } 3210 else 3211 { 3212 res = mb_ptr2char_adv(pp); 3213 if (flagtype == AFT_LONG || (flagtype == AFT_CAPLONG 3214 && res >= 'A' && res <= 'Z')) 3215 { 3216 if (**pp == NUL) 3217 return 0; 3218 res = mb_ptr2char_adv(pp) + (res << 16); 3219 } 3220 } 3221 return res; 3222 } 3223 3224 /* 3225 * Process the "compflags" string used in an affix file and append it to 3226 * spin->si_compflags. 3227 * The processing involves changing the affix names to ID numbers, so that 3228 * they fit in one byte. 
3229 */ 3230 static void 3231 process_compflags( 3232 spellinfo_T *spin, 3233 afffile_T *aff, 3234 char_u *compflags) 3235 { 3236 char_u *p; 3237 char_u *prevp; 3238 unsigned flag; 3239 compitem_T *ci; 3240 int id; 3241 int len; 3242 char_u *tp; 3243 char_u key[AH_KEY_LEN]; 3244 hashitem_T *hi; 3245 3246 // Make room for the old and the new compflags, concatenated with a / in 3247 // between. Processing it makes it shorter, but we don't know by how 3248 // much, thus allocate the maximum. 3249 len = (int)STRLEN(compflags) + 1; 3250 if (spin->si_compflags != NULL) 3251 len += (int)STRLEN(spin->si_compflags) + 1; 3252 p = getroom(spin, len, FALSE); 3253 if (p == NULL) 3254 return; 3255 if (spin->si_compflags != NULL) 3256 { 3257 STRCPY(p, spin->si_compflags); 3258 STRCAT(p, "/"); 3259 } 3260 spin->si_compflags = p; 3261 tp = p + STRLEN(p); 3262 3263 for (p = compflags; *p != NUL; ) 3264 { 3265 if (vim_strchr((char_u *)"/?*+[]", *p) != NULL) 3266 // Copy non-flag characters directly. 3267 *tp++ = *p++; 3268 else 3269 { 3270 // First get the flag number, also checks validity. 3271 prevp = p; 3272 flag = get_affitem(aff->af_flagtype, &p); 3273 if (flag != 0) 3274 { 3275 // Find the flag in the hashtable. If it was used before, use 3276 // the existing ID. Otherwise add a new entry. 3277 vim_strncpy(key, prevp, p - prevp); 3278 hi = hash_find(&aff->af_comp, key); 3279 if (!HASHITEM_EMPTY(hi)) 3280 id = HI2CI(hi)->ci_newID; 3281 else 3282 { 3283 ci = (compitem_T *)getroom(spin, sizeof(compitem_T), TRUE); 3284 if (ci == NULL) 3285 break; 3286 STRCPY(ci->ci_key, key); 3287 ci->ci_flag = flag; 3288 // Avoid using a flag ID that has a special meaning in a 3289 // regexp (also inside []). 
3290 do 3291 { 3292 check_renumber(spin); 3293 id = spin->si_newcompID--; 3294 } while (vim_strchr((char_u *)"/?*+[]\\-^", id) != NULL); 3295 ci->ci_newID = id; 3296 hash_add(&aff->af_comp, ci->ci_key); 3297 } 3298 *tp++ = id; 3299 } 3300 if (aff->af_flagtype == AFT_NUM && *p == ',') 3301 ++p; 3302 } 3303 } 3304 3305 *tp = NUL; 3306 } 3307 3308 /* 3309 * Check that the new IDs for postponed affixes and compounding don't overrun 3310 * each other. We have almost 255 available, but start at 0-127 to avoid 3311 * using two bytes for utf-8. When the 0-127 range is used up go to 128-255. 3312 * When that is used up an error message is given. 3313 */ 3314 static void 3315 check_renumber(spellinfo_T *spin) 3316 { 3317 if (spin->si_newprefID == spin->si_newcompID && spin->si_newcompID < 128) 3318 { 3319 spin->si_newprefID = 127; 3320 spin->si_newcompID = 255; 3321 } 3322 } 3323 3324 /* 3325 * Return TRUE if flag "flag" appears in affix list "afflist". 3326 */ 3327 static int 3328 flag_in_afflist(int flagtype, char_u *afflist, unsigned flag) 3329 { 3330 char_u *p; 3331 unsigned n; 3332 3333 switch (flagtype) 3334 { 3335 case AFT_CHAR: 3336 return vim_strchr(afflist, flag) != NULL; 3337 3338 case AFT_CAPLONG: 3339 case AFT_LONG: 3340 for (p = afflist; *p != NUL; ) 3341 { 3342 n = mb_ptr2char_adv(&p); 3343 if ((flagtype == AFT_LONG || (n >= 'A' && n <= 'Z')) 3344 && *p != NUL) 3345 n = mb_ptr2char_adv(&p) + (n << 16); 3346 if (n == flag) 3347 return TRUE; 3348 } 3349 break; 3350 3351 case AFT_NUM: 3352 for (p = afflist; *p != NUL; ) 3353 { 3354 n = getdigits(&p); 3355 if (n == 0) 3356 n = ZERO_FLAG; 3357 if (n == flag) 3358 return TRUE; 3359 if (*p != NUL) // skip over comma 3360 ++p; 3361 } 3362 break; 3363 } 3364 return FALSE; 3365 } 3366 3367 /* 3368 * Give a warning when "spinval" and "affval" numbers are set and not the same. 
3369 */ 3370 static void 3371 aff_check_number(int spinval, int affval, char *name) 3372 { 3373 if (spinval != 0 && spinval != affval) 3374 smsg(_("%s value differs from what is used in another .aff file"), name); 3375 } 3376 3377 /* 3378 * Give a warning when "spinval" and "affval" strings are set and not the same. 3379 */ 3380 static void 3381 aff_check_string(char_u *spinval, char_u *affval, char *name) 3382 { 3383 if (spinval != NULL && STRCMP(spinval, affval) != 0) 3384 smsg(_("%s value differs from what is used in another .aff file"), name); 3385 } 3386 3387 /* 3388 * Return TRUE if strings "s1" and "s2" are equal. Also consider both being 3389 * NULL as equal. 3390 */ 3391 static int 3392 str_equal(char_u *s1, char_u *s2) 3393 { 3394 if (s1 == NULL || s2 == NULL) 3395 return s1 == s2; 3396 return STRCMP(s1, s2) == 0; 3397 } 3398 3399 /* 3400 * Add a from-to item to "gap". Used for REP and SAL items. 3401 * They are stored case-folded. 3402 */ 3403 static void 3404 add_fromto( 3405 spellinfo_T *spin, 3406 garray_T *gap, 3407 char_u *from, 3408 char_u *to) 3409 { 3410 fromto_T *ftp; 3411 char_u word[MAXWLEN]; 3412 3413 if (ga_grow(gap, 1) == OK) 3414 { 3415 ftp = ((fromto_T *)gap->ga_data) + gap->ga_len; 3416 (void)spell_casefold(from, (int)STRLEN(from), word, MAXWLEN); 3417 ftp->ft_from = getroom_save(spin, word); 3418 (void)spell_casefold(to, (int)STRLEN(to), word, MAXWLEN); 3419 ftp->ft_to = getroom_save(spin, word); 3420 ++gap->ga_len; 3421 } 3422 } 3423 3424 /* 3425 * Convert a boolean argument in a SAL line to TRUE or FALSE; 3426 */ 3427 static int 3428 sal_to_bool(char_u *s) 3429 { 3430 return STRCMP(s, "1") == 0 || STRCMP(s, "true") == 0; 3431 } 3432 3433 /* 3434 * Free the structure filled by spell_read_aff(). 
3435 */ 3436 static void 3437 spell_free_aff(afffile_T *aff) 3438 { 3439 hashtab_T *ht; 3440 hashitem_T *hi; 3441 int todo; 3442 affheader_T *ah; 3443 affentry_T *ae; 3444 3445 vim_free(aff->af_enc); 3446 3447 // All this trouble to free the "ae_prog" items... 3448 for (ht = &aff->af_pref; ; ht = &aff->af_suff) 3449 { 3450 todo = (int)ht->ht_used; 3451 for (hi = ht->ht_array; todo > 0; ++hi) 3452 { 3453 if (!HASHITEM_EMPTY(hi)) 3454 { 3455 --todo; 3456 ah = HI2AH(hi); 3457 for (ae = ah->ah_first; ae != NULL; ae = ae->ae_next) 3458 vim_regfree(ae->ae_prog); 3459 } 3460 } 3461 if (ht == &aff->af_suff) 3462 break; 3463 } 3464 3465 hash_clear(&aff->af_pref); 3466 hash_clear(&aff->af_suff); 3467 hash_clear(&aff->af_comp); 3468 } 3469 3470 /* 3471 * Read dictionary file "fname". 3472 * Returns OK or FAIL; 3473 */ 3474 static int 3475 spell_read_dic(spellinfo_T *spin, char_u *fname, afffile_T *affile) 3476 { 3477 hashtab_T ht; 3478 char_u line[MAXLINELEN]; 3479 char_u *p; 3480 char_u *afflist; 3481 char_u store_afflist[MAXWLEN]; 3482 int pfxlen; 3483 int need_affix; 3484 char_u *dw; 3485 char_u *pc; 3486 char_u *w; 3487 int l; 3488 hash_T hash; 3489 hashitem_T *hi; 3490 FILE *fd; 3491 int lnum = 1; 3492 int non_ascii = 0; 3493 int retval = OK; 3494 char_u message[MAXLINELEN + MAXWLEN]; 3495 int flags; 3496 int duplicate = 0; 3497 3498 /* 3499 * Open the file. 3500 */ 3501 fd = mch_fopen((char *)fname, "r"); 3502 if (fd == NULL) 3503 { 3504 semsg(_(e_notopen), fname); 3505 return FAIL; 3506 } 3507 3508 // The hashtable is only used to detect duplicated words. 3509 hash_init(&ht); 3510 3511 vim_snprintf((char *)IObuff, IOSIZE, 3512 _("Reading dictionary file %s..."), fname); 3513 spell_message(spin, IObuff); 3514 3515 // start with a message for the first line 3516 spin->si_msg_count = 999999; 3517 3518 // Read and ignore the first line: word count. 
3519 (void)vim_fgets(line, MAXLINELEN, fd); 3520 if (!vim_isdigit(*skipwhite(line))) 3521 semsg(_("E760: No word count in %s"), fname); 3522 3523 /* 3524 * Read all the lines in the file one by one. 3525 * The words are converted to 'encoding' here, before being added to 3526 * the hashtable. 3527 */ 3528 while (!vim_fgets(line, MAXLINELEN, fd) && !got_int) 3529 { 3530 line_breakcheck(); 3531 ++lnum; 3532 if (line[0] == '#' || line[0] == '/') 3533 continue; // comment line 3534 3535 // Remove CR, LF and white space from the end. White space halfway 3536 // the word is kept to allow e.g., "et al.". 3537 l = (int)STRLEN(line); 3538 while (l > 0 && line[l - 1] <= ' ') 3539 --l; 3540 if (l == 0) 3541 continue; // empty line 3542 line[l] = NUL; 3543 3544 // Convert from "SET" to 'encoding' when needed. 3545 if (spin->si_conv.vc_type != CONV_NONE) 3546 { 3547 pc = string_convert(&spin->si_conv, line, NULL); 3548 if (pc == NULL) 3549 { 3550 smsg(_("Conversion failure for word in %s line %d: %s"), 3551 fname, lnum, line); 3552 continue; 3553 } 3554 w = pc; 3555 } 3556 else 3557 { 3558 pc = NULL; 3559 w = line; 3560 } 3561 3562 // Truncate the word at the "/", set "afflist" to what follows. 3563 // Replace "\/" by "/" and "\\" by "\". 3564 afflist = NULL; 3565 for (p = w; *p != NUL; MB_PTR_ADV(p)) 3566 { 3567 if (*p == '\\' && (p[1] == '\\' || p[1] == '/')) 3568 STRMOVE(p, p + 1); 3569 else if (*p == '/') 3570 { 3571 *p = NUL; 3572 afflist = p + 1; 3573 break; 3574 } 3575 } 3576 3577 // Skip non-ASCII words when "spin->si_ascii" is TRUE. 3578 if (spin->si_ascii && has_non_ascii(w)) 3579 { 3580 ++non_ascii; 3581 vim_free(pc); 3582 continue; 3583 } 3584 3585 // This takes time, print a message every 10000 words. 
3586 if (spin->si_verbose && spin->si_msg_count > 10000) 3587 { 3588 spin->si_msg_count = 0; 3589 vim_snprintf((char *)message, sizeof(message), 3590 _("line %6d, word %6ld - %s"), 3591 lnum, spin->si_foldwcount + spin->si_keepwcount, w); 3592 msg_start(); 3593 msg_outtrans_long_attr(message, 0); 3594 msg_clr_eos(); 3595 msg_didout = FALSE; 3596 msg_col = 0; 3597 out_flush(); 3598 } 3599 3600 // Store the word in the hashtable to be able to find duplicates. 3601 dw = (char_u *)getroom_save(spin, w); 3602 if (dw == NULL) 3603 { 3604 retval = FAIL; 3605 vim_free(pc); 3606 break; 3607 } 3608 3609 hash = hash_hash(dw); 3610 hi = hash_lookup(&ht, dw, hash); 3611 if (!HASHITEM_EMPTY(hi)) 3612 { 3613 if (p_verbose > 0) 3614 smsg(_("Duplicate word in %s line %d: %s"), 3615 fname, lnum, dw); 3616 else if (duplicate == 0) 3617 smsg(_("First duplicate word in %s line %d: %s"), 3618 fname, lnum, dw); 3619 ++duplicate; 3620 } 3621 else 3622 hash_add_item(&ht, hi, dw, hash); 3623 3624 flags = 0; 3625 store_afflist[0] = NUL; 3626 pfxlen = 0; 3627 need_affix = FALSE; 3628 if (afflist != NULL) 3629 { 3630 // Extract flags from the affix list. 3631 flags |= get_affix_flags(affile, afflist); 3632 3633 if (affile->af_needaffix != 0 && flag_in_afflist( 3634 affile->af_flagtype, afflist, affile->af_needaffix)) 3635 need_affix = TRUE; 3636 3637 if (affile->af_pfxpostpone) 3638 // Need to store the list of prefix IDs with the word. 3639 pfxlen = get_pfxlist(affile, afflist, store_afflist); 3640 3641 if (spin->si_compflags != NULL) 3642 // Need to store the list of compound flags with the word. 3643 // Concatenate them to the list of prefix IDs. 3644 get_compflags(affile, afflist, store_afflist + pfxlen); 3645 } 3646 3647 // Add the word to the word tree(s). 3648 if (store_word(spin, dw, flags, spin->si_region, 3649 store_afflist, need_affix) == FAIL) 3650 retval = FAIL; 3651 3652 if (afflist != NULL) 3653 { 3654 // Find all matching suffixes and add the resulting words. 
3655 // Additionally do matching prefixes that combine. 3656 if (store_aff_word(spin, dw, afflist, affile, 3657 &affile->af_suff, &affile->af_pref, 3658 CONDIT_SUF, flags, store_afflist, pfxlen) == FAIL) 3659 retval = FAIL; 3660 3661 // Find all matching prefixes and add the resulting words. 3662 if (store_aff_word(spin, dw, afflist, affile, 3663 &affile->af_pref, NULL, 3664 CONDIT_SUF, flags, store_afflist, pfxlen) == FAIL) 3665 retval = FAIL; 3666 } 3667 3668 vim_free(pc); 3669 } 3670 3671 if (duplicate > 0) 3672 smsg(_("%d duplicate word(s) in %s"), duplicate, fname); 3673 if (spin->si_ascii && non_ascii > 0) 3674 smsg(_("Ignored %d word(s) with non-ASCII characters in %s"), 3675 non_ascii, fname); 3676 hash_clear(&ht); 3677 3678 fclose(fd); 3679 return retval; 3680 } 3681 3682 /* 3683 * Check for affix flags in "afflist" that are turned into word flags. 3684 * Return WF_ flags. 3685 */ 3686 static int 3687 get_affix_flags(afffile_T *affile, char_u *afflist) 3688 { 3689 int flags = 0; 3690 3691 if (affile->af_keepcase != 0 && flag_in_afflist( 3692 affile->af_flagtype, afflist, affile->af_keepcase)) 3693 flags |= WF_KEEPCAP | WF_FIXCAP; 3694 if (affile->af_rare != 0 && flag_in_afflist( 3695 affile->af_flagtype, afflist, affile->af_rare)) 3696 flags |= WF_RARE; 3697 if (affile->af_bad != 0 && flag_in_afflist( 3698 affile->af_flagtype, afflist, affile->af_bad)) 3699 flags |= WF_BANNED; 3700 if (affile->af_needcomp != 0 && flag_in_afflist( 3701 affile->af_flagtype, afflist, affile->af_needcomp)) 3702 flags |= WF_NEEDCOMP; 3703 if (affile->af_comproot != 0 && flag_in_afflist( 3704 affile->af_flagtype, afflist, affile->af_comproot)) 3705 flags |= WF_COMPROOT; 3706 if (affile->af_nosuggest != 0 && flag_in_afflist( 3707 affile->af_flagtype, afflist, affile->af_nosuggest)) 3708 flags |= WF_NOSUGGEST; 3709 return flags; 3710 } 3711 3712 /* 3713 * Get the list of prefix IDs from the affix list "afflist". 3714 * Used for PFXPOSTPONE. 
3715 * Put the resulting flags in "store_afflist[MAXWLEN]" with a terminating NUL 3716 * and return the number of affixes. 3717 */ 3718 static int 3719 get_pfxlist( 3720 afffile_T *affile, 3721 char_u *afflist, 3722 char_u *store_afflist) 3723 { 3724 char_u *p; 3725 char_u *prevp; 3726 int cnt = 0; 3727 int id; 3728 char_u key[AH_KEY_LEN]; 3729 hashitem_T *hi; 3730 3731 for (p = afflist; *p != NUL; ) 3732 { 3733 prevp = p; 3734 if (get_affitem(affile->af_flagtype, &p) != 0) 3735 { 3736 // A flag is a postponed prefix flag if it appears in "af_pref" 3737 // and its ID is not zero. 3738 vim_strncpy(key, prevp, p - prevp); 3739 hi = hash_find(&affile->af_pref, key); 3740 if (!HASHITEM_EMPTY(hi)) 3741 { 3742 id = HI2AH(hi)->ah_newID; 3743 if (id != 0) 3744 store_afflist[cnt++] = id; 3745 } 3746 } 3747 if (affile->af_flagtype == AFT_NUM && *p == ',') 3748 ++p; 3749 } 3750 3751 store_afflist[cnt] = NUL; 3752 return cnt; 3753 } 3754 3755 /* 3756 * Get the list of compound IDs from the affix list "afflist" that are used 3757 * for compound words. 3758 * Puts the flags in "store_afflist[]". 3759 */ 3760 static void 3761 get_compflags( 3762 afffile_T *affile, 3763 char_u *afflist, 3764 char_u *store_afflist) 3765 { 3766 char_u *p; 3767 char_u *prevp; 3768 int cnt = 0; 3769 char_u key[AH_KEY_LEN]; 3770 hashitem_T *hi; 3771 3772 for (p = afflist; *p != NUL; ) 3773 { 3774 prevp = p; 3775 if (get_affitem(affile->af_flagtype, &p) != 0) 3776 { 3777 // A flag is a compound flag if it appears in "af_comp". 3778 vim_strncpy(key, prevp, p - prevp); 3779 hi = hash_find(&affile->af_comp, key); 3780 if (!HASHITEM_EMPTY(hi)) 3781 store_afflist[cnt++] = HI2CI(hi)->ci_newID; 3782 } 3783 if (affile->af_flagtype == AFT_NUM && *p == ',') 3784 ++p; 3785 } 3786 3787 store_afflist[cnt] = NUL; 3788 } 3789 3790 /* 3791 * Apply affixes to a word and store the resulting words. 3792 * "ht" is the hashtable with affentry_T that need to be applied, either 3793 * prefixes or suffixes. 
3794 * "xht", when not NULL, is the prefix hashtable, to be used additionally on 3795 * the resulting words for combining affixes. 3796 * 3797 * Returns FAIL when out of memory. 3798 */ 3799 static int 3800 store_aff_word( 3801 spellinfo_T *spin, // spell info 3802 char_u *word, // basic word start 3803 char_u *afflist, // list of names of supported affixes 3804 afffile_T *affile, 3805 hashtab_T *ht, 3806 hashtab_T *xht, 3807 int condit, // CONDIT_SUF et al. 3808 int flags, // flags for the word 3809 char_u *pfxlist, // list of prefix IDs 3810 int pfxlen) // nr of flags in "pfxlist" for prefixes, rest 3811 // is compound flags 3812 { 3813 int todo; 3814 hashitem_T *hi; 3815 affheader_T *ah; 3816 affentry_T *ae; 3817 char_u newword[MAXWLEN]; 3818 int retval = OK; 3819 int i, j; 3820 char_u *p; 3821 int use_flags; 3822 char_u *use_pfxlist; 3823 int use_pfxlen; 3824 int need_affix; 3825 char_u store_afflist[MAXWLEN]; 3826 char_u pfx_pfxlist[MAXWLEN]; 3827 size_t wordlen = STRLEN(word); 3828 int use_condit; 3829 3830 todo = (int)ht->ht_used; 3831 for (hi = ht->ht_array; todo > 0 && retval == OK; ++hi) 3832 { 3833 if (!HASHITEM_EMPTY(hi)) 3834 { 3835 --todo; 3836 ah = HI2AH(hi); 3837 3838 // Check that the affix combines, if required, and that the word 3839 // supports this affix. 3840 if (((condit & CONDIT_COMB) == 0 || ah->ah_combine) 3841 && flag_in_afflist(affile->af_flagtype, afflist, 3842 ah->ah_flag)) 3843 { 3844 // Loop over all affix entries with this name. 3845 for (ae = ah->ah_first; ae != NULL; ae = ae->ae_next) 3846 { 3847 // Check the condition. It's not logical to match case 3848 // here, but it is required for compatibility with 3849 // Myspell. 3850 // Another requirement from Myspell is that the chop 3851 // string is shorter than the word itself. 3852 // For prefixes, when "PFXPOSTPONE" was used, only do 3853 // prefixes with a chop string and/or flags. 
3854 // When a previously added affix had CIRCUMFIX this one 3855 // must have it too, if it had not then this one must not 3856 // have one either. 3857 if ((xht != NULL || !affile->af_pfxpostpone 3858 || ae->ae_chop != NULL 3859 || ae->ae_flags != NULL) 3860 && (ae->ae_chop == NULL 3861 || STRLEN(ae->ae_chop) < wordlen) 3862 && (ae->ae_prog == NULL 3863 || vim_regexec_prog(&ae->ae_prog, FALSE, 3864 word, (colnr_T)0)) 3865 && (((condit & CONDIT_CFIX) == 0) 3866 == ((condit & CONDIT_AFF) == 0 3867 || ae->ae_flags == NULL 3868 || !flag_in_afflist(affile->af_flagtype, 3869 ae->ae_flags, affile->af_circumfix)))) 3870 { 3871 // Match. Remove the chop and add the affix. 3872 if (xht == NULL) 3873 { 3874 // prefix: chop/add at the start of the word 3875 if (ae->ae_add == NULL) 3876 *newword = NUL; 3877 else 3878 vim_strncpy(newword, ae->ae_add, MAXWLEN - 1); 3879 p = word; 3880 if (ae->ae_chop != NULL) 3881 { 3882 // Skip chop string. 3883 if (has_mbyte) 3884 { 3885 i = mb_charlen(ae->ae_chop); 3886 for ( ; i > 0; --i) 3887 MB_PTR_ADV(p); 3888 } 3889 else 3890 p += STRLEN(ae->ae_chop); 3891 } 3892 STRCAT(newword, p); 3893 } 3894 else 3895 { 3896 // suffix: chop/add at the end of the word 3897 vim_strncpy(newword, word, MAXWLEN - 1); 3898 if (ae->ae_chop != NULL) 3899 { 3900 // Remove chop string. 3901 p = newword + STRLEN(newword); 3902 i = (int)MB_CHARLEN(ae->ae_chop); 3903 for ( ; i > 0; --i) 3904 MB_PTR_BACK(newword, p); 3905 *p = NUL; 3906 } 3907 if (ae->ae_add != NULL) 3908 STRCAT(newword, ae->ae_add); 3909 } 3910 3911 use_flags = flags; 3912 use_pfxlist = pfxlist; 3913 use_pfxlen = pfxlen; 3914 need_affix = FALSE; 3915 use_condit = condit | CONDIT_COMB | CONDIT_AFF; 3916 if (ae->ae_flags != NULL) 3917 { 3918 // Extract flags from the affix list. 
3919 use_flags |= get_affix_flags(affile, ae->ae_flags); 3920 3921 if (affile->af_needaffix != 0 && flag_in_afflist( 3922 affile->af_flagtype, ae->ae_flags, 3923 affile->af_needaffix)) 3924 need_affix = TRUE; 3925 3926 // When there is a CIRCUMFIX flag the other affix 3927 // must also have it and we don't add the word 3928 // with one affix. 3929 if (affile->af_circumfix != 0 && flag_in_afflist( 3930 affile->af_flagtype, ae->ae_flags, 3931 affile->af_circumfix)) 3932 { 3933 use_condit |= CONDIT_CFIX; 3934 if ((condit & CONDIT_CFIX) == 0) 3935 need_affix = TRUE; 3936 } 3937 3938 if (affile->af_pfxpostpone 3939 || spin->si_compflags != NULL) 3940 { 3941 if (affile->af_pfxpostpone) 3942 // Get prefix IDS from the affix list. 3943 use_pfxlen = get_pfxlist(affile, 3944 ae->ae_flags, store_afflist); 3945 else 3946 use_pfxlen = 0; 3947 use_pfxlist = store_afflist; 3948 3949 // Combine the prefix IDs. Avoid adding the 3950 // same ID twice. 3951 for (i = 0; i < pfxlen; ++i) 3952 { 3953 for (j = 0; j < use_pfxlen; ++j) 3954 if (pfxlist[i] == use_pfxlist[j]) 3955 break; 3956 if (j == use_pfxlen) 3957 use_pfxlist[use_pfxlen++] = pfxlist[i]; 3958 } 3959 3960 if (spin->si_compflags != NULL) 3961 // Get compound IDS from the affix list. 3962 get_compflags(affile, ae->ae_flags, 3963 use_pfxlist + use_pfxlen); 3964 3965 // Combine the list of compound flags. 3966 // Concatenate them to the prefix IDs list. 3967 // Avoid adding the same ID twice. 3968 for (i = pfxlen; pfxlist[i] != NUL; ++i) 3969 { 3970 for (j = use_pfxlen; 3971 use_pfxlist[j] != NUL; ++j) 3972 if (pfxlist[i] == use_pfxlist[j]) 3973 break; 3974 if (use_pfxlist[j] == NUL) 3975 { 3976 use_pfxlist[j++] = pfxlist[i]; 3977 use_pfxlist[j] = NUL; 3978 } 3979 } 3980 } 3981 } 3982 3983 // Obey a "COMPOUNDFORBIDFLAG" of the affix: don't 3984 // use the compound flags. 
3985 if (use_pfxlist != NULL && ae->ae_compforbid) 3986 { 3987 vim_strncpy(pfx_pfxlist, use_pfxlist, use_pfxlen); 3988 use_pfxlist = pfx_pfxlist; 3989 } 3990 3991 // When there are postponed prefixes... 3992 if (spin->si_prefroot != NULL 3993 && spin->si_prefroot->wn_sibling != NULL) 3994 { 3995 // ... add a flag to indicate an affix was used. 3996 use_flags |= WF_HAS_AFF; 3997 3998 // ... don't use a prefix list if combining 3999 // affixes is not allowed. But do use the 4000 // compound flags after them. 4001 if (!ah->ah_combine && use_pfxlist != NULL) 4002 use_pfxlist += use_pfxlen; 4003 } 4004 4005 // When compounding is supported and there is no 4006 // "COMPOUNDPERMITFLAG" then forbid compounding on the 4007 // side where the affix is applied. 4008 if (spin->si_compflags != NULL && !ae->ae_comppermit) 4009 { 4010 if (xht != NULL) 4011 use_flags |= WF_NOCOMPAFT; 4012 else 4013 use_flags |= WF_NOCOMPBEF; 4014 } 4015 4016 // Store the modified word. 4017 if (store_word(spin, newword, use_flags, 4018 spin->si_region, use_pfxlist, 4019 need_affix) == FAIL) 4020 retval = FAIL; 4021 4022 // When added a prefix or a first suffix and the affix 4023 // has flags may add a(nother) suffix. RECURSIVE! 4024 if ((condit & CONDIT_SUF) && ae->ae_flags != NULL) 4025 if (store_aff_word(spin, newword, ae->ae_flags, 4026 affile, &affile->af_suff, xht, 4027 use_condit & (xht == NULL 4028 ? ~0 : ~CONDIT_SUF), 4029 use_flags, use_pfxlist, pfxlen) == FAIL) 4030 retval = FAIL; 4031 4032 // When added a suffix and combining is allowed also 4033 // try adding a prefix additionally. Both for the 4034 // word flags and for the affix flags. RECURSIVE! 
4035 if (xht != NULL && ah->ah_combine) 4036 { 4037 if (store_aff_word(spin, newword, 4038 afflist, affile, 4039 xht, NULL, use_condit, 4040 use_flags, use_pfxlist, 4041 pfxlen) == FAIL 4042 || (ae->ae_flags != NULL 4043 && store_aff_word(spin, newword, 4044 ae->ae_flags, affile, 4045 xht, NULL, use_condit, 4046 use_flags, use_pfxlist, 4047 pfxlen) == FAIL)) 4048 retval = FAIL; 4049 } 4050 } 4051 } 4052 } 4053 } 4054 } 4055 4056 return retval; 4057 } 4058 4059 /* 4060 * Read a file with a list of words. 4061 */ 4062 static int 4063 spell_read_wordfile(spellinfo_T *spin, char_u *fname) 4064 { 4065 FILE *fd; 4066 long lnum = 0; 4067 char_u rline[MAXLINELEN]; 4068 char_u *line; 4069 char_u *pc = NULL; 4070 char_u *p; 4071 int l; 4072 int retval = OK; 4073 int did_word = FALSE; 4074 int non_ascii = 0; 4075 int flags; 4076 int regionmask; 4077 4078 /* 4079 * Open the file. 4080 */ 4081 fd = mch_fopen((char *)fname, "r"); 4082 if (fd == NULL) 4083 { 4084 semsg(_(e_notopen), fname); 4085 return FAIL; 4086 } 4087 4088 vim_snprintf((char *)IObuff, IOSIZE, _("Reading word file %s..."), fname); 4089 spell_message(spin, IObuff); 4090 4091 /* 4092 * Read all the lines in the file one by one. 4093 */ 4094 while (!vim_fgets(rline, MAXLINELEN, fd) && !got_int) 4095 { 4096 line_breakcheck(); 4097 ++lnum; 4098 4099 // Skip comment lines. 4100 if (*rline == '#') 4101 continue; 4102 4103 // Remove CR, LF and white space from the end. 4104 l = (int)STRLEN(rline); 4105 while (l > 0 && rline[l - 1] <= ' ') 4106 --l; 4107 if (l == 0) 4108 continue; // empty or blank line 4109 rline[l] = NUL; 4110 4111 // Convert from "/encoding={encoding}" to 'encoding' when needed. 
4112 vim_free(pc); 4113 if (spin->si_conv.vc_type != CONV_NONE) 4114 { 4115 pc = string_convert(&spin->si_conv, rline, NULL); 4116 if (pc == NULL) 4117 { 4118 smsg(_("Conversion failure for word in %s line %d: %s"), 4119 fname, lnum, rline); 4120 continue; 4121 } 4122 line = pc; 4123 } 4124 else 4125 { 4126 pc = NULL; 4127 line = rline; 4128 } 4129 4130 if (*line == '/') 4131 { 4132 ++line; 4133 if (STRNCMP(line, "encoding=", 9) == 0) 4134 { 4135 if (spin->si_conv.vc_type != CONV_NONE) 4136 smsg(_("Duplicate /encoding= line ignored in %s line %d: %s"), 4137 fname, lnum, line - 1); 4138 else if (did_word) 4139 smsg(_("/encoding= line after word ignored in %s line %d: %s"), 4140 fname, lnum, line - 1); 4141 else 4142 { 4143 char_u *enc; 4144 4145 // Setup for conversion to 'encoding'. 4146 line += 9; 4147 enc = enc_canonize(line); 4148 if (enc != NULL && !spin->si_ascii 4149 && convert_setup(&spin->si_conv, enc, 4150 p_enc) == FAIL) 4151 smsg(_("Conversion in %s not supported: from %s to %s"), 4152 fname, line, p_enc); 4153 vim_free(enc); 4154 spin->si_conv.vc_fail = TRUE; 4155 } 4156 continue; 4157 } 4158 4159 if (STRNCMP(line, "regions=", 8) == 0) 4160 { 4161 if (spin->si_region_count > 1) 4162 smsg(_("Duplicate /regions= line ignored in %s line %d: %s"), 4163 fname, lnum, line); 4164 else 4165 { 4166 line += 8; 4167 if (STRLEN(line) > MAXREGIONS * 2) 4168 smsg(_("Too many regions in %s line %d: %s"), 4169 fname, lnum, line); 4170 else 4171 { 4172 spin->si_region_count = (int)STRLEN(line) / 2; 4173 STRCPY(spin->si_region_name, line); 4174 4175 // Adjust the mask for a word valid in all regions. 4176 spin->si_region = (1 << spin->si_region_count) - 1; 4177 } 4178 } 4179 continue; 4180 } 4181 4182 smsg(_("/ line ignored in %s line %d: %s"), 4183 fname, lnum, line - 1); 4184 continue; 4185 } 4186 4187 flags = 0; 4188 regionmask = spin->si_region; 4189 4190 // Check for flags and region after a slash. 
4191 p = vim_strchr(line, '/'); 4192 if (p != NULL) 4193 { 4194 *p++ = NUL; 4195 while (*p != NUL) 4196 { 4197 if (*p == '=') // keep-case word 4198 flags |= WF_KEEPCAP | WF_FIXCAP; 4199 else if (*p == '!') // Bad, bad, wicked word. 4200 flags |= WF_BANNED; 4201 else if (*p == '?') // Rare word. 4202 flags |= WF_RARE; 4203 else if (VIM_ISDIGIT(*p)) // region number(s) 4204 { 4205 if ((flags & WF_REGION) == 0) // first one 4206 regionmask = 0; 4207 flags |= WF_REGION; 4208 4209 l = *p - '0'; 4210 if (l == 0 || l > spin->si_region_count) 4211 { 4212 smsg(_("Invalid region nr in %s line %d: %s"), 4213 fname, lnum, p); 4214 break; 4215 } 4216 regionmask |= 1 << (l - 1); 4217 } 4218 else 4219 { 4220 smsg(_("Unrecognized flags in %s line %d: %s"), 4221 fname, lnum, p); 4222 break; 4223 } 4224 ++p; 4225 } 4226 } 4227 4228 // Skip non-ASCII words when "spin->si_ascii" is TRUE. 4229 if (spin->si_ascii && has_non_ascii(line)) 4230 { 4231 ++non_ascii; 4232 continue; 4233 } 4234 4235 // Normal word: store it. 4236 if (store_word(spin, line, flags, regionmask, NULL, FALSE) == FAIL) 4237 { 4238 retval = FAIL; 4239 break; 4240 } 4241 did_word = TRUE; 4242 } 4243 4244 vim_free(pc); 4245 fclose(fd); 4246 4247 if (spin->si_ascii && non_ascii > 0) 4248 { 4249 vim_snprintf((char *)IObuff, IOSIZE, 4250 _("Ignored %d words with non-ASCII characters"), non_ascii); 4251 spell_message(spin, IObuff); 4252 } 4253 4254 return retval; 4255 } 4256 4257 /* 4258 * Get part of an sblock_T, "len" bytes long. 4259 * This avoids calling free() for every little struct we use (and keeping 4260 * track of them). 4261 * The memory is cleared to all zeros. 4262 * Returns NULL when out of memory. 4263 */ 4264 static void * 4265 getroom( 4266 spellinfo_T *spin, 4267 size_t len, // length needed 4268 int align) // align for pointer 4269 { 4270 char_u *p; 4271 sblock_T *bl = spin->si_blocks; 4272 4273 if (align && bl != NULL) 4274 // Round size up for alignment. 
On some systems structures need to be 4275 // aligned to the size of a pointer (e.g., SPARC). 4276 bl->sb_used = (bl->sb_used + sizeof(char *) - 1) 4277 & ~(sizeof(char *) - 1); 4278 4279 if (bl == NULL || bl->sb_used + len > SBLOCKSIZE) 4280 { 4281 if (len >= SBLOCKSIZE) 4282 bl = NULL; 4283 else 4284 // Allocate a block of memory. It is not freed until much later. 4285 bl = alloc_clear(sizeof(sblock_T) + SBLOCKSIZE); 4286 if (bl == NULL) 4287 { 4288 if (!spin->si_did_emsg) 4289 { 4290 emsg(_("E845: Insufficient memory, word list will be incomplete")); 4291 spin->si_did_emsg = TRUE; 4292 } 4293 return NULL; 4294 } 4295 bl->sb_next = spin->si_blocks; 4296 spin->si_blocks = bl; 4297 bl->sb_used = 0; 4298 ++spin->si_blocks_cnt; 4299 } 4300 4301 p = bl->sb_data + bl->sb_used; 4302 bl->sb_used += (int)len; 4303 4304 return p; 4305 } 4306 4307 /* 4308 * Make a copy of a string into memory allocated with getroom(). 4309 * Returns NULL when out of memory. 4310 */ 4311 static char_u * 4312 getroom_save(spellinfo_T *spin, char_u *s) 4313 { 4314 char_u *sc; 4315 4316 sc = (char_u *)getroom(spin, STRLEN(s) + 1, FALSE); 4317 if (sc != NULL) 4318 STRCPY(sc, s); 4319 return sc; 4320 } 4321 4322 4323 /* 4324 * Free the list of allocated sblock_T. 4325 */ 4326 static void 4327 free_blocks(sblock_T *bl) 4328 { 4329 sblock_T *next; 4330 4331 while (bl != NULL) 4332 { 4333 next = bl->sb_next; 4334 vim_free(bl); 4335 bl = next; 4336 } 4337 } 4338 4339 /* 4340 * Allocate the root of a word tree. 4341 * Returns NULL when out of memory. 4342 */ 4343 static wordnode_T * 4344 wordtree_alloc(spellinfo_T *spin) 4345 { 4346 return (wordnode_T *)getroom(spin, sizeof(wordnode_T), TRUE); 4347 } 4348 4349 /* 4350 * Store a word in the tree(s). 4351 * Always store it in the case-folded tree. For a keep-case word this is 4352 * useful when the word can also be used with all caps (no WF_FIXCAP flag) and 4353 * used to find suggestions. 4354 * For a keep-case word also store it in the keep-case tree. 
* When "pfxlist" is not NULL store the word for each postponed prefix ID and
 * compound flag.
 * When "need_affix" is set the word is only stored with a non-zero ID from
 * "pfxlist", not as a plain word.
 * Returns the result of tree_add_word(): FAIL when out of memory.
 */
    static int
store_word(
    spellinfo_T *spin,
    char_u      *word,
    int         flags,          // extra flags, WF_BANNED
    int         region,         // supported region(s)
    char_u      *pfxlist,       // list of prefix IDs or NULL
    int         need_affix)     // only store word with affix ID
{
    int         len = (int)STRLEN(word);
    int         ct = captype(word, word + len);
    char_u      foldword[MAXWLEN];
    int         res = OK;
    char_u      *p;

    (void)spell_casefold(word, len, foldword, MAXWLEN);
    // Walk over the bytes in "pfxlist", including the terminating NUL.  The
    // NUL (or a NULL "pfxlist") stores the word with affix ID zero, unless
    // "need_affix" is set.
    for (p = pfxlist; res == OK; ++p)
    {
        if (!need_affix || (p != NULL && *p != NUL))
            res = tree_add_word(spin, foldword, spin->si_foldroot, ct | flags,
                                                  region, p == NULL ? 0 : *p);
        if (p == NULL || *p == NUL)
            break;
    }
    // Counted once per call, also when adding failed.
    ++spin->si_foldwcount;

    if (res == OK && (ct == WF_KEEPCAP || (flags & WF_KEEPCAP)))
    {
        // Same loop as above, now for the keep-case tree and with the word
        // as it was given.
        for (p = pfxlist; res == OK; ++p)
        {
            if (!need_affix || (p != NULL && *p != NUL))
                res = tree_add_word(spin, word, spin->si_keeproot, flags,
                                                  region, p == NULL ? 0 : *p);
            if (p == NULL || *p == NUL)
                break;
        }
        ++spin->si_keepwcount;
    }
    return res;
}

/*
 * Add word "word" to a word tree at "root".
 * When "flags" < 0 we are adding to the prefix tree where "flags" is used for
 * "rare" and "region" is the condition nr.
 * "affixID" is stored in the end-of-word node.
 * After adding the word the trees may be compressed, see the comment further
 * down for when that happens.
 * Returns FAIL when out of memory.
 */
    static int
tree_add_word(
    spellinfo_T *spin,
    char_u      *word,
    wordnode_T  *root,
    int         flags,
    int         region,
    int         affixID)
{
    wordnode_T  *node = root;
    wordnode_T  *np;
    wordnode_T  *copyp, **copyprev;
    wordnode_T  **prev = NULL;
    int         i;

    // Add each byte of the word to the tree, including the NUL at the end.
    for (i = 0; ; ++i)
    {
        // When there is more than one reference to this node we need to make
        // a copy, so that we can modify it.  Copy the whole list of siblings
        // (we don't optimize for a partly shared list of siblings).
        if (node != NULL && node->wn_refs > 1)
        {
            --node->wn_refs;
            copyprev = prev;
            for (copyp = node; copyp != NULL; copyp = copyp->wn_sibling)
            {
                // Allocate a new node and copy the info.
                np = get_wordnode(spin);
                if (np == NULL)
                    return FAIL;
                np->wn_child = copyp->wn_child;
                if (np->wn_child != NULL)
                    ++np->wn_child->wn_refs;    // child gets extra ref
                np->wn_byte = copyp->wn_byte;
                if (np->wn_byte == NUL)
                {
                    // End-of-word node: these fields are only copied here.
                    np->wn_flags = copyp->wn_flags;
                    np->wn_region = copyp->wn_region;
                    np->wn_affixID = copyp->wn_affixID;
                }

                // Link the new node in the list, there will be one ref.
                np->wn_refs = 1;
                if (copyprev != NULL)
                    *copyprev = np;
                copyprev = &np->wn_sibling;

                // Let "node" point to the head of the copied list.
                if (copyp == node)
                    node = np;
            }
        }

        // Look for the sibling that has the same character.  They are sorted
        // on byte value, thus stop searching when a sibling is found with a
        // higher byte value.  For zero bytes (end of word) the sorting is
        // done on flags and then on affixID.
        // In the prefix tree ("flags" < 0) end nodes are ordered on affixID
        // only; when "si_sugtree" is set the region is used instead of the
        // affixID.
        while (node != NULL
                && (node->wn_byte < word[i]
                    || (node->wn_byte == NUL
                        && (flags < 0
                            ? node->wn_affixID < (unsigned)affixID
                            : (node->wn_flags < (unsigned)(flags & WN_MASK)
                                || (node->wn_flags == (flags & WN_MASK)
                                    && (spin->si_sugtree
                                        ? (node->wn_region & 0xffff) < region
                                        : node->wn_affixID
                                                    < (unsigned)affixID)))))))
        {
            prev = &node->wn_sibling;
            node = *prev;
        }
        // A new node is needed when no sibling matches.  For the end of the
        // word an existing node is only re-used when flags and affixID are
        // equal, and never for the prefix tree or when "si_sugtree" is set.
        if (node == NULL
                || node->wn_byte != word[i]
                || (word[i] == NUL
                    && (flags < 0
                        || spin->si_sugtree
                        || node->wn_flags != (flags & WN_MASK)
                        || node->wn_affixID != affixID)))
        {
            // Allocate a new node.
            np = get_wordnode(spin);
            if (np == NULL)
                return FAIL;
            np->wn_byte = word[i];

            // If "node" is NULL this is a new child or the end of the sibling
            // list: ref count is one.  Otherwise use ref count of sibling and
            // make ref count of sibling one (matters when inserting in front
            // of the list of siblings).
            if (node == NULL)
                np->wn_refs = 1;
            else
            {
                np->wn_refs = node->wn_refs;
                node->wn_refs = 1;
            }
            if (prev != NULL)
                *prev = np;
            np->wn_sibling = node;
            node = np;
        }

        if (word[i] == NUL)
        {
            // End of the word: store the info.  The region is or'ed in, so
            // that regions stored earlier in a re-used node are kept.
            node->wn_flags = flags;
            node->wn_region |= region;
            node->wn_affixID = affixID;
            break;
        }
        prev = &node->wn_child;
        node = *prev;
    }
#ifdef SPELL_PRINTTREE
    smsg("Added \"%s\"", word);
    spell_print_tree(root->wn_sibling);
#endif

    // count nr of words added since last message
    ++spin->si_msg_count;

    if (spin->si_compress_cnt > 1)
    {
        if (--spin->si_compress_cnt == 1)
            // Did enough words to lower the block count limit.
            spin->si_blocks_cnt += compress_inc;
    }

    /*
     * When we have allocated lots of memory we need to compress the word tree
     * to free up some room.  But compression is slow, and we might actually
     * need that room, thus only compress in the following situations:
     * 1. When not compressed before (si_compress_cnt == 0): when using
     *    "compress_start" blocks.
     * 2. When compressed before and used "compress_inc" blocks before
     *    adding "compress_added" words (si_compress_cnt > 1).
     * 3. When compressed before, added "compress_added" words
     *    (si_compress_cnt == 1) and the number of free nodes drops below the
     *    maximum word length.
     */
#ifndef SPELL_COMPRESS_ALLWAYS
    if (spin->si_compress_cnt == 1
            ? spin->si_free_count < MAXWLEN
            : spin->si_blocks_cnt >= compress_start)
#endif
    {
        // Decrement the block counter.  The effect is that we compress again
        // when the freed up room has been used and another "compress_inc"
        // blocks have been allocated.  Unless "compress_added" words have
        // been added, then the limit is put back again.
        spin->si_blocks_cnt -= compress_inc;
        spin->si_compress_cnt = compress_added;

        if (spin->si_verbose)
        {
            msg_start();
            msg_puts(_(msg_compressing));
            msg_clr_eos();
            msg_didout = FALSE;
            msg_col = 0;
            out_flush();
        }

        // Compress both trees.  Either they both have many nodes, which makes
        // compression useful, or one of them is small, which means
        // compression goes fast.  But when filling the soundfold word tree
        // there is no keep-case tree.
        wordtree_compress(spin, spin->si_foldroot);
        if (affixID >= 0)
            wordtree_compress(spin, spin->si_keeproot);
    }

    return OK;
}

/*
 * Get a wordnode_T, either from the list of previously freed nodes or
 * allocate a new one.
 * Returns NULL when out of memory.
*/
    static wordnode_T *
get_wordnode(spellinfo_T *spin)
{
    wordnode_T *n;

    if (spin->si_first_free == NULL)
        n = (wordnode_T *)getroom(spin, sizeof(wordnode_T), TRUE);
    else
    {
        // Re-use the first node of the free list; free nodes are linked
        // through "wn_child".  Clear it, so that it looks like a new node.
        n = spin->si_first_free;
        spin->si_first_free = n->wn_child;
        vim_memset(n, 0, sizeof(wordnode_T));
        --spin->si_free_count;
    }
#ifdef SPELL_PRINTTREE
    if (n != NULL)
        n->wn_nr = ++spin->si_wordnode_nr;
#endif
    return n;
}

/*
 * Decrement the reference count on a node (which is the head of a list of
 * siblings).  If the reference count becomes zero free the node and its
 * siblings.
 * The children of freed nodes are dereferenced recursively.
 * Returns the number of nodes actually freed.
 */
    static int
deref_wordnode(spellinfo_T *spin, wordnode_T *node)
{
    wordnode_T  *np;
    int         cnt = 0;

    if (--node->wn_refs == 0)
    {
        // free_wordnode() only overwrites "wn_child", thus "wn_sibling" can
        // still be used to advance to the next node.
        for (np = node; np != NULL; np = np->wn_sibling)
        {
            if (np->wn_child != NULL)
                cnt += deref_wordnode(spin, np->wn_child);
            free_wordnode(spin, np);
            ++cnt;
        }
        ++cnt;      // length field
    }
    return cnt;
}

/*
 * Free a wordnode_T for re-use later.
 * The node is put at the head of the free list of "spin".
 * Only the "wn_child" field becomes invalid.
 */
    static void
free_wordnode(spellinfo_T *spin, wordnode_T *n)
{
    n->wn_child = spin->si_first_free;
    spin->si_first_free = n;
    ++spin->si_free_count;
}

/*
 * Compress a tree: find tails that are identical and can be shared.
 * When verbose, a message with the number of compressed and remaining nodes
 * is given.
 */
    static void
wordtree_compress(spellinfo_T *spin, wordnode_T *root)
{
    hashtab_T       ht;
    int             n;
    int             tot = 0;
    int             perc;

    // Skip the root itself, it's not actually used.  The first sibling is the
    // start of the tree.
    if (root->wn_sibling != NULL)
    {
        hash_init(&ht);
        n = node_compress(spin, root->wn_sibling, &ht, &tot);

#ifndef SPELL_PRINTTREE
        if (spin->si_verbose || p_verbose > 2)
#endif
        {
            // Compute the percentage of nodes that remain.  For a large
            // total divide first instead of multiplying by 100.
            if (tot > 1000000)
                perc = (tot - n) / (tot / 100);
            else if (tot == 0)
                perc = 0;
            else
                perc = (tot - n) * 100 / tot;
            vim_snprintf((char *)IObuff, IOSIZE,
                       _("Compressed %d of %d nodes; %d (%d%%) remaining"),
                                                       n, tot, tot - n, perc);
            spell_message(spin, IObuff);
        }
#ifdef SPELL_PRINTTREE
        spell_print_tree(root->wn_sibling);
#endif
        hash_clear(&ht);
    }
}

/*
 * Compress a node, its siblings and its children, depth first.
 * Returns the number of compressed nodes.
 */
    static int
node_compress(
    spellinfo_T *spin,
    wordnode_T  *node,
    hashtab_T   *ht,
    int         *tot)       // total count of nodes before compressing,
                            // incremented while going through the tree
{
    wordnode_T  *np;
    wordnode_T  *tp;
    wordnode_T  *child;
    hash_T      hash;
    hashitem_T  *hi;
    int         len = 0;
    unsigned    nr, n;
    int         compressed = 0;

    /*
     * Go through the list of siblings.  Compress each child and then try
     * finding an identical child to replace it.
     * Note that with "child" we mean not just the node that is pointed to,
     * but the whole list of siblings of which the child node is the first.
     */
    for (np = node; np != NULL && !got_int; np = np->wn_sibling)
    {
        ++len;
        if ((child = np->wn_child) != NULL)
        {
            // Compress the child first.  This fills hashkey.
            compressed += node_compress(spin, child, ht, tot);

            // Try to find an identical child.
            hash = hash_hash(child->wn_u1.hashkey);
            hi = hash_lookup(ht, child->wn_u1.hashkey, hash);
            if (!HASHITEM_EMPTY(hi))
            {
                // There are children we encountered before with a hash value
                // identical to the current child.  Now check if there is one
                // that is really identical.
                for (tp = HI2WN(hi); tp != NULL; tp = tp->wn_u2.next)
                    if (node_equal(child, tp))
                    {
                        // Found one!  Now use that child in place of the
                        // current one.  This means the current child and all
                        // its siblings is unlinked from the tree.
                        ++tp->wn_refs;
                        compressed += deref_wordnode(spin, child);
                        np->wn_child = tp;
                        break;
                    }
                if (tp == NULL)
                {
                    // No other child with this hash value equals the child of
                    // the node, add it to the linked list after the first
                    // item.
                    tp = HI2WN(hi);
                    child->wn_u2.next = tp->wn_u2.next;
                    tp->wn_u2.next = child;
                }
            }
            else
                // No other child has this hash value, add it to the
                // hashtable.
                hash_add_item(ht, hi, child->wn_u1.hashkey, hash);
        }
    }
    *tot += len + 1;    // add one for the node that stores the length

    /*
     * Make a hash key for the node and its siblings, so that we can quickly
     * find a lookalike node.  This must be done after compressing the sibling
     * list, otherwise the hash key would become invalid by the compression.
     */
    node->wn_u1.hashkey[0] = len;
    nr = 0;
    for (np = node; np != NULL; np = np->wn_sibling)
    {
        if (np->wn_byte == NUL)
            // end node: use wn_flags, wn_region and wn_affixID
            n = np->wn_flags + (np->wn_region << 8) + (np->wn_affixID << 16);
        else
            // byte node: use the byte value and the child pointer
            n = (unsigned)(np->wn_byte + ((long_u)np->wn_child << 8));
        nr = nr * 101 + n;
    }

    // Avoid NUL bytes, it terminates the hash key.
    n = nr & 0xff;
    node->wn_u1.hashkey[1] = n == 0 ?
1 : n; 4778 n = (nr >> 8) & 0xff; 4779 node->wn_u1.hashkey[2] = n == 0 ? 1 : n; 4780 n = (nr >> 16) & 0xff; 4781 node->wn_u1.hashkey[3] = n == 0 ? 1 : n; 4782 n = (nr >> 24) & 0xff; 4783 node->wn_u1.hashkey[4] = n == 0 ? 1 : n; 4784 node->wn_u1.hashkey[5] = NUL; 4785 4786 // Check for CTRL-C pressed now and then. 4787 fast_breakcheck(); 4788 4789 return compressed; 4790 } 4791 4792 /* 4793 * Return TRUE when two nodes have identical siblings and children. 4794 */ 4795 static int 4796 node_equal(wordnode_T *n1, wordnode_T *n2) 4797 { 4798 wordnode_T *p1; 4799 wordnode_T *p2; 4800 4801 for (p1 = n1, p2 = n2; p1 != NULL && p2 != NULL; 4802 p1 = p1->wn_sibling, p2 = p2->wn_sibling) 4803 if (p1->wn_byte != p2->wn_byte 4804 || (p1->wn_byte == NUL 4805 ? (p1->wn_flags != p2->wn_flags 4806 || p1->wn_region != p2->wn_region 4807 || p1->wn_affixID != p2->wn_affixID) 4808 : (p1->wn_child != p2->wn_child))) 4809 break; 4810 4811 return p1 == NULL && p2 == NULL; 4812 } 4813 4814 static int rep_compare(const void *s1, const void *s2); 4815 4816 /* 4817 * Function given to qsort() to sort the REP items on "from" string. 4818 */ 4819 static int 4820 rep_compare(const void *s1, const void *s2) 4821 { 4822 fromto_T *p1 = (fromto_T *)s1; 4823 fromto_T *p2 = (fromto_T *)s2; 4824 4825 return STRCMP(p1->ft_from, p2->ft_from); 4826 } 4827 4828 /* 4829 * Write the Vim .spl file "fname". 
4830 * Return FAIL or OK; 4831 */ 4832 static int 4833 write_vim_spell(spellinfo_T *spin, char_u *fname) 4834 { 4835 FILE *fd; 4836 int regionmask; 4837 int round; 4838 wordnode_T *tree; 4839 int nodecount; 4840 int i; 4841 int l; 4842 garray_T *gap; 4843 fromto_T *ftp; 4844 char_u *p; 4845 int rr; 4846 int retval = OK; 4847 size_t fwv = 1; // collect return value of fwrite() to avoid 4848 // warnings from picky compiler 4849 4850 fd = mch_fopen((char *)fname, "w"); 4851 if (fd == NULL) 4852 { 4853 semsg(_(e_notopen), fname); 4854 return FAIL; 4855 } 4856 4857 // <HEADER>: <fileID> <versionnr> 4858 // <fileID> 4859 fwv &= fwrite(VIMSPELLMAGIC, VIMSPELLMAGICL, (size_t)1, fd); 4860 if (fwv != (size_t)1) 4861 // Catch first write error, don't try writing more. 4862 goto theend; 4863 4864 putc(VIMSPELLVERSION, fd); // <versionnr> 4865 4866 /* 4867 * <SECTIONS>: <section> ... <sectionend> 4868 */ 4869 4870 // SN_INFO: <infotext> 4871 if (spin->si_info != NULL) 4872 { 4873 putc(SN_INFO, fd); // <sectionID> 4874 putc(0, fd); // <sectionflags> 4875 4876 i = (int)STRLEN(spin->si_info); 4877 put_bytes(fd, (long_u)i, 4); // <sectionlen> 4878 fwv &= fwrite(spin->si_info, (size_t)i, (size_t)1, fd); // <infotext> 4879 } 4880 4881 // SN_REGION: <regionname> ... 4882 // Write the region names only if there is more than one. 4883 if (spin->si_region_count > 1) 4884 { 4885 putc(SN_REGION, fd); // <sectionID> 4886 putc(SNF_REQUIRED, fd); // <sectionflags> 4887 l = spin->si_region_count * 2; 4888 put_bytes(fd, (long_u)l, 4); // <sectionlen> 4889 fwv &= fwrite(spin->si_region_name, (size_t)l, (size_t)1, fd); 4890 // <regionname> ... 4891 regionmask = (1 << spin->si_region_count) - 1; 4892 } 4893 else 4894 regionmask = 0; 4895 4896 // SN_CHARFLAGS: <charflagslen> <charflags> <folcharslen> <folchars> 4897 // 4898 // The table with character flags and the table for case folding. 
4899 // This makes sure the same characters are recognized as word characters 4900 // when generating an when using a spell file. 4901 // Skip this for ASCII, the table may conflict with the one used for 4902 // 'encoding'. 4903 // Also skip this for an .add.spl file, the main spell file must contain 4904 // the table (avoids that it conflicts). File is shorter too. 4905 if (!spin->si_ascii && !spin->si_add) 4906 { 4907 char_u folchars[128 * 8]; 4908 int flags; 4909 4910 putc(SN_CHARFLAGS, fd); // <sectionID> 4911 putc(SNF_REQUIRED, fd); // <sectionflags> 4912 4913 // Form the <folchars> string first, we need to know its length. 4914 l = 0; 4915 for (i = 128; i < 256; ++i) 4916 { 4917 if (has_mbyte) 4918 l += mb_char2bytes(spelltab.st_fold[i], folchars + l); 4919 else 4920 folchars[l++] = spelltab.st_fold[i]; 4921 } 4922 put_bytes(fd, (long_u)(1 + 128 + 2 + l), 4); // <sectionlen> 4923 4924 fputc(128, fd); // <charflagslen> 4925 for (i = 128; i < 256; ++i) 4926 { 4927 flags = 0; 4928 if (spelltab.st_isw[i]) 4929 flags |= CF_WORD; 4930 if (spelltab.st_isu[i]) 4931 flags |= CF_UPPER; 4932 fputc(flags, fd); // <charflags> 4933 } 4934 4935 put_bytes(fd, (long_u)l, 2); // <folcharslen> 4936 fwv &= fwrite(folchars, (size_t)l, (size_t)1, fd); // <folchars> 4937 } 4938 4939 // SN_MIDWORD: <midword> 4940 if (spin->si_midword != NULL) 4941 { 4942 putc(SN_MIDWORD, fd); // <sectionID> 4943 putc(SNF_REQUIRED, fd); // <sectionflags> 4944 4945 i = (int)STRLEN(spin->si_midword); 4946 put_bytes(fd, (long_u)i, 4); // <sectionlen> 4947 fwv &= fwrite(spin->si_midword, (size_t)i, (size_t)1, fd); 4948 // <midword> 4949 } 4950 4951 // SN_PREFCOND: <prefcondcnt> <prefcond> ... 
4952 if (spin->si_prefcond.ga_len > 0) 4953 { 4954 putc(SN_PREFCOND, fd); // <sectionID> 4955 putc(SNF_REQUIRED, fd); // <sectionflags> 4956 4957 l = write_spell_prefcond(NULL, &spin->si_prefcond); 4958 put_bytes(fd, (long_u)l, 4); // <sectionlen> 4959 4960 write_spell_prefcond(fd, &spin->si_prefcond); 4961 } 4962 4963 // SN_REP: <repcount> <rep> ... 4964 // SN_SAL: <salflags> <salcount> <sal> ... 4965 // SN_REPSAL: <repcount> <rep> ... 4966 4967 // round 1: SN_REP section 4968 // round 2: SN_SAL section (unless SN_SOFO is used) 4969 // round 3: SN_REPSAL section 4970 for (round = 1; round <= 3; ++round) 4971 { 4972 if (round == 1) 4973 gap = &spin->si_rep; 4974 else if (round == 2) 4975 { 4976 // Don't write SN_SAL when using a SN_SOFO section 4977 if (spin->si_sofofr != NULL && spin->si_sofoto != NULL) 4978 continue; 4979 gap = &spin->si_sal; 4980 } 4981 else 4982 gap = &spin->si_repsal; 4983 4984 // Don't write the section if there are no items. 4985 if (gap->ga_len == 0) 4986 continue; 4987 4988 // Sort the REP/REPSAL items. 4989 if (round != 2) 4990 qsort(gap->ga_data, (size_t)gap->ga_len, 4991 sizeof(fromto_T), rep_compare); 4992 4993 i = round == 1 ? SN_REP : (round == 2 ? SN_SAL : SN_REPSAL); 4994 putc(i, fd); // <sectionID> 4995 4996 // This is for making suggestions, section is not required. 4997 putc(0, fd); // <sectionflags> 4998 4999 // Compute the length of what follows. 
5000 l = 2; // count <repcount> or <salcount> 5001 for (i = 0; i < gap->ga_len; ++i) 5002 { 5003 ftp = &((fromto_T *)gap->ga_data)[i]; 5004 l += 1 + (int)STRLEN(ftp->ft_from); // count <*fromlen> and <*from> 5005 l += 1 + (int)STRLEN(ftp->ft_to); // count <*tolen> and <*to> 5006 } 5007 if (round == 2) 5008 ++l; // count <salflags> 5009 put_bytes(fd, (long_u)l, 4); // <sectionlen> 5010 5011 if (round == 2) 5012 { 5013 i = 0; 5014 if (spin->si_followup) 5015 i |= SAL_F0LLOWUP; 5016 if (spin->si_collapse) 5017 i |= SAL_COLLAPSE; 5018 if (spin->si_rem_accents) 5019 i |= SAL_REM_ACCENTS; 5020 putc(i, fd); // <salflags> 5021 } 5022 5023 put_bytes(fd, (long_u)gap->ga_len, 2); // <repcount> or <salcount> 5024 for (i = 0; i < gap->ga_len; ++i) 5025 { 5026 // <rep> : <repfromlen> <repfrom> <reptolen> <repto> 5027 // <sal> : <salfromlen> <salfrom> <saltolen> <salto> 5028 ftp = &((fromto_T *)gap->ga_data)[i]; 5029 for (rr = 1; rr <= 2; ++rr) 5030 { 5031 p = rr == 1 ? ftp->ft_from : ftp->ft_to; 5032 l = (int)STRLEN(p); 5033 putc(l, fd); 5034 if (l > 0) 5035 fwv &= fwrite(p, l, (size_t)1, fd); 5036 } 5037 } 5038 5039 } 5040 5041 // SN_SOFO: <sofofromlen> <sofofrom> <sofotolen> <sofoto> 5042 // This is for making suggestions, section is not required. 5043 if (spin->si_sofofr != NULL && spin->si_sofoto != NULL) 5044 { 5045 putc(SN_SOFO, fd); // <sectionID> 5046 putc(0, fd); // <sectionflags> 5047 5048 l = (int)STRLEN(spin->si_sofofr); 5049 put_bytes(fd, (long_u)(l + STRLEN(spin->si_sofoto) + 4), 4); 5050 // <sectionlen> 5051 5052 put_bytes(fd, (long_u)l, 2); // <sofofromlen> 5053 fwv &= fwrite(spin->si_sofofr, l, (size_t)1, fd); // <sofofrom> 5054 5055 l = (int)STRLEN(spin->si_sofoto); 5056 put_bytes(fd, (long_u)l, 2); // <sofotolen> 5057 fwv &= fwrite(spin->si_sofoto, l, (size_t)1, fd); // <sofoto> 5058 } 5059 5060 // SN_WORDS: <word> ... 5061 // This is for making suggestions, section is not required. 
5062 if (spin->si_commonwords.ht_used > 0) 5063 { 5064 putc(SN_WORDS, fd); // <sectionID> 5065 putc(0, fd); // <sectionflags> 5066 5067 // round 1: count the bytes 5068 // round 2: write the bytes 5069 for (round = 1; round <= 2; ++round) 5070 { 5071 int todo; 5072 int len = 0; 5073 hashitem_T *hi; 5074 5075 todo = (int)spin->si_commonwords.ht_used; 5076 for (hi = spin->si_commonwords.ht_array; todo > 0; ++hi) 5077 if (!HASHITEM_EMPTY(hi)) 5078 { 5079 l = (int)STRLEN(hi->hi_key) + 1; 5080 len += l; 5081 if (round == 2) // <word> 5082 fwv &= fwrite(hi->hi_key, (size_t)l, (size_t)1, fd); 5083 --todo; 5084 } 5085 if (round == 1) 5086 put_bytes(fd, (long_u)len, 4); // <sectionlen> 5087 } 5088 } 5089 5090 // SN_MAP: <mapstr> 5091 // This is for making suggestions, section is not required. 5092 if (spin->si_map.ga_len > 0) 5093 { 5094 putc(SN_MAP, fd); // <sectionID> 5095 putc(0, fd); // <sectionflags> 5096 l = spin->si_map.ga_len; 5097 put_bytes(fd, (long_u)l, 4); // <sectionlen> 5098 fwv &= fwrite(spin->si_map.ga_data, (size_t)l, (size_t)1, fd); 5099 // <mapstr> 5100 } 5101 5102 // SN_SUGFILE: <timestamp> 5103 // This is used to notify that a .sug file may be available and at the 5104 // same time allows for checking that a .sug file that is found matches 5105 // with this .spl file. That's because the word numbers must be exactly 5106 // right. 5107 if (!spin->si_nosugfile 5108 && (spin->si_sal.ga_len > 0 5109 || (spin->si_sofofr != NULL && spin->si_sofoto != NULL))) 5110 { 5111 putc(SN_SUGFILE, fd); // <sectionID> 5112 putc(0, fd); // <sectionflags> 5113 put_bytes(fd, (long_u)8, 4); // <sectionlen> 5114 5115 // Set si_sugtime and write it to the file. 5116 spin->si_sugtime = time(NULL); 5117 put_time(fd, spin->si_sugtime); // <timestamp> 5118 } 5119 5120 // SN_NOSPLITSUGS: nothing 5121 // This is used to notify that no suggestions with word splits are to be 5122 // made. 
5123 if (spin->si_nosplitsugs) 5124 { 5125 putc(SN_NOSPLITSUGS, fd); // <sectionID> 5126 putc(0, fd); // <sectionflags> 5127 put_bytes(fd, (long_u)0, 4); // <sectionlen> 5128 } 5129 5130 // SN_NOCOMPUNDSUGS: nothing 5131 // This is used to notify that no suggestions with compounds are to be 5132 // made. 5133 if (spin->si_nocompoundsugs) 5134 { 5135 putc(SN_NOCOMPOUNDSUGS, fd); // <sectionID> 5136 putc(0, fd); // <sectionflags> 5137 put_bytes(fd, (long_u)0, 4); // <sectionlen> 5138 } 5139 5140 // SN_COMPOUND: compound info. 5141 // We don't mark it required, when not supported all compound words will 5142 // be bad words. 5143 if (spin->si_compflags != NULL) 5144 { 5145 putc(SN_COMPOUND, fd); // <sectionID> 5146 putc(0, fd); // <sectionflags> 5147 5148 l = (int)STRLEN(spin->si_compflags); 5149 for (i = 0; i < spin->si_comppat.ga_len; ++i) 5150 l += (int)STRLEN(((char_u **)(spin->si_comppat.ga_data))[i]) + 1; 5151 put_bytes(fd, (long_u)(l + 7), 4); // <sectionlen> 5152 5153 putc(spin->si_compmax, fd); // <compmax> 5154 putc(spin->si_compminlen, fd); // <compminlen> 5155 putc(spin->si_compsylmax, fd); // <compsylmax> 5156 putc(0, fd); // for Vim 7.0b compatibility 5157 putc(spin->si_compoptions, fd); // <compoptions> 5158 put_bytes(fd, (long_u)spin->si_comppat.ga_len, 2); 5159 // <comppatcount> 5160 for (i = 0; i < spin->si_comppat.ga_len; ++i) 5161 { 5162 p = ((char_u **)(spin->si_comppat.ga_data))[i]; 5163 putc((int)STRLEN(p), fd); // <comppatlen> 5164 fwv &= fwrite(p, (size_t)STRLEN(p), (size_t)1, fd); 5165 // <comppattext> 5166 } 5167 // <compflags> 5168 fwv &= fwrite(spin->si_compflags, (size_t)STRLEN(spin->si_compflags), 5169 (size_t)1, fd); 5170 } 5171 5172 // SN_NOBREAK: NOBREAK flag 5173 if (spin->si_nobreak) 5174 { 5175 putc(SN_NOBREAK, fd); // <sectionID> 5176 putc(0, fd); // <sectionflags> 5177 5178 // It's empty, the presence of the section flags the feature. 
5179 put_bytes(fd, (long_u)0, 4); // <sectionlen> 5180 } 5181 5182 // SN_SYLLABLE: syllable info. 5183 // We don't mark it required, when not supported syllables will not be 5184 // counted. 5185 if (spin->si_syllable != NULL) 5186 { 5187 putc(SN_SYLLABLE, fd); // <sectionID> 5188 putc(0, fd); // <sectionflags> 5189 5190 l = (int)STRLEN(spin->si_syllable); 5191 put_bytes(fd, (long_u)l, 4); // <sectionlen> 5192 fwv &= fwrite(spin->si_syllable, (size_t)l, (size_t)1, fd); 5193 // <syllable> 5194 } 5195 5196 // end of <SECTIONS> 5197 putc(SN_END, fd); // <sectionend> 5198 5199 5200 /* 5201 * <LWORDTREE> <KWORDTREE> <PREFIXTREE> 5202 */ 5203 spin->si_memtot = 0; 5204 for (round = 1; round <= 3; ++round) 5205 { 5206 if (round == 1) 5207 tree = spin->si_foldroot->wn_sibling; 5208 else if (round == 2) 5209 tree = spin->si_keeproot->wn_sibling; 5210 else 5211 tree = spin->si_prefroot->wn_sibling; 5212 5213 // Clear the index and wnode fields in the tree. 5214 clear_node(tree); 5215 5216 // Count the number of nodes. Needed to be able to allocate the 5217 // memory when reading the nodes. Also fills in index for shared 5218 // nodes. 5219 nodecount = put_node(NULL, tree, 0, regionmask, round == 3); 5220 5221 // number of nodes in 4 bytes 5222 put_bytes(fd, (long_u)nodecount, 4); // <nodecount> 5223 spin->si_memtot += nodecount + nodecount * sizeof(int); 5224 5225 // Write the nodes. 5226 (void)put_node(fd, tree, 0, regionmask, round == 3); 5227 } 5228 5229 // Write another byte to check for errors (file system full). 5230 if (putc(0, fd) == EOF) 5231 retval = FAIL; 5232 theend: 5233 if (fclose(fd) == EOF) 5234 retval = FAIL; 5235 5236 if (fwv != (size_t)1) 5237 retval = FAIL; 5238 if (retval == FAIL) 5239 emsg(_(e_write)); 5240 5241 return retval; 5242 } 5243 5244 /* 5245 * Clear the index and wnode fields of "node", it siblings and its 5246 * children. This is needed because they are a union with other items to save 5247 * space. 
5248 */ 5249 static void 5250 clear_node(wordnode_T *node) 5251 { 5252 wordnode_T *np; 5253 5254 if (node != NULL) 5255 for (np = node; np != NULL; np = np->wn_sibling) 5256 { 5257 np->wn_u1.index = 0; 5258 np->wn_u2.wnode = NULL; 5259 5260 if (np->wn_byte != NUL) 5261 clear_node(np->wn_child); 5262 } 5263 } 5264 5265 5266 /* 5267 * Dump a word tree at node "node". 5268 * 5269 * This first writes the list of possible bytes (siblings). Then for each 5270 * byte recursively write the children. 5271 * 5272 * NOTE: The code here must match the code in read_tree_node(), since 5273 * assumptions are made about the indexes (so that we don't have to write them 5274 * in the file). 5275 * 5276 * Returns the number of nodes used. 5277 */ 5278 static int 5279 put_node( 5280 FILE *fd, // NULL when only counting 5281 wordnode_T *node, 5282 int idx, 5283 int regionmask, 5284 int prefixtree) // TRUE for PREFIXTREE 5285 { 5286 int newindex = idx; 5287 int siblingcount = 0; 5288 wordnode_T *np; 5289 int flags; 5290 5291 // If "node" is zero the tree is empty. 5292 if (node == NULL) 5293 return 0; 5294 5295 // Store the index where this node is written. 5296 node->wn_u1.index = idx; 5297 5298 // Count the number of siblings. 5299 for (np = node; np != NULL; np = np->wn_sibling) 5300 ++siblingcount; 5301 5302 // Write the sibling count. 5303 if (fd != NULL) 5304 putc(siblingcount, fd); // <siblingcount> 5305 5306 // Write each sibling byte and optionally extra info. 5307 for (np = node; np != NULL; np = np->wn_sibling) 5308 { 5309 if (np->wn_byte == 0) 5310 { 5311 if (fd != NULL) 5312 { 5313 // For a NUL byte (end of word) write the flags etc. 5314 if (prefixtree) 5315 { 5316 // In PREFIXTREE write the required affixID and the 5317 // associated condition nr (stored in wn_region). 
The 5318 // byte value is misused to store the "rare" and "not 5319 // combining" flags 5320 if (np->wn_flags == (short_u)PFX_FLAGS) 5321 putc(BY_NOFLAGS, fd); // <byte> 5322 else 5323 { 5324 putc(BY_FLAGS, fd); // <byte> 5325 putc(np->wn_flags, fd); // <pflags> 5326 } 5327 putc(np->wn_affixID, fd); // <affixID> 5328 put_bytes(fd, (long_u)np->wn_region, 2); // <prefcondnr> 5329 } 5330 else 5331 { 5332 // For word trees we write the flag/region items. 5333 flags = np->wn_flags; 5334 if (regionmask != 0 && np->wn_region != regionmask) 5335 flags |= WF_REGION; 5336 if (np->wn_affixID != 0) 5337 flags |= WF_AFX; 5338 if (flags == 0) 5339 { 5340 // word without flags or region 5341 putc(BY_NOFLAGS, fd); // <byte> 5342 } 5343 else 5344 { 5345 if (np->wn_flags >= 0x100) 5346 { 5347 putc(BY_FLAGS2, fd); // <byte> 5348 putc(flags, fd); // <flags> 5349 putc((unsigned)flags >> 8, fd); // <flags2> 5350 } 5351 else 5352 { 5353 putc(BY_FLAGS, fd); // <byte> 5354 putc(flags, fd); // <flags> 5355 } 5356 if (flags & WF_REGION) 5357 putc(np->wn_region, fd); // <region> 5358 if (flags & WF_AFX) 5359 putc(np->wn_affixID, fd); // <affixID> 5360 } 5361 } 5362 } 5363 } 5364 else 5365 { 5366 if (np->wn_child->wn_u1.index != 0 5367 && np->wn_child->wn_u2.wnode != node) 5368 { 5369 // The child is written elsewhere, write the reference. 5370 if (fd != NULL) 5371 { 5372 putc(BY_INDEX, fd); // <byte> 5373 // <nodeidx> 5374 put_bytes(fd, (long_u)np->wn_child->wn_u1.index, 3); 5375 } 5376 } 5377 else if (np->wn_child->wn_u2.wnode == NULL) 5378 // We will write the child below and give it an index. 5379 np->wn_child->wn_u2.wnode = node; 5380 5381 if (fd != NULL) 5382 if (putc(np->wn_byte, fd) == EOF) // <byte> or <xbyte> 5383 { 5384 emsg(_(e_write)); 5385 return 0; 5386 } 5387 } 5388 } 5389 5390 // Space used in the array when reading: one for each sibling and one for 5391 // the count. 5392 newindex += siblingcount + 1; 5393 5394 // Recursively dump the children of each sibling. 
5395 for (np = node; np != NULL; np = np->wn_sibling) 5396 if (np->wn_byte != 0 && np->wn_child->wn_u2.wnode == node) 5397 newindex = put_node(fd, np->wn_child, newindex, regionmask, 5398 prefixtree); 5399 5400 return newindex; 5401 } 5402 5403 5404 /* 5405 * ":mkspell [-ascii] outfile infile ..." 5406 * ":mkspell [-ascii] addfile" 5407 */ 5408 void 5409 ex_mkspell(exarg_T *eap) 5410 { 5411 int fcount; 5412 char_u **fnames; 5413 char_u *arg = eap->arg; 5414 int ascii = FALSE; 5415 5416 if (STRNCMP(arg, "-ascii", 6) == 0) 5417 { 5418 ascii = TRUE; 5419 arg = skipwhite(arg + 6); 5420 } 5421 5422 // Expand all the remaining arguments (e.g., $VIMRUNTIME). 5423 if (get_arglist_exp(arg, &fcount, &fnames, FALSE) == OK) 5424 { 5425 mkspell(fcount, fnames, ascii, eap->forceit, FALSE); 5426 FreeWild(fcount, fnames); 5427 } 5428 } 5429 5430 /* 5431 * Create the .sug file. 5432 * Uses the soundfold info in "spin". 5433 * Writes the file with the name "wfname", with ".spl" changed to ".sug". 5434 */ 5435 static void 5436 spell_make_sugfile(spellinfo_T *spin, char_u *wfname) 5437 { 5438 char_u *fname = NULL; 5439 int len; 5440 slang_T *slang; 5441 int free_slang = FALSE; 5442 5443 /* 5444 * Read back the .spl file that was written. This fills the required 5445 * info for soundfolding. This also uses less memory than the 5446 * pointer-linked version of the trie. And it avoids having two versions 5447 * of the code for the soundfolding stuff. 5448 * It might have been done already by spell_reload_one(). 5449 */ 5450 for (slang = first_lang; slang != NULL; slang = slang->sl_next) 5451 if (fullpathcmp(wfname, slang->sl_fname, FALSE, TRUE) == FPC_SAME) 5452 break; 5453 if (slang == NULL) 5454 { 5455 spell_message(spin, (char_u *)_("Reading back spell file...")); 5456 slang = spell_load_file(wfname, NULL, NULL, FALSE); 5457 if (slang == NULL) 5458 return; 5459 free_slang = TRUE; 5460 } 5461 5462 /* 5463 * Clear the info in "spin" that is used. 
5464 */ 5465 spin->si_blocks = NULL; 5466 spin->si_blocks_cnt = 0; 5467 spin->si_compress_cnt = 0; // will stay at 0 all the time 5468 spin->si_free_count = 0; 5469 spin->si_first_free = NULL; 5470 spin->si_foldwcount = 0; 5471 5472 /* 5473 * Go through the trie of good words, soundfold each word and add it to 5474 * the soundfold trie. 5475 */ 5476 spell_message(spin, (char_u *)_("Performing soundfolding...")); 5477 if (sug_filltree(spin, slang) == FAIL) 5478 goto theend; 5479 5480 /* 5481 * Create the table which links each soundfold word with a list of the 5482 * good words it may come from. Creates buffer "spin->si_spellbuf". 5483 * This also removes the wordnr from the NUL byte entries to make 5484 * compression possible. 5485 */ 5486 if (sug_maketable(spin) == FAIL) 5487 goto theend; 5488 5489 smsg(_("Number of words after soundfolding: %ld"), 5490 (long)spin->si_spellbuf->b_ml.ml_line_count); 5491 5492 /* 5493 * Compress the soundfold trie. 5494 */ 5495 spell_message(spin, (char_u *)_(msg_compressing)); 5496 wordtree_compress(spin, spin->si_foldroot); 5497 5498 /* 5499 * Write the .sug file. 5500 * Make the file name by changing ".spl" to ".sug". 5501 */ 5502 fname = alloc(MAXPATHL); 5503 if (fname == NULL) 5504 goto theend; 5505 vim_strncpy(fname, wfname, MAXPATHL - 1); 5506 len = (int)STRLEN(fname); 5507 fname[len - 2] = 'u'; 5508 fname[len - 1] = 'g'; 5509 sug_write(spin, fname); 5510 5511 theend: 5512 vim_free(fname); 5513 if (free_slang) 5514 slang_free(slang); 5515 free_blocks(spin->si_blocks); 5516 close_spellbuf(spin->si_spellbuf); 5517 } 5518 5519 /* 5520 * Build the soundfold trie for language "slang". 
5521 */ 5522 static int 5523 sug_filltree(spellinfo_T *spin, slang_T *slang) 5524 { 5525 char_u *byts; 5526 idx_T *idxs; 5527 int depth; 5528 idx_T arridx[MAXWLEN]; 5529 int curi[MAXWLEN]; 5530 char_u tword[MAXWLEN]; 5531 char_u tsalword[MAXWLEN]; 5532 int c; 5533 idx_T n; 5534 unsigned words_done = 0; 5535 int wordcount[MAXWLEN]; 5536 5537 // We use si_foldroot for the soundfolded trie. 5538 spin->si_foldroot = wordtree_alloc(spin); 5539 if (spin->si_foldroot == NULL) 5540 return FAIL; 5541 5542 // let tree_add_word() know we're adding to the soundfolded tree 5543 spin->si_sugtree = TRUE; 5544 5545 /* 5546 * Go through the whole case-folded tree, soundfold each word and put it 5547 * in the trie. 5548 */ 5549 byts = slang->sl_fbyts; 5550 idxs = slang->sl_fidxs; 5551 5552 arridx[0] = 0; 5553 curi[0] = 1; 5554 wordcount[0] = 0; 5555 5556 depth = 0; 5557 while (depth >= 0 && !got_int) 5558 { 5559 if (curi[depth] > byts[arridx[depth]]) 5560 { 5561 // Done all bytes at this node, go up one level. 5562 idxs[arridx[depth]] = wordcount[depth]; 5563 if (depth > 0) 5564 wordcount[depth - 1] += wordcount[depth]; 5565 5566 --depth; 5567 line_breakcheck(); 5568 } 5569 else 5570 { 5571 5572 // Do one more byte at this node. 5573 n = arridx[depth] + curi[depth]; 5574 ++curi[depth]; 5575 5576 c = byts[n]; 5577 if (c == 0) 5578 { 5579 // Sound-fold the word. 5580 tword[depth] = NUL; 5581 spell_soundfold(slang, tword, TRUE, tsalword); 5582 5583 // We use the "flags" field for the MSB of the wordnr, 5584 // "region" for the LSB of the wordnr. 5585 if (tree_add_word(spin, tsalword, spin->si_foldroot, 5586 words_done >> 16, words_done & 0xffff, 5587 0) == FAIL) 5588 return FAIL; 5589 5590 ++words_done; 5591 ++wordcount[depth]; 5592 5593 // Reset the block count each time to avoid compression 5594 // kicking in. 5595 spin->si_blocks_cnt = 0; 5596 5597 // Skip over any other NUL bytes (same word with different 5598 // flags). 
5599 while (byts[n + 1] == 0) 5600 { 5601 ++n; 5602 ++curi[depth]; 5603 } 5604 } 5605 else 5606 { 5607 // Normal char, go one level deeper. 5608 tword[depth++] = c; 5609 arridx[depth] = idxs[n]; 5610 curi[depth] = 1; 5611 wordcount[depth] = 0; 5612 } 5613 } 5614 } 5615 5616 smsg(_("Total number of words: %d"), words_done); 5617 5618 return OK; 5619 } 5620 5621 /* 5622 * Make the table that links each word in the soundfold trie to the words it 5623 * can be produced from. 5624 * This is not unlike lines in a file, thus use a memfile to be able to access 5625 * the table efficiently. 5626 * Returns FAIL when out of memory. 5627 */ 5628 static int 5629 sug_maketable(spellinfo_T *spin) 5630 { 5631 garray_T ga; 5632 int res = OK; 5633 5634 // Allocate a buffer, open a memline for it and create the swap file 5635 // (uses a temp file, not a .swp file). 5636 spin->si_spellbuf = open_spellbuf(); 5637 if (spin->si_spellbuf == NULL) 5638 return FAIL; 5639 5640 // Use a buffer to store the line info, avoids allocating many small 5641 // pieces of memory. 5642 ga_init2(&ga, 1, 100); 5643 5644 // recursively go through the tree 5645 if (sug_filltable(spin, spin->si_foldroot->wn_sibling, 0, &ga) == -1) 5646 res = FAIL; 5647 5648 ga_clear(&ga); 5649 return res; 5650 } 5651 5652 /* 5653 * Fill the table for one node and its children. 5654 * Returns the wordnr at the start of the node. 5655 * Returns -1 when out of memory. 
5656 */ 5657 static int 5658 sug_filltable( 5659 spellinfo_T *spin, 5660 wordnode_T *node, 5661 int startwordnr, 5662 garray_T *gap) // place to store line of numbers 5663 { 5664 wordnode_T *p, *np; 5665 int wordnr = startwordnr; 5666 int nr; 5667 int prev_nr; 5668 5669 for (p = node; p != NULL; p = p->wn_sibling) 5670 { 5671 if (p->wn_byte == NUL) 5672 { 5673 gap->ga_len = 0; 5674 prev_nr = 0; 5675 for (np = p; np != NULL && np->wn_byte == NUL; np = np->wn_sibling) 5676 { 5677 if (ga_grow(gap, 10) == FAIL) 5678 return -1; 5679 5680 nr = (np->wn_flags << 16) + (np->wn_region & 0xffff); 5681 // Compute the offset from the previous nr and store the 5682 // offset in a way that it takes a minimum number of bytes. 5683 // It's a bit like utf-8, but without the need to mark 5684 // following bytes. 5685 nr -= prev_nr; 5686 prev_nr += nr; 5687 gap->ga_len += offset2bytes(nr, 5688 (char_u *)gap->ga_data + gap->ga_len); 5689 } 5690 5691 // add the NUL byte 5692 ((char_u *)gap->ga_data)[gap->ga_len++] = NUL; 5693 5694 if (ml_append_buf(spin->si_spellbuf, (linenr_T)wordnr, 5695 gap->ga_data, gap->ga_len, TRUE) == FAIL) 5696 return -1; 5697 ++wordnr; 5698 5699 // Remove extra NUL entries, we no longer need them. We don't 5700 // bother freeing the nodes, the won't be reused anyway. 5701 while (p->wn_sibling != NULL && p->wn_sibling->wn_byte == NUL) 5702 p->wn_sibling = p->wn_sibling->wn_sibling; 5703 5704 // Clear the flags on the remaining NUL node, so that compression 5705 // works a lot better. 5706 p->wn_flags = 0; 5707 p->wn_region = 0; 5708 } 5709 else 5710 { 5711 wordnr = sug_filltable(spin, p->wn_child, wordnr, gap); 5712 if (wordnr == -1) 5713 return -1; 5714 } 5715 } 5716 return wordnr; 5717 } 5718 5719 /* 5720 * Convert an offset into a minimal number of bytes. 5721 * Similar to utf_char2byters, but use 8 bits in followup bytes and avoid NUL 5722 * bytes. 
5723 */ 5724 static int 5725 offset2bytes(int nr, char_u *buf) 5726 { 5727 int rem; 5728 int b1, b2, b3, b4; 5729 5730 // Split the number in parts of base 255. We need to avoid NUL bytes. 5731 b1 = nr % 255 + 1; 5732 rem = nr / 255; 5733 b2 = rem % 255 + 1; 5734 rem = rem / 255; 5735 b3 = rem % 255 + 1; 5736 b4 = rem / 255 + 1; 5737 5738 if (b4 > 1 || b3 > 0x1f) // 4 bytes 5739 { 5740 buf[0] = 0xe0 + b4; 5741 buf[1] = b3; 5742 buf[2] = b2; 5743 buf[3] = b1; 5744 return 4; 5745 } 5746 if (b3 > 1 || b2 > 0x3f ) // 3 bytes 5747 { 5748 buf[0] = 0xc0 + b3; 5749 buf[1] = b2; 5750 buf[2] = b1; 5751 return 3; 5752 } 5753 if (b2 > 1 || b1 > 0x7f ) // 2 bytes 5754 { 5755 buf[0] = 0x80 + b2; 5756 buf[1] = b1; 5757 return 2; 5758 } 5759 // 1 byte 5760 buf[0] = b1; 5761 return 1; 5762 } 5763 5764 /* 5765 * Write the .sug file in "fname". 5766 */ 5767 static void 5768 sug_write(spellinfo_T *spin, char_u *fname) 5769 { 5770 FILE *fd; 5771 wordnode_T *tree; 5772 int nodecount; 5773 int wcount; 5774 char_u *line; 5775 linenr_T lnum; 5776 int len; 5777 5778 // Create the file. Note that an existing file is silently overwritten! 5779 fd = mch_fopen((char *)fname, "w"); 5780 if (fd == NULL) 5781 { 5782 semsg(_(e_notopen), fname); 5783 return; 5784 } 5785 5786 vim_snprintf((char *)IObuff, IOSIZE, 5787 _("Writing suggestion file %s..."), fname); 5788 spell_message(spin, IObuff); 5789 5790 /* 5791 * <SUGHEADER>: <fileID> <versionnr> <timestamp> 5792 */ 5793 if (fwrite(VIMSUGMAGIC, VIMSUGMAGICL, (size_t)1, fd) != 1) // <fileID> 5794 { 5795 emsg(_(e_write)); 5796 goto theend; 5797 } 5798 putc(VIMSUGVERSION, fd); // <versionnr> 5799 5800 // Write si_sugtime to the file. 5801 put_time(fd, spin->si_sugtime); // <timestamp> 5802 5803 /* 5804 * <SUGWORDTREE> 5805 */ 5806 spin->si_memtot = 0; 5807 tree = spin->si_foldroot->wn_sibling; 5808 5809 // Clear the index and wnode fields in the tree. 5810 clear_node(tree); 5811 5812 // Count the number of nodes. 
Needed to be able to allocate the 5813 // memory when reading the nodes. Also fills in index for shared 5814 // nodes. 5815 nodecount = put_node(NULL, tree, 0, 0, FALSE); 5816 5817 // number of nodes in 4 bytes 5818 put_bytes(fd, (long_u)nodecount, 4); // <nodecount> 5819 spin->si_memtot += nodecount + nodecount * sizeof(int); 5820 5821 // Write the nodes. 5822 (void)put_node(fd, tree, 0, 0, FALSE); 5823 5824 /* 5825 * <SUGTABLE>: <sugwcount> <sugline> ... 5826 */ 5827 wcount = spin->si_spellbuf->b_ml.ml_line_count; 5828 put_bytes(fd, (long_u)wcount, 4); // <sugwcount> 5829 5830 for (lnum = 1; lnum <= (linenr_T)wcount; ++lnum) 5831 { 5832 // <sugline>: <sugnr> ... NUL 5833 line = ml_get_buf(spin->si_spellbuf, lnum, FALSE); 5834 len = (int)STRLEN(line) + 1; 5835 if (fwrite(line, (size_t)len, (size_t)1, fd) == 0) 5836 { 5837 emsg(_(e_write)); 5838 goto theend; 5839 } 5840 spin->si_memtot += len; 5841 } 5842 5843 // Write another byte to check for errors. 5844 if (putc(0, fd) == EOF) 5845 emsg(_(e_write)); 5846 5847 vim_snprintf((char *)IObuff, IOSIZE, 5848 _("Estimated runtime memory use: %d bytes"), spin->si_memtot); 5849 spell_message(spin, IObuff); 5850 5851 theend: 5852 // close the file 5853 fclose(fd); 5854 } 5855 5856 5857 /* 5858 * Create a Vim spell file from one or more word lists. 5859 * "fnames[0]" is the output file name. 5860 * "fnames[fcount - 1]" is the last input file name. 5861 * Exception: when "fnames[0]" ends in ".add" it's used as the input file name 5862 * and ".spl" is appended to make the output file name. 
5863 */ 5864 void 5865 mkspell( 5866 int fcount, 5867 char_u **fnames, 5868 int ascii, // -ascii argument given 5869 int over_write, // overwrite existing output file 5870 int added_word) // invoked through "zg" 5871 { 5872 char_u *fname = NULL; 5873 char_u *wfname; 5874 char_u **innames; 5875 int incount; 5876 afffile_T *(afile[MAXREGIONS]); 5877 int i; 5878 int len; 5879 stat_T st; 5880 int error = FALSE; 5881 spellinfo_T spin; 5882 5883 vim_memset(&spin, 0, sizeof(spin)); 5884 spin.si_verbose = !added_word; 5885 spin.si_ascii = ascii; 5886 spin.si_followup = TRUE; 5887 spin.si_rem_accents = TRUE; 5888 ga_init2(&spin.si_rep, (int)sizeof(fromto_T), 20); 5889 ga_init2(&spin.si_repsal, (int)sizeof(fromto_T), 20); 5890 ga_init2(&spin.si_sal, (int)sizeof(fromto_T), 20); 5891 ga_init2(&spin.si_map, (int)sizeof(char_u), 100); 5892 ga_init2(&spin.si_comppat, (int)sizeof(char_u *), 20); 5893 ga_init2(&spin.si_prefcond, (int)sizeof(char_u *), 50); 5894 hash_init(&spin.si_commonwords); 5895 spin.si_newcompID = 127; // start compound ID at first maximum 5896 5897 // default: fnames[0] is output file, following are input files 5898 innames = &fnames[1]; 5899 incount = fcount - 1; 5900 5901 wfname = alloc(MAXPATHL); 5902 if (wfname == NULL) 5903 return; 5904 5905 if (fcount >= 1) 5906 { 5907 len = (int)STRLEN(fnames[0]); 5908 if (fcount == 1 && len > 4 && STRCMP(fnames[0] + len - 4, ".add") == 0) 5909 { 5910 // For ":mkspell path/en.latin1.add" output file is 5911 // "path/en.latin1.add.spl". 5912 innames = &fnames[0]; 5913 incount = 1; 5914 vim_snprintf((char *)wfname, MAXPATHL, "%s.spl", fnames[0]); 5915 } 5916 else if (fcount == 1) 5917 { 5918 // For ":mkspell path/vim" output file is "path/vim.latin1.spl". 5919 innames = &fnames[0]; 5920 incount = 1; 5921 vim_snprintf((char *)wfname, MAXPATHL, SPL_FNAME_TMPL, 5922 fnames[0], spin.si_ascii ? 
(char_u *)"ascii" : spell_enc()); 5923 } 5924 else if (len > 4 && STRCMP(fnames[0] + len - 4, ".spl") == 0) 5925 { 5926 // Name ends in ".spl", use as the file name. 5927 vim_strncpy(wfname, fnames[0], MAXPATHL - 1); 5928 } 5929 else 5930 // Name should be language, make the file name from it. 5931 vim_snprintf((char *)wfname, MAXPATHL, SPL_FNAME_TMPL, 5932 fnames[0], spin.si_ascii ? (char_u *)"ascii" : spell_enc()); 5933 5934 // Check for .ascii.spl. 5935 if (strstr((char *)gettail(wfname), SPL_FNAME_ASCII) != NULL) 5936 spin.si_ascii = TRUE; 5937 5938 // Check for .add.spl. 5939 if (strstr((char *)gettail(wfname), SPL_FNAME_ADD) != NULL) 5940 spin.si_add = TRUE; 5941 } 5942 5943 if (incount <= 0) 5944 emsg(_(e_invarg)); // need at least output and input names 5945 else if (vim_strchr(gettail(wfname), '_') != NULL) 5946 emsg(_("E751: Output file name must not have region name")); 5947 else if (incount > MAXREGIONS) 5948 semsg(_("E754: Only up to %d regions supported"), MAXREGIONS); 5949 else 5950 { 5951 // Check for overwriting before doing things that may take a lot of 5952 // time. 5953 if (!over_write && mch_stat((char *)wfname, &st) >= 0) 5954 { 5955 emsg(_(e_exists)); 5956 goto theend; 5957 } 5958 if (mch_isdir(wfname)) 5959 { 5960 semsg(_(e_isadir2), wfname); 5961 goto theend; 5962 } 5963 5964 fname = alloc(MAXPATHL); 5965 if (fname == NULL) 5966 goto theend; 5967 5968 /* 5969 * Init the aff and dic pointers. 5970 * Get the region names if there are more than 2 arguments. 
5971 */ 5972 for (i = 0; i < incount; ++i) 5973 { 5974 afile[i] = NULL; 5975 5976 if (incount > 1) 5977 { 5978 len = (int)STRLEN(innames[i]); 5979 if (STRLEN(gettail(innames[i])) < 5 5980 || innames[i][len - 3] != '_') 5981 { 5982 semsg(_("E755: Invalid region in %s"), innames[i]); 5983 goto theend; 5984 } 5985 spin.si_region_name[i * 2] = TOLOWER_ASC(innames[i][len - 2]); 5986 spin.si_region_name[i * 2 + 1] = 5987 TOLOWER_ASC(innames[i][len - 1]); 5988 } 5989 } 5990 spin.si_region_count = incount; 5991 5992 spin.si_foldroot = wordtree_alloc(&spin); 5993 spin.si_keeproot = wordtree_alloc(&spin); 5994 spin.si_prefroot = wordtree_alloc(&spin); 5995 if (spin.si_foldroot == NULL 5996 || spin.si_keeproot == NULL 5997 || spin.si_prefroot == NULL) 5998 { 5999 free_blocks(spin.si_blocks); 6000 goto theend; 6001 } 6002 6003 // When not producing a .add.spl file clear the character table when 6004 // we encounter one in the .aff file. This means we dump the current 6005 // one in the .spl file if the .aff file doesn't define one. That's 6006 // better than guessing the contents, the table will match a 6007 // previously loaded spell file. 6008 if (!spin.si_add) 6009 spin.si_clear_chartab = TRUE; 6010 6011 /* 6012 * Read all the .aff and .dic files. 6013 * Text is converted to 'encoding'. 6014 * Words are stored in the case-folded and keep-case trees. 6015 */ 6016 for (i = 0; i < incount && !error; ++i) 6017 { 6018 spin.si_conv.vc_type = CONV_NONE; 6019 spin.si_region = 1 << i; 6020 6021 vim_snprintf((char *)fname, MAXPATHL, "%s.aff", innames[i]); 6022 if (mch_stat((char *)fname, &st) >= 0) 6023 { 6024 // Read the .aff file. Will init "spin->si_conv" based on the 6025 // "SET" line. 6026 afile[i] = spell_read_aff(&spin, fname); 6027 if (afile[i] == NULL) 6028 error = TRUE; 6029 else 6030 { 6031 // Read the .dic file and store the words in the trees. 
6032 vim_snprintf((char *)fname, MAXPATHL, "%s.dic", 6033 innames[i]); 6034 if (spell_read_dic(&spin, fname, afile[i]) == FAIL) 6035 error = TRUE; 6036 } 6037 } 6038 else 6039 { 6040 // No .aff file, try reading the file as a word list. Store 6041 // the words in the trees. 6042 if (spell_read_wordfile(&spin, innames[i]) == FAIL) 6043 error = TRUE; 6044 } 6045 6046 // Free any conversion stuff. 6047 convert_setup(&spin.si_conv, NULL, NULL); 6048 } 6049 6050 if (spin.si_compflags != NULL && spin.si_nobreak) 6051 msg(_("Warning: both compounding and NOBREAK specified")); 6052 6053 if (!error && !got_int) 6054 { 6055 /* 6056 * Combine tails in the tree. 6057 */ 6058 spell_message(&spin, (char_u *)_(msg_compressing)); 6059 wordtree_compress(&spin, spin.si_foldroot); 6060 wordtree_compress(&spin, spin.si_keeproot); 6061 wordtree_compress(&spin, spin.si_prefroot); 6062 } 6063 6064 if (!error && !got_int) 6065 { 6066 /* 6067 * Write the info in the spell file. 6068 */ 6069 vim_snprintf((char *)IObuff, IOSIZE, 6070 _("Writing spell file %s..."), wfname); 6071 spell_message(&spin, IObuff); 6072 6073 error = write_vim_spell(&spin, wfname) == FAIL; 6074 6075 spell_message(&spin, (char_u *)_("Done!")); 6076 vim_snprintf((char *)IObuff, IOSIZE, 6077 _("Estimated runtime memory use: %d bytes"), spin.si_memtot); 6078 spell_message(&spin, IObuff); 6079 6080 /* 6081 * If the file is loaded need to reload it. 6082 */ 6083 if (!error) 6084 spell_reload_one(wfname, added_word); 6085 } 6086 6087 // Free the allocated memory. 6088 ga_clear(&spin.si_rep); 6089 ga_clear(&spin.si_repsal); 6090 ga_clear(&spin.si_sal); 6091 ga_clear(&spin.si_map); 6092 ga_clear(&spin.si_comppat); 6093 ga_clear(&spin.si_prefcond); 6094 hash_clear_all(&spin.si_commonwords, 0); 6095 6096 // Free the .aff file structures. 6097 for (i = 0; i < incount; ++i) 6098 if (afile[i] != NULL) 6099 spell_free_aff(afile[i]); 6100 6101 // Free all the bits and pieces at once. 
6102 free_blocks(spin.si_blocks); 6103 6104 /* 6105 * If there is soundfolding info and no NOSUGFILE item create the 6106 * .sug file with the soundfolded word trie. 6107 */ 6108 if (spin.si_sugtime != 0 && !error && !got_int) 6109 spell_make_sugfile(&spin, wfname); 6110 6111 } 6112 6113 theend: 6114 vim_free(fname); 6115 vim_free(wfname); 6116 } 6117 6118 /* 6119 * Display a message for spell file processing when 'verbose' is set or using 6120 * ":mkspell". "str" can be IObuff. 6121 */ 6122 static void 6123 spell_message(spellinfo_T *spin, char_u *str) 6124 { 6125 if (spin->si_verbose || p_verbose > 2) 6126 { 6127 if (!spin->si_verbose) 6128 verbose_enter(); 6129 msg((char *)str); 6130 out_flush(); 6131 if (!spin->si_verbose) 6132 verbose_leave(); 6133 } 6134 } 6135 6136 /* 6137 * ":[count]spellgood {word}" 6138 * ":[count]spellwrong {word}" 6139 * ":[count]spellundo {word}" 6140 * ":[count]spellrare {word}" 6141 */ 6142 void 6143 ex_spell(exarg_T *eap) 6144 { 6145 spell_add_word(eap->arg, (int)STRLEN(eap->arg), 6146 eap->cmdidx == CMD_spellwrong ? SPELL_ADD_BAD : 6147 eap->cmdidx == CMD_spellrare ? SPELL_ADD_RARE : SPELL_ADD_GOOD, 6148 eap->forceit ? 0 : (int)eap->line2, 6149 eap->cmdidx == CMD_spellundo); 6150 } 6151 6152 /* 6153 * Add "word[len]" to 'spellfile' as a good, rare or bad word. 
6154 */ 6155 void 6156 spell_add_word( 6157 char_u *word, 6158 int len, 6159 int what, // SPELL_ADD_ values 6160 int idx, // "zG" and "zW": zero, otherwise index in 6161 // 'spellfile' 6162 int undo) // TRUE for "zug", "zuG", "zuw" and "zuW" 6163 { 6164 FILE *fd = NULL; 6165 buf_T *buf = NULL; 6166 int new_spf = FALSE; 6167 char_u *fname; 6168 char_u *fnamebuf = NULL; 6169 char_u line[MAXWLEN * 2]; 6170 long fpos, fpos_next = 0; 6171 int i; 6172 char_u *spf; 6173 6174 if (idx == 0) // use internal wordlist 6175 { 6176 if (int_wordlist == NULL) 6177 { 6178 int_wordlist = vim_tempname('s', FALSE); 6179 if (int_wordlist == NULL) 6180 return; 6181 } 6182 fname = int_wordlist; 6183 } 6184 else 6185 { 6186 // If 'spellfile' isn't set figure out a good default value. 6187 if (*curwin->w_s->b_p_spf == NUL) 6188 { 6189 init_spellfile(); 6190 new_spf = TRUE; 6191 } 6192 6193 if (*curwin->w_s->b_p_spf == NUL) 6194 { 6195 semsg(_(e_notset), "spellfile"); 6196 return; 6197 } 6198 fnamebuf = alloc(MAXPATHL); 6199 if (fnamebuf == NULL) 6200 return; 6201 6202 for (spf = curwin->w_s->b_p_spf, i = 1; *spf != NUL; ++i) 6203 { 6204 copy_option_part(&spf, fnamebuf, MAXPATHL, ","); 6205 if (i == idx) 6206 break; 6207 if (*spf == NUL) 6208 { 6209 semsg(_("E765: 'spellfile' does not have %d entries"), idx); 6210 vim_free(fnamebuf); 6211 return; 6212 } 6213 } 6214 6215 // Check that the user isn't editing the .add file somewhere. 6216 buf = buflist_findname_exp(fnamebuf); 6217 if (buf != NULL && buf->b_ml.ml_mfp == NULL) 6218 buf = NULL; 6219 if (buf != NULL && bufIsChanged(buf)) 6220 { 6221 emsg(_(e_bufloaded)); 6222 vim_free(fnamebuf); 6223 return; 6224 } 6225 6226 fname = fnamebuf; 6227 } 6228 6229 if (what == SPELL_ADD_BAD || undo) 6230 { 6231 // When the word appears as good word we need to remove that one, 6232 // since its flags sort before the one with WF_BANNED. 
6233 fd = mch_fopen((char *)fname, "r"); 6234 if (fd != NULL) 6235 { 6236 while (!vim_fgets(line, MAXWLEN * 2, fd)) 6237 { 6238 fpos = fpos_next; 6239 fpos_next = ftell(fd); 6240 if (STRNCMP(word, line, len) == 0 6241 && (line[len] == '/' || line[len] < ' ')) 6242 { 6243 // Found duplicate word. Remove it by writing a '#' at 6244 // the start of the line. Mixing reading and writing 6245 // doesn't work for all systems, close the file first. 6246 fclose(fd); 6247 fd = mch_fopen((char *)fname, "r+"); 6248 if (fd == NULL) 6249 break; 6250 if (fseek(fd, fpos, SEEK_SET) == 0) 6251 { 6252 fputc('#', fd); 6253 if (undo) 6254 { 6255 home_replace(NULL, fname, NameBuff, MAXPATHL, TRUE); 6256 smsg(_("Word '%.*s' removed from %s"), 6257 len, word, NameBuff); 6258 } 6259 } 6260 fseek(fd, fpos_next, SEEK_SET); 6261 } 6262 } 6263 if (fd != NULL) 6264 fclose(fd); 6265 } 6266 } 6267 6268 if (!undo) 6269 { 6270 fd = mch_fopen((char *)fname, "a"); 6271 if (fd == NULL && new_spf) 6272 { 6273 char_u *p; 6274 6275 // We just initialized the 'spellfile' option and can't open the 6276 // file. We may need to create the "spell" directory first. We 6277 // already checked the runtime directory is writable in 6278 // init_spellfile(). 6279 if (!dir_of_file_exists(fname) && (p = gettail_sep(fname)) != fname) 6280 { 6281 int c = *p; 6282 6283 // The directory doesn't exist. Try creating it and opening 6284 // the file again. 
6285 *p = NUL; 6286 vim_mkdir(fname, 0755); 6287 *p = c; 6288 fd = mch_fopen((char *)fname, "a"); 6289 } 6290 } 6291 6292 if (fd == NULL) 6293 semsg(_(e_notopen), fname); 6294 else 6295 { 6296 if (what == SPELL_ADD_BAD) 6297 fprintf(fd, "%.*s/!\n", len, word); 6298 else if (what == SPELL_ADD_RARE) 6299 fprintf(fd, "%.*s/?\n", len, word); 6300 else 6301 fprintf(fd, "%.*s\n", len, word); 6302 fclose(fd); 6303 6304 home_replace(NULL, fname, NameBuff, MAXPATHL, TRUE); 6305 smsg(_("Word '%.*s' added to %s"), len, word, NameBuff); 6306 } 6307 } 6308 6309 if (fd != NULL) 6310 { 6311 // Update the .add.spl file. 6312 mkspell(1, &fname, FALSE, TRUE, TRUE); 6313 6314 // If the .add file is edited somewhere, reload it. 6315 if (buf != NULL) 6316 buf_reload(buf, buf->b_orig_mode); 6317 6318 redraw_all_later(SOME_VALID); 6319 } 6320 vim_free(fnamebuf); 6321 } 6322 6323 /* 6324 * Initialize 'spellfile' for the current buffer. 6325 */ 6326 static void 6327 init_spellfile(void) 6328 { 6329 char_u *buf; 6330 int l; 6331 char_u *fname; 6332 char_u *rtp; 6333 char_u *lend; 6334 int aspath = FALSE; 6335 char_u *lstart = curbuf->b_s.b_p_spl; 6336 6337 if (*curwin->w_s->b_p_spl != NUL && curwin->w_s->b_langp.ga_len > 0) 6338 { 6339 buf = alloc(MAXPATHL); 6340 if (buf == NULL) 6341 return; 6342 6343 // Find the end of the language name. Exclude the region. If there 6344 // is a path separator remember the start of the tail. 6345 for (lend = curwin->w_s->b_p_spl; *lend != NUL 6346 && vim_strchr((char_u *)",._", *lend) == NULL; ++lend) 6347 if (vim_ispathsep(*lend)) 6348 { 6349 aspath = TRUE; 6350 lstart = lend + 1; 6351 } 6352 6353 // Loop over all entries in 'runtimepath'. Use the first one where we 6354 // are allowed to write. 6355 rtp = p_rtp; 6356 while (*rtp != NUL) 6357 { 6358 if (aspath) 6359 // Use directory of an entry with path, e.g., for 6360 // "/dir/lg.utf-8.spl" use "/dir". 
6361 vim_strncpy(buf, curbuf->b_s.b_p_spl, 6362 lstart - curbuf->b_s.b_p_spl - 1); 6363 else 6364 // Copy the path from 'runtimepath' to buf[]. 6365 copy_option_part(&rtp, buf, MAXPATHL, ","); 6366 if (filewritable(buf) == 2) 6367 { 6368 // Use the first language name from 'spelllang' and the 6369 // encoding used in the first loaded .spl file. 6370 if (aspath) 6371 vim_strncpy(buf, curbuf->b_s.b_p_spl, 6372 lend - curbuf->b_s.b_p_spl); 6373 else 6374 { 6375 // Create the "spell" directory if it doesn't exist yet. 6376 l = (int)STRLEN(buf); 6377 vim_snprintf((char *)buf + l, MAXPATHL - l, "/spell"); 6378 if (filewritable(buf) != 2) 6379 vim_mkdir(buf, 0755); 6380 6381 l = (int)STRLEN(buf); 6382 vim_snprintf((char *)buf + l, MAXPATHL - l, 6383 "/%.*s", (int)(lend - lstart), lstart); 6384 } 6385 l = (int)STRLEN(buf); 6386 fname = LANGP_ENTRY(curwin->w_s->b_langp, 0) 6387 ->lp_slang->sl_fname; 6388 vim_snprintf((char *)buf + l, MAXPATHL - l, ".%s.add", 6389 fname != NULL 6390 && strstr((char *)gettail(fname), ".ascii.") != NULL 6391 ? (char_u *)"ascii" : spell_enc()); 6392 set_option_value((char_u *)"spellfile", 0L, buf, OPT_LOCAL); 6393 break; 6394 } 6395 aspath = FALSE; 6396 } 6397 6398 vim_free(buf); 6399 } 6400 } 6401 6402 6403 6404 /* 6405 * Set the spell character tables from strings in the affix file. 6406 */ 6407 static int 6408 set_spell_chartab(char_u *fol, char_u *low, char_u *upp) 6409 { 6410 // We build the new tables here first, so that we can compare with the 6411 // previous one. 6412 spelltab_T new_st; 6413 char_u *pf = fol, *pl = low, *pu = upp; 6414 int f, l, u; 6415 6416 clear_spell_chartab(&new_st); 6417 6418 while (*pf != NUL) 6419 { 6420 if (*pl == NUL || *pu == NUL) 6421 { 6422 emsg(_(e_affform)); 6423 return FAIL; 6424 } 6425 f = mb_ptr2char_adv(&pf); 6426 l = mb_ptr2char_adv(&pl); 6427 u = mb_ptr2char_adv(&pu); 6428 6429 // Every character that appears is a word character. 
6430 if (f < 256) 6431 new_st.st_isw[f] = TRUE; 6432 if (l < 256) 6433 new_st.st_isw[l] = TRUE; 6434 if (u < 256) 6435 new_st.st_isw[u] = TRUE; 6436 6437 // if "LOW" and "FOL" are not the same the "LOW" char needs 6438 // case-folding 6439 if (l < 256 && l != f) 6440 { 6441 if (f >= 256) 6442 { 6443 emsg(_(e_affrange)); 6444 return FAIL; 6445 } 6446 new_st.st_fold[l] = f; 6447 } 6448 6449 // if "UPP" and "FOL" are not the same the "UPP" char needs 6450 // case-folding, it's upper case and the "UPP" is the upper case of 6451 // "FOL" . 6452 if (u < 256 && u != f) 6453 { 6454 if (f >= 256) 6455 { 6456 emsg(_(e_affrange)); 6457 return FAIL; 6458 } 6459 new_st.st_fold[u] = f; 6460 new_st.st_isu[u] = TRUE; 6461 new_st.st_upper[f] = u; 6462 } 6463 } 6464 6465 if (*pl != NUL || *pu != NUL) 6466 { 6467 emsg(_(e_affform)); 6468 return FAIL; 6469 } 6470 6471 return set_spell_finish(&new_st); 6472 } 6473 6474 /* 6475 * Set the spell character tables from strings in the .spl file. 6476 */ 6477 static void 6478 set_spell_charflags( 6479 char_u *flags, 6480 int cnt, // length of "flags" 6481 char_u *fol) 6482 { 6483 // We build the new tables here first, so that we can compare with the 6484 // previous one. 
6485 spelltab_T new_st; 6486 int i; 6487 char_u *p = fol; 6488 int c; 6489 6490 clear_spell_chartab(&new_st); 6491 6492 for (i = 0; i < 128; ++i) 6493 { 6494 if (i < cnt) 6495 { 6496 new_st.st_isw[i + 128] = (flags[i] & CF_WORD) != 0; 6497 new_st.st_isu[i + 128] = (flags[i] & CF_UPPER) != 0; 6498 } 6499 6500 if (*p != NUL) 6501 { 6502 c = mb_ptr2char_adv(&p); 6503 new_st.st_fold[i + 128] = c; 6504 if (i + 128 != c && new_st.st_isu[i + 128] && c < 256) 6505 new_st.st_upper[c] = i + 128; 6506 } 6507 } 6508 6509 (void)set_spell_finish(&new_st); 6510 } 6511 6512 static int 6513 set_spell_finish(spelltab_T *new_st) 6514 { 6515 int i; 6516 6517 if (did_set_spelltab) 6518 { 6519 // check that it's the same table 6520 for (i = 0; i < 256; ++i) 6521 { 6522 if (spelltab.st_isw[i] != new_st->st_isw[i] 6523 || spelltab.st_isu[i] != new_st->st_isu[i] 6524 || spelltab.st_fold[i] != new_st->st_fold[i] 6525 || spelltab.st_upper[i] != new_st->st_upper[i]) 6526 { 6527 emsg(_("E763: Word characters differ between spell files")); 6528 return FAIL; 6529 } 6530 } 6531 } 6532 else 6533 { 6534 // copy the new spelltab into the one being used 6535 spelltab = *new_st; 6536 did_set_spelltab = TRUE; 6537 } 6538 6539 return OK; 6540 } 6541 6542 /* 6543 * Write the table with prefix conditions to the .spl file. 6544 * When "fd" is NULL only count the length of what is written. 
6545 */ 6546 static int 6547 write_spell_prefcond(FILE *fd, garray_T *gap) 6548 { 6549 int i; 6550 char_u *p; 6551 int len; 6552 int totlen; 6553 size_t x = 1; // collect return value of fwrite() 6554 6555 if (fd != NULL) 6556 put_bytes(fd, (long_u)gap->ga_len, 2); // <prefcondcnt> 6557 6558 totlen = 2 + gap->ga_len; // length of <prefcondcnt> and <condlen> bytes 6559 6560 for (i = 0; i < gap->ga_len; ++i) 6561 { 6562 // <prefcond> : <condlen> <condstr> 6563 p = ((char_u **)gap->ga_data)[i]; 6564 if (p != NULL) 6565 { 6566 len = (int)STRLEN(p); 6567 if (fd != NULL) 6568 { 6569 fputc(len, fd); 6570 x &= fwrite(p, (size_t)len, (size_t)1, fd); 6571 } 6572 totlen += len; 6573 } 6574 else if (fd != NULL) 6575 fputc(0, fd); 6576 } 6577 6578 return totlen; 6579 } 6580 6581 6582 /* 6583 * Use map string "map" for languages "lp". 6584 */ 6585 static void 6586 set_map_str(slang_T *lp, char_u *map) 6587 { 6588 char_u *p; 6589 int headc = 0; 6590 int c; 6591 int i; 6592 6593 if (*map == NUL) 6594 { 6595 lp->sl_has_map = FALSE; 6596 return; 6597 } 6598 lp->sl_has_map = TRUE; 6599 6600 // Init the array and hash tables empty. 6601 for (i = 0; i < 256; ++i) 6602 lp->sl_map_array[i] = 0; 6603 hash_init(&lp->sl_map_hash); 6604 6605 /* 6606 * The similar characters are stored separated with slashes: 6607 * "aaa/bbb/ccc/". Fill sl_map_array[c] with the character before c and 6608 * before the same slash. For characters above 255 sl_map_hash is used. 6609 */ 6610 for (p = map; *p != NUL; ) 6611 { 6612 c = mb_cptr2char_adv(&p); 6613 if (c == '/') 6614 headc = 0; 6615 else 6616 { 6617 if (headc == 0) 6618 headc = c; 6619 6620 // Characters above 255 don't fit in sl_map_array[], put them in 6621 // the hash table. Each entry is the char, a NUL the headchar and 6622 // a NUL. 
6623 if (c >= 256) 6624 { 6625 int cl = mb_char2len(c); 6626 int headcl = mb_char2len(headc); 6627 char_u *b; 6628 hash_T hash; 6629 hashitem_T *hi; 6630 6631 b = alloc(cl + headcl + 2); 6632 if (b == NULL) 6633 return; 6634 mb_char2bytes(c, b); 6635 b[cl] = NUL; 6636 mb_char2bytes(headc, b + cl + 1); 6637 b[cl + 1 + headcl] = NUL; 6638 hash = hash_hash(b); 6639 hi = hash_lookup(&lp->sl_map_hash, b, hash); 6640 if (HASHITEM_EMPTY(hi)) 6641 hash_add_item(&lp->sl_map_hash, hi, b, hash); 6642 else 6643 { 6644 // This should have been checked when generating the .spl 6645 // file. 6646 emsg(_("E783: duplicate char in MAP entry")); 6647 vim_free(b); 6648 } 6649 } 6650 else 6651 lp->sl_map_array[c] = headc; 6652 } 6653 } 6654 } 6655 6656 6657 #endif // FEAT_SPELL 6658