1 /* vi:set ts=8 sts=4 sw=4 noet: 2 * 3 * VIM - Vi IMproved by Bram Moolenaar 4 * 5 * Do ":help uganda" in Vim to read copying and usage conditions. 6 * Do ":help credits" in Vim to see a list of people who contributed. 7 * See README.txt for an overview of the Vim source code. 8 */ 9 10 /* 11 * spellfile.c: code for reading and writing spell files. 12 * 13 * See spell.c for information about spell checking. 14 */ 15 16 /* 17 * Vim spell file format: <HEADER> 18 * <SECTIONS> 19 * <LWORDTREE> 20 * <KWORDTREE> 21 * <PREFIXTREE> 22 * 23 * <HEADER>: <fileID> <versionnr> 24 * 25 * <fileID> 8 bytes "VIMspell" 26 * <versionnr> 1 byte VIMSPELLVERSION 27 * 28 * 29 * Sections make it possible to add information to the .spl file without 30 * making it incompatible with previous versions. There are two kinds of 31 * sections: 32 * 1. Not essential for correct spell checking. E.g. for making suggestions. 33 * These are skipped when not supported. 34 * 2. Optional information, but essential for spell checking when present. 35 * E.g. conditions for affixes. When this section is present but not 36 * supported an error message is given. 37 * 38 * <SECTIONS>: <section> ... <sectionend> 39 * 40 * <section>: <sectionID> <sectionflags> <sectionlen> (section contents) 41 * 42 * <sectionID> 1 byte number from 0 to 254 identifying the section 43 * 44 * <sectionflags> 1 byte SNF_REQUIRED: this section is required for correct 45 * spell checking 46 * 47 * <sectionlen> 4 bytes length of section contents, MSB first 48 * 49 * <sectionend> 1 byte SN_END 50 * 51 * 52 * sectionID == SN_INFO: <infotext> 53 * <infotext> N bytes free format text with spell file info (version, 54 * website, etc) 55 * 56 * sectionID == SN_REGION: <regionname> ... 57 * <regionname> 2 bytes Up to MAXREGIONS region names: ca, au, etc. Lower 58 * case. First <regionname> is region 1. 
59 * 60 * sectionID == SN_CHARFLAGS: <charflagslen> <charflags> 61 * <folcharslen> <folchars> 62 * <charflagslen> 1 byte Number of bytes in <charflags> (should be 128). 63 * <charflags> N bytes List of flags (first one is for character 128): 64 * 0x01 word character CF_WORD 65 * 0x02 upper-case character CF_UPPER 66 * <folcharslen> 2 bytes Number of bytes in <folchars>. 67 * <folchars> N bytes Folded characters, first one is for character 128. 68 * 69 * sectionID == SN_MIDWORD: <midword> 70 * <midword> N bytes Characters that are word characters only when used 71 * in the middle of a word. 72 * 73 * sectionID == SN_PREFCOND: <prefcondcnt> <prefcond> ... 74 * <prefcondcnt> 2 bytes Number of <prefcond> items following. 75 * <prefcond> : <condlen> <condstr> 76 * <condlen> 1 byte Length of <condstr>. 77 * <condstr> N bytes Condition for the prefix. 78 * 79 * sectionID == SN_REP: <repcount> <rep> ... 80 * <repcount> 2 bytes number of <rep> items, MSB first. 81 * <rep> : <repfromlen> <repfrom> <reptolen> <repto> 82 * <repfromlen> 1 byte length of <repfrom> 83 * <repfrom> N bytes "from" part of replacement 84 * <reptolen> 1 byte length of <repto> 85 * <repto> N bytes "to" part of replacement 86 * 87 * sectionID == SN_REPSAL: <repcount> <rep> ... 88 * just like SN_REP but for soundfolded words 89 * 90 * sectionID == SN_SAL: <salflags> <salcount> <sal> ... 
91 * <salflags> 1 byte flags for soundsalike conversion: 92 * SAL_F0LLOWUP 93 * SAL_COLLAPSE 94 * SAL_REM_ACCENTS 95 * <salcount> 2 bytes number of <sal> items following 96 * <sal> : <salfromlen> <salfrom> <saltolen> <salto> 97 * <salfromlen> 1 byte length of <salfrom> 98 * <salfrom> N bytes "from" part of soundsalike 99 * <saltolen> 1 byte length of <salto> 100 * <salto> N bytes "to" part of soundsalike 101 * 102 * sectionID == SN_SOFO: <sofofromlen> <sofofrom> <sofotolen> <sofoto> 103 * <sofofromlen> 2 bytes length of <sofofrom> 104 * <sofofrom> N bytes "from" part of soundfold 105 * <sofotolen> 2 bytes length of <sofoto> 106 * <sofoto> N bytes "to" part of soundfold 107 * 108 * sectionID == SN_SUGFILE: <timestamp> 109 * <timestamp> 8 bytes time in seconds that must match with .sug file 110 * 111 * sectionID == SN_NOSPLITSUGS: nothing 112 * 113 * sectionID == SN_NOCOMPOUNDSUGS: nothing 114 * 115 * sectionID == SN_WORDS: <word> ... 116 * <word> N bytes NUL terminated common word 117 * 118 * sectionID == SN_MAP: <mapstr> 119 * <mapstr> N bytes String with sequences of similar characters, 120 * separated by slashes. 121 * 122 * sectionID == SN_COMPOUND: <compmax> <compminlen> <compsylmax> <compoptions> 123 * <comppatcount> <comppattern> ... <compflags> 124 * <compmax> 1 byte Maximum nr of words in compound word. 125 * <compminlen> 1 byte Minimal word length for compounding. 126 * <compsylmax> 1 byte Maximum nr of syllables in compound word. 127 * <compoptions> 2 bytes COMP_ flags. 128 * <comppatcount> 2 bytes number of <comppattern> following 129 * <compflags> N bytes Flags from COMPOUNDRULE items, separated by 130 * slashes. 
131 * 132 * <comppattern>: <comppatlen> <comppattext> 133 * <comppatlen> 1 byte length of <comppattext> 134 * <comppattext> N bytes end or begin chars from CHECKCOMPOUNDPATTERN 135 * 136 * sectionID == SN_NOBREAK: (empty, its presence is what matters) 137 * 138 * sectionID == SN_SYLLABLE: <syllable> 139 * <syllable> N bytes String from SYLLABLE item. 140 * 141 * <LWORDTREE>: <wordtree> 142 * 143 * <KWORDTREE>: <wordtree> 144 * 145 * <PREFIXTREE>: <wordtree> 146 * 147 * 148 * <wordtree>: <nodecount> <nodedata> ... 149 * 150 * <nodecount> 4 bytes Number of nodes following. MSB first. 151 * 152 * <nodedata>: <siblingcount> <sibling> ... 153 * 154 * <siblingcount> 1 byte Number of siblings in this node. The siblings 155 * follow in sorted order. 156 * 157 * <sibling>: <byte> [ <nodeidx> <xbyte> 158 * | <flags> [<flags2>] [<region>] [<affixID>] 159 * | [<pflags>] <affixID> <prefcondnr> ] 160 * 161 * <byte> 1 byte Byte value of the sibling. Special cases: 162 * BY_NOFLAGS: End of word without flags and for all 163 * regions. 164 * For PREFIXTREE <affixID> and 165 * <prefcondnr> follow. 166 * BY_FLAGS: End of word, <flags> follow. 167 * For PREFIXTREE <pflags>, <affixID> 168 * and <prefcondnr> follow. 169 * BY_FLAGS2: End of word, <flags> and <flags2> 170 * follow. Not used in PREFIXTREE. 171 * BY_INDEX: Child of sibling is shared, <nodeidx> 172 * and <xbyte> follow. 173 * 174 * <nodeidx> 3 bytes Index of child for this sibling, MSB first. 175 * 176 * <xbyte> 1 byte byte value of the sibling. 
177 * 178 * <flags> 1 byte bitmask of: 179 * WF_ALLCAP word must have only capitals 180 * WF_ONECAP first char of word must be capital 181 * WF_KEEPCAP keep-case word 182 * WF_FIXCAP keep-case word, all caps not allowed 183 * WF_RARE rare word 184 * WF_BANNED bad word 185 * WF_REGION <region> follows 186 * WF_AFX <affixID> follows 187 * 188 * <flags2> 1 byte Bitmask of: 189 * WF_HAS_AFF >> 8 word includes affix 190 * WF_NEEDCOMP >> 8 word only valid in compound 191 * WF_NOSUGGEST >> 8 word not used for suggestions 192 * WF_COMPROOT >> 8 word already a compound 193 * WF_NOCOMPBEF >> 8 no compounding before this word 194 * WF_NOCOMPAFT >> 8 no compounding after this word 195 * 196 * <pflags> 1 byte bitmask of: 197 * WFP_RARE rare prefix 198 * WFP_NC non-combining prefix 199 * WFP_UP letter after prefix made upper case 200 * 201 * <region> 1 byte Bitmask for regions in which word is valid. When 202 * omitted it's valid in all regions. 203 * Lowest bit is for region 1. 204 * 205 * <affixID> 1 byte ID of affix that can be used with this word. In 206 * PREFIXTREE used for the required prefix ID. 207 * 208 * <prefcondnr> 2 bytes Prefix condition number, index in <prefcond> list 209 * from HEADER. 210 * 211 * All text characters are in 'encoding', but stored as single bytes. 212 */ 213 214 /* 215 * Vim .sug file format: <SUGHEADER> 216 * <SUGWORDTREE> 217 * <SUGTABLE> 218 * 219 * <SUGHEADER>: <fileID> <versionnr> <timestamp> 220 * 221 * <fileID> 6 bytes "VIMsug" 222 * <versionnr> 1 byte VIMSUGVERSION 223 * <timestamp> 8 bytes timestamp that must match with .spl file 224 * 225 * 226 * <SUGWORDTREE>: <wordtree> (see above, no flags or region used) 227 * 228 * 229 * <SUGTABLE>: <sugwcount> <sugline> ... 230 * 231 * <sugwcount> 4 bytes number of <sugline> following 232 * 233 * <sugline>: <sugnr> ... 
NUL 234 * 235 * <sugnr>: X bytes word number that results in this soundfolded word, 236 * stored as an offset to the previous number in as 237 * few bytes as possible, see offset2bytes()) 238 */ 239 240 #include "vim.h" 241 242 #if defined(FEAT_SPELL) || defined(PROTO) 243 244 #ifndef UNIX /* it's in os_unix.h for Unix */ 245 # include <time.h> /* for time_t */ 246 #endif 247 248 #ifndef UNIX /* it's in os_unix.h for Unix */ 249 # include <time.h> /* for time_t */ 250 #endif 251 252 /* Special byte values for <byte>. Some are only used in the tree for 253 * postponed prefixes, some only in the other trees. This is a bit messy... */ 254 #define BY_NOFLAGS 0 /* end of word without flags or region; for 255 * postponed prefix: no <pflags> */ 256 #define BY_INDEX 1 /* child is shared, index follows */ 257 #define BY_FLAGS 2 /* end of word, <flags> byte follows; for 258 * postponed prefix: <pflags> follows */ 259 #define BY_FLAGS2 3 /* end of word, <flags> and <flags2> bytes 260 * follow; never used in prefix tree */ 261 #define BY_SPECIAL BY_FLAGS2 /* highest special byte value */ 262 263 /* Flags used in .spl file for soundsalike flags. */ 264 #define SAL_F0LLOWUP 1 265 #define SAL_COLLAPSE 2 266 #define SAL_REM_ACCENTS 4 267 268 #define VIMSPELLMAGIC "VIMspell" /* string at start of Vim spell file */ 269 #define VIMSPELLMAGICL 8 270 #define VIMSPELLVERSION 50 271 272 /* Section IDs. Only renumber them when VIMSPELLVERSION changes! 
*/ 273 #define SN_REGION 0 /* <regionname> section */ 274 #define SN_CHARFLAGS 1 /* charflags section */ 275 #define SN_MIDWORD 2 /* <midword> section */ 276 #define SN_PREFCOND 3 /* <prefcond> section */ 277 #define SN_REP 4 /* REP items section */ 278 #define SN_SAL 5 /* SAL items section */ 279 #define SN_SOFO 6 /* soundfolding section */ 280 #define SN_MAP 7 /* MAP items section */ 281 #define SN_COMPOUND 8 /* compound words section */ 282 #define SN_SYLLABLE 9 /* syllable section */ 283 #define SN_NOBREAK 10 /* NOBREAK section */ 284 #define SN_SUGFILE 11 /* timestamp for .sug file */ 285 #define SN_REPSAL 12 /* REPSAL items section */ 286 #define SN_WORDS 13 /* common words */ 287 #define SN_NOSPLITSUGS 14 /* don't split word for suggestions */ 288 #define SN_INFO 15 /* info section */ 289 #define SN_NOCOMPOUNDSUGS 16 /* don't compound for suggestions */ 290 #define SN_END 255 /* end of sections */ 291 292 #define SNF_REQUIRED 1 /* <sectionflags>: required section */ 293 294 #define CF_WORD 0x01 295 #define CF_UPPER 0x02 296 297 static int set_spell_finish(spelltab_T *new_st); 298 static int write_spell_prefcond(FILE *fd, garray_T *gap); 299 static int read_region_section(FILE *fd, slang_T *slang, int len); 300 static int read_charflags_section(FILE *fd); 301 static int read_prefcond_section(FILE *fd, slang_T *lp); 302 static int read_rep_section(FILE *fd, garray_T *gap, short *first); 303 static int read_sal_section(FILE *fd, slang_T *slang); 304 static int read_words_section(FILE *fd, slang_T *lp, int len); 305 static int read_sofo_section(FILE *fd, slang_T *slang); 306 static int read_compound(FILE *fd, slang_T *slang, int len); 307 static int set_sofo(slang_T *lp, char_u *from, char_u *to); 308 static void set_sal_first(slang_T *lp); 309 static int *mb_str2wide(char_u *s); 310 static int spell_read_tree(FILE *fd, char_u **bytsp, idx_T **idxsp, int prefixtree, int prefixcnt); 311 static idx_T read_tree_node(FILE *fd, char_u *byts, idx_T *idxs, int maxidx, 
idx_T startidx, int prefixtree, int maxprefcondnr); 312 static void set_spell_charflags(char_u *flags, int cnt, char_u *upp); 313 static int set_spell_chartab(char_u *fol, char_u *low, char_u *upp); 314 static void set_map_str(slang_T *lp, char_u *map); 315 316 317 static char *e_spell_trunc = N_("E758: Truncated spell file"); 318 static char *e_afftrailing = N_("Trailing text in %s line %d: %s"); 319 static char *e_affname = N_("Affix name too long in %s line %d: %s"); 320 static char *e_affform = N_("E761: Format error in affix file FOL, LOW or UPP"); 321 static char *e_affrange = N_("E762: Character in FOL, LOW or UPP is out of range"); 322 static char *msg_compressing = N_("Compressing word tree..."); 323 324 /* 325 * Load one spell file and store the info into a slang_T. 326 * 327 * This is invoked in three ways: 328 * - From spell_load_cb() to load a spell file for the first time. "lang" is 329 * the language name, "old_lp" is NULL. Will allocate an slang_T. 330 * - To reload a spell file that was changed. "lang" is NULL and "old_lp" 331 * points to the existing slang_T. 332 * - Just after writing a .spl file; it's read back to produce the .sug file. 333 * "old_lp" is NULL and "lang" is NULL. Will allocate an slang_T. 334 * 335 * Returns the slang_T the spell file was loaded into. NULL for error. 
336 */ 337 slang_T * 338 spell_load_file( 339 char_u *fname, 340 char_u *lang, 341 slang_T *old_lp, 342 int silent) /* no error if file doesn't exist */ 343 { 344 FILE *fd; 345 char_u buf[VIMSPELLMAGICL]; 346 char_u *p; 347 int i; 348 int n; 349 int len; 350 char_u *save_sourcing_name = sourcing_name; 351 linenr_T save_sourcing_lnum = sourcing_lnum; 352 slang_T *lp = NULL; 353 int c = 0; 354 int res; 355 356 fd = mch_fopen((char *)fname, "r"); 357 if (fd == NULL) 358 { 359 if (!silent) 360 semsg(_(e_notopen), fname); 361 else if (p_verbose > 2) 362 { 363 verbose_enter(); 364 smsg((const char *)e_notopen, fname); 365 verbose_leave(); 366 } 367 goto endFAIL; 368 } 369 if (p_verbose > 2) 370 { 371 verbose_enter(); 372 smsg(_("Reading spell file \"%s\""), fname); 373 verbose_leave(); 374 } 375 376 if (old_lp == NULL) 377 { 378 lp = slang_alloc(lang); 379 if (lp == NULL) 380 goto endFAIL; 381 382 /* Remember the file name, used to reload the file when it's updated. */ 383 lp->sl_fname = vim_strsave(fname); 384 if (lp->sl_fname == NULL) 385 goto endFAIL; 386 387 /* Check for .add.spl (_add.spl for VMS). */ 388 lp->sl_add = strstr((char *)gettail(fname), SPL_FNAME_ADD) != NULL; 389 } 390 else 391 lp = old_lp; 392 393 /* Set sourcing_name, so that error messages mention the file name. */ 394 sourcing_name = fname; 395 sourcing_lnum = 0; 396 397 /* 398 * <HEADER>: <fileID> 399 */ 400 for (i = 0; i < VIMSPELLMAGICL; ++i) 401 buf[i] = getc(fd); /* <fileID> */ 402 if (STRNCMP(buf, VIMSPELLMAGIC, VIMSPELLMAGICL) != 0) 403 { 404 emsg(_("E757: This does not look like a spell file")); 405 goto endFAIL; 406 } 407 c = getc(fd); /* <versionnr> */ 408 if (c < VIMSPELLVERSION) 409 { 410 emsg(_("E771: Old spell file, needs to be updated")); 411 goto endFAIL; 412 } 413 else if (c > VIMSPELLVERSION) 414 { 415 emsg(_("E772: Spell file is for newer version of Vim")); 416 goto endFAIL; 417 } 418 419 420 /* 421 * <SECTIONS>: <section> ... 
<sectionend> 422 * <section>: <sectionID> <sectionflags> <sectionlen> (section contents) 423 */ 424 for (;;) 425 { 426 n = getc(fd); /* <sectionID> or <sectionend> */ 427 if (n == SN_END) 428 break; 429 c = getc(fd); /* <sectionflags> */ 430 len = get4c(fd); /* <sectionlen> */ 431 if (len < 0) 432 goto truncerr; 433 434 res = 0; 435 switch (n) 436 { 437 case SN_INFO: 438 lp->sl_info = read_string(fd, len); /* <infotext> */ 439 if (lp->sl_info == NULL) 440 goto endFAIL; 441 break; 442 443 case SN_REGION: 444 res = read_region_section(fd, lp, len); 445 break; 446 447 case SN_CHARFLAGS: 448 res = read_charflags_section(fd); 449 break; 450 451 case SN_MIDWORD: 452 lp->sl_midword = read_string(fd, len); /* <midword> */ 453 if (lp->sl_midword == NULL) 454 goto endFAIL; 455 break; 456 457 case SN_PREFCOND: 458 res = read_prefcond_section(fd, lp); 459 break; 460 461 case SN_REP: 462 res = read_rep_section(fd, &lp->sl_rep, lp->sl_rep_first); 463 break; 464 465 case SN_REPSAL: 466 res = read_rep_section(fd, &lp->sl_repsal, lp->sl_repsal_first); 467 break; 468 469 case SN_SAL: 470 res = read_sal_section(fd, lp); 471 break; 472 473 case SN_SOFO: 474 res = read_sofo_section(fd, lp); 475 break; 476 477 case SN_MAP: 478 p = read_string(fd, len); /* <mapstr> */ 479 if (p == NULL) 480 goto endFAIL; 481 set_map_str(lp, p); 482 vim_free(p); 483 break; 484 485 case SN_WORDS: 486 res = read_words_section(fd, lp, len); 487 break; 488 489 case SN_SUGFILE: 490 lp->sl_sugtime = get8ctime(fd); /* <timestamp> */ 491 break; 492 493 case SN_NOSPLITSUGS: 494 lp->sl_nosplitsugs = TRUE; 495 break; 496 497 case SN_NOCOMPOUNDSUGS: 498 lp->sl_nocompoundsugs = TRUE; 499 break; 500 501 case SN_COMPOUND: 502 res = read_compound(fd, lp, len); 503 break; 504 505 case SN_NOBREAK: 506 lp->sl_nobreak = TRUE; 507 break; 508 509 case SN_SYLLABLE: 510 lp->sl_syllable = read_string(fd, len); /* <syllable> */ 511 if (lp->sl_syllable == NULL) 512 goto endFAIL; 513 if (init_syl_tab(lp) == FAIL) 514 goto endFAIL; 
515 break; 516 517 default: 518 /* Unsupported section. When it's required give an error 519 * message. When it's not required skip the contents. */ 520 if (c & SNF_REQUIRED) 521 { 522 emsg(_("E770: Unsupported section in spell file")); 523 goto endFAIL; 524 } 525 while (--len >= 0) 526 if (getc(fd) < 0) 527 goto truncerr; 528 break; 529 } 530 someerror: 531 if (res == SP_FORMERROR) 532 { 533 emsg(_(e_format)); 534 goto endFAIL; 535 } 536 if (res == SP_TRUNCERROR) 537 { 538 truncerr: 539 emsg(_(e_spell_trunc)); 540 goto endFAIL; 541 } 542 if (res == SP_OTHERERROR) 543 goto endFAIL; 544 } 545 546 /* <LWORDTREE> */ 547 res = spell_read_tree(fd, &lp->sl_fbyts, &lp->sl_fidxs, FALSE, 0); 548 if (res != 0) 549 goto someerror; 550 551 /* <KWORDTREE> */ 552 res = spell_read_tree(fd, &lp->sl_kbyts, &lp->sl_kidxs, FALSE, 0); 553 if (res != 0) 554 goto someerror; 555 556 /* <PREFIXTREE> */ 557 res = spell_read_tree(fd, &lp->sl_pbyts, &lp->sl_pidxs, TRUE, 558 lp->sl_prefixcnt); 559 if (res != 0) 560 goto someerror; 561 562 /* For a new file link it in the list of spell files. */ 563 if (old_lp == NULL && lang != NULL) 564 { 565 lp->sl_next = first_lang; 566 first_lang = lp; 567 } 568 569 goto endOK; 570 571 endFAIL: 572 if (lang != NULL) 573 /* truncating the name signals the error to spell_load_lang() */ 574 *lang = NUL; 575 if (lp != NULL && old_lp == NULL) 576 slang_free(lp); 577 lp = NULL; 578 579 endOK: 580 if (fd != NULL) 581 fclose(fd); 582 sourcing_name = save_sourcing_name; 583 sourcing_lnum = save_sourcing_lnum; 584 585 return lp; 586 } 587 588 /* 589 * Fill in the wordcount fields for a trie. 590 * Returns the total number of words. 
591 */ 592 static void 593 tree_count_words(char_u *byts, idx_T *idxs) 594 { 595 int depth; 596 idx_T arridx[MAXWLEN]; 597 int curi[MAXWLEN]; 598 int c; 599 idx_T n; 600 int wordcount[MAXWLEN]; 601 602 arridx[0] = 0; 603 curi[0] = 1; 604 wordcount[0] = 0; 605 depth = 0; 606 while (depth >= 0 && !got_int) 607 { 608 if (curi[depth] > byts[arridx[depth]]) 609 { 610 /* Done all bytes at this node, go up one level. */ 611 idxs[arridx[depth]] = wordcount[depth]; 612 if (depth > 0) 613 wordcount[depth - 1] += wordcount[depth]; 614 615 --depth; 616 fast_breakcheck(); 617 } 618 else 619 { 620 /* Do one more byte at this node. */ 621 n = arridx[depth] + curi[depth]; 622 ++curi[depth]; 623 624 c = byts[n]; 625 if (c == 0) 626 { 627 /* End of word, count it. */ 628 ++wordcount[depth]; 629 630 /* Skip over any other NUL bytes (same word with different 631 * flags). */ 632 while (byts[n + 1] == 0) 633 { 634 ++n; 635 ++curi[depth]; 636 } 637 } 638 else 639 { 640 /* Normal char, go one level deeper to count the words. */ 641 ++depth; 642 arridx[depth] = idxs[n]; 643 curi[depth] = 1; 644 wordcount[depth] = 0; 645 } 646 } 647 } 648 } 649 650 /* 651 * Load the .sug files for languages that have one and weren't loaded yet. 652 */ 653 void 654 suggest_load_files(void) 655 { 656 langp_T *lp; 657 int lpi; 658 slang_T *slang; 659 char_u *dotp; 660 FILE *fd; 661 char_u buf[MAXWLEN]; 662 int i; 663 time_t timestamp; 664 int wcount; 665 int wordnr; 666 garray_T ga; 667 int c; 668 669 /* Do this for all languages that support sound folding. */ 670 for (lpi = 0; lpi < curwin->w_s->b_langp.ga_len; ++lpi) 671 { 672 lp = LANGP_ENTRY(curwin->w_s->b_langp, lpi); 673 slang = lp->lp_slang; 674 if (slang->sl_sugtime != 0 && !slang->sl_sugloaded) 675 { 676 /* Change ".spl" to ".sug" and open the file. When the file isn't 677 * found silently skip it. Do set "sl_sugloaded" so that we 678 * don't try again and again. 
*/ 679 slang->sl_sugloaded = TRUE; 680 681 dotp = vim_strrchr(slang->sl_fname, '.'); 682 if (dotp == NULL || fnamecmp(dotp, ".spl") != 0) 683 continue; 684 STRCPY(dotp, ".sug"); 685 fd = mch_fopen((char *)slang->sl_fname, "r"); 686 if (fd == NULL) 687 goto nextone; 688 689 /* 690 * <SUGHEADER>: <fileID> <versionnr> <timestamp> 691 */ 692 for (i = 0; i < VIMSUGMAGICL; ++i) 693 buf[i] = getc(fd); /* <fileID> */ 694 if (STRNCMP(buf, VIMSUGMAGIC, VIMSUGMAGICL) != 0) 695 { 696 semsg(_("E778: This does not look like a .sug file: %s"), 697 slang->sl_fname); 698 goto nextone; 699 } 700 c = getc(fd); /* <versionnr> */ 701 if (c < VIMSUGVERSION) 702 { 703 semsg(_("E779: Old .sug file, needs to be updated: %s"), 704 slang->sl_fname); 705 goto nextone; 706 } 707 else if (c > VIMSUGVERSION) 708 { 709 semsg(_("E780: .sug file is for newer version of Vim: %s"), 710 slang->sl_fname); 711 goto nextone; 712 } 713 714 /* Check the timestamp, it must be exactly the same as the one in 715 * the .spl file. Otherwise the word numbers won't match. */ 716 timestamp = get8ctime(fd); /* <timestamp> */ 717 if (timestamp != slang->sl_sugtime) 718 { 719 semsg(_("E781: .sug file doesn't match .spl file: %s"), 720 slang->sl_fname); 721 goto nextone; 722 } 723 724 /* 725 * <SUGWORDTREE>: <wordtree> 726 * Read the trie with the soundfolded words. 727 */ 728 if (spell_read_tree(fd, &slang->sl_sbyts, &slang->sl_sidxs, 729 FALSE, 0) != 0) 730 { 731 someerror: 732 semsg(_("E782: error while reading .sug file: %s"), 733 slang->sl_fname); 734 slang_clear_sug(slang); 735 goto nextone; 736 } 737 738 /* 739 * <SUGTABLE>: <sugwcount> <sugline> ... 740 * 741 * Read the table with word numbers. We use a file buffer for 742 * this, because it's so much like a file with lines. Makes it 743 * possible to swap the info and save on memory use. 
744 */ 745 slang->sl_sugbuf = open_spellbuf(); 746 if (slang->sl_sugbuf == NULL) 747 goto someerror; 748 /* <sugwcount> */ 749 wcount = get4c(fd); 750 if (wcount < 0) 751 goto someerror; 752 753 /* Read all the wordnr lists into the buffer, one NUL terminated 754 * list per line. */ 755 ga_init2(&ga, 1, 100); 756 for (wordnr = 0; wordnr < wcount; ++wordnr) 757 { 758 ga.ga_len = 0; 759 for (;;) 760 { 761 c = getc(fd); /* <sugline> */ 762 if (c < 0 || ga_grow(&ga, 1) == FAIL) 763 goto someerror; 764 ((char_u *)ga.ga_data)[ga.ga_len++] = c; 765 if (c == NUL) 766 break; 767 } 768 if (ml_append_buf(slang->sl_sugbuf, (linenr_T)wordnr, 769 ga.ga_data, ga.ga_len, TRUE) == FAIL) 770 goto someerror; 771 } 772 ga_clear(&ga); 773 774 /* 775 * Need to put word counts in the word tries, so that we can find 776 * a word by its number. 777 */ 778 tree_count_words(slang->sl_fbyts, slang->sl_fidxs); 779 tree_count_words(slang->sl_sbyts, slang->sl_sidxs); 780 781 nextone: 782 if (fd != NULL) 783 fclose(fd); 784 STRCPY(dotp, ".spl"); 785 } 786 } 787 } 788 789 790 /* 791 * Read a length field from "fd" in "cnt_bytes" bytes. 792 * Allocate memory, read the string into it and add a NUL at the end. 793 * Returns NULL when the count is zero. 794 * Sets "*cntp" to SP_*ERROR when there is an error, length of the result 795 * otherwise. 796 */ 797 static char_u * 798 read_cnt_string(FILE *fd, int cnt_bytes, int *cntp) 799 { 800 int cnt = 0; 801 int i; 802 char_u *str; 803 804 /* read the length bytes, MSB first */ 805 for (i = 0; i < cnt_bytes; ++i) 806 cnt = (cnt << 8) + getc(fd); 807 if (cnt < 0) 808 { 809 *cntp = SP_TRUNCERROR; 810 return NULL; 811 } 812 *cntp = cnt; 813 if (cnt == 0) 814 return NULL; /* nothing to read, return NULL */ 815 816 str = read_string(fd, cnt); 817 if (str == NULL) 818 *cntp = SP_OTHERERROR; 819 return str; 820 } 821 822 /* 823 * Read SN_REGION: <regionname> ... 824 * Return SP_*ERROR flags. 
825 */ 826 static int 827 read_region_section(FILE *fd, slang_T *lp, int len) 828 { 829 int i; 830 831 if (len > MAXREGIONS * 2) 832 return SP_FORMERROR; 833 for (i = 0; i < len; ++i) 834 lp->sl_regions[i] = getc(fd); /* <regionname> */ 835 lp->sl_regions[len] = NUL; 836 return 0; 837 } 838 839 /* 840 * Read SN_CHARFLAGS section: <charflagslen> <charflags> 841 * <folcharslen> <folchars> 842 * Return SP_*ERROR flags. 843 */ 844 static int 845 read_charflags_section(FILE *fd) 846 { 847 char_u *flags; 848 char_u *fol; 849 int flagslen, follen; 850 851 /* <charflagslen> <charflags> */ 852 flags = read_cnt_string(fd, 1, &flagslen); 853 if (flagslen < 0) 854 return flagslen; 855 856 /* <folcharslen> <folchars> */ 857 fol = read_cnt_string(fd, 2, &follen); 858 if (follen < 0) 859 { 860 vim_free(flags); 861 return follen; 862 } 863 864 /* Set the word-char flags and fill SPELL_ISUPPER() table. */ 865 if (flags != NULL && fol != NULL) 866 set_spell_charflags(flags, flagslen, fol); 867 868 vim_free(flags); 869 vim_free(fol); 870 871 /* When <charflagslen> is zero then <fcharlen> must also be zero. */ 872 if ((flags == NULL) != (fol == NULL)) 873 return SP_FORMERROR; 874 return 0; 875 } 876 877 /* 878 * Read SN_PREFCOND section. 879 * Return SP_*ERROR flags. 880 */ 881 static int 882 read_prefcond_section(FILE *fd, slang_T *lp) 883 { 884 int cnt; 885 int i; 886 int n; 887 char_u *p; 888 char_u buf[MAXWLEN + 1]; 889 890 /* <prefcondcnt> <prefcond> ... */ 891 cnt = get2c(fd); /* <prefcondcnt> */ 892 if (cnt <= 0) 893 return SP_FORMERROR; 894 895 lp->sl_prefprog = ALLOC_CLEAR_MULT(regprog_T *, cnt); 896 if (lp->sl_prefprog == NULL) 897 return SP_OTHERERROR; 898 lp->sl_prefixcnt = cnt; 899 900 for (i = 0; i < cnt; ++i) 901 { 902 /* <prefcond> : <condlen> <condstr> */ 903 n = getc(fd); /* <condlen> */ 904 if (n < 0 || n >= MAXWLEN) 905 return SP_FORMERROR; 906 907 /* When <condlen> is zero we have an empty condition. 
Otherwise 908 * compile the regexp program used to check for the condition. */ 909 if (n > 0) 910 { 911 buf[0] = '^'; /* always match at one position only */ 912 p = buf + 1; 913 while (n-- > 0) 914 *p++ = getc(fd); /* <condstr> */ 915 *p = NUL; 916 lp->sl_prefprog[i] = vim_regcomp(buf, RE_MAGIC + RE_STRING); 917 } 918 } 919 return 0; 920 } 921 922 /* 923 * Read REP or REPSAL items section from "fd": <repcount> <rep> ... 924 * Return SP_*ERROR flags. 925 */ 926 static int 927 read_rep_section(FILE *fd, garray_T *gap, short *first) 928 { 929 int cnt; 930 fromto_T *ftp; 931 int i; 932 933 cnt = get2c(fd); /* <repcount> */ 934 if (cnt < 0) 935 return SP_TRUNCERROR; 936 937 if (ga_grow(gap, cnt) == FAIL) 938 return SP_OTHERERROR; 939 940 /* <rep> : <repfromlen> <repfrom> <reptolen> <repto> */ 941 for (; gap->ga_len < cnt; ++gap->ga_len) 942 { 943 ftp = &((fromto_T *)gap->ga_data)[gap->ga_len]; 944 ftp->ft_from = read_cnt_string(fd, 1, &i); 945 if (i < 0) 946 return i; 947 if (i == 0) 948 return SP_FORMERROR; 949 ftp->ft_to = read_cnt_string(fd, 1, &i); 950 if (i <= 0) 951 { 952 vim_free(ftp->ft_from); 953 if (i < 0) 954 return i; 955 return SP_FORMERROR; 956 } 957 } 958 959 /* Fill the first-index table. */ 960 for (i = 0; i < 256; ++i) 961 first[i] = -1; 962 for (i = 0; i < gap->ga_len; ++i) 963 { 964 ftp = &((fromto_T *)gap->ga_data)[i]; 965 if (first[*ftp->ft_from] == -1) 966 first[*ftp->ft_from] = i; 967 } 968 return 0; 969 } 970 971 /* 972 * Read SN_SAL section: <salflags> <salcount> <sal> ... 973 * Return SP_*ERROR flags. 
974 */ 975 static int 976 read_sal_section(FILE *fd, slang_T *slang) 977 { 978 int i; 979 int cnt; 980 garray_T *gap; 981 salitem_T *smp; 982 int ccnt; 983 char_u *p; 984 int c = NUL; 985 986 slang->sl_sofo = FALSE; 987 988 i = getc(fd); /* <salflags> */ 989 if (i & SAL_F0LLOWUP) 990 slang->sl_followup = TRUE; 991 if (i & SAL_COLLAPSE) 992 slang->sl_collapse = TRUE; 993 if (i & SAL_REM_ACCENTS) 994 slang->sl_rem_accents = TRUE; 995 996 cnt = get2c(fd); /* <salcount> */ 997 if (cnt < 0) 998 return SP_TRUNCERROR; 999 1000 gap = &slang->sl_sal; 1001 ga_init2(gap, sizeof(salitem_T), 10); 1002 if (ga_grow(gap, cnt + 1) == FAIL) 1003 return SP_OTHERERROR; 1004 1005 /* <sal> : <salfromlen> <salfrom> <saltolen> <salto> */ 1006 for (; gap->ga_len < cnt; ++gap->ga_len) 1007 { 1008 smp = &((salitem_T *)gap->ga_data)[gap->ga_len]; 1009 ccnt = getc(fd); /* <salfromlen> */ 1010 if (ccnt < 0) 1011 return SP_TRUNCERROR; 1012 if ((p = alloc(ccnt + 2)) == NULL) 1013 return SP_OTHERERROR; 1014 smp->sm_lead = p; 1015 1016 /* Read up to the first special char into sm_lead. */ 1017 for (i = 0; i < ccnt; ++i) 1018 { 1019 c = getc(fd); /* <salfrom> */ 1020 if (vim_strchr((char_u *)"0123456789(-<^$", c) != NULL) 1021 break; 1022 *p++ = c; 1023 } 1024 smp->sm_leadlen = (int)(p - smp->sm_lead); 1025 *p++ = NUL; 1026 1027 /* Put (abc) chars in sm_oneof, if any. */ 1028 if (c == '(') 1029 { 1030 smp->sm_oneof = p; 1031 for (++i; i < ccnt; ++i) 1032 { 1033 c = getc(fd); /* <salfrom> */ 1034 if (c == ')') 1035 break; 1036 *p++ = c; 1037 } 1038 *p++ = NUL; 1039 if (++i < ccnt) 1040 c = getc(fd); 1041 } 1042 else 1043 smp->sm_oneof = NULL; 1044 1045 /* Any following chars go in sm_rules. 
*/ 1046 smp->sm_rules = p; 1047 if (i < ccnt) 1048 /* store the char we got while checking for end of sm_lead */ 1049 *p++ = c; 1050 for (++i; i < ccnt; ++i) 1051 *p++ = getc(fd); /* <salfrom> */ 1052 *p++ = NUL; 1053 1054 /* <saltolen> <salto> */ 1055 smp->sm_to = read_cnt_string(fd, 1, &ccnt); 1056 if (ccnt < 0) 1057 { 1058 vim_free(smp->sm_lead); 1059 return ccnt; 1060 } 1061 1062 if (has_mbyte) 1063 { 1064 /* convert the multi-byte strings to wide char strings */ 1065 smp->sm_lead_w = mb_str2wide(smp->sm_lead); 1066 smp->sm_leadlen = mb_charlen(smp->sm_lead); 1067 if (smp->sm_oneof == NULL) 1068 smp->sm_oneof_w = NULL; 1069 else 1070 smp->sm_oneof_w = mb_str2wide(smp->sm_oneof); 1071 if (smp->sm_to == NULL) 1072 smp->sm_to_w = NULL; 1073 else 1074 smp->sm_to_w = mb_str2wide(smp->sm_to); 1075 if (smp->sm_lead_w == NULL 1076 || (smp->sm_oneof_w == NULL && smp->sm_oneof != NULL) 1077 || (smp->sm_to_w == NULL && smp->sm_to != NULL)) 1078 { 1079 vim_free(smp->sm_lead); 1080 vim_free(smp->sm_to); 1081 vim_free(smp->sm_lead_w); 1082 vim_free(smp->sm_oneof_w); 1083 vim_free(smp->sm_to_w); 1084 return SP_OTHERERROR; 1085 } 1086 } 1087 } 1088 1089 if (gap->ga_len > 0) 1090 { 1091 /* Add one extra entry to mark the end with an empty sm_lead. Avoids 1092 * that we need to check the index every time. */ 1093 smp = &((salitem_T *)gap->ga_data)[gap->ga_len]; 1094 if ((p = alloc(1)) == NULL) 1095 return SP_OTHERERROR; 1096 p[0] = NUL; 1097 smp->sm_lead = p; 1098 smp->sm_leadlen = 0; 1099 smp->sm_oneof = NULL; 1100 smp->sm_rules = p; 1101 smp->sm_to = NULL; 1102 if (has_mbyte) 1103 { 1104 smp->sm_lead_w = mb_str2wide(smp->sm_lead); 1105 smp->sm_leadlen = 0; 1106 smp->sm_oneof_w = NULL; 1107 smp->sm_to_w = NULL; 1108 } 1109 ++gap->ga_len; 1110 } 1111 1112 /* Fill the first-index table. */ 1113 set_sal_first(slang); 1114 1115 return 0; 1116 } 1117 1118 /* 1119 * Read SN_WORDS: <word> ... 1120 * Return SP_*ERROR flags. 
1121 */ 1122 static int 1123 read_words_section(FILE *fd, slang_T *lp, int len) 1124 { 1125 int done = 0; 1126 int i; 1127 int c; 1128 char_u word[MAXWLEN]; 1129 1130 while (done < len) 1131 { 1132 /* Read one word at a time. */ 1133 for (i = 0; ; ++i) 1134 { 1135 c = getc(fd); 1136 if (c == EOF) 1137 return SP_TRUNCERROR; 1138 word[i] = c; 1139 if (word[i] == NUL) 1140 break; 1141 if (i == MAXWLEN - 1) 1142 return SP_FORMERROR; 1143 } 1144 1145 /* Init the count to 10. */ 1146 count_common_word(lp, word, -1, 10); 1147 done += i + 1; 1148 } 1149 return 0; 1150 } 1151 1152 /* 1153 * SN_SOFO: <sofofromlen> <sofofrom> <sofotolen> <sofoto> 1154 * Return SP_*ERROR flags. 1155 */ 1156 static int 1157 read_sofo_section(FILE *fd, slang_T *slang) 1158 { 1159 int cnt; 1160 char_u *from, *to; 1161 int res; 1162 1163 slang->sl_sofo = TRUE; 1164 1165 /* <sofofromlen> <sofofrom> */ 1166 from = read_cnt_string(fd, 2, &cnt); 1167 if (cnt < 0) 1168 return cnt; 1169 1170 /* <sofotolen> <sofoto> */ 1171 to = read_cnt_string(fd, 2, &cnt); 1172 if (cnt < 0) 1173 { 1174 vim_free(from); 1175 return cnt; 1176 } 1177 1178 /* Store the info in slang->sl_sal and/or slang->sl_sal_first. */ 1179 if (from != NULL && to != NULL) 1180 res = set_sofo(slang, from, to); 1181 else if (from != NULL || to != NULL) 1182 res = SP_FORMERROR; /* only one of two strings is an error */ 1183 else 1184 res = 0; 1185 1186 vim_free(from); 1187 vim_free(to); 1188 return res; 1189 } 1190 1191 /* 1192 * Read the compound section from the .spl file: 1193 * <compmax> <compminlen> <compsylmax> <compoptions> <compflags> 1194 * Returns SP_*ERROR flags. 
1195 */ 1196 static int 1197 read_compound(FILE *fd, slang_T *slang, int len) 1198 { 1199 int todo = len; 1200 int c; 1201 int atstart; 1202 char_u *pat; 1203 char_u *pp; 1204 char_u *cp; 1205 char_u *ap; 1206 char_u *crp; 1207 int cnt; 1208 garray_T *gap; 1209 1210 if (todo < 2) 1211 return SP_FORMERROR; /* need at least two bytes */ 1212 1213 --todo; 1214 c = getc(fd); /* <compmax> */ 1215 if (c < 2) 1216 c = MAXWLEN; 1217 slang->sl_compmax = c; 1218 1219 --todo; 1220 c = getc(fd); /* <compminlen> */ 1221 if (c < 1) 1222 c = 0; 1223 slang->sl_compminlen = c; 1224 1225 --todo; 1226 c = getc(fd); /* <compsylmax> */ 1227 if (c < 1) 1228 c = MAXWLEN; 1229 slang->sl_compsylmax = c; 1230 1231 c = getc(fd); /* <compoptions> */ 1232 if (c != 0) 1233 ungetc(c, fd); /* be backwards compatible with Vim 7.0b */ 1234 else 1235 { 1236 --todo; 1237 c = getc(fd); /* only use the lower byte for now */ 1238 --todo; 1239 slang->sl_compoptions = c; 1240 1241 gap = &slang->sl_comppat; 1242 c = get2c(fd); /* <comppatcount> */ 1243 todo -= 2; 1244 ga_init2(gap, sizeof(char_u *), c); 1245 if (ga_grow(gap, c) == OK) 1246 while (--c >= 0) 1247 { 1248 ((char_u **)(gap->ga_data))[gap->ga_len++] = 1249 read_cnt_string(fd, 1, &cnt); 1250 /* <comppatlen> <comppattext> */ 1251 if (cnt < 0) 1252 return cnt; 1253 todo -= cnt + 1; 1254 } 1255 } 1256 if (todo < 0) 1257 return SP_FORMERROR; 1258 1259 /* Turn the COMPOUNDRULE items into a regexp pattern: 1260 * "a[bc]/a*b+" -> "^\(a[bc]\|a*b\+\)$". 1261 * Inserting backslashes may double the length, "^\(\)$<Nul>" is 7 bytes. 1262 * Conversion to utf-8 may double the size. */ 1263 c = todo * 2 + 7; 1264 if (enc_utf8) 1265 c += todo * 2; 1266 pat = alloc(c); 1267 if (pat == NULL) 1268 return SP_OTHERERROR; 1269 1270 /* We also need a list of all flags that can appear at the start and one 1271 * for all flags. 
*/ 1272 cp = alloc(todo + 1); 1273 if (cp == NULL) 1274 { 1275 vim_free(pat); 1276 return SP_OTHERERROR; 1277 } 1278 slang->sl_compstartflags = cp; 1279 *cp = NUL; 1280 1281 ap = alloc(todo + 1); 1282 if (ap == NULL) 1283 { 1284 vim_free(pat); 1285 return SP_OTHERERROR; 1286 } 1287 slang->sl_compallflags = ap; 1288 *ap = NUL; 1289 1290 /* And a list of all patterns in their original form, for checking whether 1291 * compounding may work in match_compoundrule(). This is freed when we 1292 * encounter a wildcard, the check doesn't work then. */ 1293 crp = alloc(todo + 1); 1294 slang->sl_comprules = crp; 1295 1296 pp = pat; 1297 *pp++ = '^'; 1298 *pp++ = '\\'; 1299 *pp++ = '('; 1300 1301 atstart = 1; 1302 while (todo-- > 0) 1303 { 1304 c = getc(fd); /* <compflags> */ 1305 if (c == EOF) 1306 { 1307 vim_free(pat); 1308 return SP_TRUNCERROR; 1309 } 1310 1311 /* Add all flags to "sl_compallflags". */ 1312 if (vim_strchr((char_u *)"?*+[]/", c) == NULL 1313 && !byte_in_str(slang->sl_compallflags, c)) 1314 { 1315 *ap++ = c; 1316 *ap = NUL; 1317 } 1318 1319 if (atstart != 0) 1320 { 1321 /* At start of item: copy flags to "sl_compstartflags". For a 1322 * [abc] item set "atstart" to 2 and copy up to the ']'. */ 1323 if (c == '[') 1324 atstart = 2; 1325 else if (c == ']') 1326 atstart = 0; 1327 else 1328 { 1329 if (!byte_in_str(slang->sl_compstartflags, c)) 1330 { 1331 *cp++ = c; 1332 *cp = NUL; 1333 } 1334 if (atstart == 1) 1335 atstart = 0; 1336 } 1337 } 1338 1339 /* Copy flag to "sl_comprules", unless we run into a wildcard. */ 1340 if (crp != NULL) 1341 { 1342 if (c == '?' || c == '+' || c == '*') 1343 { 1344 VIM_CLEAR(slang->sl_comprules); 1345 crp = NULL; 1346 } 1347 else 1348 *crp++ = c; 1349 } 1350 1351 if (c == '/') /* slash separates two items */ 1352 { 1353 *pp++ = '\\'; 1354 *pp++ = '|'; 1355 atstart = 1; 1356 } 1357 else /* normal char, "[abc]" and '*' are copied as-is */ 1358 { 1359 if (c == '?' || c == '+' || c == '~') 1360 *pp++ = '\\'; /* "a?" 
becomes "a\?", "a+" becomes "a\+" */ 1361 if (enc_utf8) 1362 pp += mb_char2bytes(c, pp); 1363 else 1364 *pp++ = c; 1365 } 1366 } 1367 1368 *pp++ = '\\'; 1369 *pp++ = ')'; 1370 *pp++ = '$'; 1371 *pp = NUL; 1372 1373 if (crp != NULL) 1374 *crp = NUL; 1375 1376 slang->sl_compprog = vim_regcomp(pat, RE_MAGIC + RE_STRING + RE_STRICT); 1377 vim_free(pat); 1378 if (slang->sl_compprog == NULL) 1379 return SP_FORMERROR; 1380 1381 return 0; 1382 } 1383 1384 /* 1385 * Set the SOFOFROM and SOFOTO items in language "lp". 1386 * Returns SP_*ERROR flags when there is something wrong. 1387 */ 1388 static int 1389 set_sofo(slang_T *lp, char_u *from, char_u *to) 1390 { 1391 int i; 1392 1393 garray_T *gap; 1394 char_u *s; 1395 char_u *p; 1396 int c; 1397 int *inp; 1398 1399 if (has_mbyte) 1400 { 1401 /* Use "sl_sal" as an array with 256 pointers to a list of wide 1402 * characters. The index is the low byte of the character. 1403 * The list contains from-to pairs with a terminating NUL. 1404 * sl_sal_first[] is used for latin1 "from" characters. */ 1405 gap = &lp->sl_sal; 1406 ga_init2(gap, sizeof(int *), 1); 1407 if (ga_grow(gap, 256) == FAIL) 1408 return SP_OTHERERROR; 1409 vim_memset(gap->ga_data, 0, sizeof(int *) * 256); 1410 gap->ga_len = 256; 1411 1412 /* First count the number of items for each list. Temporarily use 1413 * sl_sal_first[] for this. */ 1414 for (p = from, s = to; *p != NUL && *s != NUL; ) 1415 { 1416 c = mb_cptr2char_adv(&p); 1417 MB_CPTR_ADV(s); 1418 if (c >= 256) 1419 ++lp->sl_sal_first[c & 0xff]; 1420 } 1421 if (*p != NUL || *s != NUL) /* lengths differ */ 1422 return SP_FORMERROR; 1423 1424 /* Allocate the lists. */ 1425 for (i = 0; i < 256; ++i) 1426 if (lp->sl_sal_first[i] > 0) 1427 { 1428 p = alloc(sizeof(int) * (lp->sl_sal_first[i] * 2 + 1)); 1429 if (p == NULL) 1430 return SP_OTHERERROR; 1431 ((int **)gap->ga_data)[i] = (int *)p; 1432 *(int *)p = 0; 1433 } 1434 1435 /* Put the characters up to 255 in sl_sal_first[] the rest in a sl_sal 1436 * list. 
*/ 1437 vim_memset(lp->sl_sal_first, 0, sizeof(salfirst_T) * 256); 1438 for (p = from, s = to; *p != NUL && *s != NUL; ) 1439 { 1440 c = mb_cptr2char_adv(&p); 1441 i = mb_cptr2char_adv(&s); 1442 if (c >= 256) 1443 { 1444 /* Append the from-to chars at the end of the list with 1445 * the low byte. */ 1446 inp = ((int **)gap->ga_data)[c & 0xff]; 1447 while (*inp != 0) 1448 ++inp; 1449 *inp++ = c; /* from char */ 1450 *inp++ = i; /* to char */ 1451 *inp++ = NUL; /* NUL at the end */ 1452 } 1453 else 1454 /* mapping byte to char is done in sl_sal_first[] */ 1455 lp->sl_sal_first[c] = i; 1456 } 1457 } 1458 else 1459 { 1460 /* mapping bytes to bytes is done in sl_sal_first[] */ 1461 if (STRLEN(from) != STRLEN(to)) 1462 return SP_FORMERROR; 1463 1464 for (i = 0; to[i] != NUL; ++i) 1465 lp->sl_sal_first[from[i]] = to[i]; 1466 lp->sl_sal.ga_len = 1; /* indicates we have soundfolding */ 1467 } 1468 1469 return 0; 1470 } 1471 1472 /* 1473 * Fill the first-index table for "lp". 1474 */ 1475 static void 1476 set_sal_first(slang_T *lp) 1477 { 1478 salfirst_T *sfirst; 1479 int i; 1480 salitem_T *smp; 1481 int c; 1482 garray_T *gap = &lp->sl_sal; 1483 1484 sfirst = lp->sl_sal_first; 1485 for (i = 0; i < 256; ++i) 1486 sfirst[i] = -1; 1487 smp = (salitem_T *)gap->ga_data; 1488 for (i = 0; i < gap->ga_len; ++i) 1489 { 1490 if (has_mbyte) 1491 /* Use the lowest byte of the first character. For latin1 it's 1492 * the character, for other encodings it should differ for most 1493 * characters. */ 1494 c = *smp[i].sm_lead_w & 0xff; 1495 else 1496 c = *smp[i].sm_lead; 1497 if (sfirst[c] == -1) 1498 { 1499 sfirst[c] = i; 1500 if (has_mbyte) 1501 { 1502 int n; 1503 1504 /* Make sure all entries with this byte are following each 1505 * other. Move the ones that are in the wrong position. Do 1506 * keep the same ordering! */ 1507 while (i + 1 < gap->ga_len 1508 && (*smp[i + 1].sm_lead_w & 0xff) == c) 1509 /* Skip over entry with same index byte. 
*/ 1510 ++i; 1511 1512 for (n = 1; i + n < gap->ga_len; ++n) 1513 if ((*smp[i + n].sm_lead_w & 0xff) == c) 1514 { 1515 salitem_T tsal; 1516 1517 /* Move entry with same index byte after the entries 1518 * we already found. */ 1519 ++i; 1520 --n; 1521 tsal = smp[i + n]; 1522 mch_memmove(smp + i + 1, smp + i, 1523 sizeof(salitem_T) * n); 1524 smp[i] = tsal; 1525 } 1526 } 1527 } 1528 } 1529 } 1530 1531 /* 1532 * Turn a multi-byte string into a wide character string. 1533 * Return it in allocated memory (NULL for out-of-memory) 1534 */ 1535 static int * 1536 mb_str2wide(char_u *s) 1537 { 1538 int *res; 1539 char_u *p; 1540 int i = 0; 1541 1542 res = ALLOC_MULT(int, mb_charlen(s) + 1); 1543 if (res != NULL) 1544 { 1545 for (p = s; *p != NUL; ) 1546 res[i++] = mb_ptr2char_adv(&p); 1547 res[i] = NUL; 1548 } 1549 return res; 1550 } 1551 1552 /* 1553 * Read a tree from the .spl or .sug file. 1554 * Allocates the memory and stores pointers in "bytsp" and "idxsp". 1555 * This is skipped when the tree has zero length. 1556 * Returns zero when OK, SP_ value for an error. 1557 */ 1558 static int 1559 spell_read_tree( 1560 FILE *fd, 1561 char_u **bytsp, 1562 idx_T **idxsp, 1563 int prefixtree, /* TRUE for the prefix tree */ 1564 int prefixcnt) /* when "prefixtree" is TRUE: prefix count */ 1565 { 1566 long len; 1567 int idx; 1568 char_u *bp; 1569 idx_T *ip; 1570 1571 /* The tree size was computed when writing the file, so that we can 1572 * allocate it as one long block. <nodecount> */ 1573 len = get4c(fd); 1574 if (len < 0) 1575 return SP_TRUNCERROR; 1576 if (len >= LONG_MAX / (long)sizeof(int)) 1577 /* Invalid length, multiply with sizeof(int) would overflow. */ 1578 return SP_FORMERROR; 1579 if (len > 0) 1580 { 1581 /* Allocate the byte array. */ 1582 bp = alloc(len); 1583 if (bp == NULL) 1584 return SP_OTHERERROR; 1585 *bytsp = bp; 1586 1587 /* Allocate the index array. 
*/ 1588 ip = lalloc_clear(len * sizeof(int), TRUE); 1589 if (ip == NULL) 1590 return SP_OTHERERROR; 1591 *idxsp = ip; 1592 1593 /* Recursively read the tree and store it in the array. */ 1594 idx = read_tree_node(fd, bp, ip, len, 0, prefixtree, prefixcnt); 1595 if (idx < 0) 1596 return idx; 1597 } 1598 return 0; 1599 } 1600 1601 /* 1602 * Read one row of siblings from the spell file and store it in the byte array 1603 * "byts" and index array "idxs". Recursively read the children. 1604 * 1605 * NOTE: The code here must match put_node()! 1606 * 1607 * Returns the index (>= 0) following the siblings. 1608 * Returns SP_TRUNCERROR if the file is shorter than expected. 1609 * Returns SP_FORMERROR if there is a format error. 1610 */ 1611 static idx_T 1612 read_tree_node( 1613 FILE *fd, 1614 char_u *byts, 1615 idx_T *idxs, 1616 int maxidx, /* size of arrays */ 1617 idx_T startidx, /* current index in "byts" and "idxs" */ 1618 int prefixtree, /* TRUE for reading PREFIXTREE */ 1619 int maxprefcondnr) /* maximum for <prefcondnr> */ 1620 { 1621 int len; 1622 int i; 1623 int n; 1624 idx_T idx = startidx; 1625 int c; 1626 int c2; 1627 #define SHARED_MASK 0x8000000 1628 1629 len = getc(fd); /* <siblingcount> */ 1630 if (len <= 0) 1631 return SP_TRUNCERROR; 1632 1633 if (startidx + len >= maxidx) 1634 return SP_FORMERROR; 1635 byts[idx++] = len; 1636 1637 /* Read the byte values, flag/region bytes and shared indexes. */ 1638 for (i = 1; i <= len; ++i) 1639 { 1640 c = getc(fd); /* <byte> */ 1641 if (c < 0) 1642 return SP_TRUNCERROR; 1643 if (c <= BY_SPECIAL) 1644 { 1645 if (c == BY_NOFLAGS && !prefixtree) 1646 { 1647 /* No flags, all regions. */ 1648 idxs[idx] = 0; 1649 c = 0; 1650 } 1651 else if (c != BY_INDEX) 1652 { 1653 if (prefixtree) 1654 { 1655 /* Read the optional pflags byte, the prefix ID and the 1656 * condition nr. In idxs[] store the prefix ID in the low 1657 * byte, the condition index shifted up 8 bits, the flags 1658 * shifted up 24 bits. 
*/ 1659 if (c == BY_FLAGS) 1660 c = getc(fd) << 24; /* <pflags> */ 1661 else 1662 c = 0; 1663 1664 c |= getc(fd); /* <affixID> */ 1665 1666 n = get2c(fd); /* <prefcondnr> */ 1667 if (n >= maxprefcondnr) 1668 return SP_FORMERROR; 1669 c |= (n << 8); 1670 } 1671 else /* c must be BY_FLAGS or BY_FLAGS2 */ 1672 { 1673 /* Read flags and optional region and prefix ID. In 1674 * idxs[] the flags go in the low two bytes, region above 1675 * that and prefix ID above the region. */ 1676 c2 = c; 1677 c = getc(fd); /* <flags> */ 1678 if (c2 == BY_FLAGS2) 1679 c = (getc(fd) << 8) + c; /* <flags2> */ 1680 if (c & WF_REGION) 1681 c = (getc(fd) << 16) + c; /* <region> */ 1682 if (c & WF_AFX) 1683 c = (getc(fd) << 24) + c; /* <affixID> */ 1684 } 1685 1686 idxs[idx] = c; 1687 c = 0; 1688 } 1689 else /* c == BY_INDEX */ 1690 { 1691 /* <nodeidx> */ 1692 n = get3c(fd); 1693 if (n < 0 || n >= maxidx) 1694 return SP_FORMERROR; 1695 idxs[idx] = n + SHARED_MASK; 1696 c = getc(fd); /* <xbyte> */ 1697 } 1698 } 1699 byts[idx++] = c; 1700 } 1701 1702 /* Recursively read the children for non-shared siblings. 1703 * Skip the end-of-word ones (zero byte value) and the shared ones (and 1704 * remove SHARED_MASK) */ 1705 for (i = 1; i <= len; ++i) 1706 if (byts[startidx + i] != 0) 1707 { 1708 if (idxs[startidx + i] & SHARED_MASK) 1709 idxs[startidx + i] &= ~SHARED_MASK; 1710 else 1711 { 1712 idxs[startidx + i] = idx; 1713 idx = read_tree_node(fd, byts, idxs, maxidx, idx, 1714 prefixtree, maxprefcondnr); 1715 if (idx < 0) 1716 break; 1717 } 1718 } 1719 1720 return idx; 1721 } 1722 1723 /* 1724 * Reload the spell file "fname" if it's loaded. 
1725 */ 1726 static void 1727 spell_reload_one( 1728 char_u *fname, 1729 int added_word) /* invoked through "zg" */ 1730 { 1731 slang_T *slang; 1732 int didit = FALSE; 1733 1734 for (slang = first_lang; slang != NULL; slang = slang->sl_next) 1735 { 1736 if (fullpathcmp(fname, slang->sl_fname, FALSE, TRUE) == FPC_SAME) 1737 { 1738 slang_clear(slang); 1739 if (spell_load_file(fname, NULL, slang, FALSE) == NULL) 1740 /* reloading failed, clear the language */ 1741 slang_clear(slang); 1742 redraw_all_later(SOME_VALID); 1743 didit = TRUE; 1744 } 1745 } 1746 1747 /* When "zg" was used and the file wasn't loaded yet, should redo 1748 * 'spelllang' to load it now. */ 1749 if (added_word && !didit) 1750 did_set_spelllang(curwin); 1751 } 1752 1753 1754 /* 1755 * Functions for ":mkspell". 1756 */ 1757 1758 #define MAXLINELEN 500 /* Maximum length in bytes of a line in a .aff 1759 and .dic file. */ 1760 /* 1761 * Main structure to store the contents of a ".aff" file. 1762 */ 1763 typedef struct afffile_S 1764 { 1765 char_u *af_enc; /* "SET", normalized, alloc'ed string or NULL */ 1766 int af_flagtype; /* AFT_CHAR, AFT_LONG, AFT_NUM or AFT_CAPLONG */ 1767 unsigned af_rare; /* RARE ID for rare word */ 1768 unsigned af_keepcase; /* KEEPCASE ID for keep-case word */ 1769 unsigned af_bad; /* BAD ID for banned word */ 1770 unsigned af_needaffix; /* NEEDAFFIX ID */ 1771 unsigned af_circumfix; /* CIRCUMFIX ID */ 1772 unsigned af_needcomp; /* NEEDCOMPOUND ID */ 1773 unsigned af_comproot; /* COMPOUNDROOT ID */ 1774 unsigned af_compforbid; /* COMPOUNDFORBIDFLAG ID */ 1775 unsigned af_comppermit; /* COMPOUNDPERMITFLAG ID */ 1776 unsigned af_nosuggest; /* NOSUGGEST ID */ 1777 int af_pfxpostpone; /* postpone prefixes without chop string and 1778 without flags */ 1779 int af_ignoreextra; /* IGNOREEXTRA present */ 1780 hashtab_T af_pref; /* hashtable for prefixes, affheader_T */ 1781 hashtab_T af_suff; /* hashtable for suffixes, affheader_T */ 1782 hashtab_T af_comp; /* hashtable for compound 
flags, compitem_T */ 1783 } afffile_T; 1784 1785 #define AFT_CHAR 0 /* flags are one character */ 1786 #define AFT_LONG 1 /* flags are two characters */ 1787 #define AFT_CAPLONG 2 /* flags are one or two characters */ 1788 #define AFT_NUM 3 /* flags are numbers, comma separated */ 1789 1790 typedef struct affentry_S affentry_T; 1791 /* Affix entry from ".aff" file. Used for prefixes and suffixes. */ 1792 struct affentry_S 1793 { 1794 affentry_T *ae_next; /* next affix with same name/number */ 1795 char_u *ae_chop; /* text to chop off basic word (can be NULL) */ 1796 char_u *ae_add; /* text to add to basic word (can be NULL) */ 1797 char_u *ae_flags; /* flags on the affix (can be NULL) */ 1798 char_u *ae_cond; /* condition (NULL for ".") */ 1799 regprog_T *ae_prog; /* regexp program for ae_cond or NULL */ 1800 char ae_compforbid; /* COMPOUNDFORBIDFLAG found */ 1801 char ae_comppermit; /* COMPOUNDPERMITFLAG found */ 1802 }; 1803 1804 #define AH_KEY_LEN 17 /* 2 x 8 bytes + NUL */ 1805 1806 /* Affix header from ".aff" file. Used for af_pref and af_suff. */ 1807 typedef struct affheader_S 1808 { 1809 char_u ah_key[AH_KEY_LEN]; /* key for hashtab == name of affix */ 1810 unsigned ah_flag; /* affix name as number, uses "af_flagtype" */ 1811 int ah_newID; /* prefix ID after renumbering; 0 if not used */ 1812 int ah_combine; /* suffix may combine with prefix */ 1813 int ah_follows; /* another affix block should be following */ 1814 affentry_T *ah_first; /* first affix entry */ 1815 } affheader_T; 1816 1817 #define HI2AH(hi) ((affheader_T *)(hi)->hi_key) 1818 1819 /* Flag used in compound items. */ 1820 typedef struct compitem_S 1821 { 1822 char_u ci_key[AH_KEY_LEN]; /* key for hashtab == name of compound */ 1823 unsigned ci_flag; /* affix name as number, uses "af_flagtype" */ 1824 int ci_newID; /* affix ID after renumbering. 
*/ 1825 } compitem_T; 1826 1827 #define HI2CI(hi) ((compitem_T *)(hi)->hi_key) 1828 1829 /* 1830 * Structure that is used to store the items in the word tree. This avoids 1831 * the need to keep track of each allocated thing, everything is freed all at 1832 * once after ":mkspell" is done. 1833 * Note: "sb_next" must be just before "sb_data" to make sure the alignment of 1834 * "sb_data" is correct for systems where pointers must be aligned on 1835 * pointer-size boundaries and sizeof(pointer) > sizeof(int) (e.g., Sparc). 1836 */ 1837 #define SBLOCKSIZE 16000 /* size of sb_data */ 1838 typedef struct sblock_S sblock_T; 1839 struct sblock_S 1840 { 1841 int sb_used; /* nr of bytes already in use */ 1842 sblock_T *sb_next; /* next block in list */ 1843 char_u sb_data[1]; /* data, actually longer */ 1844 }; 1845 1846 /* 1847 * A node in the tree. 1848 */ 1849 typedef struct wordnode_S wordnode_T; 1850 struct wordnode_S 1851 { 1852 union /* shared to save space */ 1853 { 1854 char_u hashkey[6]; /* the hash key, only used while compressing */ 1855 int index; /* index in written nodes (valid after first 1856 round) */ 1857 } wn_u1; 1858 union /* shared to save space */ 1859 { 1860 wordnode_T *next; /* next node with same hash key */ 1861 wordnode_T *wnode; /* parent node that will write this node */ 1862 } wn_u2; 1863 wordnode_T *wn_child; /* child (next byte in word) */ 1864 wordnode_T *wn_sibling; /* next sibling (alternate byte in word, 1865 always sorted) */ 1866 int wn_refs; /* Nr. of references to this node. Only 1867 relevant for first node in a list of 1868 siblings, in following siblings it is 1869 always one. */ 1870 char_u wn_byte; /* Byte for this node. NUL for word end */ 1871 1872 /* Info for when "wn_byte" is NUL. 1873 * In PREFIXTREE "wn_region" is used for the prefcondnr. 1874 * In the soundfolded word tree "wn_flags" has the MSW of the wordnr and 1875 * "wn_region" the LSW of the wordnr. 
*/ 1876 char_u wn_affixID; /* supported/required prefix ID or 0 */ 1877 short_u wn_flags; /* WF_ flags */ 1878 short wn_region; /* region mask */ 1879 1880 #ifdef SPELL_PRINTTREE 1881 int wn_nr; /* sequence nr for printing */ 1882 #endif 1883 }; 1884 1885 #define WN_MASK 0xffff /* mask relevant bits of "wn_flags" */ 1886 1887 #define HI2WN(hi) (wordnode_T *)((hi)->hi_key) 1888 1889 /* 1890 * Info used while reading the spell files. 1891 */ 1892 typedef struct spellinfo_S 1893 { 1894 wordnode_T *si_foldroot; /* tree with case-folded words */ 1895 long si_foldwcount; /* nr of words in si_foldroot */ 1896 1897 wordnode_T *si_keeproot; /* tree with keep-case words */ 1898 long si_keepwcount; /* nr of words in si_keeproot */ 1899 1900 wordnode_T *si_prefroot; /* tree with postponed prefixes */ 1901 1902 long si_sugtree; /* creating the soundfolding trie */ 1903 1904 sblock_T *si_blocks; /* memory blocks used */ 1905 long si_blocks_cnt; /* memory blocks allocated */ 1906 int si_did_emsg; /* TRUE when ran out of memory */ 1907 1908 long si_compress_cnt; /* words to add before lowering 1909 compression limit */ 1910 wordnode_T *si_first_free; /* List of nodes that have been freed during 1911 compression, linked by "wn_child" field. 
*/ 1912 long si_free_count; /* number of nodes in si_first_free */ 1913 #ifdef SPELL_PRINTTREE 1914 int si_wordnode_nr; /* sequence nr for nodes */ 1915 #endif 1916 buf_T *si_spellbuf; /* buffer used to store soundfold word table */ 1917 1918 int si_ascii; /* handling only ASCII words */ 1919 int si_add; /* addition file */ 1920 int si_clear_chartab; /* when TRUE clear char tables */ 1921 int si_region; /* region mask */ 1922 vimconv_T si_conv; /* for conversion to 'encoding' */ 1923 int si_memtot; /* runtime memory used */ 1924 int si_verbose; /* verbose messages */ 1925 int si_msg_count; /* number of words added since last message */ 1926 char_u *si_info; /* info text chars or NULL */ 1927 int si_region_count; /* number of regions supported (1 when there 1928 are no regions) */ 1929 char_u si_region_name[MAXREGIONS * 2 + 1]; 1930 /* region names; used only if 1931 * si_region_count > 1) */ 1932 1933 garray_T si_rep; /* list of fromto_T entries from REP lines */ 1934 garray_T si_repsal; /* list of fromto_T entries from REPSAL lines */ 1935 garray_T si_sal; /* list of fromto_T entries from SAL lines */ 1936 char_u *si_sofofr; /* SOFOFROM text */ 1937 char_u *si_sofoto; /* SOFOTO text */ 1938 int si_nosugfile; /* NOSUGFILE item found */ 1939 int si_nosplitsugs; /* NOSPLITSUGS item found */ 1940 int si_nocompoundsugs; /* NOCOMPOUNDSUGS item found */ 1941 int si_followup; /* soundsalike: ? */ 1942 int si_collapse; /* soundsalike: ? 
*/ 1943 hashtab_T si_commonwords; /* hashtable for common words */ 1944 time_t si_sugtime; /* timestamp for .sug file */ 1945 int si_rem_accents; /* soundsalike: remove accents */ 1946 garray_T si_map; /* MAP info concatenated */ 1947 char_u *si_midword; /* MIDWORD chars or NULL */ 1948 int si_compmax; /* max nr of words for compounding */ 1949 int si_compminlen; /* minimal length for compounding */ 1950 int si_compsylmax; /* max nr of syllables for compounding */ 1951 int si_compoptions; /* COMP_ flags */ 1952 garray_T si_comppat; /* CHECKCOMPOUNDPATTERN items, each stored as 1953 a string */ 1954 char_u *si_compflags; /* flags used for compounding */ 1955 char_u si_nobreak; /* NOBREAK */ 1956 char_u *si_syllable; /* syllable string */ 1957 garray_T si_prefcond; /* table with conditions for postponed 1958 * prefixes, each stored as a string */ 1959 int si_newprefID; /* current value for ah_newID */ 1960 int si_newcompID; /* current value for compound ID */ 1961 } spellinfo_T; 1962 1963 static int is_aff_rule(char_u **items, int itemcnt, char *rulename, int mincount); 1964 static void aff_process_flags(afffile_T *affile, affentry_T *entry); 1965 static int spell_info_item(char_u *s); 1966 static unsigned affitem2flag(int flagtype, char_u *item, char_u *fname, int lnum); 1967 static unsigned get_affitem(int flagtype, char_u **pp); 1968 static void process_compflags(spellinfo_T *spin, afffile_T *aff, char_u *compflags); 1969 static void check_renumber(spellinfo_T *spin); 1970 static void aff_check_number(int spinval, int affval, char *name); 1971 static void aff_check_string(char_u *spinval, char_u *affval, char *name); 1972 static int str_equal(char_u *s1, char_u *s2); 1973 static void add_fromto(spellinfo_T *spin, garray_T *gap, char_u *from, char_u *to); 1974 static int sal_to_bool(char_u *s); 1975 static int get_affix_flags(afffile_T *affile, char_u *afflist); 1976 static int get_pfxlist(afffile_T *affile, char_u *afflist, char_u *store_afflist); 1977 static void 
get_compflags(afffile_T *affile, char_u *afflist, char_u *store_afflist); 1978 static int store_aff_word(spellinfo_T *spin, char_u *word, char_u *afflist, afffile_T *affile, hashtab_T *ht, hashtab_T *xht, int condit, int flags, char_u *pfxlist, int pfxlen); 1979 static void *getroom(spellinfo_T *spin, size_t len, int align); 1980 static char_u *getroom_save(spellinfo_T *spin, char_u *s); 1981 static int store_word(spellinfo_T *spin, char_u *word, int flags, int region, char_u *pfxlist, int need_affix); 1982 static int tree_add_word(spellinfo_T *spin, char_u *word, wordnode_T *tree, int flags, int region, int affixID); 1983 static wordnode_T *get_wordnode(spellinfo_T *spin); 1984 static void free_wordnode(spellinfo_T *spin, wordnode_T *n); 1985 static void wordtree_compress(spellinfo_T *spin, wordnode_T *root); 1986 static int node_compress(spellinfo_T *spin, wordnode_T *node, hashtab_T *ht, int *tot); 1987 static int node_equal(wordnode_T *n1, wordnode_T *n2); 1988 static void clear_node(wordnode_T *node); 1989 static int put_node(FILE *fd, wordnode_T *node, int idx, int regionmask, int prefixtree); 1990 static int sug_filltree(spellinfo_T *spin, slang_T *slang); 1991 static int sug_maketable(spellinfo_T *spin); 1992 static int sug_filltable(spellinfo_T *spin, wordnode_T *node, int startwordnr, garray_T *gap); 1993 static int offset2bytes(int nr, char_u *buf); 1994 static void sug_write(spellinfo_T *spin, char_u *fname); 1995 static void spell_message(spellinfo_T *spin, char_u *str); 1996 static void init_spellfile(void); 1997 1998 /* In the postponed prefixes tree wn_flags is used to store the WFP_ flags, 1999 * but it must be negative to indicate the prefix tree to tree_add_word(). 2000 * Use a negative number with the lower 8 bits zero. 
*/ 2001 #define PFX_FLAGS -256 2002 2003 /* flags for "condit" argument of store_aff_word() */ 2004 #define CONDIT_COMB 1 /* affix must combine */ 2005 #define CONDIT_CFIX 2 /* affix must have CIRCUMFIX flag */ 2006 #define CONDIT_SUF 4 /* add a suffix for matching flags */ 2007 #define CONDIT_AFF 8 /* word already has an affix */ 2008 2009 /* 2010 * Tunable parameters for when the tree is compressed. See 'mkspellmem'. 2011 */ 2012 static long compress_start = 30000; /* memory / SBLOCKSIZE */ 2013 static long compress_inc = 100; /* memory / SBLOCKSIZE */ 2014 static long compress_added = 500000; /* word count */ 2015 2016 /* 2017 * Check the 'mkspellmem' option. Return FAIL if it's wrong. 2018 * Sets "sps_flags". 2019 */ 2020 int 2021 spell_check_msm(void) 2022 { 2023 char_u *p = p_msm; 2024 long start = 0; 2025 long incr = 0; 2026 long added = 0; 2027 2028 if (!VIM_ISDIGIT(*p)) 2029 return FAIL; 2030 /* block count = (value * 1024) / SBLOCKSIZE (but avoid overflow)*/ 2031 start = (getdigits(&p) * 10) / (SBLOCKSIZE / 102); 2032 if (*p != ',') 2033 return FAIL; 2034 ++p; 2035 if (!VIM_ISDIGIT(*p)) 2036 return FAIL; 2037 incr = (getdigits(&p) * 102) / (SBLOCKSIZE / 10); 2038 if (*p != ',') 2039 return FAIL; 2040 ++p; 2041 if (!VIM_ISDIGIT(*p)) 2042 return FAIL; 2043 added = getdigits(&p) * 1024; 2044 if (*p != NUL) 2045 return FAIL; 2046 2047 if (start == 0 || incr == 0 || added == 0 || incr > start) 2048 return FAIL; 2049 2050 compress_start = start; 2051 compress_inc = incr; 2052 compress_added = added; 2053 return OK; 2054 } 2055 2056 #ifdef SPELL_PRINTTREE 2057 /* 2058 * For debugging the tree code: print the current tree in a (more or less) 2059 * readable format, so that we can see what happens when adding a word and/or 2060 * compressing the tree. 2061 * Based on code from Olaf Seibert. 
2062 */ 2063 #define PRINTLINESIZE 1000 2064 #define PRINTWIDTH 6 2065 2066 #define PRINTSOME(l, depth, fmt, a1, a2) vim_snprintf(l + depth * PRINTWIDTH, \ 2067 PRINTLINESIZE - PRINTWIDTH * depth, fmt, a1, a2) 2068 2069 static char line1[PRINTLINESIZE]; 2070 static char line2[PRINTLINESIZE]; 2071 static char line3[PRINTLINESIZE]; 2072 2073 static void 2074 spell_clear_flags(wordnode_T *node) 2075 { 2076 wordnode_T *np; 2077 2078 for (np = node; np != NULL; np = np->wn_sibling) 2079 { 2080 np->wn_u1.index = FALSE; 2081 spell_clear_flags(np->wn_child); 2082 } 2083 } 2084 2085 static void 2086 spell_print_node(wordnode_T *node, int depth) 2087 { 2088 if (node->wn_u1.index) 2089 { 2090 /* Done this node before, print the reference. */ 2091 PRINTSOME(line1, depth, "(%d)", node->wn_nr, 0); 2092 PRINTSOME(line2, depth, " ", 0, 0); 2093 PRINTSOME(line3, depth, " ", 0, 0); 2094 msg(line1); 2095 msg(line2); 2096 msg(line3); 2097 } 2098 else 2099 { 2100 node->wn_u1.index = TRUE; 2101 2102 if (node->wn_byte != NUL) 2103 { 2104 if (node->wn_child != NULL) 2105 PRINTSOME(line1, depth, " %c -> ", node->wn_byte, 0); 2106 else 2107 /* Cannot happen? 
*/ 2108 PRINTSOME(line1, depth, " %c ???", node->wn_byte, 0); 2109 } 2110 else 2111 PRINTSOME(line1, depth, " $ ", 0, 0); 2112 2113 PRINTSOME(line2, depth, "%d/%d ", node->wn_nr, node->wn_refs); 2114 2115 if (node->wn_sibling != NULL) 2116 PRINTSOME(line3, depth, " | ", 0, 0); 2117 else 2118 PRINTSOME(line3, depth, " ", 0, 0); 2119 2120 if (node->wn_byte == NUL) 2121 { 2122 msg(line1); 2123 msg(line2); 2124 msg(line3); 2125 } 2126 2127 /* do the children */ 2128 if (node->wn_byte != NUL && node->wn_child != NULL) 2129 spell_print_node(node->wn_child, depth + 1); 2130 2131 /* do the siblings */ 2132 if (node->wn_sibling != NULL) 2133 { 2134 /* get rid of all parent details except | */ 2135 STRCPY(line1, line3); 2136 STRCPY(line2, line3); 2137 spell_print_node(node->wn_sibling, depth); 2138 } 2139 } 2140 } 2141 2142 static void 2143 spell_print_tree(wordnode_T *root) 2144 { 2145 if (root != NULL) 2146 { 2147 /* Clear the "wn_u1.index" fields, used to remember what has been 2148 * done. */ 2149 spell_clear_flags(root); 2150 2151 /* Recursively print the tree. */ 2152 spell_print_node(root, 0); 2153 } 2154 } 2155 #endif /* SPELL_PRINTTREE */ 2156 2157 /* 2158 * Read the affix file "fname". 2159 * Returns an afffile_T, NULL for complete failure. 
2160 */ 2161 static afffile_T * 2162 spell_read_aff(spellinfo_T *spin, char_u *fname) 2163 { 2164 FILE *fd; 2165 afffile_T *aff; 2166 char_u rline[MAXLINELEN]; 2167 char_u *line; 2168 char_u *pc = NULL; 2169 #define MAXITEMCNT 30 2170 char_u *(items[MAXITEMCNT]); 2171 int itemcnt; 2172 char_u *p; 2173 int lnum = 0; 2174 affheader_T *cur_aff = NULL; 2175 int did_postpone_prefix = FALSE; 2176 int aff_todo = 0; 2177 hashtab_T *tp; 2178 char_u *low = NULL; 2179 char_u *fol = NULL; 2180 char_u *upp = NULL; 2181 int do_rep; 2182 int do_repsal; 2183 int do_sal; 2184 int do_mapline; 2185 int found_map = FALSE; 2186 hashitem_T *hi; 2187 int l; 2188 int compminlen = 0; /* COMPOUNDMIN value */ 2189 int compsylmax = 0; /* COMPOUNDSYLMAX value */ 2190 int compoptions = 0; /* COMP_ flags */ 2191 int compmax = 0; /* COMPOUNDWORDMAX value */ 2192 char_u *compflags = NULL; /* COMPOUNDFLAG and COMPOUNDRULE 2193 concatenated */ 2194 char_u *midword = NULL; /* MIDWORD value */ 2195 char_u *syllable = NULL; /* SYLLABLE value */ 2196 char_u *sofofrom = NULL; /* SOFOFROM value */ 2197 char_u *sofoto = NULL; /* SOFOTO value */ 2198 2199 /* 2200 * Open the file. 2201 */ 2202 fd = mch_fopen((char *)fname, "r"); 2203 if (fd == NULL) 2204 { 2205 semsg(_(e_notopen), fname); 2206 return NULL; 2207 } 2208 2209 vim_snprintf((char *)IObuff, IOSIZE, _("Reading affix file %s..."), fname); 2210 spell_message(spin, IObuff); 2211 2212 /* Only do REP lines when not done in another .aff file already. */ 2213 do_rep = spin->si_rep.ga_len == 0; 2214 2215 /* Only do REPSAL lines when not done in another .aff file already. */ 2216 do_repsal = spin->si_repsal.ga_len == 0; 2217 2218 /* Only do SAL lines when not done in another .aff file already. */ 2219 do_sal = spin->si_sal.ga_len == 0; 2220 2221 /* Only do MAP lines when not done in another .aff file already. */ 2222 do_mapline = spin->si_map.ga_len == 0; 2223 2224 /* 2225 * Allocate and init the afffile_T structure. 
2226 */ 2227 aff = (afffile_T *)getroom(spin, sizeof(afffile_T), TRUE); 2228 if (aff == NULL) 2229 { 2230 fclose(fd); 2231 return NULL; 2232 } 2233 hash_init(&aff->af_pref); 2234 hash_init(&aff->af_suff); 2235 hash_init(&aff->af_comp); 2236 2237 /* 2238 * Read all the lines in the file one by one. 2239 */ 2240 while (!vim_fgets(rline, MAXLINELEN, fd) && !got_int) 2241 { 2242 line_breakcheck(); 2243 ++lnum; 2244 2245 /* Skip comment lines. */ 2246 if (*rline == '#') 2247 continue; 2248 2249 /* Convert from "SET" to 'encoding' when needed. */ 2250 vim_free(pc); 2251 if (spin->si_conv.vc_type != CONV_NONE) 2252 { 2253 pc = string_convert(&spin->si_conv, rline, NULL); 2254 if (pc == NULL) 2255 { 2256 smsg(_("Conversion failure for word in %s line %d: %s"), 2257 fname, lnum, rline); 2258 continue; 2259 } 2260 line = pc; 2261 } 2262 else 2263 { 2264 pc = NULL; 2265 line = rline; 2266 } 2267 2268 /* Split the line up in white separated items. Put a NUL after each 2269 * item. */ 2270 itemcnt = 0; 2271 for (p = line; ; ) 2272 { 2273 while (*p != NUL && *p <= ' ') /* skip white space and CR/NL */ 2274 ++p; 2275 if (*p == NUL) 2276 break; 2277 if (itemcnt == MAXITEMCNT) /* too many items */ 2278 break; 2279 items[itemcnt++] = p; 2280 /* A few items have arbitrary text argument, don't split them. */ 2281 if (itemcnt == 2 && spell_info_item(items[0])) 2282 while (*p >= ' ' || *p == TAB) /* skip until CR/NL */ 2283 ++p; 2284 else 2285 while (*p > ' ') /* skip until white space or CR/NL */ 2286 ++p; 2287 if (*p == NUL) 2288 break; 2289 *p++ = NUL; 2290 } 2291 2292 /* Handle non-empty lines. */ 2293 if (itemcnt > 0) 2294 { 2295 if (is_aff_rule(items, itemcnt, "SET", 2) && aff->af_enc == NULL) 2296 { 2297 /* Setup for conversion from "ENC" to 'encoding'. 
*/ 2298 aff->af_enc = enc_canonize(items[1]); 2299 if (aff->af_enc != NULL && !spin->si_ascii 2300 && convert_setup(&spin->si_conv, aff->af_enc, 2301 p_enc) == FAIL) 2302 smsg(_("Conversion in %s not supported: from %s to %s"), 2303 fname, aff->af_enc, p_enc); 2304 spin->si_conv.vc_fail = TRUE; 2305 } 2306 else if (is_aff_rule(items, itemcnt, "FLAG", 2) 2307 && aff->af_flagtype == AFT_CHAR) 2308 { 2309 if (STRCMP(items[1], "long") == 0) 2310 aff->af_flagtype = AFT_LONG; 2311 else if (STRCMP(items[1], "num") == 0) 2312 aff->af_flagtype = AFT_NUM; 2313 else if (STRCMP(items[1], "caplong") == 0) 2314 aff->af_flagtype = AFT_CAPLONG; 2315 else 2316 smsg(_("Invalid value for FLAG in %s line %d: %s"), 2317 fname, lnum, items[1]); 2318 if (aff->af_rare != 0 2319 || aff->af_keepcase != 0 2320 || aff->af_bad != 0 2321 || aff->af_needaffix != 0 2322 || aff->af_circumfix != 0 2323 || aff->af_needcomp != 0 2324 || aff->af_comproot != 0 2325 || aff->af_nosuggest != 0 2326 || compflags != NULL 2327 || aff->af_suff.ht_used > 0 2328 || aff->af_pref.ht_used > 0) 2329 smsg(_("FLAG after using flags in %s line %d: %s"), 2330 fname, lnum, items[1]); 2331 } 2332 else if (spell_info_item(items[0])) 2333 { 2334 p = (char_u *)getroom(spin, 2335 (spin->si_info == NULL ? 
0 : STRLEN(spin->si_info)) 2336 + STRLEN(items[0]) 2337 + STRLEN(items[1]) + 3, FALSE); 2338 if (p != NULL) 2339 { 2340 if (spin->si_info != NULL) 2341 { 2342 STRCPY(p, spin->si_info); 2343 STRCAT(p, "\n"); 2344 } 2345 STRCAT(p, items[0]); 2346 STRCAT(p, " "); 2347 STRCAT(p, items[1]); 2348 spin->si_info = p; 2349 } 2350 } 2351 else if (is_aff_rule(items, itemcnt, "MIDWORD", 2) 2352 && midword == NULL) 2353 { 2354 midword = getroom_save(spin, items[1]); 2355 } 2356 else if (is_aff_rule(items, itemcnt, "TRY", 2)) 2357 { 2358 /* ignored, we look in the tree for what chars may appear */ 2359 } 2360 /* TODO: remove "RAR" later */ 2361 else if ((is_aff_rule(items, itemcnt, "RAR", 2) 2362 || is_aff_rule(items, itemcnt, "RARE", 2)) 2363 && aff->af_rare == 0) 2364 { 2365 aff->af_rare = affitem2flag(aff->af_flagtype, items[1], 2366 fname, lnum); 2367 } 2368 /* TODO: remove "KEP" later */ 2369 else if ((is_aff_rule(items, itemcnt, "KEP", 2) 2370 || is_aff_rule(items, itemcnt, "KEEPCASE", 2)) 2371 && aff->af_keepcase == 0) 2372 { 2373 aff->af_keepcase = affitem2flag(aff->af_flagtype, items[1], 2374 fname, lnum); 2375 } 2376 else if ((is_aff_rule(items, itemcnt, "BAD", 2) 2377 || is_aff_rule(items, itemcnt, "FORBIDDENWORD", 2)) 2378 && aff->af_bad == 0) 2379 { 2380 aff->af_bad = affitem2flag(aff->af_flagtype, items[1], 2381 fname, lnum); 2382 } 2383 else if (is_aff_rule(items, itemcnt, "NEEDAFFIX", 2) 2384 && aff->af_needaffix == 0) 2385 { 2386 aff->af_needaffix = affitem2flag(aff->af_flagtype, items[1], 2387 fname, lnum); 2388 } 2389 else if (is_aff_rule(items, itemcnt, "CIRCUMFIX", 2) 2390 && aff->af_circumfix == 0) 2391 { 2392 aff->af_circumfix = affitem2flag(aff->af_flagtype, items[1], 2393 fname, lnum); 2394 } 2395 else if (is_aff_rule(items, itemcnt, "NOSUGGEST", 2) 2396 && aff->af_nosuggest == 0) 2397 { 2398 aff->af_nosuggest = affitem2flag(aff->af_flagtype, items[1], 2399 fname, lnum); 2400 } 2401 else if ((is_aff_rule(items, itemcnt, "NEEDCOMPOUND", 2) 2402 || 
is_aff_rule(items, itemcnt, "ONLYINCOMPOUND", 2)) 2403 && aff->af_needcomp == 0) 2404 { 2405 aff->af_needcomp = affitem2flag(aff->af_flagtype, items[1], 2406 fname, lnum); 2407 } 2408 else if (is_aff_rule(items, itemcnt, "COMPOUNDROOT", 2) 2409 && aff->af_comproot == 0) 2410 { 2411 aff->af_comproot = affitem2flag(aff->af_flagtype, items[1], 2412 fname, lnum); 2413 } 2414 else if (is_aff_rule(items, itemcnt, "COMPOUNDFORBIDFLAG", 2) 2415 && aff->af_compforbid == 0) 2416 { 2417 aff->af_compforbid = affitem2flag(aff->af_flagtype, items[1], 2418 fname, lnum); 2419 if (aff->af_pref.ht_used > 0) 2420 smsg(_("Defining COMPOUNDFORBIDFLAG after PFX item may give wrong results in %s line %d"), 2421 fname, lnum); 2422 } 2423 else if (is_aff_rule(items, itemcnt, "COMPOUNDPERMITFLAG", 2) 2424 && aff->af_comppermit == 0) 2425 { 2426 aff->af_comppermit = affitem2flag(aff->af_flagtype, items[1], 2427 fname, lnum); 2428 if (aff->af_pref.ht_used > 0) 2429 smsg(_("Defining COMPOUNDPERMITFLAG after PFX item may give wrong results in %s line %d"), 2430 fname, lnum); 2431 } 2432 else if (is_aff_rule(items, itemcnt, "COMPOUNDFLAG", 2) 2433 && compflags == NULL) 2434 { 2435 /* Turn flag "c" into COMPOUNDRULE compatible string "c+", 2436 * "Na" into "Na+", "1234" into "1234+". */ 2437 p = getroom(spin, STRLEN(items[1]) + 2, FALSE); 2438 if (p != NULL) 2439 { 2440 STRCPY(p, items[1]); 2441 STRCAT(p, "+"); 2442 compflags = p; 2443 } 2444 } 2445 else if (is_aff_rule(items, itemcnt, "COMPOUNDRULES", 2)) 2446 { 2447 /* We don't use the count, but do check that it's a number and 2448 * not COMPOUNDRULE mistyped. */ 2449 if (atoi((char *)items[1]) == 0) 2450 smsg(_("Wrong COMPOUNDRULES value in %s line %d: %s"), 2451 fname, lnum, items[1]); 2452 } 2453 else if (is_aff_rule(items, itemcnt, "COMPOUNDRULE", 2)) 2454 { 2455 /* Don't use the first rule if it is a number. 
*/ 2456 if (compflags != NULL || *skipdigits(items[1]) != NUL) 2457 { 2458 /* Concatenate this string to previously defined ones, 2459 * using a slash to separate them. */ 2460 l = (int)STRLEN(items[1]) + 1; 2461 if (compflags != NULL) 2462 l += (int)STRLEN(compflags) + 1; 2463 p = getroom(spin, l, FALSE); 2464 if (p != NULL) 2465 { 2466 if (compflags != NULL) 2467 { 2468 STRCPY(p, compflags); 2469 STRCAT(p, "/"); 2470 } 2471 STRCAT(p, items[1]); 2472 compflags = p; 2473 } 2474 } 2475 } 2476 else if (is_aff_rule(items, itemcnt, "COMPOUNDWORDMAX", 2) 2477 && compmax == 0) 2478 { 2479 compmax = atoi((char *)items[1]); 2480 if (compmax == 0) 2481 smsg(_("Wrong COMPOUNDWORDMAX value in %s line %d: %s"), 2482 fname, lnum, items[1]); 2483 } 2484 else if (is_aff_rule(items, itemcnt, "COMPOUNDMIN", 2) 2485 && compminlen == 0) 2486 { 2487 compminlen = atoi((char *)items[1]); 2488 if (compminlen == 0) 2489 smsg(_("Wrong COMPOUNDMIN value in %s line %d: %s"), 2490 fname, lnum, items[1]); 2491 } 2492 else if (is_aff_rule(items, itemcnt, "COMPOUNDSYLMAX", 2) 2493 && compsylmax == 0) 2494 { 2495 compsylmax = atoi((char *)items[1]); 2496 if (compsylmax == 0) 2497 smsg(_("Wrong COMPOUNDSYLMAX value in %s line %d: %s"), 2498 fname, lnum, items[1]); 2499 } 2500 else if (is_aff_rule(items, itemcnt, "CHECKCOMPOUNDDUP", 1)) 2501 { 2502 compoptions |= COMP_CHECKDUP; 2503 } 2504 else if (is_aff_rule(items, itemcnt, "CHECKCOMPOUNDREP", 1)) 2505 { 2506 compoptions |= COMP_CHECKREP; 2507 } 2508 else if (is_aff_rule(items, itemcnt, "CHECKCOMPOUNDCASE", 1)) 2509 { 2510 compoptions |= COMP_CHECKCASE; 2511 } 2512 else if (is_aff_rule(items, itemcnt, "CHECKCOMPOUNDTRIPLE", 1)) 2513 { 2514 compoptions |= COMP_CHECKTRIPLE; 2515 } 2516 else if (is_aff_rule(items, itemcnt, "CHECKCOMPOUNDPATTERN", 2)) 2517 { 2518 if (atoi((char *)items[1]) == 0) 2519 smsg(_("Wrong CHECKCOMPOUNDPATTERN value in %s line %d: %s"), 2520 fname, lnum, items[1]); 2521 } 2522 else if (is_aff_rule(items, itemcnt, 
"CHECKCOMPOUNDPATTERN", 3)) 2523 { 2524 garray_T *gap = &spin->si_comppat; 2525 int i; 2526 2527 /* Only add the couple if it isn't already there. */ 2528 for (i = 0; i < gap->ga_len - 1; i += 2) 2529 if (STRCMP(((char_u **)(gap->ga_data))[i], items[1]) == 0 2530 && STRCMP(((char_u **)(gap->ga_data))[i + 1], 2531 items[2]) == 0) 2532 break; 2533 if (i >= gap->ga_len && ga_grow(gap, 2) == OK) 2534 { 2535 ((char_u **)(gap->ga_data))[gap->ga_len++] 2536 = getroom_save(spin, items[1]); 2537 ((char_u **)(gap->ga_data))[gap->ga_len++] 2538 = getroom_save(spin, items[2]); 2539 } 2540 } 2541 else if (is_aff_rule(items, itemcnt, "SYLLABLE", 2) 2542 && syllable == NULL) 2543 { 2544 syllable = getroom_save(spin, items[1]); 2545 } 2546 else if (is_aff_rule(items, itemcnt, "NOBREAK", 1)) 2547 { 2548 spin->si_nobreak = TRUE; 2549 } 2550 else if (is_aff_rule(items, itemcnt, "NOSPLITSUGS", 1)) 2551 { 2552 spin->si_nosplitsugs = TRUE; 2553 } 2554 else if (is_aff_rule(items, itemcnt, "NOCOMPOUNDSUGS", 1)) 2555 { 2556 spin->si_nocompoundsugs = TRUE; 2557 } 2558 else if (is_aff_rule(items, itemcnt, "NOSUGFILE", 1)) 2559 { 2560 spin->si_nosugfile = TRUE; 2561 } 2562 else if (is_aff_rule(items, itemcnt, "PFXPOSTPONE", 1)) 2563 { 2564 aff->af_pfxpostpone = TRUE; 2565 } 2566 else if (is_aff_rule(items, itemcnt, "IGNOREEXTRA", 1)) 2567 { 2568 aff->af_ignoreextra = TRUE; 2569 } 2570 else if ((STRCMP(items[0], "PFX") == 0 2571 || STRCMP(items[0], "SFX") == 0) 2572 && aff_todo == 0 2573 && itemcnt >= 4) 2574 { 2575 int lasti = 4; 2576 char_u key[AH_KEY_LEN]; 2577 2578 if (*items[0] == 'P') 2579 tp = &aff->af_pref; 2580 else 2581 tp = &aff->af_suff; 2582 2583 /* Myspell allows the same affix name to be used multiple 2584 * times. The affix files that do this have an undocumented 2585 * "S" flag on all but the last block, thus we check for that 2586 * and store it in ah_follows. 
*/ 2587 vim_strncpy(key, items[1], AH_KEY_LEN - 1); 2588 hi = hash_find(tp, key); 2589 if (!HASHITEM_EMPTY(hi)) 2590 { 2591 cur_aff = HI2AH(hi); 2592 if (cur_aff->ah_combine != (*items[2] == 'Y')) 2593 smsg(_("Different combining flag in continued affix block in %s line %d: %s"), 2594 fname, lnum, items[1]); 2595 if (!cur_aff->ah_follows) 2596 smsg(_("Duplicate affix in %s line %d: %s"), 2597 fname, lnum, items[1]); 2598 } 2599 else 2600 { 2601 /* New affix letter. */ 2602 cur_aff = (affheader_T *)getroom(spin, 2603 sizeof(affheader_T), TRUE); 2604 if (cur_aff == NULL) 2605 break; 2606 cur_aff->ah_flag = affitem2flag(aff->af_flagtype, items[1], 2607 fname, lnum); 2608 if (cur_aff->ah_flag == 0 || STRLEN(items[1]) >= AH_KEY_LEN) 2609 break; 2610 if (cur_aff->ah_flag == aff->af_bad 2611 || cur_aff->ah_flag == aff->af_rare 2612 || cur_aff->ah_flag == aff->af_keepcase 2613 || cur_aff->ah_flag == aff->af_needaffix 2614 || cur_aff->ah_flag == aff->af_circumfix 2615 || cur_aff->ah_flag == aff->af_nosuggest 2616 || cur_aff->ah_flag == aff->af_needcomp 2617 || cur_aff->ah_flag == aff->af_comproot) 2618 smsg(_("Affix also used for BAD/RARE/KEEPCASE/NEEDAFFIX/NEEDCOMPOUND/NOSUGGEST in %s line %d: %s"), 2619 fname, lnum, items[1]); 2620 STRCPY(cur_aff->ah_key, items[1]); 2621 hash_add(tp, cur_aff->ah_key); 2622 2623 cur_aff->ah_combine = (*items[2] == 'Y'); 2624 } 2625 2626 /* Check for the "S" flag, which apparently means that another 2627 * block with the same affix name is following. */ 2628 if (itemcnt > lasti && STRCMP(items[lasti], "S") == 0) 2629 { 2630 ++lasti; 2631 cur_aff->ah_follows = TRUE; 2632 } 2633 else 2634 cur_aff->ah_follows = FALSE; 2635 2636 /* Myspell allows extra text after the item, but that might 2637 * mean mistakes go unnoticed. Require a comment-starter. 
*/ 2638 if (itemcnt > lasti && *items[lasti] != '#') 2639 smsg(_(e_afftrailing), fname, lnum, items[lasti]); 2640 2641 if (STRCMP(items[2], "Y") != 0 && STRCMP(items[2], "N") != 0) 2642 smsg(_("Expected Y or N in %s line %d: %s"), 2643 fname, lnum, items[2]); 2644 2645 if (*items[0] == 'P' && aff->af_pfxpostpone) 2646 { 2647 if (cur_aff->ah_newID == 0) 2648 { 2649 /* Use a new number in the .spl file later, to be able 2650 * to handle multiple .aff files. */ 2651 check_renumber(spin); 2652 cur_aff->ah_newID = ++spin->si_newprefID; 2653 2654 /* We only really use ah_newID if the prefix is 2655 * postponed. We know that only after handling all 2656 * the items. */ 2657 did_postpone_prefix = FALSE; 2658 } 2659 else 2660 /* Did use the ID in a previous block. */ 2661 did_postpone_prefix = TRUE; 2662 } 2663 2664 aff_todo = atoi((char *)items[3]); 2665 } 2666 else if ((STRCMP(items[0], "PFX") == 0 2667 || STRCMP(items[0], "SFX") == 0) 2668 && aff_todo > 0 2669 && STRCMP(cur_aff->ah_key, items[1]) == 0 2670 && itemcnt >= 5) 2671 { 2672 affentry_T *aff_entry; 2673 int upper = FALSE; 2674 int lasti = 5; 2675 2676 /* Myspell allows extra text after the item, but that might 2677 * mean mistakes go unnoticed. Require a comment-starter, 2678 * unless IGNOREEXTRA is used. Hunspell uses a "-" item. */ 2679 if (itemcnt > lasti 2680 && !aff->af_ignoreextra 2681 && *items[lasti] != '#' 2682 && (STRCMP(items[lasti], "-") != 0 2683 || itemcnt != lasti + 1)) 2684 smsg(_(e_afftrailing), fname, lnum, items[lasti]); 2685 2686 /* New item for an affix letter. 
*/ 2687 --aff_todo; 2688 aff_entry = (affentry_T *)getroom(spin, 2689 sizeof(affentry_T), TRUE); 2690 if (aff_entry == NULL) 2691 break; 2692 2693 if (STRCMP(items[2], "0") != 0) 2694 aff_entry->ae_chop = getroom_save(spin, items[2]); 2695 if (STRCMP(items[3], "0") != 0) 2696 { 2697 aff_entry->ae_add = getroom_save(spin, items[3]); 2698 2699 /* Recognize flags on the affix: abcd/XYZ */ 2700 aff_entry->ae_flags = vim_strchr(aff_entry->ae_add, '/'); 2701 if (aff_entry->ae_flags != NULL) 2702 { 2703 *aff_entry->ae_flags++ = NUL; 2704 aff_process_flags(aff, aff_entry); 2705 } 2706 } 2707 2708 /* Don't use an affix entry with non-ASCII characters when 2709 * "spin->si_ascii" is TRUE. */ 2710 if (!spin->si_ascii || !(has_non_ascii(aff_entry->ae_chop) 2711 || has_non_ascii(aff_entry->ae_add))) 2712 { 2713 aff_entry->ae_next = cur_aff->ah_first; 2714 cur_aff->ah_first = aff_entry; 2715 2716 if (STRCMP(items[4], ".") != 0) 2717 { 2718 char_u buf[MAXLINELEN]; 2719 2720 aff_entry->ae_cond = getroom_save(spin, items[4]); 2721 if (*items[0] == 'P') 2722 sprintf((char *)buf, "^%s", items[4]); 2723 else 2724 sprintf((char *)buf, "%s$", items[4]); 2725 aff_entry->ae_prog = vim_regcomp(buf, 2726 RE_MAGIC + RE_STRING + RE_STRICT); 2727 if (aff_entry->ae_prog == NULL) 2728 smsg(_("Broken condition in %s line %d: %s"), 2729 fname, lnum, items[4]); 2730 } 2731 2732 /* For postponed prefixes we need an entry in si_prefcond 2733 * for the condition. Use an existing one if possible. 2734 * Can't be done for an affix with flags, ignoring 2735 * COMPOUNDFORBIDFLAG and COMPOUNDPERMITFLAG. */ 2736 if (*items[0] == 'P' && aff->af_pfxpostpone 2737 && aff_entry->ae_flags == NULL) 2738 { 2739 /* When the chop string is one lower-case letter and 2740 * the add string ends in the upper-case letter we set 2741 * the "upper" flag, clear "ae_chop" and remove the 2742 * letters from "ae_add". The condition must either 2743 * be empty or start with the same letter. 
*/ 2744 if (aff_entry->ae_chop != NULL 2745 && aff_entry->ae_add != NULL 2746 && aff_entry->ae_chop[(*mb_ptr2len)( 2747 aff_entry->ae_chop)] == NUL) 2748 { 2749 int c, c_up; 2750 2751 c = PTR2CHAR(aff_entry->ae_chop); 2752 c_up = SPELL_TOUPPER(c); 2753 if (c_up != c 2754 && (aff_entry->ae_cond == NULL 2755 || PTR2CHAR(aff_entry->ae_cond) == c)) 2756 { 2757 p = aff_entry->ae_add 2758 + STRLEN(aff_entry->ae_add); 2759 MB_PTR_BACK(aff_entry->ae_add, p); 2760 if (PTR2CHAR(p) == c_up) 2761 { 2762 upper = TRUE; 2763 aff_entry->ae_chop = NULL; 2764 *p = NUL; 2765 2766 /* The condition is matched with the 2767 * actual word, thus must check for the 2768 * upper-case letter. */ 2769 if (aff_entry->ae_cond != NULL) 2770 { 2771 char_u buf[MAXLINELEN]; 2772 2773 if (has_mbyte) 2774 { 2775 onecap_copy(items[4], buf, TRUE); 2776 aff_entry->ae_cond = getroom_save( 2777 spin, buf); 2778 } 2779 else 2780 *aff_entry->ae_cond = c_up; 2781 if (aff_entry->ae_cond != NULL) 2782 { 2783 sprintf((char *)buf, "^%s", 2784 aff_entry->ae_cond); 2785 vim_regfree(aff_entry->ae_prog); 2786 aff_entry->ae_prog = vim_regcomp( 2787 buf, RE_MAGIC + RE_STRING); 2788 } 2789 } 2790 } 2791 } 2792 } 2793 2794 if (aff_entry->ae_chop == NULL 2795 && aff_entry->ae_flags == NULL) 2796 { 2797 int idx; 2798 char_u **pp; 2799 int n; 2800 2801 /* Find a previously used condition. */ 2802 for (idx = spin->si_prefcond.ga_len - 1; idx >= 0; 2803 --idx) 2804 { 2805 p = ((char_u **)spin->si_prefcond.ga_data)[idx]; 2806 if (str_equal(p, aff_entry->ae_cond)) 2807 break; 2808 } 2809 if (idx < 0 && ga_grow(&spin->si_prefcond, 1) == OK) 2810 { 2811 /* Not found, add a new condition. */ 2812 idx = spin->si_prefcond.ga_len++; 2813 pp = ((char_u **)spin->si_prefcond.ga_data) 2814 + idx; 2815 if (aff_entry->ae_cond == NULL) 2816 *pp = NULL; 2817 else 2818 *pp = getroom_save(spin, 2819 aff_entry->ae_cond); 2820 } 2821 2822 /* Add the prefix to the prefix tree. 
*/ 2823 if (aff_entry->ae_add == NULL) 2824 p = (char_u *)""; 2825 else 2826 p = aff_entry->ae_add; 2827 2828 /* PFX_FLAGS is a negative number, so that 2829 * tree_add_word() knows this is the prefix tree. */ 2830 n = PFX_FLAGS; 2831 if (!cur_aff->ah_combine) 2832 n |= WFP_NC; 2833 if (upper) 2834 n |= WFP_UP; 2835 if (aff_entry->ae_comppermit) 2836 n |= WFP_COMPPERMIT; 2837 if (aff_entry->ae_compforbid) 2838 n |= WFP_COMPFORBID; 2839 tree_add_word(spin, p, spin->si_prefroot, n, 2840 idx, cur_aff->ah_newID); 2841 did_postpone_prefix = TRUE; 2842 } 2843 2844 /* Didn't actually use ah_newID, backup si_newprefID. */ 2845 if (aff_todo == 0 && !did_postpone_prefix) 2846 { 2847 --spin->si_newprefID; 2848 cur_aff->ah_newID = 0; 2849 } 2850 } 2851 } 2852 } 2853 else if (is_aff_rule(items, itemcnt, "FOL", 2) && fol == NULL) 2854 { 2855 fol = vim_strsave(items[1]); 2856 } 2857 else if (is_aff_rule(items, itemcnt, "LOW", 2) && low == NULL) 2858 { 2859 low = vim_strsave(items[1]); 2860 } 2861 else if (is_aff_rule(items, itemcnt, "UPP", 2) && upp == NULL) 2862 { 2863 upp = vim_strsave(items[1]); 2864 } 2865 else if (is_aff_rule(items, itemcnt, "REP", 2) 2866 || is_aff_rule(items, itemcnt, "REPSAL", 2)) 2867 { 2868 /* Ignore REP/REPSAL count */; 2869 if (!isdigit(*items[1])) 2870 smsg(_("Expected REP(SAL) count in %s line %d"), 2871 fname, lnum); 2872 } 2873 else if ((STRCMP(items[0], "REP") == 0 2874 || STRCMP(items[0], "REPSAL") == 0) 2875 && itemcnt >= 3) 2876 { 2877 /* REP/REPSAL item */ 2878 /* Myspell ignores extra arguments, we require it starts with 2879 * # to detect mistakes. */ 2880 if (itemcnt > 3 && items[3][0] != '#') 2881 smsg(_(e_afftrailing), fname, lnum, items[3]); 2882 if (items[0][3] == 'S' ? do_repsal : do_rep) 2883 { 2884 /* Replace underscore with space (can't include a space 2885 * directly). 
*/ 2886 for (p = items[1]; *p != NUL; MB_PTR_ADV(p)) 2887 if (*p == '_') 2888 *p = ' '; 2889 for (p = items[2]; *p != NUL; MB_PTR_ADV(p)) 2890 if (*p == '_') 2891 *p = ' '; 2892 add_fromto(spin, items[0][3] == 'S' 2893 ? &spin->si_repsal 2894 : &spin->si_rep, items[1], items[2]); 2895 } 2896 } 2897 else if (is_aff_rule(items, itemcnt, "MAP", 2)) 2898 { 2899 /* MAP item or count */ 2900 if (!found_map) 2901 { 2902 /* First line contains the count. */ 2903 found_map = TRUE; 2904 if (!isdigit(*items[1])) 2905 smsg(_("Expected MAP count in %s line %d"), 2906 fname, lnum); 2907 } 2908 else if (do_mapline) 2909 { 2910 int c; 2911 2912 /* Check that every character appears only once. */ 2913 for (p = items[1]; *p != NUL; ) 2914 { 2915 c = mb_ptr2char_adv(&p); 2916 if ((spin->si_map.ga_len > 0 2917 && vim_strchr(spin->si_map.ga_data, c) 2918 != NULL) 2919 || vim_strchr(p, c) != NULL) 2920 smsg(_("Duplicate character in MAP in %s line %d"), 2921 fname, lnum); 2922 } 2923 2924 /* We simply concatenate all the MAP strings, separated by 2925 * slashes. */ 2926 ga_concat(&spin->si_map, items[1]); 2927 ga_append(&spin->si_map, '/'); 2928 } 2929 } 2930 /* Accept "SAL from to" and "SAL from to #comment". */ 2931 else if (is_aff_rule(items, itemcnt, "SAL", 3)) 2932 { 2933 if (do_sal) 2934 { 2935 /* SAL item (sounds-a-like) 2936 * Either one of the known keys or a from-to pair. */ 2937 if (STRCMP(items[1], "followup") == 0) 2938 spin->si_followup = sal_to_bool(items[2]); 2939 else if (STRCMP(items[1], "collapse_result") == 0) 2940 spin->si_collapse = sal_to_bool(items[2]); 2941 else if (STRCMP(items[1], "remove_accents") == 0) 2942 spin->si_rem_accents = sal_to_bool(items[2]); 2943 else 2944 /* when "to" is "_" it means empty */ 2945 add_fromto(spin, &spin->si_sal, items[1], 2946 STRCMP(items[2], "_") == 0 ? 
(char_u *)"" 2947 : items[2]); 2948 } 2949 } 2950 else if (is_aff_rule(items, itemcnt, "SOFOFROM", 2) 2951 && sofofrom == NULL) 2952 { 2953 sofofrom = getroom_save(spin, items[1]); 2954 } 2955 else if (is_aff_rule(items, itemcnt, "SOFOTO", 2) 2956 && sofoto == NULL) 2957 { 2958 sofoto = getroom_save(spin, items[1]); 2959 } 2960 else if (STRCMP(items[0], "COMMON") == 0) 2961 { 2962 int i; 2963 2964 for (i = 1; i < itemcnt; ++i) 2965 { 2966 if (HASHITEM_EMPTY(hash_find(&spin->si_commonwords, 2967 items[i]))) 2968 { 2969 p = vim_strsave(items[i]); 2970 if (p == NULL) 2971 break; 2972 hash_add(&spin->si_commonwords, p); 2973 } 2974 } 2975 } 2976 else 2977 smsg(_("Unrecognized or duplicate item in %s line %d: %s"), 2978 fname, lnum, items[0]); 2979 } 2980 } 2981 2982 if (fol != NULL || low != NULL || upp != NULL) 2983 { 2984 if (spin->si_clear_chartab) 2985 { 2986 /* Clear the char type tables, don't want to use any of the 2987 * currently used spell properties. */ 2988 init_spell_chartab(); 2989 spin->si_clear_chartab = FALSE; 2990 } 2991 2992 /* 2993 * Don't write a word table for an ASCII file, so that we don't check 2994 * for conflicts with a word table that matches 'encoding'. 2995 * Don't write one for utf-8 either, we use utf_*() and 2996 * mb_get_class(), the list of chars in the file will be incomplete. 2997 */ 2998 if (!spin->si_ascii && !enc_utf8) 2999 { 3000 if (fol == NULL || low == NULL || upp == NULL) 3001 smsg(_("Missing FOL/LOW/UPP line in %s"), fname); 3002 else 3003 (void)set_spell_chartab(fol, low, upp); 3004 } 3005 3006 vim_free(fol); 3007 vim_free(low); 3008 vim_free(upp); 3009 } 3010 3011 /* Use compound specifications of the .aff file for the spell info. 
*/ 3012 if (compmax != 0) 3013 { 3014 aff_check_number(spin->si_compmax, compmax, "COMPOUNDWORDMAX"); 3015 spin->si_compmax = compmax; 3016 } 3017 3018 if (compminlen != 0) 3019 { 3020 aff_check_number(spin->si_compminlen, compminlen, "COMPOUNDMIN"); 3021 spin->si_compminlen = compminlen; 3022 } 3023 3024 if (compsylmax != 0) 3025 { 3026 if (syllable == NULL) 3027 smsg(_("COMPOUNDSYLMAX used without SYLLABLE")); 3028 aff_check_number(spin->si_compsylmax, compsylmax, "COMPOUNDSYLMAX"); 3029 spin->si_compsylmax = compsylmax; 3030 } 3031 3032 if (compoptions != 0) 3033 { 3034 aff_check_number(spin->si_compoptions, compoptions, "COMPOUND options"); 3035 spin->si_compoptions |= compoptions; 3036 } 3037 3038 if (compflags != NULL) 3039 process_compflags(spin, aff, compflags); 3040 3041 /* Check that we didn't use too many renumbered flags. */ 3042 if (spin->si_newcompID < spin->si_newprefID) 3043 { 3044 if (spin->si_newcompID == 127 || spin->si_newcompID == 255) 3045 msg(_("Too many postponed prefixes")); 3046 else if (spin->si_newprefID == 0 || spin->si_newprefID == 127) 3047 msg(_("Too many compound flags")); 3048 else 3049 msg(_("Too many postponed prefixes and/or compound flags")); 3050 } 3051 3052 if (syllable != NULL) 3053 { 3054 aff_check_string(spin->si_syllable, syllable, "SYLLABLE"); 3055 spin->si_syllable = syllable; 3056 } 3057 3058 if (sofofrom != NULL || sofoto != NULL) 3059 { 3060 if (sofofrom == NULL || sofoto == NULL) 3061 smsg(_("Missing SOFO%s line in %s"), 3062 sofofrom == NULL ? 
"FROM" : "TO", fname); 3063 else if (spin->si_sal.ga_len > 0) 3064 smsg(_("Both SAL and SOFO lines in %s"), fname); 3065 else 3066 { 3067 aff_check_string(spin->si_sofofr, sofofrom, "SOFOFROM"); 3068 aff_check_string(spin->si_sofoto, sofoto, "SOFOTO"); 3069 spin->si_sofofr = sofofrom; 3070 spin->si_sofoto = sofoto; 3071 } 3072 } 3073 3074 if (midword != NULL) 3075 { 3076 aff_check_string(spin->si_midword, midword, "MIDWORD"); 3077 spin->si_midword = midword; 3078 } 3079 3080 vim_free(pc); 3081 fclose(fd); 3082 return aff; 3083 } 3084 3085 /* 3086 * Return TRUE when items[0] equals "rulename", there are "mincount" items or 3087 * a comment is following after item "mincount". 3088 */ 3089 static int 3090 is_aff_rule( 3091 char_u **items, 3092 int itemcnt, 3093 char *rulename, 3094 int mincount) 3095 { 3096 return (STRCMP(items[0], rulename) == 0 3097 && (itemcnt == mincount 3098 || (itemcnt > mincount && items[mincount][0] == '#'))); 3099 } 3100 3101 /* 3102 * For affix "entry" move COMPOUNDFORBIDFLAG and COMPOUNDPERMITFLAG from 3103 * ae_flags to ae_comppermit and ae_compforbid. 3104 */ 3105 static void 3106 aff_process_flags(afffile_T *affile, affentry_T *entry) 3107 { 3108 char_u *p; 3109 char_u *prevp; 3110 unsigned flag; 3111 3112 if (entry->ae_flags != NULL 3113 && (affile->af_compforbid != 0 || affile->af_comppermit != 0)) 3114 { 3115 for (p = entry->ae_flags; *p != NUL; ) 3116 { 3117 prevp = p; 3118 flag = get_affitem(affile->af_flagtype, &p); 3119 if (flag == affile->af_comppermit || flag == affile->af_compforbid) 3120 { 3121 STRMOVE(prevp, p); 3122 p = prevp; 3123 if (flag == affile->af_comppermit) 3124 entry->ae_comppermit = TRUE; 3125 else 3126 entry->ae_compforbid = TRUE; 3127 } 3128 if (affile->af_flagtype == AFT_NUM && *p == ',') 3129 ++p; 3130 } 3131 if (*entry->ae_flags == NUL) 3132 entry->ae_flags = NULL; /* nothing left */ 3133 } 3134 } 3135 3136 /* 3137 * Return TRUE if "s" is the name of an info item in the affix file. 
3138 */ 3139 static int 3140 spell_info_item(char_u *s) 3141 { 3142 return STRCMP(s, "NAME") == 0 3143 || STRCMP(s, "HOME") == 0 3144 || STRCMP(s, "VERSION") == 0 3145 || STRCMP(s, "AUTHOR") == 0 3146 || STRCMP(s, "EMAIL") == 0 3147 || STRCMP(s, "COPYRIGHT") == 0; 3148 } 3149 3150 /* 3151 * Turn an affix flag name into a number, according to the FLAG type. 3152 * returns zero for failure. 3153 */ 3154 static unsigned 3155 affitem2flag( 3156 int flagtype, 3157 char_u *item, 3158 char_u *fname, 3159 int lnum) 3160 { 3161 unsigned res; 3162 char_u *p = item; 3163 3164 res = get_affitem(flagtype, &p); 3165 if (res == 0) 3166 { 3167 if (flagtype == AFT_NUM) 3168 smsg(_("Flag is not a number in %s line %d: %s"), 3169 fname, lnum, item); 3170 else 3171 smsg(_("Illegal flag in %s line %d: %s"), 3172 fname, lnum, item); 3173 } 3174 if (*p != NUL) 3175 { 3176 smsg(_(e_affname), fname, lnum, item); 3177 return 0; 3178 } 3179 3180 return res; 3181 } 3182 3183 /* 3184 * Get one affix name from "*pp" and advance the pointer. 3185 * Returns zero for an error, still advances the pointer then. 3186 */ 3187 static unsigned 3188 get_affitem(int flagtype, char_u **pp) 3189 { 3190 int res; 3191 3192 if (flagtype == AFT_NUM) 3193 { 3194 if (!VIM_ISDIGIT(**pp)) 3195 { 3196 ++*pp; /* always advance, avoid getting stuck */ 3197 return 0; 3198 } 3199 res = getdigits(pp); 3200 } 3201 else 3202 { 3203 res = mb_ptr2char_adv(pp); 3204 if (flagtype == AFT_LONG || (flagtype == AFT_CAPLONG 3205 && res >= 'A' && res <= 'Z')) 3206 { 3207 if (**pp == NUL) 3208 return 0; 3209 res = mb_ptr2char_adv(pp) + (res << 16); 3210 } 3211 } 3212 return res; 3213 } 3214 3215 /* 3216 * Process the "compflags" string used in an affix file and append it to 3217 * spin->si_compflags. 3218 * The processing involves changing the affix names to ID numbers, so that 3219 * they fit in one byte. 
3220 */ 3221 static void 3222 process_compflags( 3223 spellinfo_T *spin, 3224 afffile_T *aff, 3225 char_u *compflags) 3226 { 3227 char_u *p; 3228 char_u *prevp; 3229 unsigned flag; 3230 compitem_T *ci; 3231 int id; 3232 int len; 3233 char_u *tp; 3234 char_u key[AH_KEY_LEN]; 3235 hashitem_T *hi; 3236 3237 /* Make room for the old and the new compflags, concatenated with a / in 3238 * between. Processing it makes it shorter, but we don't know by how 3239 * much, thus allocate the maximum. */ 3240 len = (int)STRLEN(compflags) + 1; 3241 if (spin->si_compflags != NULL) 3242 len += (int)STRLEN(spin->si_compflags) + 1; 3243 p = getroom(spin, len, FALSE); 3244 if (p == NULL) 3245 return; 3246 if (spin->si_compflags != NULL) 3247 { 3248 STRCPY(p, spin->si_compflags); 3249 STRCAT(p, "/"); 3250 } 3251 spin->si_compflags = p; 3252 tp = p + STRLEN(p); 3253 3254 for (p = compflags; *p != NUL; ) 3255 { 3256 if (vim_strchr((char_u *)"/?*+[]", *p) != NULL) 3257 /* Copy non-flag characters directly. */ 3258 *tp++ = *p++; 3259 else 3260 { 3261 /* First get the flag number, also checks validity. */ 3262 prevp = p; 3263 flag = get_affitem(aff->af_flagtype, &p); 3264 if (flag != 0) 3265 { 3266 /* Find the flag in the hashtable. If it was used before, use 3267 * the existing ID. Otherwise add a new entry. */ 3268 vim_strncpy(key, prevp, p - prevp); 3269 hi = hash_find(&aff->af_comp, key); 3270 if (!HASHITEM_EMPTY(hi)) 3271 id = HI2CI(hi)->ci_newID; 3272 else 3273 { 3274 ci = (compitem_T *)getroom(spin, sizeof(compitem_T), TRUE); 3275 if (ci == NULL) 3276 break; 3277 STRCPY(ci->ci_key, key); 3278 ci->ci_flag = flag; 3279 /* Avoid using a flag ID that has a special meaning in a 3280 * regexp (also inside []). 
*/ 3281 do 3282 { 3283 check_renumber(spin); 3284 id = spin->si_newcompID--; 3285 } while (vim_strchr((char_u *)"/?*+[]\\-^", id) != NULL); 3286 ci->ci_newID = id; 3287 hash_add(&aff->af_comp, ci->ci_key); 3288 } 3289 *tp++ = id; 3290 } 3291 if (aff->af_flagtype == AFT_NUM && *p == ',') 3292 ++p; 3293 } 3294 } 3295 3296 *tp = NUL; 3297 } 3298 3299 /* 3300 * Check that the new IDs for postponed affixes and compounding don't overrun 3301 * each other. We have almost 255 available, but start at 0-127 to avoid 3302 * using two bytes for utf-8. When the 0-127 range is used up go to 128-255. 3303 * When that is used up an error message is given. 3304 */ 3305 static void 3306 check_renumber(spellinfo_T *spin) 3307 { 3308 if (spin->si_newprefID == spin->si_newcompID && spin->si_newcompID < 128) 3309 { 3310 spin->si_newprefID = 127; 3311 spin->si_newcompID = 255; 3312 } 3313 } 3314 3315 /* 3316 * Return TRUE if flag "flag" appears in affix list "afflist". 3317 */ 3318 static int 3319 flag_in_afflist(int flagtype, char_u *afflist, unsigned flag) 3320 { 3321 char_u *p; 3322 unsigned n; 3323 3324 switch (flagtype) 3325 { 3326 case AFT_CHAR: 3327 return vim_strchr(afflist, flag) != NULL; 3328 3329 case AFT_CAPLONG: 3330 case AFT_LONG: 3331 for (p = afflist; *p != NUL; ) 3332 { 3333 n = mb_ptr2char_adv(&p); 3334 if ((flagtype == AFT_LONG || (n >= 'A' && n <= 'Z')) 3335 && *p != NUL) 3336 n = mb_ptr2char_adv(&p) + (n << 16); 3337 if (n == flag) 3338 return TRUE; 3339 } 3340 break; 3341 3342 case AFT_NUM: 3343 for (p = afflist; *p != NUL; ) 3344 { 3345 n = getdigits(&p); 3346 if (n == flag) 3347 return TRUE; 3348 if (*p != NUL) /* skip over comma */ 3349 ++p; 3350 } 3351 break; 3352 } 3353 return FALSE; 3354 } 3355 3356 /* 3357 * Give a warning when "spinval" and "affval" numbers are set and not the same. 
3358 */ 3359 static void 3360 aff_check_number(int spinval, int affval, char *name) 3361 { 3362 if (spinval != 0 && spinval != affval) 3363 smsg(_("%s value differs from what is used in another .aff file"), name); 3364 } 3365 3366 /* 3367 * Give a warning when "spinval" and "affval" strings are set and not the same. 3368 */ 3369 static void 3370 aff_check_string(char_u *spinval, char_u *affval, char *name) 3371 { 3372 if (spinval != NULL && STRCMP(spinval, affval) != 0) 3373 smsg(_("%s value differs from what is used in another .aff file"), name); 3374 } 3375 3376 /* 3377 * Return TRUE if strings "s1" and "s2" are equal. Also consider both being 3378 * NULL as equal. 3379 */ 3380 static int 3381 str_equal(char_u *s1, char_u *s2) 3382 { 3383 if (s1 == NULL || s2 == NULL) 3384 return s1 == s2; 3385 return STRCMP(s1, s2) == 0; 3386 } 3387 3388 /* 3389 * Add a from-to item to "gap". Used for REP and SAL items. 3390 * They are stored case-folded. 3391 */ 3392 static void 3393 add_fromto( 3394 spellinfo_T *spin, 3395 garray_T *gap, 3396 char_u *from, 3397 char_u *to) 3398 { 3399 fromto_T *ftp; 3400 char_u word[MAXWLEN]; 3401 3402 if (ga_grow(gap, 1) == OK) 3403 { 3404 ftp = ((fromto_T *)gap->ga_data) + gap->ga_len; 3405 (void)spell_casefold(from, (int)STRLEN(from), word, MAXWLEN); 3406 ftp->ft_from = getroom_save(spin, word); 3407 (void)spell_casefold(to, (int)STRLEN(to), word, MAXWLEN); 3408 ftp->ft_to = getroom_save(spin, word); 3409 ++gap->ga_len; 3410 } 3411 } 3412 3413 /* 3414 * Convert a boolean argument in a SAL line to TRUE or FALSE; 3415 */ 3416 static int 3417 sal_to_bool(char_u *s) 3418 { 3419 return STRCMP(s, "1") == 0 || STRCMP(s, "true") == 0; 3420 } 3421 3422 /* 3423 * Free the structure filled by spell_read_aff(). 
3424 */ 3425 static void 3426 spell_free_aff(afffile_T *aff) 3427 { 3428 hashtab_T *ht; 3429 hashitem_T *hi; 3430 int todo; 3431 affheader_T *ah; 3432 affentry_T *ae; 3433 3434 vim_free(aff->af_enc); 3435 3436 /* All this trouble to free the "ae_prog" items... */ 3437 for (ht = &aff->af_pref; ; ht = &aff->af_suff) 3438 { 3439 todo = (int)ht->ht_used; 3440 for (hi = ht->ht_array; todo > 0; ++hi) 3441 { 3442 if (!HASHITEM_EMPTY(hi)) 3443 { 3444 --todo; 3445 ah = HI2AH(hi); 3446 for (ae = ah->ah_first; ae != NULL; ae = ae->ae_next) 3447 vim_regfree(ae->ae_prog); 3448 } 3449 } 3450 if (ht == &aff->af_suff) 3451 break; 3452 } 3453 3454 hash_clear(&aff->af_pref); 3455 hash_clear(&aff->af_suff); 3456 hash_clear(&aff->af_comp); 3457 } 3458 3459 /* 3460 * Read dictionary file "fname". 3461 * Returns OK or FAIL; 3462 */ 3463 static int 3464 spell_read_dic(spellinfo_T *spin, char_u *fname, afffile_T *affile) 3465 { 3466 hashtab_T ht; 3467 char_u line[MAXLINELEN]; 3468 char_u *p; 3469 char_u *afflist; 3470 char_u store_afflist[MAXWLEN]; 3471 int pfxlen; 3472 int need_affix; 3473 char_u *dw; 3474 char_u *pc; 3475 char_u *w; 3476 int l; 3477 hash_T hash; 3478 hashitem_T *hi; 3479 FILE *fd; 3480 int lnum = 1; 3481 int non_ascii = 0; 3482 int retval = OK; 3483 char_u message[MAXLINELEN + MAXWLEN]; 3484 int flags; 3485 int duplicate = 0; 3486 3487 /* 3488 * Open the file. 3489 */ 3490 fd = mch_fopen((char *)fname, "r"); 3491 if (fd == NULL) 3492 { 3493 semsg(_(e_notopen), fname); 3494 return FAIL; 3495 } 3496 3497 /* The hashtable is only used to detect duplicated words. */ 3498 hash_init(&ht); 3499 3500 vim_snprintf((char *)IObuff, IOSIZE, 3501 _("Reading dictionary file %s..."), fname); 3502 spell_message(spin, IObuff); 3503 3504 /* start with a message for the first line */ 3505 spin->si_msg_count = 999999; 3506 3507 /* Read and ignore the first line: word count. 
*/ 3508 (void)vim_fgets(line, MAXLINELEN, fd); 3509 if (!vim_isdigit(*skipwhite(line))) 3510 semsg(_("E760: No word count in %s"), fname); 3511 3512 /* 3513 * Read all the lines in the file one by one. 3514 * The words are converted to 'encoding' here, before being added to 3515 * the hashtable. 3516 */ 3517 while (!vim_fgets(line, MAXLINELEN, fd) && !got_int) 3518 { 3519 line_breakcheck(); 3520 ++lnum; 3521 if (line[0] == '#' || line[0] == '/') 3522 continue; /* comment line */ 3523 3524 /* Remove CR, LF and white space from the end. White space halfway 3525 * the word is kept to allow e.g., "et al.". */ 3526 l = (int)STRLEN(line); 3527 while (l > 0 && line[l - 1] <= ' ') 3528 --l; 3529 if (l == 0) 3530 continue; /* empty line */ 3531 line[l] = NUL; 3532 3533 /* Convert from "SET" to 'encoding' when needed. */ 3534 if (spin->si_conv.vc_type != CONV_NONE) 3535 { 3536 pc = string_convert(&spin->si_conv, line, NULL); 3537 if (pc == NULL) 3538 { 3539 smsg(_("Conversion failure for word in %s line %d: %s"), 3540 fname, lnum, line); 3541 continue; 3542 } 3543 w = pc; 3544 } 3545 else 3546 { 3547 pc = NULL; 3548 w = line; 3549 } 3550 3551 /* Truncate the word at the "/", set "afflist" to what follows. 3552 * Replace "\/" by "/" and "\\" by "\". */ 3553 afflist = NULL; 3554 for (p = w; *p != NUL; MB_PTR_ADV(p)) 3555 { 3556 if (*p == '\\' && (p[1] == '\\' || p[1] == '/')) 3557 STRMOVE(p, p + 1); 3558 else if (*p == '/') 3559 { 3560 *p = NUL; 3561 afflist = p + 1; 3562 break; 3563 } 3564 } 3565 3566 /* Skip non-ASCII words when "spin->si_ascii" is TRUE. */ 3567 if (spin->si_ascii && has_non_ascii(w)) 3568 { 3569 ++non_ascii; 3570 vim_free(pc); 3571 continue; 3572 } 3573 3574 /* This takes time, print a message every 10000 words. 
*/ 3575 if (spin->si_verbose && spin->si_msg_count > 10000) 3576 { 3577 spin->si_msg_count = 0; 3578 vim_snprintf((char *)message, sizeof(message), 3579 _("line %6d, word %6ld - %s"), 3580 lnum, spin->si_foldwcount + spin->si_keepwcount, w); 3581 msg_start(); 3582 msg_outtrans_long_attr(message, 0); 3583 msg_clr_eos(); 3584 msg_didout = FALSE; 3585 msg_col = 0; 3586 out_flush(); 3587 } 3588 3589 /* Store the word in the hashtable to be able to find duplicates. */ 3590 dw = (char_u *)getroom_save(spin, w); 3591 if (dw == NULL) 3592 { 3593 retval = FAIL; 3594 vim_free(pc); 3595 break; 3596 } 3597 3598 hash = hash_hash(dw); 3599 hi = hash_lookup(&ht, dw, hash); 3600 if (!HASHITEM_EMPTY(hi)) 3601 { 3602 if (p_verbose > 0) 3603 smsg(_("Duplicate word in %s line %d: %s"), 3604 fname, lnum, dw); 3605 else if (duplicate == 0) 3606 smsg(_("First duplicate word in %s line %d: %s"), 3607 fname, lnum, dw); 3608 ++duplicate; 3609 } 3610 else 3611 hash_add_item(&ht, hi, dw, hash); 3612 3613 flags = 0; 3614 store_afflist[0] = NUL; 3615 pfxlen = 0; 3616 need_affix = FALSE; 3617 if (afflist != NULL) 3618 { 3619 /* Extract flags from the affix list. */ 3620 flags |= get_affix_flags(affile, afflist); 3621 3622 if (affile->af_needaffix != 0 && flag_in_afflist( 3623 affile->af_flagtype, afflist, affile->af_needaffix)) 3624 need_affix = TRUE; 3625 3626 if (affile->af_pfxpostpone) 3627 /* Need to store the list of prefix IDs with the word. */ 3628 pfxlen = get_pfxlist(affile, afflist, store_afflist); 3629 3630 if (spin->si_compflags != NULL) 3631 /* Need to store the list of compound flags with the word. 3632 * Concatenate them to the list of prefix IDs. */ 3633 get_compflags(affile, afflist, store_afflist + pfxlen); 3634 } 3635 3636 /* Add the word to the word tree(s). */ 3637 if (store_word(spin, dw, flags, spin->si_region, 3638 store_afflist, need_affix) == FAIL) 3639 retval = FAIL; 3640 3641 if (afflist != NULL) 3642 { 3643 /* Find all matching suffixes and add the resulting words. 
3644 * Additionally do matching prefixes that combine. */ 3645 if (store_aff_word(spin, dw, afflist, affile, 3646 &affile->af_suff, &affile->af_pref, 3647 CONDIT_SUF, flags, store_afflist, pfxlen) == FAIL) 3648 retval = FAIL; 3649 3650 /* Find all matching prefixes and add the resulting words. */ 3651 if (store_aff_word(spin, dw, afflist, affile, 3652 &affile->af_pref, NULL, 3653 CONDIT_SUF, flags, store_afflist, pfxlen) == FAIL) 3654 retval = FAIL; 3655 } 3656 3657 vim_free(pc); 3658 } 3659 3660 if (duplicate > 0) 3661 smsg(_("%d duplicate word(s) in %s"), duplicate, fname); 3662 if (spin->si_ascii && non_ascii > 0) 3663 smsg(_("Ignored %d word(s) with non-ASCII characters in %s"), 3664 non_ascii, fname); 3665 hash_clear(&ht); 3666 3667 fclose(fd); 3668 return retval; 3669 } 3670 3671 /* 3672 * Check for affix flags in "afflist" that are turned into word flags. 3673 * Return WF_ flags. 3674 */ 3675 static int 3676 get_affix_flags(afffile_T *affile, char_u *afflist) 3677 { 3678 int flags = 0; 3679 3680 if (affile->af_keepcase != 0 && flag_in_afflist( 3681 affile->af_flagtype, afflist, affile->af_keepcase)) 3682 flags |= WF_KEEPCAP | WF_FIXCAP; 3683 if (affile->af_rare != 0 && flag_in_afflist( 3684 affile->af_flagtype, afflist, affile->af_rare)) 3685 flags |= WF_RARE; 3686 if (affile->af_bad != 0 && flag_in_afflist( 3687 affile->af_flagtype, afflist, affile->af_bad)) 3688 flags |= WF_BANNED; 3689 if (affile->af_needcomp != 0 && flag_in_afflist( 3690 affile->af_flagtype, afflist, affile->af_needcomp)) 3691 flags |= WF_NEEDCOMP; 3692 if (affile->af_comproot != 0 && flag_in_afflist( 3693 affile->af_flagtype, afflist, affile->af_comproot)) 3694 flags |= WF_COMPROOT; 3695 if (affile->af_nosuggest != 0 && flag_in_afflist( 3696 affile->af_flagtype, afflist, affile->af_nosuggest)) 3697 flags |= WF_NOSUGGEST; 3698 return flags; 3699 } 3700 3701 /* 3702 * Get the list of prefix IDs from the affix list "afflist". 3703 * Used for PFXPOSTPONE. 
3704 * Put the resulting flags in "store_afflist[MAXWLEN]" with a terminating NUL 3705 * and return the number of affixes. 3706 */ 3707 static int 3708 get_pfxlist( 3709 afffile_T *affile, 3710 char_u *afflist, 3711 char_u *store_afflist) 3712 { 3713 char_u *p; 3714 char_u *prevp; 3715 int cnt = 0; 3716 int id; 3717 char_u key[AH_KEY_LEN]; 3718 hashitem_T *hi; 3719 3720 for (p = afflist; *p != NUL; ) 3721 { 3722 prevp = p; 3723 if (get_affitem(affile->af_flagtype, &p) != 0) 3724 { 3725 /* A flag is a postponed prefix flag if it appears in "af_pref" 3726 * and its ID is not zero. */ 3727 vim_strncpy(key, prevp, p - prevp); 3728 hi = hash_find(&affile->af_pref, key); 3729 if (!HASHITEM_EMPTY(hi)) 3730 { 3731 id = HI2AH(hi)->ah_newID; 3732 if (id != 0) 3733 store_afflist[cnt++] = id; 3734 } 3735 } 3736 if (affile->af_flagtype == AFT_NUM && *p == ',') 3737 ++p; 3738 } 3739 3740 store_afflist[cnt] = NUL; 3741 return cnt; 3742 } 3743 3744 /* 3745 * Get the list of compound IDs from the affix list "afflist" that are used 3746 * for compound words. 3747 * Puts the flags in "store_afflist[]". 3748 */ 3749 static void 3750 get_compflags( 3751 afffile_T *affile, 3752 char_u *afflist, 3753 char_u *store_afflist) 3754 { 3755 char_u *p; 3756 char_u *prevp; 3757 int cnt = 0; 3758 char_u key[AH_KEY_LEN]; 3759 hashitem_T *hi; 3760 3761 for (p = afflist; *p != NUL; ) 3762 { 3763 prevp = p; 3764 if (get_affitem(affile->af_flagtype, &p) != 0) 3765 { 3766 /* A flag is a compound flag if it appears in "af_comp". */ 3767 vim_strncpy(key, prevp, p - prevp); 3768 hi = hash_find(&affile->af_comp, key); 3769 if (!HASHITEM_EMPTY(hi)) 3770 store_afflist[cnt++] = HI2CI(hi)->ci_newID; 3771 } 3772 if (affile->af_flagtype == AFT_NUM && *p == ',') 3773 ++p; 3774 } 3775 3776 store_afflist[cnt] = NUL; 3777 } 3778 3779 /* 3780 * Apply affixes to a word and store the resulting words. 3781 * "ht" is the hashtable with affentry_T that need to be applied, either 3782 * prefixes or suffixes. 
3783 * "xht", when not NULL, is the prefix hashtable, to be used additionally on 3784 * the resulting words for combining affixes. 3785 * 3786 * Returns FAIL when out of memory. 3787 */ 3788 static int 3789 store_aff_word( 3790 spellinfo_T *spin, /* spell info */ 3791 char_u *word, /* basic word start */ 3792 char_u *afflist, /* list of names of supported affixes */ 3793 afffile_T *affile, 3794 hashtab_T *ht, 3795 hashtab_T *xht, 3796 int condit, /* CONDIT_SUF et al. */ 3797 int flags, /* flags for the word */ 3798 char_u *pfxlist, /* list of prefix IDs */ 3799 int pfxlen) /* nr of flags in "pfxlist" for prefixes, rest 3800 * is compound flags */ 3801 { 3802 int todo; 3803 hashitem_T *hi; 3804 affheader_T *ah; 3805 affentry_T *ae; 3806 char_u newword[MAXWLEN]; 3807 int retval = OK; 3808 int i, j; 3809 char_u *p; 3810 int use_flags; 3811 char_u *use_pfxlist; 3812 int use_pfxlen; 3813 int need_affix; 3814 char_u store_afflist[MAXWLEN]; 3815 char_u pfx_pfxlist[MAXWLEN]; 3816 size_t wordlen = STRLEN(word); 3817 int use_condit; 3818 3819 todo = (int)ht->ht_used; 3820 for (hi = ht->ht_array; todo > 0 && retval == OK; ++hi) 3821 { 3822 if (!HASHITEM_EMPTY(hi)) 3823 { 3824 --todo; 3825 ah = HI2AH(hi); 3826 3827 /* Check that the affix combines, if required, and that the word 3828 * supports this affix. */ 3829 if (((condit & CONDIT_COMB) == 0 || ah->ah_combine) 3830 && flag_in_afflist(affile->af_flagtype, afflist, 3831 ah->ah_flag)) 3832 { 3833 /* Loop over all affix entries with this name. */ 3834 for (ae = ah->ah_first; ae != NULL; ae = ae->ae_next) 3835 { 3836 /* Check the condition. It's not logical to match case 3837 * here, but it is required for compatibility with 3838 * Myspell. 3839 * Another requirement from Myspell is that the chop 3840 * string is shorter than the word itself. 3841 * For prefixes, when "PFXPOSTPONE" was used, only do 3842 * prefixes with a chop string and/or flags. 
3843 * When a previously added affix had CIRCUMFIX this one 3844 * must have it too, if it had not then this one must not 3845 * have one either. */ 3846 if ((xht != NULL || !affile->af_pfxpostpone 3847 || ae->ae_chop != NULL 3848 || ae->ae_flags != NULL) 3849 && (ae->ae_chop == NULL 3850 || STRLEN(ae->ae_chop) < wordlen) 3851 && (ae->ae_prog == NULL 3852 || vim_regexec_prog(&ae->ae_prog, FALSE, 3853 word, (colnr_T)0)) 3854 && (((condit & CONDIT_CFIX) == 0) 3855 == ((condit & CONDIT_AFF) == 0 3856 || ae->ae_flags == NULL 3857 || !flag_in_afflist(affile->af_flagtype, 3858 ae->ae_flags, affile->af_circumfix)))) 3859 { 3860 /* Match. Remove the chop and add the affix. */ 3861 if (xht == NULL) 3862 { 3863 /* prefix: chop/add at the start of the word */ 3864 if (ae->ae_add == NULL) 3865 *newword = NUL; 3866 else 3867 vim_strncpy(newword, ae->ae_add, MAXWLEN - 1); 3868 p = word; 3869 if (ae->ae_chop != NULL) 3870 { 3871 /* Skip chop string. */ 3872 if (has_mbyte) 3873 { 3874 i = mb_charlen(ae->ae_chop); 3875 for ( ; i > 0; --i) 3876 MB_PTR_ADV(p); 3877 } 3878 else 3879 p += STRLEN(ae->ae_chop); 3880 } 3881 STRCAT(newword, p); 3882 } 3883 else 3884 { 3885 /* suffix: chop/add at the end of the word */ 3886 vim_strncpy(newword, word, MAXWLEN - 1); 3887 if (ae->ae_chop != NULL) 3888 { 3889 /* Remove chop string. */ 3890 p = newword + STRLEN(newword); 3891 i = (int)MB_CHARLEN(ae->ae_chop); 3892 for ( ; i > 0; --i) 3893 MB_PTR_BACK(newword, p); 3894 *p = NUL; 3895 } 3896 if (ae->ae_add != NULL) 3897 STRCAT(newword, ae->ae_add); 3898 } 3899 3900 use_flags = flags; 3901 use_pfxlist = pfxlist; 3902 use_pfxlen = pfxlen; 3903 need_affix = FALSE; 3904 use_condit = condit | CONDIT_COMB | CONDIT_AFF; 3905 if (ae->ae_flags != NULL) 3906 { 3907 /* Extract flags from the affix list. 
*/ 3908 use_flags |= get_affix_flags(affile, ae->ae_flags); 3909 3910 if (affile->af_needaffix != 0 && flag_in_afflist( 3911 affile->af_flagtype, ae->ae_flags, 3912 affile->af_needaffix)) 3913 need_affix = TRUE; 3914 3915 /* When there is a CIRCUMFIX flag the other affix 3916 * must also have it and we don't add the word 3917 * with one affix. */ 3918 if (affile->af_circumfix != 0 && flag_in_afflist( 3919 affile->af_flagtype, ae->ae_flags, 3920 affile->af_circumfix)) 3921 { 3922 use_condit |= CONDIT_CFIX; 3923 if ((condit & CONDIT_CFIX) == 0) 3924 need_affix = TRUE; 3925 } 3926 3927 if (affile->af_pfxpostpone 3928 || spin->si_compflags != NULL) 3929 { 3930 if (affile->af_pfxpostpone) 3931 /* Get prefix IDS from the affix list. */ 3932 use_pfxlen = get_pfxlist(affile, 3933 ae->ae_flags, store_afflist); 3934 else 3935 use_pfxlen = 0; 3936 use_pfxlist = store_afflist; 3937 3938 /* Combine the prefix IDs. Avoid adding the 3939 * same ID twice. */ 3940 for (i = 0; i < pfxlen; ++i) 3941 { 3942 for (j = 0; j < use_pfxlen; ++j) 3943 if (pfxlist[i] == use_pfxlist[j]) 3944 break; 3945 if (j == use_pfxlen) 3946 use_pfxlist[use_pfxlen++] = pfxlist[i]; 3947 } 3948 3949 if (spin->si_compflags != NULL) 3950 /* Get compound IDS from the affix list. */ 3951 get_compflags(affile, ae->ae_flags, 3952 use_pfxlist + use_pfxlen); 3953 3954 /* Combine the list of compound flags. 3955 * Concatenate them to the prefix IDs list. 3956 * Avoid adding the same ID twice. */ 3957 for (i = pfxlen; pfxlist[i] != NUL; ++i) 3958 { 3959 for (j = use_pfxlen; 3960 use_pfxlist[j] != NUL; ++j) 3961 if (pfxlist[i] == use_pfxlist[j]) 3962 break; 3963 if (use_pfxlist[j] == NUL) 3964 { 3965 use_pfxlist[j++] = pfxlist[i]; 3966 use_pfxlist[j] = NUL; 3967 } 3968 } 3969 } 3970 } 3971 3972 /* Obey a "COMPOUNDFORBIDFLAG" of the affix: don't 3973 * use the compound flags. 
*/ 3974 if (use_pfxlist != NULL && ae->ae_compforbid) 3975 { 3976 vim_strncpy(pfx_pfxlist, use_pfxlist, use_pfxlen); 3977 use_pfxlist = pfx_pfxlist; 3978 } 3979 3980 /* When there are postponed prefixes... */ 3981 if (spin->si_prefroot != NULL 3982 && spin->si_prefroot->wn_sibling != NULL) 3983 { 3984 /* ... add a flag to indicate an affix was used. */ 3985 use_flags |= WF_HAS_AFF; 3986 3987 /* ... don't use a prefix list if combining 3988 * affixes is not allowed. But do use the 3989 * compound flags after them. */ 3990 if (!ah->ah_combine && use_pfxlist != NULL) 3991 use_pfxlist += use_pfxlen; 3992 } 3993 3994 /* When compounding is supported and there is no 3995 * "COMPOUNDPERMITFLAG" then forbid compounding on the 3996 * side where the affix is applied. */ 3997 if (spin->si_compflags != NULL && !ae->ae_comppermit) 3998 { 3999 if (xht != NULL) 4000 use_flags |= WF_NOCOMPAFT; 4001 else 4002 use_flags |= WF_NOCOMPBEF; 4003 } 4004 4005 /* Store the modified word. */ 4006 if (store_word(spin, newword, use_flags, 4007 spin->si_region, use_pfxlist, 4008 need_affix) == FAIL) 4009 retval = FAIL; 4010 4011 /* When added a prefix or a first suffix and the affix 4012 * has flags may add a(nother) suffix. RECURSIVE! */ 4013 if ((condit & CONDIT_SUF) && ae->ae_flags != NULL) 4014 if (store_aff_word(spin, newword, ae->ae_flags, 4015 affile, &affile->af_suff, xht, 4016 use_condit & (xht == NULL 4017 ? ~0 : ~CONDIT_SUF), 4018 use_flags, use_pfxlist, pfxlen) == FAIL) 4019 retval = FAIL; 4020 4021 /* When added a suffix and combining is allowed also 4022 * try adding a prefix additionally. Both for the 4023 * word flags and for the affix flags. RECURSIVE! 
*/ 4024 if (xht != NULL && ah->ah_combine) 4025 { 4026 if (store_aff_word(spin, newword, 4027 afflist, affile, 4028 xht, NULL, use_condit, 4029 use_flags, use_pfxlist, 4030 pfxlen) == FAIL 4031 || (ae->ae_flags != NULL 4032 && store_aff_word(spin, newword, 4033 ae->ae_flags, affile, 4034 xht, NULL, use_condit, 4035 use_flags, use_pfxlist, 4036 pfxlen) == FAIL)) 4037 retval = FAIL; 4038 } 4039 } 4040 } 4041 } 4042 } 4043 } 4044 4045 return retval; 4046 } 4047 4048 /* 4049 * Read a file with a list of words. 4050 */ 4051 static int 4052 spell_read_wordfile(spellinfo_T *spin, char_u *fname) 4053 { 4054 FILE *fd; 4055 long lnum = 0; 4056 char_u rline[MAXLINELEN]; 4057 char_u *line; 4058 char_u *pc = NULL; 4059 char_u *p; 4060 int l; 4061 int retval = OK; 4062 int did_word = FALSE; 4063 int non_ascii = 0; 4064 int flags; 4065 int regionmask; 4066 4067 /* 4068 * Open the file. 4069 */ 4070 fd = mch_fopen((char *)fname, "r"); 4071 if (fd == NULL) 4072 { 4073 semsg(_(e_notopen), fname); 4074 return FAIL; 4075 } 4076 4077 vim_snprintf((char *)IObuff, IOSIZE, _("Reading word file %s..."), fname); 4078 spell_message(spin, IObuff); 4079 4080 /* 4081 * Read all the lines in the file one by one. 4082 */ 4083 while (!vim_fgets(rline, MAXLINELEN, fd) && !got_int) 4084 { 4085 line_breakcheck(); 4086 ++lnum; 4087 4088 /* Skip comment lines. */ 4089 if (*rline == '#') 4090 continue; 4091 4092 /* Remove CR, LF and white space from the end. */ 4093 l = (int)STRLEN(rline); 4094 while (l > 0 && rline[l - 1] <= ' ') 4095 --l; 4096 if (l == 0) 4097 continue; /* empty or blank line */ 4098 rline[l] = NUL; 4099 4100 /* Convert from "/encoding={encoding}" to 'encoding' when needed. 
*/ 4101 vim_free(pc); 4102 if (spin->si_conv.vc_type != CONV_NONE) 4103 { 4104 pc = string_convert(&spin->si_conv, rline, NULL); 4105 if (pc == NULL) 4106 { 4107 smsg(_("Conversion failure for word in %s line %d: %s"), 4108 fname, lnum, rline); 4109 continue; 4110 } 4111 line = pc; 4112 } 4113 else 4114 { 4115 pc = NULL; 4116 line = rline; 4117 } 4118 4119 if (*line == '/') 4120 { 4121 ++line; 4122 if (STRNCMP(line, "encoding=", 9) == 0) 4123 { 4124 if (spin->si_conv.vc_type != CONV_NONE) 4125 smsg(_("Duplicate /encoding= line ignored in %s line %d: %s"), 4126 fname, lnum, line - 1); 4127 else if (did_word) 4128 smsg(_("/encoding= line after word ignored in %s line %d: %s"), 4129 fname, lnum, line - 1); 4130 else 4131 { 4132 char_u *enc; 4133 4134 /* Setup for conversion to 'encoding'. */ 4135 line += 9; 4136 enc = enc_canonize(line); 4137 if (enc != NULL && !spin->si_ascii 4138 && convert_setup(&spin->si_conv, enc, 4139 p_enc) == FAIL) 4140 smsg(_("Conversion in %s not supported: from %s to %s"), 4141 fname, line, p_enc); 4142 vim_free(enc); 4143 spin->si_conv.vc_fail = TRUE; 4144 } 4145 continue; 4146 } 4147 4148 if (STRNCMP(line, "regions=", 8) == 0) 4149 { 4150 if (spin->si_region_count > 1) 4151 smsg(_("Duplicate /regions= line ignored in %s line %d: %s"), 4152 fname, lnum, line); 4153 else 4154 { 4155 line += 8; 4156 if (STRLEN(line) > MAXREGIONS * 2) 4157 smsg(_("Too many regions in %s line %d: %s"), 4158 fname, lnum, line); 4159 else 4160 { 4161 spin->si_region_count = (int)STRLEN(line) / 2; 4162 STRCPY(spin->si_region_name, line); 4163 4164 /* Adjust the mask for a word valid in all regions. */ 4165 spin->si_region = (1 << spin->si_region_count) - 1; 4166 } 4167 } 4168 continue; 4169 } 4170 4171 smsg(_("/ line ignored in %s line %d: %s"), 4172 fname, lnum, line - 1); 4173 continue; 4174 } 4175 4176 flags = 0; 4177 regionmask = spin->si_region; 4178 4179 /* Check for flags and region after a slash. 
*/ 4180 p = vim_strchr(line, '/'); 4181 if (p != NULL) 4182 { 4183 *p++ = NUL; 4184 while (*p != NUL) 4185 { 4186 if (*p == '=') /* keep-case word */ 4187 flags |= WF_KEEPCAP | WF_FIXCAP; 4188 else if (*p == '!') /* Bad, bad, wicked word. */ 4189 flags |= WF_BANNED; 4190 else if (*p == '?') /* Rare word. */ 4191 flags |= WF_RARE; 4192 else if (VIM_ISDIGIT(*p)) /* region number(s) */ 4193 { 4194 if ((flags & WF_REGION) == 0) /* first one */ 4195 regionmask = 0; 4196 flags |= WF_REGION; 4197 4198 l = *p - '0'; 4199 if (l == 0 || l > spin->si_region_count) 4200 { 4201 smsg(_("Invalid region nr in %s line %d: %s"), 4202 fname, lnum, p); 4203 break; 4204 } 4205 regionmask |= 1 << (l - 1); 4206 } 4207 else 4208 { 4209 smsg(_("Unrecognized flags in %s line %d: %s"), 4210 fname, lnum, p); 4211 break; 4212 } 4213 ++p; 4214 } 4215 } 4216 4217 /* Skip non-ASCII words when "spin->si_ascii" is TRUE. */ 4218 if (spin->si_ascii && has_non_ascii(line)) 4219 { 4220 ++non_ascii; 4221 continue; 4222 } 4223 4224 /* Normal word: store it. */ 4225 if (store_word(spin, line, flags, regionmask, NULL, FALSE) == FAIL) 4226 { 4227 retval = FAIL; 4228 break; 4229 } 4230 did_word = TRUE; 4231 } 4232 4233 vim_free(pc); 4234 fclose(fd); 4235 4236 if (spin->si_ascii && non_ascii > 0) 4237 { 4238 vim_snprintf((char *)IObuff, IOSIZE, 4239 _("Ignored %d words with non-ASCII characters"), non_ascii); 4240 spell_message(spin, IObuff); 4241 } 4242 4243 return retval; 4244 } 4245 4246 /* 4247 * Get part of an sblock_T, "len" bytes long. 4248 * This avoids calling free() for every little struct we use (and keeping 4249 * track of them). 4250 * The memory is cleared to all zeros. 4251 * Returns NULL when out of memory. 4252 */ 4253 static void * 4254 getroom( 4255 spellinfo_T *spin, 4256 size_t len, /* length needed */ 4257 int align) /* align for pointer */ 4258 { 4259 char_u *p; 4260 sblock_T *bl = spin->si_blocks; 4261 4262 if (align && bl != NULL) 4263 /* Round size up for alignment. 
On some systems structures need to be 4264 * aligned to the size of a pointer (e.g., SPARC). */ 4265 bl->sb_used = (bl->sb_used + sizeof(char *) - 1) 4266 & ~(sizeof(char *) - 1); 4267 4268 if (bl == NULL || bl->sb_used + len > SBLOCKSIZE) 4269 { 4270 if (len >= SBLOCKSIZE) 4271 bl = NULL; 4272 else 4273 /* Allocate a block of memory. It is not freed until much later. */ 4274 bl = alloc_clear(sizeof(sblock_T) + SBLOCKSIZE); 4275 if (bl == NULL) 4276 { 4277 if (!spin->si_did_emsg) 4278 { 4279 emsg(_("E845: Insufficient memory, word list will be incomplete")); 4280 spin->si_did_emsg = TRUE; 4281 } 4282 return NULL; 4283 } 4284 bl->sb_next = spin->si_blocks; 4285 spin->si_blocks = bl; 4286 bl->sb_used = 0; 4287 ++spin->si_blocks_cnt; 4288 } 4289 4290 p = bl->sb_data + bl->sb_used; 4291 bl->sb_used += (int)len; 4292 4293 return p; 4294 } 4295 4296 /* 4297 * Make a copy of a string into memory allocated with getroom(). 4298 * Returns NULL when out of memory. 4299 */ 4300 static char_u * 4301 getroom_save(spellinfo_T *spin, char_u *s) 4302 { 4303 char_u *sc; 4304 4305 sc = (char_u *)getroom(spin, STRLEN(s) + 1, FALSE); 4306 if (sc != NULL) 4307 STRCPY(sc, s); 4308 return sc; 4309 } 4310 4311 4312 /* 4313 * Free the list of allocated sblock_T. 4314 */ 4315 static void 4316 free_blocks(sblock_T *bl) 4317 { 4318 sblock_T *next; 4319 4320 while (bl != NULL) 4321 { 4322 next = bl->sb_next; 4323 vim_free(bl); 4324 bl = next; 4325 } 4326 } 4327 4328 /* 4329 * Allocate the root of a word tree. 4330 * Returns NULL when out of memory. 4331 */ 4332 static wordnode_T * 4333 wordtree_alloc(spellinfo_T *spin) 4334 { 4335 return (wordnode_T *)getroom(spin, sizeof(wordnode_T), TRUE); 4336 } 4337 4338 /* 4339 * Store a word in the tree(s). 4340 * Always store it in the case-folded tree. For a keep-case word this is 4341 * useful when the word can also be used with all caps (no WF_FIXCAP flag) and 4342 * used to find suggestions. 
 * For a keep-case word also store it in the keep-case tree.
 * When "pfxlist" is not NULL store the word for each postponed prefix ID and
 * compound flag.
 */
    static int
store_word(
    spellinfo_T *spin,
    char_u      *word,
    int         flags,          /* extra flags, WF_BANNED */
    int         region,         /* supported region(s) */
    char_u      *pfxlist,       /* list of prefix IDs or NULL */
    int         need_affix)     /* only store word with affix ID */
{
    int         len = (int)STRLEN(word);
    int         ct = captype(word, word + len);
    char_u      foldword[MAXWLEN];
    int         res = OK;
    char_u      *p;

    (void)spell_casefold(word, len, foldword, MAXWLEN);

    /* Add the case-folded word once for every ID in "pfxlist".  The round
     * where "p" is NULL or points at the terminating NUL adds the word with
     * affix ID zero, unless "need_affix" is set, and ends the loop. */
    for (p = pfxlist; res == OK; ++p)
    {
        if (!need_affix || (p != NULL && *p != NUL))
            res = tree_add_word(spin, foldword, spin->si_foldroot, ct | flags,
                                                  region, p == NULL ? 0 : *p);
        if (p == NULL || *p == NUL)
            break;
    }
    /* The word is counted even when adding it failed. */
    ++spin->si_foldwcount;

    /* A word with the keep-case caps type or the WF_KEEPCAP flag is also
     * added to the keep-case tree, unfolded and in the same way as above. */
    if (res == OK && (ct == WF_KEEPCAP || (flags & WF_KEEPCAP)))
    {
        for (p = pfxlist; res == OK; ++p)
        {
            if (!need_affix || (p != NULL && *p != NUL))
                res = tree_add_word(spin, word, spin->si_keeproot, flags,
                                                  region, p == NULL ? 0 : *p);
            if (p == NULL || *p == NUL)
                break;
        }
        ++spin->si_keepwcount;
    }
    return res;
}

/*
 * Add word "word" to a word tree at "root".
 * When "flags" < 0 we are adding to the prefix tree where "flags" is used for
 * "rare" and "region" is the condition nr.
 * "affixID" is stored in the node that ends the word.
 * After adding the word the trees are compressed when enough blocks have
 * been allocated or too few free nodes are left, see the comment near the
 * end of the function.
 * Returns FAIL when out of memory.
 */
    static int
tree_add_word(
    spellinfo_T *spin,
    char_u      *word,
    wordnode_T  *root,
    int         flags,
    int         region,
    int         affixID)
{
    wordnode_T  *node = root;
    wordnode_T  *np;
    wordnode_T  *copyp, **copyprev;
    wordnode_T  **prev = NULL;  /* the link through which "node" was reached,
                                   NULL for the root */
    int         i;

    /* Add each byte of the word to the tree, including the NUL at the end. */
    for (i = 0; ; ++i)
    {
        /* When there is more than one reference to this node we need to make
         * a copy, so that we can modify it.  Copy the whole list of siblings
         * (we don't optimize for a partly shared list of siblings). */
        if (node != NULL && node->wn_refs > 1)
        {
            --node->wn_refs;
            copyprev = prev;
            for (copyp = node; copyp != NULL; copyp = copyp->wn_sibling)
            {
                /* Allocate a new node and copy the info. */
                np = get_wordnode(spin);
                if (np == NULL)
                    return FAIL;
                np->wn_child = copyp->wn_child;
                if (np->wn_child != NULL)
                    ++np->wn_child->wn_refs;    /* child gets extra ref */
                np->wn_byte = copyp->wn_byte;
                if (np->wn_byte == NUL)
                {
                    /* Only a node that ends a word has these fields copied. */
                    np->wn_flags = copyp->wn_flags;
                    np->wn_region = copyp->wn_region;
                    np->wn_affixID = copyp->wn_affixID;
                }

                /* Link the new node in the list, there will be one ref. */
                np->wn_refs = 1;
                if (copyprev != NULL)
                    *copyprev = np;
                copyprev = &np->wn_sibling;

                /* Let "node" point to the head of the copied list. */
                if (copyp == node)
                    node = np;
            }
        }

        /* Look for the sibling that has the same character.  They are sorted
         * on byte value, thus stop searching when a sibling is found with a
         * higher byte value.  For zero bytes (end of word) the sorting is
         * done on flags and then on affixID. */
        while (node != NULL
                && (node->wn_byte < word[i]
                    || (node->wn_byte == NUL
                        && (flags < 0
                            ? node->wn_affixID < (unsigned)affixID
                            : (node->wn_flags < (unsigned)(flags & WN_MASK)
                                || (node->wn_flags == (flags & WN_MASK)
                                    && (spin->si_sugtree
                                        ? (node->wn_region & 0xffff) < region
                                        : node->wn_affixID
                                                    < (unsigned)affixID)))))))
        {
            prev = &node->wn_sibling;
            node = *prev;
        }

        /* A new node is needed when no sibling has this byte.  For the NUL
         * that ends the word a new node is also made when adding to the
         * prefix tree ("flags" < 0) or to the tree with "si_sugtree" set, or
         * when the flags or the affix ID of the found node differ. */
        if (node == NULL
                || node->wn_byte != word[i]
                || (word[i] == NUL
                    && (flags < 0
                        || spin->si_sugtree
                        || node->wn_flags != (flags & WN_MASK)
                        || node->wn_affixID != affixID)))
        {
            /* Allocate a new node. */
            np = get_wordnode(spin);
            if (np == NULL)
                return FAIL;
            np->wn_byte = word[i];

            /* If "node" is NULL this is a new child or the end of the sibling
             * list: ref count is one.  Otherwise use ref count of sibling and
             * make ref count of sibling one (matters when inserting in front
             * of the list of siblings). */
            if (node == NULL)
                np->wn_refs = 1;
            else
            {
                np->wn_refs = node->wn_refs;
                node->wn_refs = 1;
            }
            if (prev != NULL)
                *prev = np;
            np->wn_sibling = node;
            node = np;
        }

        if (word[i] == NUL)
        {
            /* End of the word: store the info in the node.  The region is
             * OR'ed in, flags and affix ID are overwritten. */
            node->wn_flags = flags;
            node->wn_region |= region;
            node->wn_affixID = affixID;
            break;
        }

        /* Continue with the children of this node for the next byte. */
        prev = &node->wn_child;
        node = *prev;
    }
#ifdef SPELL_PRINTTREE
    smsg("Added \"%s\"", word);
    spell_print_tree(root->wn_sibling);
#endif

    /* count nr of words added since last message */
    ++spin->si_msg_count;

    if (spin->si_compress_cnt > 1)
    {
        if (--spin->si_compress_cnt == 1)
            /* Did enough words to lower the block count limit. */
            spin->si_blocks_cnt += compress_inc;
    }

    /*
     * When we have allocated lots of memory we need to compress the word tree
     * to free up some room.  But compression is slow, and we might actually
     * need that room, thus only compress in the following situations:
     * 1. When not compressed before (si_compress_cnt == 0): when using
     *    "compress_start" blocks.
     * 2. When compressed before and used "compress_inc" blocks before
     *    adding "compress_added" words (si_compress_cnt > 1).
     * 3. When compressed before, added "compress_added" words
     *    (si_compress_cnt == 1) and the number of free nodes drops below the
     *    maximum word length.
     */
#ifndef SPELL_COMPRESS_ALLWAYS
    if (spin->si_compress_cnt == 1
            ? spin->si_free_count < MAXWLEN
            : spin->si_blocks_cnt >= compress_start)
#endif
    {
        /* Decrement the block counter.  The effect is that we compress again
         * when the freed up room has been used and another "compress_inc"
         * blocks have been allocated.  Unless "compress_added" words have
         * been added, then the limit is put back again. */
        spin->si_blocks_cnt -= compress_inc;
        spin->si_compress_cnt = compress_added;

        if (spin->si_verbose)
        {
            msg_start();
            msg_puts(_(msg_compressing));
            msg_clr_eos();
            msg_didout = FALSE;
            msg_col = 0;
            out_flush();
        }

        /* Compress both trees.  Either they both have many nodes, which makes
         * compression useful, or one of them is small, which means
         * compression goes fast.  But when filling the soundfold word tree
         * there is no keep-case tree. */
        wordtree_compress(spin, spin->si_foldroot);
        if (affixID >= 0)
            wordtree_compress(spin, spin->si_keeproot);
    }

    return OK;
}

/*
 * Get a wordnode_T, either from the list of previously freed nodes or
 * allocate a new one.
 * Returns NULL when out of memory.
4574 */ 4575 static wordnode_T * 4576 get_wordnode(spellinfo_T *spin) 4577 { 4578 wordnode_T *n; 4579 4580 if (spin->si_first_free == NULL) 4581 n = (wordnode_T *)getroom(spin, sizeof(wordnode_T), TRUE); 4582 else 4583 { 4584 n = spin->si_first_free; 4585 spin->si_first_free = n->wn_child; 4586 vim_memset(n, 0, sizeof(wordnode_T)); 4587 --spin->si_free_count; 4588 } 4589 #ifdef SPELL_PRINTTREE 4590 if (n != NULL) 4591 n->wn_nr = ++spin->si_wordnode_nr; 4592 #endif 4593 return n; 4594 } 4595 4596 /* 4597 * Decrement the reference count on a node (which is the head of a list of 4598 * siblings). If the reference count becomes zero free the node and its 4599 * siblings. 4600 * Returns the number of nodes actually freed. 4601 */ 4602 static int 4603 deref_wordnode(spellinfo_T *spin, wordnode_T *node) 4604 { 4605 wordnode_T *np; 4606 int cnt = 0; 4607 4608 if (--node->wn_refs == 0) 4609 { 4610 for (np = node; np != NULL; np = np->wn_sibling) 4611 { 4612 if (np->wn_child != NULL) 4613 cnt += deref_wordnode(spin, np->wn_child); 4614 free_wordnode(spin, np); 4615 ++cnt; 4616 } 4617 ++cnt; /* length field */ 4618 } 4619 return cnt; 4620 } 4621 4622 /* 4623 * Free a wordnode_T for re-use later. 4624 * Only the "wn_child" field becomes invalid. 4625 */ 4626 static void 4627 free_wordnode(spellinfo_T *spin, wordnode_T *n) 4628 { 4629 n->wn_child = spin->si_first_free; 4630 spin->si_first_free = n; 4631 ++spin->si_free_count; 4632 } 4633 4634 /* 4635 * Compress a tree: find tails that are identical and can be shared. 4636 */ 4637 static void 4638 wordtree_compress(spellinfo_T *spin, wordnode_T *root) 4639 { 4640 hashtab_T ht; 4641 int n; 4642 int tot = 0; 4643 int perc; 4644 4645 /* Skip the root itself, it's not actually used. The first sibling is the 4646 * start of the tree. 
*/ 4647 if (root->wn_sibling != NULL) 4648 { 4649 hash_init(&ht); 4650 n = node_compress(spin, root->wn_sibling, &ht, &tot); 4651 4652 #ifndef SPELL_PRINTTREE 4653 if (spin->si_verbose || p_verbose > 2) 4654 #endif 4655 { 4656 if (tot > 1000000) 4657 perc = (tot - n) / (tot / 100); 4658 else if (tot == 0) 4659 perc = 0; 4660 else 4661 perc = (tot - n) * 100 / tot; 4662 vim_snprintf((char *)IObuff, IOSIZE, 4663 _("Compressed %d of %d nodes; %d (%d%%) remaining"), 4664 n, tot, tot - n, perc); 4665 spell_message(spin, IObuff); 4666 } 4667 #ifdef SPELL_PRINTTREE 4668 spell_print_tree(root->wn_sibling); 4669 #endif 4670 hash_clear(&ht); 4671 } 4672 } 4673 4674 /* 4675 * Compress a node, its siblings and its children, depth first. 4676 * Returns the number of compressed nodes. 4677 */ 4678 static int 4679 node_compress( 4680 spellinfo_T *spin, 4681 wordnode_T *node, 4682 hashtab_T *ht, 4683 int *tot) /* total count of nodes before compressing, 4684 incremented while going through the tree */ 4685 { 4686 wordnode_T *np; 4687 wordnode_T *tp; 4688 wordnode_T *child; 4689 hash_T hash; 4690 hashitem_T *hi; 4691 int len = 0; 4692 unsigned nr, n; 4693 int compressed = 0; 4694 4695 /* 4696 * Go through the list of siblings. Compress each child and then try 4697 * finding an identical child to replace it. 4698 * Note that with "child" we mean not just the node that is pointed to, 4699 * but the whole list of siblings of which the child node is the first. 4700 */ 4701 for (np = node; np != NULL && !got_int; np = np->wn_sibling) 4702 { 4703 ++len; 4704 if ((child = np->wn_child) != NULL) 4705 { 4706 /* Compress the child first. This fills hashkey. */ 4707 compressed += node_compress(spin, child, ht, tot); 4708 4709 /* Try to find an identical child. 
*/ 4710 hash = hash_hash(child->wn_u1.hashkey); 4711 hi = hash_lookup(ht, child->wn_u1.hashkey, hash); 4712 if (!HASHITEM_EMPTY(hi)) 4713 { 4714 /* There are children we encountered before with a hash value 4715 * identical to the current child. Now check if there is one 4716 * that is really identical. */ 4717 for (tp = HI2WN(hi); tp != NULL; tp = tp->wn_u2.next) 4718 if (node_equal(child, tp)) 4719 { 4720 /* Found one! Now use that child in place of the 4721 * current one. This means the current child and all 4722 * its siblings is unlinked from the tree. */ 4723 ++tp->wn_refs; 4724 compressed += deref_wordnode(spin, child); 4725 np->wn_child = tp; 4726 break; 4727 } 4728 if (tp == NULL) 4729 { 4730 /* No other child with this hash value equals the child of 4731 * the node, add it to the linked list after the first 4732 * item. */ 4733 tp = HI2WN(hi); 4734 child->wn_u2.next = tp->wn_u2.next; 4735 tp->wn_u2.next = child; 4736 } 4737 } 4738 else 4739 /* No other child has this hash value, add it to the 4740 * hashtable. */ 4741 hash_add_item(ht, hi, child->wn_u1.hashkey, hash); 4742 } 4743 } 4744 *tot += len + 1; /* add one for the node that stores the length */ 4745 4746 /* 4747 * Make a hash key for the node and its siblings, so that we can quickly 4748 * find a lookalike node. This must be done after compressing the sibling 4749 * list, otherwise the hash key would become invalid by the compression. 4750 */ 4751 node->wn_u1.hashkey[0] = len; 4752 nr = 0; 4753 for (np = node; np != NULL; np = np->wn_sibling) 4754 { 4755 if (np->wn_byte == NUL) 4756 /* end node: use wn_flags, wn_region and wn_affixID */ 4757 n = np->wn_flags + (np->wn_region << 8) + (np->wn_affixID << 16); 4758 else 4759 /* byte node: use the byte value and the child pointer */ 4760 n = (unsigned)(np->wn_byte + ((long_u)np->wn_child << 8)); 4761 nr = nr * 101 + n; 4762 } 4763 4764 /* Avoid NUL bytes, it terminates the hash key. */ 4765 n = nr & 0xff; 4766 node->wn_u1.hashkey[1] = n == 0 ? 
1 : n; 4767 n = (nr >> 8) & 0xff; 4768 node->wn_u1.hashkey[2] = n == 0 ? 1 : n; 4769 n = (nr >> 16) & 0xff; 4770 node->wn_u1.hashkey[3] = n == 0 ? 1 : n; 4771 n = (nr >> 24) & 0xff; 4772 node->wn_u1.hashkey[4] = n == 0 ? 1 : n; 4773 node->wn_u1.hashkey[5] = NUL; 4774 4775 /* Check for CTRL-C pressed now and then. */ 4776 fast_breakcheck(); 4777 4778 return compressed; 4779 } 4780 4781 /* 4782 * Return TRUE when two nodes have identical siblings and children. 4783 */ 4784 static int 4785 node_equal(wordnode_T *n1, wordnode_T *n2) 4786 { 4787 wordnode_T *p1; 4788 wordnode_T *p2; 4789 4790 for (p1 = n1, p2 = n2; p1 != NULL && p2 != NULL; 4791 p1 = p1->wn_sibling, p2 = p2->wn_sibling) 4792 if (p1->wn_byte != p2->wn_byte 4793 || (p1->wn_byte == NUL 4794 ? (p1->wn_flags != p2->wn_flags 4795 || p1->wn_region != p2->wn_region 4796 || p1->wn_affixID != p2->wn_affixID) 4797 : (p1->wn_child != p2->wn_child))) 4798 break; 4799 4800 return p1 == NULL && p2 == NULL; 4801 } 4802 4803 static int rep_compare(const void *s1, const void *s2); 4804 4805 /* 4806 * Function given to qsort() to sort the REP items on "from" string. 4807 */ 4808 static int 4809 rep_compare(const void *s1, const void *s2) 4810 { 4811 fromto_T *p1 = (fromto_T *)s1; 4812 fromto_T *p2 = (fromto_T *)s2; 4813 4814 return STRCMP(p1->ft_from, p2->ft_from); 4815 } 4816 4817 /* 4818 * Write the Vim .spl file "fname". 
4819 * Return FAIL or OK; 4820 */ 4821 static int 4822 write_vim_spell(spellinfo_T *spin, char_u *fname) 4823 { 4824 FILE *fd; 4825 int regionmask; 4826 int round; 4827 wordnode_T *tree; 4828 int nodecount; 4829 int i; 4830 int l; 4831 garray_T *gap; 4832 fromto_T *ftp; 4833 char_u *p; 4834 int rr; 4835 int retval = OK; 4836 size_t fwv = 1; /* collect return value of fwrite() to avoid 4837 warnings from picky compiler */ 4838 4839 fd = mch_fopen((char *)fname, "w"); 4840 if (fd == NULL) 4841 { 4842 semsg(_(e_notopen), fname); 4843 return FAIL; 4844 } 4845 4846 /* <HEADER>: <fileID> <versionnr> */ 4847 /* <fileID> */ 4848 fwv &= fwrite(VIMSPELLMAGIC, VIMSPELLMAGICL, (size_t)1, fd); 4849 if (fwv != (size_t)1) 4850 /* Catch first write error, don't try writing more. */ 4851 goto theend; 4852 4853 putc(VIMSPELLVERSION, fd); /* <versionnr> */ 4854 4855 /* 4856 * <SECTIONS>: <section> ... <sectionend> 4857 */ 4858 4859 /* SN_INFO: <infotext> */ 4860 if (spin->si_info != NULL) 4861 { 4862 putc(SN_INFO, fd); /* <sectionID> */ 4863 putc(0, fd); /* <sectionflags> */ 4864 4865 i = (int)STRLEN(spin->si_info); 4866 put_bytes(fd, (long_u)i, 4); /* <sectionlen> */ 4867 fwv &= fwrite(spin->si_info, (size_t)i, (size_t)1, fd); /* <infotext> */ 4868 } 4869 4870 /* SN_REGION: <regionname> ... 4871 * Write the region names only if there is more than one. */ 4872 if (spin->si_region_count > 1) 4873 { 4874 putc(SN_REGION, fd); /* <sectionID> */ 4875 putc(SNF_REQUIRED, fd); /* <sectionflags> */ 4876 l = spin->si_region_count * 2; 4877 put_bytes(fd, (long_u)l, 4); /* <sectionlen> */ 4878 fwv &= fwrite(spin->si_region_name, (size_t)l, (size_t)1, fd); 4879 /* <regionname> ... */ 4880 regionmask = (1 << spin->si_region_count) - 1; 4881 } 4882 else 4883 regionmask = 0; 4884 4885 /* SN_CHARFLAGS: <charflagslen> <charflags> <folcharslen> <folchars> 4886 * 4887 * The table with character flags and the table for case folding. 
4888 * This makes sure the same characters are recognized as word characters 4889 * when generating an when using a spell file. 4890 * Skip this for ASCII, the table may conflict with the one used for 4891 * 'encoding'. 4892 * Also skip this for an .add.spl file, the main spell file must contain 4893 * the table (avoids that it conflicts). File is shorter too. 4894 */ 4895 if (!spin->si_ascii && !spin->si_add) 4896 { 4897 char_u folchars[128 * 8]; 4898 int flags; 4899 4900 putc(SN_CHARFLAGS, fd); /* <sectionID> */ 4901 putc(SNF_REQUIRED, fd); /* <sectionflags> */ 4902 4903 /* Form the <folchars> string first, we need to know its length. */ 4904 l = 0; 4905 for (i = 128; i < 256; ++i) 4906 { 4907 if (has_mbyte) 4908 l += mb_char2bytes(spelltab.st_fold[i], folchars + l); 4909 else 4910 folchars[l++] = spelltab.st_fold[i]; 4911 } 4912 put_bytes(fd, (long_u)(1 + 128 + 2 + l), 4); /* <sectionlen> */ 4913 4914 fputc(128, fd); /* <charflagslen> */ 4915 for (i = 128; i < 256; ++i) 4916 { 4917 flags = 0; 4918 if (spelltab.st_isw[i]) 4919 flags |= CF_WORD; 4920 if (spelltab.st_isu[i]) 4921 flags |= CF_UPPER; 4922 fputc(flags, fd); /* <charflags> */ 4923 } 4924 4925 put_bytes(fd, (long_u)l, 2); /* <folcharslen> */ 4926 fwv &= fwrite(folchars, (size_t)l, (size_t)1, fd); /* <folchars> */ 4927 } 4928 4929 /* SN_MIDWORD: <midword> */ 4930 if (spin->si_midword != NULL) 4931 { 4932 putc(SN_MIDWORD, fd); /* <sectionID> */ 4933 putc(SNF_REQUIRED, fd); /* <sectionflags> */ 4934 4935 i = (int)STRLEN(spin->si_midword); 4936 put_bytes(fd, (long_u)i, 4); /* <sectionlen> */ 4937 fwv &= fwrite(spin->si_midword, (size_t)i, (size_t)1, fd); 4938 /* <midword> */ 4939 } 4940 4941 /* SN_PREFCOND: <prefcondcnt> <prefcond> ... 
*/ 4942 if (spin->si_prefcond.ga_len > 0) 4943 { 4944 putc(SN_PREFCOND, fd); /* <sectionID> */ 4945 putc(SNF_REQUIRED, fd); /* <sectionflags> */ 4946 4947 l = write_spell_prefcond(NULL, &spin->si_prefcond); 4948 put_bytes(fd, (long_u)l, 4); /* <sectionlen> */ 4949 4950 write_spell_prefcond(fd, &spin->si_prefcond); 4951 } 4952 4953 /* SN_REP: <repcount> <rep> ... 4954 * SN_SAL: <salflags> <salcount> <sal> ... 4955 * SN_REPSAL: <repcount> <rep> ... */ 4956 4957 /* round 1: SN_REP section 4958 * round 2: SN_SAL section (unless SN_SOFO is used) 4959 * round 3: SN_REPSAL section */ 4960 for (round = 1; round <= 3; ++round) 4961 { 4962 if (round == 1) 4963 gap = &spin->si_rep; 4964 else if (round == 2) 4965 { 4966 /* Don't write SN_SAL when using a SN_SOFO section */ 4967 if (spin->si_sofofr != NULL && spin->si_sofoto != NULL) 4968 continue; 4969 gap = &spin->si_sal; 4970 } 4971 else 4972 gap = &spin->si_repsal; 4973 4974 /* Don't write the section if there are no items. */ 4975 if (gap->ga_len == 0) 4976 continue; 4977 4978 /* Sort the REP/REPSAL items. */ 4979 if (round != 2) 4980 qsort(gap->ga_data, (size_t)gap->ga_len, 4981 sizeof(fromto_T), rep_compare); 4982 4983 i = round == 1 ? SN_REP : (round == 2 ? SN_SAL : SN_REPSAL); 4984 putc(i, fd); /* <sectionID> */ 4985 4986 /* This is for making suggestions, section is not required. */ 4987 putc(0, fd); /* <sectionflags> */ 4988 4989 /* Compute the length of what follows. 
*/ 4990 l = 2; /* count <repcount> or <salcount> */ 4991 for (i = 0; i < gap->ga_len; ++i) 4992 { 4993 ftp = &((fromto_T *)gap->ga_data)[i]; 4994 l += 1 + (int)STRLEN(ftp->ft_from); /* count <*fromlen> and <*from> */ 4995 l += 1 + (int)STRLEN(ftp->ft_to); /* count <*tolen> and <*to> */ 4996 } 4997 if (round == 2) 4998 ++l; /* count <salflags> */ 4999 put_bytes(fd, (long_u)l, 4); /* <sectionlen> */ 5000 5001 if (round == 2) 5002 { 5003 i = 0; 5004 if (spin->si_followup) 5005 i |= SAL_F0LLOWUP; 5006 if (spin->si_collapse) 5007 i |= SAL_COLLAPSE; 5008 if (spin->si_rem_accents) 5009 i |= SAL_REM_ACCENTS; 5010 putc(i, fd); /* <salflags> */ 5011 } 5012 5013 put_bytes(fd, (long_u)gap->ga_len, 2); /* <repcount> or <salcount> */ 5014 for (i = 0; i < gap->ga_len; ++i) 5015 { 5016 /* <rep> : <repfromlen> <repfrom> <reptolen> <repto> */ 5017 /* <sal> : <salfromlen> <salfrom> <saltolen> <salto> */ 5018 ftp = &((fromto_T *)gap->ga_data)[i]; 5019 for (rr = 1; rr <= 2; ++rr) 5020 { 5021 p = rr == 1 ? ftp->ft_from : ftp->ft_to; 5022 l = (int)STRLEN(p); 5023 putc(l, fd); 5024 if (l > 0) 5025 fwv &= fwrite(p, l, (size_t)1, fd); 5026 } 5027 } 5028 5029 } 5030 5031 /* SN_SOFO: <sofofromlen> <sofofrom> <sofotolen> <sofoto> 5032 * This is for making suggestions, section is not required. */ 5033 if (spin->si_sofofr != NULL && spin->si_sofoto != NULL) 5034 { 5035 putc(SN_SOFO, fd); /* <sectionID> */ 5036 putc(0, fd); /* <sectionflags> */ 5037 5038 l = (int)STRLEN(spin->si_sofofr); 5039 put_bytes(fd, (long_u)(l + STRLEN(spin->si_sofoto) + 4), 4); 5040 /* <sectionlen> */ 5041 5042 put_bytes(fd, (long_u)l, 2); /* <sofofromlen> */ 5043 fwv &= fwrite(spin->si_sofofr, l, (size_t)1, fd); /* <sofofrom> */ 5044 5045 l = (int)STRLEN(spin->si_sofoto); 5046 put_bytes(fd, (long_u)l, 2); /* <sofotolen> */ 5047 fwv &= fwrite(spin->si_sofoto, l, (size_t)1, fd); /* <sofoto> */ 5048 } 5049 5050 /* SN_WORDS: <word> ... 5051 * This is for making suggestions, section is not required. 
*/ 5052 if (spin->si_commonwords.ht_used > 0) 5053 { 5054 putc(SN_WORDS, fd); /* <sectionID> */ 5055 putc(0, fd); /* <sectionflags> */ 5056 5057 /* round 1: count the bytes 5058 * round 2: write the bytes */ 5059 for (round = 1; round <= 2; ++round) 5060 { 5061 int todo; 5062 int len = 0; 5063 hashitem_T *hi; 5064 5065 todo = (int)spin->si_commonwords.ht_used; 5066 for (hi = spin->si_commonwords.ht_array; todo > 0; ++hi) 5067 if (!HASHITEM_EMPTY(hi)) 5068 { 5069 l = (int)STRLEN(hi->hi_key) + 1; 5070 len += l; 5071 if (round == 2) /* <word> */ 5072 fwv &= fwrite(hi->hi_key, (size_t)l, (size_t)1, fd); 5073 --todo; 5074 } 5075 if (round == 1) 5076 put_bytes(fd, (long_u)len, 4); /* <sectionlen> */ 5077 } 5078 } 5079 5080 /* SN_MAP: <mapstr> 5081 * This is for making suggestions, section is not required. */ 5082 if (spin->si_map.ga_len > 0) 5083 { 5084 putc(SN_MAP, fd); /* <sectionID> */ 5085 putc(0, fd); /* <sectionflags> */ 5086 l = spin->si_map.ga_len; 5087 put_bytes(fd, (long_u)l, 4); /* <sectionlen> */ 5088 fwv &= fwrite(spin->si_map.ga_data, (size_t)l, (size_t)1, fd); 5089 /* <mapstr> */ 5090 } 5091 5092 /* SN_SUGFILE: <timestamp> 5093 * This is used to notify that a .sug file may be available and at the 5094 * same time allows for checking that a .sug file that is found matches 5095 * with this .spl file. That's because the word numbers must be exactly 5096 * right. */ 5097 if (!spin->si_nosugfile 5098 && (spin->si_sal.ga_len > 0 5099 || (spin->si_sofofr != NULL && spin->si_sofoto != NULL))) 5100 { 5101 putc(SN_SUGFILE, fd); /* <sectionID> */ 5102 putc(0, fd); /* <sectionflags> */ 5103 put_bytes(fd, (long_u)8, 4); /* <sectionlen> */ 5104 5105 /* Set si_sugtime and write it to the file. */ 5106 spin->si_sugtime = time(NULL); 5107 put_time(fd, spin->si_sugtime); /* <timestamp> */ 5108 } 5109 5110 /* SN_NOSPLITSUGS: nothing 5111 * This is used to notify that no suggestions with word splits are to be 5112 * made. 
*/ 5113 if (spin->si_nosplitsugs) 5114 { 5115 putc(SN_NOSPLITSUGS, fd); /* <sectionID> */ 5116 putc(0, fd); /* <sectionflags> */ 5117 put_bytes(fd, (long_u)0, 4); /* <sectionlen> */ 5118 } 5119 5120 /* SN_NOCOMPUNDSUGS: nothing 5121 * This is used to notify that no suggestions with compounds are to be 5122 * made. */ 5123 if (spin->si_nocompoundsugs) 5124 { 5125 putc(SN_NOCOMPOUNDSUGS, fd); /* <sectionID> */ 5126 putc(0, fd); /* <sectionflags> */ 5127 put_bytes(fd, (long_u)0, 4); /* <sectionlen> */ 5128 } 5129 5130 /* SN_COMPOUND: compound info. 5131 * We don't mark it required, when not supported all compound words will 5132 * be bad words. */ 5133 if (spin->si_compflags != NULL) 5134 { 5135 putc(SN_COMPOUND, fd); /* <sectionID> */ 5136 putc(0, fd); /* <sectionflags> */ 5137 5138 l = (int)STRLEN(spin->si_compflags); 5139 for (i = 0; i < spin->si_comppat.ga_len; ++i) 5140 l += (int)STRLEN(((char_u **)(spin->si_comppat.ga_data))[i]) + 1; 5141 put_bytes(fd, (long_u)(l + 7), 4); /* <sectionlen> */ 5142 5143 putc(spin->si_compmax, fd); /* <compmax> */ 5144 putc(spin->si_compminlen, fd); /* <compminlen> */ 5145 putc(spin->si_compsylmax, fd); /* <compsylmax> */ 5146 putc(0, fd); /* for Vim 7.0b compatibility */ 5147 putc(spin->si_compoptions, fd); /* <compoptions> */ 5148 put_bytes(fd, (long_u)spin->si_comppat.ga_len, 2); 5149 /* <comppatcount> */ 5150 for (i = 0; i < spin->si_comppat.ga_len; ++i) 5151 { 5152 p = ((char_u **)(spin->si_comppat.ga_data))[i]; 5153 putc((int)STRLEN(p), fd); /* <comppatlen> */ 5154 fwv &= fwrite(p, (size_t)STRLEN(p), (size_t)1, fd); 5155 /* <comppattext> */ 5156 } 5157 /* <compflags> */ 5158 fwv &= fwrite(spin->si_compflags, (size_t)STRLEN(spin->si_compflags), 5159 (size_t)1, fd); 5160 } 5161 5162 /* SN_NOBREAK: NOBREAK flag */ 5163 if (spin->si_nobreak) 5164 { 5165 putc(SN_NOBREAK, fd); /* <sectionID> */ 5166 putc(0, fd); /* <sectionflags> */ 5167 5168 /* It's empty, the presence of the section flags the feature. 
*/ 5169 put_bytes(fd, (long_u)0, 4); /* <sectionlen> */ 5170 } 5171 5172 /* SN_SYLLABLE: syllable info. 5173 * We don't mark it required, when not supported syllables will not be 5174 * counted. */ 5175 if (spin->si_syllable != NULL) 5176 { 5177 putc(SN_SYLLABLE, fd); /* <sectionID> */ 5178 putc(0, fd); /* <sectionflags> */ 5179 5180 l = (int)STRLEN(spin->si_syllable); 5181 put_bytes(fd, (long_u)l, 4); /* <sectionlen> */ 5182 fwv &= fwrite(spin->si_syllable, (size_t)l, (size_t)1, fd); 5183 /* <syllable> */ 5184 } 5185 5186 /* end of <SECTIONS> */ 5187 putc(SN_END, fd); /* <sectionend> */ 5188 5189 5190 /* 5191 * <LWORDTREE> <KWORDTREE> <PREFIXTREE> 5192 */ 5193 spin->si_memtot = 0; 5194 for (round = 1; round <= 3; ++round) 5195 { 5196 if (round == 1) 5197 tree = spin->si_foldroot->wn_sibling; 5198 else if (round == 2) 5199 tree = spin->si_keeproot->wn_sibling; 5200 else 5201 tree = spin->si_prefroot->wn_sibling; 5202 5203 /* Clear the index and wnode fields in the tree. */ 5204 clear_node(tree); 5205 5206 /* Count the number of nodes. Needed to be able to allocate the 5207 * memory when reading the nodes. Also fills in index for shared 5208 * nodes. */ 5209 nodecount = put_node(NULL, tree, 0, regionmask, round == 3); 5210 5211 /* number of nodes in 4 bytes */ 5212 put_bytes(fd, (long_u)nodecount, 4); /* <nodecount> */ 5213 spin->si_memtot += nodecount + nodecount * sizeof(int); 5214 5215 /* Write the nodes. */ 5216 (void)put_node(fd, tree, 0, regionmask, round == 3); 5217 } 5218 5219 /* Write another byte to check for errors (file system full). */ 5220 if (putc(0, fd) == EOF) 5221 retval = FAIL; 5222 theend: 5223 if (fclose(fd) == EOF) 5224 retval = FAIL; 5225 5226 if (fwv != (size_t)1) 5227 retval = FAIL; 5228 if (retval == FAIL) 5229 emsg(_(e_write)); 5230 5231 return retval; 5232 } 5233 5234 /* 5235 * Clear the index and wnode fields of "node", it siblings and its 5236 * children. This is needed because they are a union with other items to save 5237 * space. 
*/
    static void
clear_node(wordnode_T *node)
{
    wordnode_T  *np;

    if (node != NULL)
        for (np = node; np != NULL; np = np->wn_sibling)
        {
            np->wn_u1.index = 0;
            np->wn_u2.wnode = NULL;

            /* Only a byte node is followed into its child list. */
            if (np->wn_byte != NUL)
                clear_node(np->wn_child);
        }
}


/*
 * Dump a word tree at node "node".
 *
 * This first writes the list of possible bytes (siblings).  Then for each
 * byte recursively write the children.
 *
 * NOTE: The code here must match the code in read_tree_node(), since
 * assumptions are made about the indexes (so that we don't have to write them
 * in the file).
 *
 * "idx" is the index given to "node".  When "fd" is NULL nothing is written,
 * but the "wn_u1.index" and "wn_u2.wnode" fields are still set, thus
 * clear_node() must be used before every pass over the tree.
 *
 * Returns the number of nodes used: the first index after this node and the
 * children written for it.  Returns zero for an empty tree and when writing
 * a byte fails (an error message is given then).
 */
    static int
put_node(
    FILE        *fd,            /* NULL when only counting */
    wordnode_T  *node,
    int         idx,
    int         regionmask,
    int         prefixtree)     /* TRUE for PREFIXTREE */
{
    int         newindex = idx;
    int         siblingcount = 0;
    wordnode_T  *np;
    int         flags;

    /* If "node" is zero the tree is empty. */
    if (node == NULL)
        return 0;

    /* Store the index where this node is written. */
    node->wn_u1.index = idx;

    /* Count the number of siblings. */
    for (np = node; np != NULL; np = np->wn_sibling)
        ++siblingcount;

    /* Write the sibling count. */
    if (fd != NULL)
        putc(siblingcount, fd);                 /* <siblingcount> */

    /* Write each sibling byte and optionally extra info. */
    for (np = node; np != NULL; np = np->wn_sibling)
    {
        if (np->wn_byte == 0)
        {
            if (fd != NULL)
            {
                /* For a NUL byte (end of word) write the flags etc. */
                if (prefixtree)
                {
                    /* In PREFIXTREE write the required affixID and the
                     * associated condition nr (stored in wn_region).  The
                     * byte value is misused to store the "rare" and "not
                     * combining" flags */
                    if (np->wn_flags == (short_u)PFX_FLAGS)
                        putc(BY_NOFLAGS, fd);           /* <byte> */
                    else
                    {
                        putc(BY_FLAGS, fd);             /* <byte> */
                        putc(np->wn_flags, fd);         /* <pflags> */
                    }
                    putc(np->wn_affixID, fd);           /* <affixID> */
                    put_bytes(fd, (long_u)np->wn_region, 2); /* <prefcondnr> */
                }
                else
                {
                    /* For word trees we write the flag/region items. */
                    flags = np->wn_flags;
                    if (regionmask != 0 && np->wn_region != regionmask)
                        flags |= WF_REGION;
                    if (np->wn_affixID != 0)
                        flags |= WF_AFX;
                    if (flags == 0)
                    {
                        /* word without flags or region */
                        putc(BY_NOFLAGS, fd);                   /* <byte> */
                    }
                    else
                    {
                        /* A second flags byte is only written when the flags
                         * of the node itself don't fit in one byte. */
                        if (np->wn_flags >= 0x100)
                        {
                            putc(BY_FLAGS2, fd);                /* <byte> */
                            putc(flags, fd);                    /* <flags> */
                            putc((unsigned)flags >> 8, fd);     /* <flags2> */
                        }
                        else
                        {
                            putc(BY_FLAGS, fd);                 /* <byte> */
                            putc(flags, fd);                    /* <flags> */
                        }
                        if (flags & WF_REGION)
                            putc(np->wn_region, fd);            /* <region> */
                        if (flags & WF_AFX)
                            putc(np->wn_affixID, fd);           /* <affixID> */
                    }
                }
            }
        }
        else
        {
            /* The "wnode" field of a child list remembers the node that
             * writes it, so that the counting pass and the writing pass make
             * the same choice for a shared child list. */
            if (np->wn_child->wn_u1.index != 0
                                         && np->wn_child->wn_u2.wnode != node)
            {
                /* The child is written elsewhere, write the reference. */
                if (fd != NULL)
                {
                    putc(BY_INDEX, fd);                 /* <byte> */
                                                        /* <nodeidx> */
                    put_bytes(fd, (long_u)np->wn_child->wn_u1.index, 3);
                }
            }
            else if (np->wn_child->wn_u2.wnode == NULL)
                /* We will write the child below and give it an index. */
                np->wn_child->wn_u2.wnode = node;

            if (fd != NULL)
                if (putc(np->wn_byte, fd) == EOF) /* <byte> or <xbyte> */
                {
                    emsg(_(e_write));
                    return 0;
                }
        }
    }

    /* Space used in the array when reading: one for each sibling and one for
     * the count. */
    newindex += siblingcount + 1;

    /* Recursively dump the children of each sibling. */
    for (np = node; np != NULL; np = np->wn_sibling)
        if (np->wn_byte != 0 && np->wn_child->wn_u2.wnode == node)
            newindex = put_node(fd, np->wn_child, newindex, regionmask,
                                                                  prefixtree);

    return newindex;
}


/*
 * ":mkspell [-ascii] outfile  infile ..."
 * ":mkspell [-ascii] addfile"
 */
    void
ex_mkspell(exarg_T *eap)
{
    int         fcount;
    char_u      **fnames;
    char_u      *arg = eap->arg;
    int         ascii = FALSE;

    /* Only the first six characters are compared. */
    if (STRNCMP(arg, "-ascii", 6) == 0)
    {
        ascii = TRUE;
        arg = skipwhite(arg + 6);
    }

    /* Expand all the remaining arguments (e.g., $VIMRUNTIME). */
    if (get_arglist_exp(arg, &fcount, &fnames, FALSE) == OK)
    {
        mkspell(fcount, fnames, ascii, eap->forceit, FALSE);
        FreeWild(fcount, fnames);
    }
}

/*
 * Create the .sug file.
 * Uses the soundfold info in "spin".
 * Writes the file with the name "wfname", with ".spl" changed to ".sug".
 * Only the last two characters of the name are replaced, thus "wfname" is
 * expected to end in ".spl".
 */
    static void
spell_make_sugfile(spellinfo_T *spin, char_u *wfname)
{
    char_u      *fname = NULL;
    int         len;
    slang_T     *slang;
    int         free_slang = FALSE;

    /*
     * Read back the .spl file that was written.  This fills the required
     * info for soundfolding.  This also uses less memory than the
     * pointer-linked version of the trie.  And it avoids having two versions
     * of the code for the soundfolding stuff.
     * It might have been done already by spell_reload_one().
     */
    for (slang = first_lang; slang != NULL; slang = slang->sl_next)
        if (fullpathcmp(wfname, slang->sl_fname, FALSE, TRUE) == FPC_SAME)
            break;
    if (slang == NULL)
    {
        spell_message(spin, (char_u *)_("Reading back spell file..."));
        slang = spell_load_file(wfname, NULL, NULL, FALSE);
        if (slang == NULL)
            return;
        /* Loaded here only, free it again when done. */
        free_slang = TRUE;
    }

    /*
     * Clear the info in "spin" that is used.
     */
    spin->si_blocks = NULL;
    spin->si_blocks_cnt = 0;
    spin->si_compress_cnt = 0;      /* will stay at 0 all the time*/
    spin->si_free_count = 0;
    spin->si_first_free = NULL;
    spin->si_foldwcount = 0;

    /*
     * Go through the trie of good words, soundfold each word and add it to
     * the soundfold trie.
     */
    spell_message(spin, (char_u *)_("Performing soundfolding..."));
    if (sug_filltree(spin, slang) == FAIL)
        goto theend;

    /*
     * Create the table which links each soundfold word with a list of the
     * good words it may come from.  Creates buffer "spin->si_spellbuf".
     * This also removes the wordnr from the NUL byte entries to make
     * compression possible.
     */
    if (sug_maketable(spin) == FAIL)
        goto theend;

    smsg(_("Number of words after soundfolding: %ld"),
                                 (long)spin->si_spellbuf->b_ml.ml_line_count);

    /*
     * Compress the soundfold trie.
     */
    spell_message(spin, (char_u *)_(msg_compressing));
    wordtree_compress(spin, spin->si_foldroot);

    /*
     * Write the .sug file.
     * Make the file name by changing ".spl" to ".sug".
     */
    fname = alloc(MAXPATHL);
    if (fname == NULL)
        goto theend;
    vim_strncpy(fname, wfname, MAXPATHL - 1);
    len = (int)STRLEN(fname);
    fname[len - 2] = 'u';
    fname[len - 1] = 'g';
    sug_write(spin, fname);

theend:
    vim_free(fname);
    if (free_slang)
        slang_free(slang);
    free_blocks(spin->si_blocks);
    close_spellbuf(spin->si_spellbuf);
}

/*
 * Build the soundfold trie for language "slang".
 * Walks the case-folded word tree of "slang" (sl_fbyts and sl_fidxs) without
 * recursion and adds the soundfolded form of each word to a new tree in
 * "spin->si_foldroot".
 * The sl_fidxs entry of each finished node is overwritten with the number of
 * words counted below that node.
 * Returns FAIL when allocating the root or adding a word fails, otherwise OK.
 */
    static int
sug_filltree(spellinfo_T *spin, slang_T *slang)
{
    char_u      *byts;
    idx_T       *idxs;
    int         depth;
    idx_T       arridx[MAXWLEN];
    int         curi[MAXWLEN];
    char_u      tword[MAXWLEN];
    char_u      tsalword[MAXWLEN];
    int         c;
    idx_T       n;
    unsigned    words_done = 0;
    int         wordcount[MAXWLEN];

    /* We use si_foldroot for the soundfolded trie. */
    spin->si_foldroot = wordtree_alloc(spin);
    if (spin->si_foldroot == NULL)
        return FAIL;

    /* let tree_add_word() know we're adding to the soundfolded tree */
    spin->si_sugtree = TRUE;

    /*
     * Go through the whole case-folded tree, soundfold each word and put it
     * in the trie.
     */
    byts = slang->sl_fbyts;
    idxs = slang->sl_fidxs;

    /* For each depth: "arridx" is the index of the node, "curi" the offset
     * of the next byte to handle in that node and "wordcount" the number of
     * words found below it so far. */
    arridx[0] = 0;
    curi[0] = 1;
    wordcount[0] = 0;

    depth = 0;
    while (depth >= 0 && !got_int)
    {
        if (curi[depth] > byts[arridx[depth]])
        {
            /* Done all bytes at this node, go up one level. */
            idxs[arridx[depth]] = wordcount[depth];
            if (depth > 0)
                wordcount[depth - 1] += wordcount[depth];

            --depth;
            line_breakcheck();
        }
        else
        {

            /* Do one more byte at this node. */
            n = arridx[depth] + curi[depth];
            ++curi[depth];

            c = byts[n];
            if (c == 0)
            {
                /* Sound-fold the word. */
                tword[depth] = NUL;
                spell_soundfold(slang, tword, TRUE, tsalword);

                /* We use the "flags" field for the MSB of the wordnr,
                 * "region" for the LSB of the wordnr. */
                if (tree_add_word(spin, tsalword, spin->si_foldroot,
                                words_done >> 16, words_done & 0xffff,
                                                           0) == FAIL)
                    return FAIL;

                ++words_done;
                ++wordcount[depth];

                /* Reset the block count each time to avoid compression
                 * kicking in. */
                spin->si_blocks_cnt = 0;

                /* Skip over any other NUL bytes (same word with different
                 * flags). */
                while (byts[n + 1] == 0)
                {
                    ++n;
                    ++curi[depth];
                }
            }
            else
            {
                /* Normal char, go one level deeper. */
                tword[depth++] = c;
                arridx[depth] = idxs[n];
                curi[depth] = 1;
                wordcount[depth] = 0;
            }
        }
    }

    smsg(_("Total number of words: %d"), words_done);

    return OK;
}

/*
 * Make the table that links each word in the soundfold trie to the words it
 * can be produced from.
 * This is not unlike lines in a file, thus use a memfile to be able to access
 * the table efficiently.
 * The buffer with the table is stored in "spin->si_spellbuf".
 * Returns FAIL when out of memory.
 */
    static int
sug_maketable(spellinfo_T *spin)
{
    garray_T    ga;
    int         res = OK;

    /* Allocate a buffer, open a memline for it and create the swap file
     * (uses a temp file, not a .swp file). */
    spin->si_spellbuf = open_spellbuf();
    if (spin->si_spellbuf == NULL)
        return FAIL;

    /* Use a buffer to store the line info, avoids allocating many small
     * pieces of memory. */
    ga_init2(&ga, 1, 100);

    /* recursively go through the tree */
    if (sug_filltable(spin, spin->si_foldroot->wn_sibling, 0, &ga) == -1)
        res = FAIL;

    ga_clear(&ga);
    return res;
}

/*
 * Fill the table for one node and its children.
 * Returns the next word number: "startwordnr" plus the number of words that
 * were handled in this node and below it.
5645 * Returns -1 when out of memory. 5646 */ 5647 static int 5648 sug_filltable( 5649 spellinfo_T *spin, 5650 wordnode_T *node, 5651 int startwordnr, 5652 garray_T *gap) /* place to store line of numbers */ 5653 { 5654 wordnode_T *p, *np; 5655 int wordnr = startwordnr; 5656 int nr; 5657 int prev_nr; 5658 5659 for (p = node; p != NULL; p = p->wn_sibling) 5660 { 5661 if (p->wn_byte == NUL) 5662 { 5663 gap->ga_len = 0; 5664 prev_nr = 0; 5665 for (np = p; np != NULL && np->wn_byte == NUL; np = np->wn_sibling) 5666 { 5667 if (ga_grow(gap, 10) == FAIL) 5668 return -1; 5669 5670 nr = (np->wn_flags << 16) + (np->wn_region & 0xffff); 5671 /* Compute the offset from the previous nr and store the 5672 * offset in a way that it takes a minimum number of bytes. 5673 * It's a bit like utf-8, but without the need to mark 5674 * following bytes. */ 5675 nr -= prev_nr; 5676 prev_nr += nr; 5677 gap->ga_len += offset2bytes(nr, 5678 (char_u *)gap->ga_data + gap->ga_len); 5679 } 5680 5681 /* add the NUL byte */ 5682 ((char_u *)gap->ga_data)[gap->ga_len++] = NUL; 5683 5684 if (ml_append_buf(spin->si_spellbuf, (linenr_T)wordnr, 5685 gap->ga_data, gap->ga_len, TRUE) == FAIL) 5686 return -1; 5687 ++wordnr; 5688 5689 /* Remove extra NUL entries, we no longer need them. We don't 5690 * bother freeing the nodes, the won't be reused anyway. */ 5691 while (p->wn_sibling != NULL && p->wn_sibling->wn_byte == NUL) 5692 p->wn_sibling = p->wn_sibling->wn_sibling; 5693 5694 /* Clear the flags on the remaining NUL node, so that compression 5695 * works a lot better. */ 5696 p->wn_flags = 0; 5697 p->wn_region = 0; 5698 } 5699 else 5700 { 5701 wordnr = sug_filltable(spin, p->wn_child, wordnr, gap); 5702 if (wordnr == -1) 5703 return -1; 5704 } 5705 } 5706 return wordnr; 5707 } 5708 5709 /* 5710 * Convert an offset into a minimal number of bytes. 5711 * Similar to utf_char2byters, but use 8 bits in followup bytes and avoid NUL 5712 * bytes. 
5713 */ 5714 static int 5715 offset2bytes(int nr, char_u *buf) 5716 { 5717 int rem; 5718 int b1, b2, b3, b4; 5719 5720 /* Split the number in parts of base 255. We need to avoid NUL bytes. */ 5721 b1 = nr % 255 + 1; 5722 rem = nr / 255; 5723 b2 = rem % 255 + 1; 5724 rem = rem / 255; 5725 b3 = rem % 255 + 1; 5726 b4 = rem / 255 + 1; 5727 5728 if (b4 > 1 || b3 > 0x1f) /* 4 bytes */ 5729 { 5730 buf[0] = 0xe0 + b4; 5731 buf[1] = b3; 5732 buf[2] = b2; 5733 buf[3] = b1; 5734 return 4; 5735 } 5736 if (b3 > 1 || b2 > 0x3f ) /* 3 bytes */ 5737 { 5738 buf[0] = 0xc0 + b3; 5739 buf[1] = b2; 5740 buf[2] = b1; 5741 return 3; 5742 } 5743 if (b2 > 1 || b1 > 0x7f ) /* 2 bytes */ 5744 { 5745 buf[0] = 0x80 + b2; 5746 buf[1] = b1; 5747 return 2; 5748 } 5749 /* 1 byte */ 5750 buf[0] = b1; 5751 return 1; 5752 } 5753 5754 /* 5755 * Write the .sug file in "fname". 5756 */ 5757 static void 5758 sug_write(spellinfo_T *spin, char_u *fname) 5759 { 5760 FILE *fd; 5761 wordnode_T *tree; 5762 int nodecount; 5763 int wcount; 5764 char_u *line; 5765 linenr_T lnum; 5766 int len; 5767 5768 /* Create the file. Note that an existing file is silently overwritten! */ 5769 fd = mch_fopen((char *)fname, "w"); 5770 if (fd == NULL) 5771 { 5772 semsg(_(e_notopen), fname); 5773 return; 5774 } 5775 5776 vim_snprintf((char *)IObuff, IOSIZE, 5777 _("Writing suggestion file %s..."), fname); 5778 spell_message(spin, IObuff); 5779 5780 /* 5781 * <SUGHEADER>: <fileID> <versionnr> <timestamp> 5782 */ 5783 if (fwrite(VIMSUGMAGIC, VIMSUGMAGICL, (size_t)1, fd) != 1) /* <fileID> */ 5784 { 5785 emsg(_(e_write)); 5786 goto theend; 5787 } 5788 putc(VIMSUGVERSION, fd); /* <versionnr> */ 5789 5790 /* Write si_sugtime to the file. */ 5791 put_time(fd, spin->si_sugtime); /* <timestamp> */ 5792 5793 /* 5794 * <SUGWORDTREE> 5795 */ 5796 spin->si_memtot = 0; 5797 tree = spin->si_foldroot->wn_sibling; 5798 5799 /* Clear the index and wnode fields in the tree. */ 5800 clear_node(tree); 5801 5802 /* Count the number of nodes. 
Needed to be able to allocate the 5803 * memory when reading the nodes. Also fills in index for shared 5804 * nodes. */ 5805 nodecount = put_node(NULL, tree, 0, 0, FALSE); 5806 5807 /* number of nodes in 4 bytes */ 5808 put_bytes(fd, (long_u)nodecount, 4); /* <nodecount> */ 5809 spin->si_memtot += nodecount + nodecount * sizeof(int); 5810 5811 /* Write the nodes. */ 5812 (void)put_node(fd, tree, 0, 0, FALSE); 5813 5814 /* 5815 * <SUGTABLE>: <sugwcount> <sugline> ... 5816 */ 5817 wcount = spin->si_spellbuf->b_ml.ml_line_count; 5818 put_bytes(fd, (long_u)wcount, 4); /* <sugwcount> */ 5819 5820 for (lnum = 1; lnum <= (linenr_T)wcount; ++lnum) 5821 { 5822 /* <sugline>: <sugnr> ... NUL */ 5823 line = ml_get_buf(spin->si_spellbuf, lnum, FALSE); 5824 len = (int)STRLEN(line) + 1; 5825 if (fwrite(line, (size_t)len, (size_t)1, fd) == 0) 5826 { 5827 emsg(_(e_write)); 5828 goto theend; 5829 } 5830 spin->si_memtot += len; 5831 } 5832 5833 /* Write another byte to check for errors. */ 5834 if (putc(0, fd) == EOF) 5835 emsg(_(e_write)); 5836 5837 vim_snprintf((char *)IObuff, IOSIZE, 5838 _("Estimated runtime memory use: %d bytes"), spin->si_memtot); 5839 spell_message(spin, IObuff); 5840 5841 theend: 5842 /* close the file */ 5843 fclose(fd); 5844 } 5845 5846 5847 /* 5848 * Create a Vim spell file from one or more word lists. 5849 * "fnames[0]" is the output file name. 5850 * "fnames[fcount - 1]" is the last input file name. 5851 * Exception: when "fnames[0]" ends in ".add" it's used as the input file name 5852 * and ".spl" is appended to make the output file name. 
5853 */ 5854 void 5855 mkspell( 5856 int fcount, 5857 char_u **fnames, 5858 int ascii, /* -ascii argument given */ 5859 int over_write, /* overwrite existing output file */ 5860 int added_word) /* invoked through "zg" */ 5861 { 5862 char_u *fname = NULL; 5863 char_u *wfname; 5864 char_u **innames; 5865 int incount; 5866 afffile_T *(afile[MAXREGIONS]); 5867 int i; 5868 int len; 5869 stat_T st; 5870 int error = FALSE; 5871 spellinfo_T spin; 5872 5873 vim_memset(&spin, 0, sizeof(spin)); 5874 spin.si_verbose = !added_word; 5875 spin.si_ascii = ascii; 5876 spin.si_followup = TRUE; 5877 spin.si_rem_accents = TRUE; 5878 ga_init2(&spin.si_rep, (int)sizeof(fromto_T), 20); 5879 ga_init2(&spin.si_repsal, (int)sizeof(fromto_T), 20); 5880 ga_init2(&spin.si_sal, (int)sizeof(fromto_T), 20); 5881 ga_init2(&spin.si_map, (int)sizeof(char_u), 100); 5882 ga_init2(&spin.si_comppat, (int)sizeof(char_u *), 20); 5883 ga_init2(&spin.si_prefcond, (int)sizeof(char_u *), 50); 5884 hash_init(&spin.si_commonwords); 5885 spin.si_newcompID = 127; /* start compound ID at first maximum */ 5886 5887 /* default: fnames[0] is output file, following are input files */ 5888 innames = &fnames[1]; 5889 incount = fcount - 1; 5890 5891 wfname = alloc(MAXPATHL); 5892 if (wfname == NULL) 5893 return; 5894 5895 if (fcount >= 1) 5896 { 5897 len = (int)STRLEN(fnames[0]); 5898 if (fcount == 1 && len > 4 && STRCMP(fnames[0] + len - 4, ".add") == 0) 5899 { 5900 /* For ":mkspell path/en.latin1.add" output file is 5901 * "path/en.latin1.add.spl". */ 5902 innames = &fnames[0]; 5903 incount = 1; 5904 vim_snprintf((char *)wfname, MAXPATHL, "%s.spl", fnames[0]); 5905 } 5906 else if (fcount == 1) 5907 { 5908 /* For ":mkspell path/vim" output file is "path/vim.latin1.spl". */ 5909 innames = &fnames[0]; 5910 incount = 1; 5911 vim_snprintf((char *)wfname, MAXPATHL, SPL_FNAME_TMPL, 5912 fnames[0], spin.si_ascii ? 
(char_u *)"ascii" : spell_enc()); 5913 } 5914 else if (len > 4 && STRCMP(fnames[0] + len - 4, ".spl") == 0) 5915 { 5916 /* Name ends in ".spl", use as the file name. */ 5917 vim_strncpy(wfname, fnames[0], MAXPATHL - 1); 5918 } 5919 else 5920 /* Name should be language, make the file name from it. */ 5921 vim_snprintf((char *)wfname, MAXPATHL, SPL_FNAME_TMPL, 5922 fnames[0], spin.si_ascii ? (char_u *)"ascii" : spell_enc()); 5923 5924 /* Check for .ascii.spl. */ 5925 if (strstr((char *)gettail(wfname), SPL_FNAME_ASCII) != NULL) 5926 spin.si_ascii = TRUE; 5927 5928 /* Check for .add.spl. */ 5929 if (strstr((char *)gettail(wfname), SPL_FNAME_ADD) != NULL) 5930 spin.si_add = TRUE; 5931 } 5932 5933 if (incount <= 0) 5934 emsg(_(e_invarg)); /* need at least output and input names */ 5935 else if (vim_strchr(gettail(wfname), '_') != NULL) 5936 emsg(_("E751: Output file name must not have region name")); 5937 else if (incount > MAXREGIONS) 5938 semsg(_("E754: Only up to %d regions supported"), MAXREGIONS); 5939 else 5940 { 5941 /* Check for overwriting before doing things that may take a lot of 5942 * time. */ 5943 if (!over_write && mch_stat((char *)wfname, &st) >= 0) 5944 { 5945 emsg(_(e_exists)); 5946 goto theend; 5947 } 5948 if (mch_isdir(wfname)) 5949 { 5950 semsg(_(e_isadir2), wfname); 5951 goto theend; 5952 } 5953 5954 fname = alloc(MAXPATHL); 5955 if (fname == NULL) 5956 goto theend; 5957 5958 /* 5959 * Init the aff and dic pointers. 5960 * Get the region names if there are more than 2 arguments. 
5961 */ 5962 for (i = 0; i < incount; ++i) 5963 { 5964 afile[i] = NULL; 5965 5966 if (incount > 1) 5967 { 5968 len = (int)STRLEN(innames[i]); 5969 if (STRLEN(gettail(innames[i])) < 5 5970 || innames[i][len - 3] != '_') 5971 { 5972 semsg(_("E755: Invalid region in %s"), innames[i]); 5973 goto theend; 5974 } 5975 spin.si_region_name[i * 2] = TOLOWER_ASC(innames[i][len - 2]); 5976 spin.si_region_name[i * 2 + 1] = 5977 TOLOWER_ASC(innames[i][len - 1]); 5978 } 5979 } 5980 spin.si_region_count = incount; 5981 5982 spin.si_foldroot = wordtree_alloc(&spin); 5983 spin.si_keeproot = wordtree_alloc(&spin); 5984 spin.si_prefroot = wordtree_alloc(&spin); 5985 if (spin.si_foldroot == NULL 5986 || spin.si_keeproot == NULL 5987 || spin.si_prefroot == NULL) 5988 { 5989 free_blocks(spin.si_blocks); 5990 goto theend; 5991 } 5992 5993 /* When not producing a .add.spl file clear the character table when 5994 * we encounter one in the .aff file. This means we dump the current 5995 * one in the .spl file if the .aff file doesn't define one. That's 5996 * better than guessing the contents, the table will match a 5997 * previously loaded spell file. */ 5998 if (!spin.si_add) 5999 spin.si_clear_chartab = TRUE; 6000 6001 /* 6002 * Read all the .aff and .dic files. 6003 * Text is converted to 'encoding'. 6004 * Words are stored in the case-folded and keep-case trees. 6005 */ 6006 for (i = 0; i < incount && !error; ++i) 6007 { 6008 spin.si_conv.vc_type = CONV_NONE; 6009 spin.si_region = 1 << i; 6010 6011 vim_snprintf((char *)fname, MAXPATHL, "%s.aff", innames[i]); 6012 if (mch_stat((char *)fname, &st) >= 0) 6013 { 6014 /* Read the .aff file. Will init "spin->si_conv" based on the 6015 * "SET" line. */ 6016 afile[i] = spell_read_aff(&spin, fname); 6017 if (afile[i] == NULL) 6018 error = TRUE; 6019 else 6020 { 6021 /* Read the .dic file and store the words in the trees. 
*/ 6022 vim_snprintf((char *)fname, MAXPATHL, "%s.dic", 6023 innames[i]); 6024 if (spell_read_dic(&spin, fname, afile[i]) == FAIL) 6025 error = TRUE; 6026 } 6027 } 6028 else 6029 { 6030 /* No .aff file, try reading the file as a word list. Store 6031 * the words in the trees. */ 6032 if (spell_read_wordfile(&spin, innames[i]) == FAIL) 6033 error = TRUE; 6034 } 6035 6036 /* Free any conversion stuff. */ 6037 convert_setup(&spin.si_conv, NULL, NULL); 6038 } 6039 6040 if (spin.si_compflags != NULL && spin.si_nobreak) 6041 msg(_("Warning: both compounding and NOBREAK specified")); 6042 6043 if (!error && !got_int) 6044 { 6045 /* 6046 * Combine tails in the tree. 6047 */ 6048 spell_message(&spin, (char_u *)_(msg_compressing)); 6049 wordtree_compress(&spin, spin.si_foldroot); 6050 wordtree_compress(&spin, spin.si_keeproot); 6051 wordtree_compress(&spin, spin.si_prefroot); 6052 } 6053 6054 if (!error && !got_int) 6055 { 6056 /* 6057 * Write the info in the spell file. 6058 */ 6059 vim_snprintf((char *)IObuff, IOSIZE, 6060 _("Writing spell file %s..."), wfname); 6061 spell_message(&spin, IObuff); 6062 6063 error = write_vim_spell(&spin, wfname) == FAIL; 6064 6065 spell_message(&spin, (char_u *)_("Done!")); 6066 vim_snprintf((char *)IObuff, IOSIZE, 6067 _("Estimated runtime memory use: %d bytes"), spin.si_memtot); 6068 spell_message(&spin, IObuff); 6069 6070 /* 6071 * If the file is loaded need to reload it. 6072 */ 6073 if (!error) 6074 spell_reload_one(wfname, added_word); 6075 } 6076 6077 /* Free the allocated memory. */ 6078 ga_clear(&spin.si_rep); 6079 ga_clear(&spin.si_repsal); 6080 ga_clear(&spin.si_sal); 6081 ga_clear(&spin.si_map); 6082 ga_clear(&spin.si_comppat); 6083 ga_clear(&spin.si_prefcond); 6084 hash_clear_all(&spin.si_commonwords, 0); 6085 6086 /* Free the .aff file structures. */ 6087 for (i = 0; i < incount; ++i) 6088 if (afile[i] != NULL) 6089 spell_free_aff(afile[i]); 6090 6091 /* Free all the bits and pieces at once. 
*/ 6092 free_blocks(spin.si_blocks); 6093 6094 /* 6095 * If there is soundfolding info and no NOSUGFILE item create the 6096 * .sug file with the soundfolded word trie. 6097 */ 6098 if (spin.si_sugtime != 0 && !error && !got_int) 6099 spell_make_sugfile(&spin, wfname); 6100 6101 } 6102 6103 theend: 6104 vim_free(fname); 6105 vim_free(wfname); 6106 } 6107 6108 /* 6109 * Display a message for spell file processing when 'verbose' is set or using 6110 * ":mkspell". "str" can be IObuff. 6111 */ 6112 static void 6113 spell_message(spellinfo_T *spin, char_u *str) 6114 { 6115 if (spin->si_verbose || p_verbose > 2) 6116 { 6117 if (!spin->si_verbose) 6118 verbose_enter(); 6119 msg((char *)str); 6120 out_flush(); 6121 if (!spin->si_verbose) 6122 verbose_leave(); 6123 } 6124 } 6125 6126 /* 6127 * ":[count]spellgood {word}" 6128 * ":[count]spellwrong {word}" 6129 * ":[count]spellundo {word}" 6130 */ 6131 void 6132 ex_spell(exarg_T *eap) 6133 { 6134 spell_add_word(eap->arg, (int)STRLEN(eap->arg), eap->cmdidx == CMD_spellwrong, 6135 eap->forceit ? 0 : (int)eap->line2, 6136 eap->cmdidx == CMD_spellundo); 6137 } 6138 6139 /* 6140 * Add "word[len]" to 'spellfile' as a good or bad word. 6141 */ 6142 void 6143 spell_add_word( 6144 char_u *word, 6145 int len, 6146 int bad, 6147 int idx, /* "zG" and "zW": zero, otherwise index in 6148 'spellfile' */ 6149 int undo) /* TRUE for "zug", "zuG", "zuw" and "zuW" */ 6150 { 6151 FILE *fd = NULL; 6152 buf_T *buf = NULL; 6153 int new_spf = FALSE; 6154 char_u *fname; 6155 char_u *fnamebuf = NULL; 6156 char_u line[MAXWLEN * 2]; 6157 long fpos, fpos_next = 0; 6158 int i; 6159 char_u *spf; 6160 6161 if (idx == 0) /* use internal wordlist */ 6162 { 6163 if (int_wordlist == NULL) 6164 { 6165 int_wordlist = vim_tempname('s', FALSE); 6166 if (int_wordlist == NULL) 6167 return; 6168 } 6169 fname = int_wordlist; 6170 } 6171 else 6172 { 6173 /* If 'spellfile' isn't set figure out a good default value. 
*/ 6174 if (*curwin->w_s->b_p_spf == NUL) 6175 { 6176 init_spellfile(); 6177 new_spf = TRUE; 6178 } 6179 6180 if (*curwin->w_s->b_p_spf == NUL) 6181 { 6182 semsg(_(e_notset), "spellfile"); 6183 return; 6184 } 6185 fnamebuf = alloc(MAXPATHL); 6186 if (fnamebuf == NULL) 6187 return; 6188 6189 for (spf = curwin->w_s->b_p_spf, i = 1; *spf != NUL; ++i) 6190 { 6191 copy_option_part(&spf, fnamebuf, MAXPATHL, ","); 6192 if (i == idx) 6193 break; 6194 if (*spf == NUL) 6195 { 6196 semsg(_("E765: 'spellfile' does not have %d entries"), idx); 6197 vim_free(fnamebuf); 6198 return; 6199 } 6200 } 6201 6202 /* Check that the user isn't editing the .add file somewhere. */ 6203 buf = buflist_findname_exp(fnamebuf); 6204 if (buf != NULL && buf->b_ml.ml_mfp == NULL) 6205 buf = NULL; 6206 if (buf != NULL && bufIsChanged(buf)) 6207 { 6208 emsg(_(e_bufloaded)); 6209 vim_free(fnamebuf); 6210 return; 6211 } 6212 6213 fname = fnamebuf; 6214 } 6215 6216 if (bad || undo) 6217 { 6218 /* When the word appears as good word we need to remove that one, 6219 * since its flags sort before the one with WF_BANNED. */ 6220 fd = mch_fopen((char *)fname, "r"); 6221 if (fd != NULL) 6222 { 6223 while (!vim_fgets(line, MAXWLEN * 2, fd)) 6224 { 6225 fpos = fpos_next; 6226 fpos_next = ftell(fd); 6227 if (STRNCMP(word, line, len) == 0 6228 && (line[len] == '/' || line[len] < ' ')) 6229 { 6230 /* Found duplicate word. Remove it by writing a '#' at 6231 * the start of the line. Mixing reading and writing 6232 * doesn't work for all systems, close the file first. 
*/ 6233 fclose(fd); 6234 fd = mch_fopen((char *)fname, "r+"); 6235 if (fd == NULL) 6236 break; 6237 if (fseek(fd, fpos, SEEK_SET) == 0) 6238 { 6239 fputc('#', fd); 6240 if (undo) 6241 { 6242 home_replace(NULL, fname, NameBuff, MAXPATHL, TRUE); 6243 smsg(_("Word '%.*s' removed from %s"), 6244 len, word, NameBuff); 6245 } 6246 } 6247 fseek(fd, fpos_next, SEEK_SET); 6248 } 6249 } 6250 if (fd != NULL) 6251 fclose(fd); 6252 } 6253 } 6254 6255 if (!undo) 6256 { 6257 fd = mch_fopen((char *)fname, "a"); 6258 if (fd == NULL && new_spf) 6259 { 6260 char_u *p; 6261 6262 /* We just initialized the 'spellfile' option and can't open the 6263 * file. We may need to create the "spell" directory first. We 6264 * already checked the runtime directory is writable in 6265 * init_spellfile(). */ 6266 if (!dir_of_file_exists(fname) && (p = gettail_sep(fname)) != fname) 6267 { 6268 int c = *p; 6269 6270 /* The directory doesn't exist. Try creating it and opening 6271 * the file again. */ 6272 *p = NUL; 6273 vim_mkdir(fname, 0755); 6274 *p = c; 6275 fd = mch_fopen((char *)fname, "a"); 6276 } 6277 } 6278 6279 if (fd == NULL) 6280 semsg(_(e_notopen), fname); 6281 else 6282 { 6283 if (bad) 6284 fprintf(fd, "%.*s/!\n", len, word); 6285 else 6286 fprintf(fd, "%.*s\n", len, word); 6287 fclose(fd); 6288 6289 home_replace(NULL, fname, NameBuff, MAXPATHL, TRUE); 6290 smsg(_("Word '%.*s' added to %s"), len, word, NameBuff); 6291 } 6292 } 6293 6294 if (fd != NULL) 6295 { 6296 /* Update the .add.spl file. */ 6297 mkspell(1, &fname, FALSE, TRUE, TRUE); 6298 6299 /* If the .add file is edited somewhere, reload it. */ 6300 if (buf != NULL) 6301 buf_reload(buf, buf->b_orig_mode); 6302 6303 redraw_all_later(SOME_VALID); 6304 } 6305 vim_free(fnamebuf); 6306 } 6307 6308 /* 6309 * Initialize 'spellfile' for the current buffer. 
6310 */ 6311 static void 6312 init_spellfile(void) 6313 { 6314 char_u *buf; 6315 int l; 6316 char_u *fname; 6317 char_u *rtp; 6318 char_u *lend; 6319 int aspath = FALSE; 6320 char_u *lstart = curbuf->b_s.b_p_spl; 6321 6322 if (*curwin->w_s->b_p_spl != NUL && curwin->w_s->b_langp.ga_len > 0) 6323 { 6324 buf = alloc(MAXPATHL); 6325 if (buf == NULL) 6326 return; 6327 6328 /* Find the end of the language name. Exclude the region. If there 6329 * is a path separator remember the start of the tail. */ 6330 for (lend = curwin->w_s->b_p_spl; *lend != NUL 6331 && vim_strchr((char_u *)",._", *lend) == NULL; ++lend) 6332 if (vim_ispathsep(*lend)) 6333 { 6334 aspath = TRUE; 6335 lstart = lend + 1; 6336 } 6337 6338 /* Loop over all entries in 'runtimepath'. Use the first one where we 6339 * are allowed to write. */ 6340 rtp = p_rtp; 6341 while (*rtp != NUL) 6342 { 6343 if (aspath) 6344 /* Use directory of an entry with path, e.g., for 6345 * "/dir/lg.utf-8.spl" use "/dir". */ 6346 vim_strncpy(buf, curbuf->b_s.b_p_spl, 6347 lstart - curbuf->b_s.b_p_spl - 1); 6348 else 6349 /* Copy the path from 'runtimepath' to buf[]. */ 6350 copy_option_part(&rtp, buf, MAXPATHL, ","); 6351 if (filewritable(buf) == 2) 6352 { 6353 /* Use the first language name from 'spelllang' and the 6354 * encoding used in the first loaded .spl file. */ 6355 if (aspath) 6356 vim_strncpy(buf, curbuf->b_s.b_p_spl, 6357 lend - curbuf->b_s.b_p_spl); 6358 else 6359 { 6360 /* Create the "spell" directory if it doesn't exist yet. 
*/ 6361 l = (int)STRLEN(buf); 6362 vim_snprintf((char *)buf + l, MAXPATHL - l, "/spell"); 6363 if (filewritable(buf) != 2) 6364 vim_mkdir(buf, 0755); 6365 6366 l = (int)STRLEN(buf); 6367 vim_snprintf((char *)buf + l, MAXPATHL - l, 6368 "/%.*s", (int)(lend - lstart), lstart); 6369 } 6370 l = (int)STRLEN(buf); 6371 fname = LANGP_ENTRY(curwin->w_s->b_langp, 0) 6372 ->lp_slang->sl_fname; 6373 vim_snprintf((char *)buf + l, MAXPATHL - l, ".%s.add", 6374 fname != NULL 6375 && strstr((char *)gettail(fname), ".ascii.") != NULL 6376 ? (char_u *)"ascii" : spell_enc()); 6377 set_option_value((char_u *)"spellfile", 0L, buf, OPT_LOCAL); 6378 break; 6379 } 6380 aspath = FALSE; 6381 } 6382 6383 vim_free(buf); 6384 } 6385 } 6386 6387 6388 6389 /* 6390 * Set the spell character tables from strings in the affix file. 6391 */ 6392 static int 6393 set_spell_chartab(char_u *fol, char_u *low, char_u *upp) 6394 { 6395 /* We build the new tables here first, so that we can compare with the 6396 * previous one. */ 6397 spelltab_T new_st; 6398 char_u *pf = fol, *pl = low, *pu = upp; 6399 int f, l, u; 6400 6401 clear_spell_chartab(&new_st); 6402 6403 while (*pf != NUL) 6404 { 6405 if (*pl == NUL || *pu == NUL) 6406 { 6407 emsg(_(e_affform)); 6408 return FAIL; 6409 } 6410 f = mb_ptr2char_adv(&pf); 6411 l = mb_ptr2char_adv(&pl); 6412 u = mb_ptr2char_adv(&pu); 6413 6414 /* Every character that appears is a word character. */ 6415 if (f < 256) 6416 new_st.st_isw[f] = TRUE; 6417 if (l < 256) 6418 new_st.st_isw[l] = TRUE; 6419 if (u < 256) 6420 new_st.st_isw[u] = TRUE; 6421 6422 /* if "LOW" and "FOL" are not the same the "LOW" char needs 6423 * case-folding */ 6424 if (l < 256 && l != f) 6425 { 6426 if (f >= 256) 6427 { 6428 emsg(_(e_affrange)); 6429 return FAIL; 6430 } 6431 new_st.st_fold[l] = f; 6432 } 6433 6434 /* if "UPP" and "FOL" are not the same the "UPP" char needs 6435 * case-folding, it's upper case and the "UPP" is the upper case of 6436 * "FOL" . 
*/ 6437 if (u < 256 && u != f) 6438 { 6439 if (f >= 256) 6440 { 6441 emsg(_(e_affrange)); 6442 return FAIL; 6443 } 6444 new_st.st_fold[u] = f; 6445 new_st.st_isu[u] = TRUE; 6446 new_st.st_upper[f] = u; 6447 } 6448 } 6449 6450 if (*pl != NUL || *pu != NUL) 6451 { 6452 emsg(_(e_affform)); 6453 return FAIL; 6454 } 6455 6456 return set_spell_finish(&new_st); 6457 } 6458 6459 /* 6460 * Set the spell character tables from strings in the .spl file. 6461 */ 6462 static void 6463 set_spell_charflags( 6464 char_u *flags, 6465 int cnt, /* length of "flags" */ 6466 char_u *fol) 6467 { 6468 /* We build the new tables here first, so that we can compare with the 6469 * previous one. */ 6470 spelltab_T new_st; 6471 int i; 6472 char_u *p = fol; 6473 int c; 6474 6475 clear_spell_chartab(&new_st); 6476 6477 for (i = 0; i < 128; ++i) 6478 { 6479 if (i < cnt) 6480 { 6481 new_st.st_isw[i + 128] = (flags[i] & CF_WORD) != 0; 6482 new_st.st_isu[i + 128] = (flags[i] & CF_UPPER) != 0; 6483 } 6484 6485 if (*p != NUL) 6486 { 6487 c = mb_ptr2char_adv(&p); 6488 new_st.st_fold[i + 128] = c; 6489 if (i + 128 != c && new_st.st_isu[i + 128] && c < 256) 6490 new_st.st_upper[c] = i + 128; 6491 } 6492 } 6493 6494 (void)set_spell_finish(&new_st); 6495 } 6496 6497 static int 6498 set_spell_finish(spelltab_T *new_st) 6499 { 6500 int i; 6501 6502 if (did_set_spelltab) 6503 { 6504 /* check that it's the same table */ 6505 for (i = 0; i < 256; ++i) 6506 { 6507 if (spelltab.st_isw[i] != new_st->st_isw[i] 6508 || spelltab.st_isu[i] != new_st->st_isu[i] 6509 || spelltab.st_fold[i] != new_st->st_fold[i] 6510 || spelltab.st_upper[i] != new_st->st_upper[i]) 6511 { 6512 emsg(_("E763: Word characters differ between spell files")); 6513 return FAIL; 6514 } 6515 } 6516 } 6517 else 6518 { 6519 /* copy the new spelltab into the one being used */ 6520 spelltab = *new_st; 6521 did_set_spelltab = TRUE; 6522 } 6523 6524 return OK; 6525 } 6526 6527 /* 6528 * Write the table with prefix conditions to the .spl file. 
6529 * When "fd" is NULL only count the length of what is written. 6530 */ 6531 static int 6532 write_spell_prefcond(FILE *fd, garray_T *gap) 6533 { 6534 int i; 6535 char_u *p; 6536 int len; 6537 int totlen; 6538 size_t x = 1; /* collect return value of fwrite() */ 6539 6540 if (fd != NULL) 6541 put_bytes(fd, (long_u)gap->ga_len, 2); /* <prefcondcnt> */ 6542 6543 totlen = 2 + gap->ga_len; /* length of <prefcondcnt> and <condlen> bytes */ 6544 6545 for (i = 0; i < gap->ga_len; ++i) 6546 { 6547 /* <prefcond> : <condlen> <condstr> */ 6548 p = ((char_u **)gap->ga_data)[i]; 6549 if (p != NULL) 6550 { 6551 len = (int)STRLEN(p); 6552 if (fd != NULL) 6553 { 6554 fputc(len, fd); 6555 x &= fwrite(p, (size_t)len, (size_t)1, fd); 6556 } 6557 totlen += len; 6558 } 6559 else if (fd != NULL) 6560 fputc(0, fd); 6561 } 6562 6563 return totlen; 6564 } 6565 6566 6567 /* 6568 * Use map string "map" for languages "lp". 6569 */ 6570 static void 6571 set_map_str(slang_T *lp, char_u *map) 6572 { 6573 char_u *p; 6574 int headc = 0; 6575 int c; 6576 int i; 6577 6578 if (*map == NUL) 6579 { 6580 lp->sl_has_map = FALSE; 6581 return; 6582 } 6583 lp->sl_has_map = TRUE; 6584 6585 /* Init the array and hash tables empty. */ 6586 for (i = 0; i < 256; ++i) 6587 lp->sl_map_array[i] = 0; 6588 hash_init(&lp->sl_map_hash); 6589 6590 /* 6591 * The similar characters are stored separated with slashes: 6592 * "aaa/bbb/ccc/". Fill sl_map_array[c] with the character before c and 6593 * before the same slash. For characters above 255 sl_map_hash is used. 6594 */ 6595 for (p = map; *p != NUL; ) 6596 { 6597 c = mb_cptr2char_adv(&p); 6598 if (c == '/') 6599 headc = 0; 6600 else 6601 { 6602 if (headc == 0) 6603 headc = c; 6604 6605 /* Characters above 255 don't fit in sl_map_array[], put them in 6606 * the hash table. Each entry is the char, a NUL the headchar and 6607 * a NUL. 
*/ 6608 if (c >= 256) 6609 { 6610 int cl = mb_char2len(c); 6611 int headcl = mb_char2len(headc); 6612 char_u *b; 6613 hash_T hash; 6614 hashitem_T *hi; 6615 6616 b = alloc(cl + headcl + 2); 6617 if (b == NULL) 6618 return; 6619 mb_char2bytes(c, b); 6620 b[cl] = NUL; 6621 mb_char2bytes(headc, b + cl + 1); 6622 b[cl + 1 + headcl] = NUL; 6623 hash = hash_hash(b); 6624 hi = hash_lookup(&lp->sl_map_hash, b, hash); 6625 if (HASHITEM_EMPTY(hi)) 6626 hash_add_item(&lp->sl_map_hash, hi, b, hash); 6627 else 6628 { 6629 /* This should have been checked when generating the .spl 6630 * file. */ 6631 emsg(_("E783: duplicate char in MAP entry")); 6632 vim_free(b); 6633 } 6634 } 6635 else 6636 lp->sl_map_array[c] = headc; 6637 } 6638 } 6639 } 6640 6641 6642 #endif /* FEAT_SPELL */ 6643