1 /* vi:set ts=8 sts=4 sw=4 noet: 2 * 3 * VIM - Vi IMproved by Bram Moolenaar 4 * 5 * Do ":help uganda" in Vim to read copying and usage conditions. 6 * Do ":help credits" in Vim to see a list of people who contributed. 7 * See README.txt for an overview of the Vim source code. 8 */ 9 10 /* 11 * spellfile.c: code for reading and writing spell files. 12 * 13 * See spell.c for information about spell checking. 14 */ 15 16 /* 17 * Vim spell file format: <HEADER> 18 * <SECTIONS> 19 * <LWORDTREE> 20 * <KWORDTREE> 21 * <PREFIXTREE> 22 * 23 * <HEADER>: <fileID> <versionnr> 24 * 25 * <fileID> 8 bytes "VIMspell" 26 * <versionnr> 1 byte VIMSPELLVERSION 27 * 28 * 29 * Sections make it possible to add information to the .spl file without 30 * making it incompatible with previous versions. There are two kinds of 31 * sections: 32 * 1. Not essential for correct spell checking. E.g. for making suggestions. 33 * These are skipped when not supported. 34 * 2. Optional information, but essential for spell checking when present. 35 * E.g. conditions for affixes. When this section is present but not 36 * supported an error message is given. 37 * 38 * <SECTIONS>: <section> ... <sectionend> 39 * 40 * <section>: <sectionID> <sectionflags> <sectionlen> (section contents) 41 * 42 * <sectionID> 1 byte number from 0 to 254 identifying the section 43 * 44 * <sectionflags> 1 byte SNF_REQUIRED: this section is required for correct 45 * spell checking 46 * 47 * <sectionlen> 4 bytes length of section contents, MSB first 48 * 49 * <sectionend> 1 byte SN_END 50 * 51 * 52 * sectionID == SN_INFO: <infotext> 53 * <infotext> N bytes free format text with spell file info (version, 54 * website, etc) 55 * 56 * sectionID == SN_REGION: <regionname> ... 57 * <regionname> 2 bytes Up to MAXREGIONS region names: ca, au, etc. Lower 58 * case. First <regionname> is region 1. 59 * 60 * sectionID == SN_CHARFLAGS: <charflagslen> <charflags> 61 * <folcharslen> <folchars> 62 * <charflagslen> 1 byte Number of bytes in <charflags> (should be 128). 63 * <charflags> N bytes List of flags (first one is for character 128): 64 * 0x01 word character CF_WORD 65 * 0x02 upper-case character CF_UPPER 66 * <folcharslen> 2 bytes Number of bytes in <folchars>. 67 * <folchars> N bytes Folded characters, first one is for character 128. 68 * 69 * sectionID == SN_MIDWORD: <midword> 70 * <midword> N bytes Characters that are word characters only when used 71 * in the middle of a word. 72 * 73 * sectionID == SN_PREFCOND: <prefcondcnt> <prefcond> ... 74 * <prefcondcnt> 2 bytes Number of <prefcond> items following. 75 * <prefcond> : <condlen> <condstr> 76 * <condlen> 1 byte Length of <condstr>. 77 * <condstr> N bytes Condition for the prefix. 78 * 79 * sectionID == SN_REP: <repcount> <rep> ... 80 * <repcount> 2 bytes number of <rep> items, MSB first. 81 * <rep> : <repfromlen> <repfrom> <reptolen> <repto> 82 * <repfromlen> 1 byte length of <repfrom> 83 * <repfrom> N bytes "from" part of replacement 84 * <reptolen> 1 byte length of <repto> 85 * <repto> N bytes "to" part of replacement 86 * 87 * sectionID == SN_REPSAL: <repcount> <rep> ... 88 * just like SN_REP but for soundfolded words 89 * 90 * sectionID == SN_SAL: <salflags> <salcount> <sal> ... 91 * <salflags> 1 byte flags for soundsalike conversion: 92 * SAL_F0LLOWUP 93 * SAL_COLLAPSE 94 * SAL_REM_ACCENTS 95 * <salcount> 2 bytes number of <sal> items following 96 * <sal> : <salfromlen> <salfrom> <saltolen> <salto> 97 * <salfromlen> 1 byte length of <salfrom> 98 * <salfrom> N bytes "from" part of soundsalike 99 * <saltolen> 1 byte length of <salto> 100 * <salto> N bytes "to" part of soundsalike 101 * 102 * sectionID == SN_SOFO: <sofofromlen> <sofofrom> <sofotolen> <sofoto> 103 * <sofofromlen> 2 bytes length of <sofofrom> 104 * <sofofrom> N bytes "from" part of soundfold 105 * <sofotolen> 2 bytes length of <sofoto> 106 * <sofoto> N bytes "to" part of soundfold 107 * 108 * sectionID == SN_SUGFILE: <timestamp> 109 * <timestamp> 8 bytes time in seconds that must match with .sug file 110 * 111 * sectionID == SN_NOSPLITSUGS: nothing 112 * 113 * sectionID == SN_NOCOMPOUNDSUGS: nothing 114 * 115 * sectionID == SN_WORDS: <word> ... 116 * <word> N bytes NUL terminated common word 117 * 118 * sectionID == SN_MAP: <mapstr> 119 * <mapstr> N bytes String with sequences of similar characters, 120 * separated by slashes. 121 * 122 * sectionID == SN_COMPOUND: <compmax> <compminlen> <compsylmax> <compoptions> 123 * <comppatcount> <comppattern> ... <compflags> 124 * <compmax> 1 byte Maximum nr of words in compound word. 125 * <compminlen> 1 byte Minimal word length for compounding. 126 * <compsylmax> 1 byte Maximum nr of syllables in compound word. 127 * <compoptions> 2 bytes COMP_ flags. 128 * <comppatcount> 2 bytes number of <comppattern> following 129 * <compflags> N bytes Flags from COMPOUNDRULE items, separated by 130 * slashes. 131 * 132 * <comppattern>: <comppatlen> <comppattext> 133 * <comppatlen> 1 byte length of <comppattext> 134 * <comppattext> N bytes end or begin chars from CHECKCOMPOUNDPATTERN 135 * 136 * sectionID == SN_NOBREAK: (empty, its presence is what matters) 137 * 138 * sectionID == SN_SYLLABLE: <syllable> 139 * <syllable> N bytes String from SYLLABLE item. 140 * 141 * <LWORDTREE>: <wordtree> 142 * 143 * <KWORDTREE>: <wordtree> 144 * 145 * <PREFIXTREE>: <wordtree> 146 * 147 * 148 * <wordtree>: <nodecount> <nodedata> ... 149 * 150 * <nodecount> 4 bytes Number of nodes following. MSB first. 151 * 152 * <nodedata>: <siblingcount> <sibling> ... 153 * 154 * <siblingcount> 1 byte Number of siblings in this node. The siblings 155 * follow in sorted order. 156 * 157 * <sibling>: <byte> [ <nodeidx> <xbyte> 158 * | <flags> [<flags2>] [<region>] [<affixID>] 159 * | [<pflags>] <affixID> <prefcondnr> ] 160 * 161 * <byte> 1 byte Byte value of the sibling. Special cases: 162 * BY_NOFLAGS: End of word without flags and for all 163 * regions. 164 * For PREFIXTREE <affixID> and 165 * <prefcondnr> follow. 166 * BY_FLAGS: End of word, <flags> follow. 167 * For PREFIXTREE <pflags>, <affixID> 168 * and <prefcondnr> follow. 169 * BY_FLAGS2: End of word, <flags> and <flags2> 170 * follow. Not used in PREFIXTREE. 171 * BY_INDEX: Child of sibling is shared, <nodeidx> 172 * and <xbyte> follow. 173 * 174 * <nodeidx> 3 bytes Index of child for this sibling, MSB first. 175 * 176 * <xbyte> 1 byte byte value of the sibling. 177 * 178 * <flags> 1 byte bitmask of: 179 * WF_ALLCAP word must have only capitals 180 * WF_ONECAP first char of word must be capital 181 * WF_KEEPCAP keep-case word 182 * WF_FIXCAP keep-case word, all caps not allowed 183 * WF_RARE rare word 184 * WF_BANNED bad word 185 * WF_REGION <region> follows 186 * WF_AFX <affixID> follows 187 * 188 * <flags2> 1 byte Bitmask of: 189 * WF_HAS_AFF >> 8 word includes affix 190 * WF_NEEDCOMP >> 8 word only valid in compound 191 * WF_NOSUGGEST >> 8 word not used for suggestions 192 * WF_COMPROOT >> 8 word already a compound 193 * WF_NOCOMPBEF >> 8 no compounding before this word 194 * WF_NOCOMPAFT >> 8 no compounding after this word 195 * 196 * <pflags> 1 byte bitmask of: 197 * WFP_RARE rare prefix 198 * WFP_NC non-combining prefix 199 * WFP_UP letter after prefix made upper case 200 * 201 * <region> 1 byte Bitmask for regions in which word is valid. When 202 * omitted it's valid in all regions. 203 * Lowest bit is for region 1. 204 * 205 * <affixID> 1 byte ID of affix that can be used with this word. In 206 * PREFIXTREE used for the required prefix ID. 207 * 208 * <prefcondnr> 2 bytes Prefix condition number, index in <prefcond> list 209 * from HEADER. 210 * 211 * All text characters are in 'encoding', but stored as single bytes. 212 */ 213 214 /* 215 * Vim .sug file format: <SUGHEADER> 216 * <SUGWORDTREE> 217 * <SUGTABLE> 218 * 219 * <SUGHEADER>: <fileID> <versionnr> <timestamp> 220 * 221 * <fileID> 6 bytes "VIMsug" 222 * <versionnr> 1 byte VIMSUGVERSION 223 * <timestamp> 8 bytes timestamp that must match with .spl file 224 * 225 * 226 * <SUGWORDTREE>: <wordtree> (see above, no flags or region used) 227 * 228 * 229 * <SUGTABLE>: <sugwcount> <sugline> ... 230 * 231 * <sugwcount> 4 bytes number of <sugline> following 232 * 233 * <sugline>: <sugnr> ... NUL 234 * 235 * <sugnr>: X bytes word number that results in this soundfolded word, 236 * stored as an offset to the previous number in as 237 * few bytes as possible, see offset2bytes()) 238 */ 239 240 #include "vim.h" 241 242 #if defined(FEAT_SPELL) || defined(PROTO) 243 244 #ifndef UNIX /* it's in os_unix.h for Unix */ 245 # include <time.h> /* for time_t */ 246 #endif 247 248 #ifndef UNIX /* it's in os_unix.h for Unix */ 249 # include <time.h> /* for time_t */ 250 #endif 251 252 /* Special byte values for <byte>. Some are only used in the tree for 253 * postponed prefixes, some only in the other trees. This is a bit messy... */ 254 #define BY_NOFLAGS 0 /* end of word without flags or region; for 255 * postponed prefix: no <pflags> */ 256 #define BY_INDEX 1 /* child is shared, index follows */ 257 #define BY_FLAGS 2 /* end of word, <flags> byte follows; for 258 * postponed prefix: <pflags> follows */ 259 #define BY_FLAGS2 3 /* end of word, <flags> and <flags2> bytes 260 * follow; never used in prefix tree */ 261 #define BY_SPECIAL BY_FLAGS2 /* highest special byte value */ 262 263 /* Flags used in .spl file for soundsalike flags. */ 264 #define SAL_F0LLOWUP 1 265 #define SAL_COLLAPSE 2 266 #define SAL_REM_ACCENTS 4 267 268 #define VIMSPELLMAGIC "VIMspell" /* string at start of Vim spell file */ 269 #define VIMSPELLMAGICL 8 270 #define VIMSPELLVERSION 50 271 272 /* Section IDs. Only renumber them when VIMSPELLVERSION changes! */ 273 #define SN_REGION 0 /* <regionname> section */ 274 #define SN_CHARFLAGS 1 /* charflags section */ 275 #define SN_MIDWORD 2 /* <midword> section */ 276 #define SN_PREFCOND 3 /* <prefcond> section */ 277 #define SN_REP 4 /* REP items section */ 278 #define SN_SAL 5 /* SAL items section */ 279 #define SN_SOFO 6 /* soundfolding section */ 280 #define SN_MAP 7 /* MAP items section */ 281 #define SN_COMPOUND 8 /* compound words section */ 282 #define SN_SYLLABLE 9 /* syllable section */ 283 #define SN_NOBREAK 10 /* NOBREAK section */ 284 #define SN_SUGFILE 11 /* timestamp for .sug file */ 285 #define SN_REPSAL 12 /* REPSAL items section */ 286 #define SN_WORDS 13 /* common words */ 287 #define SN_NOSPLITSUGS 14 /* don't split word for suggestions */ 288 #define SN_INFO 15 /* info section */ 289 #define SN_NOCOMPOUNDSUGS 16 /* don't compound for suggestions */ 290 #define SN_END 255 /* end of sections */ 291 292 #define SNF_REQUIRED 1 /* <sectionflags>: required section */ 293 294 #define CF_WORD 0x01 295 #define CF_UPPER 0x02 296 297 static int set_spell_finish(spelltab_T *new_st); 298 static int write_spell_prefcond(FILE *fd, garray_T *gap); 299 static int read_region_section(FILE *fd, slang_T *slang, int len); 300 static int read_charflags_section(FILE *fd); 301 static int read_prefcond_section(FILE *fd, slang_T *lp); 302 static int read_rep_section(FILE *fd, garray_T *gap, short *first); 303 static int read_sal_section(FILE *fd, slang_T *slang); 304 static int read_words_section(FILE *fd, slang_T *lp, int len); 305 static int read_sofo_section(FILE *fd, slang_T *slang); 306 static int read_compound(FILE *fd, slang_T *slang, int len); 307 static int set_sofo(slang_T *lp, char_u *from, char_u *to); 308 static void set_sal_first(slang_T *lp); 309 static int *mb_str2wide(char_u *s); 310 static int spell_read_tree(FILE *fd, char_u **bytsp, idx_T **idxsp, int prefixtree, int prefixcnt); 311 static idx_T read_tree_node(FILE *fd, char_u *byts, idx_T *idxs, int maxidx, idx_T startidx, int prefixtree, int maxprefcondnr); 312 static void set_spell_charflags(char_u *flags, int cnt, char_u *upp); 313 static int set_spell_chartab(char_u *fol, char_u *low, char_u *upp); 314 static void set_map_str(slang_T *lp, char_u *map); 315 316 317 static char *e_spell_trunc = N_("E758: Truncated spell file"); 318 static char *e_afftrailing = N_("Trailing text in %s line %d: %s"); 319 static char *e_affname = N_("Affix name too long in %s line %d: %s"); 320 static char *e_affform = N_("E761: Format error in affix file FOL, LOW or UPP"); 321 static char *e_affrange = N_("E762: Character in FOL, LOW or UPP is out of range"); 322 static char *msg_compressing = N_("Compressing word tree..."); 323 324 /* 325 * Load one spell file and store the info into a slang_T. 326 * 327 * This is invoked in three ways: 328 * - From spell_load_cb() to load a spell file for the first time. "lang" is 329 * the language name, "old_lp" is NULL. Will allocate an slang_T. 330 * - To reload a spell file that was changed. "lang" is NULL and "old_lp" 331 * points to the existing slang_T. 332 * - Just after writing a .spl file; it's read back to produce the .sug file. 333 * "old_lp" is NULL and "lang" is NULL. Will allocate an slang_T. 334 * 335 * Returns the slang_T the spell file was loaded into. NULL for error. 336 */ 337 slang_T * 338 spell_load_file( 339 char_u *fname, 340 char_u *lang, 341 slang_T *old_lp, 342 int silent) /* no error if file doesn't exist */ 343 { 344 FILE *fd; 345 char_u buf[VIMSPELLMAGICL]; 346 char_u *p; 347 int i; 348 int n; 349 int len; 350 char_u *save_sourcing_name = sourcing_name; 351 linenr_T save_sourcing_lnum = sourcing_lnum; 352 slang_T *lp = NULL; 353 int c = 0; 354 int res; 355 356 fd = mch_fopen((char *)fname, "r"); 357 if (fd == NULL) 358 { 359 if (!silent) 360 semsg(_(e_notopen), fname); 361 else if (p_verbose > 2) 362 { 363 verbose_enter(); 364 smsg((const char *)e_notopen, fname); 365 verbose_leave(); 366 } 367 goto endFAIL; 368 } 369 if (p_verbose > 2) 370 { 371 verbose_enter(); 372 smsg(_("Reading spell file \"%s\""), fname); 373 verbose_leave(); 374 } 375 376 if (old_lp == NULL) 377 { 378 lp = slang_alloc(lang); 379 if (lp == NULL) 380 goto endFAIL; 381 382 /* Remember the file name, used to reload the file when it's updated. */ 383 lp->sl_fname = vim_strsave(fname); 384 if (lp->sl_fname == NULL) 385 goto endFAIL; 386 387 /* Check for .add.spl (_add.spl for VMS). */ 388 lp->sl_add = strstr((char *)gettail(fname), SPL_FNAME_ADD) != NULL; 389 } 390 else 391 lp = old_lp; 392 393 /* Set sourcing_name, so that error messages mention the file name. */ 394 sourcing_name = fname; 395 sourcing_lnum = 0; 396 397 /* 398 * <HEADER>: <fileID> 399 */ 400 for (i = 0; i < VIMSPELLMAGICL; ++i) 401 buf[i] = getc(fd); /* <fileID> */ 402 if (STRNCMP(buf, VIMSPELLMAGIC, VIMSPELLMAGICL) != 0) 403 { 404 emsg(_("E757: This does not look like a spell file")); 405 goto endFAIL; 406 } 407 c = getc(fd); /* <versionnr> */ 408 if (c < VIMSPELLVERSION) 409 { 410 emsg(_("E771: Old spell file, needs to be updated")); 411 goto endFAIL; 412 } 413 else if (c > VIMSPELLVERSION) 414 { 415 emsg(_("E772: Spell file is for newer version of Vim")); 416 goto endFAIL; 417 } 418 419 420 /* 421 * <SECTIONS>: <section> ... <sectionend> 422 * <section>: <sectionID> <sectionflags> <sectionlen> (section contents) 423 */ 424 for (;;) 425 { 426 n = getc(fd); /* <sectionID> or <sectionend> */ 427 if (n == SN_END) 428 break; 429 c = getc(fd); /* <sectionflags> */ 430 len = get4c(fd); /* <sectionlen> */ 431 if (len < 0) 432 goto truncerr; 433 434 res = 0; 435 switch (n) 436 { 437 case SN_INFO: 438 lp->sl_info = read_string(fd, len); /* <infotext> */ 439 if (lp->sl_info == NULL) 440 goto endFAIL; 441 break; 442 443 case SN_REGION: 444 res = read_region_section(fd, lp, len); 445 break; 446 447 case SN_CHARFLAGS: 448 res = read_charflags_section(fd); 449 break; 450 451 case SN_MIDWORD: 452 lp->sl_midword = read_string(fd, len); /* <midword> */ 453 if (lp->sl_midword == NULL) 454 goto endFAIL; 455 break; 456 457 case SN_PREFCOND: 458 res = read_prefcond_section(fd, lp); 459 break; 460 461 case SN_REP: 462 res = read_rep_section(fd, &lp->sl_rep, lp->sl_rep_first); 463 break; 464 465 case SN_REPSAL: 466 res = read_rep_section(fd, &lp->sl_repsal, lp->sl_repsal_first); 467 break; 468 469 case SN_SAL: 470 res = read_sal_section(fd, lp); 471 break; 472 473 case SN_SOFO: 474 res = read_sofo_section(fd, lp); 475 break; 476 477 case SN_MAP: 478 p = read_string(fd, len); /* <mapstr> */ 479 if (p == NULL) 480 goto endFAIL; 481 set_map_str(lp, p); 482 vim_free(p); 483 break; 484 485 case SN_WORDS: 486 res = read_words_section(fd, lp, len); 487 break; 488 489 case SN_SUGFILE: 490 lp->sl_sugtime = get8ctime(fd); /* <timestamp> */ 491 break; 492 493 case SN_NOSPLITSUGS: 494 lp->sl_nosplitsugs = TRUE; 495 break; 496 497 case SN_NOCOMPOUNDSUGS: 498 lp->sl_nocompoundsugs = TRUE; 499 break; 500 501 case SN_COMPOUND: 502 res = read_compound(fd, lp, len); 503 break; 504 505 case SN_NOBREAK: 506 lp->sl_nobreak = TRUE; 507 break; 508 509 case SN_SYLLABLE: 510 lp->sl_syllable = read_string(fd, len); /* <syllable> */ 511 if (lp->sl_syllable == NULL) 512 goto endFAIL; 513 if (init_syl_tab(lp) == FAIL) 514 goto endFAIL; 515 break; 516 517 default: 518 /* Unsupported section. When it's required give an error 519 * message. When it's not required skip the contents. */ 520 if (c & SNF_REQUIRED) 521 { 522 emsg(_("E770: Unsupported section in spell file")); 523 goto endFAIL; 524 } 525 while (--len >= 0) 526 if (getc(fd) < 0) 527 goto truncerr; 528 break; 529 } 530 someerror: 531 if (res == SP_FORMERROR) 532 { 533 emsg(_(e_format)); 534 goto endFAIL; 535 } 536 if (res == SP_TRUNCERROR) 537 { 538 truncerr: 539 emsg(_(e_spell_trunc)); 540 goto endFAIL; 541 } 542 if (res == SP_OTHERERROR) 543 goto endFAIL; 544 } 545 546 /* <LWORDTREE> */ 547 res = spell_read_tree(fd, &lp->sl_fbyts, &lp->sl_fidxs, FALSE, 0); 548 if (res != 0) 549 goto someerror; 550 551 /* <KWORDTREE> */ 552 res = spell_read_tree(fd, &lp->sl_kbyts, &lp->sl_kidxs, FALSE, 0); 553 if (res != 0) 554 goto someerror; 555 556 /* <PREFIXTREE> */ 557 res = spell_read_tree(fd, &lp->sl_pbyts, &lp->sl_pidxs, TRUE, 558 lp->sl_prefixcnt); 559 if (res != 0) 560 goto someerror; 561 562 /* For a new file link it in the list of spell files. */ 563 if (old_lp == NULL && lang != NULL) 564 { 565 lp->sl_next = first_lang; 566 first_lang = lp; 567 } 568 569 goto endOK; 570 571 endFAIL: 572 if (lang != NULL) 573 /* truncating the name signals the error to spell_load_lang() */ 574 *lang = NUL; 575 if (lp != NULL && old_lp == NULL) 576 slang_free(lp); 577 lp = NULL; 578 579 endOK: 580 if (fd != NULL) 581 fclose(fd); 582 sourcing_name = save_sourcing_name; 583 sourcing_lnum = save_sourcing_lnum; 584 585 return lp; 586 } 587 588 /* 589 * Fill in the wordcount fields for a trie. 590 * Returns the total number of words. 591 */ 592 static void 593 tree_count_words(char_u *byts, idx_T *idxs) 594 { 595 int depth; 596 idx_T arridx[MAXWLEN]; 597 int curi[MAXWLEN]; 598 int c; 599 idx_T n; 600 int wordcount[MAXWLEN]; 601 602 arridx[0] = 0; 603 curi[0] = 1; 604 wordcount[0] = 0; 605 depth = 0; 606 while (depth >= 0 && !got_int) 607 { 608 if (curi[depth] > byts[arridx[depth]]) 609 { 610 /* Done all bytes at this node, go up one level. */ 611 idxs[arridx[depth]] = wordcount[depth]; 612 if (depth > 0) 613 wordcount[depth - 1] += wordcount[depth]; 614 615 --depth; 616 fast_breakcheck(); 617 } 618 else 619 { 620 /* Do one more byte at this node. */ 621 n = arridx[depth] + curi[depth]; 622 ++curi[depth]; 623 624 c = byts[n]; 625 if (c == 0) 626 { 627 /* End of word, count it. */ 628 ++wordcount[depth]; 629 630 /* Skip over any other NUL bytes (same word with different 631 * flags). */ 632 while (byts[n + 1] == 0) 633 { 634 ++n; 635 ++curi[depth]; 636 } 637 } 638 else 639 { 640 /* Normal char, go one level deeper to count the words. */ 641 ++depth; 642 arridx[depth] = idxs[n]; 643 curi[depth] = 1; 644 wordcount[depth] = 0; 645 } 646 } 647 } 648 } 649 650 /* 651 * Load the .sug files for languages that have one and weren't loaded yet. 652 */ 653 void 654 suggest_load_files(void) 655 { 656 langp_T *lp; 657 int lpi; 658 slang_T *slang; 659 char_u *dotp; 660 FILE *fd; 661 char_u buf[MAXWLEN]; 662 int i; 663 time_t timestamp; 664 int wcount; 665 int wordnr; 666 garray_T ga; 667 int c; 668 669 /* Do this for all languages that support sound folding. */ 670 for (lpi = 0; lpi < curwin->w_s->b_langp.ga_len; ++lpi) 671 { 672 lp = LANGP_ENTRY(curwin->w_s->b_langp, lpi); 673 slang = lp->lp_slang; 674 if (slang->sl_sugtime != 0 && !slang->sl_sugloaded) 675 { 676 /* Change ".spl" to ".sug" and open the file. When the file isn't 677 * found silently skip it. Do set "sl_sugloaded" so that we 678 * don't try again and again. */ 679 slang->sl_sugloaded = TRUE; 680 681 dotp = vim_strrchr(slang->sl_fname, '.'); 682 if (dotp == NULL || fnamecmp(dotp, ".spl") != 0) 683 continue; 684 STRCPY(dotp, ".sug"); 685 fd = mch_fopen((char *)slang->sl_fname, "r"); 686 if (fd == NULL) 687 goto nextone; 688 689 /* 690 * <SUGHEADER>: <fileID> <versionnr> <timestamp> 691 */ 692 for (i = 0; i < VIMSUGMAGICL; ++i) 693 buf[i] = getc(fd); /* <fileID> */ 694 if (STRNCMP(buf, VIMSUGMAGIC, VIMSUGMAGICL) != 0) 695 { 696 semsg(_("E778: This does not look like a .sug file: %s"), 697 slang->sl_fname); 698 goto nextone; 699 } 700 c = getc(fd); /* <versionnr> */ 701 if (c < VIMSUGVERSION) 702 { 703 semsg(_("E779: Old .sug file, needs to be updated: %s"), 704 slang->sl_fname); 705 goto nextone; 706 } 707 else if (c > VIMSUGVERSION) 708 { 709 semsg(_("E780: .sug file is for newer version of Vim: %s"), 710 slang->sl_fname); 711 goto nextone; 712 } 713 714 /* Check the timestamp, it must be exactly the same as the one in 715 * the .spl file. Otherwise the word numbers won't match. */ 716 timestamp = get8ctime(fd); /* <timestamp> */ 717 if (timestamp != slang->sl_sugtime) 718 { 719 semsg(_("E781: .sug file doesn't match .spl file: %s"), 720 slang->sl_fname); 721 goto nextone; 722 } 723 724 /* 725 * <SUGWORDTREE>: <wordtree> 726 * Read the trie with the soundfolded words. 727 */ 728 if (spell_read_tree(fd, &slang->sl_sbyts, &slang->sl_sidxs, 729 FALSE, 0) != 0) 730 { 731 someerror: 732 semsg(_("E782: error while reading .sug file: %s"), 733 slang->sl_fname); 734 slang_clear_sug(slang); 735 goto nextone; 736 } 737 738 /* 739 * <SUGTABLE>: <sugwcount> <sugline> ... 740 * 741 * Read the table with word numbers. We use a file buffer for 742 * this, because it's so much like a file with lines. Makes it 743 * possible to swap the info and save on memory use. 744 */ 745 slang->sl_sugbuf = open_spellbuf(); 746 if (slang->sl_sugbuf == NULL) 747 goto someerror; 748 /* <sugwcount> */ 749 wcount = get4c(fd); 750 if (wcount < 0) 751 goto someerror; 752 753 /* Read all the wordnr lists into the buffer, one NUL terminated 754 * list per line. */ 755 ga_init2(&ga, 1, 100); 756 for (wordnr = 0; wordnr < wcount; ++wordnr) 757 { 758 ga.ga_len = 0; 759 for (;;) 760 { 761 c = getc(fd); /* <sugline> */ 762 if (c < 0 || ga_grow(&ga, 1) == FAIL) 763 goto someerror; 764 ((char_u *)ga.ga_data)[ga.ga_len++] = c; 765 if (c == NUL) 766 break; 767 } 768 if (ml_append_buf(slang->sl_sugbuf, (linenr_T)wordnr, 769 ga.ga_data, ga.ga_len, TRUE) == FAIL) 770 goto someerror; 771 } 772 ga_clear(&ga); 773 774 /* 775 * Need to put word counts in the word tries, so that we can find 776 * a word by its number. 777 */ 778 tree_count_words(slang->sl_fbyts, slang->sl_fidxs); 779 tree_count_words(slang->sl_sbyts, slang->sl_sidxs); 780 781 nextone: 782 if (fd != NULL) 783 fclose(fd); 784 STRCPY(dotp, ".spl"); 785 } 786 } 787 } 788 789 790 /* 791 * Read a length field from "fd" in "cnt_bytes" bytes. 792 * Allocate memory, read the string into it and add a NUL at the end. 793 * Returns NULL when the count is zero. 794 * Sets "*cntp" to SP_*ERROR when there is an error, length of the result 795 * otherwise. 796 */ 797 static char_u * 798 read_cnt_string(FILE *fd, int cnt_bytes, int *cntp) 799 { 800 int cnt = 0; 801 int i; 802 char_u *str; 803 804 /* read the length bytes, MSB first */ 805 for (i = 0; i < cnt_bytes; ++i) 806 cnt = (cnt << 8) + getc(fd); 807 if (cnt < 0) 808 { 809 *cntp = SP_TRUNCERROR; 810 return NULL; 811 } 812 *cntp = cnt; 813 if (cnt == 0) 814 return NULL; /* nothing to read, return NULL */ 815 816 str = read_string(fd, cnt); 817 if (str == NULL) 818 *cntp = SP_OTHERERROR; 819 return str; 820 } 821 822 /* 823 * Read SN_REGION: <regionname> ... 824 * Return SP_*ERROR flags. 825 */ 826 static int 827 read_region_section(FILE *fd, slang_T *lp, int len) 828 { 829 int i; 830 831 if (len > MAXREGIONS * 2) 832 return SP_FORMERROR; 833 for (i = 0; i < len; ++i) 834 lp->sl_regions[i] = getc(fd); /* <regionname> */ 835 lp->sl_regions[len] = NUL; 836 return 0; 837 } 838 839 /* 840 * Read SN_CHARFLAGS section: <charflagslen> <charflags> 841 * <folcharslen> <folchars> 842 * Return SP_*ERROR flags. 843 */ 844 static int 845 read_charflags_section(FILE *fd) 846 { 847 char_u *flags; 848 char_u *fol; 849 int flagslen, follen; 850 851 /* <charflagslen> <charflags> */ 852 flags = read_cnt_string(fd, 1, &flagslen); 853 if (flagslen < 0) 854 return flagslen; 855 856 /* <folcharslen> <folchars> */ 857 fol = read_cnt_string(fd, 2, &follen); 858 if (follen < 0) 859 { 860 vim_free(flags); 861 return follen; 862 } 863 864 /* Set the word-char flags and fill SPELL_ISUPPER() table. */ 865 if (flags != NULL && fol != NULL) 866 set_spell_charflags(flags, flagslen, fol); 867 868 vim_free(flags); 869 vim_free(fol); 870 871 /* When <charflagslen> is zero then <fcharlen> must also be zero. */ 872 if ((flags == NULL) != (fol == NULL)) 873 return SP_FORMERROR; 874 return 0; 875 } 876 877 /* 878 * Read SN_PREFCOND section. 879 * Return SP_*ERROR flags. 880 */ 881 static int 882 read_prefcond_section(FILE *fd, slang_T *lp) 883 { 884 int cnt; 885 int i; 886 int n; 887 char_u *p; 888 char_u buf[MAXWLEN + 1]; 889 890 /* <prefcondcnt> <prefcond> ... */ 891 cnt = get2c(fd); /* <prefcondcnt> */ 892 if (cnt <= 0) 893 return SP_FORMERROR; 894 895 lp->sl_prefprog = (regprog_T **)alloc_clear( 896 (unsigned)sizeof(regprog_T *) * cnt); 897 if (lp->sl_prefprog == NULL) 898 return SP_OTHERERROR; 899 lp->sl_prefixcnt = cnt; 900 901 for (i = 0; i < cnt; ++i) 902 { 903 /* <prefcond> : <condlen> <condstr> */ 904 n = getc(fd); /* <condlen> */ 905 if (n < 0 || n >= MAXWLEN) 906 return SP_FORMERROR; 907 908 /* When <condlen> is zero we have an empty condition. Otherwise 909 * compile the regexp program used to check for the condition. */ 910 if (n > 0) 911 { 912 buf[0] = '^'; /* always match at one position only */ 913 p = buf + 1; 914 while (n-- > 0) 915 *p++ = getc(fd); /* <condstr> */ 916 *p = NUL; 917 lp->sl_prefprog[i] = vim_regcomp(buf, RE_MAGIC + RE_STRING); 918 } 919 } 920 return 0; 921 } 922 923 /* 924 * Read REP or REPSAL items section from "fd": <repcount> <rep> ... 925 * Return SP_*ERROR flags. 926 */ 927 static int 928 read_rep_section(FILE *fd, garray_T *gap, short *first) 929 { 930 int cnt; 931 fromto_T *ftp; 932 int i; 933 934 cnt = get2c(fd); /* <repcount> */ 935 if (cnt < 0) 936 return SP_TRUNCERROR; 937 938 if (ga_grow(gap, cnt) == FAIL) 939 return SP_OTHERERROR; 940 941 /* <rep> : <repfromlen> <repfrom> <reptolen> <repto> */ 942 for (; gap->ga_len < cnt; ++gap->ga_len) 943 { 944 ftp = &((fromto_T *)gap->ga_data)[gap->ga_len]; 945 ftp->ft_from = read_cnt_string(fd, 1, &i); 946 if (i < 0) 947 return i; 948 if (i == 0) 949 return SP_FORMERROR; 950 ftp->ft_to = read_cnt_string(fd, 1, &i); 951 if (i <= 0) 952 { 953 vim_free(ftp->ft_from); 954 if (i < 0) 955 return i; 956 return SP_FORMERROR; 957 } 958 } 959 960 /* Fill the first-index table. */ 961 for (i = 0; i < 256; ++i) 962 first[i] = -1; 963 for (i = 0; i < gap->ga_len; ++i) 964 { 965 ftp = &((fromto_T *)gap->ga_data)[i]; 966 if (first[*ftp->ft_from] == -1) 967 first[*ftp->ft_from] = i; 968 } 969 return 0; 970 } 971 972 /* 973 * Read SN_SAL section: <salflags> <salcount> <sal> ... 974 * Return SP_*ERROR flags. 975 */ 976 static int 977 read_sal_section(FILE *fd, slang_T *slang) 978 { 979 int i; 980 int cnt; 981 garray_T *gap; 982 salitem_T *smp; 983 int ccnt; 984 char_u *p; 985 int c = NUL; 986 987 slang->sl_sofo = FALSE; 988 989 i = getc(fd); /* <salflags> */ 990 if (i & SAL_F0LLOWUP) 991 slang->sl_followup = TRUE; 992 if (i & SAL_COLLAPSE) 993 slang->sl_collapse = TRUE; 994 if (i & SAL_REM_ACCENTS) 995 slang->sl_rem_accents = TRUE; 996 997 cnt = get2c(fd); /* <salcount> */ 998 if (cnt < 0) 999 return SP_TRUNCERROR; 1000 1001 gap = &slang->sl_sal; 1002 ga_init2(gap, sizeof(salitem_T), 10); 1003 if (ga_grow(gap, cnt + 1) == FAIL) 1004 return SP_OTHERERROR; 1005 1006 /* <sal> : <salfromlen> <salfrom> <saltolen> <salto> */ 1007 for (; gap->ga_len < cnt; ++gap->ga_len) 1008 { 1009 smp = &((salitem_T *)gap->ga_data)[gap->ga_len]; 1010 ccnt = getc(fd); /* <salfromlen> */ 1011 if (ccnt < 0) 1012 return SP_TRUNCERROR; 1013 if ((p = alloc(ccnt + 2)) == NULL) 1014 return SP_OTHERERROR; 1015 smp->sm_lead = p; 1016 1017 /* Read up to the first special char into sm_lead. */ 1018 for (i = 0; i < ccnt; ++i) 1019 { 1020 c = getc(fd); /* <salfrom> */ 1021 if (vim_strchr((char_u *)"0123456789(-<^$", c) != NULL) 1022 break; 1023 *p++ = c; 1024 } 1025 smp->sm_leadlen = (int)(p - smp->sm_lead); 1026 *p++ = NUL; 1027 1028 /* Put (abc) chars in sm_oneof, if any. */ 1029 if (c == '(') 1030 { 1031 smp->sm_oneof = p; 1032 for (++i; i < ccnt; ++i) 1033 { 1034 c = getc(fd); /* <salfrom> */ 1035 if (c == ')') 1036 break; 1037 *p++ = c; 1038 } 1039 *p++ = NUL; 1040 if (++i < ccnt) 1041 c = getc(fd); 1042 } 1043 else 1044 smp->sm_oneof = NULL; 1045 1046 /* Any following chars go in sm_rules. */ 1047 smp->sm_rules = p; 1048 if (i < ccnt) 1049 /* store the char we got while checking for end of sm_lead */ 1050 *p++ = c; 1051 for (++i; i < ccnt; ++i) 1052 *p++ = getc(fd); /* <salfrom> */ 1053 *p++ = NUL; 1054 1055 /* <saltolen> <salto> */ 1056 smp->sm_to = read_cnt_string(fd, 1, &ccnt); 1057 if (ccnt < 0) 1058 { 1059 vim_free(smp->sm_lead); 1060 return ccnt; 1061 } 1062 1063 if (has_mbyte) 1064 { 1065 /* convert the multi-byte strings to wide char strings */ 1066 smp->sm_lead_w = mb_str2wide(smp->sm_lead); 1067 smp->sm_leadlen = mb_charlen(smp->sm_lead); 1068 if (smp->sm_oneof == NULL) 1069 smp->sm_oneof_w = NULL; 1070 else 1071 smp->sm_oneof_w = mb_str2wide(smp->sm_oneof); 1072 if (smp->sm_to == NULL) 1073 smp->sm_to_w = NULL; 1074 else 1075 smp->sm_to_w = mb_str2wide(smp->sm_to); 1076 if (smp->sm_lead_w == NULL 1077 || (smp->sm_oneof_w == NULL && smp->sm_oneof != NULL) 1078 || (smp->sm_to_w == NULL && smp->sm_to != NULL)) 1079 { 1080 vim_free(smp->sm_lead); 1081 vim_free(smp->sm_to); 1082 vim_free(smp->sm_lead_w); 1083 vim_free(smp->sm_oneof_w); 1084 vim_free(smp->sm_to_w); 1085 return SP_OTHERERROR; 1086 } 1087 } 1088 } 1089 1090 if (gap->ga_len > 0) 1091 { 1092 /* Add one extra entry to mark the end with an empty sm_lead. Avoids 1093 * that we need to check the index every time. */ 1094 smp = &((salitem_T *)gap->ga_data)[gap->ga_len]; 1095 if ((p = alloc(1)) == NULL) 1096 return SP_OTHERERROR; 1097 p[0] = NUL; 1098 smp->sm_lead = p; 1099 smp->sm_leadlen = 0; 1100 smp->sm_oneof = NULL; 1101 smp->sm_rules = p; 1102 smp->sm_to = NULL; 1103 if (has_mbyte) 1104 { 1105 smp->sm_lead_w = mb_str2wide(smp->sm_lead); 1106 smp->sm_leadlen = 0; 1107 smp->sm_oneof_w = NULL; 1108 smp->sm_to_w = NULL; 1109 } 1110 ++gap->ga_len; 1111 } 1112 1113 /* Fill the first-index table. */ 1114 set_sal_first(slang); 1115 1116 return 0; 1117 } 1118 1119 /* 1120 * Read SN_WORDS: <word> ... 1121 * Return SP_*ERROR flags. 1122 */ 1123 static int 1124 read_words_section(FILE *fd, slang_T *lp, int len) 1125 { 1126 int done = 0; 1127 int i; 1128 int c; 1129 char_u word[MAXWLEN]; 1130 1131 while (done < len) 1132 { 1133 /* Read one word at a time. */ 1134 for (i = 0; ; ++i) 1135 { 1136 c = getc(fd); 1137 if (c == EOF) 1138 return SP_TRUNCERROR; 1139 word[i] = c; 1140 if (word[i] == NUL) 1141 break; 1142 if (i == MAXWLEN - 1) 1143 return SP_FORMERROR; 1144 } 1145 1146 /* Init the count to 10. */ 1147 count_common_word(lp, word, -1, 10); 1148 done += i + 1; 1149 } 1150 return 0; 1151 } 1152 1153 /* 1154 * SN_SOFO: <sofofromlen> <sofofrom> <sofotolen> <sofoto> 1155 * Return SP_*ERROR flags. 1156 */ 1157 static int 1158 read_sofo_section(FILE *fd, slang_T *slang) 1159 { 1160 int cnt; 1161 char_u *from, *to; 1162 int res; 1163 1164 slang->sl_sofo = TRUE; 1165 1166 /* <sofofromlen> <sofofrom> */ 1167 from = read_cnt_string(fd, 2, &cnt); 1168 if (cnt < 0) 1169 return cnt; 1170 1171 /* <sofotolen> <sofoto> */ 1172 to = read_cnt_string(fd, 2, &cnt); 1173 if (cnt < 0) 1174 { 1175 vim_free(from); 1176 return cnt; 1177 } 1178 1179 /* Store the info in slang->sl_sal and/or slang->sl_sal_first. */ 1180 if (from != NULL && to != NULL) 1181 res = set_sofo(slang, from, to); 1182 else if (from != NULL || to != NULL) 1183 res = SP_FORMERROR; /* only one of two strings is an error */ 1184 else 1185 res = 0; 1186 1187 vim_free(from); 1188 vim_free(to); 1189 return res; 1190 } 1191 1192 /* 1193 * Read the compound section from the .spl file: 1194 * <compmax> <compminlen> <compsylmax> <compoptions> <compflags> 1195 * Returns SP_*ERROR flags. 1196 */ 1197 static int 1198 read_compound(FILE *fd, slang_T *slang, int len) 1199 { 1200 int todo = len; 1201 int c; 1202 int atstart; 1203 char_u *pat; 1204 char_u *pp; 1205 char_u *cp; 1206 char_u *ap; 1207 char_u *crp; 1208 int cnt; 1209 garray_T *gap; 1210 1211 if (todo < 2) 1212 return SP_FORMERROR; /* need at least two bytes */ 1213 1214 --todo; 1215 c = getc(fd); /* <compmax> */ 1216 if (c < 2) 1217 c = MAXWLEN; 1218 slang->sl_compmax = c; 1219 1220 --todo; 1221 c = getc(fd); /* <compminlen> */ 1222 if (c < 1) 1223 c = 0; 1224 slang->sl_compminlen = c; 1225 1226 --todo; 1227 c = getc(fd); /* <compsylmax> */ 1228 if (c < 1) 1229 c = MAXWLEN; 1230 slang->sl_compsylmax = c; 1231 1232 c = getc(fd); /* <compoptions> */ 1233 if (c != 0) 1234 ungetc(c, fd); /* be backwards compatible with Vim 7.0b */ 1235 else 1236 { 1237 --todo; 1238 c = getc(fd); /* only use the lower byte for now */ 1239 --todo; 1240 slang->sl_compoptions = c; 1241 1242 gap = &slang->sl_comppat; 1243 c = get2c(fd); /* <comppatcount> */ 1244 todo -= 2; 1245 ga_init2(gap, sizeof(char_u *), c); 1246 if (ga_grow(gap, c) == OK) 1247 while (--c >= 0) 1248 { 1249 ((char_u **)(gap->ga_data))[gap->ga_len++] = 1250 read_cnt_string(fd, 1, &cnt); 1251 /* <comppatlen> <comppattext> */ 1252 if (cnt < 0) 1253 return cnt; 1254 todo -= cnt + 1; 1255 } 1256 } 1257 if (todo < 0) 1258 return SP_FORMERROR; 1259 1260 /* Turn the COMPOUNDRULE items into a regexp pattern: 1261 * "a[bc]/a*b+" -> "^\(a[bc]\|a*b\+\)$". 1262 * Inserting backslashes may double the length, "^\(\)$<Nul>" is 7 bytes. 1263 * Conversion to utf-8 may double the size. */ 1264 c = todo * 2 + 7; 1265 if (enc_utf8) 1266 c += todo * 2; 1267 pat = alloc((unsigned)c); 1268 if (pat == NULL) 1269 return SP_OTHERERROR; 1270 1271 /* We also need a list of all flags that can appear at the start and one 1272 * for all flags. */ 1273 cp = alloc(todo + 1); 1274 if (cp == NULL) 1275 { 1276 vim_free(pat); 1277 return SP_OTHERERROR; 1278 } 1279 slang->sl_compstartflags = cp; 1280 *cp = NUL; 1281 1282 ap = alloc(todo + 1); 1283 if (ap == NULL) 1284 { 1285 vim_free(pat); 1286 return SP_OTHERERROR; 1287 } 1288 slang->sl_compallflags = ap; 1289 *ap = NUL; 1290 1291 /* And a list of all patterns in their original form, for checking whether 1292 * compounding may work in match_compoundrule(). This is freed when we 1293 * encounter a wildcard, the check doesn't work then. */ 1294 crp = alloc(todo + 1); 1295 slang->sl_comprules = crp; 1296 1297 pp = pat; 1298 *pp++ = '^'; 1299 *pp++ = '\\'; 1300 *pp++ = '('; 1301 1302 atstart = 1; 1303 while (todo-- > 0) 1304 { 1305 c = getc(fd); /* <compflags> */ 1306 if (c == EOF) 1307 { 1308 vim_free(pat); 1309 return SP_TRUNCERROR; 1310 } 1311 1312 /* Add all flags to "sl_compallflags". */ 1313 if (vim_strchr((char_u *)"?*+[]/", c) == NULL 1314 && !byte_in_str(slang->sl_compallflags, c)) 1315 { 1316 *ap++ = c; 1317 *ap = NUL; 1318 } 1319 1320 if (atstart != 0) 1321 { 1322 /* At start of item: copy flags to "sl_compstartflags". For a 1323 * [abc] item set "atstart" to 2 and copy up to the ']'. */ 1324 if (c == '[') 1325 atstart = 2; 1326 else if (c == ']') 1327 atstart = 0; 1328 else 1329 { 1330 if (!byte_in_str(slang->sl_compstartflags, c)) 1331 { 1332 *cp++ = c; 1333 *cp = NUL; 1334 } 1335 if (atstart == 1) 1336 atstart = 0; 1337 } 1338 } 1339 1340 /* Copy flag to "sl_comprules", unless we run into a wildcard. */ 1341 if (crp != NULL) 1342 { 1343 if (c == '?' || c == '+' || c == '*') 1344 { 1345 VIM_CLEAR(slang->sl_comprules); 1346 crp = NULL; 1347 } 1348 else 1349 *crp++ = c; 1350 } 1351 1352 if (c == '/') /* slash separates two items */ 1353 { 1354 *pp++ = '\\'; 1355 *pp++ = '|'; 1356 atstart = 1; 1357 } 1358 else /* normal char, "[abc]" and '*' are copied as-is */ 1359 { 1360 if (c == '?' || c == '+' || c == '~') 1361 *pp++ = '\\'; /* "a?" becomes "a\?", "a+" becomes "a\+" */ 1362 if (enc_utf8) 1363 pp += mb_char2bytes(c, pp); 1364 else 1365 *pp++ = c; 1366 } 1367 } 1368 1369 *pp++ = '\\'; 1370 *pp++ = ')'; 1371 *pp++ = '$'; 1372 *pp = NUL; 1373 1374 if (crp != NULL) 1375 *crp = NUL; 1376 1377 slang->sl_compprog = vim_regcomp(pat, RE_MAGIC + RE_STRING + RE_STRICT); 1378 vim_free(pat); 1379 if (slang->sl_compprog == NULL) 1380 return SP_FORMERROR; 1381 1382 return 0; 1383 } 1384 1385 /* 1386 * Set the SOFOFROM and SOFOTO items in language "lp". 1387 * Returns SP_*ERROR flags when there is something wrong. 1388 */ 1389 static int 1390 set_sofo(slang_T *lp, char_u *from, char_u *to) 1391 { 1392 int i; 1393 1394 garray_T *gap; 1395 char_u *s; 1396 char_u *p; 1397 int c; 1398 int *inp; 1399 1400 if (has_mbyte) 1401 { 1402 /* Use "sl_sal" as an array with 256 pointers to a list of wide 1403 * characters. The index is the low byte of the character. 1404 * The list contains from-to pairs with a terminating NUL. 1405 * sl_sal_first[] is used for latin1 "from" characters. */ 1406 gap = &lp->sl_sal; 1407 ga_init2(gap, sizeof(int *), 1); 1408 if (ga_grow(gap, 256) == FAIL) 1409 return SP_OTHERERROR; 1410 vim_memset(gap->ga_data, 0, sizeof(int *) * 256); 1411 gap->ga_len = 256; 1412 1413 /* First count the number of items for each list. Temporarily use 1414 * sl_sal_first[] for this. */ 1415 for (p = from, s = to; *p != NUL && *s != NUL; ) 1416 { 1417 c = mb_cptr2char_adv(&p); 1418 MB_CPTR_ADV(s); 1419 if (c >= 256) 1420 ++lp->sl_sal_first[c & 0xff]; 1421 } 1422 if (*p != NUL || *s != NUL) /* lengths differ */ 1423 return SP_FORMERROR; 1424 1425 /* Allocate the lists. */ 1426 for (i = 0; i < 256; ++i) 1427 if (lp->sl_sal_first[i] > 0) 1428 { 1429 p = alloc(sizeof(int) * (lp->sl_sal_first[i] * 2 + 1)); 1430 if (p == NULL) 1431 return SP_OTHERERROR; 1432 ((int **)gap->ga_data)[i] = (int *)p; 1433 *(int *)p = 0; 1434 } 1435 1436 /* Put the characters up to 255 in sl_sal_first[] the rest in a sl_sal 1437 * list. */ 1438 vim_memset(lp->sl_sal_first, 0, sizeof(salfirst_T) * 256); 1439 for (p = from, s = to; *p != NUL && *s != NUL; ) 1440 { 1441 c = mb_cptr2char_adv(&p); 1442 i = mb_cptr2char_adv(&s); 1443 if (c >= 256) 1444 { 1445 /* Append the from-to chars at the end of the list with 1446 * the low byte. */ 1447 inp = ((int **)gap->ga_data)[c & 0xff]; 1448 while (*inp != 0) 1449 ++inp; 1450 *inp++ = c; /* from char */ 1451 *inp++ = i; /* to char */ 1452 *inp++ = NUL; /* NUL at the end */ 1453 } 1454 else 1455 /* mapping byte to char is done in sl_sal_first[] */ 1456 lp->sl_sal_first[c] = i; 1457 } 1458 } 1459 else 1460 { 1461 /* mapping bytes to bytes is done in sl_sal_first[] */ 1462 if (STRLEN(from) != STRLEN(to)) 1463 return SP_FORMERROR; 1464 1465 for (i = 0; to[i] != NUL; ++i) 1466 lp->sl_sal_first[from[i]] = to[i]; 1467 lp->sl_sal.ga_len = 1; /* indicates we have soundfolding */ 1468 } 1469 1470 return 0; 1471 } 1472 1473 /* 1474 * Fill the first-index table for "lp". 1475 */ 1476 static void 1477 set_sal_first(slang_T *lp) 1478 { 1479 salfirst_T *sfirst; 1480 int i; 1481 salitem_T *smp; 1482 int c; 1483 garray_T *gap = &lp->sl_sal; 1484 1485 sfirst = lp->sl_sal_first; 1486 for (i = 0; i < 256; ++i) 1487 sfirst[i] = -1; 1488 smp = (salitem_T *)gap->ga_data; 1489 for (i = 0; i < gap->ga_len; ++i) 1490 { 1491 if (has_mbyte) 1492 /* Use the lowest byte of the first character. For latin1 it's 1493 * the character, for other encodings it should differ for most 1494 * characters. */ 1495 c = *smp[i].sm_lead_w & 0xff; 1496 else 1497 c = *smp[i].sm_lead; 1498 if (sfirst[c] == -1) 1499 { 1500 sfirst[c] = i; 1501 if (has_mbyte) 1502 { 1503 int n; 1504 1505 /* Make sure all entries with this byte are following each 1506 * other. Move the ones that are in the wrong position. Do 1507 * keep the same ordering! */ 1508 while (i + 1 < gap->ga_len 1509 && (*smp[i + 1].sm_lead_w & 0xff) == c) 1510 /* Skip over entry with same index byte. */ 1511 ++i; 1512 1513 for (n = 1; i + n < gap->ga_len; ++n) 1514 if ((*smp[i + n].sm_lead_w & 0xff) == c) 1515 { 1516 salitem_T tsal; 1517 1518 /* Move entry with same index byte after the entries 1519 * we already found. */ 1520 ++i; 1521 --n; 1522 tsal = smp[i + n]; 1523 mch_memmove(smp + i + 1, smp + i, 1524 sizeof(salitem_T) * n); 1525 smp[i] = tsal; 1526 } 1527 } 1528 } 1529 } 1530 } 1531 1532 /* 1533 * Turn a multi-byte string into a wide character string. 1534 * Return it in allocated memory (NULL for out-of-memory) 1535 */ 1536 static int * 1537 mb_str2wide(char_u *s) 1538 { 1539 int *res; 1540 char_u *p; 1541 int i = 0; 1542 1543 res = (int *)alloc(sizeof(int) * (mb_charlen(s) + 1)); 1544 if (res != NULL) 1545 { 1546 for (p = s; *p != NUL; ) 1547 res[i++] = mb_ptr2char_adv(&p); 1548 res[i] = NUL; 1549 } 1550 return res; 1551 } 1552 1553 /* 1554 * Read a tree from the .spl or .sug file. 1555 * Allocates the memory and stores pointers in "bytsp" and "idxsp". 1556 * This is skipped when the tree has zero length. 1557 * Returns zero when OK, SP_ value for an error. 1558 */ 1559 static int 1560 spell_read_tree( 1561 FILE *fd, 1562 char_u **bytsp, 1563 idx_T **idxsp, 1564 int prefixtree, /* TRUE for the prefix tree */ 1565 int prefixcnt) /* when "prefixtree" is TRUE: prefix count */ 1566 { 1567 long len; 1568 int idx; 1569 char_u *bp; 1570 idx_T *ip; 1571 1572 /* The tree size was computed when writing the file, so that we can 1573 * allocate it as one long block. <nodecount> */ 1574 len = get4c(fd); 1575 if (len < 0) 1576 return SP_TRUNCERROR; 1577 if (len >= LONG_MAX / (long)sizeof(int)) 1578 /* Invalid length, multiply with sizeof(int) would overflow. */ 1579 return SP_FORMERROR; 1580 if (len > 0) 1581 { 1582 /* Allocate the byte array. */ 1583 bp = lalloc((long_u)len, TRUE); 1584 if (bp == NULL) 1585 return SP_OTHERERROR; 1586 *bytsp = bp; 1587 1588 /* Allocate the index array. */ 1589 ip = (idx_T *)lalloc_clear((long_u)(len * sizeof(int)), TRUE); 1590 if (ip == NULL) 1591 return SP_OTHERERROR; 1592 *idxsp = ip; 1593 1594 /* Recursively read the tree and store it in the array. */ 1595 idx = read_tree_node(fd, bp, ip, len, 0, prefixtree, prefixcnt); 1596 if (idx < 0) 1597 return idx; 1598 } 1599 return 0; 1600 } 1601 1602 /* 1603 * Read one row of siblings from the spell file and store it in the byte array 1604 * "byts" and index array "idxs". Recursively read the children. 1605 * 1606 * NOTE: The code here must match put_node()! 1607 * 1608 * Returns the index (>= 0) following the siblings. 1609 * Returns SP_TRUNCERROR if the file is shorter than expected. 1610 * Returns SP_FORMERROR if there is a format error. 1611 */ 1612 static idx_T 1613 read_tree_node( 1614 FILE *fd, 1615 char_u *byts, 1616 idx_T *idxs, 1617 int maxidx, /* size of arrays */ 1618 idx_T startidx, /* current index in "byts" and "idxs" */ 1619 int prefixtree, /* TRUE for reading PREFIXTREE */ 1620 int maxprefcondnr) /* maximum for <prefcondnr> */ 1621 { 1622 int len; 1623 int i; 1624 int n; 1625 idx_T idx = startidx; 1626 int c; 1627 int c2; 1628 #define SHARED_MASK 0x8000000 1629 1630 len = getc(fd); /* <siblingcount> */ 1631 if (len <= 0) 1632 return SP_TRUNCERROR; 1633 1634 if (startidx + len >= maxidx) 1635 return SP_FORMERROR; 1636 byts[idx++] = len; 1637 1638 /* Read the byte values, flag/region bytes and shared indexes. */ 1639 for (i = 1; i <= len; ++i) 1640 { 1641 c = getc(fd); /* <byte> */ 1642 if (c < 0) 1643 return SP_TRUNCERROR; 1644 if (c <= BY_SPECIAL) 1645 { 1646 if (c == BY_NOFLAGS && !prefixtree) 1647 { 1648 /* No flags, all regions. */ 1649 idxs[idx] = 0; 1650 c = 0; 1651 } 1652 else if (c != BY_INDEX) 1653 { 1654 if (prefixtree) 1655 { 1656 /* Read the optional pflags byte, the prefix ID and the 1657 * condition nr. In idxs[] store the prefix ID in the low 1658 * byte, the condition index shifted up 8 bits, the flags 1659 * shifted up 24 bits. */ 1660 if (c == BY_FLAGS) 1661 c = getc(fd) << 24; /* <pflags> */ 1662 else 1663 c = 0; 1664 1665 c |= getc(fd); /* <affixID> */ 1666 1667 n = get2c(fd); /* <prefcondnr> */ 1668 if (n >= maxprefcondnr) 1669 return SP_FORMERROR; 1670 c |= (n << 8); 1671 } 1672 else /* c must be BY_FLAGS or BY_FLAGS2 */ 1673 { 1674 /* Read flags and optional region and prefix ID. In 1675 * idxs[] the flags go in the low two bytes, region above 1676 * that and prefix ID above the region. */ 1677 c2 = c; 1678 c = getc(fd); /* <flags> */ 1679 if (c2 == BY_FLAGS2) 1680 c = (getc(fd) << 8) + c; /* <flags2> */ 1681 if (c & WF_REGION) 1682 c = (getc(fd) << 16) + c; /* <region> */ 1683 if (c & WF_AFX) 1684 c = (getc(fd) << 24) + c; /* <affixID> */ 1685 } 1686 1687 idxs[idx] = c; 1688 c = 0; 1689 } 1690 else /* c == BY_INDEX */ 1691 { 1692 /* <nodeidx> */ 1693 n = get3c(fd); 1694 if (n < 0 || n >= maxidx) 1695 return SP_FORMERROR; 1696 idxs[idx] = n + SHARED_MASK; 1697 c = getc(fd); /* <xbyte> */ 1698 } 1699 } 1700 byts[idx++] = c; 1701 } 1702 1703 /* Recursively read the children for non-shared siblings. 1704 * Skip the end-of-word ones (zero byte value) and the shared ones (and 1705 * remove SHARED_MASK) */ 1706 for (i = 1; i <= len; ++i) 1707 if (byts[startidx + i] != 0) 1708 { 1709 if (idxs[startidx + i] & SHARED_MASK) 1710 idxs[startidx + i] &= ~SHARED_MASK; 1711 else 1712 { 1713 idxs[startidx + i] = idx; 1714 idx = read_tree_node(fd, byts, idxs, maxidx, idx, 1715 prefixtree, maxprefcondnr); 1716 if (idx < 0) 1717 break; 1718 } 1719 } 1720 1721 return idx; 1722 } 1723 1724 /* 1725 * Reload the spell file "fname" if it's loaded. 1726 */ 1727 static void 1728 spell_reload_one( 1729 char_u *fname, 1730 int added_word) /* invoked through "zg" */ 1731 { 1732 slang_T *slang; 1733 int didit = FALSE; 1734 1735 for (slang = first_lang; slang != NULL; slang = slang->sl_next) 1736 { 1737 if (fullpathcmp(fname, slang->sl_fname, FALSE) == FPC_SAME) 1738 { 1739 slang_clear(slang); 1740 if (spell_load_file(fname, NULL, slang, FALSE) == NULL) 1741 /* reloading failed, clear the language */ 1742 slang_clear(slang); 1743 redraw_all_later(SOME_VALID); 1744 didit = TRUE; 1745 } 1746 } 1747 1748 /* When "zg" was used and the file wasn't loaded yet, should redo 1749 * 'spelllang' to load it now. */ 1750 if (added_word && !didit) 1751 did_set_spelllang(curwin); 1752 } 1753 1754 1755 /* 1756 * Functions for ":mkspell". 1757 */ 1758 1759 #define MAXLINELEN 500 /* Maximum length in bytes of a line in a .aff 1760 and .dic file. */ 1761 /* 1762 * Main structure to store the contents of a ".aff" file. 1763 */ 1764 typedef struct afffile_S 1765 { 1766 char_u *af_enc; /* "SET", normalized, alloc'ed string or NULL */ 1767 int af_flagtype; /* AFT_CHAR, AFT_LONG, AFT_NUM or AFT_CAPLONG */ 1768 unsigned af_rare; /* RARE ID for rare word */ 1769 unsigned af_keepcase; /* KEEPCASE ID for keep-case word */ 1770 unsigned af_bad; /* BAD ID for banned word */ 1771 unsigned af_needaffix; /* NEEDAFFIX ID */ 1772 unsigned af_circumfix; /* CIRCUMFIX ID */ 1773 unsigned af_needcomp; /* NEEDCOMPOUND ID */ 1774 unsigned af_comproot; /* COMPOUNDROOT ID */ 1775 unsigned af_compforbid; /* COMPOUNDFORBIDFLAG ID */ 1776 unsigned af_comppermit; /* COMPOUNDPERMITFLAG ID */ 1777 unsigned af_nosuggest; /* NOSUGGEST ID */ 1778 int af_pfxpostpone; /* postpone prefixes without chop string and 1779 without flags */ 1780 int af_ignoreextra; /* IGNOREEXTRA present */ 1781 hashtab_T af_pref; /* hashtable for prefixes, affheader_T */ 1782 hashtab_T af_suff; /* hashtable for suffixes, affheader_T */ 1783 hashtab_T af_comp; /* hashtable for compound flags, compitem_T */ 1784 } afffile_T; 1785 1786 #define AFT_CHAR 0 /* flags are one character */ 1787 #define AFT_LONG 1 /* flags are two characters */ 1788 #define AFT_CAPLONG 2 /* flags are one or two characters */ 1789 #define AFT_NUM 3 /* flags are numbers, comma separated */ 1790 1791 typedef struct affentry_S affentry_T; 1792 /* Affix entry from ".aff" file. Used for prefixes and suffixes. */ 1793 struct affentry_S 1794 { 1795 affentry_T *ae_next; /* next affix with same name/number */ 1796 char_u *ae_chop; /* text to chop off basic word (can be NULL) */ 1797 char_u *ae_add; /* text to add to basic word (can be NULL) */ 1798 char_u *ae_flags; /* flags on the affix (can be NULL) */ 1799 char_u *ae_cond; /* condition (NULL for ".") */ 1800 regprog_T *ae_prog; /* regexp program for ae_cond or NULL */ 1801 char ae_compforbid; /* COMPOUNDFORBIDFLAG found */ 1802 char ae_comppermit; /* COMPOUNDPERMITFLAG found */ 1803 }; 1804 1805 #define AH_KEY_LEN 17 /* 2 x 8 bytes + NUL */ 1806 1807 /* Affix header from ".aff" file. Used for af_pref and af_suff. */ 1808 typedef struct affheader_S 1809 { 1810 char_u ah_key[AH_KEY_LEN]; /* key for hashtab == name of affix */ 1811 unsigned ah_flag; /* affix name as number, uses "af_flagtype" */ 1812 int ah_newID; /* prefix ID after renumbering; 0 if not used */ 1813 int ah_combine; /* suffix may combine with prefix */ 1814 int ah_follows; /* another affix block should be following */ 1815 affentry_T *ah_first; /* first affix entry */ 1816 } affheader_T; 1817 1818 #define HI2AH(hi) ((affheader_T *)(hi)->hi_key) 1819 1820 /* Flag used in compound items. */ 1821 typedef struct compitem_S 1822 { 1823 char_u ci_key[AH_KEY_LEN]; /* key for hashtab == name of compound */ 1824 unsigned ci_flag; /* affix name as number, uses "af_flagtype" */ 1825 int ci_newID; /* affix ID after renumbering. */ 1826 } compitem_T; 1827 1828 #define HI2CI(hi) ((compitem_T *)(hi)->hi_key) 1829 1830 /* 1831 * Structure that is used to store the items in the word tree. This avoids 1832 * the need to keep track of each allocated thing, everything is freed all at 1833 * once after ":mkspell" is done. 1834 * Note: "sb_next" must be just before "sb_data" to make sure the alignment of 1835 * "sb_data" is correct for systems where pointers must be aligned on 1836 * pointer-size boundaries and sizeof(pointer) > sizeof(int) (e.g., Sparc). 1837 */ 1838 #define SBLOCKSIZE 16000 /* size of sb_data */ 1839 typedef struct sblock_S sblock_T; 1840 struct sblock_S 1841 { 1842 int sb_used; /* nr of bytes already in use */ 1843 sblock_T *sb_next; /* next block in list */ 1844 char_u sb_data[1]; /* data, actually longer */ 1845 }; 1846 1847 /* 1848 * A node in the tree. 1849 */ 1850 typedef struct wordnode_S wordnode_T; 1851 struct wordnode_S 1852 { 1853 union /* shared to save space */ 1854 { 1855 char_u hashkey[6]; /* the hash key, only used while compressing */ 1856 int index; /* index in written nodes (valid after first 1857 round) */ 1858 } wn_u1; 1859 union /* shared to save space */ 1860 { 1861 wordnode_T *next; /* next node with same hash key */ 1862 wordnode_T *wnode; /* parent node that will write this node */ 1863 } wn_u2; 1864 wordnode_T *wn_child; /* child (next byte in word) */ 1865 wordnode_T *wn_sibling; /* next sibling (alternate byte in word, 1866 always sorted) */ 1867 int wn_refs; /* Nr. of references to this node. Only 1868 relevant for first node in a list of 1869 siblings, in following siblings it is 1870 always one. */ 1871 char_u wn_byte; /* Byte for this node. NUL for word end */ 1872 1873 /* Info for when "wn_byte" is NUL. 1874 * In PREFIXTREE "wn_region" is used for the prefcondnr. 1875 * In the soundfolded word tree "wn_flags" has the MSW of the wordnr and 1876 * "wn_region" the LSW of the wordnr. */ 1877 char_u wn_affixID; /* supported/required prefix ID or 0 */ 1878 short_u wn_flags; /* WF_ flags */ 1879 short wn_region; /* region mask */ 1880 1881 #ifdef SPELL_PRINTTREE 1882 int wn_nr; /* sequence nr for printing */ 1883 #endif 1884 }; 1885 1886 #define WN_MASK 0xffff /* mask relevant bits of "wn_flags" */ 1887 1888 #define HI2WN(hi) (wordnode_T *)((hi)->hi_key) 1889 1890 /* 1891 * Info used while reading the spell files. 1892 */ 1893 typedef struct spellinfo_S 1894 { 1895 wordnode_T *si_foldroot; /* tree with case-folded words */ 1896 long si_foldwcount; /* nr of words in si_foldroot */ 1897 1898 wordnode_T *si_keeproot; /* tree with keep-case words */ 1899 long si_keepwcount; /* nr of words in si_keeproot */ 1900 1901 wordnode_T *si_prefroot; /* tree with postponed prefixes */ 1902 1903 long si_sugtree; /* creating the soundfolding trie */ 1904 1905 sblock_T *si_blocks; /* memory blocks used */ 1906 long si_blocks_cnt; /* memory blocks allocated */ 1907 int si_did_emsg; /* TRUE when ran out of memory */ 1908 1909 long si_compress_cnt; /* words to add before lowering 1910 compression limit */ 1911 wordnode_T *si_first_free; /* List of nodes that have been freed during 1912 compression, linked by "wn_child" field. */ 1913 long si_free_count; /* number of nodes in si_first_free */ 1914 #ifdef SPELL_PRINTTREE 1915 int si_wordnode_nr; /* sequence nr for nodes */ 1916 #endif 1917 buf_T *si_spellbuf; /* buffer used to store soundfold word table */ 1918 1919 int si_ascii; /* handling only ASCII words */ 1920 int si_add; /* addition file */ 1921 int si_clear_chartab; /* when TRUE clear char tables */ 1922 int si_region; /* region mask */ 1923 vimconv_T si_conv; /* for conversion to 'encoding' */ 1924 int si_memtot; /* runtime memory used */ 1925 int si_verbose; /* verbose messages */ 1926 int si_msg_count; /* number of words added since last message */ 1927 char_u *si_info; /* info text chars or NULL */ 1928 int si_region_count; /* number of regions supported (1 when there 1929 are no regions) */ 1930 char_u si_region_name[MAXREGIONS * 2 + 1]; 1931 /* region names; used only if 1932 * si_region_count > 1) */ 1933 1934 garray_T si_rep; /* list of fromto_T entries from REP lines */ 1935 garray_T si_repsal; /* list of fromto_T entries from REPSAL lines */ 1936 garray_T si_sal; /* list of fromto_T entries from SAL lines */ 1937 char_u *si_sofofr; /* SOFOFROM text */ 1938 char_u *si_sofoto; /* SOFOTO text */ 1939 int si_nosugfile; /* NOSUGFILE item found */ 1940 int si_nosplitsugs; /* NOSPLITSUGS item found */ 1941 int si_nocompoundsugs; /* NOCOMPOUNDSUGS item found */ 1942 int si_followup; /* soundsalike: ? */ 1943 int si_collapse; /* soundsalike: ? */ 1944 hashtab_T si_commonwords; /* hashtable for common words */ 1945 time_t si_sugtime; /* timestamp for .sug file */ 1946 int si_rem_accents; /* soundsalike: remove accents */ 1947 garray_T si_map; /* MAP info concatenated */ 1948 char_u *si_midword; /* MIDWORD chars or NULL */ 1949 int si_compmax; /* max nr of words for compounding */ 1950 int si_compminlen; /* minimal length for compounding */ 1951 int si_compsylmax; /* max nr of syllables for compounding */ 1952 int si_compoptions; /* COMP_ flags */ 1953 garray_T si_comppat; /* CHECKCOMPOUNDPATTERN items, each stored as 1954 a string */ 1955 char_u *si_compflags; /* flags used for compounding */ 1956 char_u si_nobreak; /* NOBREAK */ 1957 char_u *si_syllable; /* syllable string */ 1958 garray_T si_prefcond; /* table with conditions for postponed 1959 * prefixes, each stored as a string */ 1960 int si_newprefID; /* current value for ah_newID */ 1961 int si_newcompID; /* current value for compound ID */ 1962 } spellinfo_T; 1963 1964 static int is_aff_rule(char_u **items, int itemcnt, char *rulename, int mincount); 1965 static void aff_process_flags(afffile_T *affile, affentry_T *entry); 1966 static int spell_info_item(char_u *s); 1967 static unsigned affitem2flag(int flagtype, char_u *item, char_u *fname, int lnum); 1968 static unsigned get_affitem(int flagtype, char_u **pp); 1969 static void process_compflags(spellinfo_T *spin, afffile_T *aff, char_u *compflags); 1970 static void check_renumber(spellinfo_T *spin); 1971 static void aff_check_number(int spinval, int affval, char *name); 1972 static void aff_check_string(char_u *spinval, char_u *affval, char *name); 1973 static int str_equal(char_u *s1, char_u *s2); 1974 static void add_fromto(spellinfo_T *spin, garray_T *gap, char_u *from, char_u *to); 1975 static int sal_to_bool(char_u *s); 1976 static int get_affix_flags(afffile_T *affile, char_u *afflist); 1977 static int get_pfxlist(afffile_T *affile, char_u *afflist, char_u *store_afflist); 1978 static void get_compflags(afffile_T *affile, char_u *afflist, char_u *store_afflist); 1979 static int store_aff_word(spellinfo_T *spin, char_u *word, char_u *afflist, afffile_T *affile, hashtab_T *ht, hashtab_T *xht, int condit, int flags, char_u *pfxlist, int pfxlen); 1980 static void *getroom(spellinfo_T *spin, size_t len, int align); 1981 static char_u *getroom_save(spellinfo_T *spin, char_u *s); 1982 static int store_word(spellinfo_T *spin, char_u *word, int flags, int region, char_u *pfxlist, int need_affix); 1983 static int tree_add_word(spellinfo_T *spin, char_u *word, wordnode_T *tree, int flags, int region, int affixID); 1984 static wordnode_T *get_wordnode(spellinfo_T *spin); 1985 static void free_wordnode(spellinfo_T *spin, wordnode_T *n); 1986 static void wordtree_compress(spellinfo_T *spin, wordnode_T *root); 1987 static int node_compress(spellinfo_T *spin, wordnode_T *node, hashtab_T *ht, int *tot); 1988 static int node_equal(wordnode_T *n1, wordnode_T *n2); 1989 static void clear_node(wordnode_T *node); 1990 static int put_node(FILE *fd, wordnode_T *node, int idx, int regionmask, int prefixtree); 1991 static int sug_filltree(spellinfo_T *spin, slang_T *slang); 1992 static int sug_maketable(spellinfo_T *spin); 1993 static int sug_filltable(spellinfo_T *spin, wordnode_T *node, int startwordnr, garray_T *gap); 1994 static int offset2bytes(int nr, char_u *buf); 1995 static void sug_write(spellinfo_T *spin, char_u *fname); 1996 static void spell_message(spellinfo_T *spin, char_u *str); 1997 static void init_spellfile(void); 1998 1999 /* In the postponed prefixes tree wn_flags is used to store the WFP_ flags, 2000 * but it must be negative to indicate the prefix tree to tree_add_word(). 2001 * Use a negative number with the lower 8 bits zero. */ 2002 #define PFX_FLAGS -256 2003 2004 /* flags for "condit" argument of store_aff_word() */ 2005 #define CONDIT_COMB 1 /* affix must combine */ 2006 #define CONDIT_CFIX 2 /* affix must have CIRCUMFIX flag */ 2007 #define CONDIT_SUF 4 /* add a suffix for matching flags */ 2008 #define CONDIT_AFF 8 /* word already has an affix */ 2009 2010 /* 2011 * Tunable parameters for when the tree is compressed. See 'mkspellmem'. 2012 */ 2013 static long compress_start = 30000; /* memory / SBLOCKSIZE */ 2014 static long compress_inc = 100; /* memory / SBLOCKSIZE */ 2015 static long compress_added = 500000; /* word count */ 2016 2017 /* 2018 * Check the 'mkspellmem' option. Return FAIL if it's wrong. 2019 * Sets "sps_flags". 2020 */ 2021 int 2022 spell_check_msm(void) 2023 { 2024 char_u *p = p_msm; 2025 long start = 0; 2026 long incr = 0; 2027 long added = 0; 2028 2029 if (!VIM_ISDIGIT(*p)) 2030 return FAIL; 2031 /* block count = (value * 1024) / SBLOCKSIZE (but avoid overflow)*/ 2032 start = (getdigits(&p) * 10) / (SBLOCKSIZE / 102); 2033 if (*p != ',') 2034 return FAIL; 2035 ++p; 2036 if (!VIM_ISDIGIT(*p)) 2037 return FAIL; 2038 incr = (getdigits(&p) * 102) / (SBLOCKSIZE / 10); 2039 if (*p != ',') 2040 return FAIL; 2041 ++p; 2042 if (!VIM_ISDIGIT(*p)) 2043 return FAIL; 2044 added = getdigits(&p) * 1024; 2045 if (*p != NUL) 2046 return FAIL; 2047 2048 if (start == 0 || incr == 0 || added == 0 || incr > start) 2049 return FAIL; 2050 2051 compress_start = start; 2052 compress_inc = incr; 2053 compress_added = added; 2054 return OK; 2055 } 2056 2057 #ifdef SPELL_PRINTTREE 2058 /* 2059 * For debugging the tree code: print the current tree in a (more or less) 2060 * readable format, so that we can see what happens when adding a word and/or 2061 * compressing the tree. 2062 * Based on code from Olaf Seibert. 2063 */ 2064 #define PRINTLINESIZE 1000 2065 #define PRINTWIDTH 6 2066 2067 #define PRINTSOME(l, depth, fmt, a1, a2) vim_snprintf(l + depth * PRINTWIDTH, \ 2068 PRINTLINESIZE - PRINTWIDTH * depth, fmt, a1, a2) 2069 2070 static char line1[PRINTLINESIZE]; 2071 static char line2[PRINTLINESIZE]; 2072 static char line3[PRINTLINESIZE]; 2073 2074 static void 2075 spell_clear_flags(wordnode_T *node) 2076 { 2077 wordnode_T *np; 2078 2079 for (np = node; np != NULL; np = np->wn_sibling) 2080 { 2081 np->wn_u1.index = FALSE; 2082 spell_clear_flags(np->wn_child); 2083 } 2084 } 2085 2086 static void 2087 spell_print_node(wordnode_T *node, int depth) 2088 { 2089 if (node->wn_u1.index) 2090 { 2091 /* Done this node before, print the reference. */ 2092 PRINTSOME(line1, depth, "(%d)", node->wn_nr, 0); 2093 PRINTSOME(line2, depth, " ", 0, 0); 2094 PRINTSOME(line3, depth, " ", 0, 0); 2095 msg(line1); 2096 msg(line2); 2097 msg(line3); 2098 } 2099 else 2100 { 2101 node->wn_u1.index = TRUE; 2102 2103 if (node->wn_byte != NUL) 2104 { 2105 if (node->wn_child != NULL) 2106 PRINTSOME(line1, depth, " %c -> ", node->wn_byte, 0); 2107 else 2108 /* Cannot happen? */ 2109 PRINTSOME(line1, depth, " %c ???", node->wn_byte, 0); 2110 } 2111 else 2112 PRINTSOME(line1, depth, " $ ", 0, 0); 2113 2114 PRINTSOME(line2, depth, "%d/%d ", node->wn_nr, node->wn_refs); 2115 2116 if (node->wn_sibling != NULL) 2117 PRINTSOME(line3, depth, " | ", 0, 0); 2118 else 2119 PRINTSOME(line3, depth, " ", 0, 0); 2120 2121 if (node->wn_byte == NUL) 2122 { 2123 msg(line1); 2124 msg(line2); 2125 msg(line3); 2126 } 2127 2128 /* do the children */ 2129 if (node->wn_byte != NUL && node->wn_child != NULL) 2130 spell_print_node(node->wn_child, depth + 1); 2131 2132 /* do the siblings */ 2133 if (node->wn_sibling != NULL) 2134 { 2135 /* get rid of all parent details except | */ 2136 STRCPY(line1, line3); 2137 STRCPY(line2, line3); 2138 spell_print_node(node->wn_sibling, depth); 2139 } 2140 } 2141 } 2142 2143 static void 2144 spell_print_tree(wordnode_T *root) 2145 { 2146 if (root != NULL) 2147 { 2148 /* Clear the "wn_u1.index" fields, used to remember what has been 2149 * done. */ 2150 spell_clear_flags(root); 2151 2152 /* Recursively print the tree. */ 2153 spell_print_node(root, 0); 2154 } 2155 } 2156 #endif /* SPELL_PRINTTREE */ 2157 2158 /* 2159 * Read the affix file "fname". 2160 * Returns an afffile_T, NULL for complete failure. 2161 */ 2162 static afffile_T * 2163 spell_read_aff(spellinfo_T *spin, char_u *fname) 2164 { 2165 FILE *fd; 2166 afffile_T *aff; 2167 char_u rline[MAXLINELEN]; 2168 char_u *line; 2169 char_u *pc = NULL; 2170 #define MAXITEMCNT 30 2171 char_u *(items[MAXITEMCNT]); 2172 int itemcnt; 2173 char_u *p; 2174 int lnum = 0; 2175 affheader_T *cur_aff = NULL; 2176 int did_postpone_prefix = FALSE; 2177 int aff_todo = 0; 2178 hashtab_T *tp; 2179 char_u *low = NULL; 2180 char_u *fol = NULL; 2181 char_u *upp = NULL; 2182 int do_rep; 2183 int do_repsal; 2184 int do_sal; 2185 int do_mapline; 2186 int found_map = FALSE; 2187 hashitem_T *hi; 2188 int l; 2189 int compminlen = 0; /* COMPOUNDMIN value */ 2190 int compsylmax = 0; /* COMPOUNDSYLMAX value */ 2191 int compoptions = 0; /* COMP_ flags */ 2192 int compmax = 0; /* COMPOUNDWORDMAX value */ 2193 char_u *compflags = NULL; /* COMPOUNDFLAG and COMPOUNDRULE 2194 concatenated */ 2195 char_u *midword = NULL; /* MIDWORD value */ 2196 char_u *syllable = NULL; /* SYLLABLE value */ 2197 char_u *sofofrom = NULL; /* SOFOFROM value */ 2198 char_u *sofoto = NULL; /* SOFOTO value */ 2199 2200 /* 2201 * Open the file. 2202 */ 2203 fd = mch_fopen((char *)fname, "r"); 2204 if (fd == NULL) 2205 { 2206 semsg(_(e_notopen), fname); 2207 return NULL; 2208 } 2209 2210 vim_snprintf((char *)IObuff, IOSIZE, _("Reading affix file %s..."), fname); 2211 spell_message(spin, IObuff); 2212 2213 /* Only do REP lines when not done in another .aff file already. */ 2214 do_rep = spin->si_rep.ga_len == 0; 2215 2216 /* Only do REPSAL lines when not done in another .aff file already. */ 2217 do_repsal = spin->si_repsal.ga_len == 0; 2218 2219 /* Only do SAL lines when not done in another .aff file already. */ 2220 do_sal = spin->si_sal.ga_len == 0; 2221 2222 /* Only do MAP lines when not done in another .aff file already. */ 2223 do_mapline = spin->si_map.ga_len == 0; 2224 2225 /* 2226 * Allocate and init the afffile_T structure. 2227 */ 2228 aff = (afffile_T *)getroom(spin, sizeof(afffile_T), TRUE); 2229 if (aff == NULL) 2230 { 2231 fclose(fd); 2232 return NULL; 2233 } 2234 hash_init(&aff->af_pref); 2235 hash_init(&aff->af_suff); 2236 hash_init(&aff->af_comp); 2237 2238 /* 2239 * Read all the lines in the file one by one. 2240 */ 2241 while (!vim_fgets(rline, MAXLINELEN, fd) && !got_int) 2242 { 2243 line_breakcheck(); 2244 ++lnum; 2245 2246 /* Skip comment lines. */ 2247 if (*rline == '#') 2248 continue; 2249 2250 /* Convert from "SET" to 'encoding' when needed. */ 2251 vim_free(pc); 2252 if (spin->si_conv.vc_type != CONV_NONE) 2253 { 2254 pc = string_convert(&spin->si_conv, rline, NULL); 2255 if (pc == NULL) 2256 { 2257 smsg(_("Conversion failure for word in %s line %d: %s"), 2258 fname, lnum, rline); 2259 continue; 2260 } 2261 line = pc; 2262 } 2263 else 2264 { 2265 pc = NULL; 2266 line = rline; 2267 } 2268 2269 /* Split the line up in white separated items. Put a NUL after each 2270 * item. */ 2271 itemcnt = 0; 2272 for (p = line; ; ) 2273 { 2274 while (*p != NUL && *p <= ' ') /* skip white space and CR/NL */ 2275 ++p; 2276 if (*p == NUL) 2277 break; 2278 if (itemcnt == MAXITEMCNT) /* too many items */ 2279 break; 2280 items[itemcnt++] = p; 2281 /* A few items have arbitrary text argument, don't split them. */ 2282 if (itemcnt == 2 && spell_info_item(items[0])) 2283 while (*p >= ' ' || *p == TAB) /* skip until CR/NL */ 2284 ++p; 2285 else 2286 while (*p > ' ') /* skip until white space or CR/NL */ 2287 ++p; 2288 if (*p == NUL) 2289 break; 2290 *p++ = NUL; 2291 } 2292 2293 /* Handle non-empty lines. */ 2294 if (itemcnt > 0) 2295 { 2296 if (is_aff_rule(items, itemcnt, "SET", 2) && aff->af_enc == NULL) 2297 { 2298 /* Setup for conversion from "ENC" to 'encoding'. */ 2299 aff->af_enc = enc_canonize(items[1]); 2300 if (aff->af_enc != NULL && !spin->si_ascii 2301 && convert_setup(&spin->si_conv, aff->af_enc, 2302 p_enc) == FAIL) 2303 smsg(_("Conversion in %s not supported: from %s to %s"), 2304 fname, aff->af_enc, p_enc); 2305 spin->si_conv.vc_fail = TRUE; 2306 } 2307 else if (is_aff_rule(items, itemcnt, "FLAG", 2) 2308 && aff->af_flagtype == AFT_CHAR) 2309 { 2310 if (STRCMP(items[1], "long") == 0) 2311 aff->af_flagtype = AFT_LONG; 2312 else if (STRCMP(items[1], "num") == 0) 2313 aff->af_flagtype = AFT_NUM; 2314 else if (STRCMP(items[1], "caplong") == 0) 2315 aff->af_flagtype = AFT_CAPLONG; 2316 else 2317 smsg(_("Invalid value for FLAG in %s line %d: %s"), 2318 fname, lnum, items[1]); 2319 if (aff->af_rare != 0 2320 || aff->af_keepcase != 0 2321 || aff->af_bad != 0 2322 || aff->af_needaffix != 0 2323 || aff->af_circumfix != 0 2324 || aff->af_needcomp != 0 2325 || aff->af_comproot != 0 2326 || aff->af_nosuggest != 0 2327 || compflags != NULL 2328 || aff->af_suff.ht_used > 0 2329 || aff->af_pref.ht_used > 0) 2330 smsg(_("FLAG after using flags in %s line %d: %s"), 2331 fname, lnum, items[1]); 2332 } 2333 else if (spell_info_item(items[0])) 2334 { 2335 p = (char_u *)getroom(spin, 2336 (spin->si_info == NULL ? 0 : STRLEN(spin->si_info)) 2337 + STRLEN(items[0]) 2338 + STRLEN(items[1]) + 3, FALSE); 2339 if (p != NULL) 2340 { 2341 if (spin->si_info != NULL) 2342 { 2343 STRCPY(p, spin->si_info); 2344 STRCAT(p, "\n"); 2345 } 2346 STRCAT(p, items[0]); 2347 STRCAT(p, " "); 2348 STRCAT(p, items[1]); 2349 spin->si_info = p; 2350 } 2351 } 2352 else if (is_aff_rule(items, itemcnt, "MIDWORD", 2) 2353 && midword == NULL) 2354 { 2355 midword = getroom_save(spin, items[1]); 2356 } 2357 else if (is_aff_rule(items, itemcnt, "TRY", 2)) 2358 { 2359 /* ignored, we look in the tree for what chars may appear */ 2360 } 2361 /* TODO: remove "RAR" later */ 2362 else if ((is_aff_rule(items, itemcnt, "RAR", 2) 2363 || is_aff_rule(items, itemcnt, "RARE", 2)) 2364 && aff->af_rare == 0) 2365 { 2366 aff->af_rare = affitem2flag(aff->af_flagtype, items[1], 2367 fname, lnum); 2368 } 2369 /* TODO: remove "KEP" later */ 2370 else if ((is_aff_rule(items, itemcnt, "KEP", 2) 2371 || is_aff_rule(items, itemcnt, "KEEPCASE", 2)) 2372 && aff->af_keepcase == 0) 2373 { 2374 aff->af_keepcase = affitem2flag(aff->af_flagtype, items[1], 2375 fname, lnum); 2376 } 2377 else if ((is_aff_rule(items, itemcnt, "BAD", 2) 2378 || is_aff_rule(items, itemcnt, "FORBIDDENWORD", 2)) 2379 && aff->af_bad == 0) 2380 { 2381 aff->af_bad = affitem2flag(aff->af_flagtype, items[1], 2382 fname, lnum); 2383 } 2384 else if (is_aff_rule(items, itemcnt, "NEEDAFFIX", 2) 2385 && aff->af_needaffix == 0) 2386 { 2387 aff->af_needaffix = affitem2flag(aff->af_flagtype, items[1], 2388 fname, lnum); 2389 } 2390 else if (is_aff_rule(items, itemcnt, "CIRCUMFIX", 2) 2391 && aff->af_circumfix == 0) 2392 { 2393 aff->af_circumfix = affitem2flag(aff->af_flagtype, items[1], 2394 fname, lnum); 2395 } 2396 else if (is_aff_rule(items, itemcnt, "NOSUGGEST", 2) 2397 && aff->af_nosuggest == 0) 2398 { 2399 aff->af_nosuggest = affitem2flag(aff->af_flagtype, items[1], 2400 fname, lnum); 2401 } 2402 else if ((is_aff_rule(items, itemcnt, "NEEDCOMPOUND", 2) 2403 || is_aff_rule(items, itemcnt, "ONLYINCOMPOUND", 2)) 2404 && aff->af_needcomp == 0) 2405 { 2406 aff->af_needcomp = affitem2flag(aff->af_flagtype, items[1], 2407 fname, lnum); 2408 } 2409 else if (is_aff_rule(items, itemcnt, "COMPOUNDROOT", 2) 2410 && aff->af_comproot == 0) 2411 { 2412 aff->af_comproot = affitem2flag(aff->af_flagtype, items[1], 2413 fname, lnum); 2414 } 2415 else if (is_aff_rule(items, itemcnt, "COMPOUNDFORBIDFLAG", 2) 2416 && aff->af_compforbid == 0) 2417 { 2418 aff->af_compforbid = affitem2flag(aff->af_flagtype, items[1], 2419 fname, lnum); 2420 if (aff->af_pref.ht_used > 0) 2421 smsg(_("Defining COMPOUNDFORBIDFLAG after PFX item may give wrong results in %s line %d"), 2422 fname, lnum); 2423 } 2424 else if (is_aff_rule(items, itemcnt, "COMPOUNDPERMITFLAG", 2) 2425 && aff->af_comppermit == 0) 2426 { 2427 aff->af_comppermit = affitem2flag(aff->af_flagtype, items[1], 2428 fname, lnum); 2429 if (aff->af_pref.ht_used > 0) 2430 smsg(_("Defining COMPOUNDPERMITFLAG after PFX item may give wrong results in %s line %d"), 2431 fname, lnum); 2432 } 2433 else if (is_aff_rule(items, itemcnt, "COMPOUNDFLAG", 2) 2434 && compflags == NULL) 2435 { 2436 /* Turn flag "c" into COMPOUNDRULE compatible string "c+", 2437 * "Na" into "Na+", "1234" into "1234+". */ 2438 p = getroom(spin, STRLEN(items[1]) + 2, FALSE); 2439 if (p != NULL) 2440 { 2441 STRCPY(p, items[1]); 2442 STRCAT(p, "+"); 2443 compflags = p; 2444 } 2445 } 2446 else if (is_aff_rule(items, itemcnt, "COMPOUNDRULES", 2)) 2447 { 2448 /* We don't use the count, but do check that it's a number and 2449 * not COMPOUNDRULE mistyped. */ 2450 if (atoi((char *)items[1]) == 0) 2451 smsg(_("Wrong COMPOUNDRULES value in %s line %d: %s"), 2452 fname, lnum, items[1]); 2453 } 2454 else if (is_aff_rule(items, itemcnt, "COMPOUNDRULE", 2)) 2455 { 2456 /* Don't use the first rule if it is a number. */ 2457 if (compflags != NULL || *skipdigits(items[1]) != NUL) 2458 { 2459 /* Concatenate this string to previously defined ones, 2460 * using a slash to separate them. */ 2461 l = (int)STRLEN(items[1]) + 1; 2462 if (compflags != NULL) 2463 l += (int)STRLEN(compflags) + 1; 2464 p = getroom(spin, l, FALSE); 2465 if (p != NULL) 2466 { 2467 if (compflags != NULL) 2468 { 2469 STRCPY(p, compflags); 2470 STRCAT(p, "/"); 2471 } 2472 STRCAT(p, items[1]); 2473 compflags = p; 2474 } 2475 } 2476 } 2477 else if (is_aff_rule(items, itemcnt, "COMPOUNDWORDMAX", 2) 2478 && compmax == 0) 2479 { 2480 compmax = atoi((char *)items[1]); 2481 if (compmax == 0) 2482 smsg(_("Wrong COMPOUNDWORDMAX value in %s line %d: %s"), 2483 fname, lnum, items[1]); 2484 } 2485 else if (is_aff_rule(items, itemcnt, "COMPOUNDMIN", 2) 2486 && compminlen == 0) 2487 { 2488 compminlen = atoi((char *)items[1]); 2489 if (compminlen == 0) 2490 smsg(_("Wrong COMPOUNDMIN value in %s line %d: %s"), 2491 fname, lnum, items[1]); 2492 } 2493 else if (is_aff_rule(items, itemcnt, "COMPOUNDSYLMAX", 2) 2494 && compsylmax == 0) 2495 { 2496 compsylmax = atoi((char *)items[1]); 2497 if (compsylmax == 0) 2498 smsg(_("Wrong COMPOUNDSYLMAX value in %s line %d: %s"), 2499 fname, lnum, items[1]); 2500 } 2501 else if (is_aff_rule(items, itemcnt, "CHECKCOMPOUNDDUP", 1)) 2502 { 2503 compoptions |= COMP_CHECKDUP; 2504 } 2505 else if (is_aff_rule(items, itemcnt, "CHECKCOMPOUNDREP", 1)) 2506 { 2507 compoptions |= COMP_CHECKREP; 2508 } 2509 else if (is_aff_rule(items, itemcnt, "CHECKCOMPOUNDCASE", 1)) 2510 { 2511 compoptions |= COMP_CHECKCASE; 2512 } 2513 else if (is_aff_rule(items, itemcnt, "CHECKCOMPOUNDTRIPLE", 1)) 2514 { 2515 compoptions |= COMP_CHECKTRIPLE; 2516 } 2517 else if (is_aff_rule(items, itemcnt, "CHECKCOMPOUNDPATTERN", 2)) 2518 { 2519 if (atoi((char *)items[1]) == 0) 2520 smsg(_("Wrong CHECKCOMPOUNDPATTERN value in %s line %d: %s"), 2521 fname, lnum, items[1]); 2522 } 2523 else if (is_aff_rule(items, itemcnt, "CHECKCOMPOUNDPATTERN", 3)) 2524 { 2525 garray_T *gap = &spin->si_comppat; 2526 int i; 2527 2528 /* Only add the couple if it isn't already there. */ 2529 for (i = 0; i < gap->ga_len - 1; i += 2) 2530 if (STRCMP(((char_u **)(gap->ga_data))[i], items[1]) == 0 2531 && STRCMP(((char_u **)(gap->ga_data))[i + 1], 2532 items[2]) == 0) 2533 break; 2534 if (i >= gap->ga_len && ga_grow(gap, 2) == OK) 2535 { 2536 ((char_u **)(gap->ga_data))[gap->ga_len++] 2537 = getroom_save(spin, items[1]); 2538 ((char_u **)(gap->ga_data))[gap->ga_len++] 2539 = getroom_save(spin, items[2]); 2540 } 2541 } 2542 else if (is_aff_rule(items, itemcnt, "SYLLABLE", 2) 2543 && syllable == NULL) 2544 { 2545 syllable = getroom_save(spin, items[1]); 2546 } 2547 else if (is_aff_rule(items, itemcnt, "NOBREAK", 1)) 2548 { 2549 spin->si_nobreak = TRUE; 2550 } 2551 else if (is_aff_rule(items, itemcnt, "NOSPLITSUGS", 1)) 2552 { 2553 spin->si_nosplitsugs = TRUE; 2554 } 2555 else if (is_aff_rule(items, itemcnt, "NOCOMPOUNDSUGS", 1)) 2556 { 2557 spin->si_nocompoundsugs = TRUE; 2558 } 2559 else if (is_aff_rule(items, itemcnt, "NOSUGFILE", 1)) 2560 { 2561 spin->si_nosugfile = TRUE; 2562 } 2563 else if (is_aff_rule(items, itemcnt, "PFXPOSTPONE", 1)) 2564 { 2565 aff->af_pfxpostpone = TRUE; 2566 } 2567 else if (is_aff_rule(items, itemcnt, "IGNOREEXTRA", 1)) 2568 { 2569 aff->af_ignoreextra = TRUE; 2570 } 2571 else if ((STRCMP(items[0], "PFX") == 0 2572 || STRCMP(items[0], "SFX") == 0) 2573 && aff_todo == 0 2574 && itemcnt >= 4) 2575 { 2576 int lasti = 4; 2577 char_u key[AH_KEY_LEN]; 2578 2579 if (*items[0] == 'P') 2580 tp = &aff->af_pref; 2581 else 2582 tp = &aff->af_suff; 2583 2584 /* Myspell allows the same affix name to be used multiple 2585 * times. The affix files that do this have an undocumented 2586 * "S" flag on all but the last block, thus we check for that 2587 * and store it in ah_follows. */ 2588 vim_strncpy(key, items[1], AH_KEY_LEN - 1); 2589 hi = hash_find(tp, key); 2590 if (!HASHITEM_EMPTY(hi)) 2591 { 2592 cur_aff = HI2AH(hi); 2593 if (cur_aff->ah_combine != (*items[2] == 'Y')) 2594 smsg(_("Different combining flag in continued affix block in %s line %d: %s"), 2595 fname, lnum, items[1]); 2596 if (!cur_aff->ah_follows) 2597 smsg(_("Duplicate affix in %s line %d: %s"), 2598 fname, lnum, items[1]); 2599 } 2600 else 2601 { 2602 /* New affix letter. */ 2603 cur_aff = (affheader_T *)getroom(spin, 2604 sizeof(affheader_T), TRUE); 2605 if (cur_aff == NULL) 2606 break; 2607 cur_aff->ah_flag = affitem2flag(aff->af_flagtype, items[1], 2608 fname, lnum); 2609 if (cur_aff->ah_flag == 0 || STRLEN(items[1]) >= AH_KEY_LEN) 2610 break; 2611 if (cur_aff->ah_flag == aff->af_bad 2612 || cur_aff->ah_flag == aff->af_rare 2613 || cur_aff->ah_flag == aff->af_keepcase 2614 || cur_aff->ah_flag == aff->af_needaffix 2615 || cur_aff->ah_flag == aff->af_circumfix 2616 || cur_aff->ah_flag == aff->af_nosuggest 2617 || cur_aff->ah_flag == aff->af_needcomp 2618 || cur_aff->ah_flag == aff->af_comproot) 2619 smsg(_("Affix also used for BAD/RARE/KEEPCASE/NEEDAFFIX/NEEDCOMPOUND/NOSUGGEST in %s line %d: %s"), 2620 fname, lnum, items[1]); 2621 STRCPY(cur_aff->ah_key, items[1]); 2622 hash_add(tp, cur_aff->ah_key); 2623 2624 cur_aff->ah_combine = (*items[2] == 'Y'); 2625 } 2626 2627 /* Check for the "S" flag, which apparently means that another 2628 * block with the same affix name is following. */ 2629 if (itemcnt > lasti && STRCMP(items[lasti], "S") == 0) 2630 { 2631 ++lasti; 2632 cur_aff->ah_follows = TRUE; 2633 } 2634 else 2635 cur_aff->ah_follows = FALSE; 2636 2637 /* Myspell allows extra text after the item, but that might 2638 * mean mistakes go unnoticed. Require a comment-starter. */ 2639 if (itemcnt > lasti && *items[lasti] != '#') 2640 smsg(_(e_afftrailing), fname, lnum, items[lasti]); 2641 2642 if (STRCMP(items[2], "Y") != 0 && STRCMP(items[2], "N") != 0) 2643 smsg(_("Expected Y or N in %s line %d: %s"), 2644 fname, lnum, items[2]); 2645 2646 if (*items[0] == 'P' && aff->af_pfxpostpone) 2647 { 2648 if (cur_aff->ah_newID == 0) 2649 { 2650 /* Use a new number in the .spl file later, to be able 2651 * to handle multiple .aff files. */ 2652 check_renumber(spin); 2653 cur_aff->ah_newID = ++spin->si_newprefID; 2654 2655 /* We only really use ah_newID if the prefix is 2656 * postponed. We know that only after handling all 2657 * the items. */ 2658 did_postpone_prefix = FALSE; 2659 } 2660 else 2661 /* Did use the ID in a previous block. */ 2662 did_postpone_prefix = TRUE; 2663 } 2664 2665 aff_todo = atoi((char *)items[3]); 2666 } 2667 else if ((STRCMP(items[0], "PFX") == 0 2668 || STRCMP(items[0], "SFX") == 0) 2669 && aff_todo > 0 2670 && STRCMP(cur_aff->ah_key, items[1]) == 0 2671 && itemcnt >= 5) 2672 { 2673 affentry_T *aff_entry; 2674 int upper = FALSE; 2675 int lasti = 5; 2676 2677 /* Myspell allows extra text after the item, but that might 2678 * mean mistakes go unnoticed. Require a comment-starter, 2679 * unless IGNOREEXTRA is used. Hunspell uses a "-" item. */ 2680 if (itemcnt > lasti 2681 && !aff->af_ignoreextra 2682 && *items[lasti] != '#' 2683 && (STRCMP(items[lasti], "-") != 0 2684 || itemcnt != lasti + 1)) 2685 smsg(_(e_afftrailing), fname, lnum, items[lasti]); 2686 2687 /* New item for an affix letter. */ 2688 --aff_todo; 2689 aff_entry = (affentry_T *)getroom(spin, 2690 sizeof(affentry_T), TRUE); 2691 if (aff_entry == NULL) 2692 break; 2693 2694 if (STRCMP(items[2], "0") != 0) 2695 aff_entry->ae_chop = getroom_save(spin, items[2]); 2696 if (STRCMP(items[3], "0") != 0) 2697 { 2698 aff_entry->ae_add = getroom_save(spin, items[3]); 2699 2700 /* Recognize flags on the affix: abcd/XYZ */ 2701 aff_entry->ae_flags = vim_strchr(aff_entry->ae_add, '/'); 2702 if (aff_entry->ae_flags != NULL) 2703 { 2704 *aff_entry->ae_flags++ = NUL; 2705 aff_process_flags(aff, aff_entry); 2706 } 2707 } 2708 2709 /* Don't use an affix entry with non-ASCII characters when 2710 * "spin->si_ascii" is TRUE. */ 2711 if (!spin->si_ascii || !(has_non_ascii(aff_entry->ae_chop) 2712 || has_non_ascii(aff_entry->ae_add))) 2713 { 2714 aff_entry->ae_next = cur_aff->ah_first; 2715 cur_aff->ah_first = aff_entry; 2716 2717 if (STRCMP(items[4], ".") != 0) 2718 { 2719 char_u buf[MAXLINELEN]; 2720 2721 aff_entry->ae_cond = getroom_save(spin, items[4]); 2722 if (*items[0] == 'P') 2723 sprintf((char *)buf, "^%s", items[4]); 2724 else 2725 sprintf((char *)buf, "%s$", items[4]); 2726 aff_entry->ae_prog = vim_regcomp(buf, 2727 RE_MAGIC + RE_STRING + RE_STRICT); 2728 if (aff_entry->ae_prog == NULL) 2729 smsg(_("Broken condition in %s line %d: %s"), 2730 fname, lnum, items[4]); 2731 } 2732 2733 /* For postponed prefixes we need an entry in si_prefcond 2734 * for the condition. Use an existing one if possible. 2735 * Can't be done for an affix with flags, ignoring 2736 * COMPOUNDFORBIDFLAG and COMPOUNDPERMITFLAG. */ 2737 if (*items[0] == 'P' && aff->af_pfxpostpone 2738 && aff_entry->ae_flags == NULL) 2739 { 2740 /* When the chop string is one lower-case letter and 2741 * the add string ends in the upper-case letter we set 2742 * the "upper" flag, clear "ae_chop" and remove the 2743 * letters from "ae_add". The condition must either 2744 * be empty or start with the same letter. */ 2745 if (aff_entry->ae_chop != NULL 2746 && aff_entry->ae_add != NULL 2747 && aff_entry->ae_chop[(*mb_ptr2len)( 2748 aff_entry->ae_chop)] == NUL) 2749 { 2750 int c, c_up; 2751 2752 c = PTR2CHAR(aff_entry->ae_chop); 2753 c_up = SPELL_TOUPPER(c); 2754 if (c_up != c 2755 && (aff_entry->ae_cond == NULL 2756 || PTR2CHAR(aff_entry->ae_cond) == c)) 2757 { 2758 p = aff_entry->ae_add 2759 + STRLEN(aff_entry->ae_add); 2760 MB_PTR_BACK(aff_entry->ae_add, p); 2761 if (PTR2CHAR(p) == c_up) 2762 { 2763 upper = TRUE; 2764 aff_entry->ae_chop = NULL; 2765 *p = NUL; 2766 2767 /* The condition is matched with the 2768 * actual word, thus must check for the 2769 * upper-case letter. */ 2770 if (aff_entry->ae_cond != NULL) 2771 { 2772 char_u buf[MAXLINELEN]; 2773 2774 if (has_mbyte) 2775 { 2776 onecap_copy(items[4], buf, TRUE); 2777 aff_entry->ae_cond = getroom_save( 2778 spin, buf); 2779 } 2780 else 2781 *aff_entry->ae_cond = c_up; 2782 if (aff_entry->ae_cond != NULL) 2783 { 2784 sprintf((char *)buf, "^%s", 2785 aff_entry->ae_cond); 2786 vim_regfree(aff_entry->ae_prog); 2787 aff_entry->ae_prog = vim_regcomp( 2788 buf, RE_MAGIC + RE_STRING); 2789 } 2790 } 2791 } 2792 } 2793 } 2794 2795 if (aff_entry->ae_chop == NULL 2796 && aff_entry->ae_flags == NULL) 2797 { 2798 int idx; 2799 char_u **pp; 2800 int n; 2801 2802 /* Find a previously used condition. */ 2803 for (idx = spin->si_prefcond.ga_len - 1; idx >= 0; 2804 --idx) 2805 { 2806 p = ((char_u **)spin->si_prefcond.ga_data)[idx]; 2807 if (str_equal(p, aff_entry->ae_cond)) 2808 break; 2809 } 2810 if (idx < 0 && ga_grow(&spin->si_prefcond, 1) == OK) 2811 { 2812 /* Not found, add a new condition. */ 2813 idx = spin->si_prefcond.ga_len++; 2814 pp = ((char_u **)spin->si_prefcond.ga_data) 2815 + idx; 2816 if (aff_entry->ae_cond == NULL) 2817 *pp = NULL; 2818 else 2819 *pp = getroom_save(spin, 2820 aff_entry->ae_cond); 2821 } 2822 2823 /* Add the prefix to the prefix tree. */ 2824 if (aff_entry->ae_add == NULL) 2825 p = (char_u *)""; 2826 else 2827 p = aff_entry->ae_add; 2828 2829 /* PFX_FLAGS is a negative number, so that 2830 * tree_add_word() knows this is the prefix tree. */ 2831 n = PFX_FLAGS; 2832 if (!cur_aff->ah_combine) 2833 n |= WFP_NC; 2834 if (upper) 2835 n |= WFP_UP; 2836 if (aff_entry->ae_comppermit) 2837 n |= WFP_COMPPERMIT; 2838 if (aff_entry->ae_compforbid) 2839 n |= WFP_COMPFORBID; 2840 tree_add_word(spin, p, spin->si_prefroot, n, 2841 idx, cur_aff->ah_newID); 2842 did_postpone_prefix = TRUE; 2843 } 2844 2845 /* Didn't actually use ah_newID, backup si_newprefID. */ 2846 if (aff_todo == 0 && !did_postpone_prefix) 2847 { 2848 --spin->si_newprefID; 2849 cur_aff->ah_newID = 0; 2850 } 2851 } 2852 } 2853 } 2854 else if (is_aff_rule(items, itemcnt, "FOL", 2) && fol == NULL) 2855 { 2856 fol = vim_strsave(items[1]); 2857 } 2858 else if (is_aff_rule(items, itemcnt, "LOW", 2) && low == NULL) 2859 { 2860 low = vim_strsave(items[1]); 2861 } 2862 else if (is_aff_rule(items, itemcnt, "UPP", 2) && upp == NULL) 2863 { 2864 upp = vim_strsave(items[1]); 2865 } 2866 else if (is_aff_rule(items, itemcnt, "REP", 2) 2867 || is_aff_rule(items, itemcnt, "REPSAL", 2)) 2868 { 2869 /* Ignore REP/REPSAL count */; 2870 if (!isdigit(*items[1])) 2871 smsg(_("Expected REP(SAL) count in %s line %d"), 2872 fname, lnum); 2873 } 2874 else if ((STRCMP(items[0], "REP") == 0 2875 || STRCMP(items[0], "REPSAL") == 0) 2876 && itemcnt >= 3) 2877 { 2878 /* REP/REPSAL item */ 2879 /* Myspell ignores extra arguments, we require it starts with 2880 * # to detect mistakes. */ 2881 if (itemcnt > 3 && items[3][0] != '#') 2882 smsg(_(e_afftrailing), fname, lnum, items[3]); 2883 if (items[0][3] == 'S' ? do_repsal : do_rep) 2884 { 2885 /* Replace underscore with space (can't include a space 2886 * directly). */ 2887 for (p = items[1]; *p != NUL; MB_PTR_ADV(p)) 2888 if (*p == '_') 2889 *p = ' '; 2890 for (p = items[2]; *p != NUL; MB_PTR_ADV(p)) 2891 if (*p == '_') 2892 *p = ' '; 2893 add_fromto(spin, items[0][3] == 'S' 2894 ? &spin->si_repsal 2895 : &spin->si_rep, items[1], items[2]); 2896 } 2897 } 2898 else if (is_aff_rule(items, itemcnt, "MAP", 2)) 2899 { 2900 /* MAP item or count */ 2901 if (!found_map) 2902 { 2903 /* First line contains the count. */ 2904 found_map = TRUE; 2905 if (!isdigit(*items[1])) 2906 smsg(_("Expected MAP count in %s line %d"), 2907 fname, lnum); 2908 } 2909 else if (do_mapline) 2910 { 2911 int c; 2912 2913 /* Check that every character appears only once. */ 2914 for (p = items[1]; *p != NUL; ) 2915 { 2916 c = mb_ptr2char_adv(&p); 2917 if ((spin->si_map.ga_len > 0 2918 && vim_strchr(spin->si_map.ga_data, c) 2919 != NULL) 2920 || vim_strchr(p, c) != NULL) 2921 smsg(_("Duplicate character in MAP in %s line %d"), 2922 fname, lnum); 2923 } 2924 2925 /* We simply concatenate all the MAP strings, separated by 2926 * slashes. */ 2927 ga_concat(&spin->si_map, items[1]); 2928 ga_append(&spin->si_map, '/'); 2929 } 2930 } 2931 /* Accept "SAL from to" and "SAL from to #comment". */ 2932 else if (is_aff_rule(items, itemcnt, "SAL", 3)) 2933 { 2934 if (do_sal) 2935 { 2936 /* SAL item (sounds-a-like) 2937 * Either one of the known keys or a from-to pair. */ 2938 if (STRCMP(items[1], "followup") == 0) 2939 spin->si_followup = sal_to_bool(items[2]); 2940 else if (STRCMP(items[1], "collapse_result") == 0) 2941 spin->si_collapse = sal_to_bool(items[2]); 2942 else if (STRCMP(items[1], "remove_accents") == 0) 2943 spin->si_rem_accents = sal_to_bool(items[2]); 2944 else 2945 /* when "to" is "_" it means empty */ 2946 add_fromto(spin, &spin->si_sal, items[1], 2947 STRCMP(items[2], "_") == 0 ? (char_u *)"" 2948 : items[2]); 2949 } 2950 } 2951 else if (is_aff_rule(items, itemcnt, "SOFOFROM", 2) 2952 && sofofrom == NULL) 2953 { 2954 sofofrom = getroom_save(spin, items[1]); 2955 } 2956 else if (is_aff_rule(items, itemcnt, "SOFOTO", 2) 2957 && sofoto == NULL) 2958 { 2959 sofoto = getroom_save(spin, items[1]); 2960 } 2961 else if (STRCMP(items[0], "COMMON") == 0) 2962 { 2963 int i; 2964 2965 for (i = 1; i < itemcnt; ++i) 2966 { 2967 if (HASHITEM_EMPTY(hash_find(&spin->si_commonwords, 2968 items[i]))) 2969 { 2970 p = vim_strsave(items[i]); 2971 if (p == NULL) 2972 break; 2973 hash_add(&spin->si_commonwords, p); 2974 } 2975 } 2976 } 2977 else 2978 smsg(_("Unrecognized or duplicate item in %s line %d: %s"), 2979 fname, lnum, items[0]); 2980 } 2981 } 2982 2983 if (fol != NULL || low != NULL || upp != NULL) 2984 { 2985 if (spin->si_clear_chartab) 2986 { 2987 /* Clear the char type tables, don't want to use any of the 2988 * currently used spell properties. */ 2989 init_spell_chartab(); 2990 spin->si_clear_chartab = FALSE; 2991 } 2992 2993 /* 2994 * Don't write a word table for an ASCII file, so that we don't check 2995 * for conflicts with a word table that matches 'encoding'. 2996 * Don't write one for utf-8 either, we use utf_*() and 2997 * mb_get_class(), the list of chars in the file will be incomplete. 2998 */ 2999 if (!spin->si_ascii && !enc_utf8) 3000 { 3001 if (fol == NULL || low == NULL || upp == NULL) 3002 smsg(_("Missing FOL/LOW/UPP line in %s"), fname); 3003 else 3004 (void)set_spell_chartab(fol, low, upp); 3005 } 3006 3007 vim_free(fol); 3008 vim_free(low); 3009 vim_free(upp); 3010 } 3011 3012 /* Use compound specifications of the .aff file for the spell info. */ 3013 if (compmax != 0) 3014 { 3015 aff_check_number(spin->si_compmax, compmax, "COMPOUNDWORDMAX"); 3016 spin->si_compmax = compmax; 3017 } 3018 3019 if (compminlen != 0) 3020 { 3021 aff_check_number(spin->si_compminlen, compminlen, "COMPOUNDMIN"); 3022 spin->si_compminlen = compminlen; 3023 } 3024 3025 if (compsylmax != 0) 3026 { 3027 if (syllable == NULL) 3028 smsg(_("COMPOUNDSYLMAX used without SYLLABLE")); 3029 aff_check_number(spin->si_compsylmax, compsylmax, "COMPOUNDSYLMAX"); 3030 spin->si_compsylmax = compsylmax; 3031 } 3032 3033 if (compoptions != 0) 3034 { 3035 aff_check_number(spin->si_compoptions, compoptions, "COMPOUND options"); 3036 spin->si_compoptions |= compoptions; 3037 } 3038 3039 if (compflags != NULL) 3040 process_compflags(spin, aff, compflags); 3041 3042 /* Check that we didn't use too many renumbered flags. */ 3043 if (spin->si_newcompID < spin->si_newprefID) 3044 { 3045 if (spin->si_newcompID == 127 || spin->si_newcompID == 255) 3046 msg(_("Too many postponed prefixes")); 3047 else if (spin->si_newprefID == 0 || spin->si_newprefID == 127) 3048 msg(_("Too many compound flags")); 3049 else 3050 msg(_("Too many postponed prefixes and/or compound flags")); 3051 } 3052 3053 if (syllable != NULL) 3054 { 3055 aff_check_string(spin->si_syllable, syllable, "SYLLABLE"); 3056 spin->si_syllable = syllable; 3057 } 3058 3059 if (sofofrom != NULL || sofoto != NULL) 3060 { 3061 if (sofofrom == NULL || sofoto == NULL) 3062 smsg(_("Missing SOFO%s line in %s"), 3063 sofofrom == NULL ? "FROM" : "TO", fname); 3064 else if (spin->si_sal.ga_len > 0) 3065 smsg(_("Both SAL and SOFO lines in %s"), fname); 3066 else 3067 { 3068 aff_check_string(spin->si_sofofr, sofofrom, "SOFOFROM"); 3069 aff_check_string(spin->si_sofoto, sofoto, "SOFOTO"); 3070 spin->si_sofofr = sofofrom; 3071 spin->si_sofoto = sofoto; 3072 } 3073 } 3074 3075 if (midword != NULL) 3076 { 3077 aff_check_string(spin->si_midword, midword, "MIDWORD"); 3078 spin->si_midword = midword; 3079 } 3080 3081 vim_free(pc); 3082 fclose(fd); 3083 return aff; 3084 } 3085 3086 /* 3087 * Return TRUE when items[0] equals "rulename", there are "mincount" items or 3088 * a comment is following after item "mincount". 3089 */ 3090 static int 3091 is_aff_rule( 3092 char_u **items, 3093 int itemcnt, 3094 char *rulename, 3095 int mincount) 3096 { 3097 return (STRCMP(items[0], rulename) == 0 3098 && (itemcnt == mincount 3099 || (itemcnt > mincount && items[mincount][0] == '#'))); 3100 } 3101 3102 /* 3103 * For affix "entry" move COMPOUNDFORBIDFLAG and COMPOUNDPERMITFLAG from 3104 * ae_flags to ae_comppermit and ae_compforbid. 3105 */ 3106 static void 3107 aff_process_flags(afffile_T *affile, affentry_T *entry) 3108 { 3109 char_u *p; 3110 char_u *prevp; 3111 unsigned flag; 3112 3113 if (entry->ae_flags != NULL 3114 && (affile->af_compforbid != 0 || affile->af_comppermit != 0)) 3115 { 3116 for (p = entry->ae_flags; *p != NUL; ) 3117 { 3118 prevp = p; 3119 flag = get_affitem(affile->af_flagtype, &p); 3120 if (flag == affile->af_comppermit || flag == affile->af_compforbid) 3121 { 3122 STRMOVE(prevp, p); 3123 p = prevp; 3124 if (flag == affile->af_comppermit) 3125 entry->ae_comppermit = TRUE; 3126 else 3127 entry->ae_compforbid = TRUE; 3128 } 3129 if (affile->af_flagtype == AFT_NUM && *p == ',') 3130 ++p; 3131 } 3132 if (*entry->ae_flags == NUL) 3133 entry->ae_flags = NULL; /* nothing left */ 3134 } 3135 } 3136 3137 /* 3138 * Return TRUE if "s" is the name of an info item in the affix file. 3139 */ 3140 static int 3141 spell_info_item(char_u *s) 3142 { 3143 return STRCMP(s, "NAME") == 0 3144 || STRCMP(s, "HOME") == 0 3145 || STRCMP(s, "VERSION") == 0 3146 || STRCMP(s, "AUTHOR") == 0 3147 || STRCMP(s, "EMAIL") == 0 3148 || STRCMP(s, "COPYRIGHT") == 0; 3149 } 3150 3151 /* 3152 * Turn an affix flag name into a number, according to the FLAG type. 3153 * returns zero for failure. 3154 */ 3155 static unsigned 3156 affitem2flag( 3157 int flagtype, 3158 char_u *item, 3159 char_u *fname, 3160 int lnum) 3161 { 3162 unsigned res; 3163 char_u *p = item; 3164 3165 res = get_affitem(flagtype, &p); 3166 if (res == 0) 3167 { 3168 if (flagtype == AFT_NUM) 3169 smsg(_("Flag is not a number in %s line %d: %s"), 3170 fname, lnum, item); 3171 else 3172 smsg(_("Illegal flag in %s line %d: %s"), 3173 fname, lnum, item); 3174 } 3175 if (*p != NUL) 3176 { 3177 smsg(_(e_affname), fname, lnum, item); 3178 return 0; 3179 } 3180 3181 return res; 3182 } 3183 3184 /* 3185 * Get one affix name from "*pp" and advance the pointer. 3186 * Returns zero for an error, still advances the pointer then. 3187 */ 3188 static unsigned 3189 get_affitem(int flagtype, char_u **pp) 3190 { 3191 int res; 3192 3193 if (flagtype == AFT_NUM) 3194 { 3195 if (!VIM_ISDIGIT(**pp)) 3196 { 3197 ++*pp; /* always advance, avoid getting stuck */ 3198 return 0; 3199 } 3200 res = getdigits(pp); 3201 } 3202 else 3203 { 3204 res = mb_ptr2char_adv(pp); 3205 if (flagtype == AFT_LONG || (flagtype == AFT_CAPLONG 3206 && res >= 'A' && res <= 'Z')) 3207 { 3208 if (**pp == NUL) 3209 return 0; 3210 res = mb_ptr2char_adv(pp) + (res << 16); 3211 } 3212 } 3213 return res; 3214 } 3215 3216 /* 3217 * Process the "compflags" string used in an affix file and append it to 3218 * spin->si_compflags. 3219 * The processing involves changing the affix names to ID numbers, so that 3220 * they fit in one byte. 3221 */ 3222 static void 3223 process_compflags( 3224 spellinfo_T *spin, 3225 afffile_T *aff, 3226 char_u *compflags) 3227 { 3228 char_u *p; 3229 char_u *prevp; 3230 unsigned flag; 3231 compitem_T *ci; 3232 int id; 3233 int len; 3234 char_u *tp; 3235 char_u key[AH_KEY_LEN]; 3236 hashitem_T *hi; 3237 3238 /* Make room for the old and the new compflags, concatenated with a / in 3239 * between. Processing it makes it shorter, but we don't know by how 3240 * much, thus allocate the maximum. */ 3241 len = (int)STRLEN(compflags) + 1; 3242 if (spin->si_compflags != NULL) 3243 len += (int)STRLEN(spin->si_compflags) + 1; 3244 p = getroom(spin, len, FALSE); 3245 if (p == NULL) 3246 return; 3247 if (spin->si_compflags != NULL) 3248 { 3249 STRCPY(p, spin->si_compflags); 3250 STRCAT(p, "/"); 3251 } 3252 spin->si_compflags = p; 3253 tp = p + STRLEN(p); 3254 3255 for (p = compflags; *p != NUL; ) 3256 { 3257 if (vim_strchr((char_u *)"/?*+[]", *p) != NULL) 3258 /* Copy non-flag characters directly. */ 3259 *tp++ = *p++; 3260 else 3261 { 3262 /* First get the flag number, also checks validity. */ 3263 prevp = p; 3264 flag = get_affitem(aff->af_flagtype, &p); 3265 if (flag != 0) 3266 { 3267 /* Find the flag in the hashtable. If it was used before, use 3268 * the existing ID. Otherwise add a new entry. */ 3269 vim_strncpy(key, prevp, p - prevp); 3270 hi = hash_find(&aff->af_comp, key); 3271 if (!HASHITEM_EMPTY(hi)) 3272 id = HI2CI(hi)->ci_newID; 3273 else 3274 { 3275 ci = (compitem_T *)getroom(spin, sizeof(compitem_T), TRUE); 3276 if (ci == NULL) 3277 break; 3278 STRCPY(ci->ci_key, key); 3279 ci->ci_flag = flag; 3280 /* Avoid using a flag ID that has a special meaning in a 3281 * regexp (also inside []). */ 3282 do 3283 { 3284 check_renumber(spin); 3285 id = spin->si_newcompID--; 3286 } while (vim_strchr((char_u *)"/?*+[]\\-^", id) != NULL); 3287 ci->ci_newID = id; 3288 hash_add(&aff->af_comp, ci->ci_key); 3289 } 3290 *tp++ = id; 3291 } 3292 if (aff->af_flagtype == AFT_NUM && *p == ',') 3293 ++p; 3294 } 3295 } 3296 3297 *tp = NUL; 3298 } 3299 3300 /* 3301 * Check that the new IDs for postponed affixes and compounding don't overrun 3302 * each other. We have almost 255 available, but start at 0-127 to avoid 3303 * using two bytes for utf-8. When the 0-127 range is used up go to 128-255. 3304 * When that is used up an error message is given. 3305 */ 3306 static void 3307 check_renumber(spellinfo_T *spin) 3308 { 3309 if (spin->si_newprefID == spin->si_newcompID && spin->si_newcompID < 128) 3310 { 3311 spin->si_newprefID = 127; 3312 spin->si_newcompID = 255; 3313 } 3314 } 3315 3316 /* 3317 * Return TRUE if flag "flag" appears in affix list "afflist". 3318 */ 3319 static int 3320 flag_in_afflist(int flagtype, char_u *afflist, unsigned flag) 3321 { 3322 char_u *p; 3323 unsigned n; 3324 3325 switch (flagtype) 3326 { 3327 case AFT_CHAR: 3328 return vim_strchr(afflist, flag) != NULL; 3329 3330 case AFT_CAPLONG: 3331 case AFT_LONG: 3332 for (p = afflist; *p != NUL; ) 3333 { 3334 n = mb_ptr2char_adv(&p); 3335 if ((flagtype == AFT_LONG || (n >= 'A' && n <= 'Z')) 3336 && *p != NUL) 3337 n = mb_ptr2char_adv(&p) + (n << 16); 3338 if (n == flag) 3339 return TRUE; 3340 } 3341 break; 3342 3343 case AFT_NUM: 3344 for (p = afflist; *p != NUL; ) 3345 { 3346 n = getdigits(&p); 3347 if (n == flag) 3348 return TRUE; 3349 if (*p != NUL) /* skip over comma */ 3350 ++p; 3351 } 3352 break; 3353 } 3354 return FALSE; 3355 } 3356 3357 /* 3358 * Give a warning when "spinval" and "affval" numbers are set and not the same. 3359 */ 3360 static void 3361 aff_check_number(int spinval, int affval, char *name) 3362 { 3363 if (spinval != 0 && spinval != affval) 3364 smsg(_("%s value differs from what is used in another .aff file"), name); 3365 } 3366 3367 /* 3368 * Give a warning when "spinval" and "affval" strings are set and not the same. 3369 */ 3370 static void 3371 aff_check_string(char_u *spinval, char_u *affval, char *name) 3372 { 3373 if (spinval != NULL && STRCMP(spinval, affval) != 0) 3374 smsg(_("%s value differs from what is used in another .aff file"), name); 3375 } 3376 3377 /* 3378 * Return TRUE if strings "s1" and "s2" are equal. Also consider both being 3379 * NULL as equal. 3380 */ 3381 static int 3382 str_equal(char_u *s1, char_u *s2) 3383 { 3384 if (s1 == NULL || s2 == NULL) 3385 return s1 == s2; 3386 return STRCMP(s1, s2) == 0; 3387 } 3388 3389 /* 3390 * Add a from-to item to "gap". Used for REP and SAL items. 3391 * They are stored case-folded. 3392 */ 3393 static void 3394 add_fromto( 3395 spellinfo_T *spin, 3396 garray_T *gap, 3397 char_u *from, 3398 char_u *to) 3399 { 3400 fromto_T *ftp; 3401 char_u word[MAXWLEN]; 3402 3403 if (ga_grow(gap, 1) == OK) 3404 { 3405 ftp = ((fromto_T *)gap->ga_data) + gap->ga_len; 3406 (void)spell_casefold(from, (int)STRLEN(from), word, MAXWLEN); 3407 ftp->ft_from = getroom_save(spin, word); 3408 (void)spell_casefold(to, (int)STRLEN(to), word, MAXWLEN); 3409 ftp->ft_to = getroom_save(spin, word); 3410 ++gap->ga_len; 3411 } 3412 } 3413 3414 /* 3415 * Convert a boolean argument in a SAL line to TRUE or FALSE; 3416 */ 3417 static int 3418 sal_to_bool(char_u *s) 3419 { 3420 return STRCMP(s, "1") == 0 || STRCMP(s, "true") == 0; 3421 } 3422 3423 /* 3424 * Free the structure filled by spell_read_aff(). 3425 */ 3426 static void 3427 spell_free_aff(afffile_T *aff) 3428 { 3429 hashtab_T *ht; 3430 hashitem_T *hi; 3431 int todo; 3432 affheader_T *ah; 3433 affentry_T *ae; 3434 3435 vim_free(aff->af_enc); 3436 3437 /* All this trouble to free the "ae_prog" items... */ 3438 for (ht = &aff->af_pref; ; ht = &aff->af_suff) 3439 { 3440 todo = (int)ht->ht_used; 3441 for (hi = ht->ht_array; todo > 0; ++hi) 3442 { 3443 if (!HASHITEM_EMPTY(hi)) 3444 { 3445 --todo; 3446 ah = HI2AH(hi); 3447 for (ae = ah->ah_first; ae != NULL; ae = ae->ae_next) 3448 vim_regfree(ae->ae_prog); 3449 } 3450 } 3451 if (ht == &aff->af_suff) 3452 break; 3453 } 3454 3455 hash_clear(&aff->af_pref); 3456 hash_clear(&aff->af_suff); 3457 hash_clear(&aff->af_comp); 3458 } 3459 3460 /* 3461 * Read dictionary file "fname". 3462 * Returns OK or FAIL; 3463 */ 3464 static int 3465 spell_read_dic(spellinfo_T *spin, char_u *fname, afffile_T *affile) 3466 { 3467 hashtab_T ht; 3468 char_u line[MAXLINELEN]; 3469 char_u *p; 3470 char_u *afflist; 3471 char_u store_afflist[MAXWLEN]; 3472 int pfxlen; 3473 int need_affix; 3474 char_u *dw; 3475 char_u *pc; 3476 char_u *w; 3477 int l; 3478 hash_T hash; 3479 hashitem_T *hi; 3480 FILE *fd; 3481 int lnum = 1; 3482 int non_ascii = 0; 3483 int retval = OK; 3484 char_u message[MAXLINELEN + MAXWLEN]; 3485 int flags; 3486 int duplicate = 0; 3487 3488 /* 3489 * Open the file. 3490 */ 3491 fd = mch_fopen((char *)fname, "r"); 3492 if (fd == NULL) 3493 { 3494 semsg(_(e_notopen), fname); 3495 return FAIL; 3496 } 3497 3498 /* The hashtable is only used to detect duplicated words. */ 3499 hash_init(&ht); 3500 3501 vim_snprintf((char *)IObuff, IOSIZE, 3502 _("Reading dictionary file %s..."), fname); 3503 spell_message(spin, IObuff); 3504 3505 /* start with a message for the first line */ 3506 spin->si_msg_count = 999999; 3507 3508 /* Read and ignore the first line: word count. */ 3509 (void)vim_fgets(line, MAXLINELEN, fd); 3510 if (!vim_isdigit(*skipwhite(line))) 3511 semsg(_("E760: No word count in %s"), fname); 3512 3513 /* 3514 * Read all the lines in the file one by one. 3515 * The words are converted to 'encoding' here, before being added to 3516 * the hashtable. 3517 */ 3518 while (!vim_fgets(line, MAXLINELEN, fd) && !got_int) 3519 { 3520 line_breakcheck(); 3521 ++lnum; 3522 if (line[0] == '#' || line[0] == '/') 3523 continue; /* comment line */ 3524 3525 /* Remove CR, LF and white space from the end. White space halfway 3526 * the word is kept to allow e.g., "et al.". */ 3527 l = (int)STRLEN(line); 3528 while (l > 0 && line[l - 1] <= ' ') 3529 --l; 3530 if (l == 0) 3531 continue; /* empty line */ 3532 line[l] = NUL; 3533 3534 /* Convert from "SET" to 'encoding' when needed. */ 3535 if (spin->si_conv.vc_type != CONV_NONE) 3536 { 3537 pc = string_convert(&spin->si_conv, line, NULL); 3538 if (pc == NULL) 3539 { 3540 smsg(_("Conversion failure for word in %s line %d: %s"), 3541 fname, lnum, line); 3542 continue; 3543 } 3544 w = pc; 3545 } 3546 else 3547 { 3548 pc = NULL; 3549 w = line; 3550 } 3551 3552 /* Truncate the word at the "/", set "afflist" to what follows. 3553 * Replace "\/" by "/" and "\\" by "\". */ 3554 afflist = NULL; 3555 for (p = w; *p != NUL; MB_PTR_ADV(p)) 3556 { 3557 if (*p == '\\' && (p[1] == '\\' || p[1] == '/')) 3558 STRMOVE(p, p + 1); 3559 else if (*p == '/') 3560 { 3561 *p = NUL; 3562 afflist = p + 1; 3563 break; 3564 } 3565 } 3566 3567 /* Skip non-ASCII words when "spin->si_ascii" is TRUE. */ 3568 if (spin->si_ascii && has_non_ascii(w)) 3569 { 3570 ++non_ascii; 3571 vim_free(pc); 3572 continue; 3573 } 3574 3575 /* This takes time, print a message every 10000 words. */ 3576 if (spin->si_verbose && spin->si_msg_count > 10000) 3577 { 3578 spin->si_msg_count = 0; 3579 vim_snprintf((char *)message, sizeof(message), 3580 _("line %6d, word %6ld - %s"), 3581 lnum, spin->si_foldwcount + spin->si_keepwcount, w); 3582 msg_start(); 3583 msg_outtrans_long_attr(message, 0); 3584 msg_clr_eos(); 3585 msg_didout = FALSE; 3586 msg_col = 0; 3587 out_flush(); 3588 } 3589 3590 /* Store the word in the hashtable to be able to find duplicates. */ 3591 dw = (char_u *)getroom_save(spin, w); 3592 if (dw == NULL) 3593 { 3594 retval = FAIL; 3595 vim_free(pc); 3596 break; 3597 } 3598 3599 hash = hash_hash(dw); 3600 hi = hash_lookup(&ht, dw, hash); 3601 if (!HASHITEM_EMPTY(hi)) 3602 { 3603 if (p_verbose > 0) 3604 smsg(_("Duplicate word in %s line %d: %s"), 3605 fname, lnum, dw); 3606 else if (duplicate == 0) 3607 smsg(_("First duplicate word in %s line %d: %s"), 3608 fname, lnum, dw); 3609 ++duplicate; 3610 } 3611 else 3612 hash_add_item(&ht, hi, dw, hash); 3613 3614 flags = 0; 3615 store_afflist[0] = NUL; 3616 pfxlen = 0; 3617 need_affix = FALSE; 3618 if (afflist != NULL) 3619 { 3620 /* Extract flags from the affix list. */ 3621 flags |= get_affix_flags(affile, afflist); 3622 3623 if (affile->af_needaffix != 0 && flag_in_afflist( 3624 affile->af_flagtype, afflist, affile->af_needaffix)) 3625 need_affix = TRUE; 3626 3627 if (affile->af_pfxpostpone) 3628 /* Need to store the list of prefix IDs with the word. */ 3629 pfxlen = get_pfxlist(affile, afflist, store_afflist); 3630 3631 if (spin->si_compflags != NULL) 3632 /* Need to store the list of compound flags with the word. 3633 * Concatenate them to the list of prefix IDs. */ 3634 get_compflags(affile, afflist, store_afflist + pfxlen); 3635 } 3636 3637 /* Add the word to the word tree(s). */ 3638 if (store_word(spin, dw, flags, spin->si_region, 3639 store_afflist, need_affix) == FAIL) 3640 retval = FAIL; 3641 3642 if (afflist != NULL) 3643 { 3644 /* Find all matching suffixes and add the resulting words. 3645 * Additionally do matching prefixes that combine. */ 3646 if (store_aff_word(spin, dw, afflist, affile, 3647 &affile->af_suff, &affile->af_pref, 3648 CONDIT_SUF, flags, store_afflist, pfxlen) == FAIL) 3649 retval = FAIL; 3650 3651 /* Find all matching prefixes and add the resulting words. */ 3652 if (store_aff_word(spin, dw, afflist, affile, 3653 &affile->af_pref, NULL, 3654 CONDIT_SUF, flags, store_afflist, pfxlen) == FAIL) 3655 retval = FAIL; 3656 } 3657 3658 vim_free(pc); 3659 } 3660 3661 if (duplicate > 0) 3662 smsg(_("%d duplicate word(s) in %s"), duplicate, fname); 3663 if (spin->si_ascii && non_ascii > 0) 3664 smsg(_("Ignored %d word(s) with non-ASCII characters in %s"), 3665 non_ascii, fname); 3666 hash_clear(&ht); 3667 3668 fclose(fd); 3669 return retval; 3670 } 3671 3672 /* 3673 * Check for affix flags in "afflist" that are turned into word flags. 3674 * Return WF_ flags. 3675 */ 3676 static int 3677 get_affix_flags(afffile_T *affile, char_u *afflist) 3678 { 3679 int flags = 0; 3680 3681 if (affile->af_keepcase != 0 && flag_in_afflist( 3682 affile->af_flagtype, afflist, affile->af_keepcase)) 3683 flags |= WF_KEEPCAP | WF_FIXCAP; 3684 if (affile->af_rare != 0 && flag_in_afflist( 3685 affile->af_flagtype, afflist, affile->af_rare)) 3686 flags |= WF_RARE; 3687 if (affile->af_bad != 0 && flag_in_afflist( 3688 affile->af_flagtype, afflist, affile->af_bad)) 3689 flags |= WF_BANNED; 3690 if (affile->af_needcomp != 0 && flag_in_afflist( 3691 affile->af_flagtype, afflist, affile->af_needcomp)) 3692 flags |= WF_NEEDCOMP; 3693 if (affile->af_comproot != 0 && flag_in_afflist( 3694 affile->af_flagtype, afflist, affile->af_comproot)) 3695 flags |= WF_COMPROOT; 3696 if (affile->af_nosuggest != 0 && flag_in_afflist( 3697 affile->af_flagtype, afflist, affile->af_nosuggest)) 3698 flags |= WF_NOSUGGEST; 3699 return flags; 3700 } 3701 3702 /* 3703 * Get the list of prefix IDs from the affix list "afflist". 3704 * Used for PFXPOSTPONE. 3705 * Put the resulting flags in "store_afflist[MAXWLEN]" with a terminating NUL 3706 * and return the number of affixes. 3707 */ 3708 static int 3709 get_pfxlist( 3710 afffile_T *affile, 3711 char_u *afflist, 3712 char_u *store_afflist) 3713 { 3714 char_u *p; 3715 char_u *prevp; 3716 int cnt = 0; 3717 int id; 3718 char_u key[AH_KEY_LEN]; 3719 hashitem_T *hi; 3720 3721 for (p = afflist; *p != NUL; ) 3722 { 3723 prevp = p; 3724 if (get_affitem(affile->af_flagtype, &p) != 0) 3725 { 3726 /* A flag is a postponed prefix flag if it appears in "af_pref" 3727 * and its ID is not zero. */ 3728 vim_strncpy(key, prevp, p - prevp); 3729 hi = hash_find(&affile->af_pref, key); 3730 if (!HASHITEM_EMPTY(hi)) 3731 { 3732 id = HI2AH(hi)->ah_newID; 3733 if (id != 0) 3734 store_afflist[cnt++] = id; 3735 } 3736 } 3737 if (affile->af_flagtype == AFT_NUM && *p == ',') 3738 ++p; 3739 } 3740 3741 store_afflist[cnt] = NUL; 3742 return cnt; 3743 } 3744 3745 /* 3746 * Get the list of compound IDs from the affix list "afflist" that are used 3747 * for compound words. 3748 * Puts the flags in "store_afflist[]". 3749 */ 3750 static void 3751 get_compflags( 3752 afffile_T *affile, 3753 char_u *afflist, 3754 char_u *store_afflist) 3755 { 3756 char_u *p; 3757 char_u *prevp; 3758 int cnt = 0; 3759 char_u key[AH_KEY_LEN]; 3760 hashitem_T *hi; 3761 3762 for (p = afflist; *p != NUL; ) 3763 { 3764 prevp = p; 3765 if (get_affitem(affile->af_flagtype, &p) != 0) 3766 { 3767 /* A flag is a compound flag if it appears in "af_comp". */ 3768 vim_strncpy(key, prevp, p - prevp); 3769 hi = hash_find(&affile->af_comp, key); 3770 if (!HASHITEM_EMPTY(hi)) 3771 store_afflist[cnt++] = HI2CI(hi)->ci_newID; 3772 } 3773 if (affile->af_flagtype == AFT_NUM && *p == ',') 3774 ++p; 3775 } 3776 3777 store_afflist[cnt] = NUL; 3778 } 3779 3780 /* 3781 * Apply affixes to a word and store the resulting words. 3782 * "ht" is the hashtable with affentry_T that need to be applied, either 3783 * prefixes or suffixes. 3784 * "xht", when not NULL, is the prefix hashtable, to be used additionally on 3785 * the resulting words for combining affixes. 3786 * 3787 * Returns FAIL when out of memory. 3788 */ 3789 static int 3790 store_aff_word( 3791 spellinfo_T *spin, /* spell info */ 3792 char_u *word, /* basic word start */ 3793 char_u *afflist, /* list of names of supported affixes */ 3794 afffile_T *affile, 3795 hashtab_T *ht, 3796 hashtab_T *xht, 3797 int condit, /* CONDIT_SUF et al. */ 3798 int flags, /* flags for the word */ 3799 char_u *pfxlist, /* list of prefix IDs */ 3800 int pfxlen) /* nr of flags in "pfxlist" for prefixes, rest 3801 * is compound flags */ 3802 { 3803 int todo; 3804 hashitem_T *hi; 3805 affheader_T *ah; 3806 affentry_T *ae; 3807 char_u newword[MAXWLEN]; 3808 int retval = OK; 3809 int i, j; 3810 char_u *p; 3811 int use_flags; 3812 char_u *use_pfxlist; 3813 int use_pfxlen; 3814 int need_affix; 3815 char_u store_afflist[MAXWLEN]; 3816 char_u pfx_pfxlist[MAXWLEN]; 3817 size_t wordlen = STRLEN(word); 3818 int use_condit; 3819 3820 todo = (int)ht->ht_used; 3821 for (hi = ht->ht_array; todo > 0 && retval == OK; ++hi) 3822 { 3823 if (!HASHITEM_EMPTY(hi)) 3824 { 3825 --todo; 3826 ah = HI2AH(hi); 3827 3828 /* Check that the affix combines, if required, and that the word 3829 * supports this affix. */ 3830 if (((condit & CONDIT_COMB) == 0 || ah->ah_combine) 3831 && flag_in_afflist(affile->af_flagtype, afflist, 3832 ah->ah_flag)) 3833 { 3834 /* Loop over all affix entries with this name. */ 3835 for (ae = ah->ah_first; ae != NULL; ae = ae->ae_next) 3836 { 3837 /* Check the condition. It's not logical to match case 3838 * here, but it is required for compatibility with 3839 * Myspell. 3840 * Another requirement from Myspell is that the chop 3841 * string is shorter than the word itself. 3842 * For prefixes, when "PFXPOSTPONE" was used, only do 3843 * prefixes with a chop string and/or flags. 3844 * When a previously added affix had CIRCUMFIX this one 3845 * must have it too, if it had not then this one must not 3846 * have one either. */ 3847 if ((xht != NULL || !affile->af_pfxpostpone 3848 || ae->ae_chop != NULL 3849 || ae->ae_flags != NULL) 3850 && (ae->ae_chop == NULL 3851 || STRLEN(ae->ae_chop) < wordlen) 3852 && (ae->ae_prog == NULL 3853 || vim_regexec_prog(&ae->ae_prog, FALSE, 3854 word, (colnr_T)0)) 3855 && (((condit & CONDIT_CFIX) == 0) 3856 == ((condit & CONDIT_AFF) == 0 3857 || ae->ae_flags == NULL 3858 || !flag_in_afflist(affile->af_flagtype, 3859 ae->ae_flags, affile->af_circumfix)))) 3860 { 3861 /* Match. Remove the chop and add the affix. */ 3862 if (xht == NULL) 3863 { 3864 /* prefix: chop/add at the start of the word */ 3865 if (ae->ae_add == NULL) 3866 *newword = NUL; 3867 else 3868 vim_strncpy(newword, ae->ae_add, MAXWLEN - 1); 3869 p = word; 3870 if (ae->ae_chop != NULL) 3871 { 3872 /* Skip chop string. */ 3873 if (has_mbyte) 3874 { 3875 i = mb_charlen(ae->ae_chop); 3876 for ( ; i > 0; --i) 3877 MB_PTR_ADV(p); 3878 } 3879 else 3880 p += STRLEN(ae->ae_chop); 3881 } 3882 STRCAT(newword, p); 3883 } 3884 else 3885 { 3886 /* suffix: chop/add at the end of the word */ 3887 vim_strncpy(newword, word, MAXWLEN - 1); 3888 if (ae->ae_chop != NULL) 3889 { 3890 /* Remove chop string. */ 3891 p = newword + STRLEN(newword); 3892 i = (int)MB_CHARLEN(ae->ae_chop); 3893 for ( ; i > 0; --i) 3894 MB_PTR_BACK(newword, p); 3895 *p = NUL; 3896 } 3897 if (ae->ae_add != NULL) 3898 STRCAT(newword, ae->ae_add); 3899 } 3900 3901 use_flags = flags; 3902 use_pfxlist = pfxlist; 3903 use_pfxlen = pfxlen; 3904 need_affix = FALSE; 3905 use_condit = condit | CONDIT_COMB | CONDIT_AFF; 3906 if (ae->ae_flags != NULL) 3907 { 3908 /* Extract flags from the affix list. */ 3909 use_flags |= get_affix_flags(affile, ae->ae_flags); 3910 3911 if (affile->af_needaffix != 0 && flag_in_afflist( 3912 affile->af_flagtype, ae->ae_flags, 3913 affile->af_needaffix)) 3914 need_affix = TRUE; 3915 3916 /* When there is a CIRCUMFIX flag the other affix 3917 * must also have it and we don't add the word 3918 * with one affix. */ 3919 if (affile->af_circumfix != 0 && flag_in_afflist( 3920 affile->af_flagtype, ae->ae_flags, 3921 affile->af_circumfix)) 3922 { 3923 use_condit |= CONDIT_CFIX; 3924 if ((condit & CONDIT_CFIX) == 0) 3925 need_affix = TRUE; 3926 } 3927 3928 if (affile->af_pfxpostpone 3929 || spin->si_compflags != NULL) 3930 { 3931 if (affile->af_pfxpostpone) 3932 /* Get prefix IDS from the affix list. */ 3933 use_pfxlen = get_pfxlist(affile, 3934 ae->ae_flags, store_afflist); 3935 else 3936 use_pfxlen = 0; 3937 use_pfxlist = store_afflist; 3938 3939 /* Combine the prefix IDs. Avoid adding the 3940 * same ID twice. */ 3941 for (i = 0; i < pfxlen; ++i) 3942 { 3943 for (j = 0; j < use_pfxlen; ++j) 3944 if (pfxlist[i] == use_pfxlist[j]) 3945 break; 3946 if (j == use_pfxlen) 3947 use_pfxlist[use_pfxlen++] = pfxlist[i]; 3948 } 3949 3950 if (spin->si_compflags != NULL) 3951 /* Get compound IDS from the affix list. */ 3952 get_compflags(affile, ae->ae_flags, 3953 use_pfxlist + use_pfxlen); 3954 3955 /* Combine the list of compound flags. 3956 * Concatenate them to the prefix IDs list. 3957 * Avoid adding the same ID twice. */ 3958 for (i = pfxlen; pfxlist[i] != NUL; ++i) 3959 { 3960 for (j = use_pfxlen; 3961 use_pfxlist[j] != NUL; ++j) 3962 if (pfxlist[i] == use_pfxlist[j]) 3963 break; 3964 if (use_pfxlist[j] == NUL) 3965 { 3966 use_pfxlist[j++] = pfxlist[i]; 3967 use_pfxlist[j] = NUL; 3968 } 3969 } 3970 } 3971 } 3972 3973 /* Obey a "COMPOUNDFORBIDFLAG" of the affix: don't 3974 * use the compound flags. */ 3975 if (use_pfxlist != NULL && ae->ae_compforbid) 3976 { 3977 vim_strncpy(pfx_pfxlist, use_pfxlist, use_pfxlen); 3978 use_pfxlist = pfx_pfxlist; 3979 } 3980 3981 /* When there are postponed prefixes... */ 3982 if (spin->si_prefroot != NULL 3983 && spin->si_prefroot->wn_sibling != NULL) 3984 { 3985 /* ... add a flag to indicate an affix was used. */ 3986 use_flags |= WF_HAS_AFF; 3987 3988 /* ... don't use a prefix list if combining 3989 * affixes is not allowed. But do use the 3990 * compound flags after them. */ 3991 if (!ah->ah_combine && use_pfxlist != NULL) 3992 use_pfxlist += use_pfxlen; 3993 } 3994 3995 /* When compounding is supported and there is no 3996 * "COMPOUNDPERMITFLAG" then forbid compounding on the 3997 * side where the affix is applied. */ 3998 if (spin->si_compflags != NULL && !ae->ae_comppermit) 3999 { 4000 if (xht != NULL) 4001 use_flags |= WF_NOCOMPAFT; 4002 else 4003 use_flags |= WF_NOCOMPBEF; 4004 } 4005 4006 /* Store the modified word. */ 4007 if (store_word(spin, newword, use_flags, 4008 spin->si_region, use_pfxlist, 4009 need_affix) == FAIL) 4010 retval = FAIL; 4011 4012 /* When added a prefix or a first suffix and the affix 4013 * has flags may add a(nother) suffix. RECURSIVE! */ 4014 if ((condit & CONDIT_SUF) && ae->ae_flags != NULL) 4015 if (store_aff_word(spin, newword, ae->ae_flags, 4016 affile, &affile->af_suff, xht, 4017 use_condit & (xht == NULL 4018 ? ~0 : ~CONDIT_SUF), 4019 use_flags, use_pfxlist, pfxlen) == FAIL) 4020 retval = FAIL; 4021 4022 /* When added a suffix and combining is allowed also 4023 * try adding a prefix additionally. Both for the 4024 * word flags and for the affix flags. RECURSIVE! */ 4025 if (xht != NULL && ah->ah_combine) 4026 { 4027 if (store_aff_word(spin, newword, 4028 afflist, affile, 4029 xht, NULL, use_condit, 4030 use_flags, use_pfxlist, 4031 pfxlen) == FAIL 4032 || (ae->ae_flags != NULL 4033 && store_aff_word(spin, newword, 4034 ae->ae_flags, affile, 4035 xht, NULL, use_condit, 4036 use_flags, use_pfxlist, 4037 pfxlen) == FAIL)) 4038 retval = FAIL; 4039 } 4040 } 4041 } 4042 } 4043 } 4044 } 4045 4046 return retval; 4047 } 4048 4049 /* 4050 * Read a file with a list of words. 4051 */ 4052 static int 4053 spell_read_wordfile(spellinfo_T *spin, char_u *fname) 4054 { 4055 FILE *fd; 4056 long lnum = 0; 4057 char_u rline[MAXLINELEN]; 4058 char_u *line; 4059 char_u *pc = NULL; 4060 char_u *p; 4061 int l; 4062 int retval = OK; 4063 int did_word = FALSE; 4064 int non_ascii = 0; 4065 int flags; 4066 int regionmask; 4067 4068 /* 4069 * Open the file. 4070 */ 4071 fd = mch_fopen((char *)fname, "r"); 4072 if (fd == NULL) 4073 { 4074 semsg(_(e_notopen), fname); 4075 return FAIL; 4076 } 4077 4078 vim_snprintf((char *)IObuff, IOSIZE, _("Reading word file %s..."), fname); 4079 spell_message(spin, IObuff); 4080 4081 /* 4082 * Read all the lines in the file one by one. 4083 */ 4084 while (!vim_fgets(rline, MAXLINELEN, fd) && !got_int) 4085 { 4086 line_breakcheck(); 4087 ++lnum; 4088 4089 /* Skip comment lines. */ 4090 if (*rline == '#') 4091 continue; 4092 4093 /* Remove CR, LF and white space from the end. */ 4094 l = (int)STRLEN(rline); 4095 while (l > 0 && rline[l - 1] <= ' ') 4096 --l; 4097 if (l == 0) 4098 continue; /* empty or blank line */ 4099 rline[l] = NUL; 4100 4101 /* Convert from "/encoding={encoding}" to 'encoding' when needed. */ 4102 vim_free(pc); 4103 if (spin->si_conv.vc_type != CONV_NONE) 4104 { 4105 pc = string_convert(&spin->si_conv, rline, NULL); 4106 if (pc == NULL) 4107 { 4108 smsg(_("Conversion failure for word in %s line %d: %s"), 4109 fname, lnum, rline); 4110 continue; 4111 } 4112 line = pc; 4113 } 4114 else 4115 { 4116 pc = NULL; 4117 line = rline; 4118 } 4119 4120 if (*line == '/') 4121 { 4122 ++line; 4123 if (STRNCMP(line, "encoding=", 9) == 0) 4124 { 4125 if (spin->si_conv.vc_type != CONV_NONE) 4126 smsg(_("Duplicate /encoding= line ignored in %s line %d: %s"), 4127 fname, lnum, line - 1); 4128 else if (did_word) 4129 smsg(_("/encoding= line after word ignored in %s line %d: %s"), 4130 fname, lnum, line - 1); 4131 else 4132 { 4133 char_u *enc; 4134 4135 /* Setup for conversion to 'encoding'. */ 4136 line += 9; 4137 enc = enc_canonize(line); 4138 if (enc != NULL && !spin->si_ascii 4139 && convert_setup(&spin->si_conv, enc, 4140 p_enc) == FAIL) 4141 smsg(_("Conversion in %s not supported: from %s to %s"), 4142 fname, line, p_enc); 4143 vim_free(enc); 4144 spin->si_conv.vc_fail = TRUE; 4145 } 4146 continue; 4147 } 4148 4149 if (STRNCMP(line, "regions=", 8) == 0) 4150 { 4151 if (spin->si_region_count > 1) 4152 smsg(_("Duplicate /regions= line ignored in %s line %d: %s"), 4153 fname, lnum, line); 4154 else 4155 { 4156 line += 8; 4157 if (STRLEN(line) > MAXREGIONS * 2) 4158 smsg(_("Too many regions in %s line %d: %s"), 4159 fname, lnum, line); 4160 else 4161 { 4162 spin->si_region_count = (int)STRLEN(line) / 2; 4163 STRCPY(spin->si_region_name, line); 4164 4165 /* Adjust the mask for a word valid in all regions. */ 4166 spin->si_region = (1 << spin->si_region_count) - 1; 4167 } 4168 } 4169 continue; 4170 } 4171 4172 smsg(_("/ line ignored in %s line %d: %s"), 4173 fname, lnum, line - 1); 4174 continue; 4175 } 4176 4177 flags = 0; 4178 regionmask = spin->si_region; 4179 4180 /* Check for flags and region after a slash. */ 4181 p = vim_strchr(line, '/'); 4182 if (p != NULL) 4183 { 4184 *p++ = NUL; 4185 while (*p != NUL) 4186 { 4187 if (*p == '=') /* keep-case word */ 4188 flags |= WF_KEEPCAP | WF_FIXCAP; 4189 else if (*p == '!') /* Bad, bad, wicked word. */ 4190 flags |= WF_BANNED; 4191 else if (*p == '?') /* Rare word. */ 4192 flags |= WF_RARE; 4193 else if (VIM_ISDIGIT(*p)) /* region number(s) */ 4194 { 4195 if ((flags & WF_REGION) == 0) /* first one */ 4196 regionmask = 0; 4197 flags |= WF_REGION; 4198 4199 l = *p - '0'; 4200 if (l == 0 || l > spin->si_region_count) 4201 { 4202 smsg(_("Invalid region nr in %s line %d: %s"), 4203 fname, lnum, p); 4204 break; 4205 } 4206 regionmask |= 1 << (l - 1); 4207 } 4208 else 4209 { 4210 smsg(_("Unrecognized flags in %s line %d: %s"), 4211 fname, lnum, p); 4212 break; 4213 } 4214 ++p; 4215 } 4216 } 4217 4218 /* Skip non-ASCII words when "spin->si_ascii" is TRUE. */ 4219 if (spin->si_ascii && has_non_ascii(line)) 4220 { 4221 ++non_ascii; 4222 continue; 4223 } 4224 4225 /* Normal word: store it. */ 4226 if (store_word(spin, line, flags, regionmask, NULL, FALSE) == FAIL) 4227 { 4228 retval = FAIL; 4229 break; 4230 } 4231 did_word = TRUE; 4232 } 4233 4234 vim_free(pc); 4235 fclose(fd); 4236 4237 if (spin->si_ascii && non_ascii > 0) 4238 { 4239 vim_snprintf((char *)IObuff, IOSIZE, 4240 _("Ignored %d words with non-ASCII characters"), non_ascii); 4241 spell_message(spin, IObuff); 4242 } 4243 4244 return retval; 4245 } 4246 4247 /* 4248 * Get part of an sblock_T, "len" bytes long. 4249 * This avoids calling free() for every little struct we use (and keeping 4250 * track of them). 4251 * The memory is cleared to all zeros. 4252 * Returns NULL when out of memory. 4253 */ 4254 static void * 4255 getroom( 4256 spellinfo_T *spin, 4257 size_t len, /* length needed */ 4258 int align) /* align for pointer */ 4259 { 4260 char_u *p; 4261 sblock_T *bl = spin->si_blocks; 4262 4263 if (align && bl != NULL) 4264 /* Round size up for alignment. On some systems structures need to be 4265 * aligned to the size of a pointer (e.g., SPARC). */ 4266 bl->sb_used = (bl->sb_used + sizeof(char *) - 1) 4267 & ~(sizeof(char *) - 1); 4268 4269 if (bl == NULL || bl->sb_used + len > SBLOCKSIZE) 4270 { 4271 if (len >= SBLOCKSIZE) 4272 bl = NULL; 4273 else 4274 /* Allocate a block of memory. It is not freed until much later. */ 4275 bl = (sblock_T *)alloc_clear( 4276 (unsigned)(sizeof(sblock_T) + SBLOCKSIZE)); 4277 if (bl == NULL) 4278 { 4279 if (!spin->si_did_emsg) 4280 { 4281 emsg(_("E845: Insufficient memory, word list will be incomplete")); 4282 spin->si_did_emsg = TRUE; 4283 } 4284 return NULL; 4285 } 4286 bl->sb_next = spin->si_blocks; 4287 spin->si_blocks = bl; 4288 bl->sb_used = 0; 4289 ++spin->si_blocks_cnt; 4290 } 4291 4292 p = bl->sb_data + bl->sb_used; 4293 bl->sb_used += (int)len; 4294 4295 return p; 4296 } 4297 4298 /* 4299 * Make a copy of a string into memory allocated with getroom(). 4300 * Returns NULL when out of memory. 4301 */ 4302 static char_u * 4303 getroom_save(spellinfo_T *spin, char_u *s) 4304 { 4305 char_u *sc; 4306 4307 sc = (char_u *)getroom(spin, STRLEN(s) + 1, FALSE); 4308 if (sc != NULL) 4309 STRCPY(sc, s); 4310 return sc; 4311 } 4312 4313 4314 /* 4315 * Free the list of allocated sblock_T. 4316 */ 4317 static void 4318 free_blocks(sblock_T *bl) 4319 { 4320 sblock_T *next; 4321 4322 while (bl != NULL) 4323 { 4324 next = bl->sb_next; 4325 vim_free(bl); 4326 bl = next; 4327 } 4328 } 4329 4330 /* 4331 * Allocate the root of a word tree. 4332 * Returns NULL when out of memory. 4333 */ 4334 static wordnode_T * 4335 wordtree_alloc(spellinfo_T *spin) 4336 { 4337 return (wordnode_T *)getroom(spin, sizeof(wordnode_T), TRUE); 4338 } 4339 4340 /* 4341 * Store a word in the tree(s). 4342 * Always store it in the case-folded tree. For a keep-case word this is 4343 * useful when the word can also be used with all caps (no WF_FIXCAP flag) and 4344 * used to find suggestions. 4345 * For a keep-case word also store it in the keep-case tree. 4346 * When "pfxlist" is not NULL store the word for each postponed prefix ID and 4347 * compound flag. 4348 */ 4349 static int 4350 store_word( 4351 spellinfo_T *spin, 4352 char_u *word, 4353 int flags, /* extra flags, WF_BANNED */ 4354 int region, /* supported region(s) */ 4355 char_u *pfxlist, /* list of prefix IDs or NULL */ 4356 int need_affix) /* only store word with affix ID */ 4357 { 4358 int len = (int)STRLEN(word); 4359 int ct = captype(word, word + len); 4360 char_u foldword[MAXWLEN]; 4361 int res = OK; 4362 char_u *p; 4363 4364 (void)spell_casefold(word, len, foldword, MAXWLEN); 4365 for (p = pfxlist; res == OK; ++p) 4366 { 4367 if (!need_affix || (p != NULL && *p != NUL)) 4368 res = tree_add_word(spin, foldword, spin->si_foldroot, ct | flags, 4369 region, p == NULL ? 0 : *p); 4370 if (p == NULL || *p == NUL) 4371 break; 4372 } 4373 ++spin->si_foldwcount; 4374 4375 if (res == OK && (ct == WF_KEEPCAP || (flags & WF_KEEPCAP))) 4376 { 4377 for (p = pfxlist; res == OK; ++p) 4378 { 4379 if (!need_affix || (p != NULL && *p != NUL)) 4380 res = tree_add_word(spin, word, spin->si_keeproot, flags, 4381 region, p == NULL ? 0 : *p); 4382 if (p == NULL || *p == NUL) 4383 break; 4384 } 4385 ++spin->si_keepwcount; 4386 } 4387 return res; 4388 } 4389 4390 /* 4391 * Add word "word" to a word tree at "root". 4392 * When "flags" < 0 we are adding to the prefix tree where "flags" is used for 4393 * "rare" and "region" is the condition nr. 4394 * Returns FAIL when out of memory. 4395 */ 4396 static int 4397 tree_add_word( 4398 spellinfo_T *spin, 4399 char_u *word, 4400 wordnode_T *root, 4401 int flags, 4402 int region, 4403 int affixID) 4404 { 4405 wordnode_T *node = root; 4406 wordnode_T *np; 4407 wordnode_T *copyp, **copyprev; 4408 wordnode_T **prev = NULL; 4409 int i; 4410 4411 /* Add each byte of the word to the tree, including the NUL at the end. */ 4412 for (i = 0; ; ++i) 4413 { 4414 /* When there is more than one reference to this node we need to make 4415 * a copy, so that we can modify it. Copy the whole list of siblings 4416 * (we don't optimize for a partly shared list of siblings). */ 4417 if (node != NULL && node->wn_refs > 1) 4418 { 4419 --node->wn_refs; 4420 copyprev = prev; 4421 for (copyp = node; copyp != NULL; copyp = copyp->wn_sibling) 4422 { 4423 /* Allocate a new node and copy the info. */ 4424 np = get_wordnode(spin); 4425 if (np == NULL) 4426 return FAIL; 4427 np->wn_child = copyp->wn_child; 4428 if (np->wn_child != NULL) 4429 ++np->wn_child->wn_refs; /* child gets extra ref */ 4430 np->wn_byte = copyp->wn_byte; 4431 if (np->wn_byte == NUL) 4432 { 4433 np->wn_flags = copyp->wn_flags; 4434 np->wn_region = copyp->wn_region; 4435 np->wn_affixID = copyp->wn_affixID; 4436 } 4437 4438 /* Link the new node in the list, there will be one ref. */ 4439 np->wn_refs = 1; 4440 if (copyprev != NULL) 4441 *copyprev = np; 4442 copyprev = &np->wn_sibling; 4443 4444 /* Let "node" point to the head of the copied list. */ 4445 if (copyp == node) 4446 node = np; 4447 } 4448 } 4449 4450 /* Look for the sibling that has the same character. They are sorted 4451 * on byte value, thus stop searching when a sibling is found with a 4452 * higher byte value. For zero bytes (end of word) the sorting is 4453 * done on flags and then on affixID. */ 4454 while (node != NULL 4455 && (node->wn_byte < word[i] 4456 || (node->wn_byte == NUL 4457 && (flags < 0 4458 ? node->wn_affixID < (unsigned)affixID 4459 : (node->wn_flags < (unsigned)(flags & WN_MASK) 4460 || (node->wn_flags == (flags & WN_MASK) 4461 && (spin->si_sugtree 4462 ? (node->wn_region & 0xffff) < region 4463 : node->wn_affixID 4464 < (unsigned)affixID))))))) 4465 { 4466 prev = &node->wn_sibling; 4467 node = *prev; 4468 } 4469 if (node == NULL 4470 || node->wn_byte != word[i] 4471 || (word[i] == NUL 4472 && (flags < 0 4473 || spin->si_sugtree 4474 || node->wn_flags != (flags & WN_MASK) 4475 || node->wn_affixID != affixID))) 4476 { 4477 /* Allocate a new node. */ 4478 np = get_wordnode(spin); 4479 if (np == NULL) 4480 return FAIL; 4481 np->wn_byte = word[i]; 4482 4483 /* If "node" is NULL this is a new child or the end of the sibling 4484 * list: ref count is one. Otherwise use ref count of sibling and 4485 * make ref count of sibling one (matters when inserting in front 4486 * of the list of siblings). */ 4487 if (node == NULL) 4488 np->wn_refs = 1; 4489 else 4490 { 4491 np->wn_refs = node->wn_refs; 4492 node->wn_refs = 1; 4493 } 4494 if (prev != NULL) 4495 *prev = np; 4496 np->wn_sibling = node; 4497 node = np; 4498 } 4499 4500 if (word[i] == NUL) 4501 { 4502 node->wn_flags = flags; 4503 node->wn_region |= region; 4504 node->wn_affixID = affixID; 4505 break; 4506 } 4507 prev = &node->wn_child; 4508 node = *prev; 4509 } 4510 #ifdef SPELL_PRINTTREE 4511 smsg("Added \"%s\"", word); 4512 spell_print_tree(root->wn_sibling); 4513 #endif 4514 4515 /* count nr of words added since last message */ 4516 ++spin->si_msg_count; 4517 4518 if (spin->si_compress_cnt > 1) 4519 { 4520 if (--spin->si_compress_cnt == 1) 4521 /* Did enough words to lower the block count limit. */ 4522 spin->si_blocks_cnt += compress_inc; 4523 } 4524 4525 /* 4526 * When we have allocated lots of memory we need to compress the word tree 4527 * to free up some room. But compression is slow, and we might actually 4528 * need that room, thus only compress in the following situations: 4529 * 1. When not compressed before (si_compress_cnt == 0): when using 4530 * "compress_start" blocks. 4531 * 2. When compressed before and used "compress_inc" blocks before 4532 * adding "compress_added" words (si_compress_cnt > 1). 4533 * 3. When compressed before, added "compress_added" words 4534 * (si_compress_cnt == 1) and the number of free nodes drops below the 4535 * maximum word length. 4536 */ 4537 #ifndef SPELL_COMPRESS_ALLWAYS 4538 if (spin->si_compress_cnt == 1 4539 ? spin->si_free_count < MAXWLEN 4540 : spin->si_blocks_cnt >= compress_start) 4541 #endif 4542 { 4543 /* Decrement the block counter. The effect is that we compress again 4544 * when the freed up room has been used and another "compress_inc" 4545 * blocks have been allocated. Unless "compress_added" words have 4546 * been added, then the limit is put back again. */ 4547 spin->si_blocks_cnt -= compress_inc; 4548 spin->si_compress_cnt = compress_added; 4549 4550 if (spin->si_verbose) 4551 { 4552 msg_start(); 4553 msg_puts(_(msg_compressing)); 4554 msg_clr_eos(); 4555 msg_didout = FALSE; 4556 msg_col = 0; 4557 out_flush(); 4558 } 4559 4560 /* Compress both trees. Either they both have many nodes, which makes 4561 * compression useful, or one of them is small, which means 4562 * compression goes fast. But when filling the soundfold word tree 4563 * there is no keep-case tree. */ 4564 wordtree_compress(spin, spin->si_foldroot); 4565 if (affixID >= 0) 4566 wordtree_compress(spin, spin->si_keeproot); 4567 } 4568 4569 return OK; 4570 } 4571 4572 /* 4573 * Get a wordnode_T, either from the list of previously freed nodes or 4574 * allocate a new one. 4575 * Returns NULL when out of memory. 4576 */ 4577 static wordnode_T * 4578 get_wordnode(spellinfo_T *spin) 4579 { 4580 wordnode_T *n; 4581 4582 if (spin->si_first_free == NULL) 4583 n = (wordnode_T *)getroom(spin, sizeof(wordnode_T), TRUE); 4584 else 4585 { 4586 n = spin->si_first_free; 4587 spin->si_first_free = n->wn_child; 4588 vim_memset(n, 0, sizeof(wordnode_T)); 4589 --spin->si_free_count; 4590 } 4591 #ifdef SPELL_PRINTTREE 4592 if (n != NULL) 4593 n->wn_nr = ++spin->si_wordnode_nr; 4594 #endif 4595 return n; 4596 } 4597 4598 /* 4599 * Decrement the reference count on a node (which is the head of a list of 4600 * siblings). If the reference count becomes zero free the node and its 4601 * siblings. 4602 * Returns the number of nodes actually freed. 4603 */ 4604 static int 4605 deref_wordnode(spellinfo_T *spin, wordnode_T *node) 4606 { 4607 wordnode_T *np; 4608 int cnt = 0; 4609 4610 if (--node->wn_refs == 0) 4611 { 4612 for (np = node; np != NULL; np = np->wn_sibling) 4613 { 4614 if (np->wn_child != NULL) 4615 cnt += deref_wordnode(spin, np->wn_child); 4616 free_wordnode(spin, np); 4617 ++cnt; 4618 } 4619 ++cnt; /* length field */ 4620 } 4621 return cnt; 4622 } 4623 4624 /* 4625 * Free a wordnode_T for re-use later. 4626 * Only the "wn_child" field becomes invalid. 4627 */ 4628 static void 4629 free_wordnode(spellinfo_T *spin, wordnode_T *n) 4630 { 4631 n->wn_child = spin->si_first_free; 4632 spin->si_first_free = n; 4633 ++spin->si_free_count; 4634 } 4635 4636 /* 4637 * Compress a tree: find tails that are identical and can be shared. 4638 */ 4639 static void 4640 wordtree_compress(spellinfo_T *spin, wordnode_T *root) 4641 { 4642 hashtab_T ht; 4643 int n; 4644 int tot = 0; 4645 int perc; 4646 4647 /* Skip the root itself, it's not actually used. The first sibling is the 4648 * start of the tree. */ 4649 if (root->wn_sibling != NULL) 4650 { 4651 hash_init(&ht); 4652 n = node_compress(spin, root->wn_sibling, &ht, &tot); 4653 4654 #ifndef SPELL_PRINTTREE 4655 if (spin->si_verbose || p_verbose > 2) 4656 #endif 4657 { 4658 if (tot > 1000000) 4659 perc = (tot - n) / (tot / 100); 4660 else if (tot == 0) 4661 perc = 0; 4662 else 4663 perc = (tot - n) * 100 / tot; 4664 vim_snprintf((char *)IObuff, IOSIZE, 4665 _("Compressed %d of %d nodes; %d (%d%%) remaining"), 4666 n, tot, tot - n, perc); 4667 spell_message(spin, IObuff); 4668 } 4669 #ifdef SPELL_PRINTTREE 4670 spell_print_tree(root->wn_sibling); 4671 #endif 4672 hash_clear(&ht); 4673 } 4674 } 4675 4676 /* 4677 * Compress a node, its siblings and its children, depth first. 4678 * Returns the number of compressed nodes. 4679 */ 4680 static int 4681 node_compress( 4682 spellinfo_T *spin, 4683 wordnode_T *node, 4684 hashtab_T *ht, 4685 int *tot) /* total count of nodes before compressing, 4686 incremented while going through the tree */ 4687 { 4688 wordnode_T *np; 4689 wordnode_T *tp; 4690 wordnode_T *child; 4691 hash_T hash; 4692 hashitem_T *hi; 4693 int len = 0; 4694 unsigned nr, n; 4695 int compressed = 0; 4696 4697 /* 4698 * Go through the list of siblings. Compress each child and then try 4699 * finding an identical child to replace it. 4700 * Note that with "child" we mean not just the node that is pointed to, 4701 * but the whole list of siblings of which the child node is the first. 4702 */ 4703 for (np = node; np != NULL && !got_int; np = np->wn_sibling) 4704 { 4705 ++len; 4706 if ((child = np->wn_child) != NULL) 4707 { 4708 /* Compress the child first. This fills hashkey. */ 4709 compressed += node_compress(spin, child, ht, tot); 4710 4711 /* Try to find an identical child. */ 4712 hash = hash_hash(child->wn_u1.hashkey); 4713 hi = hash_lookup(ht, child->wn_u1.hashkey, hash); 4714 if (!HASHITEM_EMPTY(hi)) 4715 { 4716 /* There are children we encountered before with a hash value 4717 * identical to the current child. Now check if there is one 4718 * that is really identical. */ 4719 for (tp = HI2WN(hi); tp != NULL; tp = tp->wn_u2.next) 4720 if (node_equal(child, tp)) 4721 { 4722 /* Found one! Now use that child in place of the 4723 * current one. This means the current child and all 4724 * its siblings is unlinked from the tree. */ 4725 ++tp->wn_refs; 4726 compressed += deref_wordnode(spin, child); 4727 np->wn_child = tp; 4728 break; 4729 } 4730 if (tp == NULL) 4731 { 4732 /* No other child with this hash value equals the child of 4733 * the node, add it to the linked list after the first 4734 * item. */ 4735 tp = HI2WN(hi); 4736 child->wn_u2.next = tp->wn_u2.next; 4737 tp->wn_u2.next = child; 4738 } 4739 } 4740 else 4741 /* No other child has this hash value, add it to the 4742 * hashtable. */ 4743 hash_add_item(ht, hi, child->wn_u1.hashkey, hash); 4744 } 4745 } 4746 *tot += len + 1; /* add one for the node that stores the length */ 4747 4748 /* 4749 * Make a hash key for the node and its siblings, so that we can quickly 4750 * find a lookalike node. This must be done after compressing the sibling 4751 * list, otherwise the hash key would become invalid by the compression. 4752 */ 4753 node->wn_u1.hashkey[0] = len; 4754 nr = 0; 4755 for (np = node; np != NULL; np = np->wn_sibling) 4756 { 4757 if (np->wn_byte == NUL) 4758 /* end node: use wn_flags, wn_region and wn_affixID */ 4759 n = np->wn_flags + (np->wn_region << 8) + (np->wn_affixID << 16); 4760 else 4761 /* byte node: use the byte value and the child pointer */ 4762 n = (unsigned)(np->wn_byte + ((long_u)np->wn_child << 8)); 4763 nr = nr * 101 + n; 4764 } 4765 4766 /* Avoid NUL bytes, it terminates the hash key. */ 4767 n = nr & 0xff; 4768 node->wn_u1.hashkey[1] = n == 0 ? 1 : n; 4769 n = (nr >> 8) & 0xff; 4770 node->wn_u1.hashkey[2] = n == 0 ? 1 : n; 4771 n = (nr >> 16) & 0xff; 4772 node->wn_u1.hashkey[3] = n == 0 ? 1 : n; 4773 n = (nr >> 24) & 0xff; 4774 node->wn_u1.hashkey[4] = n == 0 ? 1 : n; 4775 node->wn_u1.hashkey[5] = NUL; 4776 4777 /* Check for CTRL-C pressed now and then. */ 4778 fast_breakcheck(); 4779 4780 return compressed; 4781 } 4782 4783 /* 4784 * Return TRUE when two nodes have identical siblings and children. 4785 */ 4786 static int 4787 node_equal(wordnode_T *n1, wordnode_T *n2) 4788 { 4789 wordnode_T *p1; 4790 wordnode_T *p2; 4791 4792 for (p1 = n1, p2 = n2; p1 != NULL && p2 != NULL; 4793 p1 = p1->wn_sibling, p2 = p2->wn_sibling) 4794 if (p1->wn_byte != p2->wn_byte 4795 || (p1->wn_byte == NUL 4796 ? (p1->wn_flags != p2->wn_flags 4797 || p1->wn_region != p2->wn_region 4798 || p1->wn_affixID != p2->wn_affixID) 4799 : (p1->wn_child != p2->wn_child))) 4800 break; 4801 4802 return p1 == NULL && p2 == NULL; 4803 } 4804 4805 static int 4806 #ifdef __BORLANDC__ 4807 _RTLENTRYF 4808 #endif 4809 rep_compare(const void *s1, const void *s2); 4810 4811 /* 4812 * Function given to qsort() to sort the REP items on "from" string. 4813 */ 4814 static int 4815 #ifdef __BORLANDC__ 4816 _RTLENTRYF 4817 #endif 4818 rep_compare(const void *s1, const void *s2) 4819 { 4820 fromto_T *p1 = (fromto_T *)s1; 4821 fromto_T *p2 = (fromto_T *)s2; 4822 4823 return STRCMP(p1->ft_from, p2->ft_from); 4824 } 4825 4826 /* 4827 * Write the Vim .spl file "fname". 4828 * Return FAIL or OK; 4829 */ 4830 static int 4831 write_vim_spell(spellinfo_T *spin, char_u *fname) 4832 { 4833 FILE *fd; 4834 int regionmask; 4835 int round; 4836 wordnode_T *tree; 4837 int nodecount; 4838 int i; 4839 int l; 4840 garray_T *gap; 4841 fromto_T *ftp; 4842 char_u *p; 4843 int rr; 4844 int retval = OK; 4845 size_t fwv = 1; /* collect return value of fwrite() to avoid 4846 warnings from picky compiler */ 4847 4848 fd = mch_fopen((char *)fname, "w"); 4849 if (fd == NULL) 4850 { 4851 semsg(_(e_notopen), fname); 4852 return FAIL; 4853 } 4854 4855 /* <HEADER>: <fileID> <versionnr> */ 4856 /* <fileID> */ 4857 fwv &= fwrite(VIMSPELLMAGIC, VIMSPELLMAGICL, (size_t)1, fd); 4858 if (fwv != (size_t)1) 4859 /* Catch first write error, don't try writing more. */ 4860 goto theend; 4861 4862 putc(VIMSPELLVERSION, fd); /* <versionnr> */ 4863 4864 /* 4865 * <SECTIONS>: <section> ... <sectionend> 4866 */ 4867 4868 /* SN_INFO: <infotext> */ 4869 if (spin->si_info != NULL) 4870 { 4871 putc(SN_INFO, fd); /* <sectionID> */ 4872 putc(0, fd); /* <sectionflags> */ 4873 4874 i = (int)STRLEN(spin->si_info); 4875 put_bytes(fd, (long_u)i, 4); /* <sectionlen> */ 4876 fwv &= fwrite(spin->si_info, (size_t)i, (size_t)1, fd); /* <infotext> */ 4877 } 4878 4879 /* SN_REGION: <regionname> ... 4880 * Write the region names only if there is more than one. */ 4881 if (spin->si_region_count > 1) 4882 { 4883 putc(SN_REGION, fd); /* <sectionID> */ 4884 putc(SNF_REQUIRED, fd); /* <sectionflags> */ 4885 l = spin->si_region_count * 2; 4886 put_bytes(fd, (long_u)l, 4); /* <sectionlen> */ 4887 fwv &= fwrite(spin->si_region_name, (size_t)l, (size_t)1, fd); 4888 /* <regionname> ... */ 4889 regionmask = (1 << spin->si_region_count) - 1; 4890 } 4891 else 4892 regionmask = 0; 4893 4894 /* SN_CHARFLAGS: <charflagslen> <charflags> <folcharslen> <folchars> 4895 * 4896 * The table with character flags and the table for case folding. 4897 * This makes sure the same characters are recognized as word characters 4898 * when generating an when using a spell file. 4899 * Skip this for ASCII, the table may conflict with the one used for 4900 * 'encoding'. 4901 * Also skip this for an .add.spl file, the main spell file must contain 4902 * the table (avoids that it conflicts). File is shorter too. 4903 */ 4904 if (!spin->si_ascii && !spin->si_add) 4905 { 4906 char_u folchars[128 * 8]; 4907 int flags; 4908 4909 putc(SN_CHARFLAGS, fd); /* <sectionID> */ 4910 putc(SNF_REQUIRED, fd); /* <sectionflags> */ 4911 4912 /* Form the <folchars> string first, we need to know its length. */ 4913 l = 0; 4914 for (i = 128; i < 256; ++i) 4915 { 4916 if (has_mbyte) 4917 l += mb_char2bytes(spelltab.st_fold[i], folchars + l); 4918 else 4919 folchars[l++] = spelltab.st_fold[i]; 4920 } 4921 put_bytes(fd, (long_u)(1 + 128 + 2 + l), 4); /* <sectionlen> */ 4922 4923 fputc(128, fd); /* <charflagslen> */ 4924 for (i = 128; i < 256; ++i) 4925 { 4926 flags = 0; 4927 if (spelltab.st_isw[i]) 4928 flags |= CF_WORD; 4929 if (spelltab.st_isu[i]) 4930 flags |= CF_UPPER; 4931 fputc(flags, fd); /* <charflags> */ 4932 } 4933 4934 put_bytes(fd, (long_u)l, 2); /* <folcharslen> */ 4935 fwv &= fwrite(folchars, (size_t)l, (size_t)1, fd); /* <folchars> */ 4936 } 4937 4938 /* SN_MIDWORD: <midword> */ 4939 if (spin->si_midword != NULL) 4940 { 4941 putc(SN_MIDWORD, fd); /* <sectionID> */ 4942 putc(SNF_REQUIRED, fd); /* <sectionflags> */ 4943 4944 i = (int)STRLEN(spin->si_midword); 4945 put_bytes(fd, (long_u)i, 4); /* <sectionlen> */ 4946 fwv &= fwrite(spin->si_midword, (size_t)i, (size_t)1, fd); 4947 /* <midword> */ 4948 } 4949 4950 /* SN_PREFCOND: <prefcondcnt> <prefcond> ... */ 4951 if (spin->si_prefcond.ga_len > 0) 4952 { 4953 putc(SN_PREFCOND, fd); /* <sectionID> */ 4954 putc(SNF_REQUIRED, fd); /* <sectionflags> */ 4955 4956 l = write_spell_prefcond(NULL, &spin->si_prefcond); 4957 put_bytes(fd, (long_u)l, 4); /* <sectionlen> */ 4958 4959 write_spell_prefcond(fd, &spin->si_prefcond); 4960 } 4961 4962 /* SN_REP: <repcount> <rep> ... 4963 * SN_SAL: <salflags> <salcount> <sal> ... 4964 * SN_REPSAL: <repcount> <rep> ... */ 4965 4966 /* round 1: SN_REP section 4967 * round 2: SN_SAL section (unless SN_SOFO is used) 4968 * round 3: SN_REPSAL section */ 4969 for (round = 1; round <= 3; ++round) 4970 { 4971 if (round == 1) 4972 gap = &spin->si_rep; 4973 else if (round == 2) 4974 { 4975 /* Don't write SN_SAL when using a SN_SOFO section */ 4976 if (spin->si_sofofr != NULL && spin->si_sofoto != NULL) 4977 continue; 4978 gap = &spin->si_sal; 4979 } 4980 else 4981 gap = &spin->si_repsal; 4982 4983 /* Don't write the section if there are no items. */ 4984 if (gap->ga_len == 0) 4985 continue; 4986 4987 /* Sort the REP/REPSAL items. */ 4988 if (round != 2) 4989 qsort(gap->ga_data, (size_t)gap->ga_len, 4990 sizeof(fromto_T), rep_compare); 4991 4992 i = round == 1 ? SN_REP : (round == 2 ? SN_SAL : SN_REPSAL); 4993 putc(i, fd); /* <sectionID> */ 4994 4995 /* This is for making suggestions, section is not required. */ 4996 putc(0, fd); /* <sectionflags> */ 4997 4998 /* Compute the length of what follows. */ 4999 l = 2; /* count <repcount> or <salcount> */ 5000 for (i = 0; i < gap->ga_len; ++i) 5001 { 5002 ftp = &((fromto_T *)gap->ga_data)[i]; 5003 l += 1 + (int)STRLEN(ftp->ft_from); /* count <*fromlen> and <*from> */ 5004 l += 1 + (int)STRLEN(ftp->ft_to); /* count <*tolen> and <*to> */ 5005 } 5006 if (round == 2) 5007 ++l; /* count <salflags> */ 5008 put_bytes(fd, (long_u)l, 4); /* <sectionlen> */ 5009 5010 if (round == 2) 5011 { 5012 i = 0; 5013 if (spin->si_followup) 5014 i |= SAL_F0LLOWUP; 5015 if (spin->si_collapse) 5016 i |= SAL_COLLAPSE; 5017 if (spin->si_rem_accents) 5018 i |= SAL_REM_ACCENTS; 5019 putc(i, fd); /* <salflags> */ 5020 } 5021 5022 put_bytes(fd, (long_u)gap->ga_len, 2); /* <repcount> or <salcount> */ 5023 for (i = 0; i < gap->ga_len; ++i) 5024 { 5025 /* <rep> : <repfromlen> <repfrom> <reptolen> <repto> */ 5026 /* <sal> : <salfromlen> <salfrom> <saltolen> <salto> */ 5027 ftp = &((fromto_T *)gap->ga_data)[i]; 5028 for (rr = 1; rr <= 2; ++rr) 5029 { 5030 p = rr == 1 ? ftp->ft_from : ftp->ft_to; 5031 l = (int)STRLEN(p); 5032 putc(l, fd); 5033 if (l > 0) 5034 fwv &= fwrite(p, l, (size_t)1, fd); 5035 } 5036 } 5037 5038 } 5039 5040 /* SN_SOFO: <sofofromlen> <sofofrom> <sofotolen> <sofoto> 5041 * This is for making suggestions, section is not required. */ 5042 if (spin->si_sofofr != NULL && spin->si_sofoto != NULL) 5043 { 5044 putc(SN_SOFO, fd); /* <sectionID> */ 5045 putc(0, fd); /* <sectionflags> */ 5046 5047 l = (int)STRLEN(spin->si_sofofr); 5048 put_bytes(fd, (long_u)(l + STRLEN(spin->si_sofoto) + 4), 4); 5049 /* <sectionlen> */ 5050 5051 put_bytes(fd, (long_u)l, 2); /* <sofofromlen> */ 5052 fwv &= fwrite(spin->si_sofofr, l, (size_t)1, fd); /* <sofofrom> */ 5053 5054 l = (int)STRLEN(spin->si_sofoto); 5055 put_bytes(fd, (long_u)l, 2); /* <sofotolen> */ 5056 fwv &= fwrite(spin->si_sofoto, l, (size_t)1, fd); /* <sofoto> */ 5057 } 5058 5059 /* SN_WORDS: <word> ... 5060 * This is for making suggestions, section is not required. */ 5061 if (spin->si_commonwords.ht_used > 0) 5062 { 5063 putc(SN_WORDS, fd); /* <sectionID> */ 5064 putc(0, fd); /* <sectionflags> */ 5065 5066 /* round 1: count the bytes 5067 * round 2: write the bytes */ 5068 for (round = 1; round <= 2; ++round) 5069 { 5070 int todo; 5071 int len = 0; 5072 hashitem_T *hi; 5073 5074 todo = (int)spin->si_commonwords.ht_used; 5075 for (hi = spin->si_commonwords.ht_array; todo > 0; ++hi) 5076 if (!HASHITEM_EMPTY(hi)) 5077 { 5078 l = (int)STRLEN(hi->hi_key) + 1; 5079 len += l; 5080 if (round == 2) /* <word> */ 5081 fwv &= fwrite(hi->hi_key, (size_t)l, (size_t)1, fd); 5082 --todo; 5083 } 5084 if (round == 1) 5085 put_bytes(fd, (long_u)len, 4); /* <sectionlen> */ 5086 } 5087 } 5088 5089 /* SN_MAP: <mapstr> 5090 * This is for making suggestions, section is not required. */ 5091 if (spin->si_map.ga_len > 0) 5092 { 5093 putc(SN_MAP, fd); /* <sectionID> */ 5094 putc(0, fd); /* <sectionflags> */ 5095 l = spin->si_map.ga_len; 5096 put_bytes(fd, (long_u)l, 4); /* <sectionlen> */ 5097 fwv &= fwrite(spin->si_map.ga_data, (size_t)l, (size_t)1, fd); 5098 /* <mapstr> */ 5099 } 5100 5101 /* SN_SUGFILE: <timestamp> 5102 * This is used to notify that a .sug file may be available and at the 5103 * same time allows for checking that a .sug file that is found matches 5104 * with this .spl file. That's because the word numbers must be exactly 5105 * right. */ 5106 if (!spin->si_nosugfile 5107 && (spin->si_sal.ga_len > 0 5108 || (spin->si_sofofr != NULL && spin->si_sofoto != NULL))) 5109 { 5110 putc(SN_SUGFILE, fd); /* <sectionID> */ 5111 putc(0, fd); /* <sectionflags> */ 5112 put_bytes(fd, (long_u)8, 4); /* <sectionlen> */ 5113 5114 /* Set si_sugtime and write it to the file. */ 5115 spin->si_sugtime = time(NULL); 5116 put_time(fd, spin->si_sugtime); /* <timestamp> */ 5117 } 5118 5119 /* SN_NOSPLITSUGS: nothing 5120 * This is used to notify that no suggestions with word splits are to be 5121 * made. */ 5122 if (spin->si_nosplitsugs) 5123 { 5124 putc(SN_NOSPLITSUGS, fd); /* <sectionID> */ 5125 putc(0, fd); /* <sectionflags> */ 5126 put_bytes(fd, (long_u)0, 4); /* <sectionlen> */ 5127 } 5128 5129 /* SN_NOCOMPUNDSUGS: nothing 5130 * This is used to notify that no suggestions with compounds are to be 5131 * made. */ 5132 if (spin->si_nocompoundsugs) 5133 { 5134 putc(SN_NOCOMPOUNDSUGS, fd); /* <sectionID> */ 5135 putc(0, fd); /* <sectionflags> */ 5136 put_bytes(fd, (long_u)0, 4); /* <sectionlen> */ 5137 } 5138 5139 /* SN_COMPOUND: compound info. 5140 * We don't mark it required, when not supported all compound words will 5141 * be bad words. */ 5142 if (spin->si_compflags != NULL) 5143 { 5144 putc(SN_COMPOUND, fd); /* <sectionID> */ 5145 putc(0, fd); /* <sectionflags> */ 5146 5147 l = (int)STRLEN(spin->si_compflags); 5148 for (i = 0; i < spin->si_comppat.ga_len; ++i) 5149 l += (int)STRLEN(((char_u **)(spin->si_comppat.ga_data))[i]) + 1; 5150 put_bytes(fd, (long_u)(l + 7), 4); /* <sectionlen> */ 5151 5152 putc(spin->si_compmax, fd); /* <compmax> */ 5153 putc(spin->si_compminlen, fd); /* <compminlen> */ 5154 putc(spin->si_compsylmax, fd); /* <compsylmax> */ 5155 putc(0, fd); /* for Vim 7.0b compatibility */ 5156 putc(spin->si_compoptions, fd); /* <compoptions> */ 5157 put_bytes(fd, (long_u)spin->si_comppat.ga_len, 2); 5158 /* <comppatcount> */ 5159 for (i = 0; i < spin->si_comppat.ga_len; ++i) 5160 { 5161 p = ((char_u **)(spin->si_comppat.ga_data))[i]; 5162 putc((int)STRLEN(p), fd); /* <comppatlen> */ 5163 fwv &= fwrite(p, (size_t)STRLEN(p), (size_t)1, fd); 5164 /* <comppattext> */ 5165 } 5166 /* <compflags> */ 5167 fwv &= fwrite(spin->si_compflags, (size_t)STRLEN(spin->si_compflags), 5168 (size_t)1, fd); 5169 } 5170 5171 /* SN_NOBREAK: NOBREAK flag */ 5172 if (spin->si_nobreak) 5173 { 5174 putc(SN_NOBREAK, fd); /* <sectionID> */ 5175 putc(0, fd); /* <sectionflags> */ 5176 5177 /* It's empty, the presence of the section flags the feature. */ 5178 put_bytes(fd, (long_u)0, 4); /* <sectionlen> */ 5179 } 5180 5181 /* SN_SYLLABLE: syllable info. 5182 * We don't mark it required, when not supported syllables will not be 5183 * counted. */ 5184 if (spin->si_syllable != NULL) 5185 { 5186 putc(SN_SYLLABLE, fd); /* <sectionID> */ 5187 putc(0, fd); /* <sectionflags> */ 5188 5189 l = (int)STRLEN(spin->si_syllable); 5190 put_bytes(fd, (long_u)l, 4); /* <sectionlen> */ 5191 fwv &= fwrite(spin->si_syllable, (size_t)l, (size_t)1, fd); 5192 /* <syllable> */ 5193 } 5194 5195 /* end of <SECTIONS> */ 5196 putc(SN_END, fd); /* <sectionend> */ 5197 5198 5199 /* 5200 * <LWORDTREE> <KWORDTREE> <PREFIXTREE> 5201 */ 5202 spin->si_memtot = 0; 5203 for (round = 1; round <= 3; ++round) 5204 { 5205 if (round == 1) 5206 tree = spin->si_foldroot->wn_sibling; 5207 else if (round == 2) 5208 tree = spin->si_keeproot->wn_sibling; 5209 else 5210 tree = spin->si_prefroot->wn_sibling; 5211 5212 /* Clear the index and wnode fields in the tree. */ 5213 clear_node(tree); 5214 5215 /* Count the number of nodes. Needed to be able to allocate the 5216 * memory when reading the nodes. Also fills in index for shared 5217 * nodes. */ 5218 nodecount = put_node(NULL, tree, 0, regionmask, round == 3); 5219 5220 /* number of nodes in 4 bytes */ 5221 put_bytes(fd, (long_u)nodecount, 4); /* <nodecount> */ 5222 spin->si_memtot += nodecount + nodecount * sizeof(int); 5223 5224 /* Write the nodes. */ 5225 (void)put_node(fd, tree, 0, regionmask, round == 3); 5226 } 5227 5228 /* Write another byte to check for errors (file system full). */ 5229 if (putc(0, fd) == EOF) 5230 retval = FAIL; 5231 theend: 5232 if (fclose(fd) == EOF) 5233 retval = FAIL; 5234 5235 if (fwv != (size_t)1) 5236 retval = FAIL; 5237 if (retval == FAIL) 5238 emsg(_(e_write)); 5239 5240 return retval; 5241 } 5242 5243 /* 5244 * Clear the index and wnode fields of "node", it siblings and its 5245 * children. This is needed because they are a union with other items to save 5246 * space. 5247 */ 5248 static void 5249 clear_node(wordnode_T *node) 5250 { 5251 wordnode_T *np; 5252 5253 if (node != NULL) 5254 for (np = node; np != NULL; np = np->wn_sibling) 5255 { 5256 np->wn_u1.index = 0; 5257 np->wn_u2.wnode = NULL; 5258 5259 if (np->wn_byte != NUL) 5260 clear_node(np->wn_child); 5261 } 5262 } 5263 5264 5265 /* 5266 * Dump a word tree at node "node". 5267 * 5268 * This first writes the list of possible bytes (siblings). Then for each 5269 * byte recursively write the children. 5270 * 5271 * NOTE: The code here must match the code in read_tree_node(), since 5272 * assumptions are made about the indexes (so that we don't have to write them 5273 * in the file). 5274 * 5275 * Returns the number of nodes used. 5276 */ 5277 static int 5278 put_node( 5279 FILE *fd, /* NULL when only counting */ 5280 wordnode_T *node, 5281 int idx, 5282 int regionmask, 5283 int prefixtree) /* TRUE for PREFIXTREE */ 5284 { 5285 int newindex = idx; 5286 int siblingcount = 0; 5287 wordnode_T *np; 5288 int flags; 5289 5290 /* If "node" is zero the tree is empty. */ 5291 if (node == NULL) 5292 return 0; 5293 5294 /* Store the index where this node is written. */ 5295 node->wn_u1.index = idx; 5296 5297 /* Count the number of siblings. */ 5298 for (np = node; np != NULL; np = np->wn_sibling) 5299 ++siblingcount; 5300 5301 /* Write the sibling count. */ 5302 if (fd != NULL) 5303 putc(siblingcount, fd); /* <siblingcount> */ 5304 5305 /* Write each sibling byte and optionally extra info. */ 5306 for (np = node; np != NULL; np = np->wn_sibling) 5307 { 5308 if (np->wn_byte == 0) 5309 { 5310 if (fd != NULL) 5311 { 5312 /* For a NUL byte (end of word) write the flags etc. */ 5313 if (prefixtree) 5314 { 5315 /* In PREFIXTREE write the required affixID and the 5316 * associated condition nr (stored in wn_region). The 5317 * byte value is misused to store the "rare" and "not 5318 * combining" flags */ 5319 if (np->wn_flags == (short_u)PFX_FLAGS) 5320 putc(BY_NOFLAGS, fd); /* <byte> */ 5321 else 5322 { 5323 putc(BY_FLAGS, fd); /* <byte> */ 5324 putc(np->wn_flags, fd); /* <pflags> */ 5325 } 5326 putc(np->wn_affixID, fd); /* <affixID> */ 5327 put_bytes(fd, (long_u)np->wn_region, 2); /* <prefcondnr> */ 5328 } 5329 else 5330 { 5331 /* For word trees we write the flag/region items. */ 5332 flags = np->wn_flags; 5333 if (regionmask != 0 && np->wn_region != regionmask) 5334 flags |= WF_REGION; 5335 if (np->wn_affixID != 0) 5336 flags |= WF_AFX; 5337 if (flags == 0) 5338 { 5339 /* word without flags or region */ 5340 putc(BY_NOFLAGS, fd); /* <byte> */ 5341 } 5342 else 5343 { 5344 if (np->wn_flags >= 0x100) 5345 { 5346 putc(BY_FLAGS2, fd); /* <byte> */ 5347 putc(flags, fd); /* <flags> */ 5348 putc((unsigned)flags >> 8, fd); /* <flags2> */ 5349 } 5350 else 5351 { 5352 putc(BY_FLAGS, fd); /* <byte> */ 5353 putc(flags, fd); /* <flags> */ 5354 } 5355 if (flags & WF_REGION) 5356 putc(np->wn_region, fd); /* <region> */ 5357 if (flags & WF_AFX) 5358 putc(np->wn_affixID, fd); /* <affixID> */ 5359 } 5360 } 5361 } 5362 } 5363 else 5364 { 5365 if (np->wn_child->wn_u1.index != 0 5366 && np->wn_child->wn_u2.wnode != node) 5367 { 5368 /* The child is written elsewhere, write the reference. */ 5369 if (fd != NULL) 5370 { 5371 putc(BY_INDEX, fd); /* <byte> */ 5372 /* <nodeidx> */ 5373 put_bytes(fd, (long_u)np->wn_child->wn_u1.index, 3); 5374 } 5375 } 5376 else if (np->wn_child->wn_u2.wnode == NULL) 5377 /* We will write the child below and give it an index. */ 5378 np->wn_child->wn_u2.wnode = node; 5379 5380 if (fd != NULL) 5381 if (putc(np->wn_byte, fd) == EOF) /* <byte> or <xbyte> */ 5382 { 5383 emsg(_(e_write)); 5384 return 0; 5385 } 5386 } 5387 } 5388 5389 /* Space used in the array when reading: one for each sibling and one for 5390 * the count. */ 5391 newindex += siblingcount + 1; 5392 5393 /* Recursively dump the children of each sibling. */ 5394 for (np = node; np != NULL; np = np->wn_sibling) 5395 if (np->wn_byte != 0 && np->wn_child->wn_u2.wnode == node) 5396 newindex = put_node(fd, np->wn_child, newindex, regionmask, 5397 prefixtree); 5398 5399 return newindex; 5400 } 5401 5402 5403 /* 5404 * ":mkspell [-ascii] outfile infile ..." 5405 * ":mkspell [-ascii] addfile" 5406 */ 5407 void 5408 ex_mkspell(exarg_T *eap) 5409 { 5410 int fcount; 5411 char_u **fnames; 5412 char_u *arg = eap->arg; 5413 int ascii = FALSE; 5414 5415 if (STRNCMP(arg, "-ascii", 6) == 0) 5416 { 5417 ascii = TRUE; 5418 arg = skipwhite(arg + 6); 5419 } 5420 5421 /* Expand all the remaining arguments (e.g., $VIMRUNTIME). */ 5422 if (get_arglist_exp(arg, &fcount, &fnames, FALSE) == OK) 5423 { 5424 mkspell(fcount, fnames, ascii, eap->forceit, FALSE); 5425 FreeWild(fcount, fnames); 5426 } 5427 } 5428 5429 /* 5430 * Create the .sug file. 5431 * Uses the soundfold info in "spin". 5432 * Writes the file with the name "wfname", with ".spl" changed to ".sug". 5433 */ 5434 static void 5435 spell_make_sugfile(spellinfo_T *spin, char_u *wfname) 5436 { 5437 char_u *fname = NULL; 5438 int len; 5439 slang_T *slang; 5440 int free_slang = FALSE; 5441 5442 /* 5443 * Read back the .spl file that was written. This fills the required 5444 * info for soundfolding. This also uses less memory than the 5445 * pointer-linked version of the trie. And it avoids having two versions 5446 * of the code for the soundfolding stuff. 5447 * It might have been done already by spell_reload_one(). 5448 */ 5449 for (slang = first_lang; slang != NULL; slang = slang->sl_next) 5450 if (fullpathcmp(wfname, slang->sl_fname, FALSE) == FPC_SAME) 5451 break; 5452 if (slang == NULL) 5453 { 5454 spell_message(spin, (char_u *)_("Reading back spell file...")); 5455 slang = spell_load_file(wfname, NULL, NULL, FALSE); 5456 if (slang == NULL) 5457 return; 5458 free_slang = TRUE; 5459 } 5460 5461 /* 5462 * Clear the info in "spin" that is used. 5463 */ 5464 spin->si_blocks = NULL; 5465 spin->si_blocks_cnt = 0; 5466 spin->si_compress_cnt = 0; /* will stay at 0 all the time*/ 5467 spin->si_free_count = 0; 5468 spin->si_first_free = NULL; 5469 spin->si_foldwcount = 0; 5470 5471 /* 5472 * Go through the trie of good words, soundfold each word and add it to 5473 * the soundfold trie. 5474 */ 5475 spell_message(spin, (char_u *)_("Performing soundfolding...")); 5476 if (sug_filltree(spin, slang) == FAIL) 5477 goto theend; 5478 5479 /* 5480 * Create the table which links each soundfold word with a list of the 5481 * good words it may come from. Creates buffer "spin->si_spellbuf". 5482 * This also removes the wordnr from the NUL byte entries to make 5483 * compression possible. 5484 */ 5485 if (sug_maketable(spin) == FAIL) 5486 goto theend; 5487 5488 smsg(_("Number of words after soundfolding: %ld"), 5489 (long)spin->si_spellbuf->b_ml.ml_line_count); 5490 5491 /* 5492 * Compress the soundfold trie. 5493 */ 5494 spell_message(spin, (char_u *)_(msg_compressing)); 5495 wordtree_compress(spin, spin->si_foldroot); 5496 5497 /* 5498 * Write the .sug file. 5499 * Make the file name by changing ".spl" to ".sug". 5500 */ 5501 fname = alloc(MAXPATHL); 5502 if (fname == NULL) 5503 goto theend; 5504 vim_strncpy(fname, wfname, MAXPATHL - 1); 5505 len = (int)STRLEN(fname); 5506 fname[len - 2] = 'u'; 5507 fname[len - 1] = 'g'; 5508 sug_write(spin, fname); 5509 5510 theend: 5511 vim_free(fname); 5512 if (free_slang) 5513 slang_free(slang); 5514 free_blocks(spin->si_blocks); 5515 close_spellbuf(spin->si_spellbuf); 5516 } 5517 5518 /* 5519 * Build the soundfold trie for language "slang". 5520 */ 5521 static int 5522 sug_filltree(spellinfo_T *spin, slang_T *slang) 5523 { 5524 char_u *byts; 5525 idx_T *idxs; 5526 int depth; 5527 idx_T arridx[MAXWLEN]; 5528 int curi[MAXWLEN]; 5529 char_u tword[MAXWLEN]; 5530 char_u tsalword[MAXWLEN]; 5531 int c; 5532 idx_T n; 5533 unsigned words_done = 0; 5534 int wordcount[MAXWLEN]; 5535 5536 /* We use si_foldroot for the soundfolded trie. */ 5537 spin->si_foldroot = wordtree_alloc(spin); 5538 if (spin->si_foldroot == NULL) 5539 return FAIL; 5540 5541 /* let tree_add_word() know we're adding to the soundfolded tree */ 5542 spin->si_sugtree = TRUE; 5543 5544 /* 5545 * Go through the whole case-folded tree, soundfold each word and put it 5546 * in the trie. 5547 */ 5548 byts = slang->sl_fbyts; 5549 idxs = slang->sl_fidxs; 5550 5551 arridx[0] = 0; 5552 curi[0] = 1; 5553 wordcount[0] = 0; 5554 5555 depth = 0; 5556 while (depth >= 0 && !got_int) 5557 { 5558 if (curi[depth] > byts[arridx[depth]]) 5559 { 5560 /* Done all bytes at this node, go up one level. */ 5561 idxs[arridx[depth]] = wordcount[depth]; 5562 if (depth > 0) 5563 wordcount[depth - 1] += wordcount[depth]; 5564 5565 --depth; 5566 line_breakcheck(); 5567 } 5568 else 5569 { 5570 5571 /* Do one more byte at this node. */ 5572 n = arridx[depth] + curi[depth]; 5573 ++curi[depth]; 5574 5575 c = byts[n]; 5576 if (c == 0) 5577 { 5578 /* Sound-fold the word. */ 5579 tword[depth] = NUL; 5580 spell_soundfold(slang, tword, TRUE, tsalword); 5581 5582 /* We use the "flags" field for the MSB of the wordnr, 5583 * "region" for the LSB of the wordnr. */ 5584 if (tree_add_word(spin, tsalword, spin->si_foldroot, 5585 words_done >> 16, words_done & 0xffff, 5586 0) == FAIL) 5587 return FAIL; 5588 5589 ++words_done; 5590 ++wordcount[depth]; 5591 5592 /* Reset the block count each time to avoid compression 5593 * kicking in. */ 5594 spin->si_blocks_cnt = 0; 5595 5596 /* Skip over any other NUL bytes (same word with different 5597 * flags). */ 5598 while (byts[n + 1] == 0) 5599 { 5600 ++n; 5601 ++curi[depth]; 5602 } 5603 } 5604 else 5605 { 5606 /* Normal char, go one level deeper. */ 5607 tword[depth++] = c; 5608 arridx[depth] = idxs[n]; 5609 curi[depth] = 1; 5610 wordcount[depth] = 0; 5611 } 5612 } 5613 } 5614 5615 smsg(_("Total number of words: %d"), words_done); 5616 5617 return OK; 5618 } 5619 5620 /* 5621 * Make the table that links each word in the soundfold trie to the words it 5622 * can be produced from. 5623 * This is not unlike lines in a file, thus use a memfile to be able to access 5624 * the table efficiently. 5625 * Returns FAIL when out of memory. 5626 */ 5627 static int 5628 sug_maketable(spellinfo_T *spin) 5629 { 5630 garray_T ga; 5631 int res = OK; 5632 5633 /* Allocate a buffer, open a memline for it and create the swap file 5634 * (uses a temp file, not a .swp file). */ 5635 spin->si_spellbuf = open_spellbuf(); 5636 if (spin->si_spellbuf == NULL) 5637 return FAIL; 5638 5639 /* Use a buffer to store the line info, avoids allocating many small 5640 * pieces of memory. */ 5641 ga_init2(&ga, 1, 100); 5642 5643 /* recursively go through the tree */ 5644 if (sug_filltable(spin, spin->si_foldroot->wn_sibling, 0, &ga) == -1) 5645 res = FAIL; 5646 5647 ga_clear(&ga); 5648 return res; 5649 } 5650 5651 /* 5652 * Fill the table for one node and its children. 5653 * Returns the wordnr at the start of the node. 5654 * Returns -1 when out of memory. 5655 */ 5656 static int 5657 sug_filltable( 5658 spellinfo_T *spin, 5659 wordnode_T *node, 5660 int startwordnr, 5661 garray_T *gap) /* place to store line of numbers */ 5662 { 5663 wordnode_T *p, *np; 5664 int wordnr = startwordnr; 5665 int nr; 5666 int prev_nr; 5667 5668 for (p = node; p != NULL; p = p->wn_sibling) 5669 { 5670 if (p->wn_byte == NUL) 5671 { 5672 gap->ga_len = 0; 5673 prev_nr = 0; 5674 for (np = p; np != NULL && np->wn_byte == NUL; np = np->wn_sibling) 5675 { 5676 if (ga_grow(gap, 10) == FAIL) 5677 return -1; 5678 5679 nr = (np->wn_flags << 16) + (np->wn_region & 0xffff); 5680 /* Compute the offset from the previous nr and store the 5681 * offset in a way that it takes a minimum number of bytes. 5682 * It's a bit like utf-8, but without the need to mark 5683 * following bytes. */ 5684 nr -= prev_nr; 5685 prev_nr += nr; 5686 gap->ga_len += offset2bytes(nr, 5687 (char_u *)gap->ga_data + gap->ga_len); 5688 } 5689 5690 /* add the NUL byte */ 5691 ((char_u *)gap->ga_data)[gap->ga_len++] = NUL; 5692 5693 if (ml_append_buf(spin->si_spellbuf, (linenr_T)wordnr, 5694 gap->ga_data, gap->ga_len, TRUE) == FAIL) 5695 return -1; 5696 ++wordnr; 5697 5698 /* Remove extra NUL entries, we no longer need them. We don't 5699 * bother freeing the nodes, the won't be reused anyway. */ 5700 while (p->wn_sibling != NULL && p->wn_sibling->wn_byte == NUL) 5701 p->wn_sibling = p->wn_sibling->wn_sibling; 5702 5703 /* Clear the flags on the remaining NUL node, so that compression 5704 * works a lot better. */ 5705 p->wn_flags = 0; 5706 p->wn_region = 0; 5707 } 5708 else 5709 { 5710 wordnr = sug_filltable(spin, p->wn_child, wordnr, gap); 5711 if (wordnr == -1) 5712 return -1; 5713 } 5714 } 5715 return wordnr; 5716 } 5717 5718 /* 5719 * Convert an offset into a minimal number of bytes. 5720 * Similar to utf_char2byters, but use 8 bits in followup bytes and avoid NUL 5721 * bytes. 5722 */ 5723 static int 5724 offset2bytes(int nr, char_u *buf) 5725 { 5726 int rem; 5727 int b1, b2, b3, b4; 5728 5729 /* Split the number in parts of base 255. We need to avoid NUL bytes. */ 5730 b1 = nr % 255 + 1; 5731 rem = nr / 255; 5732 b2 = rem % 255 + 1; 5733 rem = rem / 255; 5734 b3 = rem % 255 + 1; 5735 b4 = rem / 255 + 1; 5736 5737 if (b4 > 1 || b3 > 0x1f) /* 4 bytes */ 5738 { 5739 buf[0] = 0xe0 + b4; 5740 buf[1] = b3; 5741 buf[2] = b2; 5742 buf[3] = b1; 5743 return 4; 5744 } 5745 if (b3 > 1 || b2 > 0x3f ) /* 3 bytes */ 5746 { 5747 buf[0] = 0xc0 + b3; 5748 buf[1] = b2; 5749 buf[2] = b1; 5750 return 3; 5751 } 5752 if (b2 > 1 || b1 > 0x7f ) /* 2 bytes */ 5753 { 5754 buf[0] = 0x80 + b2; 5755 buf[1] = b1; 5756 return 2; 5757 } 5758 /* 1 byte */ 5759 buf[0] = b1; 5760 return 1; 5761 } 5762 5763 /* 5764 * Write the .sug file in "fname". 5765 */ 5766 static void 5767 sug_write(spellinfo_T *spin, char_u *fname) 5768 { 5769 FILE *fd; 5770 wordnode_T *tree; 5771 int nodecount; 5772 int wcount; 5773 char_u *line; 5774 linenr_T lnum; 5775 int len; 5776 5777 /* Create the file. Note that an existing file is silently overwritten! */ 5778 fd = mch_fopen((char *)fname, "w"); 5779 if (fd == NULL) 5780 { 5781 semsg(_(e_notopen), fname); 5782 return; 5783 } 5784 5785 vim_snprintf((char *)IObuff, IOSIZE, 5786 _("Writing suggestion file %s..."), fname); 5787 spell_message(spin, IObuff); 5788 5789 /* 5790 * <SUGHEADER>: <fileID> <versionnr> <timestamp> 5791 */ 5792 if (fwrite(VIMSUGMAGIC, VIMSUGMAGICL, (size_t)1, fd) != 1) /* <fileID> */ 5793 { 5794 emsg(_(e_write)); 5795 goto theend; 5796 } 5797 putc(VIMSUGVERSION, fd); /* <versionnr> */ 5798 5799 /* Write si_sugtime to the file. */ 5800 put_time(fd, spin->si_sugtime); /* <timestamp> */ 5801 5802 /* 5803 * <SUGWORDTREE> 5804 */ 5805 spin->si_memtot = 0; 5806 tree = spin->si_foldroot->wn_sibling; 5807 5808 /* Clear the index and wnode fields in the tree. */ 5809 clear_node(tree); 5810 5811 /* Count the number of nodes. Needed to be able to allocate the 5812 * memory when reading the nodes. Also fills in index for shared 5813 * nodes. */ 5814 nodecount = put_node(NULL, tree, 0, 0, FALSE); 5815 5816 /* number of nodes in 4 bytes */ 5817 put_bytes(fd, (long_u)nodecount, 4); /* <nodecount> */ 5818 spin->si_memtot += nodecount + nodecount * sizeof(int); 5819 5820 /* Write the nodes. */ 5821 (void)put_node(fd, tree, 0, 0, FALSE); 5822 5823 /* 5824 * <SUGTABLE>: <sugwcount> <sugline> ... 5825 */ 5826 wcount = spin->si_spellbuf->b_ml.ml_line_count; 5827 put_bytes(fd, (long_u)wcount, 4); /* <sugwcount> */ 5828 5829 for (lnum = 1; lnum <= (linenr_T)wcount; ++lnum) 5830 { 5831 /* <sugline>: <sugnr> ... NUL */ 5832 line = ml_get_buf(spin->si_spellbuf, lnum, FALSE); 5833 len = (int)STRLEN(line) + 1; 5834 if (fwrite(line, (size_t)len, (size_t)1, fd) == 0) 5835 { 5836 emsg(_(e_write)); 5837 goto theend; 5838 } 5839 spin->si_memtot += len; 5840 } 5841 5842 /* Write another byte to check for errors. */ 5843 if (putc(0, fd) == EOF) 5844 emsg(_(e_write)); 5845 5846 vim_snprintf((char *)IObuff, IOSIZE, 5847 _("Estimated runtime memory use: %d bytes"), spin->si_memtot); 5848 spell_message(spin, IObuff); 5849 5850 theend: 5851 /* close the file */ 5852 fclose(fd); 5853 } 5854 5855 5856 /* 5857 * Create a Vim spell file from one or more word lists. 5858 * "fnames[0]" is the output file name. 5859 * "fnames[fcount - 1]" is the last input file name. 5860 * Exception: when "fnames[0]" ends in ".add" it's used as the input file name 5861 * and ".spl" is appended to make the output file name. 5862 */ 5863 void 5864 mkspell( 5865 int fcount, 5866 char_u **fnames, 5867 int ascii, /* -ascii argument given */ 5868 int over_write, /* overwrite existing output file */ 5869 int added_word) /* invoked through "zg" */ 5870 { 5871 char_u *fname = NULL; 5872 char_u *wfname; 5873 char_u **innames; 5874 int incount; 5875 afffile_T *(afile[MAXREGIONS]); 5876 int i; 5877 int len; 5878 stat_T st; 5879 int error = FALSE; 5880 spellinfo_T spin; 5881 5882 vim_memset(&spin, 0, sizeof(spin)); 5883 spin.si_verbose = !added_word; 5884 spin.si_ascii = ascii; 5885 spin.si_followup = TRUE; 5886 spin.si_rem_accents = TRUE; 5887 ga_init2(&spin.si_rep, (int)sizeof(fromto_T), 20); 5888 ga_init2(&spin.si_repsal, (int)sizeof(fromto_T), 20); 5889 ga_init2(&spin.si_sal, (int)sizeof(fromto_T), 20); 5890 ga_init2(&spin.si_map, (int)sizeof(char_u), 100); 5891 ga_init2(&spin.si_comppat, (int)sizeof(char_u *), 20); 5892 ga_init2(&spin.si_prefcond, (int)sizeof(char_u *), 50); 5893 hash_init(&spin.si_commonwords); 5894 spin.si_newcompID = 127; /* start compound ID at first maximum */ 5895 5896 /* default: fnames[0] is output file, following are input files */ 5897 innames = &fnames[1]; 5898 incount = fcount - 1; 5899 5900 wfname = alloc(MAXPATHL); 5901 if (wfname == NULL) 5902 return; 5903 5904 if (fcount >= 1) 5905 { 5906 len = (int)STRLEN(fnames[0]); 5907 if (fcount == 1 && len > 4 && STRCMP(fnames[0] + len - 4, ".add") == 0) 5908 { 5909 /* For ":mkspell path/en.latin1.add" output file is 5910 * "path/en.latin1.add.spl". */ 5911 innames = &fnames[0]; 5912 incount = 1; 5913 vim_snprintf((char *)wfname, MAXPATHL, "%s.spl", fnames[0]); 5914 } 5915 else if (fcount == 1) 5916 { 5917 /* For ":mkspell path/vim" output file is "path/vim.latin1.spl". */ 5918 innames = &fnames[0]; 5919 incount = 1; 5920 vim_snprintf((char *)wfname, MAXPATHL, SPL_FNAME_TMPL, 5921 fnames[0], spin.si_ascii ? (char_u *)"ascii" : spell_enc()); 5922 } 5923 else if (len > 4 && STRCMP(fnames[0] + len - 4, ".spl") == 0) 5924 { 5925 /* Name ends in ".spl", use as the file name. */ 5926 vim_strncpy(wfname, fnames[0], MAXPATHL - 1); 5927 } 5928 else 5929 /* Name should be language, make the file name from it. */ 5930 vim_snprintf((char *)wfname, MAXPATHL, SPL_FNAME_TMPL, 5931 fnames[0], spin.si_ascii ? (char_u *)"ascii" : spell_enc()); 5932 5933 /* Check for .ascii.spl. */ 5934 if (strstr((char *)gettail(wfname), SPL_FNAME_ASCII) != NULL) 5935 spin.si_ascii = TRUE; 5936 5937 /* Check for .add.spl. */ 5938 if (strstr((char *)gettail(wfname), SPL_FNAME_ADD) != NULL) 5939 spin.si_add = TRUE; 5940 } 5941 5942 if (incount <= 0) 5943 emsg(_(e_invarg)); /* need at least output and input names */ 5944 else if (vim_strchr(gettail(wfname), '_') != NULL) 5945 emsg(_("E751: Output file name must not have region name")); 5946 else if (incount > MAXREGIONS) 5947 semsg(_("E754: Only up to %d regions supported"), MAXREGIONS); 5948 else 5949 { 5950 /* Check for overwriting before doing things that may take a lot of 5951 * time. */ 5952 if (!over_write && mch_stat((char *)wfname, &st) >= 0) 5953 { 5954 emsg(_(e_exists)); 5955 goto theend; 5956 } 5957 if (mch_isdir(wfname)) 5958 { 5959 semsg(_(e_isadir2), wfname); 5960 goto theend; 5961 } 5962 5963 fname = alloc(MAXPATHL); 5964 if (fname == NULL) 5965 goto theend; 5966 5967 /* 5968 * Init the aff and dic pointers. 5969 * Get the region names if there are more than 2 arguments. 5970 */ 5971 for (i = 0; i < incount; ++i) 5972 { 5973 afile[i] = NULL; 5974 5975 if (incount > 1) 5976 { 5977 len = (int)STRLEN(innames[i]); 5978 if (STRLEN(gettail(innames[i])) < 5 5979 || innames[i][len - 3] != '_') 5980 { 5981 semsg(_("E755: Invalid region in %s"), innames[i]); 5982 goto theend; 5983 } 5984 spin.si_region_name[i * 2] = TOLOWER_ASC(innames[i][len - 2]); 5985 spin.si_region_name[i * 2 + 1] = 5986 TOLOWER_ASC(innames[i][len - 1]); 5987 } 5988 } 5989 spin.si_region_count = incount; 5990 5991 spin.si_foldroot = wordtree_alloc(&spin); 5992 spin.si_keeproot = wordtree_alloc(&spin); 5993 spin.si_prefroot = wordtree_alloc(&spin); 5994 if (spin.si_foldroot == NULL 5995 || spin.si_keeproot == NULL 5996 || spin.si_prefroot == NULL) 5997 { 5998 free_blocks(spin.si_blocks); 5999 goto theend; 6000 } 6001 6002 /* When not producing a .add.spl file clear the character table when 6003 * we encounter one in the .aff file. This means we dump the current 6004 * one in the .spl file if the .aff file doesn't define one. That's 6005 * better than guessing the contents, the table will match a 6006 * previously loaded spell file. */ 6007 if (!spin.si_add) 6008 spin.si_clear_chartab = TRUE; 6009 6010 /* 6011 * Read all the .aff and .dic files. 6012 * Text is converted to 'encoding'. 6013 * Words are stored in the case-folded and keep-case trees. 6014 */ 6015 for (i = 0; i < incount && !error; ++i) 6016 { 6017 spin.si_conv.vc_type = CONV_NONE; 6018 spin.si_region = 1 << i; 6019 6020 vim_snprintf((char *)fname, MAXPATHL, "%s.aff", innames[i]); 6021 if (mch_stat((char *)fname, &st) >= 0) 6022 { 6023 /* Read the .aff file. Will init "spin->si_conv" based on the 6024 * "SET" line. */ 6025 afile[i] = spell_read_aff(&spin, fname); 6026 if (afile[i] == NULL) 6027 error = TRUE; 6028 else 6029 { 6030 /* Read the .dic file and store the words in the trees. */ 6031 vim_snprintf((char *)fname, MAXPATHL, "%s.dic", 6032 innames[i]); 6033 if (spell_read_dic(&spin, fname, afile[i]) == FAIL) 6034 error = TRUE; 6035 } 6036 } 6037 else 6038 { 6039 /* No .aff file, try reading the file as a word list. Store 6040 * the words in the trees. */ 6041 if (spell_read_wordfile(&spin, innames[i]) == FAIL) 6042 error = TRUE; 6043 } 6044 6045 /* Free any conversion stuff. */ 6046 convert_setup(&spin.si_conv, NULL, NULL); 6047 } 6048 6049 if (spin.si_compflags != NULL && spin.si_nobreak) 6050 msg(_("Warning: both compounding and NOBREAK specified")); 6051 6052 if (!error && !got_int) 6053 { 6054 /* 6055 * Combine tails in the tree. 6056 */ 6057 spell_message(&spin, (char_u *)_(msg_compressing)); 6058 wordtree_compress(&spin, spin.si_foldroot); 6059 wordtree_compress(&spin, spin.si_keeproot); 6060 wordtree_compress(&spin, spin.si_prefroot); 6061 } 6062 6063 if (!error && !got_int) 6064 { 6065 /* 6066 * Write the info in the spell file. 6067 */ 6068 vim_snprintf((char *)IObuff, IOSIZE, 6069 _("Writing spell file %s..."), wfname); 6070 spell_message(&spin, IObuff); 6071 6072 error = write_vim_spell(&spin, wfname) == FAIL; 6073 6074 spell_message(&spin, (char_u *)_("Done!")); 6075 vim_snprintf((char *)IObuff, IOSIZE, 6076 _("Estimated runtime memory use: %d bytes"), spin.si_memtot); 6077 spell_message(&spin, IObuff); 6078 6079 /* 6080 * If the file is loaded need to reload it. 6081 */ 6082 if (!error) 6083 spell_reload_one(wfname, added_word); 6084 } 6085 6086 /* Free the allocated memory. */ 6087 ga_clear(&spin.si_rep); 6088 ga_clear(&spin.si_repsal); 6089 ga_clear(&spin.si_sal); 6090 ga_clear(&spin.si_map); 6091 ga_clear(&spin.si_comppat); 6092 ga_clear(&spin.si_prefcond); 6093 hash_clear_all(&spin.si_commonwords, 0); 6094 6095 /* Free the .aff file structures. */ 6096 for (i = 0; i < incount; ++i) 6097 if (afile[i] != NULL) 6098 spell_free_aff(afile[i]); 6099 6100 /* Free all the bits and pieces at once. */ 6101 free_blocks(spin.si_blocks); 6102 6103 /* 6104 * If there is soundfolding info and no NOSUGFILE item create the 6105 * .sug file with the soundfolded word trie. 6106 */ 6107 if (spin.si_sugtime != 0 && !error && !got_int) 6108 spell_make_sugfile(&spin, wfname); 6109 6110 } 6111 6112 theend: 6113 vim_free(fname); 6114 vim_free(wfname); 6115 } 6116 6117 /* 6118 * Display a message for spell file processing when 'verbose' is set or using 6119 * ":mkspell". "str" can be IObuff. 6120 */ 6121 static void 6122 spell_message(spellinfo_T *spin, char_u *str) 6123 { 6124 if (spin->si_verbose || p_verbose > 2) 6125 { 6126 if (!spin->si_verbose) 6127 verbose_enter(); 6128 msg((char *)str); 6129 out_flush(); 6130 if (!spin->si_verbose) 6131 verbose_leave(); 6132 } 6133 } 6134 6135 /* 6136 * ":[count]spellgood {word}" 6137 * ":[count]spellwrong {word}" 6138 * ":[count]spellundo {word}" 6139 */ 6140 void 6141 ex_spell(exarg_T *eap) 6142 { 6143 spell_add_word(eap->arg, (int)STRLEN(eap->arg), eap->cmdidx == CMD_spellwrong, 6144 eap->forceit ? 0 : (int)eap->line2, 6145 eap->cmdidx == CMD_spellundo); 6146 } 6147 6148 /* 6149 * Add "word[len]" to 'spellfile' as a good or bad word. 6150 */ 6151 void 6152 spell_add_word( 6153 char_u *word, 6154 int len, 6155 int bad, 6156 int idx, /* "zG" and "zW": zero, otherwise index in 6157 'spellfile' */ 6158 int undo) /* TRUE for "zug", "zuG", "zuw" and "zuW" */ 6159 { 6160 FILE *fd = NULL; 6161 buf_T *buf = NULL; 6162 int new_spf = FALSE; 6163 char_u *fname; 6164 char_u *fnamebuf = NULL; 6165 char_u line[MAXWLEN * 2]; 6166 long fpos, fpos_next = 0; 6167 int i; 6168 char_u *spf; 6169 6170 if (idx == 0) /* use internal wordlist */ 6171 { 6172 if (int_wordlist == NULL) 6173 { 6174 int_wordlist = vim_tempname('s', FALSE); 6175 if (int_wordlist == NULL) 6176 return; 6177 } 6178 fname = int_wordlist; 6179 } 6180 else 6181 { 6182 /* If 'spellfile' isn't set figure out a good default value. */ 6183 if (*curwin->w_s->b_p_spf == NUL) 6184 { 6185 init_spellfile(); 6186 new_spf = TRUE; 6187 } 6188 6189 if (*curwin->w_s->b_p_spf == NUL) 6190 { 6191 semsg(_(e_notset), "spellfile"); 6192 return; 6193 } 6194 fnamebuf = alloc(MAXPATHL); 6195 if (fnamebuf == NULL) 6196 return; 6197 6198 for (spf = curwin->w_s->b_p_spf, i = 1; *spf != NUL; ++i) 6199 { 6200 copy_option_part(&spf, fnamebuf, MAXPATHL, ","); 6201 if (i == idx) 6202 break; 6203 if (*spf == NUL) 6204 { 6205 semsg(_("E765: 'spellfile' does not have %d entries"), idx); 6206 vim_free(fnamebuf); 6207 return; 6208 } 6209 } 6210 6211 /* Check that the user isn't editing the .add file somewhere. */ 6212 buf = buflist_findname_exp(fnamebuf); 6213 if (buf != NULL && buf->b_ml.ml_mfp == NULL) 6214 buf = NULL; 6215 if (buf != NULL && bufIsChanged(buf)) 6216 { 6217 emsg(_(e_bufloaded)); 6218 vim_free(fnamebuf); 6219 return; 6220 } 6221 6222 fname = fnamebuf; 6223 } 6224 6225 if (bad || undo) 6226 { 6227 /* When the word appears as good word we need to remove that one, 6228 * since its flags sort before the one with WF_BANNED. */ 6229 fd = mch_fopen((char *)fname, "r"); 6230 if (fd != NULL) 6231 { 6232 while (!vim_fgets(line, MAXWLEN * 2, fd)) 6233 { 6234 fpos = fpos_next; 6235 fpos_next = ftell(fd); 6236 if (STRNCMP(word, line, len) == 0 6237 && (line[len] == '/' || line[len] < ' ')) 6238 { 6239 /* Found duplicate word. Remove it by writing a '#' at 6240 * the start of the line. Mixing reading and writing 6241 * doesn't work for all systems, close the file first. */ 6242 fclose(fd); 6243 fd = mch_fopen((char *)fname, "r+"); 6244 if (fd == NULL) 6245 break; 6246 if (fseek(fd, fpos, SEEK_SET) == 0) 6247 { 6248 fputc('#', fd); 6249 if (undo) 6250 { 6251 home_replace(NULL, fname, NameBuff, MAXPATHL, TRUE); 6252 smsg(_("Word '%.*s' removed from %s"), 6253 len, word, NameBuff); 6254 } 6255 } 6256 fseek(fd, fpos_next, SEEK_SET); 6257 } 6258 } 6259 if (fd != NULL) 6260 fclose(fd); 6261 } 6262 } 6263 6264 if (!undo) 6265 { 6266 fd = mch_fopen((char *)fname, "a"); 6267 if (fd == NULL && new_spf) 6268 { 6269 char_u *p; 6270 6271 /* We just initialized the 'spellfile' option and can't open the 6272 * file. We may need to create the "spell" directory first. We 6273 * already checked the runtime directory is writable in 6274 * init_spellfile(). */ 6275 if (!dir_of_file_exists(fname) && (p = gettail_sep(fname)) != fname) 6276 { 6277 int c = *p; 6278 6279 /* The directory doesn't exist. Try creating it and opening 6280 * the file again. */ 6281 *p = NUL; 6282 vim_mkdir(fname, 0755); 6283 *p = c; 6284 fd = mch_fopen((char *)fname, "a"); 6285 } 6286 } 6287 6288 if (fd == NULL) 6289 semsg(_(e_notopen), fname); 6290 else 6291 { 6292 if (bad) 6293 fprintf(fd, "%.*s/!\n", len, word); 6294 else 6295 fprintf(fd, "%.*s\n", len, word); 6296 fclose(fd); 6297 6298 home_replace(NULL, fname, NameBuff, MAXPATHL, TRUE); 6299 smsg(_("Word '%.*s' added to %s"), len, word, NameBuff); 6300 } 6301 } 6302 6303 if (fd != NULL) 6304 { 6305 /* Update the .add.spl file. */ 6306 mkspell(1, &fname, FALSE, TRUE, TRUE); 6307 6308 /* If the .add file is edited somewhere, reload it. */ 6309 if (buf != NULL) 6310 buf_reload(buf, buf->b_orig_mode); 6311 6312 redraw_all_later(SOME_VALID); 6313 } 6314 vim_free(fnamebuf); 6315 } 6316 6317 /* 6318 * Initialize 'spellfile' for the current buffer. 6319 */ 6320 static void 6321 init_spellfile(void) 6322 { 6323 char_u *buf; 6324 int l; 6325 char_u *fname; 6326 char_u *rtp; 6327 char_u *lend; 6328 int aspath = FALSE; 6329 char_u *lstart = curbuf->b_s.b_p_spl; 6330 6331 if (*curwin->w_s->b_p_spl != NUL && curwin->w_s->b_langp.ga_len > 0) 6332 { 6333 buf = alloc(MAXPATHL); 6334 if (buf == NULL) 6335 return; 6336 6337 /* Find the end of the language name. Exclude the region. If there 6338 * is a path separator remember the start of the tail. */ 6339 for (lend = curwin->w_s->b_p_spl; *lend != NUL 6340 && vim_strchr((char_u *)",._", *lend) == NULL; ++lend) 6341 if (vim_ispathsep(*lend)) 6342 { 6343 aspath = TRUE; 6344 lstart = lend + 1; 6345 } 6346 6347 /* Loop over all entries in 'runtimepath'. Use the first one where we 6348 * are allowed to write. */ 6349 rtp = p_rtp; 6350 while (*rtp != NUL) 6351 { 6352 if (aspath) 6353 /* Use directory of an entry with path, e.g., for 6354 * "/dir/lg.utf-8.spl" use "/dir". */ 6355 vim_strncpy(buf, curbuf->b_s.b_p_spl, 6356 lstart - curbuf->b_s.b_p_spl - 1); 6357 else 6358 /* Copy the path from 'runtimepath' to buf[]. */ 6359 copy_option_part(&rtp, buf, MAXPATHL, ","); 6360 if (filewritable(buf) == 2) 6361 { 6362 /* Use the first language name from 'spelllang' and the 6363 * encoding used in the first loaded .spl file. */ 6364 if (aspath) 6365 vim_strncpy(buf, curbuf->b_s.b_p_spl, 6366 lend - curbuf->b_s.b_p_spl); 6367 else 6368 { 6369 /* Create the "spell" directory if it doesn't exist yet. */ 6370 l = (int)STRLEN(buf); 6371 vim_snprintf((char *)buf + l, MAXPATHL - l, "/spell"); 6372 if (filewritable(buf) != 2) 6373 vim_mkdir(buf, 0755); 6374 6375 l = (int)STRLEN(buf); 6376 vim_snprintf((char *)buf + l, MAXPATHL - l, 6377 "/%.*s", (int)(lend - lstart), lstart); 6378 } 6379 l = (int)STRLEN(buf); 6380 fname = LANGP_ENTRY(curwin->w_s->b_langp, 0) 6381 ->lp_slang->sl_fname; 6382 vim_snprintf((char *)buf + l, MAXPATHL - l, ".%s.add", 6383 fname != NULL 6384 && strstr((char *)gettail(fname), ".ascii.") != NULL 6385 ? (char_u *)"ascii" : spell_enc()); 6386 set_option_value((char_u *)"spellfile", 0L, buf, OPT_LOCAL); 6387 break; 6388 } 6389 aspath = FALSE; 6390 } 6391 6392 vim_free(buf); 6393 } 6394 } 6395 6396 6397 6398 /* 6399 * Set the spell character tables from strings in the affix file. 6400 */ 6401 static int 6402 set_spell_chartab(char_u *fol, char_u *low, char_u *upp) 6403 { 6404 /* We build the new tables here first, so that we can compare with the 6405 * previous one. */ 6406 spelltab_T new_st; 6407 char_u *pf = fol, *pl = low, *pu = upp; 6408 int f, l, u; 6409 6410 clear_spell_chartab(&new_st); 6411 6412 while (*pf != NUL) 6413 { 6414 if (*pl == NUL || *pu == NUL) 6415 { 6416 emsg(_(e_affform)); 6417 return FAIL; 6418 } 6419 f = mb_ptr2char_adv(&pf); 6420 l = mb_ptr2char_adv(&pl); 6421 u = mb_ptr2char_adv(&pu); 6422 6423 /* Every character that appears is a word character. */ 6424 if (f < 256) 6425 new_st.st_isw[f] = TRUE; 6426 if (l < 256) 6427 new_st.st_isw[l] = TRUE; 6428 if (u < 256) 6429 new_st.st_isw[u] = TRUE; 6430 6431 /* if "LOW" and "FOL" are not the same the "LOW" char needs 6432 * case-folding */ 6433 if (l < 256 && l != f) 6434 { 6435 if (f >= 256) 6436 { 6437 emsg(_(e_affrange)); 6438 return FAIL; 6439 } 6440 new_st.st_fold[l] = f; 6441 } 6442 6443 /* if "UPP" and "FOL" are not the same the "UPP" char needs 6444 * case-folding, it's upper case and the "UPP" is the upper case of 6445 * "FOL" . */ 6446 if (u < 256 && u != f) 6447 { 6448 if (f >= 256) 6449 { 6450 emsg(_(e_affrange)); 6451 return FAIL; 6452 } 6453 new_st.st_fold[u] = f; 6454 new_st.st_isu[u] = TRUE; 6455 new_st.st_upper[f] = u; 6456 } 6457 } 6458 6459 if (*pl != NUL || *pu != NUL) 6460 { 6461 emsg(_(e_affform)); 6462 return FAIL; 6463 } 6464 6465 return set_spell_finish(&new_st); 6466 } 6467 6468 /* 6469 * Set the spell character tables from strings in the .spl file. 6470 */ 6471 static void 6472 set_spell_charflags( 6473 char_u *flags, 6474 int cnt, /* length of "flags" */ 6475 char_u *fol) 6476 { 6477 /* We build the new tables here first, so that we can compare with the 6478 * previous one. */ 6479 spelltab_T new_st; 6480 int i; 6481 char_u *p = fol; 6482 int c; 6483 6484 clear_spell_chartab(&new_st); 6485 6486 for (i = 0; i < 128; ++i) 6487 { 6488 if (i < cnt) 6489 { 6490 new_st.st_isw[i + 128] = (flags[i] & CF_WORD) != 0; 6491 new_st.st_isu[i + 128] = (flags[i] & CF_UPPER) != 0; 6492 } 6493 6494 if (*p != NUL) 6495 { 6496 c = mb_ptr2char_adv(&p); 6497 new_st.st_fold[i + 128] = c; 6498 if (i + 128 != c && new_st.st_isu[i + 128] && c < 256) 6499 new_st.st_upper[c] = i + 128; 6500 } 6501 } 6502 6503 (void)set_spell_finish(&new_st); 6504 } 6505 6506 static int 6507 set_spell_finish(spelltab_T *new_st) 6508 { 6509 int i; 6510 6511 if (did_set_spelltab) 6512 { 6513 /* check that it's the same table */ 6514 for (i = 0; i < 256; ++i) 6515 { 6516 if (spelltab.st_isw[i] != new_st->st_isw[i] 6517 || spelltab.st_isu[i] != new_st->st_isu[i] 6518 || spelltab.st_fold[i] != new_st->st_fold[i] 6519 || spelltab.st_upper[i] != new_st->st_upper[i]) 6520 { 6521 emsg(_("E763: Word characters differ between spell files")); 6522 return FAIL; 6523 } 6524 } 6525 } 6526 else 6527 { 6528 /* copy the new spelltab into the one being used */ 6529 spelltab = *new_st; 6530 did_set_spelltab = TRUE; 6531 } 6532 6533 return OK; 6534 } 6535 6536 /* 6537 * Write the table with prefix conditions to the .spl file. 6538 * When "fd" is NULL only count the length of what is written. 6539 */ 6540 static int 6541 write_spell_prefcond(FILE *fd, garray_T *gap) 6542 { 6543 int i; 6544 char_u *p; 6545 int len; 6546 int totlen; 6547 size_t x = 1; /* collect return value of fwrite() */ 6548 6549 if (fd != NULL) 6550 put_bytes(fd, (long_u)gap->ga_len, 2); /* <prefcondcnt> */ 6551 6552 totlen = 2 + gap->ga_len; /* length of <prefcondcnt> and <condlen> bytes */ 6553 6554 for (i = 0; i < gap->ga_len; ++i) 6555 { 6556 /* <prefcond> : <condlen> <condstr> */ 6557 p = ((char_u **)gap->ga_data)[i]; 6558 if (p != NULL) 6559 { 6560 len = (int)STRLEN(p); 6561 if (fd != NULL) 6562 { 6563 fputc(len, fd); 6564 x &= fwrite(p, (size_t)len, (size_t)1, fd); 6565 } 6566 totlen += len; 6567 } 6568 else if (fd != NULL) 6569 fputc(0, fd); 6570 } 6571 6572 return totlen; 6573 } 6574 6575 6576 /* 6577 * Use map string "map" for languages "lp". 6578 */ 6579 static void 6580 set_map_str(slang_T *lp, char_u *map) 6581 { 6582 char_u *p; 6583 int headc = 0; 6584 int c; 6585 int i; 6586 6587 if (*map == NUL) 6588 { 6589 lp->sl_has_map = FALSE; 6590 return; 6591 } 6592 lp->sl_has_map = TRUE; 6593 6594 /* Init the array and hash tables empty. */ 6595 for (i = 0; i < 256; ++i) 6596 lp->sl_map_array[i] = 0; 6597 hash_init(&lp->sl_map_hash); 6598 6599 /* 6600 * The similar characters are stored separated with slashes: 6601 * "aaa/bbb/ccc/". Fill sl_map_array[c] with the character before c and 6602 * before the same slash. For characters above 255 sl_map_hash is used. 6603 */ 6604 for (p = map; *p != NUL; ) 6605 { 6606 c = mb_cptr2char_adv(&p); 6607 if (c == '/') 6608 headc = 0; 6609 else 6610 { 6611 if (headc == 0) 6612 headc = c; 6613 6614 /* Characters above 255 don't fit in sl_map_array[], put them in 6615 * the hash table. Each entry is the char, a NUL the headchar and 6616 * a NUL. */ 6617 if (c >= 256) 6618 { 6619 int cl = mb_char2len(c); 6620 int headcl = mb_char2len(headc); 6621 char_u *b; 6622 hash_T hash; 6623 hashitem_T *hi; 6624 6625 b = alloc((unsigned)(cl + headcl + 2)); 6626 if (b == NULL) 6627 return; 6628 mb_char2bytes(c, b); 6629 b[cl] = NUL; 6630 mb_char2bytes(headc, b + cl + 1); 6631 b[cl + 1 + headcl] = NUL; 6632 hash = hash_hash(b); 6633 hi = hash_lookup(&lp->sl_map_hash, b, hash); 6634 if (HASHITEM_EMPTY(hi)) 6635 hash_add_item(&lp->sl_map_hash, hi, b, hash); 6636 else 6637 { 6638 /* This should have been checked when generating the .spl 6639 * file. */ 6640 emsg(_("E783: duplicate char in MAP entry")); 6641 vim_free(b); 6642 } 6643 } 6644 else 6645 lp->sl_map_array[c] = headc; 6646 } 6647 } 6648 } 6649 6650 6651 #endif /* FEAT_SPELL */ 6652