1 /* vi:set ts=8 sts=4 sw=4 noet: 2 * 3 * VIM - Vi IMproved by Bram Moolenaar 4 * 5 * Do ":help uganda" in Vim to read copying and usage conditions. 6 * Do ":help credits" in Vim to see a list of people who contributed. 7 * See README.txt for an overview of the Vim source code. 8 */ 9 10 /* 11 * strings.c: string manipulation functions 12 */ 13 14 #include "vim.h" 15 16 /* 17 * Copy "string" into newly allocated memory. 18 */ 19 char_u * 20 vim_strsave(char_u *string) 21 { 22 char_u *p; 23 size_t len; 24 25 len = STRLEN(string) + 1; 26 p = alloc(len); 27 if (p != NULL) 28 mch_memmove(p, string, len); 29 return p; 30 } 31 32 /* 33 * Copy up to "len" bytes of "string" into newly allocated memory and 34 * terminate with a NUL. 35 * The allocated memory always has size "len + 1", also when "string" is 36 * shorter. 37 */ 38 char_u * 39 vim_strnsave(char_u *string, size_t len) 40 { 41 char_u *p; 42 43 p = alloc(len + 1); 44 if (p != NULL) 45 { 46 STRNCPY(p, string, len); 47 p[len] = NUL; 48 } 49 return p; 50 } 51 52 /* 53 * Same as vim_strsave(), but any characters found in esc_chars are preceded 54 * by a backslash. 55 */ 56 char_u * 57 vim_strsave_escaped(char_u *string, char_u *esc_chars) 58 { 59 return vim_strsave_escaped_ext(string, esc_chars, '\\', FALSE); 60 } 61 62 /* 63 * Same as vim_strsave_escaped(), but when "bsl" is TRUE also escape 64 * characters where rem_backslash() would remove the backslash. 65 * Escape the characters with "cc". 66 */ 67 char_u * 68 vim_strsave_escaped_ext( 69 char_u *string, 70 char_u *esc_chars, 71 int cc, 72 int bsl) 73 { 74 char_u *p; 75 char_u *p2; 76 char_u *escaped_string; 77 unsigned length; 78 int l; 79 80 /* 81 * First count the number of backslashes required. 82 * Then allocate the memory and insert them. 83 */ 84 length = 1; // count the trailing NUL 85 for (p = string; *p; p++) 86 { 87 if (has_mbyte && (l = (*mb_ptr2len)(p)) > 1) 88 { 89 length += l; // count a multibyte char 90 p += l - 1; 91 continue; 92 } 93 if (vim_strchr(esc_chars, *p) != NULL || (bsl && rem_backslash(p))) 94 ++length; // count a backslash 95 ++length; // count an ordinary char 96 } 97 escaped_string = alloc(length); 98 if (escaped_string != NULL) 99 { 100 p2 = escaped_string; 101 for (p = string; *p; p++) 102 { 103 if (has_mbyte && (l = (*mb_ptr2len)(p)) > 1) 104 { 105 mch_memmove(p2, p, (size_t)l); 106 p2 += l; 107 p += l - 1; // skip multibyte char 108 continue; 109 } 110 if (vim_strchr(esc_chars, *p) != NULL || (bsl && rem_backslash(p))) 111 *p2++ = cc; 112 *p2++ = *p; 113 } 114 *p2 = NUL; 115 } 116 return escaped_string; 117 } 118 119 /* 120 * Return TRUE when 'shell' has "csh" in the tail. 121 */ 122 int 123 csh_like_shell(void) 124 { 125 return (strstr((char *)gettail(p_sh), "csh") != NULL); 126 } 127 128 /* 129 * Escape "string" for use as a shell argument with system(). 130 * This uses single quotes, except when we know we need to use double quotes 131 * (MS-DOS and MS-Windows not using PowerShell and without 'shellslash' set). 132 * PowerShell also uses a novel escaping for enclosed single quotes - double 133 * them up. 134 * Escape a newline, depending on the 'shell' option. 135 * When "do_special" is TRUE also replace "!", "%", "#" and things starting 136 * with "<" like "<cfile>". 137 * When "do_newline" is FALSE do not escape newline unless it is csh shell. 138 * Returns the result in allocated memory, NULL if we have run out. 139 */ 140 char_u * 141 vim_strsave_shellescape(char_u *string, int do_special, int do_newline) 142 { 143 unsigned length; 144 char_u *p; 145 char_u *d; 146 char_u *escaped_string; 147 int l; 148 int csh_like; 149 char_u *shname; 150 int powershell; 151 # ifdef MSWIN 152 int double_quotes; 153 # endif 154 155 // Only csh and similar shells expand '!' within single quotes. For sh and 156 // the like we must not put a backslash before it, it will be taken 157 // literally. If do_special is set the '!' will be escaped twice. 158 // Csh also needs to have "\n" escaped twice when do_special is set. 159 csh_like = csh_like_shell(); 160 161 // PowerShell uses it's own version for quoting single quotes 162 shname = gettail(p_sh); 163 powershell = strstr((char *)shname, "pwsh") != NULL; 164 # ifdef MSWIN 165 powershell = powershell || strstr((char *)shname, "powershell") != NULL; 166 // PowerShell only accepts single quotes so override shellslash. 167 double_quotes = !powershell && !p_ssl; 168 # endif 169 170 // First count the number of extra bytes required. 171 length = (unsigned)STRLEN(string) + 3; // two quotes and a trailing NUL 172 for (p = string; *p != NUL; MB_PTR_ADV(p)) 173 { 174 # ifdef MSWIN 175 if (double_quotes) 176 { 177 if (*p == '"') 178 ++length; // " -> "" 179 } 180 else 181 # endif 182 if (*p == '\'') 183 { 184 if (powershell) 185 length +=2; // ' => '' 186 else 187 length += 3; // ' => '\'' 188 } 189 if ((*p == '\n' && (csh_like || do_newline)) 190 || (*p == '!' && (csh_like || do_special))) 191 { 192 ++length; // insert backslash 193 if (csh_like && do_special) 194 ++length; // insert backslash 195 } 196 if (do_special && find_cmdline_var(p, &l) >= 0) 197 { 198 ++length; // insert backslash 199 p += l - 1; 200 } 201 } 202 203 // Allocate memory for the result and fill it. 204 escaped_string = alloc(length); 205 if (escaped_string != NULL) 206 { 207 d = escaped_string; 208 209 // add opening quote 210 # ifdef MSWIN 211 if (double_quotes) 212 *d++ = '"'; 213 else 214 # endif 215 *d++ = '\''; 216 217 for (p = string; *p != NUL; ) 218 { 219 # ifdef MSWIN 220 if (double_quotes) 221 { 222 if (*p == '"') 223 { 224 *d++ = '"'; 225 *d++ = '"'; 226 ++p; 227 continue; 228 } 229 } 230 else 231 # endif 232 if (*p == '\'') 233 { 234 if (powershell) 235 { 236 *d++ = '\''; 237 *d++ = '\''; 238 } 239 else 240 { 241 *d++ = '\''; 242 *d++ = '\\'; 243 *d++ = '\''; 244 *d++ = '\''; 245 } 246 ++p; 247 continue; 248 } 249 if ((*p == '\n' && (csh_like || do_newline)) 250 || (*p == '!' && (csh_like || do_special))) 251 { 252 *d++ = '\\'; 253 if (csh_like && do_special) 254 *d++ = '\\'; 255 *d++ = *p++; 256 continue; 257 } 258 if (do_special && find_cmdline_var(p, &l) >= 0) 259 { 260 *d++ = '\\'; // insert backslash 261 while (--l >= 0) // copy the var 262 *d++ = *p++; 263 continue; 264 } 265 266 MB_COPY_CHAR(p, d); 267 } 268 269 // add terminating quote and finish with a NUL 270 # ifdef MSWIN 271 if (double_quotes) 272 *d++ = '"'; 273 else 274 # endif 275 *d++ = '\''; 276 *d = NUL; 277 } 278 279 return escaped_string; 280 } 281 282 /* 283 * Like vim_strsave(), but make all characters uppercase. 284 * This uses ASCII lower-to-upper case translation, language independent. 285 */ 286 char_u * 287 vim_strsave_up(char_u *string) 288 { 289 char_u *p1; 290 291 p1 = vim_strsave(string); 292 vim_strup(p1); 293 return p1; 294 } 295 296 /* 297 * Like vim_strnsave(), but make all characters uppercase. 298 * This uses ASCII lower-to-upper case translation, language independent. 299 */ 300 char_u * 301 vim_strnsave_up(char_u *string, size_t len) 302 { 303 char_u *p1; 304 305 p1 = vim_strnsave(string, len); 306 vim_strup(p1); 307 return p1; 308 } 309 310 /* 311 * ASCII lower-to-upper case translation, language independent. 312 */ 313 void 314 vim_strup( 315 char_u *p) 316 { 317 char_u *p2; 318 int c; 319 320 if (p != NULL) 321 { 322 p2 = p; 323 while ((c = *p2) != NUL) 324 #ifdef EBCDIC 325 *p2++ = isalpha(c) ? toupper(c) : c; 326 #else 327 *p2++ = (c < 'a' || c > 'z') ? c : (c - 0x20); 328 #endif 329 } 330 } 331 332 #if defined(FEAT_EVAL) || defined(FEAT_SPELL) || defined(PROTO) 333 /* 334 * Make string "s" all upper-case and return it in allocated memory. 335 * Handles multi-byte characters as well as possible. 336 * Returns NULL when out of memory. 337 */ 338 static char_u * 339 strup_save(char_u *orig) 340 { 341 char_u *p; 342 char_u *res; 343 344 res = p = vim_strsave(orig); 345 346 if (res != NULL) 347 while (*p != NUL) 348 { 349 int l; 350 351 if (enc_utf8) 352 { 353 int c, uc; 354 int newl; 355 char_u *s; 356 357 c = utf_ptr2char(p); 358 l = utf_ptr2len(p); 359 if (c == 0) 360 { 361 // overlong sequence, use only the first byte 362 c = *p; 363 l = 1; 364 } 365 uc = utf_toupper(c); 366 367 // Reallocate string when byte count changes. This is rare, 368 // thus it's OK to do another malloc()/free(). 369 newl = utf_char2len(uc); 370 if (newl != l) 371 { 372 s = alloc(STRLEN(res) + 1 + newl - l); 373 if (s == NULL) 374 { 375 vim_free(res); 376 return NULL; 377 } 378 mch_memmove(s, res, p - res); 379 STRCPY(s + (p - res) + newl, p + l); 380 p = s + (p - res); 381 vim_free(res); 382 res = s; 383 } 384 385 utf_char2bytes(uc, p); 386 p += newl; 387 } 388 else if (has_mbyte && (l = (*mb_ptr2len)(p)) > 1) 389 p += l; // skip multi-byte character 390 else 391 { 392 *p = TOUPPER_LOC(*p); // note that toupper() can be a macro 393 p++; 394 } 395 } 396 397 return res; 398 } 399 400 /* 401 * Make string "s" all lower-case and return it in allocated memory. 402 * Handles multi-byte characters as well as possible. 403 * Returns NULL when out of memory. 404 */ 405 char_u * 406 strlow_save(char_u *orig) 407 { 408 char_u *p; 409 char_u *res; 410 411 res = p = vim_strsave(orig); 412 413 if (res != NULL) 414 while (*p != NUL) 415 { 416 int l; 417 418 if (enc_utf8) 419 { 420 int c, lc; 421 int newl; 422 char_u *s; 423 424 c = utf_ptr2char(p); 425 l = utf_ptr2len(p); 426 if (c == 0) 427 { 428 // overlong sequence, use only the first byte 429 c = *p; 430 l = 1; 431 } 432 lc = utf_tolower(c); 433 434 // Reallocate string when byte count changes. This is rare, 435 // thus it's OK to do another malloc()/free(). 436 newl = utf_char2len(lc); 437 if (newl != l) 438 { 439 s = alloc(STRLEN(res) + 1 + newl - l); 440 if (s == NULL) 441 { 442 vim_free(res); 443 return NULL; 444 } 445 mch_memmove(s, res, p - res); 446 STRCPY(s + (p - res) + newl, p + l); 447 p = s + (p - res); 448 vim_free(res); 449 res = s; 450 } 451 452 utf_char2bytes(lc, p); 453 p += newl; 454 } 455 else if (has_mbyte && (l = (*mb_ptr2len)(p)) > 1) 456 p += l; // skip multi-byte character 457 else 458 { 459 *p = TOLOWER_LOC(*p); // note that tolower() can be a macro 460 p++; 461 } 462 } 463 464 return res; 465 } 466 #endif 467 468 /* 469 * delete spaces at the end of a string 470 */ 471 void 472 del_trailing_spaces(char_u *ptr) 473 { 474 char_u *q; 475 476 q = ptr + STRLEN(ptr); 477 while (--q > ptr && VIM_ISWHITE(q[0]) && q[-1] != '\\' && q[-1] != Ctrl_V) 478 *q = NUL; 479 } 480 481 /* 482 * Like strncpy(), but always terminate the result with one NUL. 483 * "to" must be "len + 1" long! 484 */ 485 void 486 vim_strncpy(char_u *to, char_u *from, size_t len) 487 { 488 STRNCPY(to, from, len); 489 to[len] = NUL; 490 } 491 492 /* 493 * Like strcat(), but make sure the result fits in "tosize" bytes and is 494 * always NUL terminated. "from" and "to" may overlap. 495 */ 496 void 497 vim_strcat(char_u *to, char_u *from, size_t tosize) 498 { 499 size_t tolen = STRLEN(to); 500 size_t fromlen = STRLEN(from); 501 502 if (tolen + fromlen + 1 > tosize) 503 { 504 mch_memmove(to + tolen, from, tosize - tolen - 1); 505 to[tosize - 1] = NUL; 506 } 507 else 508 mch_memmove(to + tolen, from, fromlen + 1); 509 } 510 511 #if (!defined(HAVE_STRCASECMP) && !defined(HAVE_STRICMP)) || defined(PROTO) 512 /* 513 * Compare two strings, ignoring case, using current locale. 514 * Doesn't work for multi-byte characters. 515 * return 0 for match, < 0 for smaller, > 0 for bigger 516 */ 517 int 518 vim_stricmp(char *s1, char *s2) 519 { 520 int i; 521 522 for (;;) 523 { 524 i = (int)TOLOWER_LOC(*s1) - (int)TOLOWER_LOC(*s2); 525 if (i != 0) 526 return i; // this character different 527 if (*s1 == NUL) 528 break; // strings match until NUL 529 ++s1; 530 ++s2; 531 } 532 return 0; // strings match 533 } 534 #endif 535 536 #if (!defined(HAVE_STRNCASECMP) && !defined(HAVE_STRNICMP)) || defined(PROTO) 537 /* 538 * Compare two strings, for length "len", ignoring case, using current locale. 539 * Doesn't work for multi-byte characters. 540 * return 0 for match, < 0 for smaller, > 0 for bigger 541 */ 542 int 543 vim_strnicmp(char *s1, char *s2, size_t len) 544 { 545 int i; 546 547 while (len > 0) 548 { 549 i = (int)TOLOWER_LOC(*s1) - (int)TOLOWER_LOC(*s2); 550 if (i != 0) 551 return i; // this character different 552 if (*s1 == NUL) 553 break; // strings match until NUL 554 ++s1; 555 ++s2; 556 --len; 557 } 558 return 0; // strings match 559 } 560 #endif 561 562 /* 563 * Search for first occurrence of "c" in "string". 564 * Version of strchr() that handles unsigned char strings with characters from 565 * 128 to 255 correctly. It also doesn't return a pointer to the NUL at the 566 * end of the string. 567 */ 568 char_u * 569 vim_strchr(char_u *string, int c) 570 { 571 char_u *p; 572 int b; 573 574 p = string; 575 if (enc_utf8 && c >= 0x80) 576 { 577 while (*p != NUL) 578 { 579 int l = utfc_ptr2len(p); 580 581 // Avoid matching an illegal byte here. 582 if (utf_ptr2char(p) == c && l > 1) 583 return p; 584 p += l; 585 } 586 return NULL; 587 } 588 if (enc_dbcs != 0 && c > 255) 589 { 590 int n2 = c & 0xff; 591 592 c = ((unsigned)c >> 8) & 0xff; 593 while ((b = *p) != NUL) 594 { 595 if (b == c && p[1] == n2) 596 return p; 597 p += (*mb_ptr2len)(p); 598 } 599 return NULL; 600 } 601 if (has_mbyte) 602 { 603 while ((b = *p) != NUL) 604 { 605 if (b == c) 606 return p; 607 p += (*mb_ptr2len)(p); 608 } 609 return NULL; 610 } 611 while ((b = *p) != NUL) 612 { 613 if (b == c) 614 return p; 615 ++p; 616 } 617 return NULL; 618 } 619 620 /* 621 * Version of strchr() that only works for bytes and handles unsigned char 622 * strings with characters above 128 correctly. It also doesn't return a 623 * pointer to the NUL at the end of the string. 624 */ 625 char_u * 626 vim_strbyte(char_u *string, int c) 627 { 628 char_u *p = string; 629 630 while (*p != NUL) 631 { 632 if (*p == c) 633 return p; 634 ++p; 635 } 636 return NULL; 637 } 638 639 /* 640 * Search for last occurrence of "c" in "string". 641 * Version of strrchr() that handles unsigned char strings with characters from 642 * 128 to 255 correctly. It also doesn't return a pointer to the NUL at the 643 * end of the string. 644 * Return NULL if not found. 645 * Does not handle multi-byte char for "c"! 646 */ 647 char_u * 648 vim_strrchr(char_u *string, int c) 649 { 650 char_u *retval = NULL; 651 char_u *p = string; 652 653 while (*p) 654 { 655 if (*p == c) 656 retval = p; 657 MB_PTR_ADV(p); 658 } 659 return retval; 660 } 661 662 /* 663 * Vim's version of strpbrk(), in case it's missing. 664 * Don't generate a prototype for this, causes problems when it's not used. 665 */ 666 #ifndef PROTO 667 # ifndef HAVE_STRPBRK 668 # ifdef vim_strpbrk 669 # undef vim_strpbrk 670 # endif 671 char_u * 672 vim_strpbrk(char_u *s, char_u *charset) 673 { 674 while (*s) 675 { 676 if (vim_strchr(charset, *s) != NULL) 677 return s; 678 MB_PTR_ADV(s); 679 } 680 return NULL; 681 } 682 # endif 683 #endif 684 685 /* 686 * Sort an array of strings. 687 */ 688 static int sort_compare(const void *s1, const void *s2); 689 690 static int 691 sort_compare(const void *s1, const void *s2) 692 { 693 return STRCMP(*(char **)s1, *(char **)s2); 694 } 695 696 void 697 sort_strings( 698 char_u **files, 699 int count) 700 { 701 qsort((void *)files, (size_t)count, sizeof(char_u *), sort_compare); 702 } 703 704 #if defined(FEAT_QUICKFIX) || defined(FEAT_SPELL) || defined(PROTO) 705 /* 706 * Return TRUE if string "s" contains a non-ASCII character (128 or higher). 707 * When "s" is NULL FALSE is returned. 708 */ 709 int 710 has_non_ascii(char_u *s) 711 { 712 char_u *p; 713 714 if (s != NULL) 715 for (p = s; *p != NUL; ++p) 716 if (*p >= 128) 717 return TRUE; 718 return FALSE; 719 } 720 #endif 721 722 /* 723 * Concatenate two strings and return the result in allocated memory. 724 * Returns NULL when out of memory. 725 */ 726 char_u * 727 concat_str(char_u *str1, char_u *str2) 728 { 729 char_u *dest; 730 size_t l = str1 == NULL ? 0 : STRLEN(str1); 731 732 dest = alloc(l + (str2 == NULL ? 0 : STRLEN(str2)) + 1L); 733 if (dest != NULL) 734 { 735 if (str1 == NULL) 736 *dest = NUL; 737 else 738 STRCPY(dest, str1); 739 if (str2 != NULL) 740 STRCPY(dest + l, str2); 741 } 742 return dest; 743 } 744 745 #if defined(FEAT_EVAL) || defined(PROTO) 746 747 /* 748 * Return string "str" in ' quotes, doubling ' characters. 749 * If "str" is NULL an empty string is assumed. 750 * If "function" is TRUE make it function('string'). 751 */ 752 char_u * 753 string_quote(char_u *str, int function) 754 { 755 unsigned len; 756 char_u *p, *r, *s; 757 758 len = (function ? 13 : 3); 759 if (str != NULL) 760 { 761 len += (unsigned)STRLEN(str); 762 for (p = str; *p != NUL; MB_PTR_ADV(p)) 763 if (*p == '\'') 764 ++len; 765 } 766 s = r = alloc(len); 767 if (r != NULL) 768 { 769 if (function) 770 { 771 STRCPY(r, "function('"); 772 r += 10; 773 } 774 else 775 *r++ = '\''; 776 if (str != NULL) 777 for (p = str; *p != NUL; ) 778 { 779 if (*p == '\'') 780 *r++ = '\''; 781 MB_COPY_CHAR(p, r); 782 } 783 *r++ = '\''; 784 if (function) 785 *r++ = ')'; 786 *r++ = NUL; 787 } 788 return s; 789 } 790 791 static void 792 byteidx(typval_T *argvars, typval_T *rettv, int comp UNUSED) 793 { 794 char_u *t; 795 char_u *str; 796 varnumber_T idx; 797 798 rettv->vval.v_number = -1; 799 800 if (in_vim9script() 801 && (check_for_string_arg(argvars, 0) == FAIL 802 || check_for_number_arg(argvars, 1) == FAIL)) 803 return; 804 805 str = tv_get_string_chk(&argvars[0]); 806 idx = tv_get_number_chk(&argvars[1], NULL); 807 if (str == NULL || idx < 0) 808 return; 809 810 t = str; 811 for ( ; idx > 0; idx--) 812 { 813 if (*t == NUL) // EOL reached 814 return; 815 if (enc_utf8 && comp) 816 t += utf_ptr2len(t); 817 else 818 t += (*mb_ptr2len)(t); 819 } 820 rettv->vval.v_number = (varnumber_T)(t - str); 821 } 822 823 /* 824 * "byteidx()" function 825 */ 826 void 827 f_byteidx(typval_T *argvars, typval_T *rettv) 828 { 829 byteidx(argvars, rettv, FALSE); 830 } 831 832 /* 833 * "byteidxcomp()" function 834 */ 835 void 836 f_byteidxcomp(typval_T *argvars, typval_T *rettv) 837 { 838 byteidx(argvars, rettv, TRUE); 839 } 840 841 /* 842 * "charidx()" function 843 */ 844 void 845 f_charidx(typval_T *argvars, typval_T *rettv) 846 { 847 char_u *str; 848 varnumber_T idx; 849 varnumber_T countcc = FALSE; 850 char_u *p; 851 int len; 852 int (*ptr2len)(char_u *); 853 854 rettv->vval.v_number = -1; 855 856 if (argvars[0].v_type != VAR_STRING || argvars[1].v_type != VAR_NUMBER 857 || (argvars[2].v_type != VAR_UNKNOWN 858 && argvars[2].v_type != VAR_NUMBER 859 && argvars[2].v_type != VAR_BOOL)) 860 { 861 emsg(_(e_invarg)); 862 return; 863 } 864 865 str = tv_get_string_chk(&argvars[0]); 866 idx = tv_get_number_chk(&argvars[1], NULL); 867 if (str == NULL || idx < 0) 868 return; 869 870 if (argvars[2].v_type != VAR_UNKNOWN) 871 countcc = tv_get_bool(&argvars[2]); 872 if (countcc < 0 || countcc > 1) 873 { 874 semsg(_(e_using_number_as_bool_nr), countcc); 875 return; 876 } 877 878 if (enc_utf8 && countcc) 879 ptr2len = utf_ptr2len; 880 else 881 ptr2len = mb_ptr2len; 882 883 for (p = str, len = 0; p <= str + idx; len++) 884 { 885 if (*p == NUL) 886 return; 887 p += ptr2len(p); 888 } 889 890 rettv->vval.v_number = len > 0 ? len - 1 : 0; 891 } 892 893 /* 894 * "str2list()" function 895 */ 896 void 897 f_str2list(typval_T *argvars, typval_T *rettv) 898 { 899 char_u *p; 900 int utf8 = FALSE; 901 902 if (rettv_list_alloc(rettv) == FAIL) 903 return; 904 905 if (in_vim9script() 906 && (check_for_string_arg(argvars, 0) == FAIL 907 || check_for_opt_bool_arg(argvars, 1) == FAIL)) 908 return; 909 910 if (argvars[1].v_type != VAR_UNKNOWN) 911 utf8 = (int)tv_get_bool_chk(&argvars[1], NULL); 912 913 p = tv_get_string(&argvars[0]); 914 915 if (has_mbyte || utf8) 916 { 917 int (*ptr2len)(char_u *); 918 int (*ptr2char)(char_u *); 919 920 if (utf8 || enc_utf8) 921 { 922 ptr2len = utf_ptr2len; 923 ptr2char = utf_ptr2char; 924 } 925 else 926 { 927 ptr2len = mb_ptr2len; 928 ptr2char = mb_ptr2char; 929 } 930 931 for ( ; *p != NUL; p += (*ptr2len)(p)) 932 list_append_number(rettv->vval.v_list, (*ptr2char)(p)); 933 } 934 else 935 for ( ; *p != NUL; ++p) 936 list_append_number(rettv->vval.v_list, *p); 937 } 938 939 /* 940 * "str2nr()" function 941 */ 942 void 943 f_str2nr(typval_T *argvars, typval_T *rettv) 944 { 945 int base = 10; 946 char_u *p; 947 varnumber_T n; 948 int what = 0; 949 int isneg; 950 951 if (argvars[1].v_type != VAR_UNKNOWN) 952 { 953 base = (int)tv_get_number(&argvars[1]); 954 if (base != 2 && base != 8 && base != 10 && base != 16) 955 { 956 emsg(_(e_invarg)); 957 return; 958 } 959 if (argvars[2].v_type != VAR_UNKNOWN && tv_get_bool(&argvars[2])) 960 what |= STR2NR_QUOTE; 961 } 962 963 p = skipwhite(tv_get_string_strict(&argvars[0])); 964 isneg = (*p == '-'); 965 if (*p == '+' || *p == '-') 966 p = skipwhite(p + 1); 967 switch (base) 968 { 969 case 2: what |= STR2NR_BIN + STR2NR_FORCE; break; 970 case 8: what |= STR2NR_OCT + STR2NR_OOCT + STR2NR_FORCE; break; 971 case 16: what |= STR2NR_HEX + STR2NR_FORCE; break; 972 } 973 vim_str2nr(p, NULL, NULL, what, &n, NULL, 0, FALSE); 974 // Text after the number is silently ignored. 975 if (isneg) 976 rettv->vval.v_number = -n; 977 else 978 rettv->vval.v_number = n; 979 980 } 981 982 /* 983 * "strgetchar()" function 984 */ 985 void 986 f_strgetchar(typval_T *argvars, typval_T *rettv) 987 { 988 char_u *str; 989 int len; 990 int error = FALSE; 991 int charidx; 992 int byteidx = 0; 993 994 rettv->vval.v_number = -1; 995 996 if (in_vim9script() 997 && (check_for_string_arg(argvars, 0) == FAIL 998 || check_for_number_arg(argvars, 1) == FAIL)) 999 return; 1000 1001 str = tv_get_string_chk(&argvars[0]); 1002 if (str == NULL) 1003 return; 1004 len = (int)STRLEN(str); 1005 charidx = (int)tv_get_number_chk(&argvars[1], &error); 1006 if (error) 1007 return; 1008 1009 while (charidx >= 0 && byteidx < len) 1010 { 1011 if (charidx == 0) 1012 { 1013 rettv->vval.v_number = mb_ptr2char(str + byteidx); 1014 break; 1015 } 1016 --charidx; 1017 byteidx += MB_CPTR2LEN(str + byteidx); 1018 } 1019 } 1020 1021 /* 1022 * "stridx()" function 1023 */ 1024 void 1025 f_stridx(typval_T *argvars, typval_T *rettv) 1026 { 1027 char_u buf[NUMBUFLEN]; 1028 char_u *needle; 1029 char_u *haystack; 1030 char_u *save_haystack; 1031 char_u *pos; 1032 int start_idx; 1033 1034 needle = tv_get_string_chk(&argvars[1]); 1035 save_haystack = haystack = tv_get_string_buf_chk(&argvars[0], buf); 1036 rettv->vval.v_number = -1; 1037 if (needle == NULL || haystack == NULL) 1038 return; // type error; errmsg already given 1039 1040 if (argvars[2].v_type != VAR_UNKNOWN) 1041 { 1042 int error = FALSE; 1043 1044 start_idx = (int)tv_get_number_chk(&argvars[2], &error); 1045 if (error || start_idx >= (int)STRLEN(haystack)) 1046 return; 1047 if (start_idx >= 0) 1048 haystack += start_idx; 1049 } 1050 1051 pos = (char_u *)strstr((char *)haystack, (char *)needle); 1052 if (pos != NULL) 1053 rettv->vval.v_number = (varnumber_T)(pos - save_haystack); 1054 } 1055 1056 /* 1057 * "string()" function 1058 */ 1059 void 1060 f_string(typval_T *argvars, typval_T *rettv) 1061 { 1062 char_u *tofree; 1063 char_u numbuf[NUMBUFLEN]; 1064 1065 rettv->v_type = VAR_STRING; 1066 rettv->vval.v_string = tv2string(&argvars[0], &tofree, numbuf, 1067 get_copyID()); 1068 // Make a copy if we have a value but it's not in allocated memory. 1069 if (rettv->vval.v_string != NULL && tofree == NULL) 1070 rettv->vval.v_string = vim_strsave(rettv->vval.v_string); 1071 } 1072 1073 /* 1074 * "strlen()" function 1075 */ 1076 void 1077 f_strlen(typval_T *argvars, typval_T *rettv) 1078 { 1079 rettv->vval.v_number = (varnumber_T)(STRLEN( 1080 tv_get_string(&argvars[0]))); 1081 } 1082 1083 static void 1084 strchar_common(typval_T *argvars, typval_T *rettv, int skipcc) 1085 { 1086 char_u *s = tv_get_string(&argvars[0]); 1087 varnumber_T len = 0; 1088 int (*func_mb_ptr2char_adv)(char_u **pp); 1089 1090 func_mb_ptr2char_adv = skipcc ? mb_ptr2char_adv : mb_cptr2char_adv; 1091 while (*s != NUL) 1092 { 1093 func_mb_ptr2char_adv(&s); 1094 ++len; 1095 } 1096 rettv->vval.v_number = len; 1097 } 1098 1099 /* 1100 * "strcharlen()" function 1101 */ 1102 void 1103 f_strcharlen(typval_T *argvars, typval_T *rettv) 1104 { 1105 strchar_common(argvars, rettv, TRUE); 1106 } 1107 1108 /* 1109 * "strchars()" function 1110 */ 1111 void 1112 f_strchars(typval_T *argvars, typval_T *rettv) 1113 { 1114 varnumber_T skipcc = FALSE; 1115 1116 if (in_vim9script() 1117 && (check_for_string_arg(argvars, 0) == FAIL 1118 || check_for_opt_bool_arg(argvars, 1) == FAIL)) 1119 return; 1120 1121 if (argvars[1].v_type != VAR_UNKNOWN) 1122 skipcc = tv_get_bool(&argvars[1]); 1123 if (skipcc < 0 || skipcc > 1) 1124 semsg(_(e_using_number_as_bool_nr), skipcc); 1125 else 1126 strchar_common(argvars, rettv, skipcc); 1127 } 1128 1129 /* 1130 * "strdisplaywidth()" function 1131 */ 1132 void 1133 f_strdisplaywidth(typval_T *argvars, typval_T *rettv) 1134 { 1135 char_u *s; 1136 int col = 0; 1137 1138 rettv->vval.v_number = -1; 1139 1140 if (in_vim9script() 1141 && (check_for_string_arg(argvars, 0) == FAIL 1142 || check_for_opt_number_arg(argvars, 1) == FAIL)) 1143 return; 1144 1145 s = tv_get_string(&argvars[0]); 1146 if (argvars[1].v_type != VAR_UNKNOWN) 1147 col = (int)tv_get_number(&argvars[1]); 1148 1149 rettv->vval.v_number = (varnumber_T)(linetabsize_col(col, s) - col); 1150 } 1151 1152 /* 1153 * "strwidth()" function 1154 */ 1155 void 1156 f_strwidth(typval_T *argvars, typval_T *rettv) 1157 { 1158 char_u *s = tv_get_string_strict(&argvars[0]); 1159 1160 rettv->vval.v_number = (varnumber_T)(mb_string2cells(s, -1)); 1161 } 1162 1163 /* 1164 * "strcharpart()" function 1165 */ 1166 void 1167 f_strcharpart(typval_T *argvars, typval_T *rettv) 1168 { 1169 char_u *p; 1170 int nchar; 1171 int nbyte = 0; 1172 int charlen; 1173 int skipcc = FALSE; 1174 int len = 0; 1175 int slen; 1176 int error = FALSE; 1177 1178 if (in_vim9script() 1179 && (check_for_string_arg(argvars, 0) == FAIL 1180 || check_for_number_arg(argvars, 1) == FAIL 1181 || check_for_opt_number_arg(argvars, 2) == FAIL 1182 || (argvars[2].v_type != VAR_UNKNOWN 1183 && check_for_opt_bool_arg(argvars, 3) == FAIL))) 1184 return; 1185 1186 p = tv_get_string(&argvars[0]); 1187 slen = (int)STRLEN(p); 1188 1189 nchar = (int)tv_get_number_chk(&argvars[1], &error); 1190 if (!error) 1191 { 1192 if (argvars[2].v_type != VAR_UNKNOWN 1193 && argvars[3].v_type != VAR_UNKNOWN) 1194 { 1195 skipcc = tv_get_bool(&argvars[3]); 1196 if (skipcc < 0 || skipcc > 1) 1197 { 1198 semsg(_(e_using_number_as_bool_nr), skipcc); 1199 return; 1200 } 1201 } 1202 1203 if (nchar > 0) 1204 while (nchar > 0 && nbyte < slen) 1205 { 1206 if (skipcc) 1207 nbyte += mb_ptr2len(p + nbyte); 1208 else 1209 nbyte += MB_CPTR2LEN(p + nbyte); 1210 --nchar; 1211 } 1212 else 1213 nbyte = nchar; 1214 if (argvars[2].v_type != VAR_UNKNOWN) 1215 { 1216 charlen = (int)tv_get_number(&argvars[2]); 1217 while (charlen > 0 && nbyte + len < slen) 1218 { 1219 int off = nbyte + len; 1220 1221 if (off < 0) 1222 len += 1; 1223 else 1224 { 1225 if (skipcc) 1226 len += mb_ptr2len(p + off); 1227 else 1228 len += MB_CPTR2LEN(p + off); 1229 } 1230 --charlen; 1231 } 1232 } 1233 else 1234 len = slen - nbyte; // default: all bytes that are available. 1235 } 1236 1237 /* 1238 * Only return the overlap between the specified part and the actual 1239 * string. 1240 */ 1241 if (nbyte < 0) 1242 { 1243 len += nbyte; 1244 nbyte = 0; 1245 } 1246 else if (nbyte > slen) 1247 nbyte = slen; 1248 if (len < 0) 1249 len = 0; 1250 else if (nbyte + len > slen) 1251 len = slen - nbyte; 1252 1253 rettv->v_type = VAR_STRING; 1254 rettv->vval.v_string = vim_strnsave(p + nbyte, len); 1255 } 1256 1257 /* 1258 * "strpart()" function 1259 */ 1260 void 1261 f_strpart(typval_T *argvars, typval_T *rettv) 1262 { 1263 char_u *p; 1264 int n; 1265 int len; 1266 int slen; 1267 int error = FALSE; 1268 1269 if (in_vim9script() 1270 && (check_for_string_arg(argvars, 0) == FAIL 1271 || check_for_number_arg(argvars, 1) == FAIL 1272 || check_for_opt_number_arg(argvars, 2) == FAIL 1273 || (argvars[2].v_type != VAR_UNKNOWN 1274 && check_for_opt_bool_arg(argvars, 3) == FAIL))) 1275 return; 1276 1277 p = tv_get_string(&argvars[0]); 1278 slen = (int)STRLEN(p); 1279 1280 n = (int)tv_get_number_chk(&argvars[1], &error); 1281 if (error) 1282 len = 0; 1283 else if (argvars[2].v_type != VAR_UNKNOWN) 1284 len = (int)tv_get_number(&argvars[2]); 1285 else 1286 len = slen - n; // default len: all bytes that are available. 1287 1288 // Only return the overlap between the specified part and the actual 1289 // string. 1290 if (n < 0) 1291 { 1292 len += n; 1293 n = 0; 1294 } 1295 else if (n > slen) 1296 n = slen; 1297 if (len < 0) 1298 len = 0; 1299 else if (n + len > slen) 1300 len = slen - n; 1301 1302 if (argvars[2].v_type != VAR_UNKNOWN && argvars[3].v_type != VAR_UNKNOWN) 1303 { 1304 int off; 1305 1306 // length in characters 1307 for (off = n; off < slen && len > 0; --len) 1308 off += mb_ptr2len(p + off); 1309 len = off - n; 1310 } 1311 1312 rettv->v_type = VAR_STRING; 1313 rettv->vval.v_string = vim_strnsave(p + n, len); 1314 } 1315 1316 /* 1317 * "strridx()" function 1318 */ 1319 void 1320 f_strridx(typval_T *argvars, typval_T *rettv) 1321 { 1322 char_u buf[NUMBUFLEN]; 1323 char_u *needle; 1324 char_u *haystack; 1325 char_u *rest; 1326 char_u *lastmatch = NULL; 1327 int haystack_len, end_idx; 1328 1329 needle = tv_get_string_chk(&argvars[1]); 1330 haystack = tv_get_string_buf_chk(&argvars[0], buf); 1331 1332 rettv->vval.v_number = -1; 1333 if (needle == NULL || haystack == NULL) 1334 return; // type error; errmsg already given 1335 1336 haystack_len = (int)STRLEN(haystack); 1337 if (argvars[2].v_type != VAR_UNKNOWN) 1338 { 1339 // Third argument: upper limit for index 1340 end_idx = (int)tv_get_number_chk(&argvars[2], NULL); 1341 if (end_idx < 0) 1342 return; // can never find a match 1343 } 1344 else 1345 end_idx = haystack_len; 1346 1347 if (*needle == NUL) 1348 { 1349 // Empty string matches past the end. 1350 lastmatch = haystack + end_idx; 1351 } 1352 else 1353 { 1354 for (rest = haystack; *rest != '\0'; ++rest) 1355 { 1356 rest = (char_u *)strstr((char *)rest, (char *)needle); 1357 if (rest == NULL || rest > haystack + end_idx) 1358 break; 1359 lastmatch = rest; 1360 } 1361 } 1362 1363 if (lastmatch == NULL) 1364 rettv->vval.v_number = -1; 1365 else 1366 rettv->vval.v_number = (varnumber_T)(lastmatch - haystack); 1367 } 1368 1369 /* 1370 * "strtrans()" function 1371 */ 1372 void 1373 f_strtrans(typval_T *argvars, typval_T *rettv) 1374 { 1375 rettv->v_type = VAR_STRING; 1376 rettv->vval.v_string = transstr(tv_get_string(&argvars[0])); 1377 } 1378 1379 /* 1380 * "tolower(string)" function 1381 */ 1382 void 1383 f_tolower(typval_T *argvars, typval_T *rettv) 1384 { 1385 rettv->v_type = VAR_STRING; 1386 rettv->vval.v_string = strlow_save(tv_get_string(&argvars[0])); 1387 } 1388 1389 /* 1390 * "toupper(string)" function 1391 */ 1392 void 1393 f_toupper(typval_T *argvars, typval_T *rettv) 1394 { 1395 rettv->v_type = VAR_STRING; 1396 rettv->vval.v_string = strup_save(tv_get_string(&argvars[0])); 1397 } 1398 1399 /* 1400 * "tr(string, fromstr, tostr)" function 1401 */ 1402 void 1403 f_tr(typval_T *argvars, typval_T *rettv) 1404 { 1405 char_u *in_str; 1406 char_u *fromstr; 1407 char_u *tostr; 1408 char_u *p; 1409 int inlen; 1410 int fromlen; 1411 int tolen; 1412 int idx; 1413 char_u *cpstr; 1414 int cplen; 1415 int first = TRUE; 1416 char_u buf[NUMBUFLEN]; 1417 char_u buf2[NUMBUFLEN]; 1418 garray_T ga; 1419 1420 in_str = tv_get_string(&argvars[0]); 1421 fromstr = tv_get_string_buf_chk(&argvars[1], buf); 1422 tostr = tv_get_string_buf_chk(&argvars[2], buf2); 1423 1424 // Default return value: empty string. 1425 rettv->v_type = VAR_STRING; 1426 rettv->vval.v_string = NULL; 1427 if (fromstr == NULL || tostr == NULL) 1428 return; // type error; errmsg already given 1429 ga_init2(&ga, (int)sizeof(char), 80); 1430 1431 if (!has_mbyte) 1432 // not multi-byte: fromstr and tostr must be the same length 1433 if (STRLEN(fromstr) != STRLEN(tostr)) 1434 { 1435 error: 1436 semsg(_(e_invarg2), fromstr); 1437 ga_clear(&ga); 1438 return; 1439 } 1440 1441 // fromstr and tostr have to contain the same number of chars 1442 while (*in_str != NUL) 1443 { 1444 if (has_mbyte) 1445 { 1446 inlen = (*mb_ptr2len)(in_str); 1447 cpstr = in_str; 1448 cplen = inlen; 1449 idx = 0; 1450 for (p = fromstr; *p != NUL; p += fromlen) 1451 { 1452 fromlen = (*mb_ptr2len)(p); 1453 if (fromlen == inlen && STRNCMP(in_str, p, inlen) == 0) 1454 { 1455 for (p = tostr; *p != NUL; p += tolen) 1456 { 1457 tolen = (*mb_ptr2len)(p); 1458 if (idx-- == 0) 1459 { 1460 cplen = tolen; 1461 cpstr = p; 1462 break; 1463 } 1464 } 1465 if (*p == NUL) // tostr is shorter than fromstr 1466 goto error; 1467 break; 1468 } 1469 ++idx; 1470 } 1471 1472 if (first && cpstr == in_str) 1473 { 1474 // Check that fromstr and tostr have the same number of 1475 // (multi-byte) characters. Done only once when a character 1476 // of in_str doesn't appear in fromstr. 1477 first = FALSE; 1478 for (p = tostr; *p != NUL; p += tolen) 1479 { 1480 tolen = (*mb_ptr2len)(p); 1481 --idx; 1482 } 1483 if (idx != 0) 1484 goto error; 1485 } 1486 1487 (void)ga_grow(&ga, cplen); 1488 mch_memmove((char *)ga.ga_data + ga.ga_len, cpstr, (size_t)cplen); 1489 ga.ga_len += cplen; 1490 1491 in_str += inlen; 1492 } 1493 else 1494 { 1495 // When not using multi-byte chars we can do it faster. 1496 p = vim_strchr(fromstr, *in_str); 1497 if (p != NULL) 1498 ga_append(&ga, tostr[p - fromstr]); 1499 else 1500 ga_append(&ga, *in_str); 1501 ++in_str; 1502 } 1503 } 1504 1505 // add a terminating NUL 1506 (void)ga_grow(&ga, 1); 1507 ga_append(&ga, NUL); 1508 1509 rettv->vval.v_string = ga.ga_data; 1510 } 1511 1512 /* 1513 * "trim({expr})" function 1514 */ 1515 void 1516 f_trim(typval_T *argvars, typval_T *rettv) 1517 { 1518 char_u buf1[NUMBUFLEN]; 1519 char_u buf2[NUMBUFLEN]; 1520 char_u *head = tv_get_string_buf_chk(&argvars[0], buf1); 1521 char_u *mask = NULL; 1522 char_u *tail; 1523 char_u *prev; 1524 char_u *p; 1525 int c1; 1526 int dir = 0; 1527 1528 rettv->v_type = VAR_STRING; 1529 rettv->vval.v_string = NULL; 1530 if (head == NULL) 1531 return; 1532 1533 if (argvars[1].v_type != VAR_UNKNOWN && argvars[1].v_type != VAR_STRING) 1534 { 1535 semsg(_(e_invarg2), tv_get_string(&argvars[1])); 1536 return; 1537 } 1538 1539 if (argvars[1].v_type == VAR_STRING) 1540 { 1541 mask = tv_get_string_buf_chk(&argvars[1], buf2); 1542 1543 if (argvars[2].v_type != VAR_UNKNOWN) 1544 { 1545 int error = 0; 1546 1547 // leading or trailing characters to trim 1548 dir = (int)tv_get_number_chk(&argvars[2], &error); 1549 if (error) 1550 return; 1551 if (dir < 0 || dir > 2) 1552 { 1553 semsg(_(e_invarg2), tv_get_string(&argvars[2])); 1554 return; 1555 } 1556 } 1557 } 1558 1559 if (dir == 0 || dir == 1) 1560 { 1561 // Trim leading characters 1562 while (*head != NUL) 1563 { 1564 c1 = PTR2CHAR(head); 1565 if (mask == NULL) 1566 { 1567 if (c1 > ' ' && c1 != 0xa0) 1568 break; 1569 } 1570 else 1571 { 1572 for (p = mask; *p != NUL; MB_PTR_ADV(p)) 1573 if (c1 == PTR2CHAR(p)) 1574 break; 1575 if (*p == NUL) 1576 break; 1577 } 1578 MB_PTR_ADV(head); 1579 } 1580 } 1581 1582 tail = head + STRLEN(head); 1583 if (dir == 0 || dir == 2) 1584 { 1585 // Trim trailing characters 1586 for (; tail > head; tail = prev) 1587 { 1588 prev = tail; 1589 MB_PTR_BACK(head, prev); 1590 c1 = PTR2CHAR(prev); 1591 if (mask == NULL) 1592 { 1593 if (c1 > ' ' && c1 != 0xa0) 1594 break; 1595 } 1596 else 1597 { 1598 for (p = mask; *p != NUL; MB_PTR_ADV(p)) 1599 if (c1 == PTR2CHAR(p)) 1600 break; 1601 if (*p == NUL) 1602 break; 1603 } 1604 } 1605 } 1606 rettv->vval.v_string = vim_strnsave(head, tail - head); 1607 } 1608 1609 #endif 1610