1 /* vi:set ts=8 sts=4 sw=4 noet: 2 * 3 * VIM - Vi IMproved by Bram Moolenaar 4 * 5 * Do ":help uganda" in Vim to read copying and usage conditions. 6 * Do ":help credits" in Vim to see a list of people who contributed. 7 * See README.txt for an overview of the Vim source code. 8 */ 9 10 /* 11 * strings.c: string manipulation functions 12 */ 13 14 #include "vim.h" 15 16 /* 17 * Copy "string" into newly allocated memory. 18 */ 19 char_u * 20 vim_strsave(char_u *string) 21 { 22 char_u *p; 23 size_t len; 24 25 len = STRLEN(string) + 1; 26 p = alloc(len); 27 if (p != NULL) 28 mch_memmove(p, string, len); 29 return p; 30 } 31 32 /* 33 * Copy up to "len" bytes of "string" into newly allocated memory and 34 * terminate with a NUL. 35 * The allocated memory always has size "len + 1", also when "string" is 36 * shorter. 37 */ 38 char_u * 39 vim_strnsave(char_u *string, size_t len) 40 { 41 char_u *p; 42 43 p = alloc(len + 1); 44 if (p != NULL) 45 { 46 STRNCPY(p, string, len); 47 p[len] = NUL; 48 } 49 return p; 50 } 51 52 /* 53 * Same as vim_strsave(), but any characters found in esc_chars are preceded 54 * by a backslash. 55 */ 56 char_u * 57 vim_strsave_escaped(char_u *string, char_u *esc_chars) 58 { 59 return vim_strsave_escaped_ext(string, esc_chars, '\\', FALSE); 60 } 61 62 /* 63 * Same as vim_strsave_escaped(), but when "bsl" is TRUE also escape 64 * characters where rem_backslash() would remove the backslash. 65 * Escape the characters with "cc". 66 */ 67 char_u * 68 vim_strsave_escaped_ext( 69 char_u *string, 70 char_u *esc_chars, 71 int cc, 72 int bsl) 73 { 74 char_u *p; 75 char_u *p2; 76 char_u *escaped_string; 77 unsigned length; 78 int l; 79 80 /* 81 * First count the number of backslashes required. 82 * Then allocate the memory and insert them. 83 */ 84 length = 1; // count the trailing NUL 85 for (p = string; *p; p++) 86 { 87 if (has_mbyte && (l = (*mb_ptr2len)(p)) > 1) 88 { 89 length += l; // count a multibyte char 90 p += l - 1; 91 continue; 92 } 93 if (vim_strchr(esc_chars, *p) != NULL || (bsl && rem_backslash(p))) 94 ++length; // count a backslash 95 ++length; // count an ordinary char 96 } 97 escaped_string = alloc(length); 98 if (escaped_string != NULL) 99 { 100 p2 = escaped_string; 101 for (p = string; *p; p++) 102 { 103 if (has_mbyte && (l = (*mb_ptr2len)(p)) > 1) 104 { 105 mch_memmove(p2, p, (size_t)l); 106 p2 += l; 107 p += l - 1; // skip multibyte char 108 continue; 109 } 110 if (vim_strchr(esc_chars, *p) != NULL || (bsl && rem_backslash(p))) 111 *p2++ = cc; 112 *p2++ = *p; 113 } 114 *p2 = NUL; 115 } 116 return escaped_string; 117 } 118 119 /* 120 * Return TRUE when 'shell' has "csh" in the tail. 121 */ 122 int 123 csh_like_shell(void) 124 { 125 return (strstr((char *)gettail(p_sh), "csh") != NULL); 126 } 127 128 /* 129 * Escape "string" for use as a shell argument with system(). 130 * This uses single quotes, except when we know we need to use double quotes 131 * (MS-DOS and MS-Windows not using PowerShell and without 'shellslash' set). 132 * PowerShell also uses a novel escaping for enclosed single quotes - double 133 * them up. 134 * Escape a newline, depending on the 'shell' option. 135 * When "do_special" is TRUE also replace "!", "%", "#" and things starting 136 * with "<" like "<cfile>". 137 * When "do_newline" is FALSE do not escape newline unless it is csh shell. 138 * Returns the result in allocated memory, NULL if we have run out. 139 */ 140 char_u * 141 vim_strsave_shellescape(char_u *string, int do_special, int do_newline) 142 { 143 unsigned length; 144 char_u *p; 145 char_u *d; 146 char_u *escaped_string; 147 int l; 148 int csh_like; 149 char_u *shname; 150 int powershell; 151 # ifdef MSWIN 152 int double_quotes; 153 # endif 154 155 // Only csh and similar shells expand '!' within single quotes. For sh and 156 // the like we must not put a backslash before it, it will be taken 157 // literally. If do_special is set the '!' will be escaped twice. 158 // Csh also needs to have "\n" escaped twice when do_special is set. 159 csh_like = csh_like_shell(); 160 161 // PowerShell uses it's own version for quoting single quotes 162 shname = gettail(p_sh); 163 powershell = strstr((char *)shname, "pwsh") != NULL; 164 # ifdef MSWIN 165 powershell = powershell || strstr((char *)shname, "powershell") != NULL; 166 // PowerShell only accepts single quotes so override shellslash. 167 double_quotes = !powershell && !p_ssl; 168 # endif 169 170 // First count the number of extra bytes required. 171 length = (unsigned)STRLEN(string) + 3; // two quotes and a trailing NUL 172 for (p = string; *p != NUL; MB_PTR_ADV(p)) 173 { 174 # ifdef MSWIN 175 if (double_quotes) 176 { 177 if (*p == '"') 178 ++length; // " -> "" 179 } 180 else 181 # endif 182 if (*p == '\'') 183 { 184 if (powershell) 185 length +=2; // ' => '' 186 else 187 length += 3; // ' => '\'' 188 } 189 if ((*p == '\n' && (csh_like || do_newline)) 190 || (*p == '!' && (csh_like || do_special))) 191 { 192 ++length; // insert backslash 193 if (csh_like && do_special) 194 ++length; // insert backslash 195 } 196 if (do_special && find_cmdline_var(p, &l) >= 0) 197 { 198 ++length; // insert backslash 199 p += l - 1; 200 } 201 } 202 203 // Allocate memory for the result and fill it. 204 escaped_string = alloc(length); 205 if (escaped_string != NULL) 206 { 207 d = escaped_string; 208 209 // add opening quote 210 # ifdef MSWIN 211 if (double_quotes) 212 *d++ = '"'; 213 else 214 # endif 215 *d++ = '\''; 216 217 for (p = string; *p != NUL; ) 218 { 219 # ifdef MSWIN 220 if (double_quotes) 221 { 222 if (*p == '"') 223 { 224 *d++ = '"'; 225 *d++ = '"'; 226 ++p; 227 continue; 228 } 229 } 230 else 231 # endif 232 if (*p == '\'') 233 { 234 if (powershell) 235 { 236 *d++ = '\''; 237 *d++ = '\''; 238 } 239 else 240 { 241 *d++ = '\''; 242 *d++ = '\\'; 243 *d++ = '\''; 244 *d++ = '\''; 245 } 246 ++p; 247 continue; 248 } 249 if ((*p == '\n' && (csh_like || do_newline)) 250 || (*p == '!' && (csh_like || do_special))) 251 { 252 *d++ = '\\'; 253 if (csh_like && do_special) 254 *d++ = '\\'; 255 *d++ = *p++; 256 continue; 257 } 258 if (do_special && find_cmdline_var(p, &l) >= 0) 259 { 260 *d++ = '\\'; // insert backslash 261 while (--l >= 0) // copy the var 262 *d++ = *p++; 263 continue; 264 } 265 266 MB_COPY_CHAR(p, d); 267 } 268 269 // add terminating quote and finish with a NUL 270 # ifdef MSWIN 271 if (double_quotes) 272 *d++ = '"'; 273 else 274 # endif 275 *d++ = '\''; 276 *d = NUL; 277 } 278 279 return escaped_string; 280 } 281 282 /* 283 * Like vim_strsave(), but make all characters uppercase. 284 * This uses ASCII lower-to-upper case translation, language independent. 285 */ 286 char_u * 287 vim_strsave_up(char_u *string) 288 { 289 char_u *p1; 290 291 p1 = vim_strsave(string); 292 vim_strup(p1); 293 return p1; 294 } 295 296 /* 297 * Like vim_strnsave(), but make all characters uppercase. 298 * This uses ASCII lower-to-upper case translation, language independent. 299 */ 300 char_u * 301 vim_strnsave_up(char_u *string, size_t len) 302 { 303 char_u *p1; 304 305 p1 = vim_strnsave(string, len); 306 vim_strup(p1); 307 return p1; 308 } 309 310 /* 311 * ASCII lower-to-upper case translation, language independent. 312 */ 313 void 314 vim_strup( 315 char_u *p) 316 { 317 char_u *p2; 318 int c; 319 320 if (p != NULL) 321 { 322 p2 = p; 323 while ((c = *p2) != NUL) 324 #ifdef EBCDIC 325 *p2++ = isalpha(c) ? toupper(c) : c; 326 #else 327 *p2++ = (c < 'a' || c > 'z') ? c : (c - 0x20); 328 #endif 329 } 330 } 331 332 #if defined(FEAT_EVAL) || defined(FEAT_SPELL) || defined(PROTO) 333 /* 334 * Make string "s" all upper-case and return it in allocated memory. 335 * Handles multi-byte characters as well as possible. 336 * Returns NULL when out of memory. 337 */ 338 static char_u * 339 strup_save(char_u *orig) 340 { 341 char_u *p; 342 char_u *res; 343 344 res = p = vim_strsave(orig); 345 346 if (res != NULL) 347 while (*p != NUL) 348 { 349 int l; 350 351 if (enc_utf8) 352 { 353 int c, uc; 354 int newl; 355 char_u *s; 356 357 c = utf_ptr2char(p); 358 l = utf_ptr2len(p); 359 if (c == 0) 360 { 361 // overlong sequence, use only the first byte 362 c = *p; 363 l = 1; 364 } 365 uc = utf_toupper(c); 366 367 // Reallocate string when byte count changes. This is rare, 368 // thus it's OK to do another malloc()/free(). 369 newl = utf_char2len(uc); 370 if (newl != l) 371 { 372 s = alloc(STRLEN(res) + 1 + newl - l); 373 if (s == NULL) 374 { 375 vim_free(res); 376 return NULL; 377 } 378 mch_memmove(s, res, p - res); 379 STRCPY(s + (p - res) + newl, p + l); 380 p = s + (p - res); 381 vim_free(res); 382 res = s; 383 } 384 385 utf_char2bytes(uc, p); 386 p += newl; 387 } 388 else if (has_mbyte && (l = (*mb_ptr2len)(p)) > 1) 389 p += l; // skip multi-byte character 390 else 391 { 392 *p = TOUPPER_LOC(*p); // note that toupper() can be a macro 393 p++; 394 } 395 } 396 397 return res; 398 } 399 400 /* 401 * Make string "s" all lower-case and return it in allocated memory. 402 * Handles multi-byte characters as well as possible. 403 * Returns NULL when out of memory. 404 */ 405 char_u * 406 strlow_save(char_u *orig) 407 { 408 char_u *p; 409 char_u *res; 410 411 res = p = vim_strsave(orig); 412 413 if (res != NULL) 414 while (*p != NUL) 415 { 416 int l; 417 418 if (enc_utf8) 419 { 420 int c, lc; 421 int newl; 422 char_u *s; 423 424 c = utf_ptr2char(p); 425 l = utf_ptr2len(p); 426 if (c == 0) 427 { 428 // overlong sequence, use only the first byte 429 c = *p; 430 l = 1; 431 } 432 lc = utf_tolower(c); 433 434 // Reallocate string when byte count changes. This is rare, 435 // thus it's OK to do another malloc()/free(). 436 newl = utf_char2len(lc); 437 if (newl != l) 438 { 439 s = alloc(STRLEN(res) + 1 + newl - l); 440 if (s == NULL) 441 { 442 vim_free(res); 443 return NULL; 444 } 445 mch_memmove(s, res, p - res); 446 STRCPY(s + (p - res) + newl, p + l); 447 p = s + (p - res); 448 vim_free(res); 449 res = s; 450 } 451 452 utf_char2bytes(lc, p); 453 p += newl; 454 } 455 else if (has_mbyte && (l = (*mb_ptr2len)(p)) > 1) 456 p += l; // skip multi-byte character 457 else 458 { 459 *p = TOLOWER_LOC(*p); // note that tolower() can be a macro 460 p++; 461 } 462 } 463 464 return res; 465 } 466 #endif 467 468 /* 469 * delete spaces at the end of a string 470 */ 471 void 472 del_trailing_spaces(char_u *ptr) 473 { 474 char_u *q; 475 476 q = ptr + STRLEN(ptr); 477 while (--q > ptr && VIM_ISWHITE(q[0]) && q[-1] != '\\' && q[-1] != Ctrl_V) 478 *q = NUL; 479 } 480 481 /* 482 * Like strncpy(), but always terminate the result with one NUL. 483 * "to" must be "len + 1" long! 484 */ 485 void 486 vim_strncpy(char_u *to, char_u *from, size_t len) 487 { 488 STRNCPY(to, from, len); 489 to[len] = NUL; 490 } 491 492 /* 493 * Like strcat(), but make sure the result fits in "tosize" bytes and is 494 * always NUL terminated. "from" and "to" may overlap. 495 */ 496 void 497 vim_strcat(char_u *to, char_u *from, size_t tosize) 498 { 499 size_t tolen = STRLEN(to); 500 size_t fromlen = STRLEN(from); 501 502 if (tolen + fromlen + 1 > tosize) 503 { 504 mch_memmove(to + tolen, from, tosize - tolen - 1); 505 to[tosize - 1] = NUL; 506 } 507 else 508 mch_memmove(to + tolen, from, fromlen + 1); 509 } 510 511 #if (!defined(HAVE_STRCASECMP) && !defined(HAVE_STRICMP)) || defined(PROTO) 512 /* 513 * Compare two strings, ignoring case, using current locale. 514 * Doesn't work for multi-byte characters. 515 * return 0 for match, < 0 for smaller, > 0 for bigger 516 */ 517 int 518 vim_stricmp(char *s1, char *s2) 519 { 520 int i; 521 522 for (;;) 523 { 524 i = (int)TOLOWER_LOC(*s1) - (int)TOLOWER_LOC(*s2); 525 if (i != 0) 526 return i; // this character different 527 if (*s1 == NUL) 528 break; // strings match until NUL 529 ++s1; 530 ++s2; 531 } 532 return 0; // strings match 533 } 534 #endif 535 536 #if (!defined(HAVE_STRNCASECMP) && !defined(HAVE_STRNICMP)) || defined(PROTO) 537 /* 538 * Compare two strings, for length "len", ignoring case, using current locale. 539 * Doesn't work for multi-byte characters. 540 * return 0 for match, < 0 for smaller, > 0 for bigger 541 */ 542 int 543 vim_strnicmp(char *s1, char *s2, size_t len) 544 { 545 int i; 546 547 while (len > 0) 548 { 549 i = (int)TOLOWER_LOC(*s1) - (int)TOLOWER_LOC(*s2); 550 if (i != 0) 551 return i; // this character different 552 if (*s1 == NUL) 553 break; // strings match until NUL 554 ++s1; 555 ++s2; 556 --len; 557 } 558 return 0; // strings match 559 } 560 #endif 561 562 /* 563 * Search for first occurrence of "c" in "string". 564 * Version of strchr() that handles unsigned char strings with characters from 565 * 128 to 255 correctly. It also doesn't return a pointer to the NUL at the 566 * end of the string. 567 */ 568 char_u * 569 vim_strchr(char_u *string, int c) 570 { 571 char_u *p; 572 int b; 573 574 p = string; 575 if (enc_utf8 && c >= 0x80) 576 { 577 while (*p != NUL) 578 { 579 int l = utfc_ptr2len(p); 580 581 // Avoid matching an illegal byte here. 582 if (utf_ptr2char(p) == c && l > 1) 583 return p; 584 p += l; 585 } 586 return NULL; 587 } 588 if (enc_dbcs != 0 && c > 255) 589 { 590 int n2 = c & 0xff; 591 592 c = ((unsigned)c >> 8) & 0xff; 593 while ((b = *p) != NUL) 594 { 595 if (b == c && p[1] == n2) 596 return p; 597 p += (*mb_ptr2len)(p); 598 } 599 return NULL; 600 } 601 if (has_mbyte) 602 { 603 while ((b = *p) != NUL) 604 { 605 if (b == c) 606 return p; 607 p += (*mb_ptr2len)(p); 608 } 609 return NULL; 610 } 611 while ((b = *p) != NUL) 612 { 613 if (b == c) 614 return p; 615 ++p; 616 } 617 return NULL; 618 } 619 620 /* 621 * Version of strchr() that only works for bytes and handles unsigned char 622 * strings with characters above 128 correctly. It also doesn't return a 623 * pointer to the NUL at the end of the string. 624 */ 625 char_u * 626 vim_strbyte(char_u *string, int c) 627 { 628 char_u *p = string; 629 630 while (*p != NUL) 631 { 632 if (*p == c) 633 return p; 634 ++p; 635 } 636 return NULL; 637 } 638 639 /* 640 * Search for last occurrence of "c" in "string". 641 * Version of strrchr() that handles unsigned char strings with characters from 642 * 128 to 255 correctly. It also doesn't return a pointer to the NUL at the 643 * end of the string. 644 * Return NULL if not found. 645 * Does not handle multi-byte char for "c"! 646 */ 647 char_u * 648 vim_strrchr(char_u *string, int c) 649 { 650 char_u *retval = NULL; 651 char_u *p = string; 652 653 while (*p) 654 { 655 if (*p == c) 656 retval = p; 657 MB_PTR_ADV(p); 658 } 659 return retval; 660 } 661 662 /* 663 * Vim's version of strpbrk(), in case it's missing. 664 * Don't generate a prototype for this, causes problems when it's not used. 665 */ 666 #ifndef PROTO 667 # ifndef HAVE_STRPBRK 668 # ifdef vim_strpbrk 669 # undef vim_strpbrk 670 # endif 671 char_u * 672 vim_strpbrk(char_u *s, char_u *charset) 673 { 674 while (*s) 675 { 676 if (vim_strchr(charset, *s) != NULL) 677 return s; 678 MB_PTR_ADV(s); 679 } 680 return NULL; 681 } 682 # endif 683 #endif 684 685 /* 686 * Sort an array of strings. 687 */ 688 static int sort_compare(const void *s1, const void *s2); 689 690 static int 691 sort_compare(const void *s1, const void *s2) 692 { 693 return STRCMP(*(char **)s1, *(char **)s2); 694 } 695 696 void 697 sort_strings( 698 char_u **files, 699 int count) 700 { 701 qsort((void *)files, (size_t)count, sizeof(char_u *), sort_compare); 702 } 703 704 #if defined(FEAT_QUICKFIX) || defined(FEAT_SPELL) || defined(PROTO) 705 /* 706 * Return TRUE if string "s" contains a non-ASCII character (128 or higher). 707 * When "s" is NULL FALSE is returned. 708 */ 709 int 710 has_non_ascii(char_u *s) 711 { 712 char_u *p; 713 714 if (s != NULL) 715 for (p = s; *p != NUL; ++p) 716 if (*p >= 128) 717 return TRUE; 718 return FALSE; 719 } 720 #endif 721 722 /* 723 * Concatenate two strings and return the result in allocated memory. 724 * Returns NULL when out of memory. 725 */ 726 char_u * 727 concat_str(char_u *str1, char_u *str2) 728 { 729 char_u *dest; 730 size_t l = str1 == NULL ? 0 : STRLEN(str1); 731 732 dest = alloc(l + (str2 == NULL ? 0 : STRLEN(str2)) + 1L); 733 if (dest != NULL) 734 { 735 if (str1 == NULL) 736 *dest = NUL; 737 else 738 STRCPY(dest, str1); 739 if (str2 != NULL) 740 STRCPY(dest + l, str2); 741 } 742 return dest; 743 } 744 745 #if defined(FEAT_EVAL) || defined(PROTO) 746 747 /* 748 * Return string "str" in ' quotes, doubling ' characters. 749 * If "str" is NULL an empty string is assumed. 750 * If "function" is TRUE make it function('string'). 751 */ 752 char_u * 753 string_quote(char_u *str, int function) 754 { 755 unsigned len; 756 char_u *p, *r, *s; 757 758 len = (function ? 13 : 3); 759 if (str != NULL) 760 { 761 len += (unsigned)STRLEN(str); 762 for (p = str; *p != NUL; MB_PTR_ADV(p)) 763 if (*p == '\'') 764 ++len; 765 } 766 s = r = alloc(len); 767 if (r != NULL) 768 { 769 if (function) 770 { 771 STRCPY(r, "function('"); 772 r += 10; 773 } 774 else 775 *r++ = '\''; 776 if (str != NULL) 777 for (p = str; *p != NUL; ) 778 { 779 if (*p == '\'') 780 *r++ = '\''; 781 MB_COPY_CHAR(p, r); 782 } 783 *r++ = '\''; 784 if (function) 785 *r++ = ')'; 786 *r++ = NUL; 787 } 788 return s; 789 } 790 791 static void 792 byteidx(typval_T *argvars, typval_T *rettv, int comp UNUSED) 793 { 794 char_u *t; 795 char_u *str; 796 varnumber_T idx; 797 798 rettv->vval.v_number = -1; 799 800 if (in_vim9script() 801 && (check_for_string_arg(argvars, 0) == FAIL 802 || check_for_number_arg(argvars, 1) == FAIL)) 803 return; 804 805 str = tv_get_string_chk(&argvars[0]); 806 idx = tv_get_number_chk(&argvars[1], NULL); 807 if (str == NULL || idx < 0) 808 return; 809 810 t = str; 811 for ( ; idx > 0; idx--) 812 { 813 if (*t == NUL) // EOL reached 814 return; 815 if (enc_utf8 && comp) 816 t += utf_ptr2len(t); 817 else 818 t += (*mb_ptr2len)(t); 819 } 820 rettv->vval.v_number = (varnumber_T)(t - str); 821 } 822 823 /* 824 * "byteidx()" function 825 */ 826 void 827 f_byteidx(typval_T *argvars, typval_T *rettv) 828 { 829 byteidx(argvars, rettv, FALSE); 830 } 831 832 /* 833 * "byteidxcomp()" function 834 */ 835 void 836 f_byteidxcomp(typval_T *argvars, typval_T *rettv) 837 { 838 byteidx(argvars, rettv, TRUE); 839 } 840 841 /* 842 * "charidx()" function 843 */ 844 void 845 f_charidx(typval_T *argvars, typval_T *rettv) 846 { 847 char_u *str; 848 varnumber_T idx; 849 varnumber_T countcc = FALSE; 850 char_u *p; 851 int len; 852 int (*ptr2len)(char_u *); 853 854 rettv->vval.v_number = -1; 855 856 if (in_vim9script() 857 && (check_for_string_arg(argvars, 0) == FAIL 858 || check_for_number_arg(argvars, 1) == FAIL 859 || check_for_opt_bool_arg(argvars, 2) == FAIL)) 860 return; 861 862 if (argvars[0].v_type != VAR_STRING || argvars[1].v_type != VAR_NUMBER 863 || (argvars[2].v_type != VAR_UNKNOWN 864 && argvars[2].v_type != VAR_NUMBER 865 && argvars[2].v_type != VAR_BOOL)) 866 { 867 emsg(_(e_invarg)); 868 return; 869 } 870 871 str = tv_get_string_chk(&argvars[0]); 872 idx = tv_get_number_chk(&argvars[1], NULL); 873 if (str == NULL || idx < 0) 874 return; 875 876 if (argvars[2].v_type != VAR_UNKNOWN) 877 countcc = tv_get_bool(&argvars[2]); 878 if (countcc < 0 || countcc > 1) 879 { 880 semsg(_(e_using_number_as_bool_nr), countcc); 881 return; 882 } 883 884 if (enc_utf8 && countcc) 885 ptr2len = utf_ptr2len; 886 else 887 ptr2len = mb_ptr2len; 888 889 for (p = str, len = 0; p <= str + idx; len++) 890 { 891 if (*p == NUL) 892 return; 893 p += ptr2len(p); 894 } 895 896 rettv->vval.v_number = len > 0 ? len - 1 : 0; 897 } 898 899 /* 900 * "str2list()" function 901 */ 902 void 903 f_str2list(typval_T *argvars, typval_T *rettv) 904 { 905 char_u *p; 906 int utf8 = FALSE; 907 908 if (rettv_list_alloc(rettv) == FAIL) 909 return; 910 911 if (in_vim9script() 912 && (check_for_string_arg(argvars, 0) == FAIL 913 || check_for_opt_bool_arg(argvars, 1) == FAIL)) 914 return; 915 916 if (argvars[1].v_type != VAR_UNKNOWN) 917 utf8 = (int)tv_get_bool_chk(&argvars[1], NULL); 918 919 p = tv_get_string(&argvars[0]); 920 921 if (has_mbyte || utf8) 922 { 923 int (*ptr2len)(char_u *); 924 int (*ptr2char)(char_u *); 925 926 if (utf8 || enc_utf8) 927 { 928 ptr2len = utf_ptr2len; 929 ptr2char = utf_ptr2char; 930 } 931 else 932 { 933 ptr2len = mb_ptr2len; 934 ptr2char = mb_ptr2char; 935 } 936 937 for ( ; *p != NUL; p += (*ptr2len)(p)) 938 list_append_number(rettv->vval.v_list, (*ptr2char)(p)); 939 } 940 else 941 for ( ; *p != NUL; ++p) 942 list_append_number(rettv->vval.v_list, *p); 943 } 944 945 /* 946 * "str2nr()" function 947 */ 948 void 949 f_str2nr(typval_T *argvars, typval_T *rettv) 950 { 951 int base = 10; 952 char_u *p; 953 varnumber_T n; 954 int what = 0; 955 int isneg; 956 957 if (in_vim9script() 958 && (check_for_string_arg(argvars, 0) == FAIL 959 || check_for_opt_number_arg(argvars, 1) == FAIL 960 || (argvars[1].v_type != VAR_UNKNOWN 961 && check_for_opt_bool_arg(argvars, 2) == FAIL))) 962 return; 963 964 if (argvars[1].v_type != VAR_UNKNOWN) 965 { 966 base = (int)tv_get_number(&argvars[1]); 967 if (base != 2 && base != 8 && base != 10 && base != 16) 968 { 969 emsg(_(e_invarg)); 970 return; 971 } 972 if (argvars[2].v_type != VAR_UNKNOWN && tv_get_bool(&argvars[2])) 973 what |= STR2NR_QUOTE; 974 } 975 976 p = skipwhite(tv_get_string_strict(&argvars[0])); 977 isneg = (*p == '-'); 978 if (*p == '+' || *p == '-') 979 p = skipwhite(p + 1); 980 switch (base) 981 { 982 case 2: what |= STR2NR_BIN + STR2NR_FORCE; break; 983 case 8: what |= STR2NR_OCT + STR2NR_OOCT + STR2NR_FORCE; break; 984 case 16: what |= STR2NR_HEX + STR2NR_FORCE; break; 985 } 986 vim_str2nr(p, NULL, NULL, what, &n, NULL, 0, FALSE); 987 // Text after the number is silently ignored. 988 if (isneg) 989 rettv->vval.v_number = -n; 990 else 991 rettv->vval.v_number = n; 992 993 } 994 995 /* 996 * "strgetchar()" function 997 */ 998 void 999 f_strgetchar(typval_T *argvars, typval_T *rettv) 1000 { 1001 char_u *str; 1002 int len; 1003 int error = FALSE; 1004 int charidx; 1005 int byteidx = 0; 1006 1007 rettv->vval.v_number = -1; 1008 1009 if (in_vim9script() 1010 && (check_for_string_arg(argvars, 0) == FAIL 1011 || check_for_number_arg(argvars, 1) == FAIL)) 1012 return; 1013 1014 str = tv_get_string_chk(&argvars[0]); 1015 if (str == NULL) 1016 return; 1017 len = (int)STRLEN(str); 1018 charidx = (int)tv_get_number_chk(&argvars[1], &error); 1019 if (error) 1020 return; 1021 1022 while (charidx >= 0 && byteidx < len) 1023 { 1024 if (charidx == 0) 1025 { 1026 rettv->vval.v_number = mb_ptr2char(str + byteidx); 1027 break; 1028 } 1029 --charidx; 1030 byteidx += MB_CPTR2LEN(str + byteidx); 1031 } 1032 } 1033 1034 /* 1035 * "stridx()" function 1036 */ 1037 void 1038 f_stridx(typval_T *argvars, typval_T *rettv) 1039 { 1040 char_u buf[NUMBUFLEN]; 1041 char_u *needle; 1042 char_u *haystack; 1043 char_u *save_haystack; 1044 char_u *pos; 1045 int start_idx; 1046 1047 if (in_vim9script() 1048 && (check_for_string_arg(argvars, 0) == FAIL 1049 || check_for_string_arg(argvars, 1) == FAIL 1050 || check_for_opt_number_arg(argvars, 2) == FAIL)) 1051 return; 1052 1053 needle = tv_get_string_chk(&argvars[1]); 1054 save_haystack = haystack = tv_get_string_buf_chk(&argvars[0], buf); 1055 rettv->vval.v_number = -1; 1056 if (needle == NULL || haystack == NULL) 1057 return; // type error; errmsg already given 1058 1059 if (argvars[2].v_type != VAR_UNKNOWN) 1060 { 1061 int error = FALSE; 1062 1063 start_idx = (int)tv_get_number_chk(&argvars[2], &error); 1064 if (error || start_idx >= (int)STRLEN(haystack)) 1065 return; 1066 if (start_idx >= 0) 1067 haystack += start_idx; 1068 } 1069 1070 pos = (char_u *)strstr((char *)haystack, (char *)needle); 1071 if (pos != NULL) 1072 rettv->vval.v_number = (varnumber_T)(pos - save_haystack); 1073 } 1074 1075 /* 1076 * "string()" function 1077 */ 1078 void 1079 f_string(typval_T *argvars, typval_T *rettv) 1080 { 1081 char_u *tofree; 1082 char_u numbuf[NUMBUFLEN]; 1083 1084 rettv->v_type = VAR_STRING; 1085 rettv->vval.v_string = tv2string(&argvars[0], &tofree, numbuf, 1086 get_copyID()); 1087 // Make a copy if we have a value but it's not in allocated memory. 1088 if (rettv->vval.v_string != NULL && tofree == NULL) 1089 rettv->vval.v_string = vim_strsave(rettv->vval.v_string); 1090 } 1091 1092 /* 1093 * "strlen()" function 1094 */ 1095 void 1096 f_strlen(typval_T *argvars, typval_T *rettv) 1097 { 1098 if (in_vim9script() 1099 && check_for_string_or_number_arg(argvars, 0) == FAIL) 1100 return; 1101 1102 rettv->vval.v_number = (varnumber_T)(STRLEN( 1103 tv_get_string(&argvars[0]))); 1104 } 1105 1106 static void 1107 strchar_common(typval_T *argvars, typval_T *rettv, int skipcc) 1108 { 1109 char_u *s = tv_get_string(&argvars[0]); 1110 varnumber_T len = 0; 1111 int (*func_mb_ptr2char_adv)(char_u **pp); 1112 1113 func_mb_ptr2char_adv = skipcc ? mb_ptr2char_adv : mb_cptr2char_adv; 1114 while (*s != NUL) 1115 { 1116 func_mb_ptr2char_adv(&s); 1117 ++len; 1118 } 1119 rettv->vval.v_number = len; 1120 } 1121 1122 /* 1123 * "strcharlen()" function 1124 */ 1125 void 1126 f_strcharlen(typval_T *argvars, typval_T *rettv) 1127 { 1128 if (in_vim9script() 1129 && check_for_string_or_number_arg(argvars, 0) == FAIL) 1130 return; 1131 1132 strchar_common(argvars, rettv, TRUE); 1133 } 1134 1135 /* 1136 * "strchars()" function 1137 */ 1138 void 1139 f_strchars(typval_T *argvars, typval_T *rettv) 1140 { 1141 varnumber_T skipcc = FALSE; 1142 1143 if (in_vim9script() 1144 && (check_for_string_arg(argvars, 0) == FAIL 1145 || check_for_opt_bool_arg(argvars, 1) == FAIL)) 1146 return; 1147 1148 if (argvars[1].v_type != VAR_UNKNOWN) 1149 skipcc = tv_get_bool(&argvars[1]); 1150 if (skipcc < 0 || skipcc > 1) 1151 semsg(_(e_using_number_as_bool_nr), skipcc); 1152 else 1153 strchar_common(argvars, rettv, skipcc); 1154 } 1155 1156 /* 1157 * "strdisplaywidth()" function 1158 */ 1159 void 1160 f_strdisplaywidth(typval_T *argvars, typval_T *rettv) 1161 { 1162 char_u *s; 1163 int col = 0; 1164 1165 rettv->vval.v_number = -1; 1166 1167 if (in_vim9script() 1168 && (check_for_string_arg(argvars, 0) == FAIL 1169 || check_for_opt_number_arg(argvars, 1) == FAIL)) 1170 return; 1171 1172 s = tv_get_string(&argvars[0]); 1173 if (argvars[1].v_type != VAR_UNKNOWN) 1174 col = (int)tv_get_number(&argvars[1]); 1175 1176 rettv->vval.v_number = (varnumber_T)(linetabsize_col(col, s) - col); 1177 } 1178 1179 /* 1180 * "strwidth()" function 1181 */ 1182 void 1183 f_strwidth(typval_T *argvars, typval_T *rettv) 1184 { 1185 char_u *s; 1186 1187 if (in_vim9script() && check_for_string_arg(argvars, 0) == FAIL) 1188 return; 1189 1190 s = tv_get_string_strict(&argvars[0]); 1191 rettv->vval.v_number = (varnumber_T)(mb_string2cells(s, -1)); 1192 } 1193 1194 /* 1195 * "strcharpart()" function 1196 */ 1197 void 1198 f_strcharpart(typval_T *argvars, typval_T *rettv) 1199 { 1200 char_u *p; 1201 int nchar; 1202 int nbyte = 0; 1203 int charlen; 1204 int skipcc = FALSE; 1205 int len = 0; 1206 int slen; 1207 int error = FALSE; 1208 1209 if (in_vim9script() 1210 && (check_for_string_arg(argvars, 0) == FAIL 1211 || check_for_number_arg(argvars, 1) == FAIL 1212 || check_for_opt_number_arg(argvars, 2) == FAIL 1213 || (argvars[2].v_type != VAR_UNKNOWN 1214 && check_for_opt_bool_arg(argvars, 3) == FAIL))) 1215 return; 1216 1217 p = tv_get_string(&argvars[0]); 1218 slen = (int)STRLEN(p); 1219 1220 nchar = (int)tv_get_number_chk(&argvars[1], &error); 1221 if (!error) 1222 { 1223 if (argvars[2].v_type != VAR_UNKNOWN 1224 && argvars[3].v_type != VAR_UNKNOWN) 1225 { 1226 skipcc = tv_get_bool(&argvars[3]); 1227 if (skipcc < 0 || skipcc > 1) 1228 { 1229 semsg(_(e_using_number_as_bool_nr), skipcc); 1230 return; 1231 } 1232 } 1233 1234 if (nchar > 0) 1235 while (nchar > 0 && nbyte < slen) 1236 { 1237 if (skipcc) 1238 nbyte += mb_ptr2len(p + nbyte); 1239 else 1240 nbyte += MB_CPTR2LEN(p + nbyte); 1241 --nchar; 1242 } 1243 else 1244 nbyte = nchar; 1245 if (argvars[2].v_type != VAR_UNKNOWN) 1246 { 1247 charlen = (int)tv_get_number(&argvars[2]); 1248 while (charlen > 0 && nbyte + len < slen) 1249 { 1250 int off = nbyte + len; 1251 1252 if (off < 0) 1253 len += 1; 1254 else 1255 { 1256 if (skipcc) 1257 len += mb_ptr2len(p + off); 1258 else 1259 len += MB_CPTR2LEN(p + off); 1260 } 1261 --charlen; 1262 } 1263 } 1264 else 1265 len = slen - nbyte; // default: all bytes that are available. 1266 } 1267 1268 /* 1269 * Only return the overlap between the specified part and the actual 1270 * string. 1271 */ 1272 if (nbyte < 0) 1273 { 1274 len += nbyte; 1275 nbyte = 0; 1276 } 1277 else if (nbyte > slen) 1278 nbyte = slen; 1279 if (len < 0) 1280 len = 0; 1281 else if (nbyte + len > slen) 1282 len = slen - nbyte; 1283 1284 rettv->v_type = VAR_STRING; 1285 rettv->vval.v_string = vim_strnsave(p + nbyte, len); 1286 } 1287 1288 /* 1289 * "strpart()" function 1290 */ 1291 void 1292 f_strpart(typval_T *argvars, typval_T *rettv) 1293 { 1294 char_u *p; 1295 int n; 1296 int len; 1297 int slen; 1298 int error = FALSE; 1299 1300 if (in_vim9script() 1301 && (check_for_string_arg(argvars, 0) == FAIL 1302 || check_for_number_arg(argvars, 1) == FAIL 1303 || check_for_opt_number_arg(argvars, 2) == FAIL 1304 || (argvars[2].v_type != VAR_UNKNOWN 1305 && check_for_opt_bool_arg(argvars, 3) == FAIL))) 1306 return; 1307 1308 p = tv_get_string(&argvars[0]); 1309 slen = (int)STRLEN(p); 1310 1311 n = (int)tv_get_number_chk(&argvars[1], &error); 1312 if (error) 1313 len = 0; 1314 else if (argvars[2].v_type != VAR_UNKNOWN) 1315 len = (int)tv_get_number(&argvars[2]); 1316 else 1317 len = slen - n; // default len: all bytes that are available. 1318 1319 // Only return the overlap between the specified part and the actual 1320 // string. 1321 if (n < 0) 1322 { 1323 len += n; 1324 n = 0; 1325 } 1326 else if (n > slen) 1327 n = slen; 1328 if (len < 0) 1329 len = 0; 1330 else if (n + len > slen) 1331 len = slen - n; 1332 1333 if (argvars[2].v_type != VAR_UNKNOWN && argvars[3].v_type != VAR_UNKNOWN) 1334 { 1335 int off; 1336 1337 // length in characters 1338 for (off = n; off < slen && len > 0; --len) 1339 off += mb_ptr2len(p + off); 1340 len = off - n; 1341 } 1342 1343 rettv->v_type = VAR_STRING; 1344 rettv->vval.v_string = vim_strnsave(p + n, len); 1345 } 1346 1347 /* 1348 * "strridx()" function 1349 */ 1350 void 1351 f_strridx(typval_T *argvars, typval_T *rettv) 1352 { 1353 char_u buf[NUMBUFLEN]; 1354 char_u *needle; 1355 char_u *haystack; 1356 char_u *rest; 1357 char_u *lastmatch = NULL; 1358 int haystack_len, end_idx; 1359 1360 if (in_vim9script() 1361 && (check_for_string_arg(argvars, 0) == FAIL 1362 || check_for_string_arg(argvars, 1) == FAIL 1363 || check_for_opt_number_arg(argvars, 2) == FAIL)) 1364 return; 1365 1366 needle = tv_get_string_chk(&argvars[1]); 1367 haystack = tv_get_string_buf_chk(&argvars[0], buf); 1368 1369 rettv->vval.v_number = -1; 1370 if (needle == NULL || haystack == NULL) 1371 return; // type error; errmsg already given 1372 1373 haystack_len = (int)STRLEN(haystack); 1374 if (argvars[2].v_type != VAR_UNKNOWN) 1375 { 1376 // Third argument: upper limit for index 1377 end_idx = (int)tv_get_number_chk(&argvars[2], NULL); 1378 if (end_idx < 0) 1379 return; // can never find a match 1380 } 1381 else 1382 end_idx = haystack_len; 1383 1384 if (*needle == NUL) 1385 { 1386 // Empty string matches past the end. 1387 lastmatch = haystack + end_idx; 1388 } 1389 else 1390 { 1391 for (rest = haystack; *rest != '\0'; ++rest) 1392 { 1393 rest = (char_u *)strstr((char *)rest, (char *)needle); 1394 if (rest == NULL || rest > haystack + end_idx) 1395 break; 1396 lastmatch = rest; 1397 } 1398 } 1399 1400 if (lastmatch == NULL) 1401 rettv->vval.v_number = -1; 1402 else 1403 rettv->vval.v_number = (varnumber_T)(lastmatch - haystack); 1404 } 1405 1406 /* 1407 * "strtrans()" function 1408 */ 1409 void 1410 f_strtrans(typval_T *argvars, typval_T *rettv) 1411 { 1412 if (in_vim9script() && check_for_string_arg(argvars, 0) == FAIL) 1413 return; 1414 1415 rettv->v_type = VAR_STRING; 1416 rettv->vval.v_string = transstr(tv_get_string(&argvars[0])); 1417 } 1418 1419 /* 1420 * "tolower(string)" function 1421 */ 1422 void 1423 f_tolower(typval_T *argvars, typval_T *rettv) 1424 { 1425 if (in_vim9script() && check_for_string_arg(argvars, 0) == FAIL) 1426 return; 1427 1428 rettv->v_type = VAR_STRING; 1429 rettv->vval.v_string = strlow_save(tv_get_string(&argvars[0])); 1430 } 1431 1432 /* 1433 * "toupper(string)" function 1434 */ 1435 void 1436 f_toupper(typval_T *argvars, typval_T *rettv) 1437 { 1438 if (in_vim9script() && check_for_string_arg(argvars, 0) == FAIL) 1439 return; 1440 1441 rettv->v_type = VAR_STRING; 1442 rettv->vval.v_string = strup_save(tv_get_string(&argvars[0])); 1443 } 1444 1445 /* 1446 * "tr(string, fromstr, tostr)" function 1447 */ 1448 void 1449 f_tr(typval_T *argvars, typval_T *rettv) 1450 { 1451 char_u *in_str; 1452 char_u *fromstr; 1453 char_u *tostr; 1454 char_u *p; 1455 int inlen; 1456 int fromlen; 1457 int tolen; 1458 int idx; 1459 char_u *cpstr; 1460 int cplen; 1461 int first = TRUE; 1462 char_u buf[NUMBUFLEN]; 1463 char_u buf2[NUMBUFLEN]; 1464 garray_T ga; 1465 1466 if (in_vim9script() 1467 && (check_for_string_arg(argvars, 0) == FAIL 1468 || check_for_string_arg(argvars, 1) == FAIL 1469 || check_for_string_arg(argvars, 2) == FAIL)) 1470 return; 1471 1472 in_str = tv_get_string(&argvars[0]); 1473 fromstr = tv_get_string_buf_chk(&argvars[1], buf); 1474 tostr = tv_get_string_buf_chk(&argvars[2], buf2); 1475 1476 // Default return value: empty string. 1477 rettv->v_type = VAR_STRING; 1478 rettv->vval.v_string = NULL; 1479 if (fromstr == NULL || tostr == NULL) 1480 return; // type error; errmsg already given 1481 ga_init2(&ga, (int)sizeof(char), 80); 1482 1483 if (!has_mbyte) 1484 // not multi-byte: fromstr and tostr must be the same length 1485 if (STRLEN(fromstr) != STRLEN(tostr)) 1486 { 1487 error: 1488 semsg(_(e_invarg2), fromstr); 1489 ga_clear(&ga); 1490 return; 1491 } 1492 1493 // fromstr and tostr have to contain the same number of chars 1494 while (*in_str != NUL) 1495 { 1496 if (has_mbyte) 1497 { 1498 inlen = (*mb_ptr2len)(in_str); 1499 cpstr = in_str; 1500 cplen = inlen; 1501 idx = 0; 1502 for (p = fromstr; *p != NUL; p += fromlen) 1503 { 1504 fromlen = (*mb_ptr2len)(p); 1505 if (fromlen == inlen && STRNCMP(in_str, p, inlen) == 0) 1506 { 1507 for (p = tostr; *p != NUL; p += tolen) 1508 { 1509 tolen = (*mb_ptr2len)(p); 1510 if (idx-- == 0) 1511 { 1512 cplen = tolen; 1513 cpstr = p; 1514 break; 1515 } 1516 } 1517 if (*p == NUL) // tostr is shorter than fromstr 1518 goto error; 1519 break; 1520 } 1521 ++idx; 1522 } 1523 1524 if (first && cpstr == in_str) 1525 { 1526 // Check that fromstr and tostr have the same number of 1527 // (multi-byte) characters. Done only once when a character 1528 // of in_str doesn't appear in fromstr. 1529 first = FALSE; 1530 for (p = tostr; *p != NUL; p += tolen) 1531 { 1532 tolen = (*mb_ptr2len)(p); 1533 --idx; 1534 } 1535 if (idx != 0) 1536 goto error; 1537 } 1538 1539 (void)ga_grow(&ga, cplen); 1540 mch_memmove((char *)ga.ga_data + ga.ga_len, cpstr, (size_t)cplen); 1541 ga.ga_len += cplen; 1542 1543 in_str += inlen; 1544 } 1545 else 1546 { 1547 // When not using multi-byte chars we can do it faster. 1548 p = vim_strchr(fromstr, *in_str); 1549 if (p != NULL) 1550 ga_append(&ga, tostr[p - fromstr]); 1551 else 1552 ga_append(&ga, *in_str); 1553 ++in_str; 1554 } 1555 } 1556 1557 // add a terminating NUL 1558 (void)ga_grow(&ga, 1); 1559 ga_append(&ga, NUL); 1560 1561 rettv->vval.v_string = ga.ga_data; 1562 } 1563 1564 /* 1565 * "trim({expr})" function 1566 */ 1567 void 1568 f_trim(typval_T *argvars, typval_T *rettv) 1569 { 1570 char_u buf1[NUMBUFLEN]; 1571 char_u buf2[NUMBUFLEN]; 1572 char_u *head; 1573 char_u *mask = NULL; 1574 char_u *tail; 1575 char_u *prev; 1576 char_u *p; 1577 int c1; 1578 int dir = 0; 1579 1580 rettv->v_type = VAR_STRING; 1581 rettv->vval.v_string = NULL; 1582 1583 if (in_vim9script() 1584 && (check_for_string_arg(argvars, 0) == FAIL 1585 || check_for_opt_string_arg(argvars, 1) == FAIL 1586 || (argvars[1].v_type != VAR_UNKNOWN 1587 && check_for_opt_number_arg(argvars, 2) == FAIL))) 1588 return; 1589 1590 head = tv_get_string_buf_chk(&argvars[0], buf1); 1591 if (head == NULL) 1592 return; 1593 1594 if (argvars[1].v_type != VAR_UNKNOWN && argvars[1].v_type != VAR_STRING) 1595 { 1596 semsg(_(e_invarg2), tv_get_string(&argvars[1])); 1597 return; 1598 } 1599 1600 if (argvars[1].v_type == VAR_STRING) 1601 { 1602 mask = tv_get_string_buf_chk(&argvars[1], buf2); 1603 1604 if (argvars[2].v_type != VAR_UNKNOWN) 1605 { 1606 int error = 0; 1607 1608 // leading or trailing characters to trim 1609 dir = (int)tv_get_number_chk(&argvars[2], &error); 1610 if (error) 1611 return; 1612 if (dir < 0 || dir > 2) 1613 { 1614 semsg(_(e_invarg2), tv_get_string(&argvars[2])); 1615 return; 1616 } 1617 } 1618 } 1619 1620 if (dir == 0 || dir == 1) 1621 { 1622 // Trim leading characters 1623 while (*head != NUL) 1624 { 1625 c1 = PTR2CHAR(head); 1626 if (mask == NULL) 1627 { 1628 if (c1 > ' ' && c1 != 0xa0) 1629 break; 1630 } 1631 else 1632 { 1633 for (p = mask; *p != NUL; MB_PTR_ADV(p)) 1634 if (c1 == PTR2CHAR(p)) 1635 break; 1636 if (*p == NUL) 1637 break; 1638 } 1639 MB_PTR_ADV(head); 1640 } 1641 } 1642 1643 tail = head + STRLEN(head); 1644 if (dir == 0 || dir == 2) 1645 { 1646 // Trim trailing characters 1647 for (; tail > head; tail = prev) 1648 { 1649 prev = tail; 1650 MB_PTR_BACK(head, prev); 1651 c1 = PTR2CHAR(prev); 1652 if (mask == NULL) 1653 { 1654 if (c1 > ' ' && c1 != 0xa0) 1655 break; 1656 } 1657 else 1658 { 1659 for (p = mask; *p != NUL; MB_PTR_ADV(p)) 1660 if (c1 == PTR2CHAR(p)) 1661 break; 1662 if (*p == NUL) 1663 break; 1664 } 1665 } 1666 } 1667 rettv->vval.v_string = vim_strnsave(head, tail - head); 1668 } 1669 1670 #endif 1671