xref: /vim-8.2.3635/src/strings.c (revision 7d60384a)
1 /* vi:set ts=8 sts=4 sw=4 noet:
2  *
3  * VIM - Vi IMproved	by Bram Moolenaar
4  *
5  * Do ":help uganda"  in Vim to read copying and usage conditions.
6  * Do ":help credits" in Vim to see a list of people who contributed.
7  * See README.txt for an overview of the Vim source code.
8  */
9 
10 /*
11  * strings.c: string manipulation functions
12  */
13 
14 #include "vim.h"
15 
16 /*
17  * Copy "string" into newly allocated memory.
18  */
19     char_u *
20 vim_strsave(char_u *string)
21 {
22     char_u	*p;
23     size_t	len;
24 
25     len = STRLEN(string) + 1;
26     p = alloc(len);
27     if (p != NULL)
28 	mch_memmove(p, string, len);
29     return p;
30 }
31 
32 /*
33  * Copy up to "len" bytes of "string" into newly allocated memory and
34  * terminate with a NUL.
35  * The allocated memory always has size "len + 1", also when "string" is
36  * shorter.
37  */
38     char_u *
39 vim_strnsave(char_u *string, size_t len)
40 {
41     char_u	*p;
42 
43     p = alloc(len + 1);
44     if (p != NULL)
45     {
46 	STRNCPY(p, string, len);
47 	p[len] = NUL;
48     }
49     return p;
50 }
51 
52 /*
53  * Same as vim_strsave(), but any characters found in esc_chars are preceded
54  * by a backslash.
55  */
56     char_u *
57 vim_strsave_escaped(char_u *string, char_u *esc_chars)
58 {
59     return vim_strsave_escaped_ext(string, esc_chars, '\\', FALSE);
60 }
61 
62 /*
63  * Same as vim_strsave_escaped(), but when "bsl" is TRUE also escape
64  * characters where rem_backslash() would remove the backslash.
65  * Escape the characters with "cc".
66  */
67     char_u *
68 vim_strsave_escaped_ext(
69     char_u	*string,
70     char_u	*esc_chars,
71     int		cc,
72     int		bsl)
73 {
74     char_u	*p;
75     char_u	*p2;
76     char_u	*escaped_string;
77     unsigned	length;
78     int		l;
79 
80     /*
81      * First count the number of backslashes required.
82      * Then allocate the memory and insert them.
83      */
84     length = 1;				// count the trailing NUL
85     for (p = string; *p; p++)
86     {
87 	if (has_mbyte && (l = (*mb_ptr2len)(p)) > 1)
88 	{
89 	    length += l;		// count a multibyte char
90 	    p += l - 1;
91 	    continue;
92 	}
93 	if (vim_strchr(esc_chars, *p) != NULL || (bsl && rem_backslash(p)))
94 	    ++length;			// count a backslash
95 	++length;			// count an ordinary char
96     }
97     escaped_string = alloc(length);
98     if (escaped_string != NULL)
99     {
100 	p2 = escaped_string;
101 	for (p = string; *p; p++)
102 	{
103 	    if (has_mbyte && (l = (*mb_ptr2len)(p)) > 1)
104 	    {
105 		mch_memmove(p2, p, (size_t)l);
106 		p2 += l;
107 		p += l - 1;		// skip multibyte char
108 		continue;
109 	    }
110 	    if (vim_strchr(esc_chars, *p) != NULL || (bsl && rem_backslash(p)))
111 		*p2++ = cc;
112 	    *p2++ = *p;
113 	}
114 	*p2 = NUL;
115     }
116     return escaped_string;
117 }
118 
119 /*
120  * Return TRUE when 'shell' has "csh" in the tail.
121  */
122     int
123 csh_like_shell(void)
124 {
125     return (strstr((char *)gettail(p_sh), "csh") != NULL);
126 }
127 
/*
 * Escape "string" for use as a shell argument with system().
 * This uses single quotes, except when we know we need to use double quotes
 * (MS-DOS and MS-Windows not using PowerShell and without 'shellslash' set).
 * PowerShell also uses a novel escaping for enclosed single quotes - double
 * them up.
 * Escape a newline, depending on the 'shell' option.
 * When "do_special" is TRUE also replace "!", "%", "#" and things starting
 * with "<" like "<cfile>".
 * When "do_newline" is FALSE do not escape newline unless it is csh shell.
 * Returns the result in allocated memory, NULL if we have run out.
 *
 * The work is done in two passes over "string": the first one computes the
 * number of bytes to allocate, the second one writes the escaped text.  Both
 * passes must apply the same rules, otherwise the buffer can be too small.
 */
    char_u *
vim_strsave_shellescape(char_u *string, int do_special, int do_newline)
{
    unsigned	length;
    char_u	*p;
    char_u	*d;
    char_u	*escaped_string;
    int		l;
    int		csh_like;
    char_u	*shname;
    int		powershell;
# ifdef MSWIN
    int		double_quotes;
# endif

    // Only csh and similar shells expand '!' within single quotes.  For sh and
    // the like we must not put a backslash before it, it will be taken
    // literally.  If do_special is set the '!' will be escaped twice.
    // Csh also needs to have "\n" escaped twice when do_special is set.
    csh_like = csh_like_shell();

    // PowerShell uses its own version for quoting single quotes
    shname = gettail(p_sh);
    powershell = strstr((char *)shname, "pwsh") != NULL;
# ifdef MSWIN
    powershell = powershell || strstr((char *)shname, "powershell") != NULL;
    // PowerShell only accepts single quotes so override shellslash.
    double_quotes = !powershell && !p_ssl;
# endif

    // First count the number of extra bytes required.
    length = (unsigned)STRLEN(string) + 3;  // two quotes and a trailing NUL
    for (p = string; *p != NUL; MB_PTR_ADV(p))
    {
# ifdef MSWIN
	if (double_quotes)
	{
	    if (*p == '"')
		++length;		// " -> ""
	}
	else
# endif
	// Note: on MS-Windows this "if" is the body of the "else" above.
	if (*p == '\'')
	{
	    if (powershell)
		length +=2;		// ' => ''
	    else
		length += 3;		// ' => '\''
	}
	if ((*p == '\n' && (csh_like || do_newline))
		|| (*p == '!' && (csh_like || do_special)))
	{
	    ++length;			// insert backslash
	    if (csh_like && do_special)
		++length;		// insert backslash
	}
	if (do_special && find_cmdline_var(p, &l) >= 0)
	{
	    ++length;			// insert backslash
	    p += l - 1;			// MB_PTR_ADV() skips the last byte
	}
    }

    // Allocate memory for the result and fill it.
    escaped_string = alloc(length);
    if (escaped_string != NULL)
    {
	d = escaped_string;

	// add opening quote
# ifdef MSWIN
	if (double_quotes)
	    *d++ = '"';
	else
# endif
	    *d++ = '\'';

	// Each branch below that handles a character advances "p" itself and
	// continues; anything else is copied unmodified at the end.
	for (p = string; *p != NUL; )
	{
# ifdef MSWIN
	    if (double_quotes)
	    {
		if (*p == '"')
		{
		    *d++ = '"';
		    *d++ = '"';
		    ++p;
		    continue;
		}
	    }
	    else
# endif
	    if (*p == '\'')
	    {
		if (powershell)
		{
		    // ' => ''
		    *d++ = '\'';
		    *d++ = '\'';
		}
		else
		{
		    // ' => '\'': close the quote, escaped quote, reopen
		    *d++ = '\'';
		    *d++ = '\\';
		    *d++ = '\'';
		    *d++ = '\'';
		}
		++p;
		continue;
	    }
	    if ((*p == '\n' && (csh_like || do_newline))
		    || (*p == '!' && (csh_like || do_special)))
	    {
		*d++ = '\\';
		if (csh_like && do_special)
		    *d++ = '\\';
		*d++ = *p++;
		continue;
	    }
	    if (do_special && find_cmdline_var(p, &l) >= 0)
	    {
		*d++ = '\\';		// insert backslash
		while (--l >= 0)	// copy the var
		    *d++ = *p++;
		continue;
	    }

	    MB_COPY_CHAR(p, d);
	}

	// add terminating quote and finish with a NUL
# ifdef MSWIN
	if (double_quotes)
	    *d++ = '"';
	else
# endif
	    *d++ = '\'';
	*d = NUL;
    }

    return escaped_string;
}
281 
282 /*
283  * Like vim_strsave(), but make all characters uppercase.
284  * This uses ASCII lower-to-upper case translation, language independent.
285  */
286     char_u *
287 vim_strsave_up(char_u *string)
288 {
289     char_u *p1;
290 
291     p1 = vim_strsave(string);
292     vim_strup(p1);
293     return p1;
294 }
295 
296 /*
297  * Like vim_strnsave(), but make all characters uppercase.
298  * This uses ASCII lower-to-upper case translation, language independent.
299  */
300     char_u *
301 vim_strnsave_up(char_u *string, size_t len)
302 {
303     char_u *p1;
304 
305     p1 = vim_strnsave(string, len);
306     vim_strup(p1);
307     return p1;
308 }
309 
310 /*
311  * ASCII lower-to-upper case translation, language independent.
312  */
313     void
314 vim_strup(
315     char_u	*p)
316 {
317     char_u  *p2;
318     int	    c;
319 
320     if (p != NULL)
321     {
322 	p2 = p;
323 	while ((c = *p2) != NUL)
324 #ifdef EBCDIC
325 	    *p2++ = isalpha(c) ? toupper(c) : c;
326 #else
327 	    *p2++ = (c < 'a' || c > 'z') ? c : (c - 0x20);
328 #endif
329     }
330 }
331 
332 #if defined(FEAT_EVAL) || defined(FEAT_SPELL) || defined(PROTO)
/*
 * Make string "orig" all upper-case and return it in allocated memory.
 * Handles multi-byte characters as well as possible.
 * With UTF-8 encoding the result can have a different byte length than
 * "orig", the copy is reallocated when that happens.
 * Returns NULL when out of memory.
 */
    static char_u *
strup_save(char_u *orig)
{
    char_u	*p;
    char_u	*res;

    // "res" is the start of the result, "p" walks through it.
    res = p = vim_strsave(orig);

    if (res != NULL)
	while (*p != NUL)
	{
	    int		l;

	    if (enc_utf8)
	    {
		int	c, uc;
		int	newl;
		char_u	*s;

		c = utf_ptr2char(p);
		l = utf_ptr2len(p);
		if (c == 0)
		{
		    // overlong sequence, use only the first byte
		    c = *p;
		    l = 1;
		}
		uc = utf_toupper(c);

		// Reallocate string when byte count changes.  This is rare,
		// thus it's OK to do another malloc()/free().
		newl = utf_char2len(uc);
		if (newl != l)
		{
		    s = alloc(STRLEN(res) + 1 + newl - l);
		    if (s == NULL)
		    {
			vim_free(res);
			return NULL;
		    }
		    // Copy the text before the character, then the text after
		    // it, leaving a gap of "newl" bytes for the new character.
		    mch_memmove(s, res, p - res);
		    STRCPY(s + (p - res) + newl, p + l);
		    // Move "p" to the same offset in the new buffer; this must
		    // be done before "res" is freed.
		    p = s + (p - res);
		    vim_free(res);
		    res = s;
		}

		// Store the upper-case character in place (or in the gap).
		utf_char2bytes(uc, p);
		p += newl;
	    }
	    else if (has_mbyte && (l = (*mb_ptr2len)(p)) > 1)
		p += l;		// skip multi-byte character
	    else
	    {
		*p = TOUPPER_LOC(*p); // note that toupper() can be a macro
		p++;
	    }
	}

    return res;
}
399 
/*
 * Make string "orig" all lower-case and return it in allocated memory.
 * Handles multi-byte characters as well as possible.
 * With UTF-8 encoding the result can have a different byte length than
 * "orig", the copy is reallocated when that happens.
 * Returns NULL when out of memory.
 */
    char_u *
strlow_save(char_u *orig)
{
    char_u	*p;
    char_u	*res;

    // "res" is the start of the result, "p" walks through it.
    res = p = vim_strsave(orig);

    if (res != NULL)
	while (*p != NUL)
	{
	    int		l;

	    if (enc_utf8)
	    {
		int	c, lc;
		int	newl;
		char_u	*s;

		c = utf_ptr2char(p);
		l = utf_ptr2len(p);
		if (c == 0)
		{
		    // overlong sequence, use only the first byte
		    c = *p;
		    l = 1;
		}
		lc = utf_tolower(c);

		// Reallocate string when byte count changes.  This is rare,
		// thus it's OK to do another malloc()/free().
		newl = utf_char2len(lc);
		if (newl != l)
		{
		    s = alloc(STRLEN(res) + 1 + newl - l);
		    if (s == NULL)
		    {
			vim_free(res);
			return NULL;
		    }
		    // Copy the text before the character, then the text after
		    // it, leaving a gap of "newl" bytes for the new character.
		    mch_memmove(s, res, p - res);
		    STRCPY(s + (p - res) + newl, p + l);
		    // Move "p" to the same offset in the new buffer; this must
		    // be done before "res" is freed.
		    p = s + (p - res);
		    vim_free(res);
		    res = s;
		}

		// Store the lower-case character in place (or in the gap).
		utf_char2bytes(lc, p);
		p += newl;
	    }
	    else if (has_mbyte && (l = (*mb_ptr2len)(p)) > 1)
		p += l;		// skip multi-byte character
	    else
	    {
		*p = TOLOWER_LOC(*p); // note that tolower() can be a macro
		p++;
	    }
	}

    return res;
}
466 #endif
467 
468 /*
469  * delete spaces at the end of a string
470  */
471     void
472 del_trailing_spaces(char_u *ptr)
473 {
474     char_u	*q;
475 
476     q = ptr + STRLEN(ptr);
477     while (--q > ptr && VIM_ISWHITE(q[0]) && q[-1] != '\\' && q[-1] != Ctrl_V)
478 	*q = NUL;
479 }
480 
/*
 * Like strncpy(), but always terminate the result with one NUL.
 * Copies at most "len" bytes from "from" and then stores a NUL at
 * "to[len]".
 * "to" must be "len + 1" long!
 */
    void
vim_strncpy(char_u *to, char_u *from, size_t len)
{
    STRNCPY(to, from, len);
    to[len] = NUL;	// terminates also when "from" has "len" or more bytes
}
491 
492 /*
493  * Like strcat(), but make sure the result fits in "tosize" bytes and is
494  * always NUL terminated. "from" and "to" may overlap.
495  */
496     void
497 vim_strcat(char_u *to, char_u *from, size_t tosize)
498 {
499     size_t tolen = STRLEN(to);
500     size_t fromlen = STRLEN(from);
501 
502     if (tolen + fromlen + 1 > tosize)
503     {
504 	mch_memmove(to + tolen, from, tosize - tolen - 1);
505 	to[tosize - 1] = NUL;
506     }
507     else
508 	mch_memmove(to + tolen, from, fromlen + 1);
509 }
510 
511 #if (!defined(HAVE_STRCASECMP) && !defined(HAVE_STRICMP)) || defined(PROTO)
512 /*
513  * Compare two strings, ignoring case, using current locale.
514  * Doesn't work for multi-byte characters.
515  * return 0 for match, < 0 for smaller, > 0 for bigger
516  */
517     int
518 vim_stricmp(char *s1, char *s2)
519 {
520     int		i;
521 
522     for (;;)
523     {
524 	i = (int)TOLOWER_LOC(*s1) - (int)TOLOWER_LOC(*s2);
525 	if (i != 0)
526 	    return i;			    // this character different
527 	if (*s1 == NUL)
528 	    break;			    // strings match until NUL
529 	++s1;
530 	++s2;
531     }
532     return 0;				    // strings match
533 }
534 #endif
535 
536 #if (!defined(HAVE_STRNCASECMP) && !defined(HAVE_STRNICMP)) || defined(PROTO)
537 /*
538  * Compare two strings, for length "len", ignoring case, using current locale.
539  * Doesn't work for multi-byte characters.
540  * return 0 for match, < 0 for smaller, > 0 for bigger
541  */
542     int
543 vim_strnicmp(char *s1, char *s2, size_t len)
544 {
545     int		i;
546 
547     while (len > 0)
548     {
549 	i = (int)TOLOWER_LOC(*s1) - (int)TOLOWER_LOC(*s2);
550 	if (i != 0)
551 	    return i;			    // this character different
552 	if (*s1 == NUL)
553 	    break;			    // strings match until NUL
554 	++s1;
555 	++s2;
556 	--len;
557     }
558     return 0;				    // strings match
559 }
560 #endif
561 
562 /*
563  * Search for first occurrence of "c" in "string".
564  * Version of strchr() that handles unsigned char strings with characters from
565  * 128 to 255 correctly.  It also doesn't return a pointer to the NUL at the
566  * end of the string.
567  */
568     char_u  *
569 vim_strchr(char_u *string, int c)
570 {
571     char_u	*p;
572     int		b;
573 
574     p = string;
575     if (enc_utf8 && c >= 0x80)
576     {
577 	while (*p != NUL)
578 	{
579 	    int l = utfc_ptr2len(p);
580 
581 	    // Avoid matching an illegal byte here.
582 	    if (utf_ptr2char(p) == c && l > 1)
583 		return p;
584 	    p += l;
585 	}
586 	return NULL;
587     }
588     if (enc_dbcs != 0 && c > 255)
589     {
590 	int	n2 = c & 0xff;
591 
592 	c = ((unsigned)c >> 8) & 0xff;
593 	while ((b = *p) != NUL)
594 	{
595 	    if (b == c && p[1] == n2)
596 		return p;
597 	    p += (*mb_ptr2len)(p);
598 	}
599 	return NULL;
600     }
601     if (has_mbyte)
602     {
603 	while ((b = *p) != NUL)
604 	{
605 	    if (b == c)
606 		return p;
607 	    p += (*mb_ptr2len)(p);
608 	}
609 	return NULL;
610     }
611     while ((b = *p) != NUL)
612     {
613 	if (b == c)
614 	    return p;
615 	++p;
616     }
617     return NULL;
618 }
619 
620 /*
621  * Version of strchr() that only works for bytes and handles unsigned char
622  * strings with characters above 128 correctly. It also doesn't return a
623  * pointer to the NUL at the end of the string.
624  */
625     char_u  *
626 vim_strbyte(char_u *string, int c)
627 {
628     char_u	*p = string;
629 
630     while (*p != NUL)
631     {
632 	if (*p == c)
633 	    return p;
634 	++p;
635     }
636     return NULL;
637 }
638 
639 /*
640  * Search for last occurrence of "c" in "string".
641  * Version of strrchr() that handles unsigned char strings with characters from
642  * 128 to 255 correctly.  It also doesn't return a pointer to the NUL at the
643  * end of the string.
644  * Return NULL if not found.
645  * Does not handle multi-byte char for "c"!
646  */
647     char_u  *
648 vim_strrchr(char_u *string, int c)
649 {
650     char_u	*retval = NULL;
651     char_u	*p = string;
652 
653     while (*p)
654     {
655 	if (*p == c)
656 	    retval = p;
657 	MB_PTR_ADV(p);
658     }
659     return retval;
660 }
661 
662 /*
663  * Vim's version of strpbrk(), in case it's missing.
664  * Don't generate a prototype for this, causes problems when it's not used.
665  */
666 #ifndef PROTO
667 # ifndef HAVE_STRPBRK
668 #  ifdef vim_strpbrk
669 #   undef vim_strpbrk
670 #  endif
671     char_u *
672 vim_strpbrk(char_u *s, char_u *charset)
673 {
674     while (*s)
675     {
676 	if (vim_strchr(charset, *s) != NULL)
677 	    return s;
678 	MB_PTR_ADV(s);
679     }
680     return NULL;
681 }
682 # endif
683 #endif
684 
685 /*
686  * Sort an array of strings.
687  */
static int sort_compare(const void *s1, const void *s2);

// qsort() callback: "s1" and "s2" point to string pointers, the strings are
// compared with STRCMP().
    static int
sort_compare(const void *s1, const void *s2)
{
    char	*left = *(char **)s1;
    char	*right = *(char **)s2;

    return STRCMP(left, right);
}
695 
696     void
697 sort_strings(
698     char_u	**files,
699     int		count)
700 {
701     qsort((void *)files, (size_t)count, sizeof(char_u *), sort_compare);
702 }
703 
704 #if defined(FEAT_QUICKFIX) || defined(FEAT_SPELL) || defined(PROTO)
705 /*
706  * Return TRUE if string "s" contains a non-ASCII character (128 or higher).
707  * When "s" is NULL FALSE is returned.
708  */
709     int
710 has_non_ascii(char_u *s)
711 {
712     char_u	*p;
713 
714     if (s != NULL)
715 	for (p = s; *p != NUL; ++p)
716 	    if (*p >= 128)
717 		return TRUE;
718     return FALSE;
719 }
720 #endif
721 
722 /*
723  * Concatenate two strings and return the result in allocated memory.
724  * Returns NULL when out of memory.
725  */
726     char_u  *
727 concat_str(char_u *str1, char_u *str2)
728 {
729     char_u  *dest;
730     size_t  l = str1 == NULL ? 0 : STRLEN(str1);
731 
732     dest = alloc(l + (str2 == NULL ? 0 : STRLEN(str2)) + 1L);
733     if (dest != NULL)
734     {
735 	if (str1 == NULL)
736 	    *dest = NUL;
737 	else
738 	    STRCPY(dest, str1);
739 	if (str2 != NULL)
740 	    STRCPY(dest + l, str2);
741     }
742     return dest;
743 }
744 
745 #if defined(FEAT_EVAL) || defined(PROTO)
746 
747 /*
748  * Return string "str" in ' quotes, doubling ' characters.
749  * If "str" is NULL an empty string is assumed.
750  * If "function" is TRUE make it function('string').
751  */
752     char_u *
753 string_quote(char_u *str, int function)
754 {
755     unsigned	len;
756     char_u	*p, *r, *s;
757 
758     len = (function ? 13 : 3);
759     if (str != NULL)
760     {
761 	len += (unsigned)STRLEN(str);
762 	for (p = str; *p != NUL; MB_PTR_ADV(p))
763 	    if (*p == '\'')
764 		++len;
765     }
766     s = r = alloc(len);
767     if (r != NULL)
768     {
769 	if (function)
770 	{
771 	    STRCPY(r, "function('");
772 	    r += 10;
773 	}
774 	else
775 	    *r++ = '\'';
776 	if (str != NULL)
777 	    for (p = str; *p != NUL; )
778 	    {
779 		if (*p == '\'')
780 		    *r++ = '\'';
781 		MB_COPY_CHAR(p, r);
782 	    }
783 	*r++ = '\'';
784 	if (function)
785 	    *r++ = ')';
786 	*r++ = NUL;
787     }
788     return s;
789 }
790 
791     static void
792 byteidx(typval_T *argvars, typval_T *rettv, int comp UNUSED)
793 {
794     char_u	*t;
795     char_u	*str;
796     varnumber_T	idx;
797 
798     rettv->vval.v_number = -1;
799 
800     if (in_vim9script()
801 	    && (check_for_string_arg(argvars, 0) == FAIL
802 		|| check_for_number_arg(argvars, 1) == FAIL))
803 	return;
804 
805     str = tv_get_string_chk(&argvars[0]);
806     idx = tv_get_number_chk(&argvars[1], NULL);
807     if (str == NULL || idx < 0)
808 	return;
809 
810     t = str;
811     for ( ; idx > 0; idx--)
812     {
813 	if (*t == NUL)		// EOL reached
814 	    return;
815 	if (enc_utf8 && comp)
816 	    t += utf_ptr2len(t);
817 	else
818 	    t += (*mb_ptr2len)(t);
819     }
820     rettv->vval.v_number = (varnumber_T)(t - str);
821 }
822 
/*
 * "byteidx()" function
 * Calls the shared byteidx() with "comp" FALSE.
 */
    void
f_byteidx(typval_T *argvars, typval_T *rettv)
{
    byteidx(argvars, rettv, FALSE);
}
831 
/*
 * "byteidxcomp()" function
 * Calls the shared byteidx() with "comp" TRUE.
 */
    void
f_byteidxcomp(typval_T *argvars, typval_T *rettv)
{
    byteidx(argvars, rettv, TRUE);
}
840 
841 /*
842  * "charidx()" function
843  */
844     void
845 f_charidx(typval_T *argvars, typval_T *rettv)
846 {
847     char_u	*str;
848     varnumber_T	idx;
849     varnumber_T	countcc = FALSE;
850     char_u	*p;
851     int		len;
852     int		(*ptr2len)(char_u *);
853 
854     rettv->vval.v_number = -1;
855 
856     if (argvars[0].v_type != VAR_STRING || argvars[1].v_type != VAR_NUMBER
857 	    || (argvars[2].v_type != VAR_UNKNOWN
858 					   && argvars[2].v_type != VAR_NUMBER
859 					   && argvars[2].v_type != VAR_BOOL))
860     {
861 	emsg(_(e_invarg));
862 	return;
863     }
864 
865     str = tv_get_string_chk(&argvars[0]);
866     idx = tv_get_number_chk(&argvars[1], NULL);
867     if (str == NULL || idx < 0)
868 	return;
869 
870     if (argvars[2].v_type != VAR_UNKNOWN)
871 	countcc = tv_get_bool(&argvars[2]);
872     if (countcc < 0 || countcc > 1)
873     {
874 	semsg(_(e_using_number_as_bool_nr), countcc);
875 	return;
876     }
877 
878     if (enc_utf8 && countcc)
879 	ptr2len = utf_ptr2len;
880     else
881 	ptr2len = mb_ptr2len;
882 
883     for (p = str, len = 0; p <= str + idx; len++)
884     {
885 	if (*p == NUL)
886 	    return;
887 	p += ptr2len(p);
888     }
889 
890     rettv->vval.v_number = len > 0 ? len - 1 : 0;
891 }
892 
893 /*
894  * "str2list()" function
895  */
896     void
897 f_str2list(typval_T *argvars, typval_T *rettv)
898 {
899     char_u	*p;
900     int		utf8 = FALSE;
901 
902     if (rettv_list_alloc(rettv) == FAIL)
903 	return;
904 
905     if (in_vim9script()
906 	    && (check_for_string_arg(argvars, 0) == FAIL
907 		|| check_for_opt_bool_arg(argvars, 1) == FAIL))
908 	return;
909 
910     if (argvars[1].v_type != VAR_UNKNOWN)
911 	utf8 = (int)tv_get_bool_chk(&argvars[1], NULL);
912 
913     p = tv_get_string(&argvars[0]);
914 
915     if (has_mbyte || utf8)
916     {
917 	int (*ptr2len)(char_u *);
918 	int (*ptr2char)(char_u *);
919 
920 	if (utf8 || enc_utf8)
921 	{
922 	    ptr2len = utf_ptr2len;
923 	    ptr2char = utf_ptr2char;
924 	}
925 	else
926 	{
927 	    ptr2len = mb_ptr2len;
928 	    ptr2char = mb_ptr2char;
929 	}
930 
931 	for ( ; *p != NUL; p += (*ptr2len)(p))
932 	    list_append_number(rettv->vval.v_list, (*ptr2char)(p));
933     }
934     else
935 	for ( ; *p != NUL; ++p)
936 	    list_append_number(rettv->vval.v_list, *p);
937 }
938 
939 /*
940  * "str2nr()" function
941  */
942     void
943 f_str2nr(typval_T *argvars, typval_T *rettv)
944 {
945     int		base = 10;
946     char_u	*p;
947     varnumber_T	n;
948     int		what = 0;
949     int		isneg;
950 
951     if (argvars[1].v_type != VAR_UNKNOWN)
952     {
953 	base = (int)tv_get_number(&argvars[1]);
954 	if (base != 2 && base != 8 && base != 10 && base != 16)
955 	{
956 	    emsg(_(e_invarg));
957 	    return;
958 	}
959 	if (argvars[2].v_type != VAR_UNKNOWN && tv_get_bool(&argvars[2]))
960 	    what |= STR2NR_QUOTE;
961     }
962 
963     p = skipwhite(tv_get_string_strict(&argvars[0]));
964     isneg = (*p == '-');
965     if (*p == '+' || *p == '-')
966 	p = skipwhite(p + 1);
967     switch (base)
968     {
969 	case 2: what |= STR2NR_BIN + STR2NR_FORCE; break;
970 	case 8: what |= STR2NR_OCT + STR2NR_OOCT + STR2NR_FORCE; break;
971 	case 16: what |= STR2NR_HEX + STR2NR_FORCE; break;
972     }
973     vim_str2nr(p, NULL, NULL, what, &n, NULL, 0, FALSE);
974     // Text after the number is silently ignored.
975     if (isneg)
976 	rettv->vval.v_number = -n;
977     else
978 	rettv->vval.v_number = n;
979 
980 }
981 
982 /*
983  * "strgetchar()" function
984  */
985     void
986 f_strgetchar(typval_T *argvars, typval_T *rettv)
987 {
988     char_u	*str;
989     int		len;
990     int		error = FALSE;
991     int		charidx;
992     int		byteidx = 0;
993 
994     rettv->vval.v_number = -1;
995 
996     if (in_vim9script()
997 	    && (check_for_string_arg(argvars, 0) == FAIL
998 		|| check_for_number_arg(argvars, 1) == FAIL))
999 	return;
1000 
1001     str = tv_get_string_chk(&argvars[0]);
1002     if (str == NULL)
1003 	return;
1004     len = (int)STRLEN(str);
1005     charidx = (int)tv_get_number_chk(&argvars[1], &error);
1006     if (error)
1007 	return;
1008 
1009     while (charidx >= 0 && byteidx < len)
1010     {
1011 	if (charidx == 0)
1012 	{
1013 	    rettv->vval.v_number = mb_ptr2char(str + byteidx);
1014 	    break;
1015 	}
1016 	--charidx;
1017 	byteidx += MB_CPTR2LEN(str + byteidx);
1018     }
1019 }
1020 
1021 /*
1022  * "stridx()" function
1023  */
1024     void
1025 f_stridx(typval_T *argvars, typval_T *rettv)
1026 {
1027     char_u	buf[NUMBUFLEN];
1028     char_u	*needle;
1029     char_u	*haystack;
1030     char_u	*save_haystack;
1031     char_u	*pos;
1032     int		start_idx;
1033 
1034     needle = tv_get_string_chk(&argvars[1]);
1035     save_haystack = haystack = tv_get_string_buf_chk(&argvars[0], buf);
1036     rettv->vval.v_number = -1;
1037     if (needle == NULL || haystack == NULL)
1038 	return;		// type error; errmsg already given
1039 
1040     if (argvars[2].v_type != VAR_UNKNOWN)
1041     {
1042 	int	    error = FALSE;
1043 
1044 	start_idx = (int)tv_get_number_chk(&argvars[2], &error);
1045 	if (error || start_idx >= (int)STRLEN(haystack))
1046 	    return;
1047 	if (start_idx >= 0)
1048 	    haystack += start_idx;
1049     }
1050 
1051     pos	= (char_u *)strstr((char *)haystack, (char *)needle);
1052     if (pos != NULL)
1053 	rettv->vval.v_number = (varnumber_T)(pos - save_haystack);
1054 }
1055 
1056 /*
1057  * "string()" function
1058  */
1059     void
1060 f_string(typval_T *argvars, typval_T *rettv)
1061 {
1062     char_u	*tofree;
1063     char_u	numbuf[NUMBUFLEN];
1064 
1065     rettv->v_type = VAR_STRING;
1066     rettv->vval.v_string = tv2string(&argvars[0], &tofree, numbuf,
1067 								get_copyID());
1068     // Make a copy if we have a value but it's not in allocated memory.
1069     if (rettv->vval.v_string != NULL && tofree == NULL)
1070 	rettv->vval.v_string = vim_strsave(rettv->vval.v_string);
1071 }
1072 
1073 /*
1074  * "strlen()" function
1075  */
1076     void
1077 f_strlen(typval_T *argvars, typval_T *rettv)
1078 {
1079     rettv->vval.v_number = (varnumber_T)(STRLEN(
1080 					      tv_get_string(&argvars[0])));
1081 }
1082 
1083     static void
1084 strchar_common(typval_T *argvars, typval_T *rettv, int skipcc)
1085 {
1086     char_u		*s = tv_get_string(&argvars[0]);
1087     varnumber_T		len = 0;
1088     int			(*func_mb_ptr2char_adv)(char_u **pp);
1089 
1090     func_mb_ptr2char_adv = skipcc ? mb_ptr2char_adv : mb_cptr2char_adv;
1091     while (*s != NUL)
1092     {
1093 	func_mb_ptr2char_adv(&s);
1094 	++len;
1095     }
1096     rettv->vval.v_number = len;
1097 }
1098 
/*
 * "strcharlen()" function
 * Calls strchar_common() with "skipcc" TRUE.
 */
    void
f_strcharlen(typval_T *argvars, typval_T *rettv)
{
    strchar_common(argvars, rettv, TRUE);
}
1107 
1108 /*
1109  * "strchars()" function
1110  */
1111     void
1112 f_strchars(typval_T *argvars, typval_T *rettv)
1113 {
1114     varnumber_T		skipcc = FALSE;
1115 
1116     if (in_vim9script()
1117 	    && (check_for_string_arg(argvars, 0) == FAIL
1118 		|| check_for_opt_bool_arg(argvars, 1) == FAIL))
1119 	return;
1120 
1121     if (argvars[1].v_type != VAR_UNKNOWN)
1122 	skipcc = tv_get_bool(&argvars[1]);
1123     if (skipcc < 0 || skipcc > 1)
1124 	semsg(_(e_using_number_as_bool_nr), skipcc);
1125     else
1126 	strchar_common(argvars, rettv, skipcc);
1127 }
1128 
1129 /*
1130  * "strdisplaywidth()" function
1131  */
1132     void
1133 f_strdisplaywidth(typval_T *argvars, typval_T *rettv)
1134 {
1135     char_u	*s;
1136     int		col = 0;
1137 
1138     rettv->vval.v_number = -1;
1139 
1140     if (in_vim9script()
1141 	    && (check_for_string_arg(argvars, 0) == FAIL
1142 		|| check_for_opt_number_arg(argvars, 1) == FAIL))
1143 	return;
1144 
1145     s = tv_get_string(&argvars[0]);
1146     if (argvars[1].v_type != VAR_UNKNOWN)
1147 	col = (int)tv_get_number(&argvars[1]);
1148 
1149     rettv->vval.v_number = (varnumber_T)(linetabsize_col(col, s) - col);
1150 }
1151 
/*
 * "strwidth()" function
 * Sets the result to what mb_string2cells() returns for the string value of
 * argvars[0].
 */
    void
f_strwidth(typval_T *argvars, typval_T *rettv)
{
    char_u	*s = tv_get_string_strict(&argvars[0]);

    // NOTE(review): the -1 length presumably means "up to the NUL" - confirm
    // with mb_string2cells().
    rettv->vval.v_number = (varnumber_T)(mb_string2cells(s, -1));
}
1162 
/*
 * "strcharpart()" function
 *
 * argvars[0]: the string
 * argvars[1]: index of the first character, may be negative
 * argvars[2]: optional number of characters
 * argvars[3]: optional "skipcc" flag, only used when argvars[2] is present;
 *	       when set characters are stepped over with mb_ptr2len(),
 *	       otherwise with MB_CPTR2LEN()
 * The character index and count are converted to a byte offset "nbyte" and
 * a byte length "len", then the overlap with the string is returned in
 * allocated memory.
 */
    void
f_strcharpart(typval_T *argvars, typval_T *rettv)
{
    char_u	*p;
    int		nchar;
    int		nbyte = 0;
    int		charlen;
    int		skipcc = FALSE;
    int		len = 0;
    int		slen;
    int		error = FALSE;

    if (in_vim9script()
	    && (check_for_string_arg(argvars, 0) == FAIL
		|| check_for_number_arg(argvars, 1) == FAIL
		|| check_for_opt_number_arg(argvars, 2) == FAIL
		|| (argvars[2].v_type != VAR_UNKNOWN
		    && check_for_opt_bool_arg(argvars, 3) == FAIL)))
	return;

    p = tv_get_string(&argvars[0]);
    slen = (int)STRLEN(p);

    // When getting the number fails "nbyte" and "len" stay zero and an empty
    // string is returned.
    nchar = (int)tv_get_number_chk(&argvars[1], &error);
    if (!error)
    {
	if (argvars[2].v_type != VAR_UNKNOWN
					   && argvars[3].v_type != VAR_UNKNOWN)
	{
	    skipcc = tv_get_bool(&argvars[3]);
	    if (skipcc < 0 || skipcc > 1)
	    {
		semsg(_(e_using_number_as_bool_nr), skipcc);
		return;
	    }
	}

	// Convert the character index to a byte offset.  A zero or negative
	// index is used as the offset directly, as if every character before
	// the string takes one byte.
	if (nchar > 0)
	    while (nchar > 0 && nbyte < slen)
	    {
		if (skipcc)
		    nbyte += mb_ptr2len(p + nbyte);
		else
		    nbyte += MB_CPTR2LEN(p + nbyte);
		--nchar;
	    }
	else
	    nbyte = nchar;
	if (argvars[2].v_type != VAR_UNKNOWN)
	{
	    // Convert the character count to a byte length.
	    charlen = (int)tv_get_number(&argvars[2]);
	    while (charlen > 0 && nbyte + len < slen)
	    {
		int off = nbyte + len;

		// Positions before the start of the string count as one
		// byte per character.
		if (off < 0)
		    len += 1;
		else
		{
		    if (skipcc)
			len += mb_ptr2len(p + off);
		    else
			len += MB_CPTR2LEN(p + off);
		}
		--charlen;
	    }
	}
	else
	    len = slen - nbyte;    // default: all bytes that are available.
    }

    /*
     * Only return the overlap between the specified part and the actual
     * string.
     */
    if (nbyte < 0)
    {
	// Drop the bytes before the start of the string from the length.
	len += nbyte;
	nbyte = 0;
    }
    else if (nbyte > slen)
	nbyte = slen;
    if (len < 0)
	len = 0;
    else if (nbyte + len > slen)
	len = slen - nbyte;

    rettv->v_type = VAR_STRING;
    rettv->vval.v_string = vim_strnsave(p + nbyte, len);
}
1256 
1257 /*
1258  * "strpart()" function
1259  */
1260     void
1261 f_strpart(typval_T *argvars, typval_T *rettv)
1262 {
1263     char_u	*p;
1264     int		n;
1265     int		len;
1266     int		slen;
1267     int		error = FALSE;
1268 
1269     if (in_vim9script()
1270 	    && (check_for_string_arg(argvars, 0) == FAIL
1271 		|| check_for_number_arg(argvars, 1) == FAIL
1272 		|| check_for_opt_number_arg(argvars, 2) == FAIL
1273 		|| (argvars[2].v_type != VAR_UNKNOWN
1274 		    && check_for_opt_bool_arg(argvars, 3) == FAIL)))
1275 	return;
1276 
1277     p = tv_get_string(&argvars[0]);
1278     slen = (int)STRLEN(p);
1279 
1280     n = (int)tv_get_number_chk(&argvars[1], &error);
1281     if (error)
1282 	len = 0;
1283     else if (argvars[2].v_type != VAR_UNKNOWN)
1284 	len = (int)tv_get_number(&argvars[2]);
1285     else
1286 	len = slen - n;	    // default len: all bytes that are available.
1287 
1288     // Only return the overlap between the specified part and the actual
1289     // string.
1290     if (n < 0)
1291     {
1292 	len += n;
1293 	n = 0;
1294     }
1295     else if (n > slen)
1296 	n = slen;
1297     if (len < 0)
1298 	len = 0;
1299     else if (n + len > slen)
1300 	len = slen - n;
1301 
1302     if (argvars[2].v_type != VAR_UNKNOWN && argvars[3].v_type != VAR_UNKNOWN)
1303     {
1304 	int off;
1305 
1306 	// length in characters
1307 	for (off = n; off < slen && len > 0; --len)
1308 	    off += mb_ptr2len(p + off);
1309 	len = off - n;
1310     }
1311 
1312     rettv->v_type = VAR_STRING;
1313     rettv->vval.v_string = vim_strnsave(p + n, len);
1314 }
1315 
1316 /*
1317  * "strridx()" function
1318  */
1319     void
1320 f_strridx(typval_T *argvars, typval_T *rettv)
1321 {
1322     char_u	buf[NUMBUFLEN];
1323     char_u	*needle;
1324     char_u	*haystack;
1325     char_u	*rest;
1326     char_u	*lastmatch = NULL;
1327     int		haystack_len, end_idx;
1328 
1329     needle = tv_get_string_chk(&argvars[1]);
1330     haystack = tv_get_string_buf_chk(&argvars[0], buf);
1331 
1332     rettv->vval.v_number = -1;
1333     if (needle == NULL || haystack == NULL)
1334 	return;		// type error; errmsg already given
1335 
1336     haystack_len = (int)STRLEN(haystack);
1337     if (argvars[2].v_type != VAR_UNKNOWN)
1338     {
1339 	// Third argument: upper limit for index
1340 	end_idx = (int)tv_get_number_chk(&argvars[2], NULL);
1341 	if (end_idx < 0)
1342 	    return;	// can never find a match
1343     }
1344     else
1345 	end_idx = haystack_len;
1346 
1347     if (*needle == NUL)
1348     {
1349 	// Empty string matches past the end.
1350 	lastmatch = haystack + end_idx;
1351     }
1352     else
1353     {
1354 	for (rest = haystack; *rest != '\0'; ++rest)
1355 	{
1356 	    rest = (char_u *)strstr((char *)rest, (char *)needle);
1357 	    if (rest == NULL || rest > haystack + end_idx)
1358 		break;
1359 	    lastmatch = rest;
1360 	}
1361     }
1362 
1363     if (lastmatch == NULL)
1364 	rettv->vval.v_number = -1;
1365     else
1366 	rettv->vval.v_number = (varnumber_T)(lastmatch - haystack);
1367 }
1368 
1369 /*
1370  * "strtrans()" function
1371  */
1372     void
1373 f_strtrans(typval_T *argvars, typval_T *rettv)
1374 {
1375     rettv->v_type = VAR_STRING;
1376     rettv->vval.v_string = transstr(tv_get_string(&argvars[0]));
1377 }
1378 
1379 /*
1380  * "tolower(string)" function
1381  */
1382     void
1383 f_tolower(typval_T *argvars, typval_T *rettv)
1384 {
1385     rettv->v_type = VAR_STRING;
1386     rettv->vval.v_string = strlow_save(tv_get_string(&argvars[0]));
1387 }
1388 
1389 /*
1390  * "toupper(string)" function
1391  */
1392     void
1393 f_toupper(typval_T *argvars, typval_T *rettv)
1394 {
1395     rettv->v_type = VAR_STRING;
1396     rettv->vval.v_string = strup_save(tv_get_string(&argvars[0]));
1397 }
1398 
1399 /*
1400  * "tr(string, fromstr, tostr)" function
1401  */
1402     void
1403 f_tr(typval_T *argvars, typval_T *rettv)
1404 {
1405     char_u	*in_str;
1406     char_u	*fromstr;
1407     char_u	*tostr;
1408     char_u	*p;
1409     int		inlen;
1410     int		fromlen;
1411     int		tolen;
1412     int		idx;
1413     char_u	*cpstr;
1414     int		cplen;
1415     int		first = TRUE;
1416     char_u	buf[NUMBUFLEN];
1417     char_u	buf2[NUMBUFLEN];
1418     garray_T	ga;
1419 
1420     in_str = tv_get_string(&argvars[0]);
1421     fromstr = tv_get_string_buf_chk(&argvars[1], buf);
1422     tostr = tv_get_string_buf_chk(&argvars[2], buf2);
1423 
1424     // Default return value: empty string.
1425     rettv->v_type = VAR_STRING;
1426     rettv->vval.v_string = NULL;
1427     if (fromstr == NULL || tostr == NULL)
1428 	    return;		// type error; errmsg already given
1429     ga_init2(&ga, (int)sizeof(char), 80);
1430 
1431     if (!has_mbyte)
1432 	// not multi-byte: fromstr and tostr must be the same length
1433 	if (STRLEN(fromstr) != STRLEN(tostr))
1434 	{
1435 error:
1436 	    semsg(_(e_invarg2), fromstr);
1437 	    ga_clear(&ga);
1438 	    return;
1439 	}
1440 
1441     // fromstr and tostr have to contain the same number of chars
1442     while (*in_str != NUL)
1443     {
1444 	if (has_mbyte)
1445 	{
1446 	    inlen = (*mb_ptr2len)(in_str);
1447 	    cpstr = in_str;
1448 	    cplen = inlen;
1449 	    idx = 0;
1450 	    for (p = fromstr; *p != NUL; p += fromlen)
1451 	    {
1452 		fromlen = (*mb_ptr2len)(p);
1453 		if (fromlen == inlen && STRNCMP(in_str, p, inlen) == 0)
1454 		{
1455 		    for (p = tostr; *p != NUL; p += tolen)
1456 		    {
1457 			tolen = (*mb_ptr2len)(p);
1458 			if (idx-- == 0)
1459 			{
1460 			    cplen = tolen;
1461 			    cpstr = p;
1462 			    break;
1463 			}
1464 		    }
1465 		    if (*p == NUL)	// tostr is shorter than fromstr
1466 			goto error;
1467 		    break;
1468 		}
1469 		++idx;
1470 	    }
1471 
1472 	    if (first && cpstr == in_str)
1473 	    {
1474 		// Check that fromstr and tostr have the same number of
1475 		// (multi-byte) characters.  Done only once when a character
1476 		// of in_str doesn't appear in fromstr.
1477 		first = FALSE;
1478 		for (p = tostr; *p != NUL; p += tolen)
1479 		{
1480 		    tolen = (*mb_ptr2len)(p);
1481 		    --idx;
1482 		}
1483 		if (idx != 0)
1484 		    goto error;
1485 	    }
1486 
1487 	    (void)ga_grow(&ga, cplen);
1488 	    mch_memmove((char *)ga.ga_data + ga.ga_len, cpstr, (size_t)cplen);
1489 	    ga.ga_len += cplen;
1490 
1491 	    in_str += inlen;
1492 	}
1493 	else
1494 	{
1495 	    // When not using multi-byte chars we can do it faster.
1496 	    p = vim_strchr(fromstr, *in_str);
1497 	    if (p != NULL)
1498 		ga_append(&ga, tostr[p - fromstr]);
1499 	    else
1500 		ga_append(&ga, *in_str);
1501 	    ++in_str;
1502 	}
1503     }
1504 
1505     // add a terminating NUL
1506     (void)ga_grow(&ga, 1);
1507     ga_append(&ga, NUL);
1508 
1509     rettv->vval.v_string = ga.ga_data;
1510 }
1511 
1512 /*
1513  * "trim({expr})" function
1514  */
1515     void
1516 f_trim(typval_T *argvars, typval_T *rettv)
1517 {
1518     char_u	buf1[NUMBUFLEN];
1519     char_u	buf2[NUMBUFLEN];
1520     char_u	*head = tv_get_string_buf_chk(&argvars[0], buf1);
1521     char_u	*mask = NULL;
1522     char_u	*tail;
1523     char_u	*prev;
1524     char_u	*p;
1525     int		c1;
1526     int		dir = 0;
1527 
1528     rettv->v_type = VAR_STRING;
1529     rettv->vval.v_string = NULL;
1530     if (head == NULL)
1531 	return;
1532 
1533     if (argvars[1].v_type != VAR_UNKNOWN && argvars[1].v_type != VAR_STRING)
1534     {
1535 	semsg(_(e_invarg2), tv_get_string(&argvars[1]));
1536 	return;
1537     }
1538 
1539     if (argvars[1].v_type == VAR_STRING)
1540     {
1541 	mask = tv_get_string_buf_chk(&argvars[1], buf2);
1542 
1543 	if (argvars[2].v_type != VAR_UNKNOWN)
1544 	{
1545 	    int	error = 0;
1546 
1547 	    // leading or trailing characters to trim
1548 	    dir = (int)tv_get_number_chk(&argvars[2], &error);
1549 	    if (error)
1550 		return;
1551 	    if (dir < 0 || dir > 2)
1552 	    {
1553 		semsg(_(e_invarg2), tv_get_string(&argvars[2]));
1554 		return;
1555 	    }
1556 	}
1557     }
1558 
1559     if (dir == 0 || dir == 1)
1560     {
1561 	// Trim leading characters
1562 	while (*head != NUL)
1563 	{
1564 	    c1 = PTR2CHAR(head);
1565 	    if (mask == NULL)
1566 	    {
1567 		if (c1 > ' ' && c1 != 0xa0)
1568 		    break;
1569 	    }
1570 	    else
1571 	    {
1572 		for (p = mask; *p != NUL; MB_PTR_ADV(p))
1573 		    if (c1 == PTR2CHAR(p))
1574 			break;
1575 		if (*p == NUL)
1576 		    break;
1577 	    }
1578 	    MB_PTR_ADV(head);
1579 	}
1580     }
1581 
1582     tail = head + STRLEN(head);
1583     if (dir == 0 || dir == 2)
1584     {
1585 	// Trim trailing characters
1586 	for (; tail > head; tail = prev)
1587 	{
1588 	    prev = tail;
1589 	    MB_PTR_BACK(head, prev);
1590 	    c1 = PTR2CHAR(prev);
1591 	    if (mask == NULL)
1592 	    {
1593 		if (c1 > ' ' && c1 != 0xa0)
1594 		    break;
1595 	    }
1596 	    else
1597 	    {
1598 		for (p = mask; *p != NUL; MB_PTR_ADV(p))
1599 		    if (c1 == PTR2CHAR(p))
1600 			break;
1601 		if (*p == NUL)
1602 		    break;
1603 	    }
1604 	}
1605     }
1606     rettv->vval.v_string = vim_strnsave(head, tail - head);
1607 }
1608 
1609 #endif
1610