xref: /vim-8.2.3635/src/charset.c (revision 93ff6720)
1 /* vi:set ts=8 sts=4 sw=4 noet:
2  *
3  * VIM - Vi IMproved	by Bram Moolenaar
4  *
5  * Do ":help uganda"  in Vim to read copying and usage conditions.
6  * Do ":help credits" in Vim to see a list of people who contributed.
7  * See README.txt for an overview of the Vim source code.
8  */
9 
10 #include "vim.h"
11 
12 #if defined(HAVE_WCHAR_H)
13 # include <wchar.h>	    // for towupper() and towlower()
14 #endif
15 static int win_nolbr_chartabsize(win_T *wp, char_u *s, colnr_T col, int *headp);
16 
17 static unsigned nr2hex(unsigned c);
18 
19 static int    chartab_initialized = FALSE;
20 
21 // b_chartab[] is an array of 32 bytes, each bit representing one of the
22 // characters 0-255.
23 #define SET_CHARTAB(buf, c) (buf)->b_chartab[(unsigned)(c) >> 3] |= (1 << ((c) & 0x7))
24 #define RESET_CHARTAB(buf, c) (buf)->b_chartab[(unsigned)(c) >> 3] &= ~(1 << ((c) & 0x7))
25 #define GET_CHARTAB(buf, c) ((buf)->b_chartab[(unsigned)(c) >> 3] & (1 << ((c) & 0x7)))
26 
27 // table used below, see init_chartab() for an explanation
28 static char_u	g_chartab[256];
29 
30 /*
31  * Flags for g_chartab[].
32  */
33 #define CT_CELL_MASK	0x07	// mask: nr of display cells (1, 2 or 4)
34 #define CT_PRINT_CHAR	0x10	// flag: set for printable chars
35 #define CT_ID_CHAR	0x20	// flag: set for ID chars
36 #define CT_FNAME_CHAR	0x40	// flag: set for file name chars
37 
38 static int in_win_border(win_T *wp, colnr_T vcol);
39 
40 /*
41  * Fill g_chartab[].  Also fills curbuf->b_chartab[] with flags for keyword
42  * characters for current buffer.
43  *
44  * Depends on the option settings 'iskeyword', 'isident', 'isfname',
45  * 'isprint' and 'encoding'.
46  *
47  * The index in g_chartab[] depends on 'encoding':
48  * - For non-multi-byte index with the byte (same as the character).
49  * - For DBCS index with the first byte.
50  * - For UTF-8 index with the character (when first byte is up to 0x80 it is
51  *   the same as the character, if the first byte is 0x80 and above it depends
52  *   on further bytes).
53  *
54  * The contents of g_chartab[]:
55  * - The lower two bits, masked by CT_CELL_MASK, give the number of display
56  *   cells the character occupies (1 or 2).  Not valid for UTF-8 above 0x80.
57  * - CT_PRINT_CHAR bit is set when the character is printable (no need to
58  *   translate the character before displaying it).  Note that only DBCS
59  *   characters can have 2 display cells and still be printable.
60  * - CT_FNAME_CHAR bit is set when the character can be in a file name.
61  * - CT_ID_CHAR bit is set when the character can be in an identifier.
62  *
63  * Return FAIL if 'iskeyword', 'isident', 'isfname' or 'isprint' option has an
64  * error, OK otherwise.
65  */
66     int
init_chartab(void)67 init_chartab(void)
68 {
69     return buf_init_chartab(curbuf, TRUE);
70 }
71 
72     int
buf_init_chartab(buf_T * buf,int global)73 buf_init_chartab(
74     buf_T	*buf,
75     int		global)		// FALSE: only set buf->b_chartab[]
76 {
77     int		c;
78     int		c2;
79     char_u	*p;
80     int		i;
81     int		tilde;
82     int		do_isalpha;
83 
84     if (global)
85     {
86 	/*
87 	 * Set the default size for printable characters:
88 	 * From <Space> to '~' is 1 (printable), others are 2 (not printable).
89 	 * This also inits all 'isident' and 'isfname' flags to FALSE.
90 	 *
91 	 * EBCDIC: all chars below ' ' are not printable, all others are
92 	 * printable.
93 	 */
94 	c = 0;
95 	while (c < ' ')
96 	    g_chartab[c++] = (dy_flags & DY_UHEX) ? 4 : 2;
97 #ifdef EBCDIC
98 	while (c < 255)
99 #else
100 	while (c <= '~')
101 #endif
102 	    g_chartab[c++] = 1 + CT_PRINT_CHAR;
103 	while (c < 256)
104 	{
105 	    // UTF-8: bytes 0xa0 - 0xff are printable (latin1)
106 	    if (enc_utf8 && c >= 0xa0)
107 		g_chartab[c++] = CT_PRINT_CHAR + 1;
108 	    // euc-jp characters starting with 0x8e are single width
109 	    else if (enc_dbcs == DBCS_JPNU && c == 0x8e)
110 		g_chartab[c++] = CT_PRINT_CHAR + 1;
111 	    // other double-byte chars can be printable AND double-width
112 	    else if (enc_dbcs != 0 && MB_BYTE2LEN(c) == 2)
113 		g_chartab[c++] = CT_PRINT_CHAR + 2;
114 	    else
115 		// the rest is unprintable by default
116 		g_chartab[c++] = (dy_flags & DY_UHEX) ? 4 : 2;
117 	}
118 
119 	// Assume that every multi-byte char is a filename character.
120 	for (c = 1; c < 256; ++c)
121 	    if ((enc_dbcs != 0 && MB_BYTE2LEN(c) > 1)
122 		    || (enc_dbcs == DBCS_JPNU && c == 0x8e)
123 		    || (enc_utf8 && c >= 0xa0))
124 		g_chartab[c] |= CT_FNAME_CHAR;
125     }
126 
127     /*
128      * Init word char flags all to FALSE
129      */
130     CLEAR_FIELD(buf->b_chartab);
131     if (enc_dbcs != 0)
132 	for (c = 0; c < 256; ++c)
133 	{
134 	    // double-byte characters are probably word characters
135 	    if (MB_BYTE2LEN(c) == 2)
136 		SET_CHARTAB(buf, c);
137 	}
138 
139 #ifdef FEAT_LISP
140     /*
141      * In lisp mode the '-' character is included in keywords.
142      */
143     if (buf->b_p_lisp)
144 	SET_CHARTAB(buf, '-');
145 #endif
146 
147     // Walk through the 'isident', 'iskeyword', 'isfname' and 'isprint'
148     // options Each option is a list of characters, character numbers or
149     // ranges, separated by commas, e.g.: "200-210,x,#-178,-"
150     for (i = global ? 0 : 3; i <= 3; ++i)
151     {
152 	if (i == 0)
153 	    p = p_isi;		// first round: 'isident'
154 	else if (i == 1)
155 	    p = p_isp;		// second round: 'isprint'
156 	else if (i == 2)
157 	    p = p_isf;		// third round: 'isfname'
158 	else	// i == 3
159 	    p = buf->b_p_isk;	// fourth round: 'iskeyword'
160 
161 	while (*p)
162 	{
163 	    tilde = FALSE;
164 	    do_isalpha = FALSE;
165 	    if (*p == '^' && p[1] != NUL)
166 	    {
167 		tilde = TRUE;
168 		++p;
169 	    }
170 	    if (VIM_ISDIGIT(*p))
171 		c = getdigits(&p);
172 	    else if (has_mbyte)
173 		c = mb_ptr2char_adv(&p);
174 	    else
175 		c = *p++;
176 	    c2 = -1;
177 	    if (*p == '-' && p[1] != NUL)
178 	    {
179 		++p;
180 		if (VIM_ISDIGIT(*p))
181 		    c2 = getdigits(&p);
182 		else if (has_mbyte)
183 		    c2 = mb_ptr2char_adv(&p);
184 		else
185 		    c2 = *p++;
186 	    }
187 	    if (c <= 0 || c >= 256 || (c2 < c && c2 != -1) || c2 >= 256
188 						 || !(*p == NUL || *p == ','))
189 		return FAIL;
190 
191 	    if (c2 == -1)	// not a range
192 	    {
193 		/*
194 		 * A single '@' (not "@-@"):
195 		 * Decide on letters being ID/printable/keyword chars with
196 		 * standard function isalpha(). This takes care of locale for
197 		 * single-byte characters).
198 		 */
199 		if (c == '@')
200 		{
201 		    do_isalpha = TRUE;
202 		    c = 1;
203 		    c2 = 255;
204 		}
205 		else
206 		    c2 = c;
207 	    }
208 	    while (c <= c2)
209 	    {
210 		// Use the MB_ functions here, because isalpha() doesn't
211 		// work properly when 'encoding' is "latin1" and the locale is
212 		// "C".
213 		if (!do_isalpha || MB_ISLOWER(c) || MB_ISUPPER(c))
214 		{
215 		    if (i == 0)			// (re)set ID flag
216 		    {
217 			if (tilde)
218 			    g_chartab[c] &= ~CT_ID_CHAR;
219 			else
220 			    g_chartab[c] |= CT_ID_CHAR;
221 		    }
222 		    else if (i == 1)		// (re)set printable
223 		    {
224 			if ((c < ' '
225 #ifndef EBCDIC
226 				    || c > '~'
227 #endif
228 				// For double-byte we keep the cell width, so
229 				// that we can detect it from the first byte.
230 			    ) && !(enc_dbcs && MB_BYTE2LEN(c) == 2))
231 			{
232 			    if (tilde)
233 			    {
234 				g_chartab[c] = (g_chartab[c] & ~CT_CELL_MASK)
235 					     + ((dy_flags & DY_UHEX) ? 4 : 2);
236 				g_chartab[c] &= ~CT_PRINT_CHAR;
237 			    }
238 			    else
239 			    {
240 				g_chartab[c] = (g_chartab[c] & ~CT_CELL_MASK) + 1;
241 				g_chartab[c] |= CT_PRINT_CHAR;
242 			    }
243 			}
244 		    }
245 		    else if (i == 2)		// (re)set fname flag
246 		    {
247 			if (tilde)
248 			    g_chartab[c] &= ~CT_FNAME_CHAR;
249 			else
250 			    g_chartab[c] |= CT_FNAME_CHAR;
251 		    }
252 		    else // i == 3		 (re)set keyword flag
253 		    {
254 			if (tilde)
255 			    RESET_CHARTAB(buf, c);
256 			else
257 			    SET_CHARTAB(buf, c);
258 		    }
259 		}
260 		++c;
261 	    }
262 
263 	    c = *p;
264 	    p = skip_to_option_part(p);
265 	    if (c == ',' && *p == NUL)
266 		// Trailing comma is not allowed.
267 		return FAIL;
268 	}
269     }
270     chartab_initialized = TRUE;
271     return OK;
272 }
273 
274 /*
275  * Translate any special characters in buf[bufsize] in-place.
276  * The result is a string with only printable characters, but if there is not
277  * enough room, not all characters will be translated.
278  */
279     void
trans_characters(char_u * buf,int bufsize)280 trans_characters(
281     char_u	*buf,
282     int		bufsize)
283 {
284     int		len;		// length of string needing translation
285     int		room;		// room in buffer after string
286     char_u	*trs;		// translated character
287     int		trs_len;	// length of trs[]
288 
289     len = (int)STRLEN(buf);
290     room = bufsize - len;
291     while (*buf != 0)
292     {
293 	// Assume a multi-byte character doesn't need translation.
294 	if (has_mbyte && (trs_len = (*mb_ptr2len)(buf)) > 1)
295 	    len -= trs_len;
296 	else
297 	{
298 	    trs = transchar_byte(*buf);
299 	    trs_len = (int)STRLEN(trs);
300 	    if (trs_len > 1)
301 	    {
302 		room -= trs_len - 1;
303 		if (room <= 0)
304 		    return;
305 		mch_memmove(buf + trs_len, buf + 1, (size_t)len);
306 	    }
307 	    mch_memmove(buf, trs, (size_t)trs_len);
308 	    --len;
309 	}
310 	buf += trs_len;
311     }
312 }
313 
314 /*
315  * Translate a string into allocated memory, replacing special chars with
316  * printable chars.  Returns NULL when out of memory.
317  */
318     char_u *
transstr(char_u * s)319 transstr(char_u *s)
320 {
321     char_u	*res;
322     char_u	*p;
323     int		l, len, c;
324     char_u	hexbuf[11];
325 
326     if (has_mbyte)
327     {
328 	// Compute the length of the result, taking account of unprintable
329 	// multi-byte characters.
330 	len = 0;
331 	p = s;
332 	while (*p != NUL)
333 	{
334 	    if ((l = (*mb_ptr2len)(p)) > 1)
335 	    {
336 		c = (*mb_ptr2char)(p);
337 		p += l;
338 		if (vim_isprintc(c))
339 		    len += l;
340 		else
341 		{
342 		    transchar_hex(hexbuf, c);
343 		    len += (int)STRLEN(hexbuf);
344 		}
345 	    }
346 	    else
347 	    {
348 		l = byte2cells(*p++);
349 		if (l > 0)
350 		    len += l;
351 		else
352 		    len += 4;	// illegal byte sequence
353 	    }
354 	}
355 	res = alloc(len + 1);
356     }
357     else
358 	res = alloc(vim_strsize(s) + 1);
359     if (res != NULL)
360     {
361 	*res = NUL;
362 	p = s;
363 	while (*p != NUL)
364 	{
365 	    if (has_mbyte && (l = (*mb_ptr2len)(p)) > 1)
366 	    {
367 		c = (*mb_ptr2char)(p);
368 		if (vim_isprintc(c))
369 		    STRNCAT(res, p, l);	// append printable multi-byte char
370 		else
371 		    transchar_hex(res + STRLEN(res), c);
372 		p += l;
373 	    }
374 	    else
375 		STRCAT(res, transchar_byte(*p++));
376 	}
377     }
378     return res;
379 }
380 
381 /*
382  * Convert the string "str[orglen]" to do ignore-case comparing.  Uses the
383  * current locale.
384  * When "buf" is NULL returns an allocated string (NULL for out-of-memory).
385  * Otherwise puts the result in "buf[buflen]".
386  */
387     char_u *
str_foldcase(char_u * str,int orglen,char_u * buf,int buflen)388 str_foldcase(
389     char_u	*str,
390     int		orglen,
391     char_u	*buf,
392     int		buflen)
393 {
394     garray_T	ga;
395     int		i;
396     int		len = orglen;
397 
398 #define GA_CHAR(i)  ((char_u *)ga.ga_data)[i]
399 #define GA_PTR(i)   ((char_u *)ga.ga_data + i)
400 #define STR_CHAR(i)  (buf == NULL ? GA_CHAR(i) : buf[i])
401 #define STR_PTR(i)   (buf == NULL ? GA_PTR(i) : buf + i)
402 
403     // Copy "str" into "buf" or allocated memory, unmodified.
404     if (buf == NULL)
405     {
406 	ga_init2(&ga, 1, 10);
407 	if (ga_grow(&ga, len + 1) == FAIL)
408 	    return NULL;
409 	mch_memmove(ga.ga_data, str, (size_t)len);
410 	ga.ga_len = len;
411     }
412     else
413     {
414 	if (len >= buflen)	    // Ugly!
415 	    len = buflen - 1;
416 	mch_memmove(buf, str, (size_t)len);
417     }
418     if (buf == NULL)
419 	GA_CHAR(len) = NUL;
420     else
421 	buf[len] = NUL;
422 
423     // Make each character lower case.
424     i = 0;
425     while (STR_CHAR(i) != NUL)
426     {
427 	if (enc_utf8 || (has_mbyte && MB_BYTE2LEN(STR_CHAR(i)) > 1))
428 	{
429 	    if (enc_utf8)
430 	    {
431 		int	c = utf_ptr2char(STR_PTR(i));
432 		int	olen = utf_ptr2len(STR_PTR(i));
433 		int	lc = utf_tolower(c);
434 
435 		// Only replace the character when it is not an invalid
436 		// sequence (ASCII character or more than one byte) and
437 		// utf_tolower() doesn't return the original character.
438 		if ((c < 0x80 || olen > 1) && c != lc)
439 		{
440 		    int	    nlen = utf_char2len(lc);
441 
442 		    // If the byte length changes need to shift the following
443 		    // characters forward or backward.
444 		    if (olen != nlen)
445 		    {
446 			if (nlen > olen)
447 			{
448 			    if (buf == NULL
449 				    ? ga_grow(&ga, nlen - olen + 1) == FAIL
450 				    : len + nlen - olen >= buflen)
451 			    {
452 				// out of memory, keep old char
453 				lc = c;
454 				nlen = olen;
455 			    }
456 			}
457 			if (olen != nlen)
458 			{
459 			    if (buf == NULL)
460 			    {
461 				STRMOVE(GA_PTR(i) + nlen, GA_PTR(i) + olen);
462 				ga.ga_len += nlen - olen;
463 			    }
464 			    else
465 			    {
466 				STRMOVE(buf + i + nlen, buf + i + olen);
467 				len += nlen - olen;
468 			    }
469 			}
470 		    }
471 		    (void)utf_char2bytes(lc, STR_PTR(i));
472 		}
473 	    }
474 	    // skip to next multi-byte char
475 	    i += (*mb_ptr2len)(STR_PTR(i));
476 	}
477 	else
478 	{
479 	    if (buf == NULL)
480 		GA_CHAR(i) = TOLOWER_LOC(GA_CHAR(i));
481 	    else
482 		buf[i] = TOLOWER_LOC(buf[i]);
483 	    ++i;
484 	}
485     }
486 
487     if (buf == NULL)
488 	return (char_u *)ga.ga_data;
489     return buf;
490 }
491 
492 /*
493  * Catch 22: g_chartab[] can't be initialized before the options are
494  * initialized, and initializing options may cause transchar() to be called!
495  * When chartab_initialized == FALSE don't use g_chartab[].
496  * Does NOT work for multi-byte characters, c must be <= 255.
497  * Also doesn't work for the first byte of a multi-byte, "c" must be a
498  * character!
499  */
500 static char_u	transchar_charbuf[7];
501 
502     char_u *
transchar(int c)503 transchar(int c)
504 {
505     return transchar_buf(curbuf, c);
506 }
507 
508     char_u *
transchar_buf(buf_T * buf,int c)509 transchar_buf(buf_T *buf, int c)
510 {
511     int			i;
512 
513     i = 0;
514     if (IS_SPECIAL(c))	    // special key code, display as ~@ char
515     {
516 	transchar_charbuf[0] = '~';
517 	transchar_charbuf[1] = '@';
518 	i = 2;
519 	c = K_SECOND(c);
520     }
521 
522     if ((!chartab_initialized && (
523 #ifdef EBCDIC
524 		    (c >= 64 && c < 255)
525 #else
526 		    (c >= ' ' && c <= '~')
527 #endif
528 		)) || (c < 256 && vim_isprintc_strict(c)))
529     {
530 	// printable character
531 	transchar_charbuf[i] = c;
532 	transchar_charbuf[i + 1] = NUL;
533     }
534     else
535 	transchar_nonprint(buf, transchar_charbuf + i, c);
536     return transchar_charbuf;
537 }
538 
539 /*
540  * Like transchar(), but called with a byte instead of a character.  Checks
541  * for an illegal UTF-8 byte.
542  */
543     char_u *
transchar_byte(int c)544 transchar_byte(int c)
545 {
546     if (enc_utf8 && c >= 0x80)
547     {
548 	transchar_nonprint(curbuf, transchar_charbuf, c);
549 	return transchar_charbuf;
550     }
551     return transchar(c);
552 }
553 
554 /*
555  * Convert non-printable character to two or more printable characters in
556  * "buf[]".  "charbuf" needs to be able to hold five bytes.
557  * Does NOT work for multi-byte characters, c must be <= 255.
558  */
559     void
transchar_nonprint(buf_T * buf,char_u * charbuf,int c)560 transchar_nonprint(buf_T *buf, char_u *charbuf, int c)
561 {
562     if (c == NL)
563 	c = NUL;		// we use newline in place of a NUL
564     else if (c == CAR && get_fileformat(buf) == EOL_MAC)
565 	c = NL;			// we use CR in place of  NL in this case
566 
567     if (dy_flags & DY_UHEX)		// 'display' has "uhex"
568 	transchar_hex(charbuf, c);
569 
570 #ifdef EBCDIC
571     // For EBCDIC only the characters 0-63 and 255 are not printable
572     else if (CtrlChar(c) != 0 || c == DEL)
573 #else
574     else if (c <= 0x7f)			// 0x00 - 0x1f and 0x7f
575 #endif
576     {
577 	charbuf[0] = '^';
578 #ifdef EBCDIC
579 	if (c == DEL)
580 	    charbuf[1] = '?';		// DEL displayed as ^?
581 	else
582 	    charbuf[1] = CtrlChar(c);
583 #else
584 	charbuf[1] = c ^ 0x40;		// DEL displayed as ^?
585 #endif
586 
587 	charbuf[2] = NUL;
588     }
589     else if (enc_utf8 && c >= 0x80)
590     {
591 	transchar_hex(charbuf, c);
592     }
593 #ifndef EBCDIC
594     else if (c >= ' ' + 0x80 && c <= '~' + 0x80)    // 0xa0 - 0xfe
595     {
596 	charbuf[0] = '|';
597 	charbuf[1] = c - 0x80;
598 	charbuf[2] = NUL;
599     }
600 #else
601     else if (c < 64)
602     {
603 	charbuf[0] = '~';
604 	charbuf[1] = MetaChar(c);
605 	charbuf[2] = NUL;
606     }
607 #endif
608     else					    // 0x80 - 0x9f and 0xff
609     {
610 	/*
611 	 * TODO: EBCDIC I don't know what to do with this chars, so I display
612 	 * them as '~?' for now
613 	 */
614 	charbuf[0] = '~';
615 #ifdef EBCDIC
616 	charbuf[1] = '?';			// 0xff displayed as ~?
617 #else
618 	charbuf[1] = (c - 0x80) ^ 0x40;	// 0xff displayed as ~?
619 #endif
620 	charbuf[2] = NUL;
621     }
622 }
623 
624     void
transchar_hex(char_u * buf,int c)625 transchar_hex(char_u *buf, int c)
626 {
627     int		i = 0;
628 
629     buf[0] = '<';
630     if (c > 255)
631     {
632 	buf[++i] = nr2hex((unsigned)c >> 12);
633 	buf[++i] = nr2hex((unsigned)c >> 8);
634     }
635     buf[++i] = nr2hex((unsigned)c >> 4);
636     buf[++i] = nr2hex((unsigned)c);
637     buf[++i] = '>';
638     buf[++i] = NUL;
639 }
640 
/*
 * Return the hex digit for the lowest four bits of "c"; the other bits are
 * ignored.
 * Lower case letters are used, so that <f1> for 0xf1 is not confused with
 * function key <F1>.
 */
    static unsigned
nr2hex(unsigned c)
{
    unsigned	nibble = c & 0xf;

    return nibble + (nibble <= 9 ? '0' : 'a' - 10);
}
653 
654 /*
655  * Return number of display cells occupied by byte "b".
656  * Caller must make sure 0 <= b <= 255.
657  * For multi-byte mode "b" must be the first byte of a character.
658  * A TAB is counted as two cells: "^I".
659  * For UTF-8 mode this will return 0 for bytes >= 0x80, because the number of
660  * cells depends on further bytes.
661  */
662     int
byte2cells(int b)663 byte2cells(int b)
664 {
665     if (enc_utf8 && b >= 0x80)
666 	return 0;
667     return (g_chartab[b] & CT_CELL_MASK);
668 }
669 
670 /*
671  * Return number of display cells occupied by character "c".
672  * "c" can be a special key (negative number) in which case 3 or 4 is returned.
673  * A TAB is counted as two cells: "^I" or four: "<09>".
674  */
675     int
char2cells(int c)676 char2cells(int c)
677 {
678     if (IS_SPECIAL(c))
679 	return char2cells(K_SECOND(c)) + 2;
680     if (c >= 0x80)
681     {
682 	// UTF-8: above 0x80 need to check the value
683 	if (enc_utf8)
684 	    return utf_char2cells(c);
685 	// DBCS: double-byte means double-width, except for euc-jp with first
686 	// byte 0x8e
687 	if (enc_dbcs != 0 && c >= 0x100)
688 	{
689 	    if (enc_dbcs == DBCS_JPNU && ((unsigned)c >> 8) == 0x8e)
690 		return 1;
691 	    return 2;
692 	}
693     }
694     return (g_chartab[c & 0xff] & CT_CELL_MASK);
695 }
696 
697 /*
698  * Return number of display cells occupied by character at "*p".
699  * A TAB is counted as two cells: "^I" or four: "<09>".
700  */
701     int
ptr2cells(char_u * p)702 ptr2cells(char_u *p)
703 {
704     // For UTF-8 we need to look at more bytes if the first byte is >= 0x80.
705     if (enc_utf8 && *p >= 0x80)
706 	return utf_ptr2cells(p);
707     // For DBCS we can tell the cell count from the first byte.
708     return (g_chartab[*p] & CT_CELL_MASK);
709 }
710 
711 /*
712  * Return the number of character cells string "s" will take on the screen,
713  * counting TABs as two characters: "^I".
714  */
715     int
vim_strsize(char_u * s)716 vim_strsize(char_u *s)
717 {
718     return vim_strnsize(s, (int)MAXCOL);
719 }
720 
721 /*
722  * Return the number of character cells string "s[len]" will take on the
723  * screen, counting TABs as two characters: "^I".
724  */
725     int
vim_strnsize(char_u * s,int len)726 vim_strnsize(char_u *s, int len)
727 {
728     int		size = 0;
729 
730     while (*s != NUL && --len >= 0)
731 	if (has_mbyte)
732 	{
733 	    int	    l = (*mb_ptr2len)(s);
734 
735 	    size += ptr2cells(s);
736 	    s += l;
737 	    len -= l - 1;
738 	}
739 	else
740 	    size += byte2cells(*s++);
741 
742     return size;
743 }
744 
/*
 * Return from the calling function with the number of cells that the
 * character at "p" takes on the screen when displayed at column "col",
 * taking into account the size of a tab.
 * A TAB is expanded according to the tab settings of "buf", unless 'list' is
 * set in window "wp" without a "tab" item in 'listchars'.  Otherwise the
 * result is that of ptr2cells().
 * The macro ends in a statement, it must be used without a trailing
 * semicolon.  Every macro argument is parenthesized where operator
 * precedence could matter.
 * Use a define to make it fast, this is used very often!!!
 * Also see getvcol() below.
 */

#ifdef FEAT_VARTABS
# define RET_WIN_BUF_CHARTABSIZE(wp, buf, p, col) \
    if (*(p) == TAB && (!(wp)->w_p_list || (wp)->w_lcs_chars.tab1)) \
    { \
	return tabstop_padding(col, (buf)->b_p_ts, (buf)->b_p_vts_array); \
    } \
    else \
	return ptr2cells(p);
#else
# define RET_WIN_BUF_CHARTABSIZE(wp, buf, p, col) \
    if (*(p) == TAB && (!(wp)->w_p_list || (wp)->w_lcs_chars.tab1)) \
    { \
	int ts; \
	ts = (buf)->b_p_ts; \
	return (int)(ts - ((col) % ts)); \
    } \
    else \
	return ptr2cells(p);
#endif
771 
772     int
chartabsize(char_u * p,colnr_T col)773 chartabsize(char_u *p, colnr_T col)
774 {
775     RET_WIN_BUF_CHARTABSIZE(curwin, curbuf, p, col)
776 }
777 
#ifdef FEAT_LINEBREAK
/*
 * Like chartabsize(), but for window "wp" and its buffer instead of the
 * current window.
 */
    static int
win_chartabsize(win_T *wp, char_u *p, colnr_T col)
{
    // the macro returns from this function
    RET_WIN_BUF_CHARTABSIZE(wp, wp->w_buffer, p, col)
}
#endif
785 
786 /*
787  * Return the number of characters the string 's' will take on the screen,
788  * taking into account the size of a tab.
789  */
790     int
linetabsize(char_u * s)791 linetabsize(char_u *s)
792 {
793     return linetabsize_col(0, s);
794 }
795 
796 /*
797  * Like linetabsize(), but starting at column "startcol".
798  */
799     int
linetabsize_col(int startcol,char_u * s)800 linetabsize_col(int startcol, char_u *s)
801 {
802     colnr_T	col = startcol;
803     char_u	*line = s; // pointer to start of line, for breakindent
804 
805     while (*s != NUL)
806 	col += lbr_chartabsize_adv(line, &s, col);
807     return (int)col;
808 }
809 
810 /*
811  * Like linetabsize(), but for a given window instead of the current one.
812  */
813     int
win_linetabsize(win_T * wp,char_u * line,colnr_T len)814 win_linetabsize(win_T *wp, char_u *line, colnr_T len)
815 {
816     colnr_T	col = 0;
817     char_u	*s;
818 
819     for (s = line; *s != NUL && (len == MAXCOL || s < line + len);
820 								MB_PTR_ADV(s))
821 	col += win_lbr_chartabsize(wp, line, s, col, NULL);
822     return (int)col;
823 }
824 
825 /*
826  * Return TRUE if 'c' is a normal identifier character:
827  * Letters and characters from the 'isident' option.
828  */
829     int
vim_isIDc(int c)830 vim_isIDc(int c)
831 {
832     return (c > 0 && c < 0x100 && (g_chartab[c] & CT_ID_CHAR));
833 }
834 
/*
 * Like vim_isIDc(), but without using the 'isident' option: TRUE for ASCII
 * letters, digits and the underscore.
 */
    int
vim_isNormalIDc(int c)
{
    return c == '_' || ASCII_ISALNUM(c);
}
844 
845 /*
846  * return TRUE if 'c' is a keyword character: Letters and characters from
847  * 'iskeyword' option for the current buffer.
848  * For multi-byte characters mb_get_class() is used (builtin rules).
849  */
850     int
vim_iswordc(int c)851 vim_iswordc(int c)
852 {
853     return vim_iswordc_buf(c, curbuf);
854 }
855 
856     int
vim_iswordc_buf(int c,buf_T * buf)857 vim_iswordc_buf(int c, buf_T *buf)
858 {
859     if (c >= 0x100)
860     {
861 	if (enc_dbcs != 0)
862 	    return dbcs_class((unsigned)c >> 8, (unsigned)(c & 0xff)) >= 2;
863 	if (enc_utf8)
864 	    return utf_class_buf(c, buf) >= 2;
865 	return FALSE;
866     }
867     return (c > 0 && GET_CHARTAB(buf, c) != 0);
868 }
869 
870 /*
871  * Just like vim_iswordc() but uses a pointer to the (multi-byte) character.
872  */
873     int
vim_iswordp(char_u * p)874 vim_iswordp(char_u *p)
875 {
876     return vim_iswordp_buf(p, curbuf);
877 }
878 
879     int
vim_iswordp_buf(char_u * p,buf_T * buf)880 vim_iswordp_buf(char_u *p, buf_T *buf)
881 {
882     int	c = *p;
883 
884     if (has_mbyte && MB_BYTE2LEN(c) > 1)
885 	c = (*mb_ptr2char)(p);
886     return vim_iswordc_buf(c, buf);
887 }
888 
889 /*
890  * return TRUE if 'c' is a valid file-name character
891  * Assume characters above 0x100 are valid (multi-byte).
892  */
893     int
vim_isfilec(int c)894 vim_isfilec(int c)
895 {
896     return (c >= 0x100 || (c > 0 && (g_chartab[c] & CT_FNAME_CHAR)));
897 }
898 
899 /*
900  * return TRUE if 'c' is a valid file-name character or a wildcard character
901  * Assume characters above 0x100 are valid (multi-byte).
902  * Explicitly interpret ']' as a wildcard character as mch_has_wildcard("]")
903  * returns false.
904  */
905     int
vim_isfilec_or_wc(int c)906 vim_isfilec_or_wc(int c)
907 {
908     char_u buf[2];
909 
910     buf[0] = (char_u)c;
911     buf[1] = NUL;
912     return vim_isfilec(c) || c == ']' || mch_has_wildcard(buf);
913 }
914 
915 /*
916  * Return TRUE if 'c' is a printable character.
917  * Assume characters above 0x100 are printable (multi-byte), except for
918  * Unicode.
919  */
920     int
vim_isprintc(int c)921 vim_isprintc(int c)
922 {
923     if (enc_utf8 && c >= 0x100)
924 	return utf_printable(c);
925     return (c >= 0x100 || (c > 0 && (g_chartab[c] & CT_PRINT_CHAR)));
926 }
927 
928 /*
929  * Strict version of vim_isprintc(c), don't return TRUE if "c" is the head
930  * byte of a double-byte character.
931  */
932     int
vim_isprintc_strict(int c)933 vim_isprintc_strict(int c)
934 {
935     if (enc_dbcs != 0 && c < 0x100 && MB_BYTE2LEN(c) > 1)
936 	return FALSE;
937     if (enc_utf8 && c >= 0x100)
938 	return utf_printable(c);
939     return (c >= 0x100 || (c > 0 && (g_chartab[c] & CT_PRINT_CHAR)));
940 }
941 
942 /*
943  * like chartabsize(), but also check for line breaks on the screen
944  */
945     int
lbr_chartabsize(char_u * line UNUSED,unsigned char * s,colnr_T col)946 lbr_chartabsize(
947     char_u		*line UNUSED, // start of the line
948     unsigned char	*s,
949     colnr_T		col)
950 {
951 #ifdef FEAT_LINEBREAK
952     if (!curwin->w_p_lbr && *get_showbreak_value(curwin) == NUL
953 							   && !curwin->w_p_bri)
954     {
955 #endif
956 	if (curwin->w_p_wrap)
957 	    return win_nolbr_chartabsize(curwin, s, col, NULL);
958 	RET_WIN_BUF_CHARTABSIZE(curwin, curbuf, s, col)
959 #ifdef FEAT_LINEBREAK
960     }
961     return win_lbr_chartabsize(curwin, line == NULL ? s : line, s, col, NULL);
962 #endif
963 }
964 
965 /*
966  * Call lbr_chartabsize() and advance the pointer.
967  */
968     int
lbr_chartabsize_adv(char_u * line,char_u ** s,colnr_T col)969 lbr_chartabsize_adv(
970     char_u	*line, // start of the line
971     char_u	**s,
972     colnr_T	col)
973 {
974     int		retval;
975 
976     retval = lbr_chartabsize(line, *s, col);
977     MB_PTR_ADV(*s);
978     return retval;
979 }
980 
981 /*
982  * This function is used very often, keep it fast!!!!
983  *
984  * If "headp" not NULL, set *headp to the size of what we for 'showbreak'
985  * string at start of line.  Warning: *headp is only set if it's a non-zero
986  * value, init to 0 before calling.
987  */
988     int
win_lbr_chartabsize(win_T * wp,char_u * line UNUSED,char_u * s,colnr_T col,int * headp UNUSED)989 win_lbr_chartabsize(
990     win_T	*wp,
991     char_u	*line UNUSED, // start of the line
992     char_u	*s,
993     colnr_T	col,
994     int		*headp UNUSED)
995 {
996 #ifdef FEAT_LINEBREAK
997     int		c;
998     int		size;
999     colnr_T	col2;
1000     colnr_T	col_adj = 0; // col + screen size of tab
1001     colnr_T	colmax;
1002     int		added;
1003     int		mb_added = 0;
1004     int		numberextra;
1005     char_u	*ps;
1006     int		tab_corr = (*s == TAB);
1007     int		n;
1008     char_u	*sbr;
1009 
1010     /*
1011      * No 'linebreak', 'showbreak' and 'breakindent': return quickly.
1012      */
1013     if (!wp->w_p_lbr && !wp->w_p_bri && *get_showbreak_value(wp) == NUL)
1014 #endif
1015     {
1016 	if (wp->w_p_wrap)
1017 	    return win_nolbr_chartabsize(wp, s, col, headp);
1018 	RET_WIN_BUF_CHARTABSIZE(wp, wp->w_buffer, s, col)
1019     }
1020 
1021 #ifdef FEAT_LINEBREAK
1022     /*
1023      * First get normal size, without 'linebreak'
1024      */
1025     size = win_chartabsize(wp, s, col);
1026     c = *s;
1027     if (tab_corr)
1028 	col_adj = size - 1;
1029 
1030     /*
1031      * If 'linebreak' set check at a blank before a non-blank if the line
1032      * needs a break here
1033      */
1034     if (wp->w_p_lbr
1035 	    && VIM_ISBREAK(c)
1036 	    && !VIM_ISBREAK((int)s[1])
1037 	    && wp->w_p_wrap
1038 	    && wp->w_width != 0)
1039     {
1040 	/*
1041 	 * Count all characters from first non-blank after a blank up to next
1042 	 * non-blank after a blank.
1043 	 */
1044 	numberextra = win_col_off(wp);
1045 	col2 = col;
1046 	colmax = (colnr_T)(wp->w_width - numberextra - col_adj);
1047 	if (col >= colmax)
1048 	{
1049 	    colmax += col_adj;
1050 	    n = colmax +  win_col_off2(wp);
1051 	    if (n > 0)
1052 		colmax += (((col - colmax) / n) + 1) * n - col_adj;
1053 	}
1054 
1055 	for (;;)
1056 	{
1057 	    ps = s;
1058 	    MB_PTR_ADV(s);
1059 	    c = *s;
1060 	    if (!(c != NUL
1061 		    && (VIM_ISBREAK(c)
1062 			|| (!VIM_ISBREAK(c)
1063 			    && (col2 == col || !VIM_ISBREAK((int)*ps))))))
1064 		break;
1065 
1066 	    col2 += win_chartabsize(wp, s, col2);
1067 	    if (col2 >= colmax)		// doesn't fit
1068 	    {
1069 		size = colmax - col + col_adj;
1070 		break;
1071 	    }
1072 	}
1073     }
1074     else if (has_mbyte && size == 2 && MB_BYTE2LEN(*s) > 1
1075 				    && wp->w_p_wrap && in_win_border(wp, col))
1076     {
1077 	++size;		// Count the ">" in the last column.
1078 	mb_added = 1;
1079     }
1080 
1081     /*
1082      * May have to add something for 'breakindent' and/or 'showbreak'
1083      * string at start of line.
1084      * Set *headp to the size of what we add.
1085      */
1086     added = 0;
1087     sbr = get_showbreak_value(wp);
1088     if ((*sbr != NUL || wp->w_p_bri) && wp->w_p_wrap && col != 0)
1089     {
1090 	colnr_T sbrlen = 0;
1091 	int	numberwidth = win_col_off(wp);
1092 
1093 	numberextra = numberwidth;
1094 	col += numberextra + mb_added;
1095 	if (col >= (colnr_T)wp->w_width)
1096 	{
1097 	    col -= wp->w_width;
1098 	    numberextra = wp->w_width - (numberextra - win_col_off2(wp));
1099 	    if (col >= numberextra && numberextra > 0)
1100 		col %= numberextra;
1101 	    if (*sbr != NUL)
1102 	    {
1103 		sbrlen = (colnr_T)MB_CHARLEN(sbr);
1104 		if (col >= sbrlen)
1105 		    col -= sbrlen;
1106 	    }
1107 	    if (col >= numberextra && numberextra > 0)
1108 		col = col % numberextra;
1109 	    else if (col > 0 && numberextra > 0)
1110 		col += numberwidth - win_col_off2(wp);
1111 
1112 	    numberwidth -= win_col_off2(wp);
1113 	}
1114 	if (col == 0 || col + size + sbrlen > (colnr_T)wp->w_width)
1115 	{
1116 	    added = 0;
1117 	    if (*sbr != NUL)
1118 	    {
1119 		if (size + sbrlen + numberwidth > (colnr_T)wp->w_width)
1120 		{
1121 		    // calculate effective window width
1122 		    int width = (colnr_T)wp->w_width - sbrlen - numberwidth;
1123 		    int prev_width = col
1124 				 ? ((colnr_T)wp->w_width - (sbrlen + col)) : 0;
1125 
1126 		    if (width <= 0)
1127 			width = (colnr_T)1;
1128 		    added += ((size - prev_width) / width) * vim_strsize(sbr);
1129 		    if ((size - prev_width) % width)
1130 			// wrapped, add another length of 'sbr'
1131 			added += vim_strsize(sbr);
1132 		}
1133 		else
1134 		    added += vim_strsize(sbr);
1135 	    }
1136 	    if (wp->w_p_bri)
1137 		added += get_breakindent_win(wp, line);
1138 
1139 	    size += added;
1140 	    if (col != 0)
1141 		added = 0;
1142 	}
1143     }
1144     if (headp != NULL)
1145 	*headp = added + mb_added;
1146     return size;
1147 #endif
1148 }
1149 
1150 /*
1151  * Like win_lbr_chartabsize(), except that we know 'linebreak' is off and
1152  * 'wrap' is on.  This means we need to check for a double-byte character that
1153  * doesn't fit at the end of the screen line.
1154  */
1155     static int
win_nolbr_chartabsize(win_T * wp,char_u * s,colnr_T col,int * headp)1156 win_nolbr_chartabsize(
1157     win_T	*wp,
1158     char_u	*s,
1159     colnr_T	col,
1160     int		*headp)
1161 {
1162     int		n;
1163 
1164     if (*s == TAB && (!wp->w_p_list || wp->w_lcs_chars.tab1))
1165     {
1166 # ifdef FEAT_VARTABS
1167 	return tabstop_padding(col, wp->w_buffer->b_p_ts,
1168 				    wp->w_buffer->b_p_vts_array);
1169 # else
1170 	n = wp->w_buffer->b_p_ts;
1171 	return (int)(n - (col % n));
1172 # endif
1173     }
1174     n = ptr2cells(s);
1175     // Add one cell for a double-width character in the last column of the
1176     // window, displayed with a ">".
1177     if (n == 2 && MB_BYTE2LEN(*s) > 1 && in_win_border(wp, col))
1178     {
1179 	if (headp != NULL)
1180 	    *headp = 1;
1181 	return 3;
1182     }
1183     return n;
1184 }
1185 
1186 /*
1187  * Return TRUE if virtual column "vcol" is in the rightmost column of window
1188  * "wp".
1189  */
1190     static int
in_win_border(win_T * wp,colnr_T vcol)1191 in_win_border(win_T *wp, colnr_T vcol)
1192 {
1193     int		width1;		// width of first line (after line number)
1194     int		width2;		// width of further lines
1195 
1196     if (wp->w_width == 0)	// there is no border
1197 	return FALSE;
1198     width1 = wp->w_width - win_col_off(wp);
1199     if ((int)vcol < width1 - 1)
1200 	return FALSE;
1201     if ((int)vcol == width1 - 1)
1202 	return TRUE;
1203     width2 = width1 + win_col_off2(wp);
1204     if (width2 <= 0)
1205 	return FALSE;
1206     return ((vcol - width1) % width2 == width2 - 1);
1207 }
1208 
1209 /*
1210  * Get virtual column number of pos.
1211  *  start: on the first position of this character (TAB, ctrl)
1212  * cursor: where the cursor is on this character (first char, except for TAB)
1213  *    end: on the last position of this character (TAB, ctrl)
1214  *
1215  * This is used very often, keep it fast!
1216  */
1217     void
getvcol(win_T * wp,pos_T * pos,colnr_T * start,colnr_T * cursor,colnr_T * end)1218 getvcol(
1219     win_T	*wp,
1220     pos_T	*pos,
1221     colnr_T	*start,
1222     colnr_T	*cursor,
1223     colnr_T	*end)
1224 {
1225     colnr_T	vcol;
1226     char_u	*ptr;		// points to current char
1227     char_u	*posptr;	// points to char at pos->col
1228     char_u	*line;		// start of the line
1229     int		incr;
1230     int		head;
1231 #ifdef FEAT_VARTABS
1232     int		*vts = wp->w_buffer->b_p_vts_array;
1233 #endif
1234     int		ts = wp->w_buffer->b_p_ts;
1235     int		c;
1236 
1237     vcol = 0;
1238     line = ptr = ml_get_buf(wp->w_buffer, pos->lnum, FALSE);
1239     if (pos->col == MAXCOL)
1240 	posptr = NULL;  // continue until the NUL
1241     else
1242     {
1243 	// Special check for an empty line, which can happen on exit, when
1244 	// ml_get_buf() always returns an empty string.
1245 	if (*ptr == NUL)
1246 	    pos->col = 0;
1247 	posptr = ptr + pos->col;
1248 	if (has_mbyte)
1249 	    // always start on the first byte
1250 	    posptr -= (*mb_head_off)(line, posptr);
1251     }
1252 
1253     /*
1254      * This function is used very often, do some speed optimizations.
1255      * When 'list', 'linebreak', 'showbreak' and 'breakindent' are not set
1256      * use a simple loop.
1257      * Also use this when 'list' is set but tabs take their normal size.
1258      */
1259     if ((!wp->w_p_list || wp->w_lcs_chars.tab1 != NUL)
1260 #ifdef FEAT_LINEBREAK
1261 	    && !wp->w_p_lbr && *get_showbreak_value(wp) == NUL && !wp->w_p_bri
1262 #endif
1263        )
1264     {
1265 	for (;;)
1266 	{
1267 	    head = 0;
1268 	    c = *ptr;
1269 	    // make sure we don't go past the end of the line
1270 	    if (c == NUL)
1271 	    {
1272 		incr = 1;	// NUL at end of line only takes one column
1273 		break;
1274 	    }
1275 	    // A tab gets expanded, depending on the current column
1276 	    if (c == TAB)
1277 #ifdef FEAT_VARTABS
1278 		incr = tabstop_padding(vcol, ts, vts);
1279 #else
1280 		incr = ts - (vcol % ts);
1281 #endif
1282 	    else
1283 	    {
1284 		if (has_mbyte)
1285 		{
1286 		    // For utf-8, if the byte is >= 0x80, need to look at
1287 		    // further bytes to find the cell width.
1288 		    if (enc_utf8 && c >= 0x80)
1289 			incr = utf_ptr2cells(ptr);
1290 		    else
1291 			incr = g_chartab[c] & CT_CELL_MASK;
1292 
1293 		    // If a double-cell char doesn't fit at the end of a line
1294 		    // it wraps to the next line, it's like this char is three
1295 		    // cells wide.
1296 		    if (incr == 2 && wp->w_p_wrap && MB_BYTE2LEN(*ptr) > 1
1297 			    && in_win_border(wp, vcol))
1298 		    {
1299 			++incr;
1300 			head = 1;
1301 		    }
1302 		}
1303 		else
1304 		    incr = g_chartab[c] & CT_CELL_MASK;
1305 	    }
1306 
1307 	    if (posptr != NULL && ptr >= posptr) // character at pos->col
1308 		break;
1309 
1310 	    vcol += incr;
1311 	    MB_PTR_ADV(ptr);
1312 	}
1313     }
1314     else
1315     {
1316 	for (;;)
1317 	{
1318 	    // A tab gets expanded, depending on the current column
1319 	    head = 0;
1320 	    incr = win_lbr_chartabsize(wp, line, ptr, vcol, &head);
1321 	    // make sure we don't go past the end of the line
1322 	    if (*ptr == NUL)
1323 	    {
1324 		incr = 1;	// NUL at end of line only takes one column
1325 		break;
1326 	    }
1327 
1328 	    if (posptr != NULL && ptr >= posptr) // character at pos->col
1329 		break;
1330 
1331 	    vcol += incr;
1332 	    MB_PTR_ADV(ptr);
1333 	}
1334     }
1335     if (start != NULL)
1336 	*start = vcol + head;
1337     if (end != NULL)
1338 	*end = vcol + incr - 1;
1339     if (cursor != NULL)
1340     {
1341 	if (*ptr == TAB
1342 		&& (State & NORMAL)
1343 		&& !wp->w_p_list
1344 		&& !virtual_active()
1345 		&& !(VIsual_active
1346 				&& (*p_sel == 'e' || LTOREQ_POS(*pos, VIsual)))
1347 		)
1348 	    *cursor = vcol + incr - 1;	    // cursor at end
1349 	else
1350 	    *cursor = vcol + head;	    // cursor at start
1351     }
1352 }
1353 
1354 /*
1355  * Get virtual cursor column in the current window, pretending 'list' is off.
1356  */
1357     colnr_T
getvcol_nolist(pos_T * posp)1358 getvcol_nolist(pos_T *posp)
1359 {
1360     int		list_save = curwin->w_p_list;
1361     colnr_T	vcol;
1362 
1363     curwin->w_p_list = FALSE;
1364     if (posp->coladd)
1365 	getvvcol(curwin, posp, NULL, &vcol, NULL);
1366     else
1367 	getvcol(curwin, posp, NULL, &vcol, NULL);
1368     curwin->w_p_list = list_save;
1369     return vcol;
1370 }
1371 
1372 /*
1373  * Get virtual column in virtual mode.
1374  */
1375     void
getvvcol(win_T * wp,pos_T * pos,colnr_T * start,colnr_T * cursor,colnr_T * end)1376 getvvcol(
1377     win_T	*wp,
1378     pos_T	*pos,
1379     colnr_T	*start,
1380     colnr_T	*cursor,
1381     colnr_T	*end)
1382 {
1383     colnr_T	col;
1384     colnr_T	coladd;
1385     colnr_T	endadd;
1386     char_u	*ptr;
1387 
1388     if (virtual_active())
1389     {
1390 	// For virtual mode, only want one value
1391 	getvcol(wp, pos, &col, NULL, NULL);
1392 
1393 	coladd = pos->coladd;
1394 	endadd = 0;
1395 	// Cannot put the cursor on part of a wide character.
1396 	ptr = ml_get_buf(wp->w_buffer, pos->lnum, FALSE);
1397 	if (pos->col < (colnr_T)STRLEN(ptr))
1398 	{
1399 	    int c = (*mb_ptr2char)(ptr + pos->col);
1400 
1401 	    if (c != TAB && vim_isprintc(c))
1402 	    {
1403 		endadd = (colnr_T)(char2cells(c) - 1);
1404 		if (coladd > endadd)	// past end of line
1405 		    endadd = 0;
1406 		else
1407 		    coladd = 0;
1408 	    }
1409 	}
1410 	col += coladd;
1411 	if (start != NULL)
1412 	    *start = col;
1413 	if (cursor != NULL)
1414 	    *cursor = col;
1415 	if (end != NULL)
1416 	    *end = col + endadd;
1417     }
1418     else
1419 	getvcol(wp, pos, start, cursor, end);
1420 }
1421 
1422 /*
1423  * Get the leftmost and rightmost virtual column of pos1 and pos2.
1424  * Used for Visual block mode.
1425  */
1426     void
getvcols(win_T * wp,pos_T * pos1,pos_T * pos2,colnr_T * left,colnr_T * right)1427 getvcols(
1428     win_T	*wp,
1429     pos_T	*pos1,
1430     pos_T	*pos2,
1431     colnr_T	*left,
1432     colnr_T	*right)
1433 {
1434     colnr_T	from1, from2, to1, to2;
1435 
1436     if (LT_POSP(pos1, pos2))
1437     {
1438 	getvvcol(wp, pos1, &from1, NULL, &to1);
1439 	getvvcol(wp, pos2, &from2, NULL, &to2);
1440     }
1441     else
1442     {
1443 	getvvcol(wp, pos2, &from1, NULL, &to1);
1444 	getvvcol(wp, pos1, &from2, NULL, &to2);
1445     }
1446     if (from2 < from1)
1447 	*left = from2;
1448     else
1449 	*left = from1;
1450     if (to2 > to1)
1451     {
1452 	if (*p_sel == 'e' && from2 - 1 >= to1)
1453 	    *right = from2 - 1;
1454 	else
1455 	    *right = to2;
1456     }
1457     else
1458 	*right = to1;
1459 }
1460 
1461 /*
1462  * skipwhite: skip over ' ' and '\t'.
1463  */
1464     char_u *
skipwhite(char_u * q)1465 skipwhite(char_u *q)
1466 {
1467     char_u	*p = q;
1468 
1469     while (VIM_ISWHITE(*p)) // skip to next non-white
1470 	++p;
1471     return p;
1472 }
1473 
/*
 * getwhitecols_curline: return the number of whitespace columns (bytes) at
 * the start of the current line.
 * getwhitecols() does the same for a given line.
 */
    int
getwhitecols_curline(void)
{
    return getwhitecols(ml_get_curline());
}
1483 
1484     int
getwhitecols(char_u * p)1485 getwhitecols(char_u *p)
1486 {
1487     return skipwhite(p) - p;
1488 }
1489 
1490 /*
1491  * skip over digits
1492  */
1493     char_u *
skipdigits(char_u * q)1494 skipdigits(char_u *q)
1495 {
1496     char_u	*p = q;
1497 
1498     while (VIM_ISDIGIT(*p))	// skip to next non-digit
1499 	++p;
1500     return p;
1501 }
1502 
#if defined(FEAT_SYN_HL) || defined(FEAT_SPELL) || defined(PROTO)
/*
 * Skip over binary digits.
 * Returns a pointer to the first character in "q" that is not '0' or '1'.
 */
    char_u *
skipbin(char_u *q)
{
    char_u	*p;

    for (p = q; vim_isbdigit(*p); ++p)
	;
    return p;
}

/*
 * Skip over digits and hex characters.
 * Returns a pointer to the first character in "q" that is not a hex digit.
 */
    char_u *
skiphex(char_u *q)
{
    char_u	*p;

    for (p = q; vim_isxdigit(*p); ++p)
	;
    return p;
}
#endif
1530 
1531 /*
1532  * skip to bin digit (or NUL after the string)
1533  */
1534     char_u *
skiptobin(char_u * q)1535 skiptobin(char_u *q)
1536 {
1537     char_u	*p = q;
1538 
1539     while (*p != NUL && !vim_isbdigit(*p))	// skip to next digit
1540 	++p;
1541     return p;
1542 }
1543 
1544 /*
1545  * skip to digit (or NUL after the string)
1546  */
1547     char_u *
skiptodigit(char_u * q)1548 skiptodigit(char_u *q)
1549 {
1550     char_u	*p = q;
1551 
1552     while (*p != NUL && !VIM_ISDIGIT(*p))	// skip to next digit
1553 	++p;
1554     return p;
1555 }
1556 
1557 /*
1558  * skip to hex character (or NUL after the string)
1559  */
1560     char_u *
skiptohex(char_u * q)1561 skiptohex(char_u *q)
1562 {
1563     char_u	*p = q;
1564 
1565     while (*p != NUL && !vim_isxdigit(*p))	// skip to next digit
1566 	++p;
1567     return p;
1568 }
1569 
/*
 * Variant of isdigit() that can handle characters > 0x100.
 * We don't use isdigit() here, because on some systems it also considers
 * superscript 1 to be a digit.
 * Use the VIM_ISDIGIT() macro for simple arguments.
 * Returns 1 when "c" is '0' - '9', 0 otherwise.
 */
    int
vim_isdigit(int c)
{
    return !(c < '0' || c > '9');
}
1581 
/*
 * Variant of isxdigit() that can handle characters > 0x100.
 * We don't use isxdigit() here, because on some systems it also considers
 * superscript 1 to be a digit.
 * Returns 1 when "c" is '0' - '9', 'a' - 'f' or 'A' - 'F', 0 otherwise.
 */
    int
vim_isxdigit(int c)
{
    if (c >= '0' && c <= '9')
	return 1;
    if (c >= 'a' && c <= 'f')
	return 1;
    return c >= 'A' && c <= 'F';
}
1594 
/*
 * Binary digit counterpart of vim_isdigit() and vim_isxdigit() that can
 * handle characters > 0x100.
 * Returns 1 when "c" is '0' or '1', 0 otherwise.
 */
    int
vim_isbdigit(int c)
{
    return !(c != '0' && c != '1');
}
1604 
/*
 * Octal digit counterpart of vim_isdigit().
 * Returns 1 when "c" is '0' - '7', 0 otherwise.
 */
    static int
vim_isodigit(int c)
{
    return !(c < '0' || c > '7');
}
1610 
1611 /*
1612  * Vim's own character class functions.  These exist because many library
1613  * islower()/toupper() etc. do not work properly: they crash when used with
1614  * invalid values or can't handle latin1 when the locale is C.
1615  * Speed is most important here.
1616  */
1617 #define LATIN1LOWER 'l'
1618 #define LATIN1UPPER 'U'
1619 
1620 static char_u latin1flags[257] = "                                                                 UUUUUUUUUUUUUUUUUUUUUUUUUU      llllllllllllllllllllllllll                                                                     UUUUUUUUUUUUUUUUUUUUUUU UUUUUUUllllllllllllllllllllllll llllllll";
1621 static char_u latin1upper[257] = "                                 !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`ABCDEFGHIJKLMNOPQRSTUVWXYZ{|}~\x7f\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xf7\xd8\xd9\xda\xdb\xdc\xdd\xde\xff";
1622 static char_u latin1lower[257] = "                                 !\"#$%&'()*+,-./0123456789:;<=>?@abcdefghijklmnopqrstuvwxyz[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\x7f\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xd7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xdf\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff";
1623 
1624     int
vim_islower(int c)1625 vim_islower(int c)
1626 {
1627     if (c <= '@')
1628 	return FALSE;
1629     if (c >= 0x80)
1630     {
1631 	if (enc_utf8)
1632 	    return utf_islower(c);
1633 	if (c >= 0x100)
1634 	{
1635 #ifdef HAVE_ISWLOWER
1636 	    if (has_mbyte)
1637 		return iswlower(c);
1638 #endif
1639 	    // islower() can't handle these chars and may crash
1640 	    return FALSE;
1641 	}
1642 	if (enc_latin1like)
1643 	    return (latin1flags[c] & LATIN1LOWER) == LATIN1LOWER;
1644     }
1645     return islower(c);
1646 }
1647 
1648     int
vim_isupper(int c)1649 vim_isupper(int c)
1650 {
1651     if (c <= '@')
1652 	return FALSE;
1653     if (c >= 0x80)
1654     {
1655 	if (enc_utf8)
1656 	    return utf_isupper(c);
1657 	if (c >= 0x100)
1658 	{
1659 #ifdef HAVE_ISWUPPER
1660 	    if (has_mbyte)
1661 		return iswupper(c);
1662 #endif
1663 	    // islower() can't handle these chars and may crash
1664 	    return FALSE;
1665 	}
1666 	if (enc_latin1like)
1667 	    return (latin1flags[c] & LATIN1UPPER) == LATIN1UPPER;
1668     }
1669     return isupper(c);
1670 }
1671 
1672     int
vim_toupper(int c)1673 vim_toupper(int c)
1674 {
1675     if (c <= '@')
1676 	return c;
1677     if (c >= 0x80 || !(cmp_flags & CMP_KEEPASCII))
1678     {
1679 	if (enc_utf8)
1680 	    return utf_toupper(c);
1681 	if (c >= 0x100)
1682 	{
1683 #ifdef HAVE_TOWUPPER
1684 	    if (has_mbyte)
1685 		return towupper(c);
1686 #endif
1687 	    // toupper() can't handle these chars and may crash
1688 	    return c;
1689 	}
1690 	if (enc_latin1like)
1691 	    return latin1upper[c];
1692     }
1693     if (c < 0x80 && (cmp_flags & CMP_KEEPASCII))
1694 	return TOUPPER_ASC(c);
1695     return TOUPPER_LOC(c);
1696 }
1697 
1698     int
vim_tolower(int c)1699 vim_tolower(int c)
1700 {
1701     if (c <= '@')
1702 	return c;
1703     if (c >= 0x80 || !(cmp_flags & CMP_KEEPASCII))
1704     {
1705 	if (enc_utf8)
1706 	    return utf_tolower(c);
1707 	if (c >= 0x100)
1708 	{
1709 #ifdef HAVE_TOWLOWER
1710 	    if (has_mbyte)
1711 		return towlower(c);
1712 #endif
1713 	    // tolower() can't handle these chars and may crash
1714 	    return c;
1715 	}
1716 	if (enc_latin1like)
1717 	    return latin1lower[c];
1718     }
1719     if (c < 0x80 && (cmp_flags & CMP_KEEPASCII))
1720 	return TOLOWER_ASC(c);
1721     return TOLOWER_LOC(c);
1722 }
1723 
1724 /*
1725  * skiptowhite: skip over text until ' ' or '\t' or NUL.
1726  */
1727     char_u *
skiptowhite(char_u * p)1728 skiptowhite(char_u *p)
1729 {
1730     while (*p != ' ' && *p != '\t' && *p != NUL)
1731 	++p;
1732     return p;
1733 }
1734 
1735 /*
1736  * skiptowhite_esc: Like skiptowhite(), but also skip escaped chars
1737  */
1738     char_u *
skiptowhite_esc(char_u * p)1739 skiptowhite_esc(char_u *p)
1740 {
1741     while (*p != ' ' && *p != '\t' && *p != NUL)
1742     {
1743 	if ((*p == '\\' || *p == Ctrl_V) && *(p + 1) != NUL)
1744 	    ++p;
1745 	++p;
1746     }
1747     return p;
1748 }
1749 
1750 /*
1751  * Getdigits: Get a number from a string and skip over it.
1752  * Note: the argument is a pointer to a char_u pointer!
1753  */
1754     long
getdigits(char_u ** pp)1755 getdigits(char_u **pp)
1756 {
1757     char_u	*p;
1758     long	retval;
1759 
1760     p = *pp;
1761     retval = atol((char *)p);
1762     if (*p == '-')		// skip negative sign
1763 	++p;
1764     p = skipdigits(p);		// skip to next non-digit
1765     *pp = p;
1766     return retval;
1767 }
1768 
1769 /*
1770  * Return TRUE if "lbuf" is empty or only contains blanks.
1771  */
1772     int
vim_isblankline(char_u * lbuf)1773 vim_isblankline(char_u *lbuf)
1774 {
1775     char_u	*p;
1776 
1777     p = skipwhite(lbuf);
1778     return (*p == NUL || *p == '\r' || *p == '\n');
1779 }
1780 
1781 /*
1782  * Convert a string into a long and/or unsigned long, taking care of
1783  * hexadecimal, octal, and binary numbers.  Accepts a '-' sign.
1784  * If "prep" is not NULL, returns a flag to indicate the type of the number:
1785  *  0	    decimal
1786  *  '0'	    octal
1787  *  'O'	    octal
1788  *  'o'	    octal
1789  *  'B'	    bin
1790  *  'b'	    bin
1791  *  'X'	    hex
1792  *  'x'	    hex
1793  * If "len" is not NULL, the length of the number in characters is returned.
1794  * If "nptr" is not NULL, the signed result is returned in it.
1795  * If "unptr" is not NULL, the unsigned result is returned in it.
1796  * If "what" contains STR2NR_BIN recognize binary numbers
1797  * If "what" contains STR2NR_OCT recognize octal numbers
1798  * If "what" contains STR2NR_HEX recognize hex numbers
1799  * If "what" contains STR2NR_FORCE always assume bin/oct/hex.
1800  * If "what" contains STR2NR_QUOTE ignore embedded single quotes
1801  * If maxlen > 0, check at a maximum maxlen chars.
1802  * If strict is TRUE, check the number strictly. return *len = 0 if fail.
1803  */
1804     void
vim_str2nr(char_u * start,int * prep,int * len,int what,varnumber_T * nptr,uvarnumber_T * unptr,int maxlen,int strict)1805 vim_str2nr(
1806     char_u		*start,
1807     int			*prep,	    // return: type of number 0 = decimal, 'x'
1808 				    // or 'X' is hex, '0', 'o' or 'O' is octal,
1809 				    // 'b' or 'B' is bin
1810     int			*len,	    // return: detected length of number
1811     int			what,	    // what numbers to recognize
1812     varnumber_T		*nptr,	    // return: signed result
1813     uvarnumber_T	*unptr,	    // return: unsigned result
1814     int			maxlen,     // max length of string to check
1815     int			strict)     // check strictly
1816 {
1817     char_u	    *ptr = start;
1818     int		    pre = 0;		// default is decimal
1819     int		    negative = FALSE;
1820     uvarnumber_T    un = 0;
1821     int		    n;
1822 
1823     if (len != NULL)
1824 	*len = 0;
1825 
1826     if (ptr[0] == '-')
1827     {
1828 	negative = TRUE;
1829 	++ptr;
1830     }
1831 
1832     // Recognize hex, octal, and bin.
1833     if (ptr[0] == '0' && ptr[1] != '8' && ptr[1] != '9'
1834 					       && (maxlen == 0 || maxlen > 1))
1835     {
1836 	pre = ptr[1];
1837 	if ((what & STR2NR_HEX)
1838 		&& (pre == 'X' || pre == 'x') && vim_isxdigit(ptr[2])
1839 		&& (maxlen == 0 || maxlen > 2))
1840 	    // hexadecimal
1841 	    ptr += 2;
1842 	else if ((what & STR2NR_BIN)
1843 		&& (pre == 'B' || pre == 'b') && vim_isbdigit(ptr[2])
1844 		&& (maxlen == 0 || maxlen > 2))
1845 	    // binary
1846 	    ptr += 2;
1847 	else if ((what & STR2NR_OOCT)
1848 		&& (pre == 'O' || pre == 'o') && vim_isodigit(ptr[2])
1849 		&& (maxlen == 0 || maxlen > 2))
1850 	    // octal with prefix "0o"
1851 	    ptr += 2;
1852 	else
1853 	{
1854 	    // decimal or octal, default is decimal
1855 	    pre = 0;
1856 	    if (what & STR2NR_OCT)
1857 	    {
1858 		// Don't interpret "0", "08" or "0129" as octal.
1859 		for (n = 1; n != maxlen && VIM_ISDIGIT(ptr[n]); ++n)
1860 		{
1861 		    if (ptr[n] > '7')
1862 		    {
1863 			pre = 0;	// can't be octal
1864 			break;
1865 		    }
1866 		    pre = '0';	// assume octal
1867 		}
1868 	    }
1869 	}
1870     }
1871 
1872     // Do the conversion manually to avoid sscanf() quirks.
1873     n = 1;
1874     if (pre == 'B' || pre == 'b'
1875 			     || ((what & STR2NR_BIN) && (what & STR2NR_FORCE)))
1876     {
1877 	// bin
1878 	if (pre != 0)
1879 	    n += 2;	    // skip over "0b"
1880 	while ('0' <= *ptr && *ptr <= '1')
1881 	{
1882 	    // avoid ubsan error for overflow
1883 	    if (un <= UVARNUM_MAX / 2)
1884 		un = 2 * un + (uvarnumber_T)(*ptr - '0');
1885 	    else
1886 		un = UVARNUM_MAX;
1887 	    ++ptr;
1888 	    if (n++ == maxlen)
1889 		break;
1890 	    if ((what & STR2NR_QUOTE) && *ptr == '\''
1891 					     && '0' <= ptr[1] && ptr[1] <= '1')
1892 	    {
1893 		++ptr;
1894 		if (n++ == maxlen)
1895 		    break;
1896 	    }
1897 	}
1898     }
1899     else if (pre == 'O' || pre == 'o' ||
1900 		pre == '0' || ((what & STR2NR_OCT) && (what & STR2NR_FORCE)))
1901     {
1902 	// octal
1903 	if (pre != 0 && pre != '0')
1904 	    n += 2;	    // skip over "0o"
1905 	while ('0' <= *ptr && *ptr <= '7')
1906 	{
1907 	    // avoid ubsan error for overflow
1908 	    if (un <= UVARNUM_MAX / 8)
1909 		un = 8 * un + (uvarnumber_T)(*ptr - '0');
1910 	    else
1911 		un = UVARNUM_MAX;
1912 	    ++ptr;
1913 	    if (n++ == maxlen)
1914 		break;
1915 	    if ((what & STR2NR_QUOTE) && *ptr == '\''
1916 					     && '0' <= ptr[1] && ptr[1] <= '7')
1917 	    {
1918 		++ptr;
1919 		if (n++ == maxlen)
1920 		    break;
1921 	    }
1922 	}
1923     }
1924     else if (pre != 0 || ((what & STR2NR_HEX) && (what & STR2NR_FORCE)))
1925     {
1926 	// hex
1927 	if (pre != 0)
1928 	    n += 2;	    // skip over "0x"
1929 	while (vim_isxdigit(*ptr))
1930 	{
1931 	    // avoid ubsan error for overflow
1932 	    if (un <= UVARNUM_MAX / 16)
1933 		un = 16 * un + (uvarnumber_T)hex2nr(*ptr);
1934 	    else
1935 		un = UVARNUM_MAX;
1936 	    ++ptr;
1937 	    if (n++ == maxlen)
1938 		break;
1939 	    if ((what & STR2NR_QUOTE) && *ptr == '\'' && vim_isxdigit(ptr[1]))
1940 	    {
1941 		++ptr;
1942 		if (n++ == maxlen)
1943 		    break;
1944 	    }
1945 	}
1946     }
1947     else
1948     {
1949 	// decimal
1950 	while (VIM_ISDIGIT(*ptr))
1951 	{
1952 	    uvarnumber_T    digit = (uvarnumber_T)(*ptr - '0');
1953 
1954 	    // avoid ubsan error for overflow
1955 	    if (un < UVARNUM_MAX / 10
1956 		    || (un == UVARNUM_MAX / 10 && digit <= UVARNUM_MAX % 10))
1957 		un = 10 * un + digit;
1958 	    else
1959 		un = UVARNUM_MAX;
1960 	    ++ptr;
1961 	    if (n++ == maxlen)
1962 		break;
1963 	    if ((what & STR2NR_QUOTE) && *ptr == '\'' && VIM_ISDIGIT(ptr[1]))
1964 	    {
1965 		++ptr;
1966 		if (n++ == maxlen)
1967 		    break;
1968 	    }
1969 	}
1970     }
1971 
1972     // Check for an alphanumeric character immediately following, that is
1973     // most likely a typo.
1974     if (strict && n - 1 != maxlen && ASCII_ISALNUM(*ptr))
1975 	return;
1976 
1977     if (prep != NULL)
1978 	*prep = pre;
1979     if (len != NULL)
1980 	*len = (int)(ptr - start);
1981     if (nptr != NULL)
1982     {
1983 	if (negative)   // account for leading '-' for decimal numbers
1984 	{
1985 	    // avoid ubsan error for overflow
1986 	    if (un > VARNUM_MAX)
1987 		*nptr = VARNUM_MIN;
1988 	    else
1989 		*nptr = -(varnumber_T)un;
1990 	}
1991 	else
1992 	{
1993 	    if (un > VARNUM_MAX)
1994 		un = VARNUM_MAX;
1995 	    *nptr = (varnumber_T)un;
1996 	}
1997     }
1998     if (unptr != NULL)
1999 	*unptr = un;
2000 }
2001 
/*
 * Return the value of a single hex character.
 * Only valid when the argument is '0' - '9', 'A' - 'F' or 'a' - 'f'.
 * Any other character is handled like a decimal digit: "c" - '0'.
 */
    int
hex2nr(int c)
{
    int	    first = '0';	// character that stands for "offset"
    int	    offset = 0;

    if (c >= 'a' && c <= 'f')
    {
	first = 'a';
	offset = 10;
    }
    else if (c >= 'A' && c <= 'F')
    {
	first = 'A';
	offset = 10;
    }
    return c - first + offset;
}
2015 
2016 /*
2017  * Convert two hex characters to a byte.
2018  * Return -1 if one of the characters is not hex.
2019  */
2020     int
hexhex2nr(char_u * p)2021 hexhex2nr(char_u *p)
2022 {
2023     if (!vim_isxdigit(p[0]) || !vim_isxdigit(p[1]))
2024 	return -1;
2025     return (hex2nr(p[0]) << 4) + hex2nr(p[1]);
2026 }
2027 
2028 /*
2029  * Return TRUE if "str" starts with a backslash that should be removed.
2030  * For MS-DOS, MSWIN and OS/2 this is only done when the character after the
2031  * backslash is not a normal file name character.
2032  * '$' is a valid file name character, we don't remove the backslash before
2033  * it.  This means it is not possible to use an environment variable after a
2034  * backslash.  "C:\$VIM\doc" is taken literally, only "$VIM\doc" works.
2035  * Although "\ name" is valid, the backslash in "Program\ files" must be
2036  * removed.  Assume a file name doesn't start with a space.
2037  * For multi-byte names, never remove a backslash before a non-ascii
2038  * character, assume that all multi-byte characters are valid file name
2039  * characters.
2040  */
2041     int
rem_backslash(char_u * str)2042 rem_backslash(char_u *str)
2043 {
2044 #ifdef BACKSLASH_IN_FILENAME
2045     return (str[0] == '\\'
2046 	    && str[1] < 0x80
2047 	    && (str[1] == ' '
2048 		|| (str[1] != NUL
2049 		    && str[1] != '*'
2050 		    && str[1] != '?'
2051 		    && !vim_isfilec(str[1]))));
2052 #else
2053     return (str[0] == '\\' && str[1] != NUL);
2054 #endif
2055 }
2056 
2057 /*
2058  * Halve the number of backslashes in a file name argument.
2059  * For MS-DOS we only do this if the character after the backslash
2060  * is not a normal file character.
2061  */
2062     void
backslash_halve(char_u * p)2063 backslash_halve(char_u *p)
2064 {
2065     for ( ; *p; ++p)
2066 	if (rem_backslash(p))
2067 	    STRMOVE(p, p + 1);
2068 }
2069 
2070 /*
2071  * backslash_halve() plus save the result in allocated memory.
2072  * However, returns "p" when out of memory.
2073  */
2074     char_u *
backslash_halve_save(char_u * p)2075 backslash_halve_save(char_u *p)
2076 {
2077     char_u	*res;
2078 
2079     res = vim_strsave(p);
2080     if (res == NULL)
2081 	return p;
2082     backslash_halve(res);
2083     return res;
2084 }
2085 
#if (defined(EBCDIC) && defined(FEAT_POSTSCRIPT)) || defined(PROTO)
/*
 * Table for EBCDIC to ASCII conversion unashamedly taken from xxd.c!
 * The first 64 entries have been added to map control characters defined in
 * ascii.h
 * The table is indexed with the EBCDIC byte value, each row holds eight
 * entries.
 */
static char_u ebcdic2ascii_tab[256] =
{
    0000, 0001, 0002, 0003, 0004, 0011, 0006, 0177,
    0010, 0011, 0012, 0013, 0014, 0015, 0016, 0017,
    0020, 0021, 0022, 0023, 0024, 0012, 0010, 0027,
    0030, 0031, 0032, 0033, 0033, 0035, 0036, 0037,
    0040, 0041, 0042, 0043, 0044, 0045, 0046, 0047,
    0050, 0051, 0052, 0053, 0054, 0055, 0056, 0057,
    0060, 0061, 0062, 0063, 0064, 0065, 0066, 0067,
    0070, 0071, 0072, 0073, 0074, 0075, 0076, 0077,
    0040, 0240, 0241, 0242, 0243, 0244, 0245, 0246,
    0247, 0250, 0325, 0056, 0074, 0050, 0053, 0174,
    0046, 0251, 0252, 0253, 0254, 0255, 0256, 0257,
    0260, 0261, 0041, 0044, 0052, 0051, 0073, 0176,
    0055, 0057, 0262, 0263, 0264, 0265, 0266, 0267,
    0270, 0271, 0313, 0054, 0045, 0137, 0076, 0077,
    0272, 0273, 0274, 0275, 0276, 0277, 0300, 0301,
    0302, 0140, 0072, 0043, 0100, 0047, 0075, 0042,
    0303, 0141, 0142, 0143, 0144, 0145, 0146, 0147,
    0150, 0151, 0304, 0305, 0306, 0307, 0310, 0311,
    0312, 0152, 0153, 0154, 0155, 0156, 0157, 0160,
    0161, 0162, 0136, 0314, 0315, 0316, 0317, 0320,
    0321, 0345, 0163, 0164, 0165, 0166, 0167, 0170,
    0171, 0172, 0322, 0323, 0324, 0133, 0326, 0327,
    0330, 0331, 0332, 0333, 0334, 0335, 0336, 0337,
    0340, 0341, 0342, 0343, 0344, 0135, 0346, 0347,
    0173, 0101, 0102, 0103, 0104, 0105, 0106, 0107,
    0110, 0111, 0350, 0351, 0352, 0353, 0354, 0355,
    0175, 0112, 0113, 0114, 0115, 0116, 0117, 0120,
    0121, 0122, 0356, 0357, 0360, 0361, 0362, 0363,
    0134, 0237, 0123, 0124, 0125, 0126, 0127, 0130,
    0131, 0132, 0364, 0365, 0366, 0367, 0370, 0371,
    0060, 0061, 0062, 0063, 0064, 0065, 0066, 0067,
    0070, 0071, 0372, 0373, 0374, 0375, 0376, 0377
};

/*
 * Convert a buffer worth of characters from EBCDIC to ASCII.  Only useful if
 * wanting 7-bit ASCII characters out the other end.
 * The first "len" bytes of "buffer" are replaced in place; nothing is done
 * when "len" is zero or negative.
 */
    void
ebcdic2ascii(char_u *buffer, int len)
{
    char_u	*p = buffer;
    int		todo;

    for (todo = len; todo > 0; --todo, ++p)
	*p = ebcdic2ascii_tab[*p];
}
#endif
2141