xref: /vim-8.2.3635/src/charset.c (revision 2bf24176)
1 /* vi:set ts=8 sts=4 sw=4:
2  *
3  * VIM - Vi IMproved	by Bram Moolenaar
4  *
5  * Do ":help uganda"  in Vim to read copying and usage conditions.
6  * Do ":help credits" in Vim to see a list of people who contributed.
7  * See README.txt for an overview of the Vim source code.
8  */
9 
10 #include "vim.h"
11 
#ifdef FEAT_LINEBREAK
static int win_chartabsize __ARGS((win_T *wp, char_u *p, colnr_T col));
#endif

#ifdef FEAT_MBYTE
# if defined(HAVE_WCHAR_H)
#  include <wchar.h>	    /* for towupper() and towlower() */
# endif
static int win_nolbr_chartabsize __ARGS((win_T *wp, char_u *s, colnr_T col, int *headp));
#endif

static unsigned nr2hex __ARGS((unsigned c));

/* Becomes TRUE once buf_init_chartab() has run to completion.  While it is
 * still FALSE transchar() first tries a built-in printable range before it
 * falls back on vim_isprintc_strict(). */
static int    chartab_initialized = FALSE;

/* b_chartab[] is an array of 32 bytes, each bit representing one of the
 * characters 0-255: character "c" is stored in byte (c >> 3), bit (c & 7).
 * SET_CHARTAB() and RESET_CHARTAB() expand to a bare assignment expression
 * that sets or clears that bit; GET_CHARTAB() evaluates to non-zero when the
 * bit is set. */
#define SET_CHARTAB(buf, c) (buf)->b_chartab[(unsigned)(c) >> 3] |= (1 << ((c) & 0x7))
#define RESET_CHARTAB(buf, c) (buf)->b_chartab[(unsigned)(c) >> 3] &= ~(1 << ((c) & 0x7))
#define GET_CHARTAB(buf, c) ((buf)->b_chartab[(unsigned)(c) >> 3] & (1 << ((c) & 0x7)))
32 
/*
 * Fill chartab[].  Also fills curbuf->b_chartab[] with flags for keyword
 * characters for current buffer.
 * This is a wrapper that calls buf_init_chartab(curbuf, TRUE).
 *
 * Depends on the option settings 'iskeyword', 'isident', 'isfname',
 * 'isprint' and 'encoding'.
 *
 * The index in chartab[] depends on 'encoding':
 * - For non-multi-byte index with the byte (same as the character).
 * - For DBCS index with the first byte.
 * - For UTF-8 index with the character (when first byte is up to 0x80 it is
 *   the same as the character, if the first byte is 0x80 and above it depends
 *   on further bytes).
 *
 * The contents of chartab[]:
 * - The bits masked by CT_CELL_MASK give the number of display cells the
 *   character occupies.  buf_init_chartab() stores 1 for printable
 *   characters, 2 for unprintable ones and for double-width DBCS lead bytes,
 *   and 4 for unprintable ones when dy_flags has DY_UHEX set.  Not valid for
 *   UTF-8 above 0x80.
 * - CT_PRINT_CHAR bit is set when the character is printable (no need to
 *   translate the character before displaying it).  Note that only DBCS
 *   characters can have 2 display cells and still be printable.
 * - CT_FNAME_CHAR bit is set when the character can be in a file name.
 * - CT_ID_CHAR bit is set when the character can be in an identifier.
 *
 * Return FAIL if 'iskeyword', 'isident', 'isfname' or 'isprint' option has an
 * error, OK otherwise.
 */
    int
init_chartab()
{
    /* TRUE: also rebuild the global chartab[], not only curbuf->b_chartab[] */
    return buf_init_chartab(curbuf, TRUE);
}
64 
/*
 * Rebuild the keyword flags in "buf"->b_chartab[] from buf->b_p_isk and, when
 * "global" is TRUE, also rebuild the global chartab[] from p_isi, p_isp and
 * p_isf.
 *
 * Each option value is a comma separated list of items.  An item is a
 * character or a decimal number, optionally preceded by '^' (remove instead
 * of add) and optionally followed by "-" and a second character or number to
 * form a range.  Values must be in the range 1 to 255.  A lone "@" stands
 * for all characters for which MB_ISLOWER() or MB_ISUPPER() is true.
 *
 * Returns OK on success.  Returns FAIL as soon as a malformed item or a
 * trailing comma is found; items before the offending one have already been
 * applied at that point and chartab_initialized is not set.
 */
    int
buf_init_chartab(buf, global)
    buf_T	*buf;
    int		global;		/* FALSE: only set buf->b_chartab[] */
{
    int		c;		/* current character / start of range */
    int		c2;		/* end of range, -1 when the item is no range */
    char_u	*p;		/* walks through the option value */
    int		i;		/* which option: 0 isi, 1 isp, 2 isf, 3 isk */
    int		tilde;		/* TRUE when the item started with '^' */
    int		do_isalpha;	/* TRUE for a lone '@': letters only */

    if (global)
    {
	/*
	 * Set the default size for printable characters:
	 * From <Space> to '~' is 1 (printable), others are 2 (not printable),
	 * or 4 when dy_flags has DY_UHEX set.
	 * This also inits all 'isident' and 'isfname' flags to FALSE.
	 *
	 * EBCDIC: all chars below ' ' are not printable, all others are
	 * printable.
	 */
	c = 0;
	while (c < ' ')
	    chartab[c++] = (dy_flags & DY_UHEX) ? 4 : 2;
#ifdef EBCDIC
	while (c < 255)
#else
	while (c <= '~')
#endif
	    chartab[c++] = 1 + CT_PRINT_CHAR;
#ifdef FEAT_FKMAP
	if (p_altkeymap)
	{
	    /* with 'altkeymap' set everything below YE is printable too */
	    while (c < YE)
		chartab[c++] = 1 + CT_PRINT_CHAR;
	}
#endif
	/* Fill in the remaining entries up to 255. */
	while (c < 256)
	{
#ifdef FEAT_MBYTE
	    /* UTF-8: bytes 0xa0 - 0xff are printable (latin1) */
	    if (enc_utf8 && c >= 0xa0)
		chartab[c++] = CT_PRINT_CHAR + 1;
	    /* euc-jp characters starting with 0x8e are single width */
	    else if (enc_dbcs == DBCS_JPNU && c == 0x8e)
		chartab[c++] = CT_PRINT_CHAR + 1;
	    /* other double-byte chars can be printable AND double-width */
	    else if (enc_dbcs != 0 && MB_BYTE2LEN(c) == 2)
		chartab[c++] = CT_PRINT_CHAR + 2;
	    else
#endif
		/* the rest is unprintable by default */
		chartab[c++] = (dy_flags & DY_UHEX) ? 4 : 2;
	}

#ifdef FEAT_MBYTE
	/* Assume that every multi-byte char is a filename character. */
	for (c = 1; c < 256; ++c)
	    if ((enc_dbcs != 0 && MB_BYTE2LEN(c) > 1)
		    || (enc_dbcs == DBCS_JPNU && c == 0x8e)
		    || (enc_utf8 && c >= 0xa0))
		chartab[c] |= CT_FNAME_CHAR;
#endif
    }

    /*
     * Init word char flags all to FALSE
     */
    vim_memset(buf->b_chartab, 0, (size_t)32);
#ifdef FEAT_MBYTE
    if (enc_dbcs != 0)
	for (c = 0; c < 256; ++c)
	{
	    /* double-byte characters are probably word characters */
	    if (MB_BYTE2LEN(c) == 2)
		SET_CHARTAB(buf, c);
	}
#endif

#ifdef FEAT_LISP
    /*
     * In lisp mode the '-' character is included in keywords.
     */
    if (buf->b_p_lisp)
	SET_CHARTAB(buf, '-');
#endif

    /* Walk through the 'isident', 'iskeyword', 'isfname' and 'isprint'
     * options.  Each option is a list of characters, character numbers or
     * ranges, separated by commas, e.g.: "200-210,x,#-178,-"
     * When "global" is FALSE only 'iskeyword' (round 3) is handled.
     */
    for (i = global ? 0 : 3; i <= 3; ++i)
    {
	if (i == 0)
	    p = p_isi;		/* first round: 'isident' */
	else if (i == 1)
	    p = p_isp;		/* second round: 'isprint' */
	else if (i == 2)
	    p = p_isf;		/* third round: 'isfname' */
	else	/* i == 3 */
	    p = buf->b_p_isk;	/* fourth round: 'iskeyword' */

	while (*p)
	{
	    tilde = FALSE;
	    do_isalpha = FALSE;
	    /* A leading '^' negates the item, unless it is the last char. */
	    if (*p == '^' && p[1] != NUL)
	    {
		tilde = TRUE;
		++p;
	    }
	    /* Get the first value: a number or a (multi-byte) character. */
	    if (VIM_ISDIGIT(*p))
		c = getdigits(&p);
	    else
#ifdef FEAT_MBYTE
		 if (has_mbyte)
		c = mb_ptr2char_adv(&p);
	    else
#endif
		c = *p++;
	    c2 = -1;
	    /* A '-' that is not the last character starts the range end. */
	    if (*p == '-' && p[1] != NUL)
	    {
		++p;
		if (VIM_ISDIGIT(*p))
		    c2 = getdigits(&p);
		else
#ifdef FEAT_MBYTE
		     if (has_mbyte)
		    c2 = mb_ptr2char_adv(&p);
		else
#endif
		    c2 = *p++;
	    }
	    /* Reject values outside 1-255, a reversed range and an item that
	     * is not followed by a comma or the end of the option. */
	    if (c <= 0 || c >= 256 || (c2 < c && c2 != -1) || c2 >= 256
						 || !(*p == NUL || *p == ','))
		return FAIL;

	    if (c2 == -1)	/* not a range */
	    {
		/*
		 * A single '@' (not "@-@"):
		 * Decide on letters being ID/printable/keyword chars with
		 * standard function isalpha(). This takes care of locale for
		 * single-byte characters).
		 */
		if (c == '@')
		{
		    do_isalpha = TRUE;
		    c = 1;
		    c2 = 255;
		}
		else
		    c2 = c;
	    }
	    /* Apply the item to every character in c .. c2. */
	    while (c <= c2)
	    {
		/* Use the MB_ functions here, because isalpha() doesn't
		 * work properly when 'encoding' is "latin1" and the locale is
		 * "C".  */
		if (!do_isalpha || MB_ISLOWER(c) || MB_ISUPPER(c)
#ifdef FEAT_FKMAP
			|| (p_altkeymap && (F_isalpha(c) || F_isdigit(c)))
#endif
			    )
		{
		    if (i == 0)			/* (re)set ID flag */
		    {
			if (tilde)
			    chartab[c] &= ~CT_ID_CHAR;
			else
			    chartab[c] |= CT_ID_CHAR;
		    }
		    else if (i == 1)		/* (re)set printable */
		    {
			/* Only characters outside the default printable
			 * range are changed here. */
			if ((c < ' '
#ifndef EBCDIC
				    || c > '~'
#endif
#ifdef FEAT_FKMAP
				    || (p_altkeymap
					&& (F_isalpha(c) || F_isdigit(c)))
#endif
			    )
#ifdef FEAT_MBYTE
				/* For double-byte we keep the cell width, so
				 * that we can detect it from the first byte. */
				&& !(enc_dbcs && MB_BYTE2LEN(c) == 2)
#endif
			   )
			{
			    if (tilde)
			    {
				/* unprintable: 2 cells, or 4 with DY_UHEX */
				chartab[c] = (chartab[c] & ~CT_CELL_MASK)
					     + ((dy_flags & DY_UHEX) ? 4 : 2);
				chartab[c] &= ~CT_PRINT_CHAR;
			    }
			    else
			    {
				/* printable: one cell */
				chartab[c] = (chartab[c] & ~CT_CELL_MASK) + 1;
				chartab[c] |= CT_PRINT_CHAR;
			    }
			}
		    }
		    else if (i == 2)		/* (re)set fname flag */
		    {
			if (tilde)
			    chartab[c] &= ~CT_FNAME_CHAR;
			else
			    chartab[c] |= CT_FNAME_CHAR;
		    }
		    else /* i == 3 */		/* (re)set keyword flag */
		    {
			if (tilde)
			    RESET_CHARTAB(buf, c);
			else
			    SET_CHARTAB(buf, c);
		    }
		}
		++c;
	    }

	    /* Remember the separator, then move on to the next item. */
	    c = *p;
	    p = skip_to_option_part(p);
	    if (c == ',' && *p == NUL)
		/* Trailing comma is not allowed. */
		return FAIL;
	}
    }
    chartab_initialized = TRUE;
    return OK;
}
298 
299 /*
300  * Translate any special characters in buf[bufsize] in-place.
301  * The result is a string with only printable characters, but if there is not
302  * enough room, not all characters will be translated.
303  */
304     void
305 trans_characters(buf, bufsize)
306     char_u	*buf;
307     int		bufsize;
308 {
309     int		len;		/* length of string needing translation */
310     int		room;		/* room in buffer after string */
311     char_u	*trs;		/* translated character */
312     int		trs_len;	/* length of trs[] */
313 
314     len = (int)STRLEN(buf);
315     room = bufsize - len;
316     while (*buf != 0)
317     {
318 # ifdef FEAT_MBYTE
319 	/* Assume a multi-byte character doesn't need translation. */
320 	if (has_mbyte && (trs_len = (*mb_ptr2len)(buf)) > 1)
321 	    len -= trs_len;
322 	else
323 # endif
324 	{
325 	    trs = transchar_byte(*buf);
326 	    trs_len = (int)STRLEN(trs);
327 	    if (trs_len > 1)
328 	    {
329 		room -= trs_len - 1;
330 		if (room <= 0)
331 		    return;
332 		mch_memmove(buf + trs_len, buf + 1, (size_t)len);
333 	    }
334 	    mch_memmove(buf, trs, (size_t)trs_len);
335 	    --len;
336 	}
337 	buf += trs_len;
338     }
339 }
340 
#if defined(FEAT_EVAL) || defined(FEAT_TITLE) || defined(FEAT_INS_EXPAND) \
	|| defined(PROTO)
/*
 * Translate string "s" into allocated memory, replacing special chars with
 * printable chars.  Returns NULL when out of memory.
 *
 * The work is done in two passes: first the size of the result is computed,
 * then the result is built.  The second pass keeps a pointer to the end of
 * the result, so that appending does not rescan what was already written.
 */
    char_u *
transstr(s)
    char_u	*s;
{
    char_u	*res;
    char_u	*p;
#ifdef FEAT_MBYTE
    int		l, len, c;
    char_u	hexbuf[11];
#endif

#ifdef FEAT_MBYTE
    if (has_mbyte)
    {
	/* Compute the length of the result, taking account of unprintable
	 * multi-byte characters. */
	len = 0;
	p = s;
	while (*p != NUL)
	{
	    if ((l = (*mb_ptr2len)(p)) > 1)
	    {
		c = (*mb_ptr2char)(p);
		p += l;
		if (vim_isprintc(c))
		    len += l;
		else
		{
		    /* Unprintable: takes the size of its hex form. */
		    transchar_hex(hexbuf, c);
		    len += (int)STRLEN(hexbuf);
		}
	    }
	    else
	    {
		l = byte2cells(*p++);
		if (l > 0)
		    len += l;
		else
		    len += 4;	/* illegal byte sequence */
	    }
	}
	res = alloc((unsigned)(len + 1));
    }
    else
#endif
	res = alloc((unsigned)(vim_strsize(s) + 1));
    if (res != NULL)
    {
	/* "tail" always points at the NUL that terminates the result so far,
	 * which makes every append start right there. */
	char_u	*tail = res;

	*tail = NUL;
	p = s;
	while (*p != NUL)
	{
#ifdef FEAT_MBYTE
	    if (has_mbyte && (l = (*mb_ptr2len)(p)) > 1)
	    {
		c = (*mb_ptr2char)(p);
		if (vim_isprintc(c))
		    STRNCAT(tail, p, l);    /* append printable multi-byte char */
		else
		    transchar_hex(tail, c);
		p += l;
	    }
	    else
#endif
		STRCAT(tail, transchar_byte(*p++));
	    /* Skip over what was just appended. */
	    tail += STRLEN(tail);
	}
    }
    return res;
}
#endif
417 
#if defined(FEAT_SYN_HL) || defined(FEAT_INS_EXPAND) || defined(PROTO)
/*
 * Convert the string "str[orglen]" to do ignore-case comparing.  Uses the
 * current locale.
 * When "buf" is NULL returns an allocated string (NULL for out-of-memory).
 * Otherwise puts the result in "buf[buflen]"; the input is truncated to
 * "buflen" - 1 bytes when it does not fit.
 *
 * The string is first copied unmodified and then lower-cased in place.  For
 * UTF-8 the lower-case form may take a different number of bytes, in which
 * case the remainder of the string is shifted.
 */
    char_u *
str_foldcase(str, orglen, buf, buflen)
    char_u	*str;
    int		orglen;
    char_u	*buf;
    int		buflen;
{
    garray_T	ga;		/* holds the result when "buf" is NULL */
    int		i;		/* byte index of the character being folded */
    int		len = orglen;	/* length of the result when using "buf" */

/* Access the result by index, wherever it is stored (growarray or "buf"). */
#define GA_CHAR(i)  ((char_u *)ga.ga_data)[i]
#define GA_PTR(i)   ((char_u *)ga.ga_data + i)
#define STR_CHAR(i)  (buf == NULL ? GA_CHAR(i) : buf[i])
#define STR_PTR(i)   (buf == NULL ? GA_PTR(i) : buf + i)

    /* Copy "str" into "buf" or allocated memory, unmodified. */
    if (buf == NULL)
    {
	ga_init2(&ga, 1, 10);
	if (ga_grow(&ga, len + 1) == FAIL)
	    return NULL;
	mch_memmove(ga.ga_data, str, (size_t)len);
	ga.ga_len = len;
    }
    else
    {
	if (len >= buflen)	    /* Ugly! */
	    len = buflen - 1;
	mch_memmove(buf, str, (size_t)len);
    }
    /* Terminate the copy with a NUL. */
    if (buf == NULL)
	GA_CHAR(len) = NUL;
    else
	buf[len] = NUL;

    /* Make each character lower case. */
    i = 0;
    while (STR_CHAR(i) != NUL)
    {
#ifdef FEAT_MBYTE
	if (enc_utf8 || (has_mbyte && MB_BYTE2LEN(STR_CHAR(i)) > 1))
	{
	    if (enc_utf8)
	    {
		int	c = utf_ptr2char(STR_PTR(i));
		int	olen = utf_ptr2len(STR_PTR(i));
		int	lc = utf_tolower(c);

		/* Only replace the character when it is not an invalid
		 * sequence (ASCII character or more than one byte) and
		 * utf_tolower() doesn't return the original character. */
		if ((c < 0x80 || olen > 1) && c != lc)
		{
		    int	    nlen = utf_char2len(lc);

		    /* If the byte length changes need to shift the following
		     * characters forward or backward. */
		    if (olen != nlen)
		    {
			if (nlen > olen)
			{
			    /* Growing: needs room in the growarray or in
			     * "buf". */
			    if (buf == NULL
				    ? ga_grow(&ga, nlen - olen + 1) == FAIL
				    : len + nlen - olen >= buflen)
			    {
				/* out of memory, keep old char */
				lc = c;
				nlen = olen;
			    }
			}
			if (olen != nlen)
			{
			    /* Move the tail and adjust the stored length. */
			    if (buf == NULL)
			    {
				STRMOVE(GA_PTR(i) + nlen, GA_PTR(i) + olen);
				ga.ga_len += nlen - olen;
			    }
			    else
			    {
				STRMOVE(buf + i + nlen, buf + i + olen);
				len += nlen - olen;
			    }
			}
		    }
		    (void)utf_char2bytes(lc, STR_PTR(i));
		}
	    }
	    /* skip to next multi-byte char */
	    i += (*mb_ptr2len)(STR_PTR(i));
	}
	else
#endif
	{
	    /* Single byte: fold it with TOLOWER_LOC(). */
	    if (buf == NULL)
		GA_CHAR(i) = TOLOWER_LOC(GA_CHAR(i));
	    else
		buf[i] = TOLOWER_LOC(buf[i]);
	    ++i;
	}
    }

    if (buf == NULL)
	return (char_u *)ga.ga_data;
    return buf;
}
#endif
532 
533 /*
534  * Catch 22: chartab[] can't be initialized before the options are
535  * initialized, and initializing options may cause transchar() to be called!
536  * When chartab_initialized == FALSE don't use chartab[].
537  * Does NOT work for multi-byte characters, c must be <= 255.
538  * Also doesn't work for the first byte of a multi-byte, "c" must be a
539  * character!
540  */
541 static char_u	transchar_buf[7];
542 
543     char_u *
544 transchar(c)
545     int		c;
546 {
547     int			i;
548 
549     i = 0;
550     if (IS_SPECIAL(c))	    /* special key code, display as ~@ char */
551     {
552 	transchar_buf[0] = '~';
553 	transchar_buf[1] = '@';
554 	i = 2;
555 	c = K_SECOND(c);
556     }
557 
558     if ((!chartab_initialized && (
559 #ifdef EBCDIC
560 		    (c >= 64 && c < 255)
561 #else
562 		    (c >= ' ' && c <= '~')
563 #endif
564 #ifdef FEAT_FKMAP
565 			|| F_ischar(c)
566 #endif
567 		)) || (c < 256 && vim_isprintc_strict(c)))
568     {
569 	/* printable character */
570 	transchar_buf[i] = c;
571 	transchar_buf[i + 1] = NUL;
572     }
573     else
574 	transchar_nonprint(transchar_buf + i, c);
575     return transchar_buf;
576 }
577 
#if defined(FEAT_MBYTE) || defined(PROTO)
/*
 * Variant of transchar() that takes a byte instead of a character.
 * With UTF-8 a byte of 0x80 or higher is not a character by itself; it is
 * always translated with transchar_nonprint().
 * The result is in the same static buffer that transchar() uses.
 */
    char_u *
transchar_byte(c)
    int		c;
{
    if (!enc_utf8 || c < 0x80)
	return transchar(c);

    transchar_nonprint(transchar_buf, c);
    return transchar_buf;
}
#endif
595 
596 /*
597  * Convert non-printable character to two or more printable characters in
598  * "buf[]".  "buf" needs to be able to hold five bytes.
599  * Does NOT work for multi-byte characters, c must be <= 255.
600  */
601     void
602 transchar_nonprint(buf, c)
603     char_u	*buf;
604     int		c;
605 {
606     if (c == NL)
607 	c = NUL;		/* we use newline in place of a NUL */
608     else if (c == CAR && get_fileformat(curbuf) == EOL_MAC)
609 	c = NL;			/* we use CR in place of  NL in this case */
610 
611     if (dy_flags & DY_UHEX)		/* 'display' has "uhex" */
612 	transchar_hex(buf, c);
613 
614 #ifdef EBCDIC
615     /* For EBCDIC only the characters 0-63 and 255 are not printable */
616     else if (CtrlChar(c) != 0 || c == DEL)
617 #else
618     else if (c <= 0x7f)				/* 0x00 - 0x1f and 0x7f */
619 #endif
620     {
621 	buf[0] = '^';
622 #ifdef EBCDIC
623 	if (c == DEL)
624 	    buf[1] = '?';		/* DEL displayed as ^? */
625 	else
626 	    buf[1] = CtrlChar(c);
627 #else
628 	buf[1] = c ^ 0x40;		/* DEL displayed as ^? */
629 #endif
630 
631 	buf[2] = NUL;
632     }
633 #ifdef FEAT_MBYTE
634     else if (enc_utf8 && c >= 0x80)
635     {
636 	transchar_hex(buf, c);
637     }
638 #endif
639 #ifndef EBCDIC
640     else if (c >= ' ' + 0x80 && c <= '~' + 0x80)    /* 0xa0 - 0xfe */
641     {
642 	buf[0] = '|';
643 	buf[1] = c - 0x80;
644 	buf[2] = NUL;
645     }
646 #else
647     else if (c < 64)
648     {
649 	buf[0] = '~';
650 	buf[1] = MetaChar(c);
651 	buf[2] = NUL;
652     }
653 #endif
654     else					    /* 0x80 - 0x9f and 0xff */
655     {
656 	/*
657 	 * TODO: EBCDIC I don't know what to do with this chars, so I display
658 	 * them as '~?' for now
659 	 */
660 	buf[0] = '~';
661 #ifdef EBCDIC
662 	buf[1] = '?';			/* 0xff displayed as ~? */
663 #else
664 	buf[1] = (c - 0x80) ^ 0x40;	/* 0xff displayed as ~? */
665 #endif
666 	buf[2] = NUL;
667     }
668 }
669 
670     void
671 transchar_hex(buf, c)
672     char_u	*buf;
673     int		c;
674 {
675     int		i = 0;
676 
677     buf[0] = '<';
678 #ifdef FEAT_MBYTE
679     if (c > 255)
680     {
681 	buf[++i] = nr2hex((unsigned)c >> 12);
682 	buf[++i] = nr2hex((unsigned)c >> 8);
683     }
684 #endif
685     buf[++i] = nr2hex((unsigned)c >> 4);
686     buf[++i] = nr2hex((unsigned)c);
687     buf[++i] = '>';
688     buf[++i] = NUL;
689 }
690 
/*
 * Return the hex digit character for the lowest four bits of "c"; all higher
 * bits are ignored.
 * The digits above 9 are lower case letters, so that <f1> for byte 0xf1 can
 * not be confused with <F1> for function key 1.
 */
    static unsigned
nr2hex(c)
    unsigned	c;
{
    unsigned	nibble = c & 0xf;

    return nibble < 10 ? nibble + '0' : nibble - 10 + 'a';
}
704 
705 /*
706  * Return number of display cells occupied by byte "b".
707  * Caller must make sure 0 <= b <= 255.
708  * For multi-byte mode "b" must be the first byte of a character.
709  * A TAB is counted as two cells: "^I".
710  * For UTF-8 mode this will return 0 for bytes >= 0x80, because the number of
711  * cells depends on further bytes.
712  */
713     int
714 byte2cells(b)
715     int		b;
716 {
717 #ifdef FEAT_MBYTE
718     if (enc_utf8 && b >= 0x80)
719 	return 0;
720 #endif
721     return (chartab[b] & CT_CELL_MASK);
722 }
723 
724 /*
725  * Return number of display cells occupied by character "c".
726  * "c" can be a special key (negative number) in which case 3 or 4 is returned.
727  * A TAB is counted as two cells: "^I" or four: "<09>".
728  */
729     int
730 char2cells(c)
731     int		c;
732 {
733     if (IS_SPECIAL(c))
734 	return char2cells(K_SECOND(c)) + 2;
735 #ifdef FEAT_MBYTE
736     if (c >= 0x80)
737     {
738 	/* UTF-8: above 0x80 need to check the value */
739 	if (enc_utf8)
740 	    return utf_char2cells(c);
741 	/* DBCS: double-byte means double-width, except for euc-jp with first
742 	 * byte 0x8e */
743 	if (enc_dbcs != 0 && c >= 0x100)
744 	{
745 	    if (enc_dbcs == DBCS_JPNU && ((unsigned)c >> 8) == 0x8e)
746 		return 1;
747 	    return 2;
748 	}
749     }
750 #endif
751     return (chartab[c & 0xff] & CT_CELL_MASK);
752 }
753 
754 /*
755  * Return number of display cells occupied by character at "*p".
756  * A TAB is counted as two cells: "^I" or four: "<09>".
757  */
758     int
759 ptr2cells(p)
760     char_u	*p;
761 {
762 #ifdef FEAT_MBYTE
763     /* For UTF-8 we need to look at more bytes if the first byte is >= 0x80. */
764     if (enc_utf8 && *p >= 0x80)
765 	return utf_ptr2cells(p);
766     /* For DBCS we can tell the cell count from the first byte. */
767 #endif
768     return (chartab[*p] & CT_CELL_MASK);
769 }
770 
/*
 * Return the number of character cells string "s" will take on the screen,
 * counting TABs as two characters: "^I".
 * Same as vim_strnsize() with MAXCOL passed as the length limit.
 */
    int
vim_strsize(s)
    char_u	*s;
{
    return vim_strnsize(s, (int)MAXCOL);
}
781 
782 /*
783  * Return the number of character cells string "s[len]" will take on the
784  * screen, counting TABs as two characters: "^I".
785  */
786     int
787 vim_strnsize(s, len)
788     char_u	*s;
789     int		len;
790 {
791     int		size = 0;
792 
793     while (*s != NUL && --len >= 0)
794     {
795 #ifdef FEAT_MBYTE
796 	if (has_mbyte)
797 	{
798 	    int	    l = (*mb_ptr2len)(s);
799 
800 	    size += ptr2cells(s);
801 	    s += l;
802 	    len -= l - 1;
803 	}
804 	else
805 #endif
806 	    size += byte2cells(*s++);
807     }
808     return size;
809 }
810 
/*
 * Return the number of cells the character at "p" will take on the screen
 * when displayed at column "col", taking into account the size of a tab.
 * Use a define to make it fast, this is used very often!!!
 * Also see getvcol() below.
 *
 * The macro expands to an if/else in which both branches return from the
 * calling function:
 * - For a TAB, unless the 'list' option is set for "wp" without lcs_tab1
 *   being set: the distance to the next multiple of 'tabstop' of "buf".
 * - Otherwise: ptr2cells(p).
 * Note that "col" is used without parentheses in the expansion.
 */

#define RET_WIN_BUF_CHARTABSIZE(wp, buf, p, col) \
    if (*(p) == TAB && (!(wp)->w_p_list || lcs_tab1)) \
    { \
	int ts; \
	ts = (buf)->b_p_ts; \
	return (int)(ts - (col % ts)); \
    } \
    else \
	return ptr2cells(p);
827 
#if defined(FEAT_VREPLACE) || defined(FEAT_EX_EXTRA) || defined(FEAT_GUI) \
	|| defined(FEAT_VIRTUALEDIT) || defined(PROTO)
/*
 * Return the number of cells the character at "p" takes when displayed at
 * column "col" in the current window, using 'tabstop' of the current buffer
 * for a TAB.
 */
    int
chartabsize(p, col)
    char_u	*p;
    colnr_T	col;
{
    /* The macro contains the return statements. */
    RET_WIN_BUF_CHARTABSIZE(curwin, curbuf, p, col)
}
#endif
838 
#ifdef FEAT_LINEBREAK
/*
 * Like chartabsize(), but for window "wp" and its buffer instead of the
 * current window and buffer.
 */
    static int
win_chartabsize(wp, p, col)
    win_T	*wp;
    char_u	*p;
    colnr_T	col;
{
    /* The macro contains the return statements. */
    RET_WIN_BUF_CHARTABSIZE(wp, wp->w_buffer, p, col)
}
#endif
849 
/*
 * Return the number of characters the string 's' will take on the screen,
 * taking into account the size of a tab.
 * Same as linetabsize_col() with a start column of zero.
 */
    int
linetabsize(s)
    char_u	*s;
{
    return linetabsize_col(0, s);
}
860 
861 /*
862  * Like linetabsize(), but starting at column "startcol".
863  */
864     int
865 linetabsize_col(startcol, s)
866     int		startcol;
867     char_u	*s;
868 {
869     colnr_T	col = startcol;
870     char_u	*line = s; /* pointer to start of line, for breakindent */
871 
872     while (*s != NUL)
873 	col += lbr_chartabsize_adv(line, &s, col);
874     return (int)col;
875 }
876 
877 /*
878  * Like linetabsize(), but for a given window instead of the current one.
879  */
880     int
881 win_linetabsize(wp, line, len)
882     win_T	*wp;
883     char_u	*line;
884     colnr_T	len;
885 {
886     colnr_T	col = 0;
887     char_u	*s;
888 
889     for (s = line; *s != NUL && (len == MAXCOL || s < line + len);
890 								mb_ptr_adv(s))
891 	col += win_lbr_chartabsize(wp, line, s, col, NULL);
892     return (int)col;
893 }
894 
895 /*
896  * Return TRUE if 'c' is a normal identifier character:
897  * Letters and characters from the 'isident' option.
898  */
899     int
900 vim_isIDc(c)
901     int c;
902 {
903     return (c > 0 && c < 0x100 && (chartab[c] & CT_ID_CHAR));
904 }
905 
/*
 * return TRUE if 'c' is a keyword character: Letters and characters from
 * 'iskeyword' option for current buffer.
 * For multi-byte characters mb_get_class() is used (builtin rules).
 * Same as vim_iswordc_buf() with "curbuf".
 */
    int
vim_iswordc(c)
    int c;
{
    return vim_iswordc_buf(c, curbuf);
}
917 
918     int
919 vim_iswordc_buf(c, buf)
920     int		c;
921     buf_T	*buf;
922 {
923 #ifdef FEAT_MBYTE
924     if (c >= 0x100)
925     {
926 	if (enc_dbcs != 0)
927 	    return dbcs_class((unsigned)c >> 8, (unsigned)(c & 0xff)) >= 2;
928 	if (enc_utf8)
929 	    return utf_class(c) >= 2;
930     }
931 #endif
932     return (c > 0 && c < 0x100 && GET_CHARTAB(buf, c) != 0);
933 }
934 
935 /*
936  * Just like vim_iswordc() but uses a pointer to the (multi-byte) character.
937  */
938     int
939 vim_iswordp(p)
940     char_u *p;
941 {
942 #ifdef FEAT_MBYTE
943     if (has_mbyte && MB_BYTE2LEN(*p) > 1)
944 	return mb_get_class(p) >= 2;
945 #endif
946     return GET_CHARTAB(curbuf, *p) != 0;
947 }
948 
949     int
950 vim_iswordp_buf(p, buf)
951     char_u	*p;
952     buf_T	*buf;
953 {
954 #ifdef FEAT_MBYTE
955     if (has_mbyte && MB_BYTE2LEN(*p) > 1)
956 	return mb_get_class(p) >= 2;
957 #endif
958     return (GET_CHARTAB(buf, *p) != 0);
959 }
960 
961 /*
962  * return TRUE if 'c' is a valid file-name character
963  * Assume characters above 0x100 are valid (multi-byte).
964  */
965     int
966 vim_isfilec(c)
967     int	c;
968 {
969     return (c >= 0x100 || (c > 0 && (chartab[c] & CT_FNAME_CHAR)));
970 }
971 
972 /*
973  * return TRUE if 'c' is a valid file-name character or a wildcard character
974  * Assume characters above 0x100 are valid (multi-byte).
975  * Explicitly interpret ']' as a wildcard character as mch_has_wildcard("]")
976  * returns false.
977  */
978     int
979 vim_isfilec_or_wc(c)
980     int c;
981 {
982     char_u buf[2];
983 
984     buf[0] = (char_u)c;
985     buf[1] = NUL;
986     return vim_isfilec(c) || c == ']' || mch_has_wildcard(buf);
987 }
988 
989 /*
990  * return TRUE if 'c' is a printable character
991  * Assume characters above 0x100 are printable (multi-byte), except for
992  * Unicode.
993  */
994     int
995 vim_isprintc(c)
996     int c;
997 {
998 #ifdef FEAT_MBYTE
999     if (enc_utf8 && c >= 0x100)
1000 	return utf_printable(c);
1001 #endif
1002     return (c >= 0x100 || (c > 0 && (chartab[c] & CT_PRINT_CHAR)));
1003 }
1004 
1005 /*
1006  * Strict version of vim_isprintc(c), don't return TRUE if "c" is the head
1007  * byte of a double-byte character.
1008  */
1009     int
1010 vim_isprintc_strict(c)
1011     int	c;
1012 {
1013 #ifdef FEAT_MBYTE
1014     if (enc_dbcs != 0 && c < 0x100 && MB_BYTE2LEN(c) > 1)
1015 	return FALSE;
1016     if (enc_utf8 && c >= 0x100)
1017 	return utf_printable(c);
1018 #endif
1019     return (c >= 0x100 || (c > 0 && (chartab[c] & CT_PRINT_CHAR)));
1020 }
1021 
/*
 * like chartabsize(), but also check for line breaks on the screen
 *
 * Returns the number of cells for the character at "s" in column "col" of
 * the current window.  The options 'linebreak', 'showbreak' and
 * 'breakindent' are only taken into account when FEAT_LINEBREAK is defined.
 */
    int
lbr_chartabsize(line, s, col)
    char_u		*line UNUSED; /* start of the line */
    unsigned char	*s;
    colnr_T		col;
{
#ifdef FEAT_LINEBREAK
    /* Fast path: none of 'linebreak', 'showbreak' and 'breakindent' is in
     * use. */
    if (!curwin->w_p_lbr && *p_sbr == NUL && !curwin->w_p_bri)
    {
#endif
#ifdef FEAT_MBYTE
	if (curwin->w_p_wrap)
	    return win_nolbr_chartabsize(curwin, s, col, NULL);
#endif
	/* The macro contains the return statements. */
	RET_WIN_BUF_CHARTABSIZE(curwin, curbuf, s, col)
#ifdef FEAT_LINEBREAK
    }
    /* When "line" is NULL use "s" as the start of the line. */
    return win_lbr_chartabsize(curwin, line == NULL ? s : line, s, col, NULL);
#endif
}
1045 
1046 /*
1047  * Call lbr_chartabsize() and advance the pointer.
1048  */
1049     int
1050 lbr_chartabsize_adv(line, s, col)
1051     char_u	*line; /* start of the line */
1052     char_u	**s;
1053     colnr_T	col;
1054 {
1055     int		retval;
1056 
1057     retval = lbr_chartabsize(line, *s, col);
1058     mb_ptr_adv(*s);
1059     return retval;
1060 }
1061 
/*
 * This function is used very often, keep it fast!!!!
 *
 * Return the number of cells the character at "s" takes in window "wp" when
 * displayed at column "col", including what 'linebreak', 'showbreak' and
 * 'breakindent' add.  "line" is the start of the line that "s" is in.
 *
 * If "headp" not NULL, set *headp to the size of what we add for 'showbreak'
 * string at start of line.  Warning: *headp is only set if it's a non-zero
 * value, init to 0 before calling.
 */
    int
win_lbr_chartabsize(wp, line, s, col, headp)
    win_T	*wp;
    char_u	*line UNUSED; /* start of the line */
    char_u	*s;
    colnr_T	col;
    int		*headp UNUSED;
{
#ifdef FEAT_LINEBREAK
    int		c;
    int		size;
    colnr_T	col2;
    colnr_T	col_adj = 0; /* size of a TAB minus one, zero for other chars */
    colnr_T	colmax;
    int		added;
# ifdef FEAT_MBYTE
    int		mb_added = 0;
# else
#  define mb_added 0
# endif
    int		numberextra;
    char_u	*ps;
    int		tab_corr = (*s == TAB);
    int		n;

    /*
     * No 'linebreak', 'showbreak' and 'breakindent': return quickly.
     */
    if (!wp->w_p_lbr && !wp->w_p_bri && *p_sbr == NUL)
#endif
    {
#ifdef FEAT_MBYTE
	if (wp->w_p_wrap)
	    return win_nolbr_chartabsize(wp, s, col, headp);
#endif
	/* The macro contains the return statements. */
	RET_WIN_BUF_CHARTABSIZE(wp, wp->w_buffer, s, col)
    }

#ifdef FEAT_LINEBREAK
    /*
     * First get normal size, without 'linebreak'
     */
    size = win_chartabsize(wp, s, col);
    c = *s;
    if (tab_corr)
	col_adj = size - 1;

    /*
     * If 'linebreak' set check at a blank before a non-blank if the line
     * needs a break here
     */
    if (wp->w_p_lbr
	    && vim_isbreak(c)
	    && !vim_isbreak(s[1])
	    && wp->w_p_wrap
# ifdef FEAT_VERTSPLIT
	    && wp->w_width != 0
# endif
       )
    {
	/*
	 * Count all characters from first non-blank after a blank up to next
	 * non-blank after a blank.
	 */
	numberextra = win_col_off(wp);
	col2 = col;
	colmax = (colnr_T)(W_WIDTH(wp) - numberextra - col_adj);
	if (col >= colmax)
	{
	    /* "col" is past the first screen line: move "colmax" to the end
	     * of the screen line that "col" is in. */
	    colmax += col_adj;
	    n = colmax +  win_col_off2(wp);
	    if (n > 0)
		colmax += (((col - colmax) / n) + 1) * n - col_adj;
	}

	for (;;)
	{
	    ps = s;
	    mb_ptr_adv(s);
	    c = *s;
	    if (!(c != NUL
		    && (vim_isbreak(c)
			|| (!vim_isbreak(c)
			    && (col2 == col || !vim_isbreak(*ps))))))
		break;

	    col2 += win_chartabsize(wp, s, col2);
	    if (col2 >= colmax)		/* doesn't fit */
	    {
		/* The character fills the rest of the screen line. */
		size = colmax - col + col_adj;
		tab_corr = FALSE;
		break;
	    }
	}
    }
# ifdef FEAT_MBYTE
    else if (has_mbyte && size == 2 && MB_BYTE2LEN(*s) > 1
				    && wp->w_p_wrap && in_win_border(wp, col))
    {
	++size;		/* Count the ">" in the last column. */
	mb_added = 1;
    }
# endif

    /*
     * May have to add something for 'breakindent' and/or 'showbreak'
     * string at start of line.
     * Set *headp to the size of what we add.
     */
    added = 0;
    if ((*p_sbr != NUL || wp->w_p_bri) && wp->w_p_wrap && col != 0)
    {
	colnr_T sbrlen = 0;
	int	numberwidth = win_col_off(wp);

	numberextra = numberwidth;
	col += numberextra + mb_added;
	if (col >= (colnr_T)W_WIDTH(wp))
	{
	    /* Not in the first screen line: reduce "col" to the column
	     * within the screen line. */
	    col -= W_WIDTH(wp);
	    numberextra = W_WIDTH(wp) - (numberextra - win_col_off2(wp));
	    if (col >= numberextra && numberextra > 0)
		col %= numberextra;
	    if (*p_sbr != NUL)
	    {
		sbrlen = (colnr_T)MB_CHARLEN(p_sbr);
		if (col >= sbrlen)
		    col -= sbrlen;
	    }
	    if (col >= numberextra && numberextra > 0)
		col = col % numberextra;
	    else if (col > 0 && numberextra > 0)
		col += numberwidth - win_col_off2(wp);

	    numberwidth -= win_col_off2(wp);
	}
	/* At the start of a screen line, or the character does not fit in
	 * what is left of the screen line. */
	if (col == 0 || col + size + sbrlen > (colnr_T)W_WIDTH(wp))
	{
	    added = 0;
	    if (*p_sbr != NUL)
	    {
		if (size + sbrlen + numberwidth > (colnr_T)W_WIDTH(wp))
		{
		    /* calculate effective window width */
		    int width = (colnr_T)W_WIDTH(wp) - sbrlen - numberwidth;
		    int prev_width = col ? ((colnr_T)W_WIDTH(wp) - (sbrlen + col)) : 0;
		    if (width == 0)
			width = (colnr_T)W_WIDTH(wp);
		    added += ((size - prev_width) / width) * vim_strsize(p_sbr);
		    if ((size - prev_width) % width)
			/* wrapped, add another length of 'sbr' */
			added += vim_strsize(p_sbr);
		}
		else
		    added += vim_strsize(p_sbr);
	    }
	    if (wp->w_p_bri)
		added += get_breakindent_win(wp, line);

	    size += added;
	    /* Only report "added" through "headp" when at the start of a
	     * screen line. */
	    if (col != 0)
		added = 0;
	}
    }
    if (headp != NULL)
	*headp = added + mb_added;
    return size;
#endif
}
1238 
1239 #if defined(FEAT_MBYTE) || defined(PROTO)
1240 /*
1241  * Like win_lbr_chartabsize(), except that we know 'linebreak' is off and
1242  * 'wrap' is on.  This means we need to check for a double-byte character that
1243  * doesn't fit at the end of the screen line.
1244  */
1245     static int
1246 win_nolbr_chartabsize(wp, s, col, headp)
1247     win_T	*wp;
1248     char_u	*s;
1249     colnr_T	col;
1250     int		*headp;
1251 {
1252     int		n;
1253 
1254     if (*s == TAB && (!wp->w_p_list || lcs_tab1))
1255     {
1256 	n = wp->w_buffer->b_p_ts;
1257 	return (int)(n - (col % n));
1258     }
1259     n = ptr2cells(s);
1260     /* Add one cell for a double-width character in the last column of the
1261      * window, displayed with a ">". */
1262     if (n == 2 && MB_BYTE2LEN(*s) > 1 && in_win_border(wp, col))
1263     {
1264 	if (headp != NULL)
1265 	    *headp = 1;
1266 	return 3;
1267     }
1268     return n;
1269 }
1270 
1271 /*
1272  * Return TRUE if virtual column "vcol" is in the rightmost column of window
1273  * "wp".
1274  */
1275     int
1276 in_win_border(wp, vcol)
1277     win_T	*wp;
1278     colnr_T	vcol;
1279 {
1280     int		width1;		/* width of first line (after line number) */
1281     int		width2;		/* width of further lines */
1282 
1283 #ifdef FEAT_VERTSPLIT
1284     if (wp->w_width == 0)	/* there is no border */
1285 	return FALSE;
1286 #endif
1287     width1 = W_WIDTH(wp) - win_col_off(wp);
1288     if ((int)vcol < width1 - 1)
1289 	return FALSE;
1290     if ((int)vcol == width1 - 1)
1291 	return TRUE;
1292     width2 = width1 + win_col_off2(wp);
1293     if (width2 <= 0)
1294 	return FALSE;
1295     return ((vcol - width1) % width2 == width2 - 1);
1296 }
1297 #endif /* FEAT_MBYTE */
1298 
/*
 * Get virtual column number of pos.
 *  start: on the first position of this character (TAB, ctrl)
 * cursor: where the cursor is on this character (first char, except for TAB)
 *    end: on the last position of this character (TAB, ctrl)
 *
 * Each of "start", "cursor" and "end" may be NULL when that value is not
 * wanted.  The line is taken from the buffer of window "wp".
 * When pos->col is MAXCOL the scan continues until the NUL at the end of the
 * line.  The scan never goes past that NUL, it is counted as one column.
 *
 * This is used very often, keep it fast!
 */
    void
getvcol(wp, pos, start, cursor, end)
    win_T	*wp;
    pos_T	*pos;
    colnr_T	*start;
    colnr_T	*cursor;
    colnr_T	*end;
{
    colnr_T	vcol;		/* virtual column where "ptr" starts */
    char_u	*ptr;		/* points to current char */
    char_u	*posptr;	/* points to char at pos->col */
    char_u	*line;		/* start of the line */
    int		incr;		/* number of cells of the current char */
    int		head;		/* extra cells in front of the current char */
    int		ts = wp->w_buffer->b_p_ts;
    int		c;

    vcol = 0;
    line = ptr = ml_get_buf(wp->w_buffer, pos->lnum, FALSE);
    if (pos->col == MAXCOL)
	posptr = NULL;  /* continue until the NUL */
    else
	posptr = ptr + pos->col;

    /*
     * This function is used very often, do some speed optimizations.
     * When 'list', 'linebreak', 'showbreak' and 'breakindent' are not set
     * use a simple loop.
     * Also use this when 'list' is set but tabs take their normal size.
     */
    if ((!wp->w_p_list || lcs_tab1 != NUL)
#ifdef FEAT_LINEBREAK
	    && !wp->w_p_lbr && *p_sbr == NUL && !wp->w_p_bri
#endif
       )
    {
	/* Without multi-byte support "head" never changes in this loop, it
	 * only needs to be set once. */
#ifndef FEAT_MBYTE
	head = 0;
#endif
	for (;;)
	{
	    /* With multi-byte support "head" can be set to 1 for a wide
	     * character, reset it for every character. */
#ifdef FEAT_MBYTE
	    head = 0;
#endif
	    c = *ptr;
	    /* make sure we don't go past the end of the line */
	    if (c == NUL)
	    {
		incr = 1;	/* NUL at end of line only takes one column */
		break;
	    }
	    /* A tab gets expanded, depending on the current column */
	    if (c == TAB)
		incr = ts - (vcol % ts);
	    else
	    {
#ifdef FEAT_MBYTE
		if (has_mbyte)
		{
		    /* For utf-8, if the byte is >= 0x80, need to look at
		     * further bytes to find the cell width. */
		    if (enc_utf8 && c >= 0x80)
			incr = utf_ptr2cells(ptr);
		    else
			incr = CHARSIZE(c);

		    /* If a double-cell char doesn't fit at the end of a line
		     * it wraps to the next line, it's like this char is three
		     * cells wide. */
		    if (incr == 2 && wp->w_p_wrap && MB_BYTE2LEN(*ptr) > 1
			    && in_win_border(wp, vcol))
		    {
			++incr;
			head = 1;
		    }
		}
		else
#endif
		    incr = CHARSIZE(c);
	    }

	    /* Stop before adding the width of the character at the position,
	     * "vcol" is then the column where this character starts. */
	    if (posptr != NULL && ptr >= posptr) /* character at pos->col */
		break;

	    vcol += incr;
	    mb_ptr_adv(ptr);
	}
    }
    else
    {
	/* Slow loop: let win_lbr_chartabsize() compute the size of every
	 * character, it also sets "head". */
	for (;;)
	{
	    /* A tab gets expanded, depending on the current column */
	    head = 0;
	    incr = win_lbr_chartabsize(wp, line, ptr, vcol, &head);
	    /* make sure we don't go past the end of the line */
	    if (*ptr == NUL)
	    {
		incr = 1;	/* NUL at end of line only takes one column */
		break;
	    }

	    if (posptr != NULL && ptr >= posptr) /* character at pos->col */
		break;

	    vcol += incr;
	    mb_ptr_adv(ptr);
	}
    }

    /* Here "vcol" is the column where the character starts, "incr" is its
     * width and "head" the number of cells in front of it. */
    if (start != NULL)
	*start = vcol + head;
    if (end != NULL)
	*end = vcol + incr - 1;
    if (cursor != NULL)
    {
	/* On a TAB the cursor is put at the end of the TAB, but only when
	 * State has NORMAL set, 'list' is off, virtual editing is not active
	 * and the Visual-mode exception below does not apply. */
	if (*ptr == TAB
		&& (State & NORMAL)
		&& !wp->w_p_list
		&& !virtual_active()
		&& !(VIsual_active && (*p_sel == 'e' || ltoreq(*pos, VIsual)))
		)
	    *cursor = vcol + incr - 1;	    /* cursor at end */
	else
	    *cursor = vcol + head;	    /* cursor at start */
    }
}
1433 
1434 /*
1435  * Get virtual cursor column in the current window, pretending 'list' is off.
1436  */
1437     colnr_T
1438 getvcol_nolist(posp)
1439     pos_T	*posp;
1440 {
1441     int		list_save = curwin->w_p_list;
1442     colnr_T	vcol;
1443 
1444     curwin->w_p_list = FALSE;
1445     getvcol(curwin, posp, NULL, &vcol, NULL);
1446     curwin->w_p_list = list_save;
1447     return vcol;
1448 }
1449 
1450 #if defined(FEAT_VIRTUALEDIT) || defined(PROTO)
1451 /*
1452  * Get virtual column in virtual mode.
1453  */
1454     void
1455 getvvcol(wp, pos, start, cursor, end)
1456     win_T	*wp;
1457     pos_T	*pos;
1458     colnr_T	*start;
1459     colnr_T	*cursor;
1460     colnr_T	*end;
1461 {
1462     colnr_T	col;
1463     colnr_T	coladd;
1464     colnr_T	endadd;
1465 # ifdef FEAT_MBYTE
1466     char_u	*ptr;
1467 # endif
1468 
1469     if (virtual_active())
1470     {
1471 	/* For virtual mode, only want one value */
1472 	getvcol(wp, pos, &col, NULL, NULL);
1473 
1474 	coladd = pos->coladd;
1475 	endadd = 0;
1476 # ifdef FEAT_MBYTE
1477 	/* Cannot put the cursor on part of a wide character. */
1478 	ptr = ml_get_buf(wp->w_buffer, pos->lnum, FALSE);
1479 	if (pos->col < (colnr_T)STRLEN(ptr))
1480 	{
1481 	    int c = (*mb_ptr2char)(ptr + pos->col);
1482 
1483 	    if (c != TAB && vim_isprintc(c))
1484 	    {
1485 		endadd = (colnr_T)(char2cells(c) - 1);
1486 		if (coladd > endadd)	/* past end of line */
1487 		    endadd = 0;
1488 		else
1489 		    coladd = 0;
1490 	    }
1491 	}
1492 # endif
1493 	col += coladd;
1494 	if (start != NULL)
1495 	    *start = col;
1496 	if (cursor != NULL)
1497 	    *cursor = col;
1498 	if (end != NULL)
1499 	    *end = col + endadd;
1500     }
1501     else
1502 	getvcol(wp, pos, start, cursor, end);
1503 }
1504 #endif
1505 
1506 /*
1507  * Get the leftmost and rightmost virtual column of pos1 and pos2.
1508  * Used for Visual block mode.
1509  */
1510     void
1511 getvcols(wp, pos1, pos2, left, right)
1512     win_T	*wp;
1513     pos_T	*pos1, *pos2;
1514     colnr_T	*left, *right;
1515 {
1516     colnr_T	from1, from2, to1, to2;
1517 
1518     if (ltp(pos1, pos2))
1519     {
1520 	getvvcol(wp, pos1, &from1, NULL, &to1);
1521 	getvvcol(wp, pos2, &from2, NULL, &to2);
1522     }
1523     else
1524     {
1525 	getvvcol(wp, pos2, &from1, NULL, &to1);
1526 	getvvcol(wp, pos1, &from2, NULL, &to2);
1527     }
1528     if (from2 < from1)
1529 	*left = from2;
1530     else
1531 	*left = from1;
1532     if (to2 > to1)
1533     {
1534 	if (*p_sel == 'e' && from2 - 1 >= to1)
1535 	    *right = from2 - 1;
1536 	else
1537 	    *right = to2;
1538     }
1539     else
1540 	*right = to1;
1541 }
1542 
1543 /*
1544  * skipwhite: skip over ' ' and '\t'.
1545  */
1546     char_u *
1547 skipwhite(q)
1548     char_u	*q;
1549 {
1550     char_u	*p = q;
1551 
1552     while (vim_iswhite(*p)) /* skip to next non-white */
1553 	++p;
1554     return p;
1555 }
1556 
1557 /*
1558  * skip over digits
1559  */
1560     char_u *
1561 skipdigits(q)
1562     char_u	*q;
1563 {
1564     char_u	*p = q;
1565 
1566     while (VIM_ISDIGIT(*p))	/* skip to next non-digit */
1567 	++p;
1568     return p;
1569 }
1570 
1571 #if defined(FEAT_SYN_HL) || defined(FEAT_SPELL) || defined(PROTO)
1572 /*
1573  * skip over digits and hex characters
1574  */
1575     char_u *
1576 skiphex(q)
1577     char_u	*q;
1578 {
1579     char_u	*p = q;
1580 
1581     while (vim_isxdigit(*p))	/* skip to next non-digit */
1582 	++p;
1583     return p;
1584 }
1585 #endif
1586 
1587 #if defined(FEAT_EX_EXTRA) || defined(PROTO)
1588 /*
1589  * skip to digit (or NUL after the string)
1590  */
1591     char_u *
1592 skiptodigit(q)
1593     char_u	*q;
1594 {
1595     char_u	*p = q;
1596 
1597     while (*p != NUL && !VIM_ISDIGIT(*p))	/* skip to next digit */
1598 	++p;
1599     return p;
1600 }
1601 
1602 /*
1603  * skip to hex character (or NUL after the string)
1604  */
1605     char_u *
1606 skiptohex(q)
1607     char_u	*q;
1608 {
1609     char_u	*p = q;
1610 
1611     while (*p != NUL && !vim_isxdigit(*p))	/* skip to next digit */
1612 	++p;
1613     return p;
1614 }
1615 #endif
1616 
/*
 * Variant of isdigit() that can handle characters > 0x100.
 * We don't use isdigit() here, because on some systems it also considers
 * superscript 1 to be a digit.
 * Use the VIM_ISDIGIT() macro for simple arguments.
 * Returns 1 for '0' to '9', 0 for anything else.
 */
    int
vim_isdigit(c)
    int		c;
{
    return !(c < '0' || c > '9');
}
1629 
/*
 * Variant of isxdigit() that can handle characters > 0x100.
 * We don't use isxdigit() here, because on some systems it also considers
 * superscript 1 to be a digit.
 * Returns 1 for '0' to '9', 'a' to 'f' and 'A' to 'F', 0 for anything else.
 */
    int
vim_isxdigit(c)
    int		c;
{
    if (c >= '0' && c <= '9')
	return 1;
    /* Everything from 'a' upwards can only match the lower case range. */
    if (c >= 'a')
	return c <= 'f';
    return c >= 'A' && c <= 'F';
}
1643 
1644 #if defined(FEAT_MBYTE) || defined(PROTO)
/*
 * Vim's own character class functions.  These exist because many library
 * islower()/toupper() etc. do not work properly: they crash when used with
 * invalid values or can't handle latin1 when the locale is C.
 * Speed is most important here.
 *
 * The three tables below are indexed with the character value.  The
 * functions below only consult them for characters 0x80 - 0xff, and only
 * when enc_latin1like is set.
 */
/* Values stored in latin1flags[] for lower and upper case letters; all
 * other entries hold a space. */
#define LATIN1LOWER 'l'
#define LATIN1UPPER 'U'

/* latin1flags[c]: LATIN1UPPER, LATIN1LOWER or ' ' for character "c". */
static char_u latin1flags[257] = "                                                                 UUUUUUUUUUUUUUUUUUUUUUUUUU      llllllllllllllllllllllllll                                                                     UUUUUUUUUUUUUUUUUUUUUUU UUUUUUUllllllllllllllllllllllll llllllll";
/* latin1upper[c]: the value vim_toupper() returns for character "c". */
static char_u latin1upper[257] = "                                 !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`ABCDEFGHIJKLMNOPQRSTUVWXYZ{|}~\x7f\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xf7\xd8\xd9\xda\xdb\xdc\xdd\xde\xff";
/* latin1lower[c]: the value vim_tolower() returns for character "c". */
static char_u latin1lower[257] = "                                 !\"#$%&'()*+,-./0123456789:;<=>?@abcdefghijklmnopqrstuvwxyz[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\x7f\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xd7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xdf\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff";
1657 
1658     int
1659 vim_islower(c)
1660     int	    c;
1661 {
1662     if (c <= '@')
1663 	return FALSE;
1664     if (c >= 0x80)
1665     {
1666 	if (enc_utf8)
1667 	    return utf_islower(c);
1668 	if (c >= 0x100)
1669 	{
1670 #ifdef HAVE_ISWLOWER
1671 	    if (has_mbyte)
1672 		return iswlower(c);
1673 #endif
1674 	    /* islower() can't handle these chars and may crash */
1675 	    return FALSE;
1676 	}
1677 	if (enc_latin1like)
1678 	    return (latin1flags[c] & LATIN1LOWER) == LATIN1LOWER;
1679     }
1680     return islower(c);
1681 }
1682 
1683     int
1684 vim_isupper(c)
1685     int	    c;
1686 {
1687     if (c <= '@')
1688 	return FALSE;
1689     if (c >= 0x80)
1690     {
1691 	if (enc_utf8)
1692 	    return utf_isupper(c);
1693 	if (c >= 0x100)
1694 	{
1695 #ifdef HAVE_ISWUPPER
1696 	    if (has_mbyte)
1697 		return iswupper(c);
1698 #endif
1699 	    /* islower() can't handle these chars and may crash */
1700 	    return FALSE;
1701 	}
1702 	if (enc_latin1like)
1703 	    return (latin1flags[c] & LATIN1UPPER) == LATIN1UPPER;
1704     }
1705     return isupper(c);
1706 }
1707 
1708     int
1709 vim_toupper(c)
1710     int	    c;
1711 {
1712     if (c <= '@')
1713 	return c;
1714     if (c >= 0x80)
1715     {
1716 	if (enc_utf8)
1717 	    return utf_toupper(c);
1718 	if (c >= 0x100)
1719 	{
1720 #ifdef HAVE_TOWUPPER
1721 	    if (has_mbyte)
1722 		return towupper(c);
1723 #endif
1724 	    /* toupper() can't handle these chars and may crash */
1725 	    return c;
1726 	}
1727 	if (enc_latin1like)
1728 	    return latin1upper[c];
1729     }
1730     return TOUPPER_LOC(c);
1731 }
1732 
1733     int
1734 vim_tolower(c)
1735     int	    c;
1736 {
1737     if (c <= '@')
1738 	return c;
1739     if (c >= 0x80)
1740     {
1741 	if (enc_utf8)
1742 	    return utf_tolower(c);
1743 	if (c >= 0x100)
1744 	{
1745 #ifdef HAVE_TOWLOWER
1746 	    if (has_mbyte)
1747 		return towlower(c);
1748 #endif
1749 	    /* tolower() can't handle these chars and may crash */
1750 	    return c;
1751 	}
1752 	if (enc_latin1like)
1753 	    return latin1lower[c];
1754     }
1755     return TOLOWER_LOC(c);
1756 }
1757 #endif
1758 
1759 /*
1760  * skiptowhite: skip over text until ' ' or '\t' or NUL.
1761  */
1762     char_u *
1763 skiptowhite(p)
1764     char_u	*p;
1765 {
1766     while (*p != ' ' && *p != '\t' && *p != NUL)
1767 	++p;
1768     return p;
1769 }
1770 
1771 #if defined(FEAT_LISTCMDS) || defined(FEAT_SIGNS) || defined(FEAT_SNIFF) \
1772 	|| defined(PROTO)
1773 /*
1774  * skiptowhite_esc: Like skiptowhite(), but also skip escaped chars
1775  */
1776     char_u *
1777 skiptowhite_esc(p)
1778     char_u	*p;
1779 {
1780     while (*p != ' ' && *p != '\t' && *p != NUL)
1781     {
1782 	if ((*p == '\\' || *p == Ctrl_V) && *(p + 1) != NUL)
1783 	    ++p;
1784 	++p;
1785     }
1786     return p;
1787 }
1788 #endif
1789 
1790 /*
1791  * Getdigits: Get a number from a string and skip over it.
1792  * Note: the argument is a pointer to a char_u pointer!
1793  */
1794     long
1795 getdigits(pp)
1796     char_u **pp;
1797 {
1798     char_u	*p;
1799     long	retval;
1800 
1801     p = *pp;
1802     retval = atol((char *)p);
1803     if (*p == '-')		/* skip negative sign */
1804 	++p;
1805     p = skipdigits(p);		/* skip to next non-digit */
1806     *pp = p;
1807     return retval;
1808 }
1809 
1810 /*
1811  * Return TRUE if "lbuf" is empty or only contains blanks.
1812  */
1813     int
1814 vim_isblankline(lbuf)
1815     char_u	*lbuf;
1816 {
1817     char_u	*p;
1818 
1819     p = skipwhite(lbuf);
1820     return (*p == NUL || *p == '\r' || *p == '\n');
1821 }
1822 
1823 /*
1824  * Convert a string into a long and/or unsigned long, taking care of
1825  * hexadecimal and octal numbers.  Accepts a '-' sign.
1826  * If "hexp" is not NULL, returns a flag to indicate the type of the number:
1827  *  0	    decimal
1828  *  '0'	    octal
1829  *  'X'	    hex
1830  *  'x'	    hex
1831  * If "len" is not NULL, the length of the number in characters is returned.
1832  * If "nptr" is not NULL, the signed result is returned in it.
1833  * If "unptr" is not NULL, the unsigned result is returned in it.
1834  * If "dooct" is non-zero recognize octal numbers, when > 1 always assume
1835  * octal number.
1836  * If "dohex" is non-zero recognize hex numbers, when > 1 always assume
1837  * hex number.
1838  * If maxlen > 0, check at a maximum maxlen chars
1839  */
1840     void
1841 vim_str2nr(start, hexp, len, dooct, dohex, nptr, unptr, maxlen)
1842     char_u		*start;
1843     int			*hexp;	    /* return: type of number 0 = decimal, 'x'
1844 				       or 'X' is hex, '0' = octal */
1845     int			*len;	    /* return: detected length of number */
1846     int			dooct;	    /* recognize octal number */
1847     int			dohex;	    /* recognize hex number */
1848     long		*nptr;	    /* return: signed result */
1849     unsigned long	*unptr;	    /* return: unsigned result */
1850     int			maxlen;     /* max length of string to check */
1851 {
1852     char_u	    *ptr = start;
1853     int		    hex = 0;		/* default is decimal */
1854     int		    negative = FALSE;
1855     unsigned long   un = 0;
1856     int		    n;
1857 
1858     if (ptr[0] == '-')
1859     {
1860 	negative = TRUE;
1861 	++ptr;
1862     }
1863 
1864     /* Recognize hex and octal. */
1865     if (ptr[0] == '0' && ptr[1] != '8' && ptr[1] != '9'
1866 					       && (maxlen == 0 || maxlen > 1))
1867     {
1868 	hex = ptr[1];
1869 	if (dohex && (hex == 'X' || hex == 'x') && vim_isxdigit(ptr[2])
1870 					       && (maxlen == 0 || maxlen > 2))
1871 	    ptr += 2;			/* hexadecimal */
1872 	else
1873 	{
1874 	    hex = 0;			/* default is decimal */
1875 	    if (dooct)
1876 	    {
1877 		/* Don't interpret "0", "08" or "0129" as octal. */
1878 		for (n = 1; VIM_ISDIGIT(ptr[n]); ++n)
1879 		{
1880 		    if (ptr[n] > '7')
1881 		    {
1882 			hex = 0;	/* can't be octal */
1883 			break;
1884 		    }
1885 		    if (ptr[n] >= '0')
1886 			hex = '0';	/* assume octal */
1887 		    if (n == maxlen)
1888 			break;
1889 		}
1890 	    }
1891 	}
1892     }
1893 
1894     /*
1895      * Do the string-to-numeric conversion "manually" to avoid sscanf quirks.
1896      */
1897     n = 1;
1898     if (hex == '0' || dooct > 1)
1899     {
1900 	/* octal */
1901 	while ('0' <= *ptr && *ptr <= '7')
1902 	{
1903 	    un = 8 * un + (unsigned long)(*ptr - '0');
1904 	    ++ptr;
1905 	    if (n++ == maxlen)
1906 		break;
1907 	}
1908     }
1909     else if (hex != 0 || dohex > 1)
1910     {
1911 	/* hex */
1912 	if (hex != 0)
1913 	    n += 2;	    /* skip over "0x" */
1914 	while (vim_isxdigit(*ptr))
1915 	{
1916 	    un = 16 * un + (unsigned long)hex2nr(*ptr);
1917 	    ++ptr;
1918 	    if (n++ == maxlen)
1919 		break;
1920 	}
1921     }
1922     else
1923     {
1924 	/* decimal */
1925 	while (VIM_ISDIGIT(*ptr))
1926 	{
1927 	    un = 10 * un + (unsigned long)(*ptr - '0');
1928 	    ++ptr;
1929 	    if (n++ == maxlen)
1930 		break;
1931 	}
1932     }
1933 
1934     if (hexp != NULL)
1935 	*hexp = hex;
1936     if (len != NULL)
1937 	*len = (int)(ptr - start);
1938     if (nptr != NULL)
1939     {
1940 	if (negative)   /* account for leading '-' for decimal numbers */
1941 	    *nptr = -(long)un;
1942 	else
1943 	    *nptr = (long)un;
1944     }
1945     if (unptr != NULL)
1946 	*unptr = un;
1947 }
1948 
/*
 * Return the value of a single hex character.
 * Only valid when the argument is '0' - '9', 'A' - 'F' or 'a' - 'f'.
 * Any other character is handled like a digit: the result is "c" - '0'.
 */
    int
hex2nr(c)
    int		c;
{
    int		base;	    /* character value that stands for zero */

    if (c >= 'a' && c <= 'f')
	base = 'a' - 10;
    else if (c >= 'A' && c <= 'F')
	base = 'A' - 10;
    else
	base = '0';
    return c - base;
}
1963 
1964 #if defined(FEAT_TERMRESPONSE) \
1965 	|| (defined(FEAT_GUI_GTK) && defined(FEAT_WINDOWS)) || defined(PROTO)
1966 /*
1967  * Convert two hex characters to a byte.
1968  * Return -1 if one of the characters is not hex.
1969  */
1970     int
1971 hexhex2nr(p)
1972     char_u	*p;
1973 {
1974     if (!vim_isxdigit(p[0]) || !vim_isxdigit(p[1]))
1975 	return -1;
1976     return (hex2nr(p[0]) << 4) + hex2nr(p[1]);
1977 }
1978 #endif
1979 
1980 /*
1981  * Return TRUE if "str" starts with a backslash that should be removed.
1982  * For MS-DOS, WIN32 and OS/2 this is only done when the character after the
1983  * backslash is not a normal file name character.
1984  * '$' is a valid file name character, we don't remove the backslash before
1985  * it.  This means it is not possible to use an environment variable after a
1986  * backslash.  "C:\$VIM\doc" is taken literally, only "$VIM\doc" works.
1987  * Although "\ name" is valid, the backslash in "Program\ files" must be
1988  * removed.  Assume a file name doesn't start with a space.
1989  * For multi-byte names, never remove a backslash before a non-ascii
1990  * character, assume that all multi-byte characters are valid file name
1991  * characters.
1992  */
1993     int
1994 rem_backslash(str)
1995     char_u  *str;
1996 {
1997 #ifdef BACKSLASH_IN_FILENAME
1998     return (str[0] == '\\'
1999 # ifdef FEAT_MBYTE
2000 	    && str[1] < 0x80
2001 # endif
2002 	    && (str[1] == ' '
2003 		|| (str[1] != NUL
2004 		    && str[1] != '*'
2005 		    && str[1] != '?'
2006 		    && !vim_isfilec(str[1]))));
2007 #else
2008     return (str[0] == '\\' && str[1] != NUL);
2009 #endif
2010 }
2011 
2012 /*
2013  * Halve the number of backslashes in a file name argument.
2014  * For MS-DOS we only do this if the character after the backslash
2015  * is not a normal file character.
2016  */
2017     void
2018 backslash_halve(p)
2019     char_u	*p;
2020 {
2021     for ( ; *p; ++p)
2022 	if (rem_backslash(p))
2023 	    STRMOVE(p, p + 1);
2024 }
2025 
2026 /*
2027  * backslash_halve() plus save the result in allocated memory.
2028  */
2029     char_u *
2030 backslash_halve_save(p)
2031     char_u	*p;
2032 {
2033     char_u	*res;
2034 
2035     res = vim_strsave(p);
2036     if (res == NULL)
2037 	return p;
2038     backslash_halve(res);
2039     return res;
2040 }
2041 
2042 #if (defined(EBCDIC) && defined(FEAT_POSTSCRIPT)) || defined(PROTO)
/*
 * Table for EBCDIC to ASCII conversion unashamedly taken from xxd.c!
 * The first 64 entries have been added to map control characters defined in
 * ascii.h
 *
 * The table has one entry for each of the 256 byte values; it is indexed
 * with the byte to convert, see ebcdic2ascii().  The entries are written as
 * octal numbers, eight per row.
 */
static char_u ebcdic2ascii_tab[256] =
{
    0000, 0001, 0002, 0003, 0004, 0011, 0006, 0177,
    0010, 0011, 0012, 0013, 0014, 0015, 0016, 0017,
    0020, 0021, 0022, 0023, 0024, 0012, 0010, 0027,
    0030, 0031, 0032, 0033, 0033, 0035, 0036, 0037,
    0040, 0041, 0042, 0043, 0044, 0045, 0046, 0047,
    0050, 0051, 0052, 0053, 0054, 0055, 0056, 0057,
    0060, 0061, 0062, 0063, 0064, 0065, 0066, 0067,
    0070, 0071, 0072, 0073, 0074, 0075, 0076, 0077,
    0040, 0240, 0241, 0242, 0243, 0244, 0245, 0246,
    0247, 0250, 0325, 0056, 0074, 0050, 0053, 0174,
    0046, 0251, 0252, 0253, 0254, 0255, 0256, 0257,
    0260, 0261, 0041, 0044, 0052, 0051, 0073, 0176,
    0055, 0057, 0262, 0263, 0264, 0265, 0266, 0267,
    0270, 0271, 0313, 0054, 0045, 0137, 0076, 0077,
    0272, 0273, 0274, 0275, 0276, 0277, 0300, 0301,
    0302, 0140, 0072, 0043, 0100, 0047, 0075, 0042,
    0303, 0141, 0142, 0143, 0144, 0145, 0146, 0147,
    0150, 0151, 0304, 0305, 0306, 0307, 0310, 0311,
    0312, 0152, 0153, 0154, 0155, 0156, 0157, 0160,
    0161, 0162, 0136, 0314, 0315, 0316, 0317, 0320,
    0321, 0345, 0163, 0164, 0165, 0166, 0167, 0170,
    0171, 0172, 0322, 0323, 0324, 0133, 0326, 0327,
    0330, 0331, 0332, 0333, 0334, 0335, 0336, 0337,
    0340, 0341, 0342, 0343, 0344, 0135, 0346, 0347,
    0173, 0101, 0102, 0103, 0104, 0105, 0106, 0107,
    0110, 0111, 0350, 0351, 0352, 0353, 0354, 0355,
    0175, 0112, 0113, 0114, 0115, 0116, 0117, 0120,
    0121, 0122, 0356, 0357, 0360, 0361, 0362, 0363,
    0134, 0237, 0123, 0124, 0125, 0126, 0127, 0130,
    0131, 0132, 0364, 0365, 0366, 0367, 0370, 0371,
    0060, 0061, 0062, 0063, 0064, 0065, 0066, 0067,
    0070, 0071, 0372, 0373, 0374, 0375, 0376, 0377
};
2083 
2084 /*
2085  * Convert a buffer worth of characters from EBCDIC to ASCII.  Only useful if
2086  * wanting 7-bit ASCII characters out the other end.
2087  */
2088     void
2089 ebcdic2ascii(buffer, len)
2090     char_u	*buffer;
2091     int		len;
2092 {
2093     int		i;
2094 
2095     for (i = 0; i < len; i++)
2096 	buffer[i] = ebcdic2ascii_tab[buffer[i]];
2097 }
2098 #endif
2099