1 /* vi:set ts=8 sts=4 sw=4 noet: 2 * 3 * VIM - Vi IMproved by Bram Moolenaar 4 * 5 * Do ":help uganda" in Vim to read copying and usage conditions. 6 * Do ":help credits" in Vim to see a list of people who contributed. 7 * See README.txt for an overview of the Vim source code. 8 */ 9 10 #include "vim.h" 11 12 #if defined(HAVE_WCHAR_H) 13 # include <wchar.h> /* for towupper() and towlower() */ 14 #endif 15 static int win_nolbr_chartabsize(win_T *wp, char_u *s, colnr_T col, int *headp); 16 17 static unsigned nr2hex(unsigned c); 18 19 static int chartab_initialized = FALSE; 20 21 /* b_chartab[] is an array of 32 bytes, each bit representing one of the 22 * characters 0-255. */ 23 #define SET_CHARTAB(buf, c) (buf)->b_chartab[(unsigned)(c) >> 3] |= (1 << ((c) & 0x7)) 24 #define RESET_CHARTAB(buf, c) (buf)->b_chartab[(unsigned)(c) >> 3] &= ~(1 << ((c) & 0x7)) 25 #define GET_CHARTAB(buf, c) ((buf)->b_chartab[(unsigned)(c) >> 3] & (1 << ((c) & 0x7))) 26 27 /* table used below, see init_chartab() for an explanation */ 28 static char_u g_chartab[256]; 29 30 /* 31 * Flags for g_chartab[]. 32 */ 33 #define CT_CELL_MASK 0x07 /* mask: nr of display cells (1, 2 or 4) */ 34 #define CT_PRINT_CHAR 0x10 /* flag: set for printable chars */ 35 #define CT_ID_CHAR 0x20 /* flag: set for ID chars */ 36 #define CT_FNAME_CHAR 0x40 /* flag: set for file name chars */ 37 38 static int in_win_border(win_T *wp, colnr_T vcol); 39 40 /* 41 * Fill g_chartab[]. Also fills curbuf->b_chartab[] with flags for keyword 42 * characters for current buffer. 43 * 44 * Depends on the option settings 'iskeyword', 'isident', 'isfname', 45 * 'isprint' and 'encoding'. 46 * 47 * The index in g_chartab[] depends on 'encoding': 48 * - For non-multi-byte index with the byte (same as the character). 49 * - For DBCS index with the first byte. 
50 * - For UTF-8 index with the character (when first byte is up to 0x80 it is 51 * the same as the character, if the first byte is 0x80 and above it depends 52 * on further bytes). 53 * 54 * The contents of g_chartab[]: 55 * - The lower two bits, masked by CT_CELL_MASK, give the number of display 56 * cells the character occupies (1 or 2). Not valid for UTF-8 above 0x80. 57 * - CT_PRINT_CHAR bit is set when the character is printable (no need to 58 * translate the character before displaying it). Note that only DBCS 59 * characters can have 2 display cells and still be printable. 60 * - CT_FNAME_CHAR bit is set when the character can be in a file name. 61 * - CT_ID_CHAR bit is set when the character can be in an identifier. 62 * 63 * Return FAIL if 'iskeyword', 'isident', 'isfname' or 'isprint' option has an 64 * error, OK otherwise. 65 */ 66 int 67 init_chartab(void) 68 { 69 return buf_init_chartab(curbuf, TRUE); 70 } 71 72 int 73 buf_init_chartab( 74 buf_T *buf, 75 int global) /* FALSE: only set buf->b_chartab[] */ 76 { 77 int c; 78 int c2; 79 char_u *p; 80 int i; 81 int tilde; 82 int do_isalpha; 83 84 if (global) 85 { 86 /* 87 * Set the default size for printable characters: 88 * From <Space> to '~' is 1 (printable), others are 2 (not printable). 89 * This also inits all 'isident' and 'isfname' flags to FALSE. 90 * 91 * EBCDIC: all chars below ' ' are not printable, all others are 92 * printable. 93 */ 94 c = 0; 95 while (c < ' ') 96 g_chartab[c++] = (dy_flags & DY_UHEX) ? 
4 : 2; 97 #ifdef EBCDIC 98 while (c < 255) 99 #else 100 while (c <= '~') 101 #endif 102 g_chartab[c++] = 1 + CT_PRINT_CHAR; 103 while (c < 256) 104 { 105 /* UTF-8: bytes 0xa0 - 0xff are printable (latin1) */ 106 if (enc_utf8 && c >= 0xa0) 107 g_chartab[c++] = CT_PRINT_CHAR + 1; 108 /* euc-jp characters starting with 0x8e are single width */ 109 else if (enc_dbcs == DBCS_JPNU && c == 0x8e) 110 g_chartab[c++] = CT_PRINT_CHAR + 1; 111 /* other double-byte chars can be printable AND double-width */ 112 else if (enc_dbcs != 0 && MB_BYTE2LEN(c) == 2) 113 g_chartab[c++] = CT_PRINT_CHAR + 2; 114 else 115 /* the rest is unprintable by default */ 116 g_chartab[c++] = (dy_flags & DY_UHEX) ? 4 : 2; 117 } 118 119 /* Assume that every multi-byte char is a filename character. */ 120 for (c = 1; c < 256; ++c) 121 if ((enc_dbcs != 0 && MB_BYTE2LEN(c) > 1) 122 || (enc_dbcs == DBCS_JPNU && c == 0x8e) 123 || (enc_utf8 && c >= 0xa0)) 124 g_chartab[c] |= CT_FNAME_CHAR; 125 } 126 127 /* 128 * Init word char flags all to FALSE 129 */ 130 vim_memset(buf->b_chartab, 0, (size_t)32); 131 if (enc_dbcs != 0) 132 for (c = 0; c < 256; ++c) 133 { 134 /* double-byte characters are probably word characters */ 135 if (MB_BYTE2LEN(c) == 2) 136 SET_CHARTAB(buf, c); 137 } 138 139 #ifdef FEAT_LISP 140 /* 141 * In lisp mode the '-' character is included in keywords. 142 */ 143 if (buf->b_p_lisp) 144 SET_CHARTAB(buf, '-'); 145 #endif 146 147 /* Walk through the 'isident', 'iskeyword', 'isfname' and 'isprint' 148 * options Each option is a list of characters, character numbers or 149 * ranges, separated by commas, e.g.: "200-210,x,#-178,-" 150 */ 151 for (i = global ? 
0 : 3; i <= 3; ++i) 152 { 153 if (i == 0) 154 p = p_isi; /* first round: 'isident' */ 155 else if (i == 1) 156 p = p_isp; /* second round: 'isprint' */ 157 else if (i == 2) 158 p = p_isf; /* third round: 'isfname' */ 159 else /* i == 3 */ 160 p = buf->b_p_isk; /* fourth round: 'iskeyword' */ 161 162 while (*p) 163 { 164 tilde = FALSE; 165 do_isalpha = FALSE; 166 if (*p == '^' && p[1] != NUL) 167 { 168 tilde = TRUE; 169 ++p; 170 } 171 if (VIM_ISDIGIT(*p)) 172 c = getdigits(&p); 173 else 174 if (has_mbyte) 175 c = mb_ptr2char_adv(&p); 176 else 177 c = *p++; 178 c2 = -1; 179 if (*p == '-' && p[1] != NUL) 180 { 181 ++p; 182 if (VIM_ISDIGIT(*p)) 183 c2 = getdigits(&p); 184 else 185 if (has_mbyte) 186 c2 = mb_ptr2char_adv(&p); 187 else 188 c2 = *p++; 189 } 190 if (c <= 0 || c >= 256 || (c2 < c && c2 != -1) || c2 >= 256 191 || !(*p == NUL || *p == ',')) 192 return FAIL; 193 194 if (c2 == -1) /* not a range */ 195 { 196 /* 197 * A single '@' (not "@-@"): 198 * Decide on letters being ID/printable/keyword chars with 199 * standard function isalpha(). This takes care of locale for 200 * single-byte characters). 201 */ 202 if (c == '@') 203 { 204 do_isalpha = TRUE; 205 c = 1; 206 c2 = 255; 207 } 208 else 209 c2 = c; 210 } 211 while (c <= c2) 212 { 213 /* Use the MB_ functions here, because isalpha() doesn't 214 * work properly when 'encoding' is "latin1" and the locale is 215 * "C". */ 216 if (!do_isalpha || MB_ISLOWER(c) || MB_ISUPPER(c)) 217 { 218 if (i == 0) /* (re)set ID flag */ 219 { 220 if (tilde) 221 g_chartab[c] &= ~CT_ID_CHAR; 222 else 223 g_chartab[c] |= CT_ID_CHAR; 224 } 225 else if (i == 1) /* (re)set printable */ 226 { 227 if ((c < ' ' 228 #ifndef EBCDIC 229 || c > '~' 230 #endif 231 // For double-byte we keep the cell width, so 232 // that we can detect it from the first byte. 233 ) && !(enc_dbcs && MB_BYTE2LEN(c) == 2)) 234 { 235 if (tilde) 236 { 237 g_chartab[c] = (g_chartab[c] & ~CT_CELL_MASK) 238 + ((dy_flags & DY_UHEX) ? 
4 : 2); 239 g_chartab[c] &= ~CT_PRINT_CHAR; 240 } 241 else 242 { 243 g_chartab[c] = (g_chartab[c] & ~CT_CELL_MASK) + 1; 244 g_chartab[c] |= CT_PRINT_CHAR; 245 } 246 } 247 } 248 else if (i == 2) /* (re)set fname flag */ 249 { 250 if (tilde) 251 g_chartab[c] &= ~CT_FNAME_CHAR; 252 else 253 g_chartab[c] |= CT_FNAME_CHAR; 254 } 255 else /* i == 3 */ /* (re)set keyword flag */ 256 { 257 if (tilde) 258 RESET_CHARTAB(buf, c); 259 else 260 SET_CHARTAB(buf, c); 261 } 262 } 263 ++c; 264 } 265 266 c = *p; 267 p = skip_to_option_part(p); 268 if (c == ',' && *p == NUL) 269 /* Trailing comma is not allowed. */ 270 return FAIL; 271 } 272 } 273 chartab_initialized = TRUE; 274 return OK; 275 } 276 277 /* 278 * Translate any special characters in buf[bufsize] in-place. 279 * The result is a string with only printable characters, but if there is not 280 * enough room, not all characters will be translated. 281 */ 282 void 283 trans_characters( 284 char_u *buf, 285 int bufsize) 286 { 287 int len; /* length of string needing translation */ 288 int room; /* room in buffer after string */ 289 char_u *trs; /* translated character */ 290 int trs_len; /* length of trs[] */ 291 292 len = (int)STRLEN(buf); 293 room = bufsize - len; 294 while (*buf != 0) 295 { 296 /* Assume a multi-byte character doesn't need translation. */ 297 if (has_mbyte && (trs_len = (*mb_ptr2len)(buf)) > 1) 298 len -= trs_len; 299 else 300 { 301 trs = transchar_byte(*buf); 302 trs_len = (int)STRLEN(trs); 303 if (trs_len > 1) 304 { 305 room -= trs_len - 1; 306 if (room <= 0) 307 return; 308 mch_memmove(buf + trs_len, buf + 1, (size_t)len); 309 } 310 mch_memmove(buf, trs, (size_t)trs_len); 311 --len; 312 } 313 buf += trs_len; 314 } 315 } 316 317 /* 318 * Translate a string into allocated memory, replacing special chars with 319 * printable chars. Returns NULL when out of memory. 
320 */ 321 char_u * 322 transstr(char_u *s) 323 { 324 char_u *res; 325 char_u *p; 326 int l, len, c; 327 char_u hexbuf[11]; 328 329 if (has_mbyte) 330 { 331 /* Compute the length of the result, taking account of unprintable 332 * multi-byte characters. */ 333 len = 0; 334 p = s; 335 while (*p != NUL) 336 { 337 if ((l = (*mb_ptr2len)(p)) > 1) 338 { 339 c = (*mb_ptr2char)(p); 340 p += l; 341 if (vim_isprintc(c)) 342 len += l; 343 else 344 { 345 transchar_hex(hexbuf, c); 346 len += (int)STRLEN(hexbuf); 347 } 348 } 349 else 350 { 351 l = byte2cells(*p++); 352 if (l > 0) 353 len += l; 354 else 355 len += 4; /* illegal byte sequence */ 356 } 357 } 358 res = alloc(len + 1); 359 } 360 else 361 res = alloc(vim_strsize(s) + 1); 362 if (res != NULL) 363 { 364 *res = NUL; 365 p = s; 366 while (*p != NUL) 367 { 368 if (has_mbyte && (l = (*mb_ptr2len)(p)) > 1) 369 { 370 c = (*mb_ptr2char)(p); 371 if (vim_isprintc(c)) 372 STRNCAT(res, p, l); /* append printable multi-byte char */ 373 else 374 transchar_hex(res + STRLEN(res), c); 375 p += l; 376 } 377 else 378 STRCAT(res, transchar_byte(*p++)); 379 } 380 } 381 return res; 382 } 383 384 /* 385 * Convert the string "str[orglen]" to do ignore-case comparing. Uses the 386 * current locale. 387 * When "buf" is NULL returns an allocated string (NULL for out-of-memory). 388 * Otherwise puts the result in "buf[buflen]". 389 */ 390 char_u * 391 str_foldcase( 392 char_u *str, 393 int orglen, 394 char_u *buf, 395 int buflen) 396 { 397 garray_T ga; 398 int i; 399 int len = orglen; 400 401 #define GA_CHAR(i) ((char_u *)ga.ga_data)[i] 402 #define GA_PTR(i) ((char_u *)ga.ga_data + i) 403 #define STR_CHAR(i) (buf == NULL ? GA_CHAR(i) : buf[i]) 404 #define STR_PTR(i) (buf == NULL ? GA_PTR(i) : buf + i) 405 406 /* Copy "str" into "buf" or allocated memory, unmodified. 
*/ 407 if (buf == NULL) 408 { 409 ga_init2(&ga, 1, 10); 410 if (ga_grow(&ga, len + 1) == FAIL) 411 return NULL; 412 mch_memmove(ga.ga_data, str, (size_t)len); 413 ga.ga_len = len; 414 } 415 else 416 { 417 if (len >= buflen) /* Ugly! */ 418 len = buflen - 1; 419 mch_memmove(buf, str, (size_t)len); 420 } 421 if (buf == NULL) 422 GA_CHAR(len) = NUL; 423 else 424 buf[len] = NUL; 425 426 /* Make each character lower case. */ 427 i = 0; 428 while (STR_CHAR(i) != NUL) 429 { 430 if (enc_utf8 || (has_mbyte && MB_BYTE2LEN(STR_CHAR(i)) > 1)) 431 { 432 if (enc_utf8) 433 { 434 int c = utf_ptr2char(STR_PTR(i)); 435 int olen = utf_ptr2len(STR_PTR(i)); 436 int lc = utf_tolower(c); 437 438 /* Only replace the character when it is not an invalid 439 * sequence (ASCII character or more than one byte) and 440 * utf_tolower() doesn't return the original character. */ 441 if ((c < 0x80 || olen > 1) && c != lc) 442 { 443 int nlen = utf_char2len(lc); 444 445 /* If the byte length changes need to shift the following 446 * characters forward or backward. */ 447 if (olen != nlen) 448 { 449 if (nlen > olen) 450 { 451 if (buf == NULL 452 ? 
ga_grow(&ga, nlen - olen + 1) == FAIL 453 : len + nlen - olen >= buflen) 454 { 455 /* out of memory, keep old char */ 456 lc = c; 457 nlen = olen; 458 } 459 } 460 if (olen != nlen) 461 { 462 if (buf == NULL) 463 { 464 STRMOVE(GA_PTR(i) + nlen, GA_PTR(i) + olen); 465 ga.ga_len += nlen - olen; 466 } 467 else 468 { 469 STRMOVE(buf + i + nlen, buf + i + olen); 470 len += nlen - olen; 471 } 472 } 473 } 474 (void)utf_char2bytes(lc, STR_PTR(i)); 475 } 476 } 477 /* skip to next multi-byte char */ 478 i += (*mb_ptr2len)(STR_PTR(i)); 479 } 480 else 481 { 482 if (buf == NULL) 483 GA_CHAR(i) = TOLOWER_LOC(GA_CHAR(i)); 484 else 485 buf[i] = TOLOWER_LOC(buf[i]); 486 ++i; 487 } 488 } 489 490 if (buf == NULL) 491 return (char_u *)ga.ga_data; 492 return buf; 493 } 494 495 /* 496 * Catch 22: g_chartab[] can't be initialized before the options are 497 * initialized, and initializing options may cause transchar() to be called! 498 * When chartab_initialized == FALSE don't use g_chartab[]. 499 * Does NOT work for multi-byte characters, c must be <= 255. 500 * Also doesn't work for the first byte of a multi-byte, "c" must be a 501 * character! 502 */ 503 static char_u transchar_buf[7]; 504 505 char_u * 506 transchar(int c) 507 { 508 int i; 509 510 i = 0; 511 if (IS_SPECIAL(c)) /* special key code, display as ~@ char */ 512 { 513 transchar_buf[0] = '~'; 514 transchar_buf[1] = '@'; 515 i = 2; 516 c = K_SECOND(c); 517 } 518 519 if ((!chartab_initialized && ( 520 #ifdef EBCDIC 521 (c >= 64 && c < 255) 522 #else 523 (c >= ' ' && c <= '~') 524 #endif 525 )) || (c < 256 && vim_isprintc_strict(c))) 526 { 527 /* printable character */ 528 transchar_buf[i] = c; 529 transchar_buf[i + 1] = NUL; 530 } 531 else 532 transchar_nonprint(transchar_buf + i, c); 533 return transchar_buf; 534 } 535 536 /* 537 * Like transchar(), but called with a byte instead of a character. Checks 538 * for an illegal UTF-8 byte. 
539 */ 540 char_u * 541 transchar_byte(int c) 542 { 543 if (enc_utf8 && c >= 0x80) 544 { 545 transchar_nonprint(transchar_buf, c); 546 return transchar_buf; 547 } 548 return transchar(c); 549 } 550 551 /* 552 * Convert non-printable character to two or more printable characters in 553 * "buf[]". "buf" needs to be able to hold five bytes. 554 * Does NOT work for multi-byte characters, c must be <= 255. 555 */ 556 void 557 transchar_nonprint(char_u *buf, int c) 558 { 559 if (c == NL) 560 c = NUL; /* we use newline in place of a NUL */ 561 else if (c == CAR && get_fileformat(curbuf) == EOL_MAC) 562 c = NL; /* we use CR in place of NL in this case */ 563 564 if (dy_flags & DY_UHEX) /* 'display' has "uhex" */ 565 transchar_hex(buf, c); 566 567 #ifdef EBCDIC 568 /* For EBCDIC only the characters 0-63 and 255 are not printable */ 569 else if (CtrlChar(c) != 0 || c == DEL) 570 #else 571 else if (c <= 0x7f) /* 0x00 - 0x1f and 0x7f */ 572 #endif 573 { 574 buf[0] = '^'; 575 #ifdef EBCDIC 576 if (c == DEL) 577 buf[1] = '?'; /* DEL displayed as ^? */ 578 else 579 buf[1] = CtrlChar(c); 580 #else 581 buf[1] = c ^ 0x40; /* DEL displayed as ^? */ 582 #endif 583 584 buf[2] = NUL; 585 } 586 else if (enc_utf8 && c >= 0x80) 587 { 588 transchar_hex(buf, c); 589 } 590 #ifndef EBCDIC 591 else if (c >= ' ' + 0x80 && c <= '~' + 0x80) /* 0xa0 - 0xfe */ 592 { 593 buf[0] = '|'; 594 buf[1] = c - 0x80; 595 buf[2] = NUL; 596 } 597 #else 598 else if (c < 64) 599 { 600 buf[0] = '~'; 601 buf[1] = MetaChar(c); 602 buf[2] = NUL; 603 } 604 #endif 605 else /* 0x80 - 0x9f and 0xff */ 606 { 607 /* 608 * TODO: EBCDIC I don't know what to do with this chars, so I display 609 * them as '~?' for now 610 */ 611 buf[0] = '~'; 612 #ifdef EBCDIC 613 buf[1] = '?'; /* 0xff displayed as ~? */ 614 #else 615 buf[1] = (c - 0x80) ^ 0x40; /* 0xff displayed as ~? 
*/ 616 #endif 617 buf[2] = NUL; 618 } 619 } 620 621 void 622 transchar_hex(char_u *buf, int c) 623 { 624 int i = 0; 625 626 buf[0] = '<'; 627 if (c > 255) 628 { 629 buf[++i] = nr2hex((unsigned)c >> 12); 630 buf[++i] = nr2hex((unsigned)c >> 8); 631 } 632 buf[++i] = nr2hex((unsigned)c >> 4); 633 buf[++i] = nr2hex((unsigned)c); 634 buf[++i] = '>'; 635 buf[++i] = NUL; 636 } 637 638 /* 639 * Convert the lower 4 bits of byte "c" to its hex character. 640 * Lower case letters are used to avoid the confusion of <F1> being 0xf1 or 641 * function key 1. 642 */ 643 static unsigned 644 nr2hex(unsigned c) 645 { 646 if ((c & 0xf) <= 9) 647 return (c & 0xf) + '0'; 648 return (c & 0xf) - 10 + 'a'; 649 } 650 651 /* 652 * Return number of display cells occupied by byte "b". 653 * Caller must make sure 0 <= b <= 255. 654 * For multi-byte mode "b" must be the first byte of a character. 655 * A TAB is counted as two cells: "^I". 656 * For UTF-8 mode this will return 0 for bytes >= 0x80, because the number of 657 * cells depends on further bytes. 658 */ 659 int 660 byte2cells(int b) 661 { 662 if (enc_utf8 && b >= 0x80) 663 return 0; 664 return (g_chartab[b] & CT_CELL_MASK); 665 } 666 667 /* 668 * Return number of display cells occupied by character "c". 669 * "c" can be a special key (negative number) in which case 3 or 4 is returned. 670 * A TAB is counted as two cells: "^I" or four: "<09>". 671 */ 672 int 673 char2cells(int c) 674 { 675 if (IS_SPECIAL(c)) 676 return char2cells(K_SECOND(c)) + 2; 677 if (c >= 0x80) 678 { 679 /* UTF-8: above 0x80 need to check the value */ 680 if (enc_utf8) 681 return utf_char2cells(c); 682 /* DBCS: double-byte means double-width, except for euc-jp with first 683 * byte 0x8e */ 684 if (enc_dbcs != 0 && c >= 0x100) 685 { 686 if (enc_dbcs == DBCS_JPNU && ((unsigned)c >> 8) == 0x8e) 687 return 1; 688 return 2; 689 } 690 } 691 return (g_chartab[c & 0xff] & CT_CELL_MASK); 692 } 693 694 /* 695 * Return number of display cells occupied by character at "*p". 
696 * A TAB is counted as two cells: "^I" or four: "<09>". 697 */ 698 int 699 ptr2cells(char_u *p) 700 { 701 /* For UTF-8 we need to look at more bytes if the first byte is >= 0x80. */ 702 if (enc_utf8 && *p >= 0x80) 703 return utf_ptr2cells(p); 704 /* For DBCS we can tell the cell count from the first byte. */ 705 return (g_chartab[*p] & CT_CELL_MASK); 706 } 707 708 /* 709 * Return the number of character cells string "s" will take on the screen, 710 * counting TABs as two characters: "^I". 711 */ 712 int 713 vim_strsize(char_u *s) 714 { 715 return vim_strnsize(s, (int)MAXCOL); 716 } 717 718 /* 719 * Return the number of character cells string "s[len]" will take on the 720 * screen, counting TABs as two characters: "^I". 721 */ 722 int 723 vim_strnsize(char_u *s, int len) 724 { 725 int size = 0; 726 727 while (*s != NUL && --len >= 0) 728 if (has_mbyte) 729 { 730 int l = (*mb_ptr2len)(s); 731 732 size += ptr2cells(s); 733 s += l; 734 len -= l - 1; 735 } 736 else 737 size += byte2cells(*s++); 738 739 return size; 740 } 741 742 /* 743 * Return the number of characters 'c' will take on the screen, taking 744 * into account the size of a tab. 745 * Use a define to make it fast, this is used very often!!! 746 * Also see getvcol() below. 
747 */ 748 749 #ifdef FEAT_VARTABS 750 # define RET_WIN_BUF_CHARTABSIZE(wp, buf, p, col) \ 751 if (*(p) == TAB && (!(wp)->w_p_list || lcs_tab1)) \ 752 { \ 753 return tabstop_padding(col, (buf)->b_p_ts, (buf)->b_p_vts_array); \ 754 } \ 755 else \ 756 return ptr2cells(p); 757 #else 758 # define RET_WIN_BUF_CHARTABSIZE(wp, buf, p, col) \ 759 if (*(p) == TAB && (!(wp)->w_p_list || lcs_tab1)) \ 760 { \ 761 int ts; \ 762 ts = (buf)->b_p_ts; \ 763 return (int)(ts - (col % ts)); \ 764 } \ 765 else \ 766 return ptr2cells(p); 767 #endif 768 769 int 770 chartabsize(char_u *p, colnr_T col) 771 { 772 RET_WIN_BUF_CHARTABSIZE(curwin, curbuf, p, col) 773 } 774 775 #ifdef FEAT_LINEBREAK 776 static int 777 win_chartabsize(win_T *wp, char_u *p, colnr_T col) 778 { 779 RET_WIN_BUF_CHARTABSIZE(wp, wp->w_buffer, p, col) 780 } 781 #endif 782 783 /* 784 * Return the number of characters the string 's' will take on the screen, 785 * taking into account the size of a tab. 786 */ 787 int 788 linetabsize(char_u *s) 789 { 790 return linetabsize_col(0, s); 791 } 792 793 /* 794 * Like linetabsize(), but starting at column "startcol". 795 */ 796 int 797 linetabsize_col(int startcol, char_u *s) 798 { 799 colnr_T col = startcol; 800 char_u *line = s; /* pointer to start of line, for breakindent */ 801 802 while (*s != NUL) 803 col += lbr_chartabsize_adv(line, &s, col); 804 return (int)col; 805 } 806 807 /* 808 * Like linetabsize(), but for a given window instead of the current one. 809 */ 810 int 811 win_linetabsize(win_T *wp, char_u *line, colnr_T len) 812 { 813 colnr_T col = 0; 814 char_u *s; 815 816 for (s = line; *s != NUL && (len == MAXCOL || s < line + len); 817 MB_PTR_ADV(s)) 818 col += win_lbr_chartabsize(wp, line, s, col, NULL); 819 return (int)col; 820 } 821 822 /* 823 * Return TRUE if 'c' is a normal identifier character: 824 * Letters and characters from the 'isident' option. 
825 */ 826 int 827 vim_isIDc(int c) 828 { 829 return (c > 0 && c < 0x100 && (g_chartab[c] & CT_ID_CHAR)); 830 } 831 832 /* 833 * return TRUE if 'c' is a keyword character: Letters and characters from 834 * 'iskeyword' option for the current buffer. 835 * For multi-byte characters mb_get_class() is used (builtin rules). 836 */ 837 int 838 vim_iswordc(int c) 839 { 840 return vim_iswordc_buf(c, curbuf); 841 } 842 843 int 844 vim_iswordc_buf(int c, buf_T *buf) 845 { 846 if (c >= 0x100) 847 { 848 if (enc_dbcs != 0) 849 return dbcs_class((unsigned)c >> 8, (unsigned)(c & 0xff)) >= 2; 850 if (enc_utf8) 851 return utf_class_buf(c, buf) >= 2; 852 return FALSE; 853 } 854 return (c > 0 && GET_CHARTAB(buf, c) != 0); 855 } 856 857 /* 858 * Just like vim_iswordc() but uses a pointer to the (multi-byte) character. 859 */ 860 int 861 vim_iswordp(char_u *p) 862 { 863 return vim_iswordp_buf(p, curbuf); 864 } 865 866 int 867 vim_iswordp_buf(char_u *p, buf_T *buf) 868 { 869 int c = *p; 870 871 if (has_mbyte && MB_BYTE2LEN(c) > 1) 872 c = (*mb_ptr2char)(p); 873 return vim_iswordc_buf(c, buf); 874 } 875 876 /* 877 * return TRUE if 'c' is a valid file-name character 878 * Assume characters above 0x100 are valid (multi-byte). 879 */ 880 int 881 vim_isfilec(int c) 882 { 883 return (c >= 0x100 || (c > 0 && (g_chartab[c] & CT_FNAME_CHAR))); 884 } 885 886 /* 887 * return TRUE if 'c' is a valid file-name character or a wildcard character 888 * Assume characters above 0x100 are valid (multi-byte). 889 * Explicitly interpret ']' as a wildcard character as mch_has_wildcard("]") 890 * returns false. 891 */ 892 int 893 vim_isfilec_or_wc(int c) 894 { 895 char_u buf[2]; 896 897 buf[0] = (char_u)c; 898 buf[1] = NUL; 899 return vim_isfilec(c) || c == ']' || mch_has_wildcard(buf); 900 } 901 902 /* 903 * Return TRUE if 'c' is a printable character. 904 * Assume characters above 0x100 are printable (multi-byte), except for 905 * Unicode. 
906 */ 907 int 908 vim_isprintc(int c) 909 { 910 if (enc_utf8 && c >= 0x100) 911 return utf_printable(c); 912 return (c >= 0x100 || (c > 0 && (g_chartab[c] & CT_PRINT_CHAR))); 913 } 914 915 /* 916 * Strict version of vim_isprintc(c), don't return TRUE if "c" is the head 917 * byte of a double-byte character. 918 */ 919 int 920 vim_isprintc_strict(int c) 921 { 922 if (enc_dbcs != 0 && c < 0x100 && MB_BYTE2LEN(c) > 1) 923 return FALSE; 924 if (enc_utf8 && c >= 0x100) 925 return utf_printable(c); 926 return (c >= 0x100 || (c > 0 && (g_chartab[c] & CT_PRINT_CHAR))); 927 } 928 929 /* 930 * like chartabsize(), but also check for line breaks on the screen 931 */ 932 int 933 lbr_chartabsize( 934 char_u *line UNUSED, /* start of the line */ 935 unsigned char *s, 936 colnr_T col) 937 { 938 #ifdef FEAT_LINEBREAK 939 if (!curwin->w_p_lbr && *p_sbr == NUL && !curwin->w_p_bri) 940 { 941 #endif 942 if (curwin->w_p_wrap) 943 return win_nolbr_chartabsize(curwin, s, col, NULL); 944 RET_WIN_BUF_CHARTABSIZE(curwin, curbuf, s, col) 945 #ifdef FEAT_LINEBREAK 946 } 947 return win_lbr_chartabsize(curwin, line == NULL ? s : line, s, col, NULL); 948 #endif 949 } 950 951 /* 952 * Call lbr_chartabsize() and advance the pointer. 953 */ 954 int 955 lbr_chartabsize_adv( 956 char_u *line, /* start of the line */ 957 char_u **s, 958 colnr_T col) 959 { 960 int retval; 961 962 retval = lbr_chartabsize(line, *s, col); 963 MB_PTR_ADV(*s); 964 return retval; 965 } 966 967 /* 968 * This function is used very often, keep it fast!!!! 969 * 970 * If "headp" not NULL, set *headp to the size of what we for 'showbreak' 971 * string at start of line. Warning: *headp is only set if it's a non-zero 972 * value, init to 0 before calling. 
973 */ 974 int 975 win_lbr_chartabsize( 976 win_T *wp, 977 char_u *line UNUSED, /* start of the line */ 978 char_u *s, 979 colnr_T col, 980 int *headp UNUSED) 981 { 982 #ifdef FEAT_LINEBREAK 983 int c; 984 int size; 985 colnr_T col2; 986 colnr_T col_adj = 0; /* col + screen size of tab */ 987 colnr_T colmax; 988 int added; 989 int mb_added = 0; 990 int numberextra; 991 char_u *ps; 992 int tab_corr = (*s == TAB); 993 int n; 994 995 /* 996 * No 'linebreak', 'showbreak' and 'breakindent': return quickly. 997 */ 998 if (!wp->w_p_lbr && !wp->w_p_bri && *p_sbr == NUL) 999 #endif 1000 { 1001 if (wp->w_p_wrap) 1002 return win_nolbr_chartabsize(wp, s, col, headp); 1003 RET_WIN_BUF_CHARTABSIZE(wp, wp->w_buffer, s, col) 1004 } 1005 1006 #ifdef FEAT_LINEBREAK 1007 /* 1008 * First get normal size, without 'linebreak' 1009 */ 1010 size = win_chartabsize(wp, s, col); 1011 c = *s; 1012 if (tab_corr) 1013 col_adj = size - 1; 1014 1015 /* 1016 * If 'linebreak' set check at a blank before a non-blank if the line 1017 * needs a break here 1018 */ 1019 if (wp->w_p_lbr 1020 && VIM_ISBREAK(c) 1021 && !VIM_ISBREAK((int)s[1]) 1022 && wp->w_p_wrap 1023 && wp->w_width != 0) 1024 { 1025 /* 1026 * Count all characters from first non-blank after a blank up to next 1027 * non-blank after a blank. 
1028 */ 1029 numberextra = win_col_off(wp); 1030 col2 = col; 1031 colmax = (colnr_T)(wp->w_width - numberextra - col_adj); 1032 if (col >= colmax) 1033 { 1034 colmax += col_adj; 1035 n = colmax + win_col_off2(wp); 1036 if (n > 0) 1037 colmax += (((col - colmax) / n) + 1) * n - col_adj; 1038 } 1039 1040 for (;;) 1041 { 1042 ps = s; 1043 MB_PTR_ADV(s); 1044 c = *s; 1045 if (!(c != NUL 1046 && (VIM_ISBREAK(c) 1047 || (!VIM_ISBREAK(c) 1048 && (col2 == col || !VIM_ISBREAK((int)*ps)))))) 1049 break; 1050 1051 col2 += win_chartabsize(wp, s, col2); 1052 if (col2 >= colmax) /* doesn't fit */ 1053 { 1054 size = colmax - col + col_adj; 1055 break; 1056 } 1057 } 1058 } 1059 else if (has_mbyte && size == 2 && MB_BYTE2LEN(*s) > 1 1060 && wp->w_p_wrap && in_win_border(wp, col)) 1061 { 1062 ++size; /* Count the ">" in the last column. */ 1063 mb_added = 1; 1064 } 1065 1066 /* 1067 * May have to add something for 'breakindent' and/or 'showbreak' 1068 * string at start of line. 1069 * Set *headp to the size of what we add. 
1070 */ 1071 added = 0; 1072 if ((*p_sbr != NUL || wp->w_p_bri) && wp->w_p_wrap && col != 0) 1073 { 1074 colnr_T sbrlen = 0; 1075 int numberwidth = win_col_off(wp); 1076 1077 numberextra = numberwidth; 1078 col += numberextra + mb_added; 1079 if (col >= (colnr_T)wp->w_width) 1080 { 1081 col -= wp->w_width; 1082 numberextra = wp->w_width - (numberextra - win_col_off2(wp)); 1083 if (col >= numberextra && numberextra > 0) 1084 col %= numberextra; 1085 if (*p_sbr != NUL) 1086 { 1087 sbrlen = (colnr_T)MB_CHARLEN(p_sbr); 1088 if (col >= sbrlen) 1089 col -= sbrlen; 1090 } 1091 if (col >= numberextra && numberextra > 0) 1092 col = col % numberextra; 1093 else if (col > 0 && numberextra > 0) 1094 col += numberwidth - win_col_off2(wp); 1095 1096 numberwidth -= win_col_off2(wp); 1097 } 1098 if (col == 0 || col + size + sbrlen > (colnr_T)wp->w_width) 1099 { 1100 added = 0; 1101 if (*p_sbr != NUL) 1102 { 1103 if (size + sbrlen + numberwidth > (colnr_T)wp->w_width) 1104 { 1105 // calculate effective window width 1106 int width = (colnr_T)wp->w_width - sbrlen - numberwidth; 1107 int prev_width = col 1108 ? ((colnr_T)wp->w_width - (sbrlen + col)) : 0; 1109 1110 if (width <= 0) 1111 width = (colnr_T)1; 1112 added += ((size - prev_width) / width) * vim_strsize(p_sbr); 1113 if ((size - prev_width) % width) 1114 // wrapped, add another length of 'sbr' 1115 added += vim_strsize(p_sbr); 1116 } 1117 else 1118 added += vim_strsize(p_sbr); 1119 } 1120 if (wp->w_p_bri) 1121 added += get_breakindent_win(wp, line); 1122 1123 size += added; 1124 if (col != 0) 1125 added = 0; 1126 } 1127 } 1128 if (headp != NULL) 1129 *headp = added + mb_added; 1130 return size; 1131 #endif 1132 } 1133 1134 /* 1135 * Like win_lbr_chartabsize(), except that we know 'linebreak' is off and 1136 * 'wrap' is on. This means we need to check for a double-byte character that 1137 * doesn't fit at the end of the screen line. 
1138 */ 1139 static int 1140 win_nolbr_chartabsize( 1141 win_T *wp, 1142 char_u *s, 1143 colnr_T col, 1144 int *headp) 1145 { 1146 int n; 1147 1148 if (*s == TAB && (!wp->w_p_list || lcs_tab1)) 1149 { 1150 # ifdef FEAT_VARTABS 1151 return tabstop_padding(col, wp->w_buffer->b_p_ts, 1152 wp->w_buffer->b_p_vts_array); 1153 # else 1154 n = wp->w_buffer->b_p_ts; 1155 return (int)(n - (col % n)); 1156 # endif 1157 } 1158 n = ptr2cells(s); 1159 /* Add one cell for a double-width character in the last column of the 1160 * window, displayed with a ">". */ 1161 if (n == 2 && MB_BYTE2LEN(*s) > 1 && in_win_border(wp, col)) 1162 { 1163 if (headp != NULL) 1164 *headp = 1; 1165 return 3; 1166 } 1167 return n; 1168 } 1169 1170 /* 1171 * Return TRUE if virtual column "vcol" is in the rightmost column of window 1172 * "wp". 1173 */ 1174 static int 1175 in_win_border(win_T *wp, colnr_T vcol) 1176 { 1177 int width1; /* width of first line (after line number) */ 1178 int width2; /* width of further lines */ 1179 1180 if (wp->w_width == 0) /* there is no border */ 1181 return FALSE; 1182 width1 = wp->w_width - win_col_off(wp); 1183 if ((int)vcol < width1 - 1) 1184 return FALSE; 1185 if ((int)vcol == width1 - 1) 1186 return TRUE; 1187 width2 = width1 + win_col_off2(wp); 1188 if (width2 <= 0) 1189 return FALSE; 1190 return ((vcol - width1) % width2 == width2 - 1); 1191 } 1192 1193 /* 1194 * Get virtual column number of pos. 1195 * start: on the first position of this character (TAB, ctrl) 1196 * cursor: where the cursor is on this character (first char, except for TAB) 1197 * end: on the last position of this character (TAB, ctrl) 1198 * 1199 * This is used very often, keep it fast! 
 *
 * "start", "cursor" and "end" may each be NULL when the caller does not need
 * that value.
 * When pos->col is MAXCOL the columns of the NUL at the end of the line are
 * computed.
 * Note that pos->col is set to zero when the line turns out to be empty.
 */
void
getvcol(
    win_T   *wp,
    pos_T   *pos,
    colnr_T *start,
    colnr_T *cursor,
    colnr_T *end)
{
    colnr_T vcol;
    char_u  *ptr;       /* points to current char */
    char_u  *posptr;    /* points to char at pos->col */
    char_u  *line;      /* start of the line */
    int     incr;       /* number of cells taken by the current char */
    int     head;       /* cells in front of the char itself (the ">" that
                           precedes a wrapped double-wide char) */
#ifdef FEAT_VARTABS
    int     *vts = wp->w_buffer->b_p_vts_array;
#endif
    int     ts = wp->w_buffer->b_p_ts;
    int     c;

    vcol = 0;
    line = ptr = ml_get_buf(wp->w_buffer, pos->lnum, FALSE);
    if (pos->col == MAXCOL)
        posptr = NULL;  /* continue until the NUL */
    else
    {
        /* Special check for an empty line, which can happen on exit, when
         * ml_get_buf() always returns an empty string. */
        if (*ptr == NUL)
            pos->col = 0;
        posptr = ptr + pos->col;
        if (has_mbyte)
            /* always start on the first byte */
            posptr -= (*mb_head_off)(line, posptr);
    }

    /*
     * This function is used very often, do some speed optimizations.
     * When 'list', 'linebreak', 'showbreak' and 'breakindent' are not set
     * use a simple loop.
     * Also use this when 'list' is set but tabs take their normal size.
     */
    if ((!wp->w_p_list || lcs_tab1 != NUL)
#ifdef FEAT_LINEBREAK
            && !wp->w_p_lbr && *p_sbr == NUL && !wp->w_p_bri
#endif
       )
    {
        /* Fast path: the cell width comes straight from g_chartab[] (or
         * utf_ptr2cells() for UTF-8 bytes >= 0x80). */
        for (;;)
        {
            head = 0;
            c = *ptr;
            /* make sure we don't go past the end of the line */
            if (c == NUL)
            {
                incr = 1;       /* NUL at end of line only takes one column */
                break;
            }
            /* A tab gets expanded, depending on the current column */
            if (c == TAB)
#ifdef FEAT_VARTABS
                incr = tabstop_padding(vcol, ts, vts);
#else
                incr = ts - (vcol % ts);
#endif
            else
            {
                if (has_mbyte)
                {
                    /* For utf-8, if the byte is >= 0x80, need to look at
                     * further bytes to find the cell width. */
                    if (enc_utf8 && c >= 0x80)
                        incr = utf_ptr2cells(ptr);
                    else
                        incr = g_chartab[c] & CT_CELL_MASK;

                    /* If a double-cell char doesn't fit at the end of a line
                     * it wraps to the next line, it's like this char is three
                     * cells wide. */
                    if (incr == 2 && wp->w_p_wrap && MB_BYTE2LEN(*ptr) > 1
                            && in_win_border(wp, vcol))
                    {
                        ++incr;
                        head = 1;
                    }
                }
                else
                    incr = g_chartab[c] & CT_CELL_MASK;
            }

            /* Stop before adding "incr": vcol must stay at the first cell of
             * the character at pos->col. */
            if (posptr != NULL && ptr >= posptr) /* character at pos->col */
                break;

            vcol += incr;
            MB_PTR_ADV(ptr);
        }
    }
    else
    {
        /* Slow path: win_lbr_chartabsize() computes the width of each
         * character and may set "head". */
        for (;;)
        {
            /* A tab gets expanded, depending on the current column */
            head = 0;
            incr = win_lbr_chartabsize(wp, line, ptr, vcol, &head);
            /* make sure we don't go past the end of the line */
            if (*ptr == NUL)
            {
                incr = 1;       /* NUL at end of line only takes one column */
                break;
            }

            if (posptr != NULL && ptr >= posptr) /* character at pos->col */
                break;

            vcol += incr;
            MB_PTR_ADV(ptr);
        }
    }
    /* Here "vcol" is the first cell of the character that was found, "incr"
     * its width and "head" the number of cells in front of it. */
    if (start != NULL)
        *start = vcol + head;
    if (end != NULL)
        *end = vcol + incr - 1;
    if (cursor != NULL)
    {
        /* On a TAB the cursor is put on the last cell, but only when all of
         * the conditions below hold; otherwise it is put on the first cell. */
        if (*ptr == TAB
                && (State & NORMAL)
                && !wp->w_p_list
                && !virtual_active()
                && !(VIsual_active
                        && (*p_sel == 'e' || LTOREQ_POS(*pos, VIsual)))
                )
            *cursor = vcol + incr - 1;      /* cursor at end */
        else
            *cursor = vcol + head;          /* cursor at start */
    }
}

/*
 * Get virtual cursor column in the current window, pretending 'list' is off.
1340 */ 1341 colnr_T 1342 getvcol_nolist(pos_T *posp) 1343 { 1344 int list_save = curwin->w_p_list; 1345 colnr_T vcol; 1346 1347 curwin->w_p_list = FALSE; 1348 if (posp->coladd) 1349 getvvcol(curwin, posp, NULL, &vcol, NULL); 1350 else 1351 getvcol(curwin, posp, NULL, &vcol, NULL); 1352 curwin->w_p_list = list_save; 1353 return vcol; 1354 } 1355 1356 /* 1357 * Get virtual column in virtual mode. 1358 */ 1359 void 1360 getvvcol( 1361 win_T *wp, 1362 pos_T *pos, 1363 colnr_T *start, 1364 colnr_T *cursor, 1365 colnr_T *end) 1366 { 1367 colnr_T col; 1368 colnr_T coladd; 1369 colnr_T endadd; 1370 char_u *ptr; 1371 1372 if (virtual_active()) 1373 { 1374 /* For virtual mode, only want one value */ 1375 getvcol(wp, pos, &col, NULL, NULL); 1376 1377 coladd = pos->coladd; 1378 endadd = 0; 1379 /* Cannot put the cursor on part of a wide character. */ 1380 ptr = ml_get_buf(wp->w_buffer, pos->lnum, FALSE); 1381 if (pos->col < (colnr_T)STRLEN(ptr)) 1382 { 1383 int c = (*mb_ptr2char)(ptr + pos->col); 1384 1385 if (c != TAB && vim_isprintc(c)) 1386 { 1387 endadd = (colnr_T)(char2cells(c) - 1); 1388 if (coladd > endadd) /* past end of line */ 1389 endadd = 0; 1390 else 1391 coladd = 0; 1392 } 1393 } 1394 col += coladd; 1395 if (start != NULL) 1396 *start = col; 1397 if (cursor != NULL) 1398 *cursor = col; 1399 if (end != NULL) 1400 *end = col + endadd; 1401 } 1402 else 1403 getvcol(wp, pos, start, cursor, end); 1404 } 1405 1406 /* 1407 * Get the leftmost and rightmost virtual column of pos1 and pos2. 1408 * Used for Visual block mode. 
1409 */ 1410 void 1411 getvcols( 1412 win_T *wp, 1413 pos_T *pos1, 1414 pos_T *pos2, 1415 colnr_T *left, 1416 colnr_T *right) 1417 { 1418 colnr_T from1, from2, to1, to2; 1419 1420 if (LT_POSP(pos1, pos2)) 1421 { 1422 getvvcol(wp, pos1, &from1, NULL, &to1); 1423 getvvcol(wp, pos2, &from2, NULL, &to2); 1424 } 1425 else 1426 { 1427 getvvcol(wp, pos2, &from1, NULL, &to1); 1428 getvvcol(wp, pos1, &from2, NULL, &to2); 1429 } 1430 if (from2 < from1) 1431 *left = from2; 1432 else 1433 *left = from1; 1434 if (to2 > to1) 1435 { 1436 if (*p_sel == 'e' && from2 - 1 >= to1) 1437 *right = from2 - 1; 1438 else 1439 *right = to2; 1440 } 1441 else 1442 *right = to1; 1443 } 1444 1445 /* 1446 * skipwhite: skip over ' ' and '\t'. 1447 */ 1448 char_u * 1449 skipwhite(char_u *q) 1450 { 1451 char_u *p = q; 1452 1453 while (VIM_ISWHITE(*p)) /* skip to next non-white */ 1454 ++p; 1455 return p; 1456 } 1457 1458 /* 1459 * getwhitecols: return the number of whitespace 1460 * columns (bytes) at the start of a given line 1461 */ 1462 int 1463 getwhitecols_curline() 1464 { 1465 return getwhitecols(ml_get_curline()); 1466 } 1467 1468 int 1469 getwhitecols(char_u *p) 1470 { 1471 return skipwhite(p) - p; 1472 } 1473 1474 /* 1475 * skip over digits 1476 */ 1477 char_u * 1478 skipdigits(char_u *q) 1479 { 1480 char_u *p = q; 1481 1482 while (VIM_ISDIGIT(*p)) /* skip to next non-digit */ 1483 ++p; 1484 return p; 1485 } 1486 1487 #if defined(FEAT_SYN_HL) || defined(FEAT_SPELL) || defined(PROTO) 1488 /* 1489 * skip over binary digits 1490 */ 1491 char_u * 1492 skipbin(char_u *q) 1493 { 1494 char_u *p = q; 1495 1496 while (vim_isbdigit(*p)) /* skip to next non-digit */ 1497 ++p; 1498 return p; 1499 } 1500 1501 /* 1502 * skip over digits and hex characters 1503 */ 1504 char_u * 1505 skiphex(char_u *q) 1506 { 1507 char_u *p = q; 1508 1509 while (vim_isxdigit(*p)) /* skip to next non-digit */ 1510 ++p; 1511 return p; 1512 } 1513 #endif 1514 1515 /* 1516 * skip to bin digit (or NUL after the string) 1517 */ 
1518 char_u * 1519 skiptobin(char_u *q) 1520 { 1521 char_u *p = q; 1522 1523 while (*p != NUL && !vim_isbdigit(*p)) /* skip to next digit */ 1524 ++p; 1525 return p; 1526 } 1527 1528 /* 1529 * skip to digit (or NUL after the string) 1530 */ 1531 char_u * 1532 skiptodigit(char_u *q) 1533 { 1534 char_u *p = q; 1535 1536 while (*p != NUL && !VIM_ISDIGIT(*p)) /* skip to next digit */ 1537 ++p; 1538 return p; 1539 } 1540 1541 /* 1542 * skip to hex character (or NUL after the string) 1543 */ 1544 char_u * 1545 skiptohex(char_u *q) 1546 { 1547 char_u *p = q; 1548 1549 while (*p != NUL && !vim_isxdigit(*p)) /* skip to next digit */ 1550 ++p; 1551 return p; 1552 } 1553 1554 /* 1555 * Variant of isdigit() that can handle characters > 0x100. 1556 * We don't use isdigit() here, because on some systems it also considers 1557 * superscript 1 to be a digit. 1558 * Use the VIM_ISDIGIT() macro for simple arguments. 1559 */ 1560 int 1561 vim_isdigit(int c) 1562 { 1563 return (c >= '0' && c <= '9'); 1564 } 1565 1566 /* 1567 * Variant of isxdigit() that can handle characters > 0x100. 1568 * We don't use isxdigit() here, because on some systems it also considers 1569 * superscript 1 to be a digit. 1570 */ 1571 int 1572 vim_isxdigit(int c) 1573 { 1574 return (c >= '0' && c <= '9') 1575 || (c >= 'a' && c <= 'f') 1576 || (c >= 'A' && c <= 'F'); 1577 } 1578 1579 /* 1580 * Corollary of vim_isdigit and vim_isxdigit() that can handle 1581 * characters > 0x100. 1582 */ 1583 int 1584 vim_isbdigit(int c) 1585 { 1586 return (c == '0' || c == '1'); 1587 } 1588 1589 /* 1590 * Vim's own character class functions. These exist because many library 1591 * islower()/toupper() etc. do not work properly: they crash when used with 1592 * invalid values or can't handle latin1 when the locale is C. 1593 * Speed is most important here. 
1594 */ 1595 #define LATIN1LOWER 'l' 1596 #define LATIN1UPPER 'U' 1597 1598 static char_u latin1flags[257] = " UUUUUUUUUUUUUUUUUUUUUUUUUU llllllllllllllllllllllllll UUUUUUUUUUUUUUUUUUUUUUU UUUUUUUllllllllllllllllllllllll llllllll"; 1599 static char_u latin1upper[257] = " !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`ABCDEFGHIJKLMNOPQRSTUVWXYZ{|}~\x7f\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xf7\xd8\xd9\xda\xdb\xdc\xdd\xde\xff"; 1600 static char_u latin1lower[257] = " !\"#$%&'()*+,-./0123456789:;<=>?@abcdefghijklmnopqrstuvwxyz[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\x7f\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xd7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xdf\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff"; 1601 1602 int 1603 vim_islower(int c) 1604 { 1605 if (c <= '@') 1606 return FALSE; 1607 if (c >= 0x80) 1608 { 1609 if (enc_utf8) 1610 return utf_islower(c); 1611 if (c >= 0x100) 1612 { 1613 #ifdef HAVE_ISWLOWER 1614 if (has_mbyte) 1615 return iswlower(c); 1616 #endif 1617 /* islower() can't handle these chars and may crash */ 1618 return FALSE; 1619 } 1620 if (enc_latin1like) 1621 return (latin1flags[c] & LATIN1LOWER) == LATIN1LOWER; 1622 } 1623 return 
islower(c); 1624 } 1625 1626 int 1627 vim_isupper(int c) 1628 { 1629 if (c <= '@') 1630 return FALSE; 1631 if (c >= 0x80) 1632 { 1633 if (enc_utf8) 1634 return utf_isupper(c); 1635 if (c >= 0x100) 1636 { 1637 #ifdef HAVE_ISWUPPER 1638 if (has_mbyte) 1639 return iswupper(c); 1640 #endif 1641 /* islower() can't handle these chars and may crash */ 1642 return FALSE; 1643 } 1644 if (enc_latin1like) 1645 return (latin1flags[c] & LATIN1UPPER) == LATIN1UPPER; 1646 } 1647 return isupper(c); 1648 } 1649 1650 int 1651 vim_toupper(int c) 1652 { 1653 if (c <= '@') 1654 return c; 1655 if (c >= 0x80 || !(cmp_flags & CMP_KEEPASCII)) 1656 { 1657 if (enc_utf8) 1658 return utf_toupper(c); 1659 if (c >= 0x100) 1660 { 1661 #ifdef HAVE_TOWUPPER 1662 if (has_mbyte) 1663 return towupper(c); 1664 #endif 1665 /* toupper() can't handle these chars and may crash */ 1666 return c; 1667 } 1668 if (enc_latin1like) 1669 return latin1upper[c]; 1670 } 1671 if (c < 0x80 && (cmp_flags & CMP_KEEPASCII)) 1672 return TOUPPER_ASC(c); 1673 return TOUPPER_LOC(c); 1674 } 1675 1676 int 1677 vim_tolower(int c) 1678 { 1679 if (c <= '@') 1680 return c; 1681 if (c >= 0x80 || !(cmp_flags & CMP_KEEPASCII)) 1682 { 1683 if (enc_utf8) 1684 return utf_tolower(c); 1685 if (c >= 0x100) 1686 { 1687 #ifdef HAVE_TOWLOWER 1688 if (has_mbyte) 1689 return towlower(c); 1690 #endif 1691 /* tolower() can't handle these chars and may crash */ 1692 return c; 1693 } 1694 if (enc_latin1like) 1695 return latin1lower[c]; 1696 } 1697 if (c < 0x80 && (cmp_flags & CMP_KEEPASCII)) 1698 return TOLOWER_ASC(c); 1699 return TOLOWER_LOC(c); 1700 } 1701 1702 /* 1703 * skiptowhite: skip over text until ' ' or '\t' or NUL. 
1704 */ 1705 char_u * 1706 skiptowhite(char_u *p) 1707 { 1708 while (*p != ' ' && *p != '\t' && *p != NUL) 1709 ++p; 1710 return p; 1711 } 1712 1713 /* 1714 * skiptowhite_esc: Like skiptowhite(), but also skip escaped chars 1715 */ 1716 char_u * 1717 skiptowhite_esc(char_u *p) 1718 { 1719 while (*p != ' ' && *p != '\t' && *p != NUL) 1720 { 1721 if ((*p == '\\' || *p == Ctrl_V) && *(p + 1) != NUL) 1722 ++p; 1723 ++p; 1724 } 1725 return p; 1726 } 1727 1728 /* 1729 * Getdigits: Get a number from a string and skip over it. 1730 * Note: the argument is a pointer to a char_u pointer! 1731 */ 1732 long 1733 getdigits(char_u **pp) 1734 { 1735 char_u *p; 1736 long retval; 1737 1738 p = *pp; 1739 retval = atol((char *)p); 1740 if (*p == '-') /* skip negative sign */ 1741 ++p; 1742 p = skipdigits(p); /* skip to next non-digit */ 1743 *pp = p; 1744 return retval; 1745 } 1746 1747 /* 1748 * Return TRUE if "lbuf" is empty or only contains blanks. 1749 */ 1750 int 1751 vim_isblankline(char_u *lbuf) 1752 { 1753 char_u *p; 1754 1755 p = skipwhite(lbuf); 1756 return (*p == NUL || *p == '\r' || *p == '\n'); 1757 } 1758 1759 /* 1760 * Convert a string into a long and/or unsigned long, taking care of 1761 * hexadecimal, octal, and binary numbers. Accepts a '-' sign. 1762 * If "prep" is not NULL, returns a flag to indicate the type of the number: 1763 * 0 decimal 1764 * '0' octal 1765 * 'B' bin 1766 * 'b' bin 1767 * 'X' hex 1768 * 'x' hex 1769 * If "len" is not NULL, the length of the number in characters is returned. 1770 * If "nptr" is not NULL, the signed result is returned in it. 1771 * If "unptr" is not NULL, the unsigned result is returned in it. 1772 * If "what" contains STR2NR_BIN recognize binary numbers 1773 * If "what" contains STR2NR_OCT recognize octal numbers 1774 * If "what" contains STR2NR_HEX recognize hex numbers 1775 * If "what" contains STR2NR_FORCE always assume bin/oct/hex. 
 * If "what" contains STR2NR_QUOTE ignore embedded single quotes
 * If maxlen > 0, check at a maximum maxlen chars.
 * If strict is TRUE, check the number strictly. return *len = 0 if fail.
 *
 * A value that does not fit is clamped to UVARNUM_MAX instead of wrapping
 * around.
 * When the strict check fails "prep", "nptr" and "unptr" are not written.
 */
void
vim_str2nr(
    char_u          *start,
    int             *prep,      // return: type of number 0 = decimal, 'x'
                                // or 'X' is hex, '0' = octal, 'b' or 'B'
                                // is bin
    int             *len,       // return: detected length of number
    int             what,       // what numbers to recognize
    varnumber_T     *nptr,      // return: signed result
    uvarnumber_T    *unptr,     // return: unsigned result
    int             maxlen,     // max length of string to check
    int             strict)     // check strictly
{
    char_u          *ptr = start;
    int             pre = 0;            // default is decimal
    int             negative = FALSE;
    uvarnumber_T    un = 0;
    int             n;

    // Report failure until the number has been parsed completely.
    if (len != NULL)
        *len = 0;

    if (ptr[0] == '-')
    {
        negative = TRUE;
        ++ptr;
    }

    /* Recognize hex, octal, and bin. */
    if (ptr[0] == '0' && ptr[1] != '8' && ptr[1] != '9'
                                               && (maxlen == 0 || maxlen > 1))
    {
        pre = ptr[1];
        if ((what & STR2NR_HEX)
                && (pre == 'X' || pre == 'x') && vim_isxdigit(ptr[2])
                && (maxlen == 0 || maxlen > 2))
            /* hexadecimal */
            ptr += 2;
        else if ((what & STR2NR_BIN)
                && (pre == 'B' || pre == 'b') && vim_isbdigit(ptr[2])
                && (maxlen == 0 || maxlen > 2))
            /* binary */
            ptr += 2;
        else
        {
            /* decimal or octal, default is decimal */
            pre = 0;

            if (what & STR2NR_OCT)
            {
                /* Don't interpret "0", "08" or "0129" as octal. */
                for (n = 1; n != maxlen && VIM_ISDIGIT(ptr[n]); ++n)
                {
                    if (ptr[n] > '7')
                    {
                        pre = 0;        /* can't be octal */
                        break;
                    }
                    pre = '0';  /* assume octal */
                }
            }
        }
    }

    // Do the conversion manually to avoid sscanf() quirks.
    // "n" is one more than the number of characters used so far, which is
    // what the "n++ == maxlen" checks below rely on.
    n = 1;
    if (pre == 'B' || pre == 'b'
                             || ((what & STR2NR_BIN) && (what & STR2NR_FORCE)))
    {
        /* bin */
        if (pre != 0)
            n += 2;         /* skip over "0b" */
        while ('0' <= *ptr && *ptr <= '1')
        {
            /* avoid ubsan error for overflow */
            if (un <= UVARNUM_MAX / 2)
                un = 2 * un + (uvarnumber_T)(*ptr - '0');
            else
                un = UVARNUM_MAX;
            ++ptr;
            if (n++ == maxlen)
                break;
            // A quote is only skipped when a digit follows it.
            if ((what & STR2NR_QUOTE) && *ptr == '\''
                                             && '0' <= ptr[1] && ptr[1] <= '1')
            {
                ++ptr;
                if (n++ == maxlen)
                    break;
            }
        }
    }
    else if (pre == '0' || ((what & STR2NR_OCT) && (what & STR2NR_FORCE)))
    {
        /* octal */
        while ('0' <= *ptr && *ptr <= '7')
        {
            /* avoid ubsan error for overflow */
            if (un <= UVARNUM_MAX / 8)
                un = 8 * un + (uvarnumber_T)(*ptr - '0');
            else
                un = UVARNUM_MAX;
            ++ptr;
            if (n++ == maxlen)
                break;
            if ((what & STR2NR_QUOTE) && *ptr == '\''
                                             && '0' <= ptr[1] && ptr[1] <= '7')
            {
                ++ptr;
                if (n++ == maxlen)
                    break;
            }
        }
    }
    else if (pre != 0 || ((what & STR2NR_HEX) && (what & STR2NR_FORCE)))
    {
        /* hex */
        if (pre != 0)
            n += 2;         /* skip over "0x" */
        while (vim_isxdigit(*ptr))
        {
            /* avoid ubsan error for overflow */
            if (un <= UVARNUM_MAX / 16)
                un = 16 * un + (uvarnumber_T)hex2nr(*ptr);
            else
                un = UVARNUM_MAX;
            ++ptr;
            if (n++ == maxlen)
                break;
            if ((what & STR2NR_QUOTE) && *ptr == '\'' && vim_isxdigit(ptr[1]))
            {
                ++ptr;
                if (n++ == maxlen)
                    break;
            }
        }
    }
    else
    {
        /* decimal */
        while (VIM_ISDIGIT(*ptr))
        {
            uvarnumber_T digit = (uvarnumber_T)(*ptr - '0');

            /* avoid ubsan error for overflow */
            if (un < UVARNUM_MAX / 10
                    || (un == UVARNUM_MAX / 10 && digit <= UVARNUM_MAX % 10))
                un = 10 * un + digit;
            else
                un = UVARNUM_MAX;
            ++ptr;
            if (n++ == maxlen)
                break;
            if ((what & STR2NR_QUOTE) && *ptr == '\'' && VIM_ISDIGIT(ptr[1]))
            {
                ++ptr;
                if (n++ == maxlen)
                    break;
            }
        }
    }

    // Check for an alpha-numeric character immediately following, that is
    // most likely a typo.
    // Returning here leaves *len at zero, which is how failure is reported.
    if (strict && n - 1 != maxlen && ASCII_ISALNUM(*ptr))
        return;

    if (prep != NULL)
        *prep = pre;
    if (len != NULL)
        *len = (int)(ptr - start);
    if (nptr != NULL)
    {
        if (negative)   /* account for leading '-' for decimal numbers */
        {
            /* avoid ubsan error for overflow */
            if (un > VARNUM_MAX)
                *nptr = VARNUM_MIN;
            else
                *nptr = -(varnumber_T)un;
        }
        else
        {
            // NOTE: this also limits the value stored in "unptr" below, but
            // only when "nptr" is not NULL.
            if (un > VARNUM_MAX)
                un = VARNUM_MAX;
            *nptr = (varnumber_T)un;
        }
    }
    if (unptr != NULL)
        *unptr = un;
}

/*
 * Return the value of a single hex character.
 * Only valid when the argument is '0' - '9', 'A' - 'F' or 'a' - 'f'.
 */
int
hex2nr(int c)
{
    if (c >= 'a' && c <= 'f')
        return c - 'a' + 10;
    if (c >= 'A' && c <= 'F')
        return c - 'A' + 10;
    /* No check is done here: anything else is handled as a decimal digit. */
    return c - '0';
}

#if defined(FEAT_TERMRESPONSE) || defined(FEAT_GUI_GTK) || defined(PROTO)
/*
 * Convert two hex characters to a byte.
 * Return -1 if one of the characters is not hex.
 */
int
hexhex2nr(char_u *p)
{
    if (!vim_isxdigit(p[0]) || !vim_isxdigit(p[1]))
        return -1;
    /* p[0] is the high nibble, p[1] the low nibble */
    return (hex2nr(p[0]) << 4) + hex2nr(p[1]);
}
#endif

/*
 * Return TRUE if "str" starts with a backslash that should be removed.
 * For MS-DOS, MSWIN and OS/2 this is only done when the character after the
 * backslash is not a normal file name character.
 * '$' is a valid file name character, we don't remove the backslash before
 * it.  This means it is not possible to use an environment variable after a
 * backslash.
"C:\$VIM\doc" is taken literally, only "$VIM\doc" works. 2005 * Although "\ name" is valid, the backslash in "Program\ files" must be 2006 * removed. Assume a file name doesn't start with a space. 2007 * For multi-byte names, never remove a backslash before a non-ascii 2008 * character, assume that all multi-byte characters are valid file name 2009 * characters. 2010 */ 2011 int 2012 rem_backslash(char_u *str) 2013 { 2014 #ifdef BACKSLASH_IN_FILENAME 2015 return (str[0] == '\\' 2016 && str[1] < 0x80 2017 && (str[1] == ' ' 2018 || (str[1] != NUL 2019 && str[1] != '*' 2020 && str[1] != '?' 2021 && !vim_isfilec(str[1])))); 2022 #else 2023 return (str[0] == '\\' && str[1] != NUL); 2024 #endif 2025 } 2026 2027 /* 2028 * Halve the number of backslashes in a file name argument. 2029 * For MS-DOS we only do this if the character after the backslash 2030 * is not a normal file character. 2031 */ 2032 void 2033 backslash_halve(char_u *p) 2034 { 2035 for ( ; *p; ++p) 2036 if (rem_backslash(p)) 2037 STRMOVE(p, p + 1); 2038 } 2039 2040 /* 2041 * backslash_halve() plus save the result in allocated memory. 2042 * However, returns "p" when out of memory. 2043 */ 2044 char_u * 2045 backslash_halve_save(char_u *p) 2046 { 2047 char_u *res; 2048 2049 res = vim_strsave(p); 2050 if (res == NULL) 2051 return p; 2052 backslash_halve(res); 2053 return res; 2054 } 2055 2056 #if (defined(EBCDIC) && defined(FEAT_POSTSCRIPT)) || defined(PROTO) 2057 /* 2058 * Table for EBCDIC to ASCII conversion unashamedly taken from xxd.c! 
 * The first 64 entries have been added to map control characters defined in
 * ascii.h
 *
 * The table is indexed with the EBCDIC byte value, the entry is the byte to
 * use instead.  All values are written as octal constants, eight per row;
 * the comment after each row gives the index of its first entry.
 */
static char_u ebcdic2ascii_tab[256] =
{
    0000, 0001, 0002, 0003, 0004, 0011, 0006, 0177,     /* 0x00 */
    0010, 0011, 0012, 0013, 0014, 0015, 0016, 0017,     /* 0x08 */
    0020, 0021, 0022, 0023, 0024, 0012, 0010, 0027,     /* 0x10 */
    0030, 0031, 0032, 0033, 0033, 0035, 0036, 0037,     /* 0x18 */
    0040, 0041, 0042, 0043, 0044, 0045, 0046, 0047,     /* 0x20 */
    0050, 0051, 0052, 0053, 0054, 0055, 0056, 0057,     /* 0x28 */
    0060, 0061, 0062, 0063, 0064, 0065, 0066, 0067,     /* 0x30 */
    0070, 0071, 0072, 0073, 0074, 0075, 0076, 0077,     /* 0x38 */
    0040, 0240, 0241, 0242, 0243, 0244, 0245, 0246,     /* 0x40 */
    0247, 0250, 0325, 0056, 0074, 0050, 0053, 0174,     /* 0x48 */
    0046, 0251, 0252, 0253, 0254, 0255, 0256, 0257,     /* 0x50 */
    0260, 0261, 0041, 0044, 0052, 0051, 0073, 0176,     /* 0x58 */
    0055, 0057, 0262, 0263, 0264, 0265, 0266, 0267,     /* 0x60 */
    0270, 0271, 0313, 0054, 0045, 0137, 0076, 0077,     /* 0x68 */
    0272, 0273, 0274, 0275, 0276, 0277, 0300, 0301,     /* 0x70 */
    0302, 0140, 0072, 0043, 0100, 0047, 0075, 0042,     /* 0x78 */
    0303, 0141, 0142, 0143, 0144, 0145, 0146, 0147,     /* 0x80 */
    0150, 0151, 0304, 0305, 0306, 0307, 0310, 0311,     /* 0x88 */
    0312, 0152, 0153, 0154, 0155, 0156, 0157, 0160,     /* 0x90 */
    0161, 0162, 0136, 0314, 0315, 0316, 0317, 0320,     /* 0x98 */
    0321, 0345, 0163, 0164, 0165, 0166, 0167, 0170,     /* 0xa0 */
    0171, 0172, 0322, 0323, 0324, 0133, 0326, 0327,     /* 0xa8 */
    0330, 0331, 0332, 0333, 0334, 0335, 0336, 0337,     /* 0xb0 */
    0340, 0341, 0342, 0343, 0344, 0135, 0346, 0347,     /* 0xb8 */
    0173, 0101, 0102, 0103, 0104, 0105, 0106, 0107,     /* 0xc0 */
    0110, 0111, 0350, 0351, 0352, 0353, 0354, 0355,     /* 0xc8 */
    0175, 0112, 0113, 0114, 0115, 0116, 0117, 0120,     /* 0xd0 */
    0121, 0122, 0356, 0357, 0360, 0361, 0362, 0363,     /* 0xd8 */
    0134, 0237, 0123, 0124, 0125, 0126, 0127, 0130,     /* 0xe0 */
    0131, 0132, 0364, 0365, 0366, 0367, 0370, 0371,     /* 0xe8 */
    0060, 0061, 0062, 0063, 0064, 0065, 0066, 0067,     /* 0xf0 */
    0070, 0071, 0372, 0373, 0374, 0375, 0376, 0377      /* 0xf8 */
};

/*
 * Convert a buffer worth of characters from EBCDIC to ASCII.
Only useful if 2100 * wanting 7-bit ASCII characters out the other end. 2101 */ 2102 void 2103 ebcdic2ascii(char_u *buffer, int len) 2104 { 2105 int i; 2106 2107 for (i = 0; i < len; i++) 2108 buffer[i] = ebcdic2ascii_tab[buffer[i]]; 2109 } 2110 #endif 2111