1 /* vi:set ts=8 sts=4 sw=4: 2 * 3 * VIM - Vi IMproved by Bram Moolenaar 4 * 5 * Do ":help uganda" in Vim to read copying and usage conditions. 6 * Do ":help credits" in Vim to see a list of people who contributed. 7 * See README.txt for an overview of the Vim source code. 8 */ 9 10 #include "vim.h" 11 12 #ifdef FEAT_LINEBREAK 13 static int win_chartabsize __ARGS((win_T *wp, char_u *p, colnr_T col)); 14 #endif 15 16 #ifdef FEAT_MBYTE 17 # if defined(HAVE_WCHAR_H) 18 # include <wchar.h> /* for towupper() and towlower() */ 19 # endif 20 static int win_nolbr_chartabsize __ARGS((win_T *wp, char_u *s, colnr_T col, int *headp)); 21 #endif 22 23 static unsigned nr2hex __ARGS((unsigned c)); 24 25 static int chartab_initialized = FALSE; 26 27 /* b_chartab[] is an array of 32 bytes, each bit representing one of the 28 * characters 0-255. */ 29 #define SET_CHARTAB(buf, c) (buf)->b_chartab[(unsigned)(c) >> 3] |= (1 << ((c) & 0x7)) 30 #define RESET_CHARTAB(buf, c) (buf)->b_chartab[(unsigned)(c) >> 3] &= ~(1 << ((c) & 0x7)) 31 #define GET_CHARTAB(buf, c) ((buf)->b_chartab[(unsigned)(c) >> 3] & (1 << ((c) & 0x7))) 32 33 /* 34 * Fill chartab[]. Also fills curbuf->b_chartab[] with flags for keyword 35 * characters for current buffer. 36 * 37 * Depends on the option settings 'iskeyword', 'isident', 'isfname', 38 * 'isprint' and 'encoding'. 39 * 40 * The index in chartab[] depends on 'encoding': 41 * - For non-multi-byte index with the byte (same as the character). 42 * - For DBCS index with the first byte. 43 * - For UTF-8 index with the character (when first byte is up to 0x80 it is 44 * the same as the character, if the first byte is 0x80 and above it depends 45 * on further bytes). 46 * 47 * The contents of chartab[]: 48 * - The lower two bits, masked by CT_CELL_MASK, give the number of display 49 * cells the character occupies (1 or 2). Not valid for UTF-8 above 0x80. 50 * - CT_PRINT_CHAR bit is set when the character is printable (no need to 51 * translate the character before displaying it). Note that only DBCS 52 * characters can have 2 display cells and still be printable. 53 * - CT_FNAME_CHAR bit is set when the character can be in a file name. 54 * - CT_ID_CHAR bit is set when the character can be in an identifier. 55 * 56 * Return FAIL if 'iskeyword', 'isident', 'isfname' or 'isprint' option has an 57 * error, OK otherwise. 58 */ 59 int 60 init_chartab() 61 { 62 return buf_init_chartab(curbuf, TRUE); 63 } 64 65 int 66 buf_init_chartab(buf, global) 67 buf_T *buf; 68 int global; /* FALSE: only set buf->b_chartab[] */ 69 { 70 int c; 71 int c2; 72 char_u *p; 73 int i; 74 int tilde; 75 int do_isalpha; 76 77 if (global) 78 { 79 /* 80 * Set the default size for printable characters: 81 * From <Space> to '~' is 1 (printable), others are 2 (not printable). 82 * This also inits all 'isident' and 'isfname' flags to FALSE. 83 * 84 * EBCDIC: all chars below ' ' are not printable, all others are 85 * printable. 86 */ 87 c = 0; 88 while (c < ' ') 89 chartab[c++] = (dy_flags & DY_UHEX) ? 4 : 2; 90 #ifdef EBCDIC 91 while (c < 255) 92 #else 93 while (c <= '~') 94 #endif 95 chartab[c++] = 1 + CT_PRINT_CHAR; 96 #ifdef FEAT_FKMAP 97 if (p_altkeymap) 98 { 99 while (c < YE) 100 chartab[c++] = 1 + CT_PRINT_CHAR; 101 } 102 #endif 103 while (c < 256) 104 { 105 #ifdef FEAT_MBYTE 106 /* UTF-8: bytes 0xa0 - 0xff are printable (latin1) */ 107 if (enc_utf8 && c >= 0xa0) 108 chartab[c++] = CT_PRINT_CHAR + 1; 109 /* euc-jp characters starting with 0x8e are single width */ 110 else if (enc_dbcs == DBCS_JPNU && c == 0x8e) 111 chartab[c++] = CT_PRINT_CHAR + 1; 112 /* other double-byte chars can be printable AND double-width */ 113 else if (enc_dbcs != 0 && MB_BYTE2LEN(c) == 2) 114 chartab[c++] = CT_PRINT_CHAR + 2; 115 else 116 #endif 117 /* the rest is unprintable by default */ 118 chartab[c++] = (dy_flags & DY_UHEX) ? 4 : 2; 119 } 120 121 #ifdef FEAT_MBYTE 122 /* Assume that every multi-byte char is a filename character. */ 123 for (c = 1; c < 256; ++c) 124 if ((enc_dbcs != 0 && MB_BYTE2LEN(c) > 1) 125 || (enc_dbcs == DBCS_JPNU && c == 0x8e) 126 || (enc_utf8 && c >= 0xa0)) 127 chartab[c] |= CT_FNAME_CHAR; 128 #endif 129 } 130 131 /* 132 * Init word char flags all to FALSE 133 */ 134 vim_memset(buf->b_chartab, 0, (size_t)32); 135 #ifdef FEAT_MBYTE 136 if (enc_dbcs != 0) 137 for (c = 0; c < 256; ++c) 138 { 139 /* double-byte characters are probably word characters */ 140 if (MB_BYTE2LEN(c) == 2) 141 SET_CHARTAB(buf, c); 142 } 143 #endif 144 145 #ifdef FEAT_LISP 146 /* 147 * In lisp mode the '-' character is included in keywords. 148 */ 149 if (buf->b_p_lisp) 150 SET_CHARTAB(buf, '-'); 151 #endif 152 153 /* Walk through the 'isident', 'iskeyword', 'isfname' and 'isprint' 154 * options Each option is a list of characters, character numbers or 155 * ranges, separated by commas, e.g.: "200-210,x,#-178,-" 156 */ 157 for (i = global ? 0 : 3; i <= 3; ++i) 158 { 159 if (i == 0) 160 p = p_isi; /* first round: 'isident' */ 161 else if (i == 1) 162 p = p_isp; /* second round: 'isprint' */ 163 else if (i == 2) 164 p = p_isf; /* third round: 'isfname' */ 165 else /* i == 3 */ 166 p = buf->b_p_isk; /* fourth round: 'iskeyword' */ 167 168 while (*p) 169 { 170 tilde = FALSE; 171 do_isalpha = FALSE; 172 if (*p == '^' && p[1] != NUL) 173 { 174 tilde = TRUE; 175 ++p; 176 } 177 if (VIM_ISDIGIT(*p)) 178 c = getdigits(&p); 179 else 180 #ifdef FEAT_MBYTE 181 if (has_mbyte) 182 c = mb_ptr2char_adv(&p); 183 else 184 #endif 185 c = *p++; 186 c2 = -1; 187 if (*p == '-' && p[1] != NUL) 188 { 189 ++p; 190 if (VIM_ISDIGIT(*p)) 191 c2 = getdigits(&p); 192 else 193 #ifdef FEAT_MBYTE 194 if (has_mbyte) 195 c2 = mb_ptr2char_adv(&p); 196 else 197 #endif 198 c2 = *p++; 199 } 200 if (c <= 0 || c >= 256 || (c2 < c && c2 != -1) || c2 >= 256 201 || !(*p == NUL || *p == ',')) 202 return FAIL; 203 204 if (c2 == -1) /* not a range */ 205 { 206 /* 207 * A single '@' (not "@-@"): 208 * Decide on letters being ID/printable/keyword chars with 209 * standard function isalpha(). This takes care of locale for 210 * single-byte characters). 211 */ 212 if (c == '@') 213 { 214 do_isalpha = TRUE; 215 c = 1; 216 c2 = 255; 217 } 218 else 219 c2 = c; 220 } 221 while (c <= c2) 222 { 223 /* Use the MB_ functions here, because isalpha() doesn't 224 * work properly when 'encoding' is "latin1" and the locale is 225 * "C". */ 226 if (!do_isalpha || MB_ISLOWER(c) || MB_ISUPPER(c) 227 #ifdef FEAT_FKMAP 228 || (p_altkeymap && (F_isalpha(c) || F_isdigit(c))) 229 #endif 230 ) 231 { 232 if (i == 0) /* (re)set ID flag */ 233 { 234 if (tilde) 235 chartab[c] &= ~CT_ID_CHAR; 236 else 237 chartab[c] |= CT_ID_CHAR; 238 } 239 else if (i == 1) /* (re)set printable */ 240 { 241 if ((c < ' ' 242 #ifndef EBCDIC 243 || c > '~' 244 #endif 245 #ifdef FEAT_FKMAP 246 || (p_altkeymap 247 && (F_isalpha(c) || F_isdigit(c))) 248 #endif 249 ) 250 #ifdef FEAT_MBYTE 251 /* For double-byte we keep the cell width, so 252 * that we can detect it from the first byte. */ 253 && !(enc_dbcs && MB_BYTE2LEN(c) == 2) 254 #endif 255 ) 256 { 257 if (tilde) 258 { 259 chartab[c] = (chartab[c] & ~CT_CELL_MASK) 260 + ((dy_flags & DY_UHEX) ? 4 : 2); 261 chartab[c] &= ~CT_PRINT_CHAR; 262 } 263 else 264 { 265 chartab[c] = (chartab[c] & ~CT_CELL_MASK) + 1; 266 chartab[c] |= CT_PRINT_CHAR; 267 } 268 } 269 } 270 else if (i == 2) /* (re)set fname flag */ 271 { 272 if (tilde) 273 chartab[c] &= ~CT_FNAME_CHAR; 274 else 275 chartab[c] |= CT_FNAME_CHAR; 276 } 277 else /* i == 3 */ /* (re)set keyword flag */ 278 { 279 if (tilde) 280 RESET_CHARTAB(buf, c); 281 else 282 SET_CHARTAB(buf, c); 283 } 284 } 285 ++c; 286 } 287 288 c = *p; 289 p = skip_to_option_part(p); 290 if (c == ',' && *p == NUL) 291 /* Trailing comma is not allowed. */ 292 return FAIL; 293 } 294 } 295 chartab_initialized = TRUE; 296 return OK; 297 } 298 299 /* 300 * Translate any special characters in buf[bufsize] in-place. 301 * The result is a string with only printable characters, but if there is not 302 * enough room, not all characters will be translated. 303 */ 304 void 305 trans_characters(buf, bufsize) 306 char_u *buf; 307 int bufsize; 308 { 309 int len; /* length of string needing translation */ 310 int room; /* room in buffer after string */ 311 char_u *trs; /* translated character */ 312 int trs_len; /* length of trs[] */ 313 314 len = (int)STRLEN(buf); 315 room = bufsize - len; 316 while (*buf != 0) 317 { 318 # ifdef FEAT_MBYTE 319 /* Assume a multi-byte character doesn't need translation. */ 320 if (has_mbyte && (trs_len = (*mb_ptr2len)(buf)) > 1) 321 len -= trs_len; 322 else 323 # endif 324 { 325 trs = transchar_byte(*buf); 326 trs_len = (int)STRLEN(trs); 327 if (trs_len > 1) 328 { 329 room -= trs_len - 1; 330 if (room <= 0) 331 return; 332 mch_memmove(buf + trs_len, buf + 1, (size_t)len); 333 } 334 mch_memmove(buf, trs, (size_t)trs_len); 335 --len; 336 } 337 buf += trs_len; 338 } 339 } 340 341 #if defined(FEAT_EVAL) || defined(FEAT_TITLE) || defined(FEAT_INS_EXPAND) \ 342 || defined(PROTO) 343 /* 344 * Translate a string into allocated memory, replacing special chars with 345 * printable chars. Returns NULL when out of memory. 346 */ 347 char_u * 348 transstr(s) 349 char_u *s; 350 { 351 char_u *res; 352 char_u *p; 353 #ifdef FEAT_MBYTE 354 int l, len, c; 355 char_u hexbuf[11]; 356 #endif 357 358 #ifdef FEAT_MBYTE 359 if (has_mbyte) 360 { 361 /* Compute the length of the result, taking account of unprintable 362 * multi-byte characters. */ 363 len = 0; 364 p = s; 365 while (*p != NUL) 366 { 367 if ((l = (*mb_ptr2len)(p)) > 1) 368 { 369 c = (*mb_ptr2char)(p); 370 p += l; 371 if (vim_isprintc(c)) 372 len += l; 373 else 374 { 375 transchar_hex(hexbuf, c); 376 len += (int)STRLEN(hexbuf); 377 } 378 } 379 else 380 { 381 l = byte2cells(*p++); 382 if (l > 0) 383 len += l; 384 else 385 len += 4; /* illegal byte sequence */ 386 } 387 } 388 res = alloc((unsigned)(len + 1)); 389 } 390 else 391 #endif 392 res = alloc((unsigned)(vim_strsize(s) + 1)); 393 if (res != NULL) 394 { 395 *res = NUL; 396 p = s; 397 while (*p != NUL) 398 { 399 #ifdef FEAT_MBYTE 400 if (has_mbyte && (l = (*mb_ptr2len)(p)) > 1) 401 { 402 c = (*mb_ptr2char)(p); 403 if (vim_isprintc(c)) 404 STRNCAT(res, p, l); /* append printable multi-byte char */ 405 else 406 transchar_hex(res + STRLEN(res), c); 407 p += l; 408 } 409 else 410 #endif 411 STRCAT(res, transchar_byte(*p++)); 412 } 413 } 414 return res; 415 } 416 #endif 417 418 #if defined(FEAT_SYN_HL) || defined(FEAT_INS_EXPAND) || defined(PROTO) 419 /* 420 * Convert the string "str[orglen]" to do ignore-case comparing. Uses the 421 * current locale. 422 * When "buf" is NULL returns an allocated string (NULL for out-of-memory). 423 * Otherwise puts the result in "buf[buflen]". 424 */ 425 char_u * 426 str_foldcase(str, orglen, buf, buflen) 427 char_u *str; 428 int orglen; 429 char_u *buf; 430 int buflen; 431 { 432 garray_T ga; 433 int i; 434 int len = orglen; 435 436 #define GA_CHAR(i) ((char_u *)ga.ga_data)[i] 437 #define GA_PTR(i) ((char_u *)ga.ga_data + i) 438 #define STR_CHAR(i) (buf == NULL ? GA_CHAR(i) : buf[i]) 439 #define STR_PTR(i) (buf == NULL ? GA_PTR(i) : buf + i) 440 441 /* Copy "str" into "buf" or allocated memory, unmodified. */ 442 if (buf == NULL) 443 { 444 ga_init2(&ga, 1, 10); 445 if (ga_grow(&ga, len + 1) == FAIL) 446 return NULL; 447 mch_memmove(ga.ga_data, str, (size_t)len); 448 ga.ga_len = len; 449 } 450 else 451 { 452 if (len >= buflen) /* Ugly! */ 453 len = buflen - 1; 454 mch_memmove(buf, str, (size_t)len); 455 } 456 if (buf == NULL) 457 GA_CHAR(len) = NUL; 458 else 459 buf[len] = NUL; 460 461 /* Make each character lower case. */ 462 i = 0; 463 while (STR_CHAR(i) != NUL) 464 { 465 #ifdef FEAT_MBYTE 466 if (enc_utf8 || (has_mbyte && MB_BYTE2LEN(STR_CHAR(i)) > 1)) 467 { 468 if (enc_utf8) 469 { 470 int c = utf_ptr2char(STR_PTR(i)); 471 int olen = utf_ptr2len(STR_PTR(i)); 472 int lc = utf_tolower(c); 473 474 /* Only replace the character when it is not an invalid 475 * sequence (ASCII character or more than one byte) and 476 * utf_tolower() doesn't return the original character. */ 477 if ((c < 0x80 || olen > 1) && c != lc) 478 { 479 int nlen = utf_char2len(lc); 480 481 /* If the byte length changes need to shift the following 482 * characters forward or backward. */ 483 if (olen != nlen) 484 { 485 if (nlen > olen) 486 { 487 if (buf == NULL 488 ? ga_grow(&ga, nlen - olen + 1) == FAIL 489 : len + nlen - olen >= buflen) 490 { 491 /* out of memory, keep old char */ 492 lc = c; 493 nlen = olen; 494 } 495 } 496 if (olen != nlen) 497 { 498 if (buf == NULL) 499 { 500 STRMOVE(GA_PTR(i) + nlen, GA_PTR(i) + olen); 501 ga.ga_len += nlen - olen; 502 } 503 else 504 { 505 STRMOVE(buf + i + nlen, buf + i + olen); 506 len += nlen - olen; 507 } 508 } 509 } 510 (void)utf_char2bytes(lc, STR_PTR(i)); 511 } 512 } 513 /* skip to next multi-byte char */ 514 i += (*mb_ptr2len)(STR_PTR(i)); 515 } 516 else 517 #endif 518 { 519 if (buf == NULL) 520 GA_CHAR(i) = TOLOWER_LOC(GA_CHAR(i)); 521 else 522 buf[i] = TOLOWER_LOC(buf[i]); 523 ++i; 524 } 525 } 526 527 if (buf == NULL) 528 return (char_u *)ga.ga_data; 529 return buf; 530 } 531 #endif 532 533 /* 534 * Catch 22: chartab[] can't be initialized before the options are 535 * initialized, and initializing options may cause transchar() to be called! 536 * When chartab_initialized == FALSE don't use chartab[]. 537 * Does NOT work for multi-byte characters, c must be <= 255. 538 * Also doesn't work for the first byte of a multi-byte, "c" must be a 539 * character! 540 */ 541 static char_u transchar_buf[7]; 542 543 char_u * 544 transchar(c) 545 int c; 546 { 547 int i; 548 549 i = 0; 550 if (IS_SPECIAL(c)) /* special key code, display as ~@ char */ 551 { 552 transchar_buf[0] = '~'; 553 transchar_buf[1] = '@'; 554 i = 2; 555 c = K_SECOND(c); 556 } 557 558 if ((!chartab_initialized && ( 559 #ifdef EBCDIC 560 (c >= 64 && c < 255) 561 #else 562 (c >= ' ' && c <= '~') 563 #endif 564 #ifdef FEAT_FKMAP 565 || F_ischar(c) 566 #endif 567 )) || (c < 256 && vim_isprintc_strict(c))) 568 { 569 /* printable character */ 570 transchar_buf[i] = c; 571 transchar_buf[i + 1] = NUL; 572 } 573 else 574 transchar_nonprint(transchar_buf + i, c); 575 return transchar_buf; 576 } 577 578 #if defined(FEAT_MBYTE) || defined(PROTO) 579 /* 580 * Like transchar(), but called with a byte instead of a character. Checks 581 * for an illegal UTF-8 byte. 582 */ 583 char_u * 584 transchar_byte(c) 585 int c; 586 { 587 if (enc_utf8 && c >= 0x80) 588 { 589 transchar_nonprint(transchar_buf, c); 590 return transchar_buf; 591 } 592 return transchar(c); 593 } 594 #endif 595 596 /* 597 * Convert non-printable character to two or more printable characters in 598 * "buf[]". "buf" needs to be able to hold five bytes. 599 * Does NOT work for multi-byte characters, c must be <= 255. 600 */ 601 void 602 transchar_nonprint(buf, c) 603 char_u *buf; 604 int c; 605 { 606 if (c == NL) 607 c = NUL; /* we use newline in place of a NUL */ 608 else if (c == CAR && get_fileformat(curbuf) == EOL_MAC) 609 c = NL; /* we use CR in place of NL in this case */ 610 611 if (dy_flags & DY_UHEX) /* 'display' has "uhex" */ 612 transchar_hex(buf, c); 613 614 #ifdef EBCDIC 615 /* For EBCDIC only the characters 0-63 and 255 are not printable */ 616 else if (CtrlChar(c) != 0 || c == DEL) 617 #else 618 else if (c <= 0x7f) /* 0x00 - 0x1f and 0x7f */ 619 #endif 620 { 621 buf[0] = '^'; 622 #ifdef EBCDIC 623 if (c == DEL) 624 buf[1] = '?'; /* DEL displayed as ^? */ 625 else 626 buf[1] = CtrlChar(c); 627 #else 628 buf[1] = c ^ 0x40; /* DEL displayed as ^? */ 629 #endif 630 631 buf[2] = NUL; 632 } 633 #ifdef FEAT_MBYTE 634 else if (enc_utf8 && c >= 0x80) 635 { 636 transchar_hex(buf, c); 637 } 638 #endif 639 #ifndef EBCDIC 640 else if (c >= ' ' + 0x80 && c <= '~' + 0x80) /* 0xa0 - 0xfe */ 641 { 642 buf[0] = '|'; 643 buf[1] = c - 0x80; 644 buf[2] = NUL; 645 } 646 #else 647 else if (c < 64) 648 { 649 buf[0] = '~'; 650 buf[1] = MetaChar(c); 651 buf[2] = NUL; 652 } 653 #endif 654 else /* 0x80 - 0x9f and 0xff */ 655 { 656 /* 657 * TODO: EBCDIC I don't know what to do with this chars, so I display 658 * them as '~?' for now 659 */ 660 buf[0] = '~'; 661 #ifdef EBCDIC 662 buf[1] = '?'; /* 0xff displayed as ~? */ 663 #else 664 buf[1] = (c - 0x80) ^ 0x40; /* 0xff displayed as ~? */ 665 #endif 666 buf[2] = NUL; 667 } 668 } 669 670 void 671 transchar_hex(buf, c) 672 char_u *buf; 673 int c; 674 { 675 int i = 0; 676 677 buf[0] = '<'; 678 #ifdef FEAT_MBYTE 679 if (c > 255) 680 { 681 buf[++i] = nr2hex((unsigned)c >> 12); 682 buf[++i] = nr2hex((unsigned)c >> 8); 683 } 684 #endif 685 buf[++i] = nr2hex((unsigned)c >> 4); 686 buf[++i] = nr2hex((unsigned)c); 687 buf[++i] = '>'; 688 buf[++i] = NUL; 689 } 690 691 /* 692 * Convert the lower 4 bits of byte "c" to its hex character. 693 * Lower case letters are used to avoid the confusion of <F1> being 0xf1 or 694 * function key 1. 695 */ 696 static unsigned 697 nr2hex(c) 698 unsigned c; 699 { 700 if ((c & 0xf) <= 9) 701 return (c & 0xf) + '0'; 702 return (c & 0xf) - 10 + 'a'; 703 } 704 705 /* 706 * Return number of display cells occupied by byte "b". 707 * Caller must make sure 0 <= b <= 255. 708 * For multi-byte mode "b" must be the first byte of a character. 709 * A TAB is counted as two cells: "^I". 710 * For UTF-8 mode this will return 0 for bytes >= 0x80, because the number of 711 * cells depends on further bytes. 712 */ 713 int 714 byte2cells(b) 715 int b; 716 { 717 #ifdef FEAT_MBYTE 718 if (enc_utf8 && b >= 0x80) 719 return 0; 720 #endif 721 return (chartab[b] & CT_CELL_MASK); 722 } 723 724 /* 725 * Return number of display cells occupied by character "c". 726 * "c" can be a special key (negative number) in which case 3 or 4 is returned. 727 * A TAB is counted as two cells: "^I" or four: "<09>". 728 */ 729 int 730 char2cells(c) 731 int c; 732 { 733 if (IS_SPECIAL(c)) 734 return char2cells(K_SECOND(c)) + 2; 735 #ifdef FEAT_MBYTE 736 if (c >= 0x80) 737 { 738 /* UTF-8: above 0x80 need to check the value */ 739 if (enc_utf8) 740 return utf_char2cells(c); 741 /* DBCS: double-byte means double-width, except for euc-jp with first 742 * byte 0x8e */ 743 if (enc_dbcs != 0 && c >= 0x100) 744 { 745 if (enc_dbcs == DBCS_JPNU && ((unsigned)c >> 8) == 0x8e) 746 return 1; 747 return 2; 748 } 749 } 750 #endif 751 return (chartab[c & 0xff] & CT_CELL_MASK); 752 } 753 754 /* 755 * Return number of display cells occupied by character at "*p". 756 * A TAB is counted as two cells: "^I" or four: "<09>". 757 */ 758 int 759 ptr2cells(p) 760 char_u *p; 761 { 762 #ifdef FEAT_MBYTE 763 /* For UTF-8 we need to look at more bytes if the first byte is >= 0x80. */ 764 if (enc_utf8 && *p >= 0x80) 765 return utf_ptr2cells(p); 766 /* For DBCS we can tell the cell count from the first byte. */ 767 #endif 768 return (chartab[*p] & CT_CELL_MASK); 769 } 770 771 /* 772 * Return the number of character cells string "s" will take on the screen, 773 * counting TABs as two characters: "^I". 774 */ 775 int 776 vim_strsize(s) 777 char_u *s; 778 { 779 return vim_strnsize(s, (int)MAXCOL); 780 } 781 782 /* 783 * Return the number of character cells string "s[len]" will take on the 784 * screen, counting TABs as two characters: "^I". 785 */ 786 int 787 vim_strnsize(s, len) 788 char_u *s; 789 int len; 790 { 791 int size = 0; 792 793 while (*s != NUL && --len >= 0) 794 { 795 #ifdef FEAT_MBYTE 796 if (has_mbyte) 797 { 798 int l = (*mb_ptr2len)(s); 799 800 size += ptr2cells(s); 801 s += l; 802 len -= l - 1; 803 } 804 else 805 #endif 806 size += byte2cells(*s++); 807 } 808 return size; 809 } 810 811 /* 812 * Return the number of characters 'c' will take on the screen, taking 813 * into account the size of a tab. 814 * Use a define to make it fast, this is used very often!!! 815 * Also see getvcol() below. 816 */ 817 818 #define RET_WIN_BUF_CHARTABSIZE(wp, buf, p, col) \ 819 if (*(p) == TAB && (!(wp)->w_p_list || lcs_tab1)) \ 820 { \ 821 int ts; \ 822 ts = (buf)->b_p_ts; \ 823 return (int)(ts - (col % ts)); \ 824 } \ 825 else \ 826 return ptr2cells(p); 827 828 #if defined(FEAT_VREPLACE) || defined(FEAT_EX_EXTRA) || defined(FEAT_GUI) \ 829 || defined(FEAT_VIRTUALEDIT) || defined(PROTO) 830 int 831 chartabsize(p, col) 832 char_u *p; 833 colnr_T col; 834 { 835 RET_WIN_BUF_CHARTABSIZE(curwin, curbuf, p, col) 836 } 837 #endif 838 839 #ifdef FEAT_LINEBREAK 840 static int 841 win_chartabsize(wp, p, col) 842 win_T *wp; 843 char_u *p; 844 colnr_T col; 845 { 846 RET_WIN_BUF_CHARTABSIZE(wp, wp->w_buffer, p, col) 847 } 848 #endif 849 850 /* 851 * Return the number of characters the string 's' will take on the screen, 852 * taking into account the size of a tab. 853 */ 854 int 855 linetabsize(s) 856 char_u *s; 857 { 858 return linetabsize_col(0, s); 859 } 860 861 /* 862 * Like linetabsize(), but starting at column "startcol". 863 */ 864 int 865 linetabsize_col(startcol, s) 866 int startcol; 867 char_u *s; 868 { 869 colnr_T col = startcol; 870 char_u *line = s; /* pointer to start of line, for breakindent */ 871 872 while (*s != NUL) 873 col += lbr_chartabsize_adv(line, &s, col); 874 return (int)col; 875 } 876 877 /* 878 * Like linetabsize(), but for a given window instead of the current one. 879 */ 880 int 881 win_linetabsize(wp, line, len) 882 win_T *wp; 883 char_u *line; 884 colnr_T len; 885 { 886 colnr_T col = 0; 887 char_u *s; 888 889 for (s = line; *s != NUL && (len == MAXCOL || s < line + len); 890 mb_ptr_adv(s)) 891 col += win_lbr_chartabsize(wp, line, s, col, NULL); 892 return (int)col; 893 } 894 895 /* 896 * Return TRUE if 'c' is a normal identifier character: 897 * Letters and characters from the 'isident' option. 898 */ 899 int 900 vim_isIDc(c) 901 int c; 902 { 903 return (c > 0 && c < 0x100 && (chartab[c] & CT_ID_CHAR)); 904 } 905 906 /* 907 * return TRUE if 'c' is a keyword character: Letters and characters from 908 * 'iskeyword' option for current buffer. 909 * For multi-byte characters mb_get_class() is used (builtin rules). 910 */ 911 int 912 vim_iswordc(c) 913 int c; 914 { 915 return vim_iswordc_buf(c, curbuf); 916 } 917 918 int 919 vim_iswordc_buf(c, buf) 920 int c; 921 buf_T *buf; 922 { 923 #ifdef FEAT_MBYTE 924 if (c >= 0x100) 925 { 926 if (enc_dbcs != 0) 927 return dbcs_class((unsigned)c >> 8, (unsigned)(c & 0xff)) >= 2; 928 if (enc_utf8) 929 return utf_class(c) >= 2; 930 } 931 #endif 932 return (c > 0 && c < 0x100 && GET_CHARTAB(buf, c) != 0); 933 } 934 935 /* 936 * Just like vim_iswordc() but uses a pointer to the (multi-byte) character. 937 */ 938 int 939 vim_iswordp(p) 940 char_u *p; 941 { 942 #ifdef FEAT_MBYTE 943 if (has_mbyte && MB_BYTE2LEN(*p) > 1) 944 return mb_get_class(p) >= 2; 945 #endif 946 return GET_CHARTAB(curbuf, *p) != 0; 947 } 948 949 int 950 vim_iswordp_buf(p, buf) 951 char_u *p; 952 buf_T *buf; 953 { 954 #ifdef FEAT_MBYTE 955 if (has_mbyte && MB_BYTE2LEN(*p) > 1) 956 return mb_get_class(p) >= 2; 957 #endif 958 return (GET_CHARTAB(buf, *p) != 0); 959 } 960 961 /* 962 * return TRUE if 'c' is a valid file-name character 963 * Assume characters above 0x100 are valid (multi-byte). 964 */ 965 int 966 vim_isfilec(c) 967 int c; 968 { 969 return (c >= 0x100 || (c > 0 && (chartab[c] & CT_FNAME_CHAR))); 970 } 971 972 /* 973 * return TRUE if 'c' is a valid file-name character or a wildcard character 974 * Assume characters above 0x100 are valid (multi-byte). 975 * Explicitly interpret ']' as a wildcard character as mch_has_wildcard("]") 976 * returns false. 977 */ 978 int 979 vim_isfilec_or_wc(c) 980 int c; 981 { 982 char_u buf[2]; 983 984 buf[0] = (char_u)c; 985 buf[1] = NUL; 986 return vim_isfilec(c) || c == ']' || mch_has_wildcard(buf); 987 } 988 989 /* 990 * return TRUE if 'c' is a printable character 991 * Assume characters above 0x100 are printable (multi-byte), except for 992 * Unicode. 993 */ 994 int 995 vim_isprintc(c) 996 int c; 997 { 998 #ifdef FEAT_MBYTE 999 if (enc_utf8 && c >= 0x100) 1000 return utf_printable(c); 1001 #endif 1002 return (c >= 0x100 || (c > 0 && (chartab[c] & CT_PRINT_CHAR))); 1003 } 1004 1005 /* 1006 * Strict version of vim_isprintc(c), don't return TRUE if "c" is the head 1007 * byte of a double-byte character. 1008 */ 1009 int 1010 vim_isprintc_strict(c) 1011 int c; 1012 { 1013 #ifdef FEAT_MBYTE 1014 if (enc_dbcs != 0 && c < 0x100 && MB_BYTE2LEN(c) > 1) 1015 return FALSE; 1016 if (enc_utf8 && c >= 0x100) 1017 return utf_printable(c); 1018 #endif 1019 return (c >= 0x100 || (c > 0 && (chartab[c] & CT_PRINT_CHAR))); 1020 } 1021 1022 /* 1023 * like chartabsize(), but also check for line breaks on the screen 1024 */ 1025 int 1026 lbr_chartabsize(line, s, col) 1027 char_u *line UNUSED; /* start of the line */ 1028 unsigned char *s; 1029 colnr_T col; 1030 { 1031 #ifdef FEAT_LINEBREAK 1032 if (!curwin->w_p_lbr && *p_sbr == NUL && !curwin->w_p_bri) 1033 { 1034 #endif 1035 #ifdef FEAT_MBYTE 1036 if (curwin->w_p_wrap) 1037 return win_nolbr_chartabsize(curwin, s, col, NULL); 1038 #endif 1039 RET_WIN_BUF_CHARTABSIZE(curwin, curbuf, s, col) 1040 #ifdef FEAT_LINEBREAK 1041 } 1042 return win_lbr_chartabsize(curwin, line == NULL ? s : line, s, col, NULL); 1043 #endif 1044 } 1045 1046 /* 1047 * Call lbr_chartabsize() and advance the pointer. 1048 */ 1049 int 1050 lbr_chartabsize_adv(line, s, col) 1051 char_u *line; /* start of the line */ 1052 char_u **s; 1053 colnr_T col; 1054 { 1055 int retval; 1056 1057 retval = lbr_chartabsize(line, *s, col); 1058 mb_ptr_adv(*s); 1059 return retval; 1060 } 1061 1062 /* 1063 * This function is used very often, keep it fast!!!! 1064 * 1065 * If "headp" not NULL, set *headp to the size of what we for 'showbreak' 1066 * string at start of line. Warning: *headp is only set if it's a non-zero 1067 * value, init to 0 before calling. 1068 */ 1069 int 1070 win_lbr_chartabsize(wp, line, s, col, headp) 1071 win_T *wp; 1072 char_u *line UNUSED; /* start of the line */ 1073 char_u *s; 1074 colnr_T col; 1075 int *headp UNUSED; 1076 { 1077 #ifdef FEAT_LINEBREAK 1078 int c; 1079 int size; 1080 colnr_T col2; 1081 colnr_T col_adj = 0; /* col + screen size of tab */ 1082 colnr_T colmax; 1083 int added; 1084 # ifdef FEAT_MBYTE 1085 int mb_added = 0; 1086 # else 1087 # define mb_added 0 1088 # endif 1089 int numberextra; 1090 char_u *ps; 1091 int tab_corr = (*s == TAB); 1092 int n; 1093 1094 /* 1095 * No 'linebreak', 'showbreak' and 'breakindent': return quickly. 1096 */ 1097 if (!wp->w_p_lbr && !wp->w_p_bri && *p_sbr == NUL) 1098 #endif 1099 { 1100 #ifdef FEAT_MBYTE 1101 if (wp->w_p_wrap) 1102 return win_nolbr_chartabsize(wp, s, col, headp); 1103 #endif 1104 RET_WIN_BUF_CHARTABSIZE(wp, wp->w_buffer, s, col) 1105 } 1106 1107 #ifdef FEAT_LINEBREAK 1108 /* 1109 * First get normal size, without 'linebreak' 1110 */ 1111 size = win_chartabsize(wp, s, col); 1112 c = *s; 1113 if (tab_corr) 1114 col_adj = size - 1; 1115 1116 /* 1117 * If 'linebreak' set check at a blank before a non-blank if the line 1118 * needs a break here 1119 */ 1120 if (wp->w_p_lbr 1121 && vim_isbreak(c) 1122 && !vim_isbreak(s[1]) 1123 && wp->w_p_wrap 1124 # ifdef FEAT_VERTSPLIT 1125 && wp->w_width != 0 1126 # endif 1127 ) 1128 { 1129 /* 1130 * Count all characters from first non-blank after a blank up to next 1131 * non-blank after a blank. 1132 */ 1133 numberextra = win_col_off(wp); 1134 col2 = col; 1135 colmax = (colnr_T)(W_WIDTH(wp) - numberextra - col_adj); 1136 if (col >= colmax) 1137 { 1138 colmax += col_adj; 1139 n = colmax + win_col_off2(wp); 1140 if (n > 0) 1141 colmax += (((col - colmax) / n) + 1) * n - col_adj; 1142 } 1143 1144 for (;;) 1145 { 1146 ps = s; 1147 mb_ptr_adv(s); 1148 c = *s; 1149 if (!(c != NUL 1150 && (vim_isbreak(c) 1151 || (!vim_isbreak(c) 1152 && (col2 == col || !vim_isbreak(*ps)))))) 1153 break; 1154 1155 col2 += win_chartabsize(wp, s, col2); 1156 if (col2 >= colmax) /* doesn't fit */ 1157 { 1158 size = colmax - col + col_adj; 1159 tab_corr = FALSE; 1160 break; 1161 } 1162 } 1163 } 1164 # ifdef FEAT_MBYTE 1165 else if (has_mbyte && size == 2 && MB_BYTE2LEN(*s) > 1 1166 && wp->w_p_wrap && in_win_border(wp, col)) 1167 { 1168 ++size; /* Count the ">" in the last column. */ 1169 mb_added = 1; 1170 } 1171 # endif 1172 1173 /* 1174 * May have to add something for 'breakindent' and/or 'showbreak' 1175 * string at start of line. 1176 * Set *headp to the size of what we add. 1177 */ 1178 added = 0; 1179 if ((*p_sbr != NUL || wp->w_p_bri) && wp->w_p_wrap && col != 0) 1180 { 1181 colnr_T sbrlen = 0; 1182 int numberwidth = win_col_off(wp); 1183 1184 numberextra = numberwidth; 1185 col += numberextra + mb_added; 1186 if (col >= (colnr_T)W_WIDTH(wp)) 1187 { 1188 col -= W_WIDTH(wp); 1189 numberextra = W_WIDTH(wp) - (numberextra - win_col_off2(wp)); 1190 if (col >= numberextra && numberextra > 0) 1191 col %= numberextra; 1192 if (*p_sbr != NUL) 1193 { 1194 sbrlen = (colnr_T)MB_CHARLEN(p_sbr); 1195 if (col >= sbrlen) 1196 col -= sbrlen; 1197 } 1198 if (col >= numberextra && numberextra > 0) 1199 col = col % numberextra; 1200 else if (col > 0 && numberextra > 0) 1201 col += numberwidth - win_col_off2(wp); 1202 1203 numberwidth -= win_col_off2(wp); 1204 } 1205 if (col == 0 || col + size + sbrlen > (colnr_T)W_WIDTH(wp)) 1206 { 1207 added = 0; 1208 if (*p_sbr != NUL) 1209 { 1210 if (size + sbrlen + numberwidth > (colnr_T)W_WIDTH(wp)) 1211 { 1212 /* calculate effective window width */ 1213 int width = (colnr_T)W_WIDTH(wp) - sbrlen - numberwidth; 1214 int prev_width = col ? ((colnr_T)W_WIDTH(wp) - (sbrlen + col)) : 0; 1215 if (width == 0) 1216 width = (colnr_T)W_WIDTH(wp); 1217 added += ((size - prev_width) / width) * vim_strsize(p_sbr); 1218 if ((size - prev_width) % width) 1219 /* wrapped, add another length of 'sbr' */ 1220 added += vim_strsize(p_sbr); 1221 } 1222 else 1223 added += vim_strsize(p_sbr); 1224 } 1225 if (wp->w_p_bri) 1226 added += get_breakindent_win(wp, line); 1227 1228 size += added; 1229 if (col != 0) 1230 added = 0; 1231 } 1232 } 1233 if (headp != NULL) 1234 *headp = added + mb_added; 1235 return size; 1236 #endif 1237 } 1238 1239 #if defined(FEAT_MBYTE) || defined(PROTO) 1240 /* 1241 * Like win_lbr_chartabsize(), except that we know 'linebreak' is off and 1242 * 'wrap' is on. This means we need to check for a double-byte character that 1243 * doesn't fit at the end of the screen line. 1244 */ 1245 static int 1246 win_nolbr_chartabsize(wp, s, col, headp) 1247 win_T *wp; 1248 char_u *s; 1249 colnr_T col; 1250 int *headp; 1251 { 1252 int n; 1253 1254 if (*s == TAB && (!wp->w_p_list || lcs_tab1)) 1255 { 1256 n = wp->w_buffer->b_p_ts; 1257 return (int)(n - (col % n)); 1258 } 1259 n = ptr2cells(s); 1260 /* Add one cell for a double-width character in the last column of the 1261 * window, displayed with a ">". */ 1262 if (n == 2 && MB_BYTE2LEN(*s) > 1 && in_win_border(wp, col)) 1263 { 1264 if (headp != NULL) 1265 *headp = 1; 1266 return 3; 1267 } 1268 return n; 1269 } 1270 1271 /* 1272 * Return TRUE if virtual column "vcol" is in the rightmost column of window 1273 * "wp". 1274 */ 1275 int 1276 in_win_border(wp, vcol) 1277 win_T *wp; 1278 colnr_T vcol; 1279 { 1280 int width1; /* width of first line (after line number) */ 1281 int width2; /* width of further lines */ 1282 1283 #ifdef FEAT_VERTSPLIT 1284 if (wp->w_width == 0) /* there is no border */ 1285 return FALSE; 1286 #endif 1287 width1 = W_WIDTH(wp) - win_col_off(wp); 1288 if ((int)vcol < width1 - 1) 1289 return FALSE; 1290 if ((int)vcol == width1 - 1) 1291 return TRUE; 1292 width2 = width1 + win_col_off2(wp); 1293 if (width2 <= 0) 1294 return FALSE; 1295 return ((vcol - width1) % width2 == width2 - 1); 1296 } 1297 #endif /* FEAT_MBYTE */ 1298 1299 /* 1300 * Get virtual column number of pos. 1301 * start: on the first position of this character (TAB, ctrl) 1302 * cursor: where the cursor is on this character (first char, except for TAB) 1303 * end: on the last position of this character (TAB, ctrl) 1304 * 1305 * This is used very often, keep it fast! 1306 */ 1307 void 1308 getvcol(wp, pos, start, cursor, end) 1309 win_T *wp; 1310 pos_T *pos; 1311 colnr_T *start; 1312 colnr_T *cursor; 1313 colnr_T *end; 1314 { 1315 colnr_T vcol; 1316 char_u *ptr; /* points to current char */ 1317 char_u *posptr; /* points to char at pos->col */ 1318 char_u *line; /* start of the line */ 1319 int incr; 1320 int head; 1321 int ts = wp->w_buffer->b_p_ts; 1322 int c; 1323 1324 vcol = 0; 1325 line = ptr = ml_get_buf(wp->w_buffer, pos->lnum, FALSE); 1326 if (pos->col == MAXCOL) 1327 posptr = NULL; /* continue until the NUL */ 1328 else 1329 posptr = ptr + pos->col; 1330 1331 /* 1332 * This function is used very often, do some speed optimizations. 1333 * When 'list', 'linebreak', 'showbreak' and 'breakindent' are not set 1334 * use a simple loop. 1335 * Also use this when 'list' is set but tabs take their normal size. 1336 */ 1337 if ((!wp->w_p_list || lcs_tab1 != NUL) 1338 #ifdef FEAT_LINEBREAK 1339 && !wp->w_p_lbr && *p_sbr == NUL && !wp->w_p_bri 1340 #endif 1341 ) 1342 { 1343 #ifndef FEAT_MBYTE 1344 head = 0; 1345 #endif 1346 for (;;) 1347 { 1348 #ifdef FEAT_MBYTE 1349 head = 0; 1350 #endif 1351 c = *ptr; 1352 /* make sure we don't go past the end of the line */ 1353 if (c == NUL) 1354 { 1355 incr = 1; /* NUL at end of line only takes one column */ 1356 break; 1357 } 1358 /* A tab gets expanded, depending on the current column */ 1359 if (c == TAB) 1360 incr = ts - (vcol % ts); 1361 else 1362 { 1363 #ifdef FEAT_MBYTE 1364 if (has_mbyte) 1365 { 1366 /* For utf-8, if the byte is >= 0x80, need to look at 1367 * further bytes to find the cell width. */ 1368 if (enc_utf8 && c >= 0x80) 1369 incr = utf_ptr2cells(ptr); 1370 else 1371 incr = CHARSIZE(c); 1372 1373 /* If a double-cell char doesn't fit at the end of a line 1374 * it wraps to the next line, it's like this char is three 1375 * cells wide. */ 1376 if (incr == 2 && wp->w_p_wrap && MB_BYTE2LEN(*ptr) > 1 1377 && in_win_border(wp, vcol)) 1378 { 1379 ++incr; 1380 head = 1; 1381 } 1382 } 1383 else 1384 #endif 1385 incr = CHARSIZE(c); 1386 } 1387 1388 if (posptr != NULL && ptr >= posptr) /* character at pos->col */ 1389 break; 1390 1391 vcol += incr; 1392 mb_ptr_adv(ptr); 1393 } 1394 } 1395 else 1396 { 1397 for (;;) 1398 { 1399 /* A tab gets expanded, depending on the current column */ 1400 head = 0; 1401 incr = win_lbr_chartabsize(wp, line, ptr, vcol, &head); 1402 /* make sure we don't go past the end of the line */ 1403 if (*ptr == NUL) 1404 { 1405 incr = 1; /* NUL at end of line only takes one column */ 1406 break; 1407 } 1408 1409 if (posptr != NULL && ptr >= posptr) /* character at pos->col */ 1410 break; 1411 1412 vcol += incr; 1413 mb_ptr_adv(ptr); 1414 } 1415 } 1416 if (start != NULL) 1417 *start = vcol + head; 1418 if (end != NULL) 1419 *end = vcol + incr - 1; 1420 if (cursor != NULL) 1421 { 1422 if (*ptr == TAB 1423 && (State & NORMAL) 1424 && !wp->w_p_list 1425 && !virtual_active() 1426 && !(VIsual_active && (*p_sel == 'e' || ltoreq(*pos, VIsual))) 1427 ) 1428 *cursor = vcol + incr - 1; /* cursor at end */ 1429 else 1430 *cursor = vcol + head; /* cursor at start */ 1431 } 1432 } 1433 1434 /* 1435 * Get virtual cursor column in the current window, pretending 'list' is off. 1436 */ 1437 colnr_T 1438 getvcol_nolist(posp) 1439 pos_T *posp; 1440 { 1441 int list_save = curwin->w_p_list; 1442 colnr_T vcol; 1443 1444 curwin->w_p_list = FALSE; 1445 getvcol(curwin, posp, NULL, &vcol, NULL); 1446 curwin->w_p_list = list_save; 1447 return vcol; 1448 } 1449 1450 #if defined(FEAT_VIRTUALEDIT) || defined(PROTO) 1451 /* 1452 * Get virtual column in virtual mode. 1453 */ 1454 void 1455 getvvcol(wp, pos, start, cursor, end) 1456 win_T *wp; 1457 pos_T *pos; 1458 colnr_T *start; 1459 colnr_T *cursor; 1460 colnr_T *end; 1461 { 1462 colnr_T col; 1463 colnr_T coladd; 1464 colnr_T endadd; 1465 # ifdef FEAT_MBYTE 1466 char_u *ptr; 1467 # endif 1468 1469 if (virtual_active()) 1470 { 1471 /* For virtual mode, only want one value */ 1472 getvcol(wp, pos, &col, NULL, NULL); 1473 1474 coladd = pos->coladd; 1475 endadd = 0; 1476 # ifdef FEAT_MBYTE 1477 /* Cannot put the cursor on part of a wide character. */ 1478 ptr = ml_get_buf(wp->w_buffer, pos->lnum, FALSE); 1479 if (pos->col < (colnr_T)STRLEN(ptr)) 1480 { 1481 int c = (*mb_ptr2char)(ptr + pos->col); 1482 1483 if (c != TAB && vim_isprintc(c)) 1484 { 1485 endadd = (colnr_T)(char2cells(c) - 1); 1486 if (coladd > endadd) /* past end of line */ 1487 endadd = 0; 1488 else 1489 coladd = 0; 1490 } 1491 } 1492 # endif 1493 col += coladd; 1494 if (start != NULL) 1495 *start = col; 1496 if (cursor != NULL) 1497 *cursor = col; 1498 if (end != NULL) 1499 *end = col + endadd; 1500 } 1501 else 1502 getvcol(wp, pos, start, cursor, end); 1503 } 1504 #endif 1505 1506 /* 1507 * Get the leftmost and rightmost virtual column of pos1 and pos2. 1508 * Used for Visual block mode. 1509 */ 1510 void 1511 getvcols(wp, pos1, pos2, left, right) 1512 win_T *wp; 1513 pos_T *pos1, *pos2; 1514 colnr_T *left, *right; 1515 { 1516 colnr_T from1, from2, to1, to2; 1517 1518 if (ltp(pos1, pos2)) 1519 { 1520 getvvcol(wp, pos1, &from1, NULL, &to1); 1521 getvvcol(wp, pos2, &from2, NULL, &to2); 1522 } 1523 else 1524 { 1525 getvvcol(wp, pos2, &from1, NULL, &to1); 1526 getvvcol(wp, pos1, &from2, NULL, &to2); 1527 } 1528 if (from2 < from1) 1529 *left = from2; 1530 else 1531 *left = from1; 1532 if (to2 > to1) 1533 { 1534 if (*p_sel == 'e' && from2 - 1 >= to1) 1535 *right = from2 - 1; 1536 else 1537 *right = to2; 1538 } 1539 else 1540 *right = to1; 1541 } 1542 1543 /* 1544 * skipwhite: skip over ' ' and '\t'. 1545 */ 1546 char_u * 1547 skipwhite(q) 1548 char_u *q; 1549 { 1550 char_u *p = q; 1551 1552 while (vim_iswhite(*p)) /* skip to next non-white */ 1553 ++p; 1554 return p; 1555 } 1556 1557 /* 1558 * skip over digits 1559 */ 1560 char_u * 1561 skipdigits(q) 1562 char_u *q; 1563 { 1564 char_u *p = q; 1565 1566 while (VIM_ISDIGIT(*p)) /* skip to next non-digit */ 1567 ++p; 1568 return p; 1569 } 1570 1571 #if defined(FEAT_SYN_HL) || defined(FEAT_SPELL) || defined(PROTO) 1572 /* 1573 * skip over digits and hex characters 1574 */ 1575 char_u * 1576 skiphex(q) 1577 char_u *q; 1578 { 1579 char_u *p = q; 1580 1581 while (vim_isxdigit(*p)) /* skip to next non-digit */ 1582 ++p; 1583 return p; 1584 } 1585 #endif 1586 1587 #if defined(FEAT_EX_EXTRA) || defined(PROTO) 1588 /* 1589 * skip to digit (or NUL after the string) 1590 */ 1591 char_u * 1592 skiptodigit(q) 1593 char_u *q; 1594 { 1595 char_u *p = q; 1596 1597 while (*p != NUL && !VIM_ISDIGIT(*p)) /* skip to next digit */ 1598 ++p; 1599 return p; 1600 } 1601 1602 /* 1603 * skip to hex character (or NUL after the string) 1604 */ 1605 char_u * 1606 skiptohex(q) 1607 char_u *q; 1608 { 1609 char_u *p = q; 1610 1611 while (*p != NUL && !vim_isxdigit(*p)) /* skip to next digit */ 1612 ++p; 1613 return p; 1614 } 1615 #endif 1616 1617 /* 1618 * Variant of isdigit() that can handle characters > 0x100. 1619 * We don't use isdigit() here, because on some systems it also considers 1620 * superscript 1 to be a digit. 1621 * Use the VIM_ISDIGIT() macro for simple arguments. 1622 */ 1623 int 1624 vim_isdigit(c) 1625 int c; 1626 { 1627 return (c >= '0' && c <= '9'); 1628 } 1629 1630 /* 1631 * Variant of isxdigit() that can handle characters > 0x100. 1632 * We don't use isxdigit() here, because on some systems it also considers 1633 * superscript 1 to be a digit. 1634 */ 1635 int 1636 vim_isxdigit(c) 1637 int c; 1638 { 1639 return (c >= '0' && c <= '9') 1640 || (c >= 'a' && c <= 'f') 1641 || (c >= 'A' && c <= 'F'); 1642 } 1643 1644 #if defined(FEAT_MBYTE) || defined(PROTO) 1645 /* 1646 * Vim's own character class functions. These exist because many library 1647 * islower()/toupper() etc. do not work properly: they crash when used with 1648 * invalid values or can't handle latin1 when the locale is C. 1649 * Speed is most important here. 1650 */ 1651 #define LATIN1LOWER 'l' 1652 #define LATIN1UPPER 'U' 1653 1654 static char_u latin1flags[257] = " UUUUUUUUUUUUUUUUUUUUUUUUUU llllllllllllllllllllllllll UUUUUUUUUUUUUUUUUUUUUUU UUUUUUUllllllllllllllllllllllll llllllll"; 1655 static char_u latin1upper[257] = " !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`ABCDEFGHIJKLMNOPQRSTUVWXYZ{|}~\x7f\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xf7\xd8\xd9\xda\xdb\xdc\xdd\xde\xff"; 1656 static char_u latin1lower[257] = " !\"#$%&'()*+,-./0123456789:;<=>?@abcdefghijklmnopqrstuvwxyz[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\x7f\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xd7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xdf\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff"; 1657 1658 int 1659 vim_islower(c) 1660 int c; 1661 { 1662 if (c <= '@') 1663 return FALSE; 1664 if (c >= 0x80) 1665 { 1666 if (enc_utf8) 1667 return utf_islower(c); 1668 if (c >= 0x100) 1669 { 1670 #ifdef HAVE_ISWLOWER 1671 if (has_mbyte) 1672 return iswlower(c); 1673 #endif 1674 /* islower() can't handle these chars and may crash */ 1675 return FALSE; 1676 } 1677 if (enc_latin1like) 1678 return (latin1flags[c] & LATIN1LOWER) == LATIN1LOWER; 1679 } 1680 return islower(c); 1681 } 1682 1683 int 1684 vim_isupper(c) 1685 int c; 1686 { 1687 if (c <= '@') 1688 return FALSE; 1689 if (c >= 0x80) 1690 { 1691 if (enc_utf8) 1692 return utf_isupper(c); 1693 if (c >= 0x100) 1694 { 1695 #ifdef HAVE_ISWUPPER 1696 if (has_mbyte) 1697 return iswupper(c); 1698 #endif 1699 /* islower() can't handle these chars and may crash */ 1700 return FALSE; 1701 } 1702 if (enc_latin1like) 1703 return (latin1flags[c] & LATIN1UPPER) == LATIN1UPPER; 1704 } 1705 return isupper(c); 1706 } 1707 1708 int 1709 vim_toupper(c) 1710 int c; 1711 { 1712 if (c <= '@') 1713 return c; 1714 if (c >= 0x80) 1715 { 1716 if (enc_utf8) 1717 return utf_toupper(c); 1718 if (c >= 0x100) 1719 { 1720 #ifdef HAVE_TOWUPPER 1721 if (has_mbyte) 1722 return towupper(c); 1723 #endif 1724 /* toupper() can't handle these chars and may crash */ 1725 return c; 1726 } 1727 if (enc_latin1like) 1728 return latin1upper[c]; 1729 } 1730 return TOUPPER_LOC(c); 1731 } 1732 1733 int 1734 vim_tolower(c) 1735 int c; 1736 { 1737 if (c <= '@') 1738 return c; 1739 if (c >= 0x80) 1740 { 1741 if (enc_utf8) 1742 return utf_tolower(c); 1743 if (c >= 0x100) 1744 { 1745 #ifdef HAVE_TOWLOWER 1746 if (has_mbyte) 1747 return towlower(c); 1748 #endif 1749 /* tolower() can't handle these chars and may crash */ 1750 return c; 1751 } 1752 if (enc_latin1like) 1753 return latin1lower[c]; 1754 } 1755 return TOLOWER_LOC(c); 1756 } 1757 #endif 1758 1759 /* 1760 * skiptowhite: skip over text until ' ' or '\t' or NUL. 1761 */ 1762 char_u * 1763 skiptowhite(p) 1764 char_u *p; 1765 { 1766 while (*p != ' ' && *p != '\t' && *p != NUL) 1767 ++p; 1768 return p; 1769 } 1770 1771 #if defined(FEAT_LISTCMDS) || defined(FEAT_SIGNS) || defined(FEAT_SNIFF) \ 1772 || defined(PROTO) 1773 /* 1774 * skiptowhite_esc: Like skiptowhite(), but also skip escaped chars 1775 */ 1776 char_u * 1777 skiptowhite_esc(p) 1778 char_u *p; 1779 { 1780 while (*p != ' ' && *p != '\t' && *p != NUL) 1781 { 1782 if ((*p == '\\' || *p == Ctrl_V) && *(p + 1) != NUL) 1783 ++p; 1784 ++p; 1785 } 1786 return p; 1787 } 1788 #endif 1789 1790 /* 1791 * Getdigits: Get a number from a string and skip over it. 1792 * Note: the argument is a pointer to a char_u pointer! 1793 */ 1794 long 1795 getdigits(pp) 1796 char_u **pp; 1797 { 1798 char_u *p; 1799 long retval; 1800 1801 p = *pp; 1802 retval = atol((char *)p); 1803 if (*p == '-') /* skip negative sign */ 1804 ++p; 1805 p = skipdigits(p); /* skip to next non-digit */ 1806 *pp = p; 1807 return retval; 1808 } 1809 1810 /* 1811 * Return TRUE if "lbuf" is empty or only contains blanks. 1812 */ 1813 int 1814 vim_isblankline(lbuf) 1815 char_u *lbuf; 1816 { 1817 char_u *p; 1818 1819 p = skipwhite(lbuf); 1820 return (*p == NUL || *p == '\r' || *p == '\n'); 1821 } 1822 1823 /* 1824 * Convert a string into a long and/or unsigned long, taking care of 1825 * hexadecimal and octal numbers. Accepts a '-' sign. 1826 * If "hexp" is not NULL, returns a flag to indicate the type of the number: 1827 * 0 decimal 1828 * '0' octal 1829 * 'X' hex 1830 * 'x' hex 1831 * If "len" is not NULL, the length of the number in characters is returned. 1832 * If "nptr" is not NULL, the signed result is returned in it. 1833 * If "unptr" is not NULL, the unsigned result is returned in it. 1834 * If "dooct" is non-zero recognize octal numbers, when > 1 always assume 1835 * octal number. 1836 * If "dohex" is non-zero recognize hex numbers, when > 1 always assume 1837 * hex number. 1838 * If maxlen > 0, check at a maximum maxlen chars 1839 */ 1840 void 1841 vim_str2nr(start, hexp, len, dooct, dohex, nptr, unptr, maxlen) 1842 char_u *start; 1843 int *hexp; /* return: type of number 0 = decimal, 'x' 1844 or 'X' is hex, '0' = octal */ 1845 int *len; /* return: detected length of number */ 1846 int dooct; /* recognize octal number */ 1847 int dohex; /* recognize hex number */ 1848 long *nptr; /* return: signed result */ 1849 unsigned long *unptr; /* return: unsigned result */ 1850 int maxlen; /* max length of string to check */ 1851 { 1852 char_u *ptr = start; 1853 int hex = 0; /* default is decimal */ 1854 int negative = FALSE; 1855 unsigned long un = 0; 1856 int n; 1857 1858 if (ptr[0] == '-') 1859 { 1860 negative = TRUE; 1861 ++ptr; 1862 } 1863 1864 /* Recognize hex and octal. */ 1865 if (ptr[0] == '0' && ptr[1] != '8' && ptr[1] != '9' 1866 && (maxlen == 0 || maxlen > 1)) 1867 { 1868 hex = ptr[1]; 1869 if (dohex && (hex == 'X' || hex == 'x') && vim_isxdigit(ptr[2]) 1870 && (maxlen == 0 || maxlen > 2)) 1871 ptr += 2; /* hexadecimal */ 1872 else 1873 { 1874 hex = 0; /* default is decimal */ 1875 if (dooct) 1876 { 1877 /* Don't interpret "0", "08" or "0129" as octal. */ 1878 for (n = 1; VIM_ISDIGIT(ptr[n]); ++n) 1879 { 1880 if (ptr[n] > '7') 1881 { 1882 hex = 0; /* can't be octal */ 1883 break; 1884 } 1885 if (ptr[n] >= '0') 1886 hex = '0'; /* assume octal */ 1887 if (n == maxlen) 1888 break; 1889 } 1890 } 1891 } 1892 } 1893 1894 /* 1895 * Do the string-to-numeric conversion "manually" to avoid sscanf quirks. 1896 */ 1897 n = 1; 1898 if (hex == '0' || dooct > 1) 1899 { 1900 /* octal */ 1901 while ('0' <= *ptr && *ptr <= '7') 1902 { 1903 un = 8 * un + (unsigned long)(*ptr - '0'); 1904 ++ptr; 1905 if (n++ == maxlen) 1906 break; 1907 } 1908 } 1909 else if (hex != 0 || dohex > 1) 1910 { 1911 /* hex */ 1912 if (hex != 0) 1913 n += 2; /* skip over "0x" */ 1914 while (vim_isxdigit(*ptr)) 1915 { 1916 un = 16 * un + (unsigned long)hex2nr(*ptr); 1917 ++ptr; 1918 if (n++ == maxlen) 1919 break; 1920 } 1921 } 1922 else 1923 { 1924 /* decimal */ 1925 while (VIM_ISDIGIT(*ptr)) 1926 { 1927 un = 10 * un + (unsigned long)(*ptr - '0'); 1928 ++ptr; 1929 if (n++ == maxlen) 1930 break; 1931 } 1932 } 1933 1934 if (hexp != NULL) 1935 *hexp = hex; 1936 if (len != NULL) 1937 *len = (int)(ptr - start); 1938 if (nptr != NULL) 1939 { 1940 if (negative) /* account for leading '-' for decimal numbers */ 1941 *nptr = -(long)un; 1942 else 1943 *nptr = (long)un; 1944 } 1945 if (unptr != NULL) 1946 *unptr = un; 1947 } 1948 1949 /* 1950 * Return the value of a single hex character. 1951 * Only valid when the argument is '0' - '9', 'A' - 'F' or 'a' - 'f'. 1952 */ 1953 int 1954 hex2nr(c) 1955 int c; 1956 { 1957 if (c >= 'a' && c <= 'f') 1958 return c - 'a' + 10; 1959 if (c >= 'A' && c <= 'F') 1960 return c - 'A' + 10; 1961 return c - '0'; 1962 } 1963 1964 #if defined(FEAT_TERMRESPONSE) \ 1965 || (defined(FEAT_GUI_GTK) && defined(FEAT_WINDOWS)) || defined(PROTO) 1966 /* 1967 * Convert two hex characters to a byte. 1968 * Return -1 if one of the characters is not hex. 1969 */ 1970 int 1971 hexhex2nr(p) 1972 char_u *p; 1973 { 1974 if (!vim_isxdigit(p[0]) || !vim_isxdigit(p[1])) 1975 return -1; 1976 return (hex2nr(p[0]) << 4) + hex2nr(p[1]); 1977 } 1978 #endif 1979 1980 /* 1981 * Return TRUE if "str" starts with a backslash that should be removed. 1982 * For MS-DOS, WIN32 and OS/2 this is only done when the character after the 1983 * backslash is not a normal file name character. 1984 * '$' is a valid file name character, we don't remove the backslash before 1985 * it. This means it is not possible to use an environment variable after a 1986 * backslash. "C:\$VIM\doc" is taken literally, only "$VIM\doc" works. 1987 * Although "\ name" is valid, the backslash in "Program\ files" must be 1988 * removed. Assume a file name doesn't start with a space. 1989 * For multi-byte names, never remove a backslash before a non-ascii 1990 * character, assume that all multi-byte characters are valid file name 1991 * characters. 1992 */ 1993 int 1994 rem_backslash(str) 1995 char_u *str; 1996 { 1997 #ifdef BACKSLASH_IN_FILENAME 1998 return (str[0] == '\\' 1999 # ifdef FEAT_MBYTE 2000 && str[1] < 0x80 2001 # endif 2002 && (str[1] == ' ' 2003 || (str[1] != NUL 2004 && str[1] != '*' 2005 && str[1] != '?' 2006 && !vim_isfilec(str[1])))); 2007 #else 2008 return (str[0] == '\\' && str[1] != NUL); 2009 #endif 2010 } 2011 2012 /* 2013 * Halve the number of backslashes in a file name argument. 2014 * For MS-DOS we only do this if the character after the backslash 2015 * is not a normal file character. 2016 */ 2017 void 2018 backslash_halve(p) 2019 char_u *p; 2020 { 2021 for ( ; *p; ++p) 2022 if (rem_backslash(p)) 2023 STRMOVE(p, p + 1); 2024 } 2025 2026 /* 2027 * backslash_halve() plus save the result in allocated memory. 2028 */ 2029 char_u * 2030 backslash_halve_save(p) 2031 char_u *p; 2032 { 2033 char_u *res; 2034 2035 res = vim_strsave(p); 2036 if (res == NULL) 2037 return p; 2038 backslash_halve(res); 2039 return res; 2040 } 2041 2042 #if (defined(EBCDIC) && defined(FEAT_POSTSCRIPT)) || defined(PROTO) 2043 /* 2044 * Table for EBCDIC to ASCII conversion unashamedly taken from xxd.c! 2045 * The first 64 entries have been added to map control characters defined in 2046 * ascii.h 2047 */ 2048 static char_u ebcdic2ascii_tab[256] = 2049 { 2050 0000, 0001, 0002, 0003, 0004, 0011, 0006, 0177, 2051 0010, 0011, 0012, 0013, 0014, 0015, 0016, 0017, 2052 0020, 0021, 0022, 0023, 0024, 0012, 0010, 0027, 2053 0030, 0031, 0032, 0033, 0033, 0035, 0036, 0037, 2054 0040, 0041, 0042, 0043, 0044, 0045, 0046, 0047, 2055 0050, 0051, 0052, 0053, 0054, 0055, 0056, 0057, 2056 0060, 0061, 0062, 0063, 0064, 0065, 0066, 0067, 2057 0070, 0071, 0072, 0073, 0074, 0075, 0076, 0077, 2058 0040, 0240, 0241, 0242, 0243, 0244, 0245, 0246, 2059 0247, 0250, 0325, 0056, 0074, 0050, 0053, 0174, 2060 0046, 0251, 0252, 0253, 0254, 0255, 0256, 0257, 2061 0260, 0261, 0041, 0044, 0052, 0051, 0073, 0176, 2062 0055, 0057, 0262, 0263, 0264, 0265, 0266, 0267, 2063 0270, 0271, 0313, 0054, 0045, 0137, 0076, 0077, 2064 0272, 0273, 0274, 0275, 0276, 0277, 0300, 0301, 2065 0302, 0140, 0072, 0043, 0100, 0047, 0075, 0042, 2066 0303, 0141, 0142, 0143, 0144, 0145, 0146, 0147, 2067 0150, 0151, 0304, 0305, 0306, 0307, 0310, 0311, 2068 0312, 0152, 0153, 0154, 0155, 0156, 0157, 0160, 2069 0161, 0162, 0136, 0314, 0315, 0316, 0317, 0320, 2070 0321, 0345, 0163, 0164, 0165, 0166, 0167, 0170, 2071 0171, 0172, 0322, 0323, 0324, 0133, 0326, 0327, 2072 0330, 0331, 0332, 0333, 0334, 0335, 0336, 0337, 2073 0340, 0341, 0342, 0343, 0344, 0135, 0346, 0347, 2074 0173, 0101, 0102, 0103, 0104, 0105, 0106, 0107, 2075 0110, 0111, 0350, 0351, 0352, 0353, 0354, 0355, 2076 0175, 0112, 0113, 0114, 0115, 0116, 0117, 0120, 2077 0121, 0122, 0356, 0357, 0360, 0361, 0362, 0363, 2078 0134, 0237, 0123, 0124, 0125, 0126, 0127, 0130, 2079 0131, 0132, 0364, 0365, 0366, 0367, 0370, 0371, 2080 0060, 0061, 0062, 0063, 0064, 0065, 0066, 0067, 2081 0070, 0071, 0372, 0373, 0374, 0375, 0376, 0377 2082 }; 2083 2084 /* 2085 * Convert a buffer worth of characters from EBCDIC to ASCII. Only useful if 2086 * wanting 7-bit ASCII characters out the other end. 2087 */ 2088 void 2089 ebcdic2ascii(buffer, len) 2090 char_u *buffer; 2091 int len; 2092 { 2093 int i; 2094 2095 for (i = 0; i < len; i++) 2096 buffer[i] = ebcdic2ascii_tab[buffer[i]]; 2097 } 2098 #endif 2099