1 /* vi:set ts=8 sts=4 sw=4: 2 * 3 * VIM - Vi IMproved by Bram Moolenaar 4 * 5 * Do ":help uganda" in Vim to read copying and usage conditions. 6 * Do ":help credits" in Vim to see a list of people who contributed. 7 * See README.txt for an overview of the Vim source code. 8 */ 9 /* 10 * os_mac_conv.c: Code specifically for Mac string conversions. 11 * 12 * This code has been put in a separate file to avoid the conflicts that are 13 * caused by including both the X11 and Carbon header files. 14 */ 15 16 #define NO_X11_INCLUDES 17 #define BalloonEval int /* used in header files */ 18 19 #include "vim.h" 20 #ifndef FEAT_GUI_MAC 21 # include <CoreServices/CoreServices.h> 22 #endif 23 24 25 #if defined(MACOS_CONVERT) || defined(PROTO) 26 27 # ifdef PROTO 28 /* A few dummy types to be able to generate function prototypes. */ 29 typedef int UniChar; 30 typedef int *TECObjectRef; 31 typedef int CFStringRef; 32 # endif 33 34 static char_u *mac_utf16_to_utf8 __ARGS((UniChar *from, size_t fromLen, size_t *actualLen)); 35 static UniChar *mac_utf8_to_utf16 __ARGS((char_u *from, size_t fromLen, size_t *actualLen)); 36 37 /* Converter for composing decomposed HFS+ file paths */ 38 static TECObjectRef gPathConverter; 39 /* Converter used by mac_utf16_to_utf8 */ 40 static TECObjectRef gUTF16ToUTF8Converter; 41 42 /* 43 * A Mac version of string_convert_ext() for special cases. 44 */ 45 char_u * 46 mac_string_convert(ptr, len, lenp, fail_on_error, from_enc, to_enc, unconvlenp) 47 char_u *ptr; 48 int len; 49 int *lenp; 50 int fail_on_error; 51 int from_enc; 52 int to_enc; 53 int *unconvlenp; 54 { 55 char_u *retval, *d; 56 CFStringRef cfstr; 57 int buflen, in, out, l, i; 58 CFStringEncoding from; 59 CFStringEncoding to; 60 61 switch (from_enc) 62 { 63 case 'l': from = kCFStringEncodingISOLatin1; break; 64 case 'm': from = kCFStringEncodingMacRoman; break; 65 case 'u': from = kCFStringEncodingUTF8; break; 66 default: return NULL; 67 } 68 switch (to_enc) 69 { 70 case 'l': to = kCFStringEncodingISOLatin1; break; 71 case 'm': to = kCFStringEncodingMacRoman; break; 72 case 'u': to = kCFStringEncodingUTF8; break; 73 default: return NULL; 74 } 75 76 if (unconvlenp != NULL) 77 *unconvlenp = 0; 78 cfstr = CFStringCreateWithBytes(NULL, ptr, len, from, 0); 79 80 if (cfstr == NULL) 81 fprintf(stderr, "Encoding failed\n"); 82 /* When conversion failed, try excluding bytes from the end, helps when 83 * there is an incomplete byte sequence. Only do up to 6 bytes to avoid 84 * looping a long time when there really is something unconvertible. */ 85 while (cfstr == NULL && unconvlenp != NULL && len > 1 && *unconvlenp < 6) 86 { 87 --len; 88 ++*unconvlenp; 89 cfstr = CFStringCreateWithBytes(NULL, ptr, len, from, 0); 90 } 91 if (cfstr == NULL) 92 return NULL; 93 94 if (to == kCFStringEncodingUTF8) 95 buflen = len * 6 + 1; 96 else 97 buflen = len + 1; 98 retval = alloc(buflen); 99 if (retval == NULL) 100 { 101 CFRelease(cfstr); 102 return NULL; 103 } 104 105 #if 0 106 CFRange convertRange = CFRangeMake(0, CFStringGetLength(cfstr)); 107 /* Determine output buffer size */ 108 CFStringGetBytes(cfstr, convertRange, to, NULL, FALSE, NULL, 0, (CFIndex *)&buflen); 109 retval = (buflen > 0) ? alloc(buflen) : NULL; 110 if (retval == NULL) { 111 CFRelease(cfstr); 112 return NULL; 113 } 114 115 if (lenp) 116 *lenp = buflen / sizeof(char_u); 117 118 if (!CFStringGetBytes(cfstr, convertRange, to, NULL, FALSE, retval, buflen, NULL)) 119 #endif 120 if (!CFStringGetCString(cfstr, (char *)retval, buflen, to)) 121 { 122 CFRelease(cfstr); 123 if (fail_on_error) 124 { 125 vim_free(retval); 126 return NULL; 127 } 128 129 fprintf(stderr, "Trying char-by-char conversion...\n"); 130 /* conversion failed for the whole string, but maybe it will work 131 * for each character */ 132 for (d = retval, in = 0, out = 0; in < len && out < buflen - 1;) 133 { 134 if (from == kCFStringEncodingUTF8) 135 l = utf_ptr2len(ptr + in); 136 else 137 l = 1; 138 cfstr = CFStringCreateWithBytes(NULL, ptr + in, l, from, 0); 139 if (cfstr == NULL) 140 { 141 *d++ = '?'; 142 out++; 143 } 144 else 145 { 146 if (!CFStringGetCString(cfstr, (char *)d, buflen - out, to)) 147 { 148 *d++ = '?'; 149 out++; 150 } 151 else 152 { 153 i = STRLEN(d); 154 d += i; 155 out += i; 156 } 157 CFRelease(cfstr); 158 } 159 in += l; 160 } 161 *d = NUL; 162 if (lenp != NULL) 163 *lenp = out; 164 return retval; 165 } 166 CFRelease(cfstr); 167 if (lenp != NULL) 168 *lenp = STRLEN(retval); 169 170 return retval; 171 } 172 173 /* 174 * Conversion from Apple MacRoman char encoding to UTF-8 or latin1, using 175 * standard Carbon framework. 176 * Input: "ptr[*sizep]". 177 * "real_size" is the size of the buffer that "ptr" points to. 178 * output is in-place, "sizep" is adjusted. 179 * Returns OK or FAIL. 180 */ 181 int 182 macroman2enc(ptr, sizep, real_size) 183 char_u *ptr; 184 long *sizep; 185 long real_size; 186 { 187 CFStringRef cfstr; 188 CFRange r; 189 CFIndex len = *sizep; 190 191 /* MacRoman is an 8-bit encoding, no need to move bytes to 192 * conv_rest[]. */ 193 cfstr = CFStringCreateWithBytes(NULL, ptr, len, 194 kCFStringEncodingMacRoman, 0); 195 /* 196 * If there is a conversion error, try using another 197 * conversion. 198 */ 199 if (cfstr == NULL) 200 return FAIL; 201 202 r.location = 0; 203 r.length = CFStringGetLength(cfstr); 204 if (r.length != CFStringGetBytes(cfstr, r, 205 (enc_utf8) ? kCFStringEncodingUTF8 : kCFStringEncodingISOLatin1, 206 0, /* no lossy conversion */ 207 0, /* not external representation */ 208 ptr + *sizep, real_size - *sizep, &len)) 209 { 210 CFRelease(cfstr); 211 return FAIL; 212 } 213 CFRelease(cfstr); 214 mch_memmove(ptr, ptr + *sizep, len); 215 *sizep = len; 216 217 return OK; 218 } 219 220 /* 221 * Conversion from UTF-8 or latin1 to MacRoman. 222 * Input: "from[fromlen]" 223 * Output: "to[maxtolen]" length in "*tolenp" 224 * Unconverted rest in rest[*restlenp]. 225 * Returns OK or FAIL. 226 */ 227 int 228 enc2macroman(from, fromlen, to, tolenp, maxtolen, rest, restlenp) 229 char_u *from; 230 size_t fromlen; 231 char_u *to; 232 int *tolenp; 233 int maxtolen; 234 char_u *rest; 235 int *restlenp; 236 { 237 CFStringRef cfstr; 238 CFRange r; 239 CFIndex l; 240 241 *restlenp = 0; 242 cfstr = CFStringCreateWithBytes(NULL, from, fromlen, 243 (enc_utf8) ? kCFStringEncodingUTF8 : kCFStringEncodingISOLatin1, 244 0); 245 while (cfstr == NULL && *restlenp < 3 && fromlen > 1) 246 { 247 rest[*restlenp++] = from[--fromlen]; 248 cfstr = CFStringCreateWithBytes(NULL, from, fromlen, 249 (enc_utf8) ? kCFStringEncodingUTF8 : kCFStringEncodingISOLatin1, 250 0); 251 } 252 if (cfstr == NULL) 253 return FAIL; 254 255 r.location = 0; 256 r.length = CFStringGetLength(cfstr); 257 if (r.length != CFStringGetBytes(cfstr, r, 258 kCFStringEncodingMacRoman, 259 0, /* no lossy conversion */ 260 0, /* not external representation (since vim 261 * handles this internally */ 262 to, maxtolen, &l)) 263 { 264 CFRelease(cfstr); 265 return FAIL; 266 } 267 CFRelease(cfstr); 268 *tolenp = l; 269 return OK; 270 } 271 272 /* 273 * Initializes text converters 274 */ 275 void 276 mac_conv_init() 277 { 278 TextEncoding utf8_encoding; 279 TextEncoding utf8_hfsplus_encoding; 280 TextEncoding utf8_canon_encoding; 281 TextEncoding utf16_encoding; 282 283 utf8_encoding = CreateTextEncoding(kTextEncodingUnicodeDefault, 284 kTextEncodingDefaultVariant, kUnicodeUTF8Format); 285 utf8_hfsplus_encoding = CreateTextEncoding(kTextEncodingUnicodeDefault, 286 kUnicodeHFSPlusCompVariant, kUnicodeUTF8Format); 287 utf8_canon_encoding = CreateTextEncoding(kTextEncodingUnicodeDefault, 288 kUnicodeCanonicalCompVariant, kUnicodeUTF8Format); 289 utf16_encoding = CreateTextEncoding(kTextEncodingUnicodeDefault, 290 kTextEncodingDefaultVariant, kUnicode16BitFormat); 291 292 if (TECCreateConverter(&gPathConverter, utf8_encoding, 293 utf8_hfsplus_encoding) != noErr) 294 gPathConverter = NULL; 295 296 if (TECCreateConverter(&gUTF16ToUTF8Converter, utf16_encoding, 297 utf8_canon_encoding) != noErr) 298 { 299 /* On pre-10.3, Unicode normalization is not available so 300 * fall back to non-normalizing converter */ 301 if (TECCreateConverter(&gUTF16ToUTF8Converter, utf16_encoding, 302 utf8_encoding) != noErr) 303 gUTF16ToUTF8Converter = NULL; 304 } 305 } 306 307 /* 308 * Destroys text converters 309 */ 310 void 311 mac_conv_cleanup() 312 { 313 if (gUTF16ToUTF8Converter) 314 { 315 TECDisposeConverter(gUTF16ToUTF8Converter); 316 gUTF16ToUTF8Converter = NULL; 317 } 318 319 if (gPathConverter) 320 { 321 TECDisposeConverter(gPathConverter); 322 gPathConverter = NULL; 323 } 324 } 325 326 /* 327 * Conversion from UTF-16 UniChars to 'encoding' 328 * The function signature uses the real type of UniChar (as typedef'ed in 329 * CFBase.h) to avoid clashes with X11 header files in the .pro file 330 */ 331 char_u * 332 mac_utf16_to_enc(from, fromLen, actualLen) 333 unsigned short *from; 334 size_t fromLen; 335 size_t *actualLen; 336 { 337 /* Following code borrows somewhat from os_mswin.c */ 338 vimconv_T conv; 339 size_t utf8_len; 340 char_u *utf8_str; 341 char_u *result = NULL; 342 343 /* Convert to utf-8 first, works better with iconv */ 344 utf8_len = 0; 345 utf8_str = mac_utf16_to_utf8(from, fromLen, &utf8_len); 346 347 if (utf8_str) 348 { 349 /* We might be called before we have p_enc set up. */ 350 conv.vc_type = CONV_NONE; 351 352 /* If encoding (p_enc) is any unicode, it is actually in utf-8 (vim 353 * internal unicode is always utf-8) so don't convert in such cases */ 354 355 if ((enc_canon_props(p_enc) & ENC_UNICODE) == 0) 356 convert_setup(&conv, (char_u *)"utf-8", 357 p_enc? p_enc: (char_u *)"macroman"); 358 if (conv.vc_type == CONV_NONE) 359 { 360 /* p_enc is utf-8, so we're done. */ 361 result = utf8_str; 362 } 363 else 364 { 365 result = string_convert(&conv, utf8_str, (int *)&utf8_len); 366 vim_free(utf8_str); 367 } 368 369 convert_setup(&conv, NULL, NULL); 370 371 if (actualLen) 372 *actualLen = utf8_len; 373 } 374 else if (actualLen) 375 *actualLen = 0; 376 377 return result; 378 } 379 380 /* 381 * Conversion from 'encoding' to UTF-16 UniChars 382 * The function return uses the real type of UniChar (as typedef'ed in 383 * CFBase.h) to avoid clashes with X11 header files in the .pro file 384 */ 385 unsigned short * 386 mac_enc_to_utf16(from, fromLen, actualLen) 387 char_u *from; 388 size_t fromLen; 389 size_t *actualLen; 390 { 391 /* Following code borrows somewhat from os_mswin.c */ 392 vimconv_T conv; 393 size_t utf8_len; 394 char_u *utf8_str; 395 UniChar *result = NULL; 396 Boolean should_free_utf8 = FALSE; 397 398 do 399 { 400 /* Use MacRoman by default, we might be called before we have p_enc 401 * set up. Convert to utf-8 first, works better with iconv(). Does 402 * nothing if 'encoding' is "utf-8". */ 403 conv.vc_type = CONV_NONE; 404 if ((enc_canon_props(p_enc) & ENC_UNICODE) == 0 && 405 convert_setup(&conv, p_enc ? p_enc : (char_u *)"macroman", 406 (char_u *)"utf-8") == FAIL) 407 break; 408 409 if (conv.vc_type != CONV_NONE) 410 { 411 utf8_len = fromLen; 412 utf8_str = string_convert(&conv, from, (int *)&utf8_len); 413 should_free_utf8 = TRUE; 414 } 415 else 416 { 417 utf8_str = from; 418 utf8_len = fromLen; 419 } 420 421 if (utf8_str == NULL) 422 break; 423 424 convert_setup(&conv, NULL, NULL); 425 426 result = mac_utf8_to_utf16(utf8_str, utf8_len, actualLen); 427 428 if (should_free_utf8) 429 vim_free(utf8_str); 430 return result; 431 } 432 while (0); 433 434 if (actualLen) 435 *actualLen = 0; 436 437 return result; 438 } 439 440 /* 441 * Converts from UTF-16 UniChars to CFString 442 * The void * return type is actually a CFStringRef 443 */ 444 void * 445 mac_enc_to_cfstring(from, fromLen) 446 char_u *from; 447 size_t fromLen; 448 { 449 UniChar *utf16_str; 450 size_t utf16_len; 451 CFStringRef result = NULL; 452 453 utf16_str = mac_enc_to_utf16(from, fromLen, &utf16_len); 454 if (utf16_str) 455 { 456 result = CFStringCreateWithCharacters(NULL, utf16_str, utf16_len/sizeof(UniChar)); 457 vim_free(utf16_str); 458 } 459 460 return (void *)result; 461 } 462 463 /* 464 * Converts a decomposed HFS+ UTF-8 path to precomposed UTF-8 465 */ 466 char_u * 467 mac_precompose_path(decompPath, decompLen, precompLen) 468 char_u *decompPath; 469 size_t decompLen; 470 size_t *precompLen; 471 { 472 char_u *result = NULL; 473 size_t actualLen = 0; 474 475 if (gPathConverter) 476 { 477 result = alloc(decompLen); 478 if (result) 479 { 480 if (TECConvertText(gPathConverter, decompPath, 481 decompLen, &decompLen, result, 482 decompLen, &actualLen) != noErr) 483 { 484 vim_free(result); 485 result = NULL; 486 } 487 } 488 } 489 490 if (precompLen) 491 *precompLen = actualLen; 492 493 return result; 494 } 495 496 /* 497 * Converts from UTF-16 UniChars to precomposed UTF-8 498 */ 499 static char_u * 500 mac_utf16_to_utf8(from, fromLen, actualLen) 501 UniChar *from; 502 size_t fromLen; 503 size_t *actualLen; 504 { 505 ByteCount utf8_len; 506 ByteCount inputRead; 507 char_u *result; 508 509 if (gUTF16ToUTF8Converter) 510 { 511 result = alloc(fromLen * 6 + 1); 512 if (result && TECConvertText(gUTF16ToUTF8Converter, (ConstTextPtr)from, 513 fromLen, &inputRead, result, 514 (fromLen*6+1)*sizeof(char_u), &utf8_len) == noErr) 515 { 516 TECFlushText(gUTF16ToUTF8Converter, result, (fromLen*6+1)*sizeof(char_u), &inputRead); 517 utf8_len += inputRead; 518 } 519 else 520 { 521 vim_free(result); 522 result = NULL; 523 } 524 } 525 else 526 { 527 result = NULL; 528 } 529 530 if (actualLen) 531 *actualLen = result ? utf8_len : 0; 532 533 return result; 534 } 535 536 /* 537 * Converts from UTF-8 to UTF-16 UniChars 538 */ 539 static UniChar * 540 mac_utf8_to_utf16(from, fromLen, actualLen) 541 char_u *from; 542 size_t fromLen; 543 size_t *actualLen; 544 { 545 CFStringRef utf8_str; 546 CFRange convertRange; 547 UniChar *result = NULL; 548 549 utf8_str = CFStringCreateWithBytes(NULL, from, fromLen, 550 kCFStringEncodingUTF8, FALSE); 551 552 if (utf8_str == NULL) { 553 if (actualLen) 554 *actualLen = 0; 555 return NULL; 556 } 557 558 convertRange = CFRangeMake(0, CFStringGetLength(utf8_str)); 559 result = (UniChar *)alloc(convertRange.length * sizeof(UniChar)); 560 561 CFStringGetCharacters(utf8_str, convertRange, result); 562 563 CFRelease(utf8_str); 564 565 if (actualLen) 566 *actualLen = convertRange.length * sizeof(UniChar); 567 568 return result; 569 } 570 571 /* 572 * Sets LANG environment variable in Vim from Mac locale 573 */ 574 void 575 mac_lang_init() { 576 if (mch_getenv((char_u *)"LANG") == NULL) 577 { 578 char buf[20]; 579 if (LocaleRefGetPartString(NULL, 580 kLocaleLanguageMask | kLocaleLanguageVariantMask | 581 kLocaleRegionMask | kLocaleRegionVariantMask, 582 sizeof buf, buf) == noErr && *buf) 583 { 584 vim_setenv((char_u *)"LANG", (char_u *)buf); 585 # ifdef HAVE_LOCALE_H 586 setlocale(LC_ALL, ""); 587 # endif 588 } 589 } 590 } 591 #endif /* MACOS_CONVERT */ 592