1 /* vi:set ts=8 sts=4 sw=4: 2 * 3 * VIM - Vi IMproved by Bram Moolenaar 4 * 5 * Do ":help uganda" in Vim to read copying and usage conditions. 6 * Do ":help credits" in Vim to see a list of people who contributed. 7 * See README.txt for an overview of the Vim source code. 8 */ 9 /* 10 * os_mac_conv.c: Code specifically for Mac string conversions. 11 * 12 * This code has been put in a separate file to avoid the conflicts that are 13 * caused by including both the X11 and Carbon header files. 14 */ 15 16 #define NO_X11_INCLUDES 17 #define BalloonEval int /* used in header files */ 18 19 #include "vim.h" 20 21 #if !defined(FEAT_GUI_MAC) && !defined(PROTO) 22 # include <CoreServices/CoreServices.h> 23 #endif 24 25 26 #if defined(MACOS_CONVERT) || defined(PROTO) 27 28 # ifdef PROTO 29 /* A few dummy types to be able to generate function prototypes. */ 30 typedef int UniChar; 31 typedef int *TECObjectRef; 32 typedef int CFStringRef; 33 # endif 34 35 static char_u *mac_utf16_to_utf8(UniChar *from, size_t fromLen, size_t *actualLen); 36 static UniChar *mac_utf8_to_utf16(char_u *from, size_t fromLen, size_t *actualLen); 37 38 /* Converter for composing decomposed HFS+ file paths */ 39 static TECObjectRef gPathConverter; 40 /* Converter used by mac_utf16_to_utf8 */ 41 static TECObjectRef gUTF16ToUTF8Converter; 42 43 /* 44 * A Mac version of string_convert_ext() for special cases. 45 */ 46 char_u * 47 mac_string_convert( 48 char_u *ptr, 49 int len, 50 int *lenp, 51 int fail_on_error, 52 int from_enc, 53 int to_enc, 54 int *unconvlenp) 55 { 56 char_u *retval, *d; 57 CFStringRef cfstr; 58 int buflen, in, out, l, i; 59 CFStringEncoding from; 60 CFStringEncoding to; 61 62 switch (from_enc) 63 { 64 case 'l': from = kCFStringEncodingISOLatin1; break; 65 case 'm': from = kCFStringEncodingMacRoman; break; 66 case 'u': from = kCFStringEncodingUTF8; break; 67 default: return NULL; 68 } 69 switch (to_enc) 70 { 71 case 'l': to = kCFStringEncodingISOLatin1; break; 72 case 'm': to = kCFStringEncodingMacRoman; break; 73 case 'u': to = kCFStringEncodingUTF8; break; 74 default: return NULL; 75 } 76 77 if (unconvlenp != NULL) 78 *unconvlenp = 0; 79 cfstr = CFStringCreateWithBytes(NULL, ptr, len, from, 0); 80 81 if (cfstr == NULL) 82 fprintf(stderr, "Encoding failed\n"); 83 /* When conversion failed, try excluding bytes from the end, helps when 84 * there is an incomplete byte sequence. Only do up to 6 bytes to avoid 85 * looping a long time when there really is something unconvertible. */ 86 while (cfstr == NULL && unconvlenp != NULL && len > 1 && *unconvlenp < 6) 87 { 88 --len; 89 ++*unconvlenp; 90 cfstr = CFStringCreateWithBytes(NULL, ptr, len, from, 0); 91 } 92 if (cfstr == NULL) 93 return NULL; 94 95 if (to == kCFStringEncodingUTF8) 96 buflen = len * 6 + 1; 97 else 98 buflen = len + 1; 99 retval = alloc(buflen); 100 if (retval == NULL) 101 { 102 CFRelease(cfstr); 103 return NULL; 104 } 105 106 #if 0 107 CFRange convertRange = CFRangeMake(0, CFStringGetLength(cfstr)); 108 /* Determine output buffer size */ 109 CFStringGetBytes(cfstr, convertRange, to, NULL, FALSE, NULL, 0, (CFIndex *)&buflen); 110 retval = (buflen > 0) ? alloc(buflen) : NULL; 111 if (retval == NULL) { 112 CFRelease(cfstr); 113 return NULL; 114 } 115 116 if (lenp) 117 *lenp = buflen / sizeof(char_u); 118 119 if (!CFStringGetBytes(cfstr, convertRange, to, NULL, FALSE, retval, buflen, NULL)) 120 #endif 121 if (!CFStringGetCString(cfstr, (char *)retval, buflen, to)) 122 { 123 CFRelease(cfstr); 124 if (fail_on_error) 125 { 126 vim_free(retval); 127 return NULL; 128 } 129 130 fprintf(stderr, "Trying char-by-char conversion...\n"); 131 /* conversion failed for the whole string, but maybe it will work 132 * for each character */ 133 for (d = retval, in = 0, out = 0; in < len && out < buflen - 1;) 134 { 135 if (from == kCFStringEncodingUTF8) 136 l = utf_ptr2len(ptr + in); 137 else 138 l = 1; 139 cfstr = CFStringCreateWithBytes(NULL, ptr + in, l, from, 0); 140 if (cfstr == NULL) 141 { 142 *d++ = '?'; 143 out++; 144 } 145 else 146 { 147 if (!CFStringGetCString(cfstr, (char *)d, buflen - out, to)) 148 { 149 *d++ = '?'; 150 out++; 151 } 152 else 153 { 154 i = STRLEN(d); 155 d += i; 156 out += i; 157 } 158 CFRelease(cfstr); 159 } 160 in += l; 161 } 162 *d = NUL; 163 if (lenp != NULL) 164 *lenp = out; 165 return retval; 166 } 167 CFRelease(cfstr); 168 if (lenp != NULL) 169 *lenp = STRLEN(retval); 170 171 return retval; 172 } 173 174 /* 175 * Conversion from Apple MacRoman char encoding to UTF-8 or latin1, using 176 * standard Carbon framework. 177 * Input: "ptr[*sizep]". 178 * "real_size" is the size of the buffer that "ptr" points to. 179 * output is in-place, "sizep" is adjusted. 180 * Returns OK or FAIL. 181 */ 182 int 183 macroman2enc( 184 char_u *ptr, 185 long *sizep, 186 long real_size) 187 { 188 CFStringRef cfstr; 189 CFRange r; 190 CFIndex len = *sizep; 191 192 /* MacRoman is an 8-bit encoding, no need to move bytes to 193 * conv_rest[]. */ 194 cfstr = CFStringCreateWithBytes(NULL, ptr, len, 195 kCFStringEncodingMacRoman, 0); 196 /* 197 * If there is a conversion error, try using another 198 * conversion. 199 */ 200 if (cfstr == NULL) 201 return FAIL; 202 203 r.location = 0; 204 r.length = CFStringGetLength(cfstr); 205 if (r.length != CFStringGetBytes(cfstr, r, 206 (enc_utf8) ? kCFStringEncodingUTF8 : kCFStringEncodingISOLatin1, 207 0, /* no lossy conversion */ 208 0, /* not external representation */ 209 ptr + *sizep, real_size - *sizep, &len)) 210 { 211 CFRelease(cfstr); 212 return FAIL; 213 } 214 CFRelease(cfstr); 215 mch_memmove(ptr, ptr + *sizep, len); 216 *sizep = len; 217 218 return OK; 219 } 220 221 /* 222 * Conversion from UTF-8 or latin1 to MacRoman. 223 * Input: "from[fromlen]" 224 * Output: "to[maxtolen]" length in "*tolenp" 225 * Unconverted rest in rest[*restlenp]. 226 * Returns OK or FAIL. 227 */ 228 int 229 enc2macroman( 230 char_u *from, 231 size_t fromlen, 232 char_u *to, 233 int *tolenp, 234 int maxtolen, 235 char_u *rest, 236 int *restlenp) 237 { 238 CFStringRef cfstr; 239 CFRange r; 240 CFIndex l; 241 242 *restlenp = 0; 243 cfstr = CFStringCreateWithBytes(NULL, from, fromlen, 244 (enc_utf8) ? kCFStringEncodingUTF8 : kCFStringEncodingISOLatin1, 245 0); 246 while (cfstr == NULL && *restlenp < 3 && fromlen > 1) 247 { 248 rest[*restlenp++] = from[--fromlen]; 249 cfstr = CFStringCreateWithBytes(NULL, from, fromlen, 250 (enc_utf8) ? kCFStringEncodingUTF8 : kCFStringEncodingISOLatin1, 251 0); 252 } 253 if (cfstr == NULL) 254 return FAIL; 255 256 r.location = 0; 257 r.length = CFStringGetLength(cfstr); 258 if (r.length != CFStringGetBytes(cfstr, r, 259 kCFStringEncodingMacRoman, 260 0, /* no lossy conversion */ 261 0, /* not external representation (since vim 262 * handles this internally */ 263 to, maxtolen, &l)) 264 { 265 CFRelease(cfstr); 266 return FAIL; 267 } 268 CFRelease(cfstr); 269 *tolenp = l; 270 return OK; 271 } 272 273 /* 274 * Initializes text converters 275 */ 276 void 277 mac_conv_init(void) 278 { 279 TextEncoding utf8_encoding; 280 TextEncoding utf8_hfsplus_encoding; 281 TextEncoding utf8_canon_encoding; 282 TextEncoding utf16_encoding; 283 284 utf8_encoding = CreateTextEncoding(kTextEncodingUnicodeDefault, 285 kTextEncodingDefaultVariant, kUnicodeUTF8Format); 286 utf8_hfsplus_encoding = CreateTextEncoding(kTextEncodingUnicodeDefault, 287 kUnicodeHFSPlusCompVariant, kUnicodeUTF8Format); 288 utf8_canon_encoding = CreateTextEncoding(kTextEncodingUnicodeDefault, 289 kUnicodeCanonicalCompVariant, kUnicodeUTF8Format); 290 utf16_encoding = CreateTextEncoding(kTextEncodingUnicodeDefault, 291 kTextEncodingDefaultVariant, kUnicode16BitFormat); 292 293 if (TECCreateConverter(&gPathConverter, utf8_encoding, 294 utf8_hfsplus_encoding) != noErr) 295 gPathConverter = NULL; 296 297 if (TECCreateConverter(&gUTF16ToUTF8Converter, utf16_encoding, 298 utf8_canon_encoding) != noErr) 299 { 300 /* On pre-10.3, Unicode normalization is not available so 301 * fall back to non-normalizing converter */ 302 if (TECCreateConverter(&gUTF16ToUTF8Converter, utf16_encoding, 303 utf8_encoding) != noErr) 304 gUTF16ToUTF8Converter = NULL; 305 } 306 } 307 308 /* 309 * Destroys text converters 310 */ 311 void 312 mac_conv_cleanup(void) 313 { 314 if (gUTF16ToUTF8Converter) 315 { 316 TECDisposeConverter(gUTF16ToUTF8Converter); 317 gUTF16ToUTF8Converter = NULL; 318 } 319 320 if (gPathConverter) 321 { 322 TECDisposeConverter(gPathConverter); 323 gPathConverter = NULL; 324 } 325 } 326 327 /* 328 * Conversion from UTF-16 UniChars to 'encoding' 329 * The function signature uses the real type of UniChar (as typedef'ed in 330 * CFBase.h) to avoid clashes with X11 header files in the .pro file 331 */ 332 char_u * 333 mac_utf16_to_enc( 334 unsigned short *from, 335 size_t fromLen, 336 size_t *actualLen) 337 { 338 /* Following code borrows somewhat from os_mswin.c */ 339 vimconv_T conv; 340 size_t utf8_len; 341 char_u *utf8_str; 342 char_u *result = NULL; 343 344 /* Convert to utf-8 first, works better with iconv */ 345 utf8_len = 0; 346 utf8_str = mac_utf16_to_utf8(from, fromLen, &utf8_len); 347 348 if (utf8_str) 349 { 350 /* We might be called before we have p_enc set up. */ 351 conv.vc_type = CONV_NONE; 352 353 /* If encoding (p_enc) is any unicode, it is actually in utf-8 (vim 354 * internal unicode is always utf-8) so don't convert in such cases */ 355 356 if ((enc_canon_props(p_enc) & ENC_UNICODE) == 0) 357 convert_setup(&conv, (char_u *)"utf-8", 358 p_enc? p_enc: (char_u *)"macroman"); 359 if (conv.vc_type == CONV_NONE) 360 { 361 /* p_enc is utf-8, so we're done. */ 362 result = utf8_str; 363 } 364 else 365 { 366 result = string_convert(&conv, utf8_str, (int *)&utf8_len); 367 vim_free(utf8_str); 368 } 369 370 convert_setup(&conv, NULL, NULL); 371 372 if (actualLen) 373 *actualLen = utf8_len; 374 } 375 else if (actualLen) 376 *actualLen = 0; 377 378 return result; 379 } 380 381 /* 382 * Conversion from 'encoding' to UTF-16 UniChars 383 * The function return uses the real type of UniChar (as typedef'ed in 384 * CFBase.h) to avoid clashes with X11 header files in the .pro file 385 */ 386 unsigned short * 387 mac_enc_to_utf16( 388 char_u *from, 389 size_t fromLen, 390 size_t *actualLen) 391 { 392 /* Following code borrows somewhat from os_mswin.c */ 393 vimconv_T conv; 394 size_t utf8_len; 395 char_u *utf8_str; 396 UniChar *result = NULL; 397 Boolean should_free_utf8 = FALSE; 398 399 do 400 { 401 /* Use MacRoman by default, we might be called before we have p_enc 402 * set up. Convert to utf-8 first, works better with iconv(). Does 403 * nothing if 'encoding' is "utf-8". */ 404 conv.vc_type = CONV_NONE; 405 if ((enc_canon_props(p_enc) & ENC_UNICODE) == 0 && 406 convert_setup(&conv, p_enc ? p_enc : (char_u *)"macroman", 407 (char_u *)"utf-8") == FAIL) 408 break; 409 410 if (conv.vc_type != CONV_NONE) 411 { 412 utf8_len = fromLen; 413 utf8_str = string_convert(&conv, from, (int *)&utf8_len); 414 should_free_utf8 = TRUE; 415 } 416 else 417 { 418 utf8_str = from; 419 utf8_len = fromLen; 420 } 421 422 if (utf8_str == NULL) 423 break; 424 425 convert_setup(&conv, NULL, NULL); 426 427 result = mac_utf8_to_utf16(utf8_str, utf8_len, actualLen); 428 429 if (should_free_utf8) 430 vim_free(utf8_str); 431 return result; 432 } 433 while (0); 434 435 if (actualLen) 436 *actualLen = 0; 437 438 return result; 439 } 440 441 /* 442 * Converts from UTF-16 UniChars to CFString 443 * The void * return type is actually a CFStringRef 444 */ 445 void * 446 mac_enc_to_cfstring( 447 char_u *from, 448 size_t fromLen) 449 { 450 UniChar *utf16_str; 451 size_t utf16_len; 452 CFStringRef result = NULL; 453 454 utf16_str = mac_enc_to_utf16(from, fromLen, &utf16_len); 455 if (utf16_str) 456 { 457 result = CFStringCreateWithCharacters(NULL, utf16_str, utf16_len/sizeof(UniChar)); 458 vim_free(utf16_str); 459 } 460 461 return (void *)result; 462 } 463 464 /* 465 * Converts a decomposed HFS+ UTF-8 path to precomposed UTF-8 466 */ 467 char_u * 468 mac_precompose_path( 469 char_u *decompPath, 470 size_t decompLen, 471 size_t *precompLen) 472 { 473 char_u *result = NULL; 474 size_t actualLen = 0; 475 476 if (gPathConverter) 477 { 478 result = alloc(decompLen); 479 if (result) 480 { 481 if (TECConvertText(gPathConverter, decompPath, 482 decompLen, &decompLen, result, 483 decompLen, &actualLen) != noErr) 484 { 485 vim_free(result); 486 result = NULL; 487 } 488 } 489 } 490 491 if (precompLen) 492 *precompLen = actualLen; 493 494 return result; 495 } 496 497 /* 498 * Converts from UTF-16 UniChars to precomposed UTF-8 499 */ 500 static char_u * 501 mac_utf16_to_utf8( 502 UniChar *from, 503 size_t fromLen, 504 size_t *actualLen) 505 { 506 ByteCount utf8_len; 507 ByteCount inputRead; 508 char_u *result; 509 510 if (gUTF16ToUTF8Converter) 511 { 512 result = alloc(fromLen * 6 + 1); 513 if (result && TECConvertText(gUTF16ToUTF8Converter, (ConstTextPtr)from, 514 fromLen, &inputRead, result, 515 (fromLen*6+1)*sizeof(char_u), &utf8_len) == noErr) 516 { 517 TECFlushText(gUTF16ToUTF8Converter, result, (fromLen*6+1)*sizeof(char_u), &inputRead); 518 utf8_len += inputRead; 519 } 520 else 521 { 522 vim_free(result); 523 result = NULL; 524 } 525 } 526 else 527 { 528 result = NULL; 529 } 530 531 if (actualLen) 532 *actualLen = result ? utf8_len : 0; 533 534 return result; 535 } 536 537 /* 538 * Converts from UTF-8 to UTF-16 UniChars 539 */ 540 static UniChar * 541 mac_utf8_to_utf16( 542 char_u *from, 543 size_t fromLen, 544 size_t *actualLen) 545 { 546 CFStringRef utf8_str; 547 CFRange convertRange; 548 UniChar *result = NULL; 549 550 utf8_str = CFStringCreateWithBytes(NULL, from, fromLen, 551 kCFStringEncodingUTF8, FALSE); 552 553 if (utf8_str == NULL) { 554 if (actualLen) 555 *actualLen = 0; 556 return NULL; 557 } 558 559 convertRange = CFRangeMake(0, CFStringGetLength(utf8_str)); 560 result = (UniChar *)alloc(convertRange.length * sizeof(UniChar)); 561 562 CFStringGetCharacters(utf8_str, convertRange, result); 563 564 CFRelease(utf8_str); 565 566 if (actualLen) 567 *actualLen = convertRange.length * sizeof(UniChar); 568 569 return result; 570 } 571 572 /* 573 * Sets LANG environment variable in Vim from Mac locale 574 */ 575 void 576 mac_lang_init(void) 577 { 578 if (mch_getenv((char_u *)"LANG") == NULL) 579 { 580 char buf[20]; 581 if (LocaleRefGetPartString(NULL, 582 kLocaleLanguageMask | kLocaleLanguageVariantMask | 583 kLocaleRegionMask | kLocaleRegionVariantMask, 584 sizeof buf, buf) == noErr && *buf) 585 { 586 vim_setenv((char_u *)"LANG", (char_u *)buf); 587 # ifdef HAVE_LOCALE_H 588 setlocale(LC_ALL, ""); 589 # endif 590 } 591 } 592 } 593 #endif /* MACOS_CONVERT */ 594