1 /* vi:set ts=8 sts=4 sw=4 noet: 2 * 3 * VIM - Vi IMproved by Bram Moolenaar 4 * 5 * Do ":help uganda" in Vim to read copying and usage conditions. 6 * Do ":help credits" in Vim to see a list of people who contributed. 7 * See README.txt for an overview of the Vim source code. 8 */ 9 /* 10 * os_mac_conv.c: Code specifically for Mac string conversions. 11 * 12 * This code has been put in a separate file to avoid the conflicts that are 13 * caused by including both the X11 and Carbon header files. 14 */ 15 16 #define NO_X11_INCLUDES 17 18 #include "vim.h" 19 20 #if !defined(FEAT_GUI_MAC) && !defined(PROTO) 21 # include <CoreServices/CoreServices.h> 22 #endif 23 24 25 #if defined(MACOS_CONVERT) || defined(PROTO) 26 27 # ifdef PROTO 28 // A few dummy types to be able to generate function prototypes. 29 typedef int UniChar; 30 typedef int *TECObjectRef; 31 typedef int CFStringRef; 32 # endif 33 34 static char_u *mac_utf16_to_utf8(UniChar *from, size_t fromLen, size_t *actualLen); 35 static UniChar *mac_utf8_to_utf16(char_u *from, size_t fromLen, size_t *actualLen); 36 37 // Converter for composing decomposed HFS+ file paths 38 static TECObjectRef gPathConverter; 39 // Converter used by mac_utf16_to_utf8 40 static TECObjectRef gUTF16ToUTF8Converter; 41 42 /* 43 * A Mac version of string_convert_ext() for special cases. 44 */ 45 char_u * 46 mac_string_convert( 47 char_u *ptr, 48 int len, 49 int *lenp, 50 int fail_on_error, 51 int from_enc, 52 int to_enc, 53 int *unconvlenp) 54 { 55 char_u *retval, *d; 56 CFStringRef cfstr; 57 int buflen, in, out, l, i; 58 CFStringEncoding from; 59 CFStringEncoding to; 60 61 switch (from_enc) 62 { 63 case 'l': from = kCFStringEncodingISOLatin1; break; 64 case 'm': from = kCFStringEncodingMacRoman; break; 65 case 'u': from = kCFStringEncodingUTF8; break; 66 default: return NULL; 67 } 68 switch (to_enc) 69 { 70 case 'l': to = kCFStringEncodingISOLatin1; break; 71 case 'm': to = kCFStringEncodingMacRoman; break; 72 case 'u': to = kCFStringEncodingUTF8; break; 73 default: return NULL; 74 } 75 76 if (unconvlenp != NULL) 77 *unconvlenp = 0; 78 cfstr = CFStringCreateWithBytes(NULL, ptr, len, from, 0); 79 80 if (cfstr == NULL) 81 fprintf(stderr, "Encoding failed\n"); 82 // When conversion failed, try excluding bytes from the end, helps when 83 // there is an incomplete byte sequence. Only do up to 6 bytes to avoid 84 // looping a long time when there really is something unconvertible. 85 while (cfstr == NULL && unconvlenp != NULL && len > 1 && *unconvlenp < 6) 86 { 87 --len; 88 ++*unconvlenp; 89 cfstr = CFStringCreateWithBytes(NULL, ptr, len, from, 0); 90 } 91 if (cfstr == NULL) 92 return NULL; 93 94 if (to == kCFStringEncodingUTF8) 95 buflen = len * 6 + 1; 96 else 97 buflen = len + 1; 98 retval = alloc(buflen); 99 if (retval == NULL) 100 { 101 CFRelease(cfstr); 102 return NULL; 103 } 104 105 #if 0 106 CFRange convertRange = CFRangeMake(0, CFStringGetLength(cfstr)); 107 // Determine output buffer size 108 CFStringGetBytes(cfstr, convertRange, to, NULL, FALSE, NULL, 0, (CFIndex *)&buflen); 109 retval = (buflen > 0) ? alloc(buflen) : NULL; 110 if (retval == NULL) { 111 CFRelease(cfstr); 112 return NULL; 113 } 114 115 if (lenp) 116 *lenp = buflen / sizeof(char_u); 117 118 if (!CFStringGetBytes(cfstr, convertRange, to, NULL, FALSE, retval, buflen, NULL)) 119 #endif 120 if (!CFStringGetCString(cfstr, (char *)retval, buflen, to)) 121 { 122 CFRelease(cfstr); 123 if (fail_on_error) 124 { 125 vim_free(retval); 126 return NULL; 127 } 128 129 fprintf(stderr, "Trying char-by-char conversion...\n"); 130 // conversion failed for the whole string, but maybe it will work 131 // for each character 132 for (d = retval, in = 0, out = 0; in < len && out < buflen - 1;) 133 { 134 if (from == kCFStringEncodingUTF8) 135 l = utf_ptr2len(ptr + in); 136 else 137 l = 1; 138 cfstr = CFStringCreateWithBytes(NULL, ptr + in, l, from, 0); 139 if (cfstr == NULL) 140 { 141 *d++ = '?'; 142 out++; 143 } 144 else 145 { 146 if (!CFStringGetCString(cfstr, (char *)d, buflen - out, to)) 147 { 148 *d++ = '?'; 149 out++; 150 } 151 else 152 { 153 i = STRLEN(d); 154 d += i; 155 out += i; 156 } 157 CFRelease(cfstr); 158 } 159 in += l; 160 } 161 *d = NUL; 162 if (lenp != NULL) 163 *lenp = out; 164 return retval; 165 } 166 CFRelease(cfstr); 167 if (lenp != NULL) 168 *lenp = STRLEN(retval); 169 170 return retval; 171 } 172 173 /* 174 * Conversion from Apple MacRoman char encoding to UTF-8 or latin1, using 175 * standard Carbon framework. 176 * Input: "ptr[*sizep]". 177 * "real_size" is the size of the buffer that "ptr" points to. 178 * output is in-place, "sizep" is adjusted. 179 * Returns OK or FAIL. 180 */ 181 int 182 macroman2enc( 183 char_u *ptr, 184 long *sizep, 185 long real_size) 186 { 187 CFStringRef cfstr; 188 CFRange r; 189 CFIndex len = *sizep; 190 191 // MacRoman is an 8-bit encoding, no need to move bytes to 192 // conv_rest[]. 193 cfstr = CFStringCreateWithBytes(NULL, ptr, len, 194 kCFStringEncodingMacRoman, 0); 195 /* 196 * If there is a conversion error, try using another 197 * conversion. 198 */ 199 if (cfstr == NULL) 200 return FAIL; 201 202 r.location = 0; 203 r.length = CFStringGetLength(cfstr); 204 if (r.length != CFStringGetBytes(cfstr, r, 205 (enc_utf8) ? kCFStringEncodingUTF8 : kCFStringEncodingISOLatin1, 206 0, // no lossy conversion 207 0, // not external representation 208 ptr + *sizep, real_size - *sizep, &len)) 209 { 210 CFRelease(cfstr); 211 return FAIL; 212 } 213 CFRelease(cfstr); 214 mch_memmove(ptr, ptr + *sizep, len); 215 *sizep = len; 216 217 return OK; 218 } 219 220 /* 221 * Conversion from UTF-8 or latin1 to MacRoman. 222 * Input: "from[fromlen]" 223 * Output: "to[maxtolen]" length in "*tolenp" 224 * Unconverted rest in rest[*restlenp]. 225 * Returns OK or FAIL. 226 */ 227 int 228 enc2macroman( 229 char_u *from, 230 size_t fromlen, 231 char_u *to, 232 int *tolenp, 233 int maxtolen, 234 char_u *rest, 235 int *restlenp) 236 { 237 CFStringRef cfstr; 238 CFRange r; 239 CFIndex l; 240 241 *restlenp = 0; 242 cfstr = CFStringCreateWithBytes(NULL, from, fromlen, 243 (enc_utf8) ? kCFStringEncodingUTF8 : kCFStringEncodingISOLatin1, 244 0); 245 while (cfstr == NULL && *restlenp < 3 && fromlen > 1) 246 { 247 rest[*restlenp++] = from[--fromlen]; 248 cfstr = CFStringCreateWithBytes(NULL, from, fromlen, 249 (enc_utf8) ? kCFStringEncodingUTF8 : kCFStringEncodingISOLatin1, 250 0); 251 } 252 if (cfstr == NULL) 253 return FAIL; 254 255 r.location = 0; 256 r.length = CFStringGetLength(cfstr); 257 if (r.length != CFStringGetBytes(cfstr, r, 258 kCFStringEncodingMacRoman, 259 0, // no lossy conversion 260 0, // not external representation (since vim 261 // handles this internally 262 to, maxtolen, &l)) 263 { 264 CFRelease(cfstr); 265 return FAIL; 266 } 267 CFRelease(cfstr); 268 *tolenp = l; 269 return OK; 270 } 271 272 /* 273 * Initializes text converters 274 */ 275 void 276 mac_conv_init(void) 277 { 278 TextEncoding utf8_encoding; 279 TextEncoding utf8_hfsplus_encoding; 280 TextEncoding utf8_canon_encoding; 281 TextEncoding utf16_encoding; 282 283 utf8_encoding = CreateTextEncoding(kTextEncodingUnicodeDefault, 284 kTextEncodingDefaultVariant, kUnicodeUTF8Format); 285 utf8_hfsplus_encoding = CreateTextEncoding(kTextEncodingUnicodeDefault, 286 kUnicodeHFSPlusCompVariant, kUnicodeUTF8Format); 287 utf8_canon_encoding = CreateTextEncoding(kTextEncodingUnicodeDefault, 288 kUnicodeCanonicalCompVariant, kUnicodeUTF8Format); 289 utf16_encoding = CreateTextEncoding(kTextEncodingUnicodeDefault, 290 kTextEncodingDefaultVariant, kUnicode16BitFormat); 291 292 if (TECCreateConverter(&gPathConverter, utf8_encoding, 293 utf8_hfsplus_encoding) != noErr) 294 gPathConverter = NULL; 295 296 if (TECCreateConverter(&gUTF16ToUTF8Converter, utf16_encoding, 297 utf8_canon_encoding) != noErr) 298 { 299 // On pre-10.3, Unicode normalization is not available so 300 // fall back to non-normalizing converter 301 if (TECCreateConverter(&gUTF16ToUTF8Converter, utf16_encoding, 302 utf8_encoding) != noErr) 303 gUTF16ToUTF8Converter = NULL; 304 } 305 } 306 307 /* 308 * Destroys text converters 309 */ 310 void 311 mac_conv_cleanup(void) 312 { 313 if (gUTF16ToUTF8Converter) 314 { 315 TECDisposeConverter(gUTF16ToUTF8Converter); 316 gUTF16ToUTF8Converter = NULL; 317 } 318 319 if (gPathConverter) 320 { 321 TECDisposeConverter(gPathConverter); 322 gPathConverter = NULL; 323 } 324 } 325 326 /* 327 * Conversion from UTF-16 UniChars to 'encoding' 328 * The function signature uses the real type of UniChar (as typedef'ed in 329 * CFBase.h) to avoid clashes with X11 header files in the .pro file 330 */ 331 char_u * 332 mac_utf16_to_enc( 333 unsigned short *from, 334 size_t fromLen, 335 size_t *actualLen) 336 { 337 // Following code borrows somewhat from os_mswin.c 338 vimconv_T conv; 339 size_t utf8_len; 340 char_u *utf8_str; 341 char_u *result = NULL; 342 343 // Convert to utf-8 first, works better with iconv 344 utf8_len = 0; 345 utf8_str = mac_utf16_to_utf8(from, fromLen, &utf8_len); 346 347 if (utf8_str) 348 { 349 // We might be called before we have p_enc set up. 350 conv.vc_type = CONV_NONE; 351 352 // If encoding (p_enc) is any unicode, it is actually in utf-8 (vim 353 // internal unicode is always utf-8) so don't convert in such cases 354 355 if ((enc_canon_props(p_enc) & ENC_UNICODE) == 0) 356 convert_setup(&conv, (char_u *)"utf-8", 357 p_enc? p_enc: (char_u *)"macroman"); 358 if (conv.vc_type == CONV_NONE) 359 { 360 // p_enc is utf-8, so we're done. 361 result = utf8_str; 362 } 363 else 364 { 365 result = string_convert(&conv, utf8_str, (int *)&utf8_len); 366 vim_free(utf8_str); 367 } 368 369 convert_setup(&conv, NULL, NULL); 370 371 if (actualLen) 372 *actualLen = utf8_len; 373 } 374 else if (actualLen) 375 *actualLen = 0; 376 377 return result; 378 } 379 380 /* 381 * Conversion from 'encoding' to UTF-16 UniChars 382 * The function return uses the real type of UniChar (as typedef'ed in 383 * CFBase.h) to avoid clashes with X11 header files in the .pro file 384 */ 385 unsigned short * 386 mac_enc_to_utf16( 387 char_u *from, 388 size_t fromLen, 389 size_t *actualLen) 390 { 391 // Following code borrows somewhat from os_mswin.c 392 vimconv_T conv; 393 size_t utf8_len; 394 char_u *utf8_str; 395 UniChar *result = NULL; 396 Boolean should_free_utf8 = FALSE; 397 398 do 399 { 400 // Use MacRoman by default, we might be called before we have p_enc 401 // set up. Convert to utf-8 first, works better with iconv(). Does 402 // nothing if 'encoding' is "utf-8". 403 conv.vc_type = CONV_NONE; 404 if ((enc_canon_props(p_enc) & ENC_UNICODE) == 0 && 405 convert_setup(&conv, p_enc ? p_enc : (char_u *)"macroman", 406 (char_u *)"utf-8") == FAIL) 407 break; 408 409 if (conv.vc_type != CONV_NONE) 410 { 411 utf8_len = fromLen; 412 utf8_str = string_convert(&conv, from, (int *)&utf8_len); 413 should_free_utf8 = TRUE; 414 } 415 else 416 { 417 utf8_str = from; 418 utf8_len = fromLen; 419 } 420 421 if (utf8_str == NULL) 422 break; 423 424 convert_setup(&conv, NULL, NULL); 425 426 result = mac_utf8_to_utf16(utf8_str, utf8_len, actualLen); 427 428 if (should_free_utf8) 429 vim_free(utf8_str); 430 return result; 431 } 432 while (0); 433 434 if (actualLen) 435 *actualLen = 0; 436 437 return result; 438 } 439 440 /* 441 * Converts from UTF-16 UniChars to CFString 442 * The void * return type is actually a CFStringRef 443 */ 444 void * 445 mac_enc_to_cfstring( 446 char_u *from, 447 size_t fromLen) 448 { 449 UniChar *utf16_str; 450 size_t utf16_len; 451 CFStringRef result = NULL; 452 453 utf16_str = mac_enc_to_utf16(from, fromLen, &utf16_len); 454 if (utf16_str) 455 { 456 result = CFStringCreateWithCharacters(NULL, utf16_str, utf16_len/sizeof(UniChar)); 457 vim_free(utf16_str); 458 } 459 460 return (void *)result; 461 } 462 463 /* 464 * Converts a decomposed HFS+ UTF-8 path to precomposed UTF-8 465 */ 466 char_u * 467 mac_precompose_path( 468 char_u *decompPath, 469 size_t decompLen, 470 size_t *precompLen) 471 { 472 char_u *result = NULL; 473 size_t actualLen = 0; 474 475 if (gPathConverter) 476 { 477 result = alloc(decompLen); 478 if (result) 479 { 480 if (TECConvertText(gPathConverter, decompPath, 481 decompLen, &decompLen, result, 482 decompLen, &actualLen) != noErr) 483 VIM_CLEAR(result); 484 } 485 } 486 487 if (precompLen) 488 *precompLen = actualLen; 489 490 return result; 491 } 492 493 /* 494 * Converts from UTF-16 UniChars to precomposed UTF-8 495 */ 496 static char_u * 497 mac_utf16_to_utf8( 498 UniChar *from, 499 size_t fromLen, 500 size_t *actualLen) 501 { 502 ByteCount utf8_len; 503 ByteCount inputRead; 504 char_u *result; 505 506 if (gUTF16ToUTF8Converter) 507 { 508 result = alloc(fromLen * 6 + 1); 509 if (result && TECConvertText(gUTF16ToUTF8Converter, (ConstTextPtr)from, 510 fromLen, &inputRead, result, 511 (fromLen*6+1)*sizeof(char_u), &utf8_len) == noErr) 512 { 513 TECFlushText(gUTF16ToUTF8Converter, result, (fromLen*6+1)*sizeof(char_u), &inputRead); 514 utf8_len += inputRead; 515 } 516 else 517 VIM_CLEAR(result); 518 } 519 else 520 { 521 result = NULL; 522 } 523 524 if (actualLen) 525 *actualLen = result ? utf8_len : 0; 526 527 return result; 528 } 529 530 /* 531 * Converts from UTF-8 to UTF-16 UniChars 532 */ 533 static UniChar * 534 mac_utf8_to_utf16( 535 char_u *from, 536 size_t fromLen, 537 size_t *actualLen) 538 { 539 CFStringRef utf8_str; 540 CFRange convertRange; 541 UniChar *result = NULL; 542 543 utf8_str = CFStringCreateWithBytes(NULL, from, fromLen, 544 kCFStringEncodingUTF8, FALSE); 545 546 if (utf8_str == NULL) { 547 if (actualLen) 548 *actualLen = 0; 549 return NULL; 550 } 551 552 convertRange = CFRangeMake(0, CFStringGetLength(utf8_str)); 553 result = ALLOC_MULT(UniChar, convertRange.length); 554 555 CFStringGetCharacters(utf8_str, convertRange, result); 556 557 CFRelease(utf8_str); 558 559 if (actualLen) 560 *actualLen = convertRange.length * sizeof(UniChar); 561 562 return result; 563 } 564 565 /* 566 * Sets LANG environment variable in Vim from Mac locale 567 */ 568 void 569 mac_lang_init(void) 570 { 571 if (mch_getenv((char_u *)"LANG") == NULL) 572 { 573 char buf[20]; 574 if (LocaleRefGetPartString(NULL, 575 kLocaleLanguageMask | kLocaleLanguageVariantMask | 576 kLocaleRegionMask | kLocaleRegionVariantMask, 577 sizeof buf, buf) == noErr && *buf) 578 { 579 vim_setenv((char_u *)"LANG", (char_u *)buf); 580 # ifdef HAVE_LOCALE_H 581 setlocale(LC_ALL, ""); 582 # endif 583 } 584 } 585 } 586 #endif // MACOS_CONVERT 587