1 /* vi:set ts=8 sts=4 sw=4 noet:
2 *
3 * VIM - Vi IMproved by Bram Moolenaar
4 *
5 * Do ":help uganda" in Vim to read copying and usage conditions.
6 * Do ":help credits" in Vim to see a list of people who contributed.
7 * See README.txt for an overview of the Vim source code.
8 */
9 /*
10 * os_mac_conv.c: Code specifically for Mac string conversions.
11 *
12 * This code has been put in a separate file to avoid the conflicts that are
13 * caused by including both the X11 and Carbon header files.
14 */
15
16 #define NO_X11_INCLUDES
17
18 #include "vim.h"
19
20 #if !defined(PROTO)
21 # include <CoreServices/CoreServices.h>
22 #endif
23
24
25 #if defined(MACOS_CONVERT) || defined(PROTO)
26
27 # ifdef PROTO
28 // A few dummy types to be able to generate function prototypes.
29 typedef int UniChar;
30 typedef int *TECObjectRef;
31 typedef int CFStringRef;
32 # endif
33
34 static char_u *mac_utf16_to_utf8(UniChar *from, size_t fromLen, size_t *actualLen);
35 static UniChar *mac_utf8_to_utf16(char_u *from, size_t fromLen, size_t *actualLen);
36
37 // Converter for composing decomposed HFS+ file paths
38 static TECObjectRef gPathConverter;
39 // Converter used by mac_utf16_to_utf8
40 static TECObjectRef gUTF16ToUTF8Converter;
41
42 /*
43 * A Mac version of string_convert_ext() for special cases.
44 */
45 char_u *
mac_string_convert(char_u * ptr,int len,int * lenp,int fail_on_error,int from_enc,int to_enc,int * unconvlenp)46 mac_string_convert(
47 char_u *ptr,
48 int len,
49 int *lenp,
50 int fail_on_error,
51 int from_enc,
52 int to_enc,
53 int *unconvlenp)
54 {
55 char_u *retval, *d;
56 CFStringRef cfstr;
57 int buflen, in, out, l, i;
58 CFStringEncoding from;
59 CFStringEncoding to;
60
61 switch (from_enc)
62 {
63 case 'l': from = kCFStringEncodingISOLatin1; break;
64 case 'm': from = kCFStringEncodingMacRoman; break;
65 case 'u': from = kCFStringEncodingUTF8; break;
66 default: return NULL;
67 }
68 switch (to_enc)
69 {
70 case 'l': to = kCFStringEncodingISOLatin1; break;
71 case 'm': to = kCFStringEncodingMacRoman; break;
72 case 'u': to = kCFStringEncodingUTF8; break;
73 default: return NULL;
74 }
75
76 if (unconvlenp != NULL)
77 *unconvlenp = 0;
78 cfstr = CFStringCreateWithBytes(NULL, ptr, len, from, 0);
79
80 if (cfstr == NULL)
81 fprintf(stderr, "Encoding failed\n");
82 // When conversion failed, try excluding bytes from the end, helps when
83 // there is an incomplete byte sequence. Only do up to 6 bytes to avoid
84 // looping a long time when there really is something unconvertible.
85 while (cfstr == NULL && unconvlenp != NULL && len > 1 && *unconvlenp < 6)
86 {
87 --len;
88 ++*unconvlenp;
89 cfstr = CFStringCreateWithBytes(NULL, ptr, len, from, 0);
90 }
91 if (cfstr == NULL)
92 return NULL;
93
94 if (to == kCFStringEncodingUTF8)
95 buflen = len * 6 + 1;
96 else
97 buflen = len + 1;
98 retval = alloc(buflen);
99 if (retval == NULL)
100 {
101 CFRelease(cfstr);
102 return NULL;
103 }
104
105 #if 0
106 CFRange convertRange = CFRangeMake(0, CFStringGetLength(cfstr));
107 // Determine output buffer size
108 CFStringGetBytes(cfstr, convertRange, to, NULL, FALSE, NULL, 0, (CFIndex *)&buflen);
109 retval = (buflen > 0) ? alloc(buflen) : NULL;
110 if (retval == NULL) {
111 CFRelease(cfstr);
112 return NULL;
113 }
114
115 if (lenp)
116 *lenp = buflen / sizeof(char_u);
117
118 if (!CFStringGetBytes(cfstr, convertRange, to, NULL, FALSE, retval, buflen, NULL))
119 #endif
120 if (!CFStringGetCString(cfstr, (char *)retval, buflen, to))
121 {
122 CFRelease(cfstr);
123 if (fail_on_error)
124 {
125 vim_free(retval);
126 return NULL;
127 }
128
129 fprintf(stderr, "Trying char-by-char conversion...\n");
130 // conversion failed for the whole string, but maybe it will work
131 // for each character
132 for (d = retval, in = 0, out = 0; in < len && out < buflen - 1;)
133 {
134 if (from == kCFStringEncodingUTF8)
135 l = utf_ptr2len(ptr + in);
136 else
137 l = 1;
138 cfstr = CFStringCreateWithBytes(NULL, ptr + in, l, from, 0);
139 if (cfstr == NULL)
140 {
141 *d++ = '?';
142 out++;
143 }
144 else
145 {
146 if (!CFStringGetCString(cfstr, (char *)d, buflen - out, to))
147 {
148 *d++ = '?';
149 out++;
150 }
151 else
152 {
153 i = STRLEN(d);
154 d += i;
155 out += i;
156 }
157 CFRelease(cfstr);
158 }
159 in += l;
160 }
161 *d = NUL;
162 if (lenp != NULL)
163 *lenp = out;
164 return retval;
165 }
166 CFRelease(cfstr);
167 if (lenp != NULL)
168 *lenp = STRLEN(retval);
169
170 return retval;
171 }
172
173 /*
174 * Conversion from Apple MacRoman char encoding to UTF-8 or latin1, using
175 * standard Carbon framework.
176 * Input: "ptr[*sizep]".
177 * "real_size" is the size of the buffer that "ptr" points to.
178 * output is in-place, "sizep" is adjusted.
179 * Returns OK or FAIL.
180 */
181 int
macroman2enc(char_u * ptr,long * sizep,long real_size)182 macroman2enc(
183 char_u *ptr,
184 long *sizep,
185 long real_size)
186 {
187 CFStringRef cfstr;
188 CFRange r;
189 CFIndex len = *sizep;
190
191 // MacRoman is an 8-bit encoding, no need to move bytes to
192 // conv_rest[].
193 cfstr = CFStringCreateWithBytes(NULL, ptr, len,
194 kCFStringEncodingMacRoman, 0);
195 /*
196 * If there is a conversion error, try using another
197 * conversion.
198 */
199 if (cfstr == NULL)
200 return FAIL;
201
202 r.location = 0;
203 r.length = CFStringGetLength(cfstr);
204 if (r.length != CFStringGetBytes(cfstr, r,
205 (enc_utf8) ? kCFStringEncodingUTF8 : kCFStringEncodingISOLatin1,
206 0, // no lossy conversion
207 0, // not external representation
208 ptr + *sizep, real_size - *sizep, &len))
209 {
210 CFRelease(cfstr);
211 return FAIL;
212 }
213 CFRelease(cfstr);
214 mch_memmove(ptr, ptr + *sizep, len);
215 *sizep = len;
216
217 return OK;
218 }
219
220 /*
221 * Conversion from UTF-8 or latin1 to MacRoman.
222 * Input: "from[fromlen]"
223 * Output: "to[maxtolen]" length in "*tolenp"
224 * Unconverted rest in rest[*restlenp].
225 * Returns OK or FAIL.
226 */
227 int
enc2macroman(char_u * from,size_t fromlen,char_u * to,int * tolenp,int maxtolen,char_u * rest,int * restlenp)228 enc2macroman(
229 char_u *from,
230 size_t fromlen,
231 char_u *to,
232 int *tolenp,
233 int maxtolen,
234 char_u *rest,
235 int *restlenp)
236 {
237 CFStringRef cfstr;
238 CFRange r;
239 CFIndex l;
240
241 *restlenp = 0;
242 cfstr = CFStringCreateWithBytes(NULL, from, fromlen,
243 (enc_utf8) ? kCFStringEncodingUTF8 : kCFStringEncodingISOLatin1,
244 0);
245 while (cfstr == NULL && *restlenp < 3 && fromlen > 1)
246 {
247 rest[*restlenp++] = from[--fromlen];
248 cfstr = CFStringCreateWithBytes(NULL, from, fromlen,
249 (enc_utf8) ? kCFStringEncodingUTF8 : kCFStringEncodingISOLatin1,
250 0);
251 }
252 if (cfstr == NULL)
253 return FAIL;
254
255 r.location = 0;
256 r.length = CFStringGetLength(cfstr);
257 if (r.length != CFStringGetBytes(cfstr, r,
258 kCFStringEncodingMacRoman,
259 0, // no lossy conversion
260 0, // not external representation (since vim
261 // handles this internally
262 to, maxtolen, &l))
263 {
264 CFRelease(cfstr);
265 return FAIL;
266 }
267 CFRelease(cfstr);
268 *tolenp = l;
269 return OK;
270 }
271
272 /*
273 * Initializes text converters
274 */
275 void
mac_conv_init(void)276 mac_conv_init(void)
277 {
278 TextEncoding utf8_encoding;
279 TextEncoding utf8_hfsplus_encoding;
280 TextEncoding utf8_canon_encoding;
281 TextEncoding utf16_encoding;
282
283 utf8_encoding = CreateTextEncoding(kTextEncodingUnicodeDefault,
284 kTextEncodingDefaultVariant, kUnicodeUTF8Format);
285 utf8_hfsplus_encoding = CreateTextEncoding(kTextEncodingUnicodeDefault,
286 kUnicodeHFSPlusCompVariant, kUnicodeUTF8Format);
287 utf8_canon_encoding = CreateTextEncoding(kTextEncodingUnicodeDefault,
288 kUnicodeCanonicalCompVariant, kUnicodeUTF8Format);
289 utf16_encoding = CreateTextEncoding(kTextEncodingUnicodeDefault,
290 kTextEncodingDefaultVariant, kUnicode16BitFormat);
291
292 if (TECCreateConverter(&gPathConverter, utf8_encoding,
293 utf8_hfsplus_encoding) != noErr)
294 gPathConverter = NULL;
295
296 if (TECCreateConverter(&gUTF16ToUTF8Converter, utf16_encoding,
297 utf8_canon_encoding) != noErr)
298 {
299 // On pre-10.3, Unicode normalization is not available so
300 // fall back to non-normalizing converter
301 if (TECCreateConverter(&gUTF16ToUTF8Converter, utf16_encoding,
302 utf8_encoding) != noErr)
303 gUTF16ToUTF8Converter = NULL;
304 }
305 }
306
307 /*
308 * Destroys text converters
309 */
310 void
mac_conv_cleanup(void)311 mac_conv_cleanup(void)
312 {
313 if (gUTF16ToUTF8Converter)
314 {
315 TECDisposeConverter(gUTF16ToUTF8Converter);
316 gUTF16ToUTF8Converter = NULL;
317 }
318
319 if (gPathConverter)
320 {
321 TECDisposeConverter(gPathConverter);
322 gPathConverter = NULL;
323 }
324 }
325
326 /*
327 * Conversion from UTF-16 UniChars to 'encoding'
328 * The function signature uses the real type of UniChar (as typedef'ed in
329 * CFBase.h) to avoid clashes with X11 header files in the .pro file
330 */
331 char_u *
mac_utf16_to_enc(unsigned short * from,size_t fromLen,size_t * actualLen)332 mac_utf16_to_enc(
333 unsigned short *from,
334 size_t fromLen,
335 size_t *actualLen)
336 {
337 // Following code borrows somewhat from os_mswin.c
338 vimconv_T conv;
339 size_t utf8_len;
340 char_u *utf8_str;
341 char_u *result = NULL;
342
343 // Convert to utf-8 first, works better with iconv
344 utf8_len = 0;
345 utf8_str = mac_utf16_to_utf8(from, fromLen, &utf8_len);
346
347 if (utf8_str)
348 {
349 // We might be called before we have p_enc set up.
350 conv.vc_type = CONV_NONE;
351
352 // If encoding (p_enc) is any unicode, it is actually in utf-8 (vim
353 // internal unicode is always utf-8) so don't convert in such cases
354
355 if ((enc_canon_props(p_enc) & ENC_UNICODE) == 0)
356 convert_setup(&conv, (char_u *)"utf-8",
357 p_enc? p_enc: (char_u *)"macroman");
358 if (conv.vc_type == CONV_NONE)
359 {
360 // p_enc is utf-8, so we're done.
361 result = utf8_str;
362 }
363 else
364 {
365 result = string_convert(&conv, utf8_str, (int *)&utf8_len);
366 vim_free(utf8_str);
367 }
368
369 convert_setup(&conv, NULL, NULL);
370
371 if (actualLen)
372 *actualLen = utf8_len;
373 }
374 else if (actualLen)
375 *actualLen = 0;
376
377 return result;
378 }
379
380 /*
381 * Conversion from 'encoding' to UTF-16 UniChars
382 * The function return uses the real type of UniChar (as typedef'ed in
383 * CFBase.h) to avoid clashes with X11 header files in the .pro file
384 */
385 unsigned short *
mac_enc_to_utf16(char_u * from,size_t fromLen,size_t * actualLen)386 mac_enc_to_utf16(
387 char_u *from,
388 size_t fromLen,
389 size_t *actualLen)
390 {
391 // Following code borrows somewhat from os_mswin.c
392 vimconv_T conv;
393 size_t utf8_len;
394 char_u *utf8_str;
395 UniChar *result = NULL;
396 Boolean should_free_utf8 = FALSE;
397
398 do
399 {
400 // Use MacRoman by default, we might be called before we have p_enc
401 // set up. Convert to utf-8 first, works better with iconv(). Does
402 // nothing if 'encoding' is "utf-8".
403 conv.vc_type = CONV_NONE;
404 if ((enc_canon_props(p_enc) & ENC_UNICODE) == 0 &&
405 convert_setup(&conv, p_enc ? p_enc : (char_u *)"macroman",
406 (char_u *)"utf-8") == FAIL)
407 break;
408
409 if (conv.vc_type != CONV_NONE)
410 {
411 utf8_len = fromLen;
412 utf8_str = string_convert(&conv, from, (int *)&utf8_len);
413 should_free_utf8 = TRUE;
414 }
415 else
416 {
417 utf8_str = from;
418 utf8_len = fromLen;
419 }
420
421 if (utf8_str == NULL)
422 break;
423
424 convert_setup(&conv, NULL, NULL);
425
426 result = mac_utf8_to_utf16(utf8_str, utf8_len, actualLen);
427
428 if (should_free_utf8)
429 vim_free(utf8_str);
430 return result;
431 }
432 while (0);
433
434 if (actualLen)
435 *actualLen = 0;
436
437 return result;
438 }
439
440 /*
441 * Converts from UTF-16 UniChars to CFString
442 * The void * return type is actually a CFStringRef
443 */
444 void *
mac_enc_to_cfstring(char_u * from,size_t fromLen)445 mac_enc_to_cfstring(
446 char_u *from,
447 size_t fromLen)
448 {
449 UniChar *utf16_str;
450 size_t utf16_len;
451 CFStringRef result = NULL;
452
453 utf16_str = mac_enc_to_utf16(from, fromLen, &utf16_len);
454 if (utf16_str)
455 {
456 result = CFStringCreateWithCharacters(NULL, utf16_str, utf16_len/sizeof(UniChar));
457 vim_free(utf16_str);
458 }
459
460 return (void *)result;
461 }
462
463 /*
464 * Converts a decomposed HFS+ UTF-8 path to precomposed UTF-8
465 */
466 char_u *
mac_precompose_path(char_u * decompPath,size_t decompLen,size_t * precompLen)467 mac_precompose_path(
468 char_u *decompPath,
469 size_t decompLen,
470 size_t *precompLen)
471 {
472 char_u *result = NULL;
473 size_t actualLen = 0;
474
475 if (gPathConverter)
476 {
477 result = alloc(decompLen);
478 if (result)
479 {
480 if (TECConvertText(gPathConverter, decompPath,
481 decompLen, &decompLen, result,
482 decompLen, &actualLen) != noErr)
483 VIM_CLEAR(result);
484 }
485 }
486
487 if (precompLen)
488 *precompLen = actualLen;
489
490 return result;
491 }
492
493 /*
494 * Converts from UTF-16 UniChars to precomposed UTF-8
495 */
496 static char_u *
mac_utf16_to_utf8(UniChar * from,size_t fromLen,size_t * actualLen)497 mac_utf16_to_utf8(
498 UniChar *from,
499 size_t fromLen,
500 size_t *actualLen)
501 {
502 ByteCount utf8_len;
503 ByteCount inputRead;
504 char_u *result;
505
506 if (gUTF16ToUTF8Converter)
507 {
508 result = alloc(fromLen * 6 + 1);
509 if (result && TECConvertText(gUTF16ToUTF8Converter, (ConstTextPtr)from,
510 fromLen, &inputRead, result,
511 (fromLen*6+1)*sizeof(char_u), &utf8_len) == noErr)
512 {
513 TECFlushText(gUTF16ToUTF8Converter, result, (fromLen*6+1)*sizeof(char_u), &inputRead);
514 utf8_len += inputRead;
515 }
516 else
517 VIM_CLEAR(result);
518 }
519 else
520 {
521 result = NULL;
522 }
523
524 if (actualLen)
525 *actualLen = result ? utf8_len : 0;
526
527 return result;
528 }
529
530 /*
531 * Converts from UTF-8 to UTF-16 UniChars
532 */
533 static UniChar *
mac_utf8_to_utf16(char_u * from,size_t fromLen,size_t * actualLen)534 mac_utf8_to_utf16(
535 char_u *from,
536 size_t fromLen,
537 size_t *actualLen)
538 {
539 CFStringRef utf8_str;
540 CFRange convertRange;
541 UniChar *result = NULL;
542
543 utf8_str = CFStringCreateWithBytes(NULL, from, fromLen,
544 kCFStringEncodingUTF8, FALSE);
545
546 if (utf8_str == NULL) {
547 if (actualLen)
548 *actualLen = 0;
549 return NULL;
550 }
551
552 convertRange = CFRangeMake(0, CFStringGetLength(utf8_str));
553 result = ALLOC_MULT(UniChar, convertRange.length);
554
555 CFStringGetCharacters(utf8_str, convertRange, result);
556
557 CFRelease(utf8_str);
558
559 if (actualLen)
560 *actualLen = convertRange.length * sizeof(UniChar);
561
562 return result;
563 }
564
565 /*
566 * Sets LANG environment variable in Vim from Mac locale
567 */
568 void
mac_lang_init(void)569 mac_lang_init(void)
570 {
571 if (mch_getenv((char_u *)"LANG") == NULL)
572 {
573 char buf[50];
574
575 // $LANG is not set, either because it was unset or Vim was started
576 // from the Dock. Query the system locale.
577 if (LocaleRefGetPartString(NULL,
578 kLocaleLanguageMask | kLocaleLanguageVariantMask |
579 kLocaleRegionMask | kLocaleRegionVariantMask,
580 sizeof(buf) - 10, buf) == noErr && *buf)
581 {
582 if (strcasestr(buf, "utf-8") == NULL)
583 strcat(buf, ".UTF-8");
584 vim_setenv((char_u *)"LANG", (char_u *)buf);
585 # ifdef HAVE_LOCALE_H
586 setlocale(LC_ALL, "");
587 # endif
588 # if defined(FEAT_FLOAT) && defined(LC_NUMERIC)
589 // Make sure strtod() uses a decimal point, not a comma.
590 setlocale(LC_NUMERIC, "C");
591 # endif
592 }
593 }
594 }
595 #endif // MACOS_CONVERT
596