1 /* vi:set ts=8 sts=4 sw=4:
3 * VIM - Vi IMproved by Bram Moolenaar
5 * Do ":help uganda" in Vim to read copying and usage conditions.
6 * Do ":help credits" in Vim to see a list of people who contributed.
7 * See README.txt for an overview of the Vim source code.
10 * os_mac_conv.c: Code specifically for Mac string conversions.
12 * This code has been put in a separate file to avoid the conflicts that are
13 * caused by including both the X11 and Carbon header files.
16 #define NO_X11_INCLUDES
19 #if defined(MACOS_CONVERT) || defined(PROTO)
21 /* A few dummy types to be able to generate function prototypes. */
23 typedef int *TECObjectRef
;
24 typedef int CFStringRef
;
26 typedef unsigned short UniChar
;
27 #include <CoreServices/CoreServices.h>
/*
 * Forward declarations for the two file-local UTF-8 <-> UTF-16 helpers,
 * followed by the two file-local text-converter handles.
 */
30 static char_u
*mac_utf16_to_utf8
__ARGS((UniChar
*from
, size_t fromLen
, size_t *actualLen
));
31 static UniChar
*mac_utf8_to_utf16
__ARGS((char_u
*from
, size_t fromLen
, size_t *actualLen
));
33 /* Converter for composing decomposed HFS+ file paths */
34 static TECObjectRef gPathConverter
;
35 /* Converter used by mac_utf16_to_utf8 */
36 static TECObjectRef gUTF16ToUTF8Converter
;
/*
 * NOTE(review): only fragments of this function are visible in this view.
 * From what is shown: the letters "l", "m" and "u" select ISO Latin-1,
 * MacRoman and UTF-8 for both the source ("from") and target ("to")
 * encodings.  The input bytes are turned into a CFString with
 * CFStringCreateWithBytes(); while that fails the creation is retried in a
 * loop (per the existing comment, with bytes excluded from the end).  The
 * text is then extracted with CFStringGetBytes() / CFStringGetCString(),
 * and as a last resort converted piece by piece in a loop.  "*lenp"
 * receives the resulting length.
 * NOTE(review): "buflen" is declared as int, yet its address is passed to
 * CFStringGetBytes() through a (CFIndex *) cast.  If CFIndex is wider than
 * int on the target, that call writes past "buflen" -- confirm.
 */
39 * A Mac version of string_convert_ext() for special cases.
42 mac_string_convert(ptr
, len
, lenp
, fail_on_error
, from_enc
, to_enc
, unconvlenp
)
53 int buflen
, in
, out
, l
, i
;
54 CFStringEncoding from
;
59 case 'l': from
= kCFStringEncodingISOLatin1
; break;
60 case 'm': from
= kCFStringEncodingMacRoman
; break;
61 case 'u': from
= kCFStringEncodingUTF8
; break;
66 case 'l': to
= kCFStringEncodingISOLatin1
; break;
67 case 'm': to
= kCFStringEncodingMacRoman
; break;
68 case 'u': to
= kCFStringEncodingUTF8
; break;
72 if (unconvlenp
!= NULL
)
74 cfstr
= CFStringCreateWithBytes(NULL
, ptr
, len
, from
, 0);
77 fprintf(stderr
, "Encoding failed\n");
78 /* When conversion failed, try excluding bytes from the end, helps when
79 * there is an incomplete byte sequence. Only do up to 6 bytes to avoid
80 * looping a long time when there really is something unconvertible. */
81 while (cfstr
== NULL
&& unconvlenp
!= NULL
&& len
> 1 && *unconvlenp
< 6)
85 cfstr
= CFStringCreateWithBytes(NULL
, ptr
, len
, from
, 0);
90 if (to
== kCFStringEncodingUTF8
)
94 retval
= alloc(buflen
);
102 CFRange convertRange
= CFRangeMake(0, CFStringGetLength(cfstr
));
103 /* Determine output buffer size */
104 CFStringGetBytes(cfstr
, convertRange
, to
, NULL
, FALSE
, NULL
, 0, (CFIndex
*)&buflen
);
105 retval
= (buflen
> 0) ? alloc(buflen
) : NULL
;
106 if (retval
== NULL
) {
112 *lenp
= buflen
/ sizeof(char_u
);
114 if (!CFStringGetBytes(cfstr
, convertRange
, to
, NULL
, FALSE
, retval
, buflen
, NULL
))
116 if (!CFStringGetCString(cfstr
, (char *)retval
, buflen
, to
))
125 fprintf(stderr
, "Trying char-by-char conversion...\n");
126 /* conversion failed for the whole string, but maybe it will work
127 * for each character */
128 for (d
= retval
, in
= 0, out
= 0; in
< len
&& out
< buflen
- 1;)
130 if (from
== kCFStringEncodingUTF8
)
131 l
= utf_ptr2len(ptr
+ in
);
134 cfstr
= CFStringCreateWithBytes(NULL
, ptr
+ in
, l
, from
, 0);
142 if (!CFStringGetCString(cfstr
, (char *)d
, buflen
- out
, to
))
164 *lenp
= STRLEN(retval
);
/*
 * NOTE(review): only fragments of this function are visible in this view.
 * The visible code builds a CFString from the MacRoman bytes in "ptr", asks
 * CFStringGetBytes() to write the UTF-8 form (when enc_utf8 is set) or the
 * ISO Latin-1 form into the unused tail of the buffer at "ptr + *sizep"
 * (at most "real_size - *sizep" bytes), and then moves the converted bytes
 * to the start of "ptr" with mch_memmove().
 */
170 * Conversion from Apple MacRoman char encoding to UTF-8 or latin1, using
171 * standard Carbon framework.
172 * Input: "ptr[*sizep]".
173 * "real_size" is the size of the buffer that "ptr" points to.
174 * output is in-place, "sizep" is adjusted.
175 * Returns OK or FAIL.
178 macroman2enc(ptr
, sizep
, real_size
)
185 CFIndex len
= *sizep
;
187 /* MacRoman is an 8-bit encoding, no need to move bytes to
189 cfstr
= CFStringCreateWithBytes(NULL
, ptr
, len
,
190 kCFStringEncodingMacRoman
, 0);
192 * If there is a conversion error, try using another
199 r
.length
= CFStringGetLength(cfstr
);
200 if (r
.length
!= CFStringGetBytes(cfstr
, r
,
201 (enc_utf8
) ? kCFStringEncodingUTF8
: kCFStringEncodingISOLatin1
,
202 0, /* no lossy conversion */
203 0, /* not external representation */
204 ptr
+ *sizep
, real_size
- *sizep
, &len
))
210 mch_memmove(ptr
, ptr
+ *sizep
, len
);
/*
 * NOTE(review): only fragments of this function are visible in this view.
 * The visible code creates a CFString from "from" (UTF-8 when enc_utf8 is
 * set, ISO Latin-1 otherwise).  While that fails, "*restlenp" is below 3 and
 * more than one input byte is left, the last input byte is stored in "rest"
 * and the creation is retried with the shortened input.  The string is then
 * encoded as MacRoman with CFStringGetBytes().
 * NOTE(review): in "rest[*restlenp++]" the postfix ++ applies to the pointer
 * "restlenp" itself, not to the count it points to (which would be written
 * "(*restlenp)++") -- confirm which was intended.
 */
217 * Conversion from UTF-8 or latin1 to MacRoman.
218 * Input: "from[fromlen]"
219 * Output: "to[maxtolen]" length in "*tolenp"
220 * Unconverted rest in rest[*restlenp].
221 * Returns OK or FAIL.
224 enc2macroman(from
, fromlen
, to
, tolenp
, maxtolen
, rest
, restlenp
)
238 cfstr
= CFStringCreateWithBytes(NULL
, from
, fromlen
,
239 (enc_utf8
) ? kCFStringEncodingUTF8
: kCFStringEncodingISOLatin1
,
241 while (cfstr
== NULL
&& *restlenp
< 3 && fromlen
> 1)
243 rest
[*restlenp
++] = from
[--fromlen
];
244 cfstr
= CFStringCreateWithBytes(NULL
, from
, fromlen
,
245 (enc_utf8
) ? kCFStringEncodingUTF8
: kCFStringEncodingISOLatin1
,
252 r
.length
= CFStringGetLength(cfstr
);
253 if (r
.length
!= CFStringGetBytes(cfstr
, r
,
254 kCFStringEncodingMacRoman
,
255 0, /* no lossy conversion */
256 0, /* not external representation (since vim
257 * handles this internally) */
/*
 * NOTE(review): the function header is not visible in this view.
 * The visible code builds four TextEncoding values with
 * CreateTextEncoding(): UTF-8 with the default variant, UTF-8 with
 * kUnicodeHFSPlusCompVariant, UTF-8 with kUnicodeCanonicalCompVariant, and
 * the 16-bit Unicode format.  It then creates gPathConverter (from
 * utf8_encoding to utf8_hfsplus_encoding) and gUTF16ToUTF8Converter (from
 * utf16_encoding to utf8_canon_encoding, retried with plain utf8_encoding
 * when that fails).  A handle is set to NULL when its converter cannot be
 * created.
 */
269 * Initializes text converters
274 TextEncoding utf8_encoding
;
275 TextEncoding utf8_hfsplus_encoding
;
276 TextEncoding utf8_canon_encoding
;
277 TextEncoding utf16_encoding
;
279 utf8_encoding
= CreateTextEncoding(kTextEncodingUnicodeDefault
,
280 kTextEncodingDefaultVariant
, kUnicodeUTF8Format
);
281 utf8_hfsplus_encoding
= CreateTextEncoding(kTextEncodingUnicodeDefault
,
282 kUnicodeHFSPlusCompVariant
, kUnicodeUTF8Format
);
283 utf8_canon_encoding
= CreateTextEncoding(kTextEncodingUnicodeDefault
,
284 kUnicodeCanonicalCompVariant
, kUnicodeUTF8Format
);
285 utf16_encoding
= CreateTextEncoding(kTextEncodingUnicodeDefault
,
286 kTextEncodingDefaultVariant
, kUnicode16BitFormat
);
288 if (TECCreateConverter(&gPathConverter
, utf8_encoding
,
289 utf8_hfsplus_encoding
) != noErr
)
290 gPathConverter
= NULL
;
292 if (TECCreateConverter(&gUTF16ToUTF8Converter
, utf16_encoding
,
293 utf8_canon_encoding
) != noErr
)
295 /* On pre-10.3, Unicode normalization is not available so
296 * fall back to non-normalizing converter */
297 if (TECCreateConverter(&gUTF16ToUTF8Converter
, utf16_encoding
,
298 utf8_encoding
) != noErr
)
299 gUTF16ToUTF8Converter
= NULL
;
/*
 * NOTE(review): the function header is not visible in this view.
 * The visible code disposes gUTF16ToUTF8Converter (when it is non-NULL) and
 * gPathConverter with TECDisposeConverter() and resets each handle to NULL.
 */
304 * Destroys text converters
309 if (gUTF16ToUTF8Converter
)
311 TECDisposeConverter(gUTF16ToUTF8Converter
);
312 gUTF16ToUTF8Converter
= NULL
;
317 TECDisposeConverter(gPathConverter
);
318 gPathConverter
= NULL
;
/*
 * NOTE(review): only fragments of this function are visible in this view.
 * The visible code first converts the UTF-16 input to UTF-8 with
 * mac_utf16_to_utf8().  Unless enc_canon_props(p_enc) reports ENC_UNICODE,
 * it sets up a conversion from "utf-8" to p_enc ("macroman" when p_enc is
 * NULL) and runs string_convert() on the UTF-8 text.  The conversion state
 * is reset with convert_setup(&conv, NULL, NULL) and utf8_len is stored in
 * "*actualLen".
 * NOTE(review): enc_canon_props() is called with p_enc before the NULL
 * fallback is applied -- confirm it tolerates a NULL argument.
 * NOTE(review): "&utf8_len" is cast to (int *) for string_convert(); its
 * declaration is not visible here.  If it is a size_t this is only safe
 * where int and size_t have the same size -- confirm.
 */
323 * Conversion from UTF-16 UniChars to 'encoding'
324 * The function signature uses the real type of UniChar (as typedef'ed in
325 * CFBase.h) to avoid clashes with X11 header files in the .pro file
328 mac_utf16_to_enc(from
, fromLen
, actualLen
)
329 unsigned short *from
;
333 /* Following code borrows somewhat from os_mswin.c */
337 char_u
*result
= NULL
;
339 /* Convert to utf-8 first, works better with iconv */
341 utf8_str
= mac_utf16_to_utf8(from
, fromLen
, &utf8_len
);
345 /* We might be called before we have p_enc set up. */
346 conv
.vc_type
= CONV_NONE
;
348 /* If encoding (p_enc) is any unicode, it is actually in utf-8 (vim
349 * internal unicode is always utf-8) so don't convert in such cases */
351 if ((enc_canon_props(p_enc
) & ENC_UNICODE
) == 0)
352 convert_setup(&conv
, (char_u
*)"utf-8",
353 p_enc
? p_enc
: (char_u
*)"macroman");
354 if (conv
.vc_type
== CONV_NONE
)
356 /* p_enc is utf-8, so we're done. */
361 result
= string_convert(&conv
, utf8_str
, (int *)&utf8_len
);
365 convert_setup(&conv
, NULL
, NULL
);
368 *actualLen
= utf8_len
;
/*
 * NOTE(review): only fragments of this function are visible in this view.
 * Unless enc_canon_props(p_enc) reports ENC_UNICODE, the visible code sets
 * up a conversion from p_enc ("macroman" when p_enc is NULL) to "utf-8".
 * When a conversion is active, "from" is converted with string_convert()
 * and should_free_utf8 is set to TRUE.  The conversion state is reset with
 * convert_setup(&conv, NULL, NULL) and the UTF-8 text is handed to
 * mac_utf8_to_utf16() together with "actualLen".
 * NOTE(review): enc_canon_props() is called with p_enc before the NULL
 * fallback is applied -- confirm it tolerates a NULL argument.
 * NOTE(review): "&utf8_len" is cast to (int *) for string_convert(); its
 * declaration is not visible here -- confirm the types match in size.
 */
377 * Conversion from 'encoding' to UTF-16 UniChars
378 * The function return uses the real type of UniChar (as typedef'ed in
379 * CFBase.h) to avoid clashes with X11 header files in the .pro file
382 mac_enc_to_utf16(from
, fromLen
, actualLen
)
387 /* Following code borrows somewhat from os_mswin.c */
391 UniChar
*result
= NULL
;
392 Boolean should_free_utf8
= FALSE
;
396 /* Use MacRoman by default, we might be called before we have p_enc
397 * set up. Convert to utf-8 first, works better with iconv(). Does
398 * nothing if 'encoding' is "utf-8". */
399 conv
.vc_type
= CONV_NONE
;
400 if ((enc_canon_props(p_enc
) & ENC_UNICODE
) == 0 &&
401 convert_setup(&conv
, p_enc
? p_enc
: (char_u
*)"macroman",
402 (char_u
*)"utf-8") == FAIL
)
405 if (conv
.vc_type
!= CONV_NONE
)
408 utf8_str
= string_convert(&conv
, from
, (int *)&utf8_len
);
409 should_free_utf8
= TRUE
;
417 if (utf8_str
== NULL
)
420 convert_setup(&conv
, NULL
, NULL
);
422 result
= mac_utf8_to_utf16(utf8_str
, utf8_len
, actualLen
);
424 if (should_free_utf8
)
437 * Converts from 'encoding' to CFString
438 * The void * return type is actually a CFStringRef
/*
 * NOTE(review): only fragments of this function are visible in this view.
 * The visible code converts "from" to UTF-16 with mac_enc_to_utf16() and
 * builds a CFString from the result with CFStringCreateWithCharacters().
 * utf16_len is divided by sizeof(UniChar) to obtain the character count
 * passed to that call.  The CFStringRef is returned as a void pointer.
 */
441 mac_enc_to_cfstring(from
, fromLen
)
447 CFStringRef result
= NULL
;
449 utf16_str
= mac_enc_to_utf16(from
, fromLen
, &utf16_len
);
452 result
= CFStringCreateWithCharacters(NULL
, utf16_str
, utf16_len
/sizeof(UniChar
));
456 return (void *)result
;
/*
 * NOTE(review): only fragments of this function are visible in this view.
 * The visible code allocates an output buffer of "decompLen" bytes, runs
 * gPathConverter over "decompPath" with TECConvertText(), and stores the
 * produced length (actualLen) in "*precompLen".
 * NOTE(review): "decompLen" is used three times in the TECConvertText()
 * call: as the input length, by address as the out-parameter that follows
 * it, and as the output buffer size.  Confirm that overwriting it through
 * the out-parameter is intended.
 */
460 * Converts a decomposed HFS+ UTF-8 path to precomposed UTF-8
463 mac_precompose_path(decompPath
, decompLen
, precompLen
)
468 char_u
*result
= NULL
;
469 size_t actualLen
= 0;
473 result
= alloc(decompLen
);
476 if (TECConvertText(gPathConverter
, decompPath
,
477 decompLen
, &decompLen
, result
,
478 decompLen
, &actualLen
) != noErr
)
487 *precompLen
= actualLen
;
/*
 * NOTE(review): only fragments of this function are visible in this view.
 * When gUTF16ToUTF8Converter is available, the visible code allocates
 * "fromLen * 6 + 1" bytes, converts "from" into that buffer with
 * TECConvertText() and then calls TECFlushText().  "*actualLen" receives
 * utf8_len when a result exists and 0 otherwise.
 * NOTE(review): TECFlushText() is given the start of "result" and the full
 * buffer size, not the position after the utf8_len bytes already written.
 * If the flush emits any bytes they would land on top of the converted
 * text -- confirm.
 * NOTE(review): "inputRead" is reused as the output-length argument of
 * TECFlushText() and then added to utf8_len -- confirm this is intended.
 */
493 * Converts from UTF-16 UniChars to precomposed UTF-8
496 mac_utf16_to_utf8(from
, fromLen
, actualLen
)
505 if (gUTF16ToUTF8Converter
)
507 result
= alloc(fromLen
* 6 + 1);
508 if (result
&& TECConvertText(gUTF16ToUTF8Converter
, (ConstTextPtr
)from
,
509 fromLen
, &inputRead
, result
,
510 (fromLen
*6+1)*sizeof(char_u
), &utf8_len
) == noErr
)
512 TECFlushText(gUTF16ToUTF8Converter
, result
, (fromLen
*6+1)*sizeof(char_u
), &inputRead
);
513 utf8_len
+= inputRead
;
527 *actualLen
= result
? utf8_len
: 0;
/*
 * NOTE(review): only fragments of this function are visible in this view.
 * The visible code creates a CFString from the UTF-8 bytes in "from",
 * allocates room for CFStringGetLength() UniChars, copies the characters
 * out with CFStringGetCharacters(), and stores the size in bytes
 * (length * sizeof(UniChar)) in "*actualLen".
 * NOTE(review): no CFRelease() of utf8_str and no check of the alloc()
 * result are visible here -- confirm both exist in the full function.
 */
533 * Converts from UTF-8 to UTF-16 UniChars
536 mac_utf8_to_utf16(from
, fromLen
, actualLen
)
541 CFStringRef utf8_str
;
542 CFRange convertRange
;
543 UniChar
*result
= NULL
;
545 utf8_str
= CFStringCreateWithBytes(NULL
, from
, fromLen
,
546 kCFStringEncodingUTF8
, FALSE
);
548 if (utf8_str
== NULL
) {
554 convertRange
= CFRangeMake(0, CFStringGetLength(utf8_str
));
555 result
= (UniChar
*)alloc(convertRange
.length
* sizeof(UniChar
));
557 CFStringGetCharacters(utf8_str
, convertRange
, result
);
562 *actualLen
= convertRange
.length
* sizeof(UniChar
);
/*
 * NOTE(review): the function header is not visible in this view.
 * When mch_getenv() finds no LANG variable, the visible code asks
 * LocaleRefGetPartString() for the language, language variant, region and
 * region variant parts.  If that succeeds with a non-empty string, it is
 * exported as LANG with vim_setenv().  With HAVE_LOCALE_H defined,
 * setlocale(LC_ALL, "") is called afterwards.
 */
568 * Sets LANG environment variable in Vim from Mac locale
572 if (mch_getenv((char_u
*)"LANG") == NULL
)
575 if (LocaleRefGetPartString(NULL
,
576 kLocaleLanguageMask
| kLocaleLanguageVariantMask
|
577 kLocaleRegionMask
| kLocaleRegionVariantMask
,
578 sizeof buf
, buf
) == noErr
&& *buf
)
580 vim_setenv((char_u
*)"LANG", (char_u
*)buf
);
581 # ifdef HAVE_LOCALE_H
582 setlocale(LC_ALL
, "");
587 #endif /* MACOS_CONVERT */