1 /* vi:set ts=8 sts=4 sw=4:
3 * VIM - Vi IMproved by Bram Moolenaar
5 * Do ":help uganda" in Vim to read copying and usage conditions.
6 * Do ":help credits" in Vim to see a list of people who contributed.
7 * See README.txt for an overview of the Vim source code.
10 * os_mac_conv.c: Code specifically for Mac string conversions.
12 * This code has been put in a separate file to avoid the conflicts that are
13 * caused by including both the X11 and Carbon header files.
16 #define NO_X11_INCLUDES
19 #ifdef FEAT_GUI_MACVIM
20 /* Include Carbon here instead of in one of the header files to avoid polluting
21 * the global namespace. */
22 # include <Carbon/Carbon.h>
25 #if defined(MACOS_CONVERT) || defined(PROTO)
/*
 * Stand-in typedefs for two Carbon types, so that prototypes mentioning them
 * can be written without the Carbon headers.
 * NOTE(review): the preprocessor branch that selects these stand-ins is not
 * visible in this fragment - confirm they are never compiled together with
 * the real Carbon declarations.
 */
27 /* A few dummy types to be able to generate function prototypes. */
29 typedef int *TECObjectRef
;
30 typedef int CFStringRef
;
/*
 * Forward declarations of the two file-local UTF-16 / UTF-8 helpers.
 * Each takes a source buffer plus its length and reports the size of the
 * produced text through actualLen.  The parameter lists are wrapped in
 * __ARGS(()), a macro defined outside this fragment.
 */
33 static char_u
*mac_utf16_to_utf8
__ARGS((UniChar
*from
, size_t fromLen
, size_t *actualLen
));
34 static UniChar
*mac_utf8_to_utf16
__ARGS((char_u
*from
, size_t fromLen
, size_t *actualLen
));
/*
 * File-scope Text Encoding Converter handles.  Elsewhere in this file they
 * are filled in by TECCreateConverter(), set to NULL when creation fails,
 * and set back to NULL after TECDisposeConverter().
 */
36 /* Converter for composing decomposed HFS+ file paths */
37 static TECObjectRef gPathConverter
;
38 /* Converter used by mac_utf16_to_utf8 */
39 static TECObjectRef gUTF16ToUTF8Converter
;
42 * A Mac version of string_convert_ext() for special cases.
/*
 * Convert the len bytes at ptr from one encoding to another through a
 * CFString.  Only part of the function is visible in this fragment.
 *
 * Visible steps:
 *  - The case labels l, m and u select ISO Latin-1, MacRoman or UTF-8 for the
 *    source (from) and for the target (to) encoding.  The switch expressions
 *    are not visible; presumably they test from_enc and to_enc - TODO confirm.
 *  - A CFString is created from ptr[len].  While that fails, unconvlenp is
 *    non-NULL, len > 1 and *unconvlenp < 6, creation is attempted again (the
 *    statements that adjust len and *unconvlenp are not visible here).
 *  - The output size is obtained by calling CFStringGetBytes() with a NULL
 *    buffer; retval is then allocated and filled with CFStringGetBytes() and
 *    CFStringGetCString().
 *  - As a last resort the input is converted one character at a time, using
 *    utf_ptr2len() for the character length when the source is UTF-8, and
 *    *lenp is set to STRLEN(retval).
 *
 * NOTE(review): buflen is declared int, yet its address is passed as
 * (CFIndex *).  If CFIndex is wider than int on the target ABI the call
 * writes past buflen - confirm and consider a CFIndex temporary.
 * NOTE(review): the two fprintf(stderr, ...) calls look like leftover debug
 * output - confirm they are wanted in release builds.
 */
45 mac_string_convert(ptr
, len
, lenp
, fail_on_error
, from_enc
, to_enc
, unconvlenp
)
56 int buflen
, in
, out
, l
, i
;
57 CFStringEncoding from
;
62 case 'l': from
= kCFStringEncodingISOLatin1
; break;
63 case 'm': from
= kCFStringEncodingMacRoman
; break;
64 case 'u': from
= kCFStringEncodingUTF8
; break;
69 case 'l': to
= kCFStringEncodingISOLatin1
; break;
70 case 'm': to
= kCFStringEncodingMacRoman
; break;
71 case 'u': to
= kCFStringEncodingUTF8
; break;
75 if (unconvlenp
!= NULL
)
77 cfstr
= CFStringCreateWithBytes(NULL
, ptr
, len
, from
, 0);
80 fprintf(stderr
, "Encoding failed\n");
81 /* When conversion failed, try excluding bytes from the end, helps when
82 * there is an incomplete byte sequence. Only do up to 6 bytes to avoid
83 * looping a long time when there really is something unconvertible. */
84 while (cfstr
== NULL
&& unconvlenp
!= NULL
&& len
> 1 && *unconvlenp
< 6)
88 cfstr
= CFStringCreateWithBytes(NULL
, ptr
, len
, from
, 0);
93 if (to
== kCFStringEncodingUTF8
)
97 retval
= alloc(buflen
);
105 CFRange convertRange
= CFRangeMake(0, CFStringGetLength(cfstr
));
106 /* Determine output buffer size */
107 CFStringGetBytes(cfstr
, convertRange
, to
, NULL
, FALSE
, NULL
, 0, (CFIndex
*)&buflen
);
108 retval
= (buflen
> 0) ? alloc(buflen
) : NULL
;
109 if (retval
== NULL
) {
115 *lenp
= buflen
/ sizeof(char_u
);
117 if (!CFStringGetBytes(cfstr
, convertRange
, to
, NULL
, FALSE
, retval
, buflen
, NULL
))
119 if (!CFStringGetCString(cfstr
, (char *)retval
, buflen
, to
))
128 fprintf(stderr
, "Trying char-by-char conversion...\n");
129 /* conversion failed for the whole string, but maybe it will work
130 * for each character */
131 for (d
= retval
, in
= 0, out
= 0; in
< len
&& out
< buflen
- 1;)
133 if (from
== kCFStringEncodingUTF8
)
134 l
= utf_ptr2len(ptr
+ in
);
137 cfstr
= CFStringCreateWithBytes(NULL
, ptr
+ in
, l
, from
, 0);
145 if (!CFStringGetCString(cfstr
, (char *)d
, buflen
- out
, to
))
167 *lenp
= STRLEN(retval
);
173 * Conversion from Apple MacRoman char encoding to UTF-8 or latin1, using
174 * standard Carbon framework.
175 * Input: "ptr[*sizep]".
176 * "real_size" is the size of the buffer that "ptr" points to.
177 * output is in-place, "sizep" is adjusted.
178 * Returns OK or FAIL.
/*
 * In-place conversion of ptr[*sizep] from MacRoman.
 *
 * Visible steps: a CFString is created from the MacRoman bytes, then
 * CFStringGetBytes() writes the converted text behind the input, starting at
 * ptr + *sizep and limited to real_size - *sizep bytes.  The target encoding
 * is UTF-8 when enc_utf8 is set and ISO Latin-1 otherwise.  Finally
 * mch_memmove() copies the len converted bytes to the start of ptr.
 * The value returned by CFStringGetBytes() is compared with the string
 * length; the branch taken on a mismatch is not visible in this fragment.
 *
 * NOTE(review): a comment that starts inside this block has lost its closing
 * line in this fragment; restore it before compiling.
 */
181 macroman2enc(ptr
, sizep
, real_size
)
188 CFIndex len
= *sizep
;
190 /* MacRoman is an 8-bit encoding, no need to move bytes to
192 cfstr
= CFStringCreateWithBytes(NULL
, ptr
, len
,
193 kCFStringEncodingMacRoman
, 0);
195 * If there is a conversion error, try using another
202 r
.length
= CFStringGetLength(cfstr
);
203 if (r
.length
!= CFStringGetBytes(cfstr
, r
,
204 (enc_utf8
) ? kCFStringEncodingUTF8
: kCFStringEncodingISOLatin1
,
205 0, /* no lossy conversion */
206 0, /* not external representation */
207 ptr
+ *sizep
, real_size
- *sizep
, &len
))
213 mch_memmove(ptr
, ptr
+ *sizep
, len
);
220 * Conversion from UTF-8 or latin1 to MacRoman.
221 * Input: "from[fromlen]"
222 * Output: "to[maxtolen]" length in "*tolenp"
223 * Unconverted rest in rest[*restlenp].
224 * Returns OK or FAIL.
/*
 * Convert from[fromlen] (UTF-8 when enc_utf8 is set, otherwise ISO Latin-1)
 * to MacRoman through a CFString.
 *
 * If the CFString cannot be created, bytes are taken one at a time from the
 * end of from, stored in rest, and creation is retried while *restlenp < 3
 * and fromlen > 1.  The string is then converted with CFStringGetBytes() to
 * kCFStringEncodingMacRoman without lossy conversion.  The remaining
 * arguments of that call and the handling of its result are not visible in
 * this fragment.
 *
 * NOTE(review): rest[*restlenp++] parses as rest[*(restlenp++)].  The
 * pointer restlenp is advanced and the counter it points to is never
 * incremented, so the next loop test reads *restlenp through the moved
 * pointer.  Most likely (*restlenp)++ was intended - confirm and fix.
 */
227 enc2macroman(from
, fromlen
, to
, tolenp
, maxtolen
, rest
, restlenp
)
241 cfstr
= CFStringCreateWithBytes(NULL
, from
, fromlen
,
242 (enc_utf8
) ? kCFStringEncodingUTF8
: kCFStringEncodingISOLatin1
,
244 while (cfstr
== NULL
&& *restlenp
< 3 && fromlen
> 1)
246 rest
[*restlenp
++] = from
[--fromlen
];
247 cfstr
= CFStringCreateWithBytes(NULL
, from
, fromlen
,
248 (enc_utf8
) ? kCFStringEncodingUTF8
: kCFStringEncodingISOLatin1
,
255 r
.length
= CFStringGetLength(cfstr
);
256 if (r
.length
!= CFStringGetBytes(cfstr
, r
,
257 kCFStringEncodingMacRoman
,
258 0, /* no lossy conversion */
259 0, /* not external representation (since vim
260 * handles this internally) */
272 * Initializes text converters
/*
 * Build four TextEncoding values with CreateTextEncoding() and create the
 * two global converters from them:
 *  - utf8_encoding:         default variant, kUnicodeUTF8Format
 *  - utf8_hfsplus_encoding: kUnicodeHFSPlusCompVariant, kUnicodeUTF8Format
 *  - utf8_canon_encoding:   kUnicodeCanonicalCompVariant, kUnicodeUTF8Format
 *  - utf16_encoding:        default variant, kUnicode16BitFormat
 *
 * gPathConverter converts utf8_encoding to utf8_hfsplus_encoding.
 * gUTF16ToUTF8Converter converts utf16_encoding to utf8_canon_encoding and,
 * if that converter cannot be created, utf16_encoding to utf8_encoding.
 * A handle whose creation fails is set to NULL.
 */
277 TextEncoding utf8_encoding
;
278 TextEncoding utf8_hfsplus_encoding
;
279 TextEncoding utf8_canon_encoding
;
280 TextEncoding utf16_encoding
;
282 utf8_encoding
= CreateTextEncoding(kTextEncodingUnicodeDefault
,
283 kTextEncodingDefaultVariant
, kUnicodeUTF8Format
);
284 utf8_hfsplus_encoding
= CreateTextEncoding(kTextEncodingUnicodeDefault
,
285 kUnicodeHFSPlusCompVariant
, kUnicodeUTF8Format
);
286 utf8_canon_encoding
= CreateTextEncoding(kTextEncodingUnicodeDefault
,
287 kUnicodeCanonicalCompVariant
, kUnicodeUTF8Format
);
288 utf16_encoding
= CreateTextEncoding(kTextEncodingUnicodeDefault
,
289 kTextEncodingDefaultVariant
, kUnicode16BitFormat
);
291 if (TECCreateConverter(&gPathConverter
, utf8_encoding
,
292 utf8_hfsplus_encoding
) != noErr
)
293 gPathConverter
= NULL
;
295 if (TECCreateConverter(&gUTF16ToUTF8Converter
, utf16_encoding
,
296 utf8_canon_encoding
) != noErr
)
298 /* On pre-10.3, Unicode normalization is not available so
299 * fall back to non-normalizing converter */
300 if (TECCreateConverter(&gUTF16ToUTF8Converter
, utf16_encoding
,
301 utf8_encoding
) != noErr
)
302 gUTF16ToUTF8Converter
= NULL
;
307 * Destroys text converters
/*
 * Dispose of both converters with TECDisposeConverter() and reset the global
 * handles to NULL.
 * NOTE(review): the guard around the gPathConverter disposal is not visible
 * in this fragment - confirm it mirrors the gUTF16ToUTF8Converter check.
 */
312 if (gUTF16ToUTF8Converter
)
314 TECDisposeConverter(gUTF16ToUTF8Converter
);
315 gUTF16ToUTF8Converter
= NULL
;
320 TECDisposeConverter(gPathConverter
);
321 gPathConverter
= NULL
;
326 * Conversion from UTF-16 UniChars to 'encoding'
327 * The function signature uses the real type of UniChar (as typedef'ed in
328 * CFBase.h) to avoid clashes with X11 header files in the .pro file
/*
 * Convert UTF-16 input to the current encoding (p_enc) in two stages:
 *  1. mac_utf16_to_utf8() produces UTF-8 text and its length in utf8_len.
 *  2. Unless enc_canon_props(p_enc) reports ENC_UNICODE, convert_setup()
 *     prepares a conversion from utf-8 to p_enc (macroman when p_enc is
 *     NULL) and string_convert() applies it.  When conv.vc_type is still
 *     CONV_NONE the UTF-8 text needs no second stage.
 * Afterwards convert_setup(&conv, NULL, NULL) is called (presumably to
 * release the converter - TODO confirm) and utf8_len is stored in
 * *actualLen.
 *
 * NOTE(review): string_convert() is given (int *)&utf8_len.  The declaration
 * of utf8_len is not visible here; its address is also passed where the
 * prototype expects size_t *, so it is presumably size_t and the int * cast
 * is unsafe where the two sizes differ - confirm.
 * NOTE(review): p_enc is handed to enc_canon_props() before the NULL
 * fallback to macroman is applied - confirm enc_canon_props() accepts NULL.
 */
331 mac_utf16_to_enc(from
, fromLen
, actualLen
)
332 unsigned short *from
;
336 /* Following code borrows somewhat from os_mswin.c */
340 char_u
*result
= NULL
;
342 /* Convert to utf-8 first, works better with iconv */
344 utf8_str
= mac_utf16_to_utf8(from
, fromLen
, &utf8_len
);
348 /* We might be called before we have p_enc set up. */
349 conv
.vc_type
= CONV_NONE
;
351 /* If encoding (p_enc) is any unicode, it is actually in utf-8 (vim
352 * internal unicode is always utf-8) so don't convert in such cases */
354 if ((enc_canon_props(p_enc
) & ENC_UNICODE
) == 0)
355 convert_setup(&conv
, (char_u
*)"utf-8",
356 p_enc
? p_enc
: (char_u
*)"macroman");
357 if (conv
.vc_type
== CONV_NONE
)
359 /* p_enc is utf-8, so we're done. */
364 result
= string_convert(&conv
, utf8_str
, (int *)&utf8_len
);
368 convert_setup(&conv
, NULL
, NULL
);
371 *actualLen
= utf8_len
;
380 * Conversion from 'encoding' to UTF-16 UniChars
381 * The function return uses the real type of UniChar (as typedef'ed in
382 * CFBase.h) to avoid clashes with X11 header files in the .pro file
/*
 * Convert text in the current encoding (p_enc) to UTF-16 UniChars.
 *
 * If enc_canon_props(p_enc) does not report ENC_UNICODE, convert_setup()
 * prepares a conversion from p_enc (macroman when p_enc is NULL) to utf-8;
 * the branch taken when it returns FAIL is not visible in this fragment.
 * When a conversion is active (conv.vc_type != CONV_NONE), string_convert()
 * supplies utf8_str and should_free_utf8 is set.  The statement guarded by
 * should_free_utf8 at the end is not visible; presumably it frees utf8_str.
 * The UTF-8 text is finally passed to mac_utf8_to_utf16(), which fills in
 * *actualLen.
 *
 * NOTE(review): string_convert() is given (int *)&utf8_len while the same
 * variable is passed by value as the size_t length of mac_utf8_to_utf16();
 * its declaration is not visible - confirm the cast is safe.
 */
385 mac_enc_to_utf16(from
, fromLen
, actualLen
)
390 /* Following code borrows somewhat from os_mswin.c */
394 UniChar
*result
= NULL
;
395 Boolean should_free_utf8
= FALSE
;
399 /* Use MacRoman by default, we might be called before we have p_enc
400 * set up. Convert to utf-8 first, works better with iconv(). Does
401 * nothing if 'encoding' is "utf-8". */
402 conv
.vc_type
= CONV_NONE
;
403 if ((enc_canon_props(p_enc
) & ENC_UNICODE
) == 0 &&
404 convert_setup(&conv
, p_enc
? p_enc
: (char_u
*)"macroman",
405 (char_u
*)"utf-8") == FAIL
)
408 if (conv
.vc_type
!= CONV_NONE
)
411 utf8_str
= string_convert(&conv
, from
, (int *)&utf8_len
);
412 should_free_utf8
= TRUE
;
420 if (utf8_str
== NULL
)
423 convert_setup(&conv
, NULL
, NULL
);
425 result
= mac_utf8_to_utf16(utf8_str
, utf8_len
, actualLen
);
427 if (should_free_utf8
)
440 * Converts from UTF-16 UniChars to CFString
441 * The void * return type is actually a CFStringRef
/*
 * Build a CFString from text in the current encoding.
 *
 * mac_enc_to_utf16() converts from[fromLen] to UTF-16, then
 * CFStringCreateWithCharacters() is called with utf16_len / sizeof(UniChar)
 * as the character count, so utf16_len is treated as a size in bytes.
 * The resulting CFStringRef is returned as void *.
 *
 * NOTE(review): any NULL check on utf16_str and its release are not visible
 * in this fragment - confirm both exist.
 */
444 mac_enc_to_cfstring(from
, fromLen
)
450 CFStringRef result
= NULL
;
452 utf16_str
= mac_enc_to_utf16(from
, fromLen
, &utf16_len
);
455 result
= CFStringCreateWithCharacters(NULL
, utf16_str
, utf16_len
/sizeof(UniChar
));
459 return (void *)result
;
463 * Converts a decomposed HFS+ UTF-8 path to precomposed UTF-8
/*
 * Run decompPath through gPathConverter into a newly allocated buffer.
 *
 * The buffer is alloc(decompLen) bytes, exactly the size of the input, and
 * the same value is given to TECConvertText() as the output capacity.  The
 * length reported by the converter (actualLen) is stored in *precompLen.
 *
 * NOTE(review): &decompLen is also passed as an output argument of
 * TECConvertText() (presumably the number of input bytes consumed), so
 * decompLen is overwritten by the call - confirm nothing later relies on
 * the original value.
 * NOTE(review): the visible code reserves no room for a terminating NUL -
 * confirm whether callers expect one.
 */
466 mac_precompose_path(decompPath
, decompLen
, precompLen
)
471 char_u
*result
= NULL
;
472 size_t actualLen
= 0;
476 result
= alloc(decompLen
);
479 if (TECConvertText(gPathConverter
, decompPath
,
480 decompLen
, &decompLen
, result
,
481 decompLen
, &actualLen
) != noErr
)
490 *precompLen
= actualLen
;
496 * Converts from UTF-16 UniChars to precomposed UTF-8
/*
 * Convert UTF-16 to UTF-8 with gUTF16ToUTF8Converter when that handle is
 * non-NULL.
 *
 * The output buffer is alloc(fromLen * 6 + 1) and the same size is passed as
 * capacity to both TECConvertText() and TECFlushText().  *actualLen receives
 * utf8_len, or 0 when result is NULL.
 *
 * NOTE(review): TECFlushText() is given result, the start of the buffer,
 * rather than result + utf8_len, and the length it reports is stored in
 * inputRead and then added to utf8_len.  If the flush emits any bytes they
 * would overwrite the beginning of the converted text - confirm against the
 * TECFlushText documentation.
 */
499 mac_utf16_to_utf8(from
, fromLen
, actualLen
)
508 if (gUTF16ToUTF8Converter
)
510 result
= alloc(fromLen
* 6 + 1);
511 if (result
&& TECConvertText(gUTF16ToUTF8Converter
, (ConstTextPtr
)from
,
512 fromLen
, &inputRead
, result
,
513 (fromLen
*6+1)*sizeof(char_u
), &utf8_len
) == noErr
)
515 TECFlushText(gUTF16ToUTF8Converter
, result
, (fromLen
*6+1)*sizeof(char_u
), &inputRead
);
516 utf8_len
+= inputRead
;
530 *actualLen
= result
? utf8_len
: 0;
536 * Converts from UTF-8 to UTF-16 UniChars
/*
 * Convert from[fromLen] (UTF-8) to UTF-16 UniChars.
 *
 * A CFString is created from the bytes; the branch taken when creation
 * fails is not visible in this fragment.  The result buffer holds
 * CFStringGetLength() UniChars and is filled by CFStringGetCharacters().
 * *actualLen is set to the size in bytes (length * sizeof(UniChar)), not to
 * the number of characters.
 *
 * NOTE(review): releasing utf8_str (CFRelease) and the check of the alloc()
 * result are not visible in this fragment - confirm both exist.
 */
539 mac_utf8_to_utf16(from
, fromLen
, actualLen
)
544 CFStringRef utf8_str
;
545 CFRange convertRange
;
546 UniChar
*result
= NULL
;
548 utf8_str
= CFStringCreateWithBytes(NULL
, from
, fromLen
,
549 kCFStringEncodingUTF8
, FALSE
);
551 if (utf8_str
== NULL
) {
557 convertRange
= CFRangeMake(0, CFStringGetLength(utf8_str
));
558 result
= (UniChar
*)alloc(convertRange
.length
* sizeof(UniChar
));
560 CFStringGetCharacters(utf8_str
, convertRange
, result
);
565 *actualLen
= convertRange
.length
* sizeof(UniChar
);
571 * Sets LANG environment variable in Vim from Mac locale
/*
 * Only when the LANG environment variable is unset: ask
 * LocaleRefGetPartString() for the language, language variant, region and
 * region variant parts of the NULL locale (presumably the current locale -
 * TODO confirm).  If the call returns noErr and buf is not empty, buf is
 * exported as LANG with vim_setenv().  When HAVE_LOCALE_H is defined,
 * setlocale() is then called for LC_ALL with an empty locale name.
 */
575 if (mch_getenv((char_u
*)"LANG") == NULL
)
578 if (LocaleRefGetPartString(NULL
,
579 kLocaleLanguageMask
| kLocaleLanguageVariantMask
|
580 kLocaleRegionMask
| kLocaleRegionVariantMask
,
581 sizeof buf
, buf
) == noErr
&& *buf
)
583 vim_setenv((char_u
*)"LANG", (char_u
*)buf
);
584 # ifdef HAVE_LOCALE_H
585 setlocale(LC_ALL
, "");
590 #endif /* MACOS_CONVERT */