1 /* vi:set ts=8 sts=4 sw=4:
3 * VIM - Vi IMproved by Bram Moolenaar
5 * Do ":help uganda" in Vim to read copying and usage conditions.
6 * Do ":help credits" in Vim to see a list of people who contributed.
7 * See README.txt for an overview of the Vim source code.
10 * os_mac_conv.c: Code specifically for Mac string conversions.
12 * This code has been put in a separate file to avoid the conflicts that are
13 * caused by including both the X11 and Carbon header files.
16 #define NO_X11_INCLUDES
19 #if defined(MACOS_CONVERT) || defined(PROTO)
21 /* A few dummy types to be able to generate function prototypes. */
23 typedef int *TECObjectRef
;
24 typedef int CFStringRef
;
/* Forward declarations of the two file-local helpers that convert between
 * UTF-16 UniChars and UTF-8.  Both take an input pointer, an input length
 * and a "size_t *" out-parameter for the resulting length. */
27 static char_u
*mac_utf16_to_utf8
__ARGS((UniChar
*from
, size_t fromLen
, size_t *actualLen
));
28 static UniChar
*mac_utf8_to_utf16
__ARGS((char_u
*from
, size_t fromLen
, size_t *actualLen
));
30 /* Converter for composing decomposed HFS+ file paths; set to NULL when it could not be created */
31 static TECObjectRef gPathConverter
;
32 /* Converter used by mac_utf16_to_utf8; set to NULL when it could not be created */
33 static TECObjectRef gUTF16ToUTF8Converter
;
36 * A Mac version of string_convert_ext() for special cases.
/*
 * Convert "ptr[len]" from one encoding to another by going through a
 * CFString.  The case labels below map 'l', 'm' and 'u' to Latin-1,
 * MacRoman and UTF-8 for both the source ("from") and target ("to") side.
 * The length of the result is stored in "*lenp".  "unconvlenp" may be NULL;
 * when it is not, it bounds the retry loop that follows a failed conversion.
 */
39 mac_string_convert(ptr
, len
, lenp
, fail_on_error
, from_enc
, to_enc
, unconvlenp
)
50 int buflen
, in
, out
, l
, i
;
51 CFStringEncoding from
;
/* Select the CFString encoding of the input bytes. */
56 case 'l': from
= kCFStringEncodingISOLatin1
; break;
57 case 'm': from
= kCFStringEncodingMacRoman
; break;
58 case 'u': from
= kCFStringEncodingUTF8
; break;
/* Select the CFString encoding of the output bytes. */
63 case 'l': to
= kCFStringEncodingISOLatin1
; break;
64 case 'm': to
= kCFStringEncodingMacRoman
; break;
65 case 'u': to
= kCFStringEncodingUTF8
; break;
69 if (unconvlenp
!= NULL
)
/* First attempt: wrap the whole input in a CFString. */
71 cfstr
= CFStringCreateWithBytes(NULL
, ptr
, len
, from
, 0);
/* NOTE(review): this diagnostic is written directly to stderr; the condition
 * guarding it is not visible here -- confirm it is not emitted in normal
 * use. */
74 fprintf(stderr
, "Encoding failed\n");
75 /* When conversion failed, try excluding bytes from the end, helps when
76 * there is an incomplete byte sequence. Only do up to 6 bytes to avoid
77 * looping a long time when there really is something unconvertible. */
78 while (cfstr
== NULL
&& unconvlenp
!= NULL
&& len
> 1 && *unconvlenp
< 6)
82 cfstr
= CFStringCreateWithBytes(NULL
, ptr
, len
, from
, 0);
87 if (to
== kCFStringEncodingUTF8
)
91 retval
= alloc(buflen
);
99 CFRange convertRange
= CFRangeMake(0, CFStringGetLength(cfstr
));
100 /* Determine output buffer size */
/* The call below passes no output buffer (NULL, 0) and only asks for the
 * byte count.
 * NOTE(review): "buflen" is declared "int" above but is written through a
 * "CFIndex *".  If CFIndex is wider than int this stores past "buflen";
 * a CFIndex temporary would be safer.  Preprocessor lines are not visible
 * here, so verify that this path is actually compiled. */
101 CFStringGetBytes(cfstr
, convertRange
, to
, NULL
, FALSE
, NULL
, 0, (CFIndex
*)&buflen
);
/* Only allocate when there is something to convert. */
102 retval
= (buflen
> 0) ? alloc(buflen
) : NULL
;
103 if (retval
== NULL
) {
109 *lenp
= buflen
/ sizeof(char_u
);
/* Second call: this time "retval" and "buflen" are passed as the output
 * buffer and its size. */
111 if (!CFStringGetBytes(cfstr
, convertRange
, to
, NULL
, FALSE
, retval
, buflen
, NULL
))
113 if (!CFStringGetCString(cfstr
, (char *)retval
, buflen
, to
))
/* NOTE(review): another direct write to stderr; see the note further up. */
122 fprintf(stderr
, "Trying char-by-char conversion...\n");
123 /* conversion failed for the whole string, but maybe it will work
124 * for each character */
/* "d" walks the output buffer, "in" and "out" are the input and output
 * offsets.  The loop stops at the end of the input or when only one byte
 * of output space is left. */
125 for (d
= retval
, in
= 0, out
= 0; in
< len
&& out
< buflen
- 1;)
/* For UTF-8 input the length "l" of the chunk to convert is taken from
 * utf_ptr2len(). */
127 if (from
== kCFStringEncodingUTF8
)
128 l
= utf_ptr2len(ptr
+ in
);
/* Wrap just these "l" bytes in a CFString ... */
131 cfstr
= CFStringCreateWithBytes(NULL
, ptr
+ in
, l
, from
, 0);
/* ... and convert them into the space that is left at "d". */
139 if (!CFStringGetCString(cfstr
, (char *)d
, buflen
- out
, to
))
/* The result length is measured with STRLEN() on "retval". */
161 *lenp
= STRLEN(retval
);
167 * Conversion from Apple MacRoman char encoding to UTF-8 or latin1, using
168 * standard Carbon framework.
169 * Input: "ptr[*sizep]".
170 * "real_size" is the size of the buffer that "ptr" points to.
171 * output is in-place, "sizep" is adjusted.
172 * Returns OK or FAIL.
/*
 * In-place conversion of the MacRoman bytes in "ptr[*sizep]".  The target
 * encoding is UTF-8 when "enc_utf8" is set and Latin-1 otherwise.  The
 * converted bytes are first produced in the unused tail of the buffer
 * (starting at "ptr + *sizep", at most "real_size - *sizep" bytes) and are
 * then moved to the start of "ptr".
 */
175 macroman2enc(ptr
, sizep
, real_size
)
182 CFIndex len
= *sizep
;
184 /* MacRoman is an 8-bit encoding, no need to move bytes to
186 cfstr
= CFStringCreateWithBytes(NULL
, ptr
, len
,
187 kCFStringEncodingMacRoman
, 0);
189 * If there is a conversion error, try using another
196 r
.length
= CFStringGetLength(cfstr
);
197 if (r
.length
!= CFStringGetBytes(cfstr
, r
,
198 (enc_utf8
) ? kCFStringEncodingUTF8
: kCFStringEncodingISOLatin1
,
199 0, /* no lossy conversion */
200 0, /* not external representation */
ptr
+ *sizep
, real_size
- *sizep
, &len
))
/* The test above compares the value returned by CFStringGetBytes() with the
 * length of the whole string.  "len" was passed by address as the last
 * argument and is used here as the number of bytes to move from the tail of
 * the buffer to its start. */
207 mch_memmove(ptr
, ptr
+ *sizep
, len
);
214 * Conversion from UTF-8 or latin1 to MacRoman.
215 * Input: "from[fromlen]"
216 * Output: "to[maxtolen]" length in "*tolenp"
217 * Unconverted rest in rest[*restlenp].
218 * Returns OK or FAIL.
/*
 * Convert "from[fromlen]" to MacRoman.  The source encoding is UTF-8 when
 * "enc_utf8" is set and Latin-1 otherwise.  When the input cannot be turned
 * into a CFString, trailing bytes are moved into "rest" one at a time and
 * the conversion is retried.
 */
221 enc2macroman(from
, fromlen
, to
, tolenp
, maxtolen
, rest
, restlenp
)
235 cfstr
= CFStringCreateWithBytes(NULL
, from
, fromlen
,
236 (enc_utf8
) ? kCFStringEncodingUTF8
: kCFStringEncodingISOLatin1
,
/* Retry with a shorter input for as long as creating the CFString fails,
 * fewer than 3 bytes were set aside and more than one input byte is left. */
238 while (cfstr
== NULL
&& *restlenp
< 3 && fromlen
> 1)
/* NOTE(review): "*restlenp++" parses as "*(restlenp++)".  It advances the
 * pointer "restlenp" instead of the counter it points to, so the caller's
 * count is never updated and the "*restlenp < 3" test above reads a
 * different object on the next iteration.  This looks like it was meant to
 * be "(*restlenp)++" -- TODO confirm and fix. */
240 rest
[*restlenp
++] = from
[--fromlen
];
241 cfstr
= CFStringCreateWithBytes(NULL
, from
, fromlen
,
242 (enc_utf8
) ? kCFStringEncodingUTF8
: kCFStringEncodingISOLatin1
,
/* Convert the whole CFString to MacRoman; the value returned by
 * CFStringGetBytes() is compared with the string length. */
249 r
.length
= CFStringGetLength(cfstr
);
250 if (r
.length
!= CFStringGetBytes(cfstr
, r
,
251 kCFStringEncodingMacRoman
,
252 0, /* no lossy conversion */
253 0, /* not external representation (since vim
254 * handles this internally) */
266 * Initializes text converters
/* Text encodings used as source and target of the two converters. */
271 TextEncoding utf8_encoding
;
272 TextEncoding utf8_hfsplus_encoding
;
273 TextEncoding utf8_canon_encoding
;
274 TextEncoding utf16_encoding
;
/* Three UTF-8 encodings that differ only in the variant (default,
 * kUnicodeHFSPlusCompVariant, kUnicodeCanonicalCompVariant) and one 16-bit
 * Unicode encoding. */
276 utf8_encoding
= CreateTextEncoding(kTextEncodingUnicodeDefault
,
277 kTextEncodingDefaultVariant
, kUnicodeUTF8Format
);
278 utf8_hfsplus_encoding
= CreateTextEncoding(kTextEncodingUnicodeDefault
,
279 kUnicodeHFSPlusCompVariant
, kUnicodeUTF8Format
);
280 utf8_canon_encoding
= CreateTextEncoding(kTextEncodingUnicodeDefault
,
281 kUnicodeCanonicalCompVariant
, kUnicodeUTF8Format
);
282 utf16_encoding
= CreateTextEncoding(kTextEncodingUnicodeDefault
,
283 kTextEncodingDefaultVariant
, kUnicode16BitFormat
);
/* Path converter: default UTF-8 to the HFS+ variant.  The handle is reset
 * to NULL when it cannot be created. */
285 if (TECCreateConverter(&gPathConverter
, utf8_encoding
,
286 utf8_hfsplus_encoding
) != noErr
)
287 gPathConverter
= NULL
;
/* UTF-16 to UTF-8 converter: first try the canonical variant as target. */
289 if (TECCreateConverter(&gUTF16ToUTF8Converter
, utf16_encoding
,
290 utf8_canon_encoding
) != noErr
)
292 /* On pre-10.3, Unicode normalization is not available so
293 * fall back to non-normalizing converter */
294 if (TECCreateConverter(&gUTF16ToUTF8Converter
, utf16_encoding
,
295 utf8_encoding
) != noErr
)
/* Neither variant could be created: leave the handle NULL. */
296 gUTF16ToUTF8Converter
= NULL
;
301 * Destroys text converters
/* Dispose of the UTF-16 to UTF-8 converter when it exists and clear the
 * handle. */
306 if (gUTF16ToUTF8Converter
)
308 TECDisposeConverter(gUTF16ToUTF8Converter
);
309 gUTF16ToUTF8Converter
= NULL
;
/* Same for the path converter.  NOTE(review): the condition guarding this
 * call is not visible here -- confirm it checks gPathConverter for NULL. */
314 TECDisposeConverter(gPathConverter
);
315 gPathConverter
= NULL
;
320 * Conversion from UTF-16 UniChars to 'encoding'
/*
 * Convert UTF-16 input to 'encoding' in two steps: first to UTF-8 with
 * mac_utf16_to_utf8(), then from UTF-8 to "p_enc" with string_convert()
 * when a conversion was set up.  The resulting length is stored in
 * "*actualLen".
 */
323 mac_utf16_to_enc(from
, fromLen
, actualLen
)
328 /* Following code borrows somewhat from os_mswin.c */
332 char_u
*result
= NULL
;
334 /* Convert to utf-8 first, works better with iconv */
336 utf8_str
= mac_utf16_to_utf8(from
, fromLen
, &utf8_len
);
340 /* We might be called before we have p_enc set up. */
341 conv
.vc_type
= CONV_NONE
;
343 /* If encoding (p_enc) is any unicode, it is actually in utf-8 (vim
344 * internal unicode is always utf-8) so don't convert in such cases */
/* NOTE(review): "p_enc" is handed to enc_canon_props() before the
 * "p_enc ? p_enc : macroman" fallback just below.  If p_enc can really be
 * NULL at this point, confirm that enc_canon_props() accepts NULL. */
346 if ((enc_canon_props(p_enc
) & ENC_UNICODE
) == 0)
347 convert_setup(&conv
, (char_u
*)"utf-8",
348 p_enc
? p_enc
: (char_u
*)"macroman");
349 if (conv
.vc_type
== CONV_NONE
)
351 /* p_enc is utf-8, so we're done. */
/* NOTE(review): "&utf8_len" is passed above where a "size_t *" is expected
 * and is cast to "int *" here.  Where size_t is wider than int,
 * string_convert() reads and writes only part of the variable -- TODO
 * confirm and use an int temporary. */
356 result
= string_convert(&conv
, utf8_str
, (int *)&utf8_len
);
/* convert_setup() with NULL encodings; presumably this releases the
 * conversion state -- verify against convert_setup(). */
360 convert_setup(&conv
, NULL
, NULL
);
363 *actualLen
= utf8_len
;
372 * Conversion from 'encoding' to UTF-16 UniChars
/*
 * Convert input in 'encoding' to UTF-16 in two steps: first to UTF-8 with
 * string_convert() when a conversion was set up, then to UTF-16 with
 * mac_utf8_to_utf16(), which also fills in "*actualLen".
 */
375 mac_enc_to_utf16(from
, fromLen
, actualLen
)
380 /* Following code borrows somewhat from os_mswin.c */
384 UniChar
*result
= NULL
;
/* Set to TRUE below once "utf8_str" has been assigned from
 * string_convert(). */
385 Boolean should_free_utf8
= FALSE
;
389 /* Use MacRoman by default, we might be called before we have p_enc
390 * set up. Convert to utf-8 first, works better with iconv(). Does
391 * nothing if 'encoding' is "utf-8". */
392 conv
.vc_type
= CONV_NONE
;
/* NOTE(review): "p_enc" is handed to enc_canon_props() before the
 * "p_enc ? p_enc : macroman" fallback on the next line.  If p_enc can really
 * be NULL at this point, confirm that enc_canon_props() accepts NULL. */
393 if ((enc_canon_props(p_enc
) & ENC_UNICODE
) == 0 &&
394 convert_setup(&conv
, p_enc
? p_enc
: (char_u
*)"macroman",
395 (char_u
*)"utf-8") == FAIL
)
398 if (conv
.vc_type
!= CONV_NONE
)
/* NOTE(review): "utf8_len" is cast to "int *" here and later passed as the
 * "size_t fromLen" argument of mac_utf8_to_utf16().  Its declaration is not
 * visible; if it is a size_t, the cast is unsafe where size_t is wider than
 * int -- TODO confirm. */
401 utf8_str
= string_convert(&conv
, from
, (int *)&utf8_len
);
402 should_free_utf8
= TRUE
;
410 if (utf8_str
== NULL
)
/* convert_setup() with NULL encodings; presumably this releases the
 * conversion state -- verify against convert_setup(). */
413 convert_setup(&conv
, NULL
, NULL
);
415 result
= mac_utf8_to_utf16(utf8_str
, utf8_len
, actualLen
);
417 if (should_free_utf8
)
430 * Converts from 'encoding' to CFString (by way of UTF-16 UniChars)
/*
 * Build a CFString from "from[fromLen]": the input is first converted with
 * mac_enc_to_utf16() and the resulting UniChars are handed to
 * CFStringCreateWithCharacters().
 */
433 mac_enc_to_cfstring(from
, fromLen
)
439 CFStringRef result
= NULL
;
441 utf16_str
= mac_enc_to_utf16(from
, fromLen
, &utf16_len
);
/* "utf16_len" is divided by sizeof(UniChar) before it is passed as the
 * number of characters. */
444 result
= CFStringCreateWithCharacters(NULL
, utf16_str
, utf16_len
/sizeof(UniChar
));
452 * Converts a decomposed HFS+ UTF-8 path to precomposed UTF-8
/*
 * Run "decompPath[decompLen]" through gPathConverter into a newly allocated
 * buffer and store the number of bytes produced in "*precompLen".
 */
455 mac_precompose_path(decompPath
, decompLen
, precompLen
)
460 char_u
*result
= NULL
;
461 size_t actualLen
= 0;
/* The output buffer gets the same size as the input. */
465 result
= alloc(decompLen
);
/* NOTE(review): "decompLen" is passed as the input length and as the output
 * buffer size, and its address is passed as well, so the call may overwrite
 * it.  The output is limited to the input size and no extra byte is
 * reserved; whether "result" is checked for NULL before this call is not
 * visible here -- TODO confirm both. */
468 if (TECConvertText(gPathConverter
, decompPath
,
469 decompLen
, &decompLen
, result
,
470 decompLen
, &actualLen
) != noErr
)
479 *precompLen
= actualLen
;
485 * Converts from UTF-16 UniChars to precomposed UTF-8
/*
 * Convert "from[fromLen]" with gUTF16ToUTF8Converter into a newly allocated
 * buffer of "fromLen * 6 + 1" bytes.  "*actualLen" is set to the number of
 * bytes produced, or to 0 when there is no result.
 */
488 mac_utf16_to_utf8(from
, fromLen
, actualLen
)
/* Only attempt the conversion when the converter exists. */
497 if (gUTF16ToUTF8Converter
)
499 result
= alloc(fromLen
* 6 + 1);
/* The call is skipped when the allocation failed ("result &&"). */
500 if (result
&& TECConvertText(gUTF16ToUTF8Converter
, (ConstTextPtr
)from
,
501 fromLen
, &inputRead
, result
,
502 (fromLen
*6+1)*sizeof(char_u
), &utf8_len
) == noErr
)
/* NOTE(review): the flush is given "result" (the start of the buffer) and
 * the full buffer size, and its byte count is then added to "utf8_len".
 * Any bytes it emits would land at result[0], over the text that was just
 * converted, rather than after it.  Presumably this should write to
 * "result + utf8_len" with the remaining size -- TODO confirm against the
 * TECFlushText documentation.  "inputRead" is also reused here to receive
 * an output length. */
504 TECFlushText(gUTF16ToUTF8Converter
, result
, (fromLen
*6+1)*sizeof(char_u
), &inputRead
);
505 utf8_len
+= inputRead
;
519 *actualLen
= result
? utf8_len
: 0;
525 * Converts from UTF-8 to UTF-16 UniChars
/*
 * Convert the UTF-8 bytes in "from[fromLen]" to UniChars by creating a
 * CFString from them and copying its characters into a newly allocated
 * array.  "*actualLen" receives the size of the result in bytes
 * (characters * sizeof(UniChar)), not the number of characters.
 */
528 mac_utf8_to_utf16(from
, fromLen
, actualLen
)
533 CFStringRef utf8_str
;
534 CFRange convertRange
;
535 UniChar
*result
= NULL
;
537 utf8_str
= CFStringCreateWithBytes(NULL
, from
, fromLen
,
538 kCFStringEncodingUTF8
, FALSE
);
/* NULL here means the CFString could not be created from the input. */
540 if (utf8_str
== NULL
) {
/* The range covers the whole string. */
546 convertRange
= CFRangeMake(0, CFStringGetLength(utf8_str
));
/* Exactly one UniChar per character is allocated; there is no room for a
 * terminator. */
547 result
= (UniChar
*)alloc(convertRange
.length
* sizeof(UniChar
));
549 CFStringGetCharacters(utf8_str
, convertRange
, result
);
554 *actualLen
= convertRange
.length
* sizeof(UniChar
);
558 #endif /* MACOS_CONVERT */