Merge branch 'vim'
[MacVim.git] / src / os_mac_conv.c
blob15ac533af6dba6e68b9904247197409c81c33d85
1 /* vi:set ts=8 sts=4 sw=4:
3 * VIM - Vi IMproved by Bram Moolenaar
5 * Do ":help uganda" in Vim to read copying and usage conditions.
6 * Do ":help credits" in Vim to see a list of people who contributed.
7 * See README.txt for an overview of the Vim source code.
8 */
/*
 * os_mac_conv.c: Code specifically for Mac string conversions.
 *
 * This code has been put in a separate file to avoid the conflicts that are
 * caused by including both the X11 and Carbon header files.
 */
16 #define NO_X11_INCLUDES
17 #include "vim.h"
19 #ifdef FEAT_GUI_MACVIM
20 /* Include Carbon here instead of in one of the header files to avoid polluting
21 * the global namespace. */
22 # include <Carbon/Carbon.h>
23 #endif
25 #if defined(MACOS_CONVERT) || defined(PROTO)
26 # ifdef PROTO
27 /* A few dummy types to be able to generate function prototypes. */
28 typedef int UniChar;
29 typedef int *TECObjectRef;
30 typedef int CFStringRef;
31 # endif
33 static char_u *mac_utf16_to_utf8 __ARGS((UniChar *from, size_t fromLen, size_t *actualLen));
34 static UniChar *mac_utf8_to_utf16 __ARGS((char_u *from, size_t fromLen, size_t *actualLen));
36 /* Converter for composing decomposed HFS+ file paths */
37 static TECObjectRef gPathConverter;
38 /* Converter used by mac_utf16_to_utf8 */
39 static TECObjectRef gUTF16ToUTF8Converter;
42 * A Mac version of string_convert_ext() for special cases.
44 char_u *
45 mac_string_convert(ptr, len, lenp, fail_on_error, from_enc, to_enc, unconvlenp)
46 char_u *ptr;
47 int len;
48 int *lenp;
49 int fail_on_error;
50 int from_enc;
51 int to_enc;
52 int *unconvlenp;
54 char_u *retval, *d;
55 CFStringRef cfstr;
56 int buflen, in, out, l, i;
57 CFStringEncoding from;
58 CFStringEncoding to;
60 switch (from_enc)
62 case 'l': from = kCFStringEncodingISOLatin1; break;
63 case 'm': from = kCFStringEncodingMacRoman; break;
64 case 'u': from = kCFStringEncodingUTF8; break;
65 default: return NULL;
67 switch (to_enc)
69 case 'l': to = kCFStringEncodingISOLatin1; break;
70 case 'm': to = kCFStringEncodingMacRoman; break;
71 case 'u': to = kCFStringEncodingUTF8; break;
72 default: return NULL;
75 if (unconvlenp != NULL)
76 *unconvlenp = 0;
77 cfstr = CFStringCreateWithBytes(NULL, ptr, len, from, 0);
79 if(cfstr == NULL)
80 fprintf(stderr, "Encoding failed\n");
81 /* When conversion failed, try excluding bytes from the end, helps when
82 * there is an incomplete byte sequence. Only do up to 6 bytes to avoid
83 * looping a long time when there really is something unconvertible. */
84 while (cfstr == NULL && unconvlenp != NULL && len > 1 && *unconvlenp < 6)
86 --len;
87 ++*unconvlenp;
88 cfstr = CFStringCreateWithBytes(NULL, ptr, len, from, 0);
90 if (cfstr == NULL)
91 return NULL;
93 if (to == kCFStringEncodingUTF8)
94 buflen = len * 6 + 1;
95 else
96 buflen = len + 1;
97 retval = alloc(buflen);
98 if (retval == NULL)
100 CFRelease(cfstr);
101 return NULL;
104 #if 0
105 CFRange convertRange = CFRangeMake(0, CFStringGetLength(cfstr));
106 /* Determine output buffer size */
107 CFStringGetBytes(cfstr, convertRange, to, NULL, FALSE, NULL, 0, (CFIndex *)&buflen);
108 retval = (buflen > 0) ? alloc(buflen) : NULL;
109 if (retval == NULL) {
110 CFRelease(cfstr);
111 return NULL;
114 if (lenp)
115 *lenp = buflen / sizeof(char_u);
117 if (!CFStringGetBytes(cfstr, convertRange, to, NULL, FALSE, retval, buflen, NULL))
118 #endif
119 if (!CFStringGetCString(cfstr, (char *)retval, buflen, to))
121 CFRelease(cfstr);
122 if (fail_on_error)
124 vim_free(retval);
125 return NULL;
128 fprintf(stderr, "Trying char-by-char conversion...\n");
129 /* conversion failed for the whole string, but maybe it will work
130 * for each character */
131 for (d = retval, in = 0, out = 0; in < len && out < buflen - 1;)
133 if (from == kCFStringEncodingUTF8)
134 l = utf_ptr2len(ptr + in);
135 else
136 l = 1;
137 cfstr = CFStringCreateWithBytes(NULL, ptr + in, l, from, 0);
138 if (cfstr == NULL)
140 *d++ = '?';
141 out++;
143 else
145 if (!CFStringGetCString(cfstr, (char *)d, buflen - out, to))
147 *d++ = '?';
148 out++;
150 else
152 i = STRLEN(d);
153 d += i;
154 out += i;
156 CFRelease(cfstr);
158 in += l;
160 *d = NUL;
161 if (lenp != NULL)
162 *lenp = out;
163 return retval;
165 CFRelease(cfstr);
166 if (lenp != NULL)
167 *lenp = STRLEN(retval);
169 return retval;
173 * Conversion from Apple MacRoman char encoding to UTF-8 or latin1, using
174 * standard Carbon framework.
175 * Input: "ptr[*sizep]".
176 * "real_size" is the size of the buffer that "ptr" points to.
177 * output is in-place, "sizep" is adjusted.
178 * Returns OK or FAIL.
181 macroman2enc(ptr, sizep, real_size)
182 char_u *ptr;
183 long *sizep;
184 long real_size;
186 CFStringRef cfstr;
187 CFRange r;
188 CFIndex len = *sizep;
190 /* MacRoman is an 8-bit encoding, no need to move bytes to
191 * conv_rest[]. */
192 cfstr = CFStringCreateWithBytes(NULL, ptr, len,
193 kCFStringEncodingMacRoman, 0);
195 * If there is a conversion error, try using another
196 * conversion.
198 if (cfstr == NULL)
199 return FAIL;
201 r.location = 0;
202 r.length = CFStringGetLength(cfstr);
203 if (r.length != CFStringGetBytes(cfstr, r,
204 (enc_utf8) ? kCFStringEncodingUTF8 : kCFStringEncodingISOLatin1,
205 0, /* no lossy conversion */
206 0, /* not external representation */
207 ptr + *sizep, real_size - *sizep, &len))
209 CFRelease(cfstr);
210 return FAIL;
212 CFRelease(cfstr);
213 mch_memmove(ptr, ptr + *sizep, len);
214 *sizep = len;
216 return OK;
220 * Conversion from UTF-8 or latin1 to MacRoman.
221 * Input: "from[fromlen]"
222 * Output: "to[maxtolen]" length in "*tolenp"
223 * Unconverted rest in rest[*restlenp].
224 * Returns OK or FAIL.
227 enc2macroman(from, fromlen, to, tolenp, maxtolen, rest, restlenp)
228 char_u *from;
229 size_t fromlen;
230 char_u *to;
231 int *tolenp;
232 int maxtolen;
233 char_u *rest;
234 int *restlenp;
236 CFStringRef cfstr;
237 CFRange r;
238 CFIndex l;
240 *restlenp = 0;
241 cfstr = CFStringCreateWithBytes(NULL, from, fromlen,
242 (enc_utf8) ? kCFStringEncodingUTF8 : kCFStringEncodingISOLatin1,
244 while (cfstr == NULL && *restlenp < 3 && fromlen > 1)
246 rest[*restlenp++] = from[--fromlen];
247 cfstr = CFStringCreateWithBytes(NULL, from, fromlen,
248 (enc_utf8) ? kCFStringEncodingUTF8 : kCFStringEncodingISOLatin1,
251 if (cfstr == NULL)
252 return FAIL;
254 r.location = 0;
255 r.length = CFStringGetLength(cfstr);
256 if (r.length != CFStringGetBytes(cfstr, r,
257 kCFStringEncodingMacRoman,
258 0, /* no lossy conversion */
259 0, /* not external representation (since vim
260 * handles this internally */
261 to, maxtolen, &l))
263 CFRelease(cfstr);
264 return FAIL;
266 CFRelease(cfstr);
267 *tolenp = l;
268 return OK;
272 * Initializes text converters
274 void
275 mac_conv_init()
277 TextEncoding utf8_encoding;
278 TextEncoding utf8_hfsplus_encoding;
279 TextEncoding utf8_canon_encoding;
280 TextEncoding utf16_encoding;
282 utf8_encoding = CreateTextEncoding(kTextEncodingUnicodeDefault,
283 kTextEncodingDefaultVariant, kUnicodeUTF8Format);
284 utf8_hfsplus_encoding = CreateTextEncoding(kTextEncodingUnicodeDefault,
285 kUnicodeHFSPlusCompVariant, kUnicodeUTF8Format);
286 utf8_canon_encoding = CreateTextEncoding(kTextEncodingUnicodeDefault,
287 kUnicodeCanonicalCompVariant, kUnicodeUTF8Format);
288 utf16_encoding = CreateTextEncoding(kTextEncodingUnicodeDefault,
289 kTextEncodingDefaultVariant, kUnicode16BitFormat);
291 if (TECCreateConverter(&gPathConverter, utf8_encoding,
292 utf8_hfsplus_encoding) != noErr)
293 gPathConverter = NULL;
295 if (TECCreateConverter(&gUTF16ToUTF8Converter, utf16_encoding,
296 utf8_canon_encoding) != noErr)
298 /* On pre-10.3, Unicode normalization is not available so
299 * fall back to non-normalizing converter */
300 if (TECCreateConverter(&gUTF16ToUTF8Converter, utf16_encoding,
301 utf8_encoding) != noErr)
302 gUTF16ToUTF8Converter = NULL;
307 * Destroys text converters
309 void
310 mac_conv_cleanup()
312 if (gUTF16ToUTF8Converter)
314 TECDisposeConverter(gUTF16ToUTF8Converter);
315 gUTF16ToUTF8Converter = NULL;
318 if (gPathConverter)
320 TECDisposeConverter(gPathConverter);
321 gPathConverter = NULL;
326 * Conversion from UTF-16 UniChars to 'encoding'
327 * The function signature uses the real type of UniChar (as typedef'ed in
328 * CFBase.h) to avoid clashes with X11 header files in the .pro file
330 char_u *
331 mac_utf16_to_enc(from, fromLen, actualLen)
332 unsigned short *from;
333 size_t fromLen;
334 size_t *actualLen;
336 /* Following code borrows somewhat from os_mswin.c */
337 vimconv_T conv;
338 size_t utf8_len;
339 char_u *utf8_str;
340 char_u *result = NULL;
342 /* Convert to utf-8 first, works better with iconv */
343 utf8_len = 0;
344 utf8_str = mac_utf16_to_utf8(from, fromLen, &utf8_len);
346 if (utf8_str)
348 /* We might be called before we have p_enc set up. */
349 conv.vc_type = CONV_NONE;
351 /* If encoding (p_enc) is any unicode, it is actually in utf-8 (vim
352 * internal unicode is always utf-8) so don't convert in such cases */
354 if ((enc_canon_props(p_enc) & ENC_UNICODE) == 0)
355 convert_setup(&conv, (char_u *)"utf-8",
356 p_enc? p_enc: (char_u *)"macroman");
357 if (conv.vc_type == CONV_NONE)
359 /* p_enc is utf-8, so we're done. */
360 result = utf8_str;
362 else
364 result = string_convert(&conv, utf8_str, (int *)&utf8_len);
365 vim_free(utf8_str);
368 convert_setup(&conv, NULL, NULL);
370 if (actualLen)
371 *actualLen = utf8_len;
373 else if (actualLen)
374 *actualLen = 0;
376 return result;
380 * Conversion from 'encoding' to UTF-16 UniChars
381 * The function return uses the real type of UniChar (as typedef'ed in
382 * CFBase.h) to avoid clashes with X11 header files in the .pro file
384 unsigned short *
385 mac_enc_to_utf16(from, fromLen, actualLen)
386 char_u *from;
387 size_t fromLen;
388 size_t *actualLen;
390 /* Following code borrows somewhat from os_mswin.c */
391 vimconv_T conv;
392 size_t utf8_len;
393 char_u *utf8_str;
394 UniChar *result = NULL;
395 Boolean should_free_utf8 = FALSE;
399 /* Use MacRoman by default, we might be called before we have p_enc
400 * set up. Convert to utf-8 first, works better with iconv(). Does
401 * nothing if 'encoding' is "utf-8". */
402 conv.vc_type = CONV_NONE;
403 if ((enc_canon_props(p_enc) & ENC_UNICODE) == 0 &&
404 convert_setup(&conv, p_enc ? p_enc : (char_u *)"macroman",
405 (char_u *)"utf-8") == FAIL)
406 break;
408 if (conv.vc_type != CONV_NONE)
410 utf8_len = fromLen;
411 utf8_str = string_convert(&conv, from, (int *)&utf8_len);
412 should_free_utf8 = TRUE;
414 else
416 utf8_str = from;
417 utf8_len = fromLen;
420 if (utf8_str == NULL)
421 break;
423 convert_setup(&conv, NULL, NULL);
425 result = mac_utf8_to_utf16(utf8_str, utf8_len, actualLen);
427 if (should_free_utf8)
428 vim_free(utf8_str);
429 return result;
431 while (0);
433 if (actualLen)
434 *actualLen = 0;
436 return result;
440 * Converts from UTF-16 UniChars to CFString
441 * The void * return type is actually a CFStringRef
443 void *
444 mac_enc_to_cfstring(from, fromLen)
445 char_u *from;
446 size_t fromLen;
448 UniChar *utf16_str;
449 size_t utf16_len;
450 CFStringRef result = NULL;
452 utf16_str = mac_enc_to_utf16(from, fromLen, &utf16_len);
453 if (utf16_str)
455 result = CFStringCreateWithCharacters(NULL, utf16_str, utf16_len/sizeof(UniChar));
456 vim_free(utf16_str);
459 return (void *)result;
463 * Converts a decomposed HFS+ UTF-8 path to precomposed UTF-8
465 char_u *
466 mac_precompose_path(decompPath, decompLen, precompLen)
467 char_u *decompPath;
468 size_t decompLen;
469 size_t *precompLen;
471 char_u *result = NULL;
472 size_t actualLen = 0;
474 if (gPathConverter)
476 result = alloc(decompLen);
477 if (result)
479 if (TECConvertText(gPathConverter, decompPath,
480 decompLen, &decompLen, result,
481 decompLen, &actualLen) != noErr)
483 vim_free(result);
484 result = NULL;
489 if (precompLen)
490 *precompLen = actualLen;
492 return result;
496 * Converts from UTF-16 UniChars to precomposed UTF-8
498 static char_u *
499 mac_utf16_to_utf8(from, fromLen, actualLen)
500 UniChar *from;
501 size_t fromLen;
502 size_t *actualLen;
504 ByteCount utf8_len;
505 ByteCount inputRead;
506 char_u *result;
508 if (gUTF16ToUTF8Converter)
510 result = alloc(fromLen * 6 + 1);
511 if (result && TECConvertText(gUTF16ToUTF8Converter, (ConstTextPtr)from,
512 fromLen, &inputRead, result,
513 (fromLen*6+1)*sizeof(char_u), &utf8_len) == noErr)
515 TECFlushText(gUTF16ToUTF8Converter, result, (fromLen*6+1)*sizeof(char_u), &inputRead);
516 utf8_len += inputRead;
518 else
520 vim_free(result);
521 result = NULL;
524 else
526 result = NULL;
529 if (actualLen)
530 *actualLen = result ? utf8_len : 0;
532 return result;
536 * Converts from UTF-8 to UTF-16 UniChars
538 static UniChar *
539 mac_utf8_to_utf16(from, fromLen, actualLen)
540 char_u *from;
541 size_t fromLen;
542 size_t *actualLen;
544 CFStringRef utf8_str;
545 CFRange convertRange;
546 UniChar *result = NULL;
548 utf8_str = CFStringCreateWithBytes(NULL, from, fromLen,
549 kCFStringEncodingUTF8, FALSE);
551 if (utf8_str == NULL) {
552 if (actualLen)
553 *actualLen = 0;
554 return NULL;
557 convertRange = CFRangeMake(0, CFStringGetLength(utf8_str));
558 result = (UniChar *)alloc(convertRange.length * sizeof(UniChar));
560 CFStringGetCharacters(utf8_str, convertRange, result);
562 CFRelease(utf8_str);
564 if (actualLen)
565 *actualLen = convertRange.length * sizeof(UniChar);
567 return result;
571 * Sets LANG environment variable in Vim from Mac locale
573 void
574 mac_lang_init() {
575 if (mch_getenv((char_u *)"LANG") == NULL)
577 char buf[20];
578 if (LocaleRefGetPartString(NULL,
579 kLocaleLanguageMask | kLocaleLanguageVariantMask |
580 kLocaleRegionMask | kLocaleRegionVariantMask,
581 sizeof buf, buf) == noErr && *buf)
583 vim_setenv((char_u *)"LANG", (char_u *)buf);
584 # ifdef HAVE_LOCALE_H
585 setlocale(LC_ALL, "");
586 # endif
590 #endif /* MACOS_CONVERT */