Build fix for clang.
[vim-cocoa.git] / src / os_mac_conv.c
blob6a7a6debcbb43b6fb4bf4201669b8358e14fd17f
1 /* vi:set ts=8 sts=4 sw=4:
3 * VIM - Vi IMproved by Bram Moolenaar
5 * Do ":help uganda" in Vim to read copying and usage conditions.
6 * Do ":help credits" in Vim to see a list of people who contributed.
7 * See README.txt for an overview of the Vim source code.
8 */
9 /*
10 * os_mac_conv.c: Code specifically for Mac string conversions.
12 * This code has been put in a separate file to avoid the conflicts that are
13 * caused by including both the X11 and Carbon header files.
16 #define NO_X11_INCLUDES
17 #include "vim.h"
19 #if defined(MACOS_CONVERT) || defined(PROTO)
20 # ifdef PROTO
21 /* A few dummy types to be able to generate function prototypes. */
22 typedef int UniChar;
23 typedef int *TECObjectRef;
24 typedef int CFStringRef;
25 # else
26 typedef unsigned short UniChar;
27 #include <CoreServices/CoreServices.h>
28 # endif
30 static char_u *mac_utf16_to_utf8 __ARGS((UniChar *from, size_t fromLen, size_t *actualLen));
31 static UniChar *mac_utf8_to_utf16 __ARGS((char_u *from, size_t fromLen, size_t *actualLen));
33 /* Converter for composing decomposed HFS+ file paths */
34 static TECObjectRef gPathConverter;
35 /* Converter used by mac_utf16_to_utf8 */
36 static TECObjectRef gUTF16ToUTF8Converter;
39 * A Mac version of string_convert_ext() for special cases.
41 char_u *
42 mac_string_convert(ptr, len, lenp, fail_on_error, from_enc, to_enc, unconvlenp)
43 char_u *ptr;
44 int len;
45 int *lenp;
46 int fail_on_error;
47 int from_enc;
48 int to_enc;
49 int *unconvlenp;
51 char_u *retval, *d;
52 CFStringRef cfstr;
53 int buflen, in, out, l, i;
54 CFStringEncoding from;
55 CFStringEncoding to;
57 switch (from_enc)
59 case 'l': from = kCFStringEncodingISOLatin1; break;
60 case 'm': from = kCFStringEncodingMacRoman; break;
61 case 'u': from = kCFStringEncodingUTF8; break;
62 default: return NULL;
64 switch (to_enc)
66 case 'l': to = kCFStringEncodingISOLatin1; break;
67 case 'm': to = kCFStringEncodingMacRoman; break;
68 case 'u': to = kCFStringEncodingUTF8; break;
69 default: return NULL;
72 if (unconvlenp != NULL)
73 *unconvlenp = 0;
74 cfstr = CFStringCreateWithBytes(NULL, ptr, len, from, 0);
76 if(cfstr == NULL)
77 fprintf(stderr, "Encoding failed\n");
78 /* When conversion failed, try excluding bytes from the end, helps when
79 * there is an incomplete byte sequence. Only do up to 6 bytes to avoid
80 * looping a long time when there really is something unconvertible. */
81 while (cfstr == NULL && unconvlenp != NULL && len > 1 && *unconvlenp < 6)
83 --len;
84 ++*unconvlenp;
85 cfstr = CFStringCreateWithBytes(NULL, ptr, len, from, 0);
87 if (cfstr == NULL)
88 return NULL;
90 if (to == kCFStringEncodingUTF8)
91 buflen = len * 6 + 1;
92 else
93 buflen = len + 1;
94 retval = alloc(buflen);
95 if (retval == NULL)
97 CFRelease(cfstr);
98 return NULL;
101 #if 0
102 CFRange convertRange = CFRangeMake(0, CFStringGetLength(cfstr));
103 /* Determine output buffer size */
104 CFStringGetBytes(cfstr, convertRange, to, NULL, FALSE, NULL, 0, (CFIndex *)&buflen);
105 retval = (buflen > 0) ? alloc(buflen) : NULL;
106 if (retval == NULL) {
107 CFRelease(cfstr);
108 return NULL;
111 if (lenp)
112 *lenp = buflen / sizeof(char_u);
114 if (!CFStringGetBytes(cfstr, convertRange, to, NULL, FALSE, retval, buflen, NULL))
115 #endif
116 if (!CFStringGetCString(cfstr, (char *)retval, buflen, to))
118 CFRelease(cfstr);
119 if (fail_on_error)
121 vim_free(retval);
122 return NULL;
125 fprintf(stderr, "Trying char-by-char conversion...\n");
126 /* conversion failed for the whole string, but maybe it will work
127 * for each character */
128 for (d = retval, in = 0, out = 0; in < len && out < buflen - 1;)
130 if (from == kCFStringEncodingUTF8)
131 l = utf_ptr2len(ptr + in);
132 else
133 l = 1;
134 cfstr = CFStringCreateWithBytes(NULL, ptr + in, l, from, 0);
135 if (cfstr == NULL)
137 *d++ = '?';
138 out++;
140 else
142 if (!CFStringGetCString(cfstr, (char *)d, buflen - out, to))
144 *d++ = '?';
145 out++;
147 else
149 i = STRLEN(d);
150 d += i;
151 out += i;
153 CFRelease(cfstr);
155 in += l;
157 *d = NUL;
158 if (lenp != NULL)
159 *lenp = out;
160 return retval;
162 CFRelease(cfstr);
163 if (lenp != NULL)
164 *lenp = STRLEN(retval);
166 return retval;
170 * Conversion from Apple MacRoman char encoding to UTF-8 or latin1, using
171 * standard Carbon framework.
172 * Input: "ptr[*sizep]".
173 * "real_size" is the size of the buffer that "ptr" points to.
174 * output is in-place, "sizep" is adjusted.
175 * Returns OK or FAIL.
178 macroman2enc(ptr, sizep, real_size)
179 char_u *ptr;
180 long *sizep;
181 long real_size;
183 CFStringRef cfstr;
184 CFRange r;
185 CFIndex len = *sizep;
187 /* MacRoman is an 8-bit encoding, no need to move bytes to
188 * conv_rest[]. */
189 cfstr = CFStringCreateWithBytes(NULL, ptr, len,
190 kCFStringEncodingMacRoman, 0);
192 * If there is a conversion error, try using another
193 * conversion.
195 if (cfstr == NULL)
196 return FAIL;
198 r.location = 0;
199 r.length = CFStringGetLength(cfstr);
200 if (r.length != CFStringGetBytes(cfstr, r,
201 (enc_utf8) ? kCFStringEncodingUTF8 : kCFStringEncodingISOLatin1,
202 0, /* no lossy conversion */
203 0, /* not external representation */
204 ptr + *sizep, real_size - *sizep, &len))
206 CFRelease(cfstr);
207 return FAIL;
209 CFRelease(cfstr);
210 mch_memmove(ptr, ptr + *sizep, len);
211 *sizep = len;
213 return OK;
217 * Conversion from UTF-8 or latin1 to MacRoman.
218 * Input: "from[fromlen]"
219 * Output: "to[maxtolen]" length in "*tolenp"
220 * Unconverted rest in rest[*restlenp].
221 * Returns OK or FAIL.
224 enc2macroman(from, fromlen, to, tolenp, maxtolen, rest, restlenp)
225 char_u *from;
226 size_t fromlen;
227 char_u *to;
228 int *tolenp;
229 int maxtolen;
230 char_u *rest;
231 int *restlenp;
233 CFStringRef cfstr;
234 CFRange r;
235 CFIndex l;
237 *restlenp = 0;
238 cfstr = CFStringCreateWithBytes(NULL, from, fromlen,
239 (enc_utf8) ? kCFStringEncodingUTF8 : kCFStringEncodingISOLatin1,
241 while (cfstr == NULL && *restlenp < 3 && fromlen > 1)
243 rest[*restlenp++] = from[--fromlen];
244 cfstr = CFStringCreateWithBytes(NULL, from, fromlen,
245 (enc_utf8) ? kCFStringEncodingUTF8 : kCFStringEncodingISOLatin1,
248 if (cfstr == NULL)
249 return FAIL;
251 r.location = 0;
252 r.length = CFStringGetLength(cfstr);
253 if (r.length != CFStringGetBytes(cfstr, r,
254 kCFStringEncodingMacRoman,
255 0, /* no lossy conversion */
256 0, /* not external representation (since vim
257 * handles this internally */
258 to, maxtolen, &l))
260 CFRelease(cfstr);
261 return FAIL;
263 CFRelease(cfstr);
264 *tolenp = l;
265 return OK;
269 * Initializes text converters
271 void
272 mac_conv_init()
274 TextEncoding utf8_encoding;
275 TextEncoding utf8_hfsplus_encoding;
276 TextEncoding utf8_canon_encoding;
277 TextEncoding utf16_encoding;
279 utf8_encoding = CreateTextEncoding(kTextEncodingUnicodeDefault,
280 kTextEncodingDefaultVariant, kUnicodeUTF8Format);
281 utf8_hfsplus_encoding = CreateTextEncoding(kTextEncodingUnicodeDefault,
282 kUnicodeHFSPlusCompVariant, kUnicodeUTF8Format);
283 utf8_canon_encoding = CreateTextEncoding(kTextEncodingUnicodeDefault,
284 kUnicodeCanonicalCompVariant, kUnicodeUTF8Format);
285 utf16_encoding = CreateTextEncoding(kTextEncodingUnicodeDefault,
286 kTextEncodingDefaultVariant, kUnicode16BitFormat);
288 if (TECCreateConverter(&gPathConverter, utf8_encoding,
289 utf8_hfsplus_encoding) != noErr)
290 gPathConverter = NULL;
292 if (TECCreateConverter(&gUTF16ToUTF8Converter, utf16_encoding,
293 utf8_canon_encoding) != noErr)
295 /* On pre-10.3, Unicode normalization is not available so
296 * fall back to non-normalizing converter */
297 if (TECCreateConverter(&gUTF16ToUTF8Converter, utf16_encoding,
298 utf8_encoding) != noErr)
299 gUTF16ToUTF8Converter = NULL;
304 * Destroys text converters
306 void
307 mac_conv_cleanup()
309 if (gUTF16ToUTF8Converter)
311 TECDisposeConverter(gUTF16ToUTF8Converter);
312 gUTF16ToUTF8Converter = NULL;
315 if (gPathConverter)
317 TECDisposeConverter(gPathConverter);
318 gPathConverter = NULL;
323 * Conversion from UTF-16 UniChars to 'encoding'
324 * The function signature uses the real type of UniChar (as typedef'ed in
325 * CFBase.h) to avoid clashes with X11 header files in the .pro file
327 char_u *
328 mac_utf16_to_enc(from, fromLen, actualLen)
329 unsigned short *from;
330 size_t fromLen;
331 size_t *actualLen;
333 /* Following code borrows somewhat from os_mswin.c */
334 vimconv_T conv;
335 size_t utf8_len;
336 char_u *utf8_str;
337 char_u *result = NULL;
339 /* Convert to utf-8 first, works better with iconv */
340 utf8_len = 0;
341 utf8_str = mac_utf16_to_utf8(from, fromLen, &utf8_len);
343 if (utf8_str)
345 /* We might be called before we have p_enc set up. */
346 conv.vc_type = CONV_NONE;
348 /* If encoding (p_enc) is any unicode, it is actually in utf-8 (vim
349 * internal unicode is always utf-8) so don't convert in such cases */
351 if ((enc_canon_props(p_enc) & ENC_UNICODE) == 0)
352 convert_setup(&conv, (char_u *)"utf-8",
353 p_enc? p_enc: (char_u *)"macroman");
354 if (conv.vc_type == CONV_NONE)
356 /* p_enc is utf-8, so we're done. */
357 result = utf8_str;
359 else
361 result = string_convert(&conv, utf8_str, (int *)&utf8_len);
362 vim_free(utf8_str);
365 convert_setup(&conv, NULL, NULL);
367 if (actualLen)
368 *actualLen = utf8_len;
370 else if (actualLen)
371 *actualLen = 0;
373 return result;
377 * Conversion from 'encoding' to UTF-16 UniChars
378 * The function return uses the real type of UniChar (as typedef'ed in
379 * CFBase.h) to avoid clashes with X11 header files in the .pro file
381 unsigned short *
382 mac_enc_to_utf16(from, fromLen, actualLen)
383 char_u *from;
384 size_t fromLen;
385 size_t *actualLen;
387 /* Following code borrows somewhat from os_mswin.c */
388 vimconv_T conv;
389 size_t utf8_len;
390 char_u *utf8_str;
391 UniChar *result = NULL;
392 Boolean should_free_utf8 = FALSE;
396 /* Use MacRoman by default, we might be called before we have p_enc
397 * set up. Convert to utf-8 first, works better with iconv(). Does
398 * nothing if 'encoding' is "utf-8". */
399 conv.vc_type = CONV_NONE;
400 if ((enc_canon_props(p_enc) & ENC_UNICODE) == 0 &&
401 convert_setup(&conv, p_enc ? p_enc : (char_u *)"macroman",
402 (char_u *)"utf-8") == FAIL)
403 break;
405 if (conv.vc_type != CONV_NONE)
407 utf8_len = fromLen;
408 utf8_str = string_convert(&conv, from, (int *)&utf8_len);
409 should_free_utf8 = TRUE;
411 else
413 utf8_str = from;
414 utf8_len = fromLen;
417 if (utf8_str == NULL)
418 break;
420 convert_setup(&conv, NULL, NULL);
422 result = mac_utf8_to_utf16(utf8_str, utf8_len, actualLen);
424 if (should_free_utf8)
425 vim_free(utf8_str);
426 return result;
428 while (0);
430 if (actualLen)
431 *actualLen = 0;
433 return result;
437 * Converts from UTF-16 UniChars to CFString
438 * The void * return type is actually a CFStringRef
440 void *
441 mac_enc_to_cfstring(from, fromLen)
442 char_u *from;
443 size_t fromLen;
445 UniChar *utf16_str;
446 size_t utf16_len;
447 CFStringRef result = NULL;
449 utf16_str = mac_enc_to_utf16(from, fromLen, &utf16_len);
450 if (utf16_str)
452 result = CFStringCreateWithCharacters(NULL, utf16_str, utf16_len/sizeof(UniChar));
453 vim_free(utf16_str);
456 return (void *)result;
460 * Converts a decomposed HFS+ UTF-8 path to precomposed UTF-8
462 char_u *
463 mac_precompose_path(decompPath, decompLen, precompLen)
464 char_u *decompPath;
465 size_t decompLen;
466 size_t *precompLen;
468 char_u *result = NULL;
469 size_t actualLen = 0;
471 if (gPathConverter)
473 result = alloc(decompLen);
474 if (result)
476 if (TECConvertText(gPathConverter, decompPath,
477 decompLen, &decompLen, result,
478 decompLen, &actualLen) != noErr)
480 vim_free(result);
481 result = NULL;
486 if (precompLen)
487 *precompLen = actualLen;
489 return result;
493 * Converts from UTF-16 UniChars to precomposed UTF-8
495 static char_u *
496 mac_utf16_to_utf8(from, fromLen, actualLen)
497 UniChar *from;
498 size_t fromLen;
499 size_t *actualLen;
501 ByteCount utf8_len;
502 ByteCount inputRead;
503 char_u *result;
505 if (gUTF16ToUTF8Converter)
507 result = alloc(fromLen * 6 + 1);
508 if (result && TECConvertText(gUTF16ToUTF8Converter, (ConstTextPtr)from,
509 fromLen, &inputRead, result,
510 (fromLen*6+1)*sizeof(char_u), &utf8_len) == noErr)
512 TECFlushText(gUTF16ToUTF8Converter, result, (fromLen*6+1)*sizeof(char_u), &inputRead);
513 utf8_len += inputRead;
515 else
517 vim_free(result);
518 result = NULL;
521 else
523 result = NULL;
526 if (actualLen)
527 *actualLen = result ? utf8_len : 0;
529 return result;
533 * Converts from UTF-8 to UTF-16 UniChars
535 static UniChar *
536 mac_utf8_to_utf16(from, fromLen, actualLen)
537 char_u *from;
538 size_t fromLen;
539 size_t *actualLen;
541 CFStringRef utf8_str;
542 CFRange convertRange;
543 UniChar *result = NULL;
545 utf8_str = CFStringCreateWithBytes(NULL, from, fromLen,
546 kCFStringEncodingUTF8, FALSE);
548 if (utf8_str == NULL) {
549 if (actualLen)
550 *actualLen = 0;
551 return NULL;
554 convertRange = CFRangeMake(0, CFStringGetLength(utf8_str));
555 result = (UniChar *)alloc(convertRange.length * sizeof(UniChar));
557 CFStringGetCharacters(utf8_str, convertRange, result);
559 CFRelease(utf8_str);
561 if (actualLen)
562 *actualLen = convertRange.length * sizeof(UniChar);
564 return result;
568 * Sets LANG environment variable in Vim from Mac locale
570 void
571 mac_lang_init() {
572 if (mch_getenv((char_u *)"LANG") == NULL)
574 char buf[20];
575 if (LocaleRefGetPartString(NULL,
576 kLocaleLanguageMask | kLocaleLanguageVariantMask |
577 kLocaleRegionMask | kLocaleRegionVariantMask,
578 sizeof buf, buf) == noErr && *buf)
580 vim_setenv((char_u *)"LANG", (char_u *)buf);
581 # ifdef HAVE_LOCALE_H
582 setlocale(LC_ALL, "");
583 # endif
587 #endif /* MACOS_CONVERT */