lib:util: Fix code spelling
[Samba.git] / lib / util / charset / charset_macosxfs.c
blob2ecfdff74f93ae2fbd11339728ea6f05155510ae
1 /*
2 Unix SMB/CIFS implementation.
3 Samba charset module for Mac OS X/Darwin
4 Copyright (C) Benjamin Riefenstahl 2003
6 This program is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 3 of the License, or
9 (at your option) any later version.
11 This program is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with this program. If not, see <http://www.gnu.org/licenses/>.
21 * modules/charset_macosxfs.c
23 * A Samba charset module to use on Mac OS X/Darwin as the filesystem
24 * and display encoding.
26 * Actually two implementations are provided here. The default
27 * implementation is based on the official CFString API. The other is
28 * based on internal CFString APIs as defined in the OpenDarwin
29 * source.
32 #include "replace.h"
33 #include "charset.h"
34 #include "charset_proto.h"
35 #include "lib/util/debug.h"
36 #undef realloc
38 #ifdef DARWINOS
41 * Include OS frameworks. These are only needed in this module.
43 #include <CoreFoundation/CFString.h>
46 * See if autoconf has found us the internal headers in some form.
48 #if defined(HAVE_COREFOUNDATION_CFSTRINGENCODINGCONVERTER_H)
49 # include <CoreFoundation/CFStringEncodingConverter.h>
50 # include <CoreFoundation/CFUnicodePrecomposition.h>
51 # define USE_INTERNAL_API 1
52 #elif defined(HAVE_CFSTRINGENCODINGCONVERTER_H)
53 # include <CFStringEncodingConverter.h>
54 # include <CFUnicodePrecomposition.h>
55 # define USE_INTERNAL_API 1
56 #endif
59 * Compile time configuration: Do we want debug output?
61 /* #define DEBUG_STRINGS 1 */
64 * A simple, but efficient memory provider for our buffers.
/*
 * Grow a heap buffer on demand.
 *
 * buffer  - current allocation, or NULL if there is none yet
 * size    - in/out: capacity of the allocation in bytes
 * newsize - number of bytes the caller needs
 *
 * If the buffer is already large enough it is returned unchanged.
 * Otherwise it is reallocated to newsize plus 128 bytes of slack, and
 * *size is updated to the new capacity.
 *
 * On failure (the request would overflow size_t, or realloc() fails)
 * the old buffer is released, *size is reset to 0 and NULL is
 * returned.  That keeps the recorded capacity in step with the real
 * allocation, so a later call simply tries again.
 */
static inline void *resize_buffer (void *buffer, size_t *size, size_t newsize)
{
	const size_t slack = 128;
	void *grown;

	if (newsize <= *size) {
		return buffer;
	}

	/* Adding the slack must not wrap around to a tiny allocation. */
	if (newsize > (size_t)-1 - slack) {
		free(buffer);
		*size = 0;
		return NULL;
	}

	grown = realloc(buffer, newsize + slack);
	if (grown == NULL) {
		/* realloc() left the old block alone; drop it ourselves. */
		free(buffer);
		*size = 0;
		return NULL;
	}

	*size = newsize + slack;
	return grown;
}
76 * While there is a version of OpenDarwin for intel, the usual case is
77 * big-endian PPC. So we need byte swapping to handle the
78 * little-endian byte order of the network protocol. We also need an
79 * additional dynamic buffer to do this work for incoming data blocks,
80 * because we have to consider the original data as constant.
82 * We abstract the differences away by providing a simple facade with
83 * these functions/macros:
85 * le_to_native(dst,src,len)
86 * native_to_le(cp,len)
87 * set_ucbuffer_with_le(buffer,bufsize,data,size)
88 * set_ucbuffer_with_le_copy(buffer,bufsize,data,size,reserve)
90 #ifdef WORDS_BIGENDIAN
/*
 * Copy len bytes from src to dst, exchanging the two bytes of every
 * 16-bit unit on the way.
 *
 * Only complete byte pairs are swapped.  If len is odd the final byte
 * is copied as it is, so that neither buffer is accessed beyond len
 * bytes.
 */
static inline void swap_bytes (char * dst, const char * src, size_t len)
{
	const size_t pairs = len / 2;
	size_t i;

	for (i = 0; i < pairs; i++) {
		dst[2*i]     = src[2*i + 1];
		dst[2*i + 1] = src[2*i];
	}

	if (len & 1) {
		dst[len - 1] = src[len - 1];
	}
}
/*
 * Exchange the two bytes of every 16-bit unit in the len bytes
 * starting at cp.
 *
 * Only complete byte pairs are swapped; a trailing odd byte is left
 * untouched so that nothing beyond len bytes is read or written.
 */
static inline void swap_bytes_inplace (char * cp, size_t len)
{
	char *stop = cp + (len & ~(size_t)1);

	for (; cp < stop; cp += 2) {
		const char first = cp[0];
		cp[0] = cp[1];
		cp[1] = first;
	}
}
/*
 * Facade for hosts where WORDS_BIGENDIAN is defined.
 * le_to_native() copies len bytes from src to dst through
 * swap_bytes().
 */
114 #define le_to_native(dst,src,len) swap_bytes(dst,src,len)
/* native_to_le() runs swap_bytes_inplace() over len bytes at cp. */
115 #define native_to_le(cp,len) swap_bytes_inplace(cp,len)
/*
 * set_ucbuffer_with_le() is set_ucbuffer_with_le_copy() called with a
 * reserve of 0.
 */
116 #define set_ucbuffer_with_le(buffer,bufsize,data,size) \
117 set_ucbuffer_with_le_copy(buffer,bufsize,data,size,0)
119 #else /* ! WORDS_BIGENDIAN */
/*
 * Facade for hosts where WORDS_BIGENDIAN is not defined.
 * le_to_native() is a plain memcpy() of len bytes.
 */
121 #define le_to_native(dst,src,len) memcpy(dst,src,len)
/* native_to_le() expands to nothing. */
122 #define native_to_le(cp,len) /* nothing */
/*
 * set_ucbuffer_with_le() makes no copy: bufsize is evaluated only to
 * be discarded and the result is data itself, cast to UniChar*.  The
 * buffer and size arguments do not appear in the expansion.
 */
123 #define set_ucbuffer_with_le(buffer,bufsize,data,size) \
124 (((void)(bufsize)),(UniChar*)(data))
126 #endif
128 static inline UniChar *set_ucbuffer_with_le_copy (
129 UniChar *buffer, size_t *bufsize,
130 const void *data, size_t size, size_t reserve)
132 buffer = resize_buffer(buffer, bufsize, size+reserve);
133 le_to_native((char*)buffer,data,size);
134 return buffer;
139 * A simple hexdump function for debugging error conditions.
/*
 * Emit a message through DEBUG() at level 0.  The text is handed over
 * as the argument of a "%s" format rather than as the format itself,
 * so '%' characters in it are printed literally.
 */
#define debug_out(s) DEBUG(0,("%s",(s)))
143 #ifdef DEBUG_STRINGS
/*
 * Write a hex dump of len bytes at s to the debug log.
 *
 * The dump is framed by "<<<<<<<" and ">>>>>>>" lines and preceded by
 * label.  Every output line covers up to 16 bytes: the offset, two
 * groups of eight hex values and the same bytes as text, with a '.'
 * standing in for anything outside the printable ASCII range.
 *
 * label and the dumped bytes are data, so they are always logged via
 * an explicit "%s" format; a '%' in a file name must not be taken for
 * a conversion specifier.
 */
static void hexdump( const char * label, const char * s, size_t len )
{
	size_t restlen = len;

	debug_out("<<<<<<<\n");
	DEBUG(0, ("%s", label));
	debug_out("\n");

	while (restlen > 0) {
		char line[100];
		char *d = line;
		const size_t chunk = (restlen < 16) ? restlen : 16;
		size_t i;

		d += snprintf(d, sizeof(line), "%04X ",
			      (unsigned)(len-restlen));
		*d++ = ' ';

		for (i = 0; i < 16; ++i) {
			const size_t room = sizeof(line) - (size_t)(d - line);

			if (i < chunk) {
				d += snprintf(d, room, "%02X ",
					      ((unsigned)s[i]) & 0xFF);
			} else {
				/*
				 * A missing byte takes the same three
				 * columns as a printed one, so that the
				 * text column stays aligned.
				 */
				d += snprintf(d, room, "   ");
			}
			/* Gap after each group of eight values. */
			if (i == 7 || i == 15) {
				*d++ = ' ';
			}
		}

		for (i = 0; i < chunk; ++i) {
			const unsigned char c = (unsigned char)s[i];

			if (c < ' ' || c >= 0x7F || !isprint(c)) {
				*d++ = '.';
			} else {
				*d++ = (char)c;
			}
		}
		*d++ = '\n';
		*d = 0;

		restlen -= chunk;
		s += chunk;
		DEBUG(0, ("%s", line));
	}

	debug_out(">>>>>>>\n");
}
187 #else /* !DEBUG_STRINGS */
/* Without DEBUG_STRINGS, calls to hexdump() expand to nothing. */
189 #define hexdump(label,s,len) /* nothing */
191 #endif
194 #if !USE_INTERNAL_API
197 * An implementation based on documented Mac OS X APIs.
199 * This does a certain amount of memory management, creating and
200 * manipulating CFString objects. We try to minimize the impact by
201 * keeping those objects around and re-using them. We also use
202 * external backing store for the CFStrings where this is possible and
203 * beneficial.
205 * The Unicode normalizations forms available at this level are
206 * generic, not specifically for the file system. So they may not be
207 * perfect fits.
209 size_t macosxfs_encoding_pull(
210 void *cd, /* Encoder handle */
211 const char **inbuf, size_t *inbytesleft, /* Script string */
212 char **outbuf, size_t *outbytesleft) /* UTF-16-LE string */
214 static const int script_code = kCFStringEncodingUTF8;
215 static CFMutableStringRef cfstring = NULL;
216 size_t outsize;
217 CFRange range;
219 (void) cd; /* UNUSED */
221 if (0 == *inbytesleft) {
222 return 0;
225 if (NULL == cfstring) {
227 * A version with an external backing store as in the
228 * push function should have been more efficient, but
229 * testing shows, that it is actually slower (!).
230 * Maybe kCFAllocatorDefault gets shortcut evaluation
231 * internally, while kCFAllocatorNull doesn't.
233 cfstring = CFStringCreateMutable(kCFAllocatorDefault,0);
237 * Three methods of appending to a CFString, choose the most
238 * efficient.
240 if (0 == (*inbuf)[*inbytesleft-1]) {
241 CFStringAppendCString(cfstring, *inbuf, script_code);
242 } else if (*inbytesleft <= 255) {
243 Str255 buffer;
244 buffer[0] = *inbytesleft;
245 memcpy(buffer+1, *inbuf, buffer[0]);
246 CFStringAppendPascalString(cfstring, buffer, script_code);
247 } else {
249 * We would like to use a fixed buffer and a loop
250 * here, but then we can't guarantee that the input is
251 * well-formed UTF-8, as we are supposed to do.
253 static char *buffer = NULL;
254 static size_t buflen = 0;
255 buffer = resize_buffer(buffer, &buflen, *inbytesleft+1);
256 memcpy(buffer, *inbuf, *inbytesleft);
257 buffer[*inbytesleft] = 0;
258 CFStringAppendCString(cfstring, *inbuf, script_code);
262 * Compose characters, using the non-canonical composition
263 * form.
265 CFStringNormalize(cfstring, kCFStringNormalizationFormC);
267 outsize = CFStringGetLength(cfstring);
268 range = CFRangeMake(0,outsize);
270 if (outsize == 0) {
272 * HACK: smbd/mangle_hash2.c:is_legal_name() expects
273 * errors here. That function will always pass 2
274 * characters. smbd/open.c:check_for_pipe() cuts a
275 * patchname to 10 characters blindly. Suppress the
276 * debug output in those cases.
278 if(2 != *inbytesleft && 10 != *inbytesleft) {
279 debug_out("String conversion: "
280 "An unknown error occurred\n");
281 hexdump("UTF8->UTF16LE (old) input",
282 *inbuf, *inbytesleft);
284 errno = EILSEQ; /* Not sure, but this is what we have
285 * actually seen. */
286 return -1;
288 if (outsize*2 > *outbytesleft) {
289 CFStringDelete(cfstring, range);
290 debug_out("String conversion: "
291 "Output buffer too small\n");
292 hexdump("UTF8->UTF16LE (old) input",
293 *inbuf, *inbytesleft);
294 errno = E2BIG;
295 return -1;
298 CFStringGetCharacters(cfstring, range, (UniChar*)*outbuf);
299 CFStringDelete(cfstring, range);
301 native_to_le(*outbuf, outsize*2);
304 * Add a converted null byte, if the CFString conversions
305 * prevented that until now.
307 if (0 == (*inbuf)[*inbytesleft-1] &&
308 (0 != (*outbuf)[outsize*2-1] || 0 != (*outbuf)[outsize*2-2])) {
310 if ((outsize*2+2) > *outbytesleft) {
311 debug_out("String conversion: "
312 "Output buffer too small\n");
313 hexdump("UTF8->UTF16LE (old) input",
314 *inbuf, *inbytesleft);
315 errno = E2BIG;
316 return -1;
319 (*outbuf)[outsize*2] = (*outbuf)[outsize*2+1] = 0;
320 outsize += 2;
323 *inbuf += *inbytesleft;
324 *inbytesleft = 0;
325 *outbuf += outsize*2;
326 *outbytesleft -= outsize*2;
328 return 0;
331 size_t macosxfs_encoding_push(
332 void *cd, /* Encoder handle */
333 const char **inbuf, size_t *inbytesleft, /* UTF-16-LE string */
334 char **outbuf, size_t *outbytesleft) /* Script string */
336 static const int script_code = kCFStringEncodingUTF8;
337 static CFMutableStringRef cfstring = NULL;
338 static UniChar *buffer = NULL;
339 static size_t buflen = 0;
340 CFIndex outsize, cfsize, charsconverted;
342 (void) cd; /* UNUSED */
344 if (0 == *inbytesleft) {
345 return 0;
349 * We need a buffer that can hold 4 times the original data,
350 * because that is the theoretical maximum that decomposition
351 * can create currently (in Unicode 4.0).
353 buffer = set_ucbuffer_with_le_copy(
354 buffer, &buflen, *inbuf, *inbytesleft, 3 * *inbytesleft);
356 if (NULL == cfstring) {
357 cfstring = CFStringCreateMutableWithExternalCharactersNoCopy(
358 kCFAllocatorDefault,
359 buffer, *inbytesleft/2, buflen/2,
360 kCFAllocatorNull);
361 } else {
362 CFStringSetExternalCharactersNoCopy(
363 cfstring,
364 buffer, *inbytesleft/2, buflen/2);
368 * Decompose characters, using the non-canonical decomposition
369 * form.
371 * NB: This isn't exactly what HFS+ wants (see note on
372 * kCFStringEncodingUseHFSPlusCanonical in
373 * CFStringEncodingConverter.h), but AFAIK it's the best that
374 * the official API can do.
376 CFStringNormalize(cfstring, kCFStringNormalizationFormD);
378 cfsize = CFStringGetLength(cfstring);
379 charsconverted = CFStringGetBytes(
380 cfstring, CFRangeMake(0,cfsize),
381 script_code, 0, false,
382 *(UInt8 **)outbuf, *outbytesleft, &outsize);
384 if (0 == charsconverted) {
385 debug_out("String conversion: "
386 "Buffer too small or not convertible\n");
387 hexdump("UTF16LE->UTF8 (old) input",
388 *inbuf, *inbytesleft);
389 errno = EILSEQ; /* Probably more likely. */
390 return -1;
394 * Add a converted null byte, if the CFString conversions
395 * prevented that until now.
397 if (0 == (*inbuf)[*inbytesleft-1] && 0 == (*inbuf)[*inbytesleft-2] &&
398 (0 != (*outbuf)[outsize-1])) {
400 if (((size_t)outsize+1) > *outbytesleft) {
401 debug_out("String conversion: "
402 "Output buffer too small\n");
403 hexdump("UTF16LE->UTF8 (old) input",
404 *inbuf, *inbytesleft);
405 errno = E2BIG;
406 return -1;
409 (*outbuf)[outsize] = 0;
410 ++outsize;
413 *inbuf += *inbytesleft;
414 *inbytesleft = 0;
415 *outbuf += outsize;
416 *outbytesleft -= outsize;
418 return 0;
421 #else /* USE_INTERNAL_API */
424 * An implementation based on internal code as known from the
425 * OpenDarwin CVS.
427 * This code doesn't need much memory management because it uses
428 * functions that operate on the raw memory directly.
430 * The push routine here is faster and more compatible with HFS+ than
431 * the other implementation above. The pull routine is only faster
432 * for some strings, slightly slower for others. The pull routine
433 * loses because it has to iterate over the data twice, once to
434 * decode UTF-8 and then to do the character composition required by
435 * Windows.
437 static size_t macosxfs_encoding_pull(
438 void *cd, /* Encoder handle */
439 const char **inbuf, size_t *inbytesleft, /* Script string */
440 char **outbuf, size_t *outbytesleft) /* UTF-16-LE string */
442 static const int script_code = kCFStringEncodingUTF8;
443 UInt32 srcCharsUsed = 0;
444 UInt32 dstCharsUsed = 0;
445 UInt32 result;
446 uint32_t dstDecomposedUsed = 0;
447 uint32_t dstPrecomposedUsed = 0;
449 (void) cd; /* UNUSED */
451 if (0 == *inbytesleft) {
452 return 0;
455 result = CFStringEncodingBytesToUnicode(
456 script_code, kCFStringEncodingComposeCombinings,
457 *inbuf, *inbytesleft, &srcCharsUsed,
458 (UniChar*)*outbuf, *outbytesleft, &dstCharsUsed);
460 switch(result) {
461 case kCFStringEncodingConversionSuccess:
462 if (*inbytesleft == srcCharsUsed) {
463 break;
466 FALL_THROUGH;
467 case kCFStringEncodingInsufficientOutputBufferLength:
468 debug_out("String conversion: "
469 "Output buffer too small\n");
470 hexdump("UTF8->UTF16LE (new) input",
471 *inbuf, *inbytesleft);
472 errno = E2BIG;
473 return -1;
474 case kCFStringEncodingInvalidInputStream:
476 * HACK: smbd/mangle_hash2.c:is_legal_name() expects
477 * errors here. That function will always pass 2
478 * characters. smbd/open.c:check_for_pipe() cuts a
479 * patchname to 10 characters blindly. Suppress the
480 * debug output in those cases.
482 if(2 != *inbytesleft && 10 != *inbytesleft) {
483 debug_out("String conversion: "
484 "Invalid input sequence\n");
485 hexdump("UTF8->UTF16LE (new) input",
486 *inbuf, *inbytesleft);
488 errno = EILSEQ;
489 return -1;
490 case kCFStringEncodingConverterUnavailable:
491 debug_out("String conversion: "
492 "Unknown encoding\n");
493 hexdump("UTF8->UTF16LE (new) input",
494 *inbuf, *inbytesleft);
495 errno = EINVAL;
496 return -1;
500 * It doesn't look like CFStringEncodingBytesToUnicode() can
501 * produce precomposed characters (flags=ComposeCombinings
502 * doesn't do it), so we need another pass over the data here.
503 * We can do this in-place, as the string can only get
504 * shorter.
506 * (Actually in theory there should be an internal
507 * decomposition and reordering before the actual composition
508 * step. But we should be able to rely on that we always get
509 * fully decomposed strings for input, so this can't create
510 * problems in reality.)
512 CFUniCharPrecompose(
513 (const UTF16Char *)*outbuf, dstCharsUsed, &dstDecomposedUsed,
514 (UTF16Char *)*outbuf, dstCharsUsed, &dstPrecomposedUsed);
516 native_to_le(*outbuf, dstPrecomposedUsed*2);
518 *inbuf += srcCharsUsed;
519 *inbytesleft -= srcCharsUsed;
520 *outbuf += dstPrecomposedUsed*2;
521 *outbytesleft -= dstPrecomposedUsed*2;
523 return 0;
526 static size_t macosxfs_encoding_push(
527 void *cd, /* Encoder handle */
528 const char **inbuf, size_t *inbytesleft, /* UTF-16-LE string */
529 char **outbuf, size_t *outbytesleft) /* Script string */
531 static const int script_code = kCFStringEncodingUTF8;
532 static UniChar *buffer = NULL;
533 static size_t buflen = 0;
534 UInt32 srcCharsUsed=0, dstCharsUsed=0, result;
536 (void) cd; /* UNUSED */
538 if (0 == *inbytesleft) {
539 return 0;
542 buffer = set_ucbuffer_with_le(
543 buffer, &buflen, *inbuf, *inbytesleft);
545 result = CFStringEncodingUnicodeToBytes(
546 script_code, kCFStringEncodingUseHFSPlusCanonical,
547 buffer, *inbytesleft/2, &srcCharsUsed,
548 *outbuf, *outbytesleft, &dstCharsUsed);
550 switch(result) {
551 case kCFStringEncodingConversionSuccess:
552 if (*inbytesleft/2 == srcCharsUsed) {
553 break;
556 FALL_THROUGH;
557 case kCFStringEncodingInsufficientOutputBufferLength:
558 debug_out("String conversion: "
559 "Output buffer too small\n");
560 hexdump("UTF16LE->UTF8 (new) input",
561 *inbuf, *inbytesleft);
562 errno = E2BIG;
563 return -1;
564 case kCFStringEncodingInvalidInputStream:
566 * HACK: smbd/open.c:check_for_pipe():is_legal_name()
567 * cuts a pathname to 10 characters blindly. Suppress
568 * the debug output in those cases.
570 if(10 != *inbytesleft) {
571 debug_out("String conversion: "
572 "Invalid input sequence\n");
573 hexdump("UTF16LE->UTF8 (new) input",
574 *inbuf, *inbytesleft);
576 errno = EILSEQ;
577 return -1;
578 case kCFStringEncodingConverterUnavailable:
579 debug_out("String conversion: "
580 "Unknown encoding\n");
581 hexdump("UTF16LE->UTF8 (new) input",
582 *inbuf, *inbytesleft);
583 errno = EINVAL;
584 return -1;
587 *inbuf += srcCharsUsed*2;
588 *inbytesleft -= srcCharsUsed*2;
589 *outbuf += dstCharsUsed;
590 *outbytesleft -= dstCharsUsed;
592 return 0;
595 #endif /* USE_INTERNAL_API */
597 #else /* DARWIN */
/*
 * Compiled only when DARWINOS is not defined, in place of the
 * converter functions.  It takes no arguments and does nothing.
 */
void charset_macosfs_dummy(void);

void charset_macosfs_dummy(void)
{
	/* intentionally empty */
}
605 #endif /* DARWIN */