s3:dbwrap_tool: add --persistent switch and mode for non-persistent DBs
[Samba/gebeck_regimport.git] / lib / util / charset / charset_macosxfs.c
blob895277d001aa41c8f55748433854c8ee4b9eeb79
1 /*
2 Unix SMB/CIFS implementation.
3 Samba charset module for Mac OS X/Darwin
4 Copyright (C) Benjamin Riefenstahl 2003
6 This program is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 3 of the License, or
9 (at your option) any later version.
11 This program is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with this program. If not, see <http://www.gnu.org/licenses/>.
21 * modules/charset_macosxfs.c
23 * A Samba charset module to use on Mac OS X/Darwin as the filesystem
24 * and display encoding.
26 * Actually two implementations are provided here. The default
27 * implementation is based on the official CFString API. The other is
28 * based on internal CFString APIs as defined in the OpenDarwin
29 * source.
32 #include "includes.h"
33 #include "charset_proto.h"
34 #undef realloc
36 #ifdef DARWINOS
39 * Include OS frameworks. These are only needed in this module.
41 #include <CoreFoundation/CFString.h>
44 * See if autoconf has found us the internal headers in some form.
46 #if HAVE_COREFOUNDATION_CFSTRINGENCODINGCONVERTER_H
47 # include <CoreFoundation/CFStringEncodingConverter.h>
48 # include <CoreFoundation/CFUnicodePrecomposition.h>
49 # define USE_INTERNAL_API 1
50 #elif HAVE_CFSTRINGENCODINGCONVERTER_H
51 # include <CFStringEncodingConverter.h>
52 # include <CFUnicodePrecomposition.h>
53 # define USE_INTERNAL_API 1
54 #endif
57 * Compile time configuration: Do we want debug output?
59 /* #define DEBUG_STRINGS 1 */
62 * A simple, but efficient memory provider for our buffers.
/**
 * Make sure a heap buffer can hold at least newsize bytes.
 *
 * The buffer only ever grows.  When it has to grow, 128 bytes of slack
 * are added so that slightly larger follow-up requests do not trigger
 * another reallocation.
 *
 * @param buffer   Current buffer, or NULL if nothing is allocated yet.
 * @param size     In/out: current capacity of buffer in bytes.
 * @param newsize  Number of bytes the caller needs.
 *
 * @return The (possibly moved) buffer.  On failure - the request is so
 *         large that adding the slack would wrap, or realloc() fails -
 *         the old buffer is released, *size is reset to 0 and NULL is
 *         returned.  Releasing the old block keeps pointer and
 *         capacity consistent for callers that store the result back
 *         into the variable they passed in.
 */
static inline void *resize_buffer (void *buffer, size_t *size, size_t newsize)
{
	const size_t slack = 128;
	size_t wanted;
	void *grown;

	if (newsize <= *size) {
		return buffer;
	}

	/* newsize + slack must not wrap around to a tiny allocation. */
	if (newsize > (size_t)-1 - slack) {
		free(buffer);
		*size = 0;
		return NULL;
	}

	wanted = newsize + slack;
	grown = realloc(buffer, wanted);
	if (grown == NULL) {
		/* realloc() left the old block alive; drop it. */
		free(buffer);
		*size = 0;
		return NULL;
	}

	*size = wanted;
	return grown;
}
74 * While there is a version of OpenDarwin for intel, the usual case is
75 * big-endian PPC. So we need byte swapping to handle the
76 * little-endian byte order of the network protocol. We also need an
77 * additional dynamic buffer to do this work for incoming data blocks,
78 * because we have to consider the original data as constant.
80 * We abstract the differences away by providing a simple facade with
81 * these functions/macros:
83 * le_to_native(dst,src,len)
84 * native_to_le(cp,len)
85 * set_ucbuffer_with_le(buffer,bufsize,data,size)
86 * set_ucbuffer_with_le_copy(buffer,bufsize,data,size,reserve)
88 #ifdef WORDS_BIGENDIAN
/*
 * Copy len bytes from src to dst while exchanging the two bytes of
 * every 16-bit unit.  len is expected to be even; the loop always
 * handles whole byte pairs.
 */
static inline void swap_bytes (char * dst, const char * src, size_t len)
{
	size_t pos;

	for (pos = 0; pos < len; pos += 2) {
		dst[pos] = src[pos + 1];
		dst[pos + 1] = src[pos];
	}
}
/*
 * Exchange the two bytes of every 16-bit unit in the len bytes
 * starting at cp, modifying the data in place.  len is expected to be
 * even; the loop always handles whole byte pairs.
 */
static inline void swap_bytes_inplace (char * cp, size_t len)
{
	char *stop = cp + len;
	char *pair;

	for (pair = cp; pair < stop; pair += 2) {
		const char second = pair[1];

		pair[1] = pair[0];
		pair[0] = second;
	}
}
112 #define le_to_native(dst,src,len) swap_bytes(dst,src,len)
113 #define native_to_le(cp,len) swap_bytes_inplace(cp,len)
114 #define set_ucbuffer_with_le(buffer,bufsize,data,size) \
115 set_ucbuffer_with_le_copy(buffer,bufsize,data,size,0)
117 #else /* ! WORDS_BIGENDIAN */
119 #define le_to_native(dst,src,len) memcpy(dst,src,len)
120 #define native_to_le(cp,len) /* nothing */
121 #define set_ucbuffer_with_le(buffer,bufsize,data,size) \
122 (((void)(bufsize)),(UniChar*)(data))
124 #endif
126 static inline UniChar *set_ucbuffer_with_le_copy (
127 UniChar *buffer, size_t *bufsize,
128 const void *data, size_t size, size_t reserve)
130 buffer = resize_buffer(buffer, bufsize, size+reserve);
131 le_to_native((char*)buffer,data,size);
132 return buffer;
137 * A simple hexdump function for debugging error conditions.
/*
 * Print a text at debug level 0.  The text is passed as an argument
 * to a fixed "%s" format and never as the format string itself, so a
 * '%' inside it (e.g. coming from a file name in a hexdump line) is
 * printed literally.
 */
#define debug_out(s) DEBUG(0,("%s",(s)))
141 #ifdef DEBUG_STRINGS
/**
 * Write a labelled hex/text dump of a memory block to the debug log.
 *
 * Every output line shows the offset, up to 16 bytes in hex (split
 * into two groups of 8) and the same bytes as text, with everything
 * outside the printable ASCII range shown as '.'.
 *
 * @param label  Heading printed before the dump.
 * @param s      Start of the data.
 * @param len    Number of bytes to dump.
 */
static void hexdump( const char * label, const char * s, size_t len )
{
	size_t restlen = len;

	/*
	 * label and the dump lines may contain '%', so they are only
	 * ever passed as arguments to a "%s" format.
	 */
	DEBUG(0,("%s", "<<<<<<<\n"));
	DEBUG(0,("%s", label));
	DEBUG(0,("%s", "\n"));

	while (restlen > 0) {
		/* Longest line: 8+1 offset, 1+24+1+24+1 hex, 16 text, "\n\0" */
		char line[100];
		char * const lineend = line + sizeof(line);
		char * d = line;
		size_t chunk = (restlen < 16) ? restlen : 16;
		size_t i;

		d += snprintf(d, (size_t)(lineend - d), "%04X ",
			      (unsigned)(len-restlen));
		*d++ = ' ';

		for( i = 0; i<16; ++i ) {
			if (8 == i) {
				/* gap between the two groups of 8 */
				*d++ = ' ';
			}
			if (i < chunk) {
				d += snprintf(d, (size_t)(lineend - d),
					      "%02X ",
					      ((unsigned)s[i]) & 0xFF);
			} else {
				/* keep the text column aligned */
				d += snprintf(d, (size_t)(lineend - d),
					      "   ");
			}
		}
		*d++ = ' ';

		for( i = 0; i<chunk; ++i ) {
			const unsigned char c = (unsigned char)s[i];

			if(c < ' ' || c >= 0x7F || !isprint(c))
				*d++ = '.';
			else
				*d++ = s[i];
		}
		*d++ = '\n';
		*d = 0;

		restlen -= chunk;
		s += chunk;
		DEBUG(0,("%s", line));
	}

	DEBUG(0,("%s", ">>>>>>>\n"));
}
185 #else /* !DEBUG_STRINGS */
187 #define hexdump(label,s,len) /* nothing */
189 #endif
192 #if !USE_INTERNAL_API
195 * An implementation based on documented Mac OS X APIs.
197 * This does a certain amount of memory management, creating and
198 * manipulating CFString objects. We try to minimize the impact by
199 * keeping those objects around and re-using them. We also use
200 * external backing store for the CFStrings where this is possible and
201 * beneficial.
203 * The Unicode normalizations forms available at this level are
204 * generic, not specifically for the file system. So they may not be
205 * perfect fits.
207 size_t macosxfs_encoding_pull(
208 void *cd, /* Encoder handle */
209 const char **inbuf, size_t *inbytesleft, /* Script string */
210 char **outbuf, size_t *outbytesleft) /* UTF-16-LE string */
212 static const int script_code = kCFStringEncodingUTF8;
213 static CFMutableStringRef cfstring = NULL;
214 size_t outsize;
215 CFRange range;
217 (void) cd; /* UNUSED */
219 if (0 == *inbytesleft) {
220 return 0;
223 if (NULL == cfstring) {
225 * A version with an external backing store as in the
226 * push function should have been more efficient, but
227 * testing shows, that it is actually slower (!).
228 * Maybe kCFAllocatorDefault gets shortcut evaluation
229 * internally, while kCFAllocatorNull doesn't.
231 cfstring = CFStringCreateMutable(kCFAllocatorDefault,0);
235 * Three methods of appending to a CFString, choose the most
236 * efficient.
238 if (0 == (*inbuf)[*inbytesleft-1]) {
239 CFStringAppendCString(cfstring, *inbuf, script_code);
240 } else if (*inbytesleft <= 255) {
241 Str255 buffer;
242 buffer[0] = *inbytesleft;
243 memcpy(buffer+1, *inbuf, buffer[0]);
244 CFStringAppendPascalString(cfstring, buffer, script_code);
245 } else {
247 * We would like to use a fixed buffer and a loop
248 * here, but than we can't garantee that the input is
249 * well-formed UTF-8, as we are supposed to do.
251 static char *buffer = NULL;
252 static size_t buflen = 0;
253 buffer = resize_buffer(buffer, &buflen, *inbytesleft+1);
254 memcpy(buffer, *inbuf, *inbytesleft);
255 buffer[*inbytesleft] = 0;
256 CFStringAppendCString(cfstring, *inbuf, script_code);
260 * Compose characters, using the non-canonical composition
261 * form.
263 CFStringNormalize(cfstring, kCFStringNormalizationFormC);
265 outsize = CFStringGetLength(cfstring);
266 range = CFRangeMake(0,outsize);
268 if (outsize == 0) {
270 * HACK: smbd/mangle_hash2.c:is_legal_name() expects
271 * errors here. That function will always pass 2
272 * characters. smbd/open.c:check_for_pipe() cuts a
273 * patchname to 10 characters blindly. Suppress the
274 * debug output in those cases.
276 if(2 != *inbytesleft && 10 != *inbytesleft) {
277 debug_out("String conversion: "
278 "An unknown error occurred\n");
279 hexdump("UTF8->UTF16LE (old) input",
280 *inbuf, *inbytesleft);
282 errno = EILSEQ; /* Not sure, but this is what we have
283 * actually seen. */
284 return -1;
286 if (outsize*2 > *outbytesleft) {
287 CFStringDelete(cfstring, range);
288 debug_out("String conversion: "
289 "Output buffer too small\n");
290 hexdump("UTF8->UTF16LE (old) input",
291 *inbuf, *inbytesleft);
292 errno = E2BIG;
293 return -1;
296 CFStringGetCharacters(cfstring, range, (UniChar*)*outbuf);
297 CFStringDelete(cfstring, range);
299 native_to_le(*outbuf, outsize*2);
302 * Add a converted null byte, if the CFString conversions
303 * prevented that until now.
305 if (0 == (*inbuf)[*inbytesleft-1] &&
306 (0 != (*outbuf)[outsize*2-1] || 0 != (*outbuf)[outsize*2-2])) {
308 if ((outsize*2+2) > *outbytesleft) {
309 debug_out("String conversion: "
310 "Output buffer too small\n");
311 hexdump("UTF8->UTF16LE (old) input",
312 *inbuf, *inbytesleft);
313 errno = E2BIG;
314 return -1;
317 (*outbuf)[outsize*2] = (*outbuf)[outsize*2+1] = 0;
318 outsize += 2;
321 *inbuf += *inbytesleft;
322 *inbytesleft = 0;
323 *outbuf += outsize*2;
324 *outbytesleft -= outsize*2;
326 return 0;
329 size_t macosxfs_encoding_push(
330 void *cd, /* Encoder handle */
331 const char **inbuf, size_t *inbytesleft, /* UTF-16-LE string */
332 char **outbuf, size_t *outbytesleft) /* Script string */
334 static const int script_code = kCFStringEncodingUTF8;
335 static CFMutableStringRef cfstring = NULL;
336 static UniChar *buffer = NULL;
337 static size_t buflen = 0;
338 CFIndex outsize, cfsize, charsconverted;
340 (void) cd; /* UNUSED */
342 if (0 == *inbytesleft) {
343 return 0;
347 * We need a buffer that can hold 4 times the original data,
348 * because that is the theoretical maximum that decomposition
349 * can create currently (in Unicode 4.0).
351 buffer = set_ucbuffer_with_le_copy(
352 buffer, &buflen, *inbuf, *inbytesleft, 3 * *inbytesleft);
354 if (NULL == cfstring) {
355 cfstring = CFStringCreateMutableWithExternalCharactersNoCopy(
356 kCFAllocatorDefault,
357 buffer, *inbytesleft/2, buflen/2,
358 kCFAllocatorNull);
359 } else {
360 CFStringSetExternalCharactersNoCopy(
361 cfstring,
362 buffer, *inbytesleft/2, buflen/2);
366 * Decompose characters, using the non-canonical decomposition
367 * form.
369 * NB: This isn't exactly what HFS+ wants (see note on
370 * kCFStringEncodingUseHFSPlusCanonical in
371 * CFStringEncodingConverter.h), but AFAIK it's the best that
372 * the official API can do.
374 CFStringNormalize(cfstring, kCFStringNormalizationFormD);
376 cfsize = CFStringGetLength(cfstring);
377 charsconverted = CFStringGetBytes(
378 cfstring, CFRangeMake(0,cfsize),
379 script_code, 0, false,
380 *outbuf, *outbytesleft, &outsize);
382 if (0 == charsconverted) {
383 debug_out("String conversion: "
384 "Buffer too small or not convertable\n");
385 hexdump("UTF16LE->UTF8 (old) input",
386 *inbuf, *inbytesleft);
387 errno = EILSEQ; /* Probably more likely. */
388 return -1;
392 * Add a converted null byte, if the CFString conversions
393 * prevented that until now.
395 if (0 == (*inbuf)[*inbytesleft-1] && 0 == (*inbuf)[*inbytesleft-2] &&
396 (0 != (*outbuf)[outsize-1])) {
398 if (((size_t)outsize+1) > *outbytesleft) {
399 debug_out("String conversion: "
400 "Output buffer too small\n");
401 hexdump("UTF16LE->UTF8 (old) input",
402 *inbuf, *inbytesleft);
403 errno = E2BIG;
404 return -1;
407 (*outbuf)[outsize] = 0;
408 ++outsize;
411 *inbuf += *inbytesleft;
412 *inbytesleft = 0;
413 *outbuf += outsize;
414 *outbytesleft -= outsize;
416 return 0;
419 #else /* USE_INTERNAL_API */
422 * An implementation based on internal code as known from the
423 * OpenDarwin CVS.
425 * This code doesn't need much memory management because it uses
426 * functions that operate on the raw memory directly.
428 * The push routine here is faster and more compatible with HFS+ than
429 * the other implementation above. The pull routine is only faster
430 * for some strings, slightly slower for others. The pull routine
431 * loses because it has to iterate over the data twice, once to
432 * decode UTF-8 and then to do the character composition required by
433 * Windows.
435 static size_t macosxfs_encoding_pull(
436 void *cd, /* Encoder handle */
437 const char **inbuf, size_t *inbytesleft, /* Script string */
438 char **outbuf, size_t *outbytesleft) /* UTF-16-LE string */
440 static const int script_code = kCFStringEncodingUTF8;
441 UInt32 srcCharsUsed = 0;
442 UInt32 dstCharsUsed = 0;
443 UInt32 result;
444 uint32_t dstDecomposedUsed = 0;
445 uint32_t dstPrecomposedUsed = 0;
447 (void) cd; /* UNUSED */
449 if (0 == *inbytesleft) {
450 return 0;
453 result = CFStringEncodingBytesToUnicode(
454 script_code, kCFStringEncodingComposeCombinings,
455 *inbuf, *inbytesleft, &srcCharsUsed,
456 (UniChar*)*outbuf, *outbytesleft, &dstCharsUsed);
458 switch(result) {
459 case kCFStringEncodingConversionSuccess:
460 if (*inbytesleft == srcCharsUsed)
461 break;
462 else
463 ; /*fall through*/
464 case kCFStringEncodingInsufficientOutputBufferLength:
465 debug_out("String conversion: "
466 "Output buffer too small\n");
467 hexdump("UTF8->UTF16LE (new) input",
468 *inbuf, *inbytesleft);
469 errno = E2BIG;
470 return -1;
471 case kCFStringEncodingInvalidInputStream:
473 * HACK: smbd/mangle_hash2.c:is_legal_name() expects
474 * errors here. That function will always pass 2
475 * characters. smbd/open.c:check_for_pipe() cuts a
476 * patchname to 10 characters blindly. Suppress the
477 * debug output in those cases.
479 if(2 != *inbytesleft && 10 != *inbytesleft) {
480 debug_out("String conversion: "
481 "Invalid input sequence\n");
482 hexdump("UTF8->UTF16LE (new) input",
483 *inbuf, *inbytesleft);
485 errno = EILSEQ;
486 return -1;
487 case kCFStringEncodingConverterUnavailable:
488 debug_out("String conversion: "
489 "Unknown encoding\n");
490 hexdump("UTF8->UTF16LE (new) input",
491 *inbuf, *inbytesleft);
492 errno = EINVAL;
493 return -1;
497 * It doesn't look like CFStringEncodingBytesToUnicode() can
498 * produce precomposed characters (flags=ComposeCombinings
499 * doesn't do it), so we need another pass over the data here.
500 * We can do this in-place, as the string can only get
501 * shorter.
503 * (Actually in theory there should be an internal
504 * decomposition and reordering before the actual composition
505 * step. But we should be able to rely on that we always get
506 * fully decomposed strings for input, so this can't create
507 * problems in reality.)
509 CFUniCharPrecompose(
510 (const UTF16Char *)*outbuf, dstCharsUsed, &dstDecomposedUsed,
511 (UTF16Char *)*outbuf, dstCharsUsed, &dstPrecomposedUsed);
513 native_to_le(*outbuf, dstPrecomposedUsed*2);
515 *inbuf += srcCharsUsed;
516 *inbytesleft -= srcCharsUsed;
517 *outbuf += dstPrecomposedUsed*2;
518 *outbytesleft -= dstPrecomposedUsed*2;
520 return 0;
523 static size_t macosxfs_encoding_push(
524 void *cd, /* Encoder handle */
525 const char **inbuf, size_t *inbytesleft, /* UTF-16-LE string */
526 char **outbuf, size_t *outbytesleft) /* Script string */
528 static const int script_code = kCFStringEncodingUTF8;
529 static UniChar *buffer = NULL;
530 static size_t buflen = 0;
531 UInt32 srcCharsUsed=0, dstCharsUsed=0, result;
533 (void) cd; /* UNUSED */
535 if (0 == *inbytesleft) {
536 return 0;
539 buffer = set_ucbuffer_with_le(
540 buffer, &buflen, *inbuf, *inbytesleft);
542 result = CFStringEncodingUnicodeToBytes(
543 script_code, kCFStringEncodingUseHFSPlusCanonical,
544 buffer, *inbytesleft/2, &srcCharsUsed,
545 *outbuf, *outbytesleft, &dstCharsUsed);
547 switch(result) {
548 case kCFStringEncodingConversionSuccess:
549 if (*inbytesleft/2 == srcCharsUsed)
550 break;
551 else
552 ; /*fall through*/
553 case kCFStringEncodingInsufficientOutputBufferLength:
554 debug_out("String conversion: "
555 "Output buffer too small\n");
556 hexdump("UTF16LE->UTF8 (new) input",
557 *inbuf, *inbytesleft);
558 errno = E2BIG;
559 return -1;
560 case kCFStringEncodingInvalidInputStream:
562 * HACK: smbd/open.c:check_for_pipe():is_legal_name()
563 * cuts a pathname to 10 characters blindly. Suppress
564 * the debug output in those cases.
566 if(10 != *inbytesleft) {
567 debug_out("String conversion: "
568 "Invalid input sequence\n");
569 hexdump("UTF16LE->UTF8 (new) input",
570 *inbuf, *inbytesleft);
572 errno = EILSEQ;
573 return -1;
574 case kCFStringEncodingConverterUnavailable:
575 debug_out("String conversion: "
576 "Unknown encoding\n");
577 hexdump("UTF16LE->UTF8 (new) input",
578 *inbuf, *inbytesleft);
579 errno = EINVAL;
580 return -1;
583 *inbuf += srcCharsUsed*2;
584 *inbytesleft -= srcCharsUsed*2;
585 *outbuf += dstCharsUsed;
586 *outbytesleft -= dstCharsUsed;
588 return 0;
591 #endif /* USE_INTERNAL_API */
593 #else /* DARWIN */
/*
 * Stand-in compiled when DARWINOS is not defined; it takes no
 * arguments and does nothing.  Presumably it only exists so that this
 * file still defines a symbol on other platforms.
 */
void charset_macosfs_dummy(void);

void charset_macosfs_dummy(void)
{
	/* intentionally empty */
}
601 #endif /* DARWIN */