Add missing end tags
[Samba/bjacke.git] / source / modules / charset_macosxfs.c
blobbaf2a0071cbc64fcf50ef4ad2330bddc61b23898
1 /*
2 Unix SMB/CIFS implementation.
3 Samba charset module for Mac OS X/Darwin
4 Copyright (C) Benjamin Riefenstahl 2003
6 This program is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 3 of the License, or
9 (at your option) any later version.
11 This program is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with this program. If not, see <http://www.gnu.org/licenses/>.
21 * modules/charset_macosxfs.c
23 * A Samba charset module to use on Mac OS X/Darwin as the filesystem
24 * and display encoding.
26 * Actually two implementations are provided here. The default
27 * implementation is based on the official CFString API. The other is
28 * based on internal CFString APIs as defined in the OpenDarwin
29 * source.
32 #include "includes.h"
35 * Include OS frameworks. These are only needed in this module.
37 #include <CoreFoundation/CFString.h>
40 * See if autoconf has found us the internal headers in some form.
42 #if HAVE_COREFOUNDATION_CFSTRINGENCODINGCONVERTER_H
43 # include <CoreFoundation/CFStringEncodingConverter.h>
44 # include <CoreFoundation/CFUnicodePrecomposition.h>
45 # define USE_INTERNAL_API 1
46 #elif HAVE_CFSTRINGENCODINGCONVERTER_H
47 # include <CFStringEncodingConverter.h>
48 # include <CFUnicodePrecomposition.h>
49 # define USE_INTERNAL_API 1
50 #endif
53 * Compile time configuration: Do we want debug output?
55 /* #define DEBUG_STRINGS 1 */
58 * A simple, but efficient memory provider for our buffers.
/*
 * Grow a heap buffer so that it can hold at least `newsize` bytes.
 *
 * buffer   Current allocation, as previously returned by this function.
 * size     In/out: capacity of `buffer` in bytes.
 * newsize  Number of bytes the caller needs.
 *
 * The buffer only ever grows; 128 bytes of slack are added on each growth
 * so that slightly larger requests do not reallocate again.
 *
 * Returns the (possibly moved) buffer, or NULL if SMB_REALLOC() failed.
 * On failure *size is reset to 0 so that the recorded capacity never
 * claims memory that was not obtained; the next call then tries to
 * allocate again instead of handing back NULL as if it were big enough.
 * NOTE(review): assumes SMB_REALLOC() releases the old block when it
 * fails -- confirm against its definition.
 */
static inline void *resize_buffer (void *buffer, size_t *size, size_t newsize)
{
	if (newsize > *size) {
		size_t capacity = newsize + 128;

		buffer = SMB_REALLOC(buffer, capacity);
		*size = (NULL != buffer) ? capacity : 0;
	}
	return buffer;
}
70 * While there is a version of OpenDarwin for intel, the usual case is
71 * big-endian PPC. So we need byte swapping to handle the
72 * little-endian byte order of the network protocol. We also need an
73 * additional dynamic buffer to do this work for incoming data blocks,
74 * because we have to consider the original data as constant.
76 * We abstract the differences away by providing a simple facade with
77 * these functions/macros:
79 * le_to_native(dst,src,len)
80 * native_to_le(cp,len)
81 * set_ucbuffer_with_le(buffer,bufsize,data,size)
82 * set_ucbuffer_with_le_copy(buffer,bufsize,data,size,reserve)
84 #ifdef WORDS_BIGENDIAN
/*
 * Copy `len` bytes from `src` to `dst`, exchanging the two bytes of every
 * 16-bit unit on the way.  `len` is expected to be even; units are
 * processed in ascending order.
 */
static inline void swap_bytes (char * dst, const char * src, size_t len)
{
	size_t pos;

	for (pos = 0; pos < len; pos += 2) {
		dst[pos] = src[pos + 1];
		dst[pos + 1] = src[pos];
	}
}
/*
 * Exchange the two bytes of every 16-bit unit in the `len` bytes starting
 * at `cp`, modifying the buffer itself.  `len` is expected to be even.
 */
static inline void swap_bytes_inplace (char * cp, size_t len)
{
	size_t pos;

	for (pos = 0; pos < len; pos += 2) {
		const char first = cp[pos];

		cp[pos] = cp[pos + 1];
		cp[pos + 1] = first;
	}
}
108 #define le_to_native(dst,src,len) swap_bytes(dst,src,len)
109 #define native_to_le(cp,len) swap_bytes_inplace(cp,len)
110 #define set_ucbuffer_with_le(buffer,bufsize,data,size) \
111 set_ucbuffer_with_le_copy(buffer,bufsize,data,size,0)
113 #else /* ! WORDS_BIGENDIAN */
115 #define le_to_native(dst,src,len) memcpy(dst,src,len)
116 #define native_to_le(cp,len) /* nothing */
117 #define set_ucbuffer_with_le(buffer,bufsize,data,size) \
118 (((void)(bufsize)),(UniChar*)(data))
120 #endif
122 static inline UniChar *set_ucbuffer_with_le_copy (
123 UniChar *buffer, size_t *bufsize,
124 const void *data, size_t size, size_t reserve)
126 buffer = resize_buffer(buffer, bufsize, size+reserve);
127 le_to_native((char*)buffer,data,size);
128 return buffer;
133 * A simple hexdump function for debugging error conditions.
135 #define debug_out(s) DEBUG(0,(s))
137 #ifdef DEBUG_STRINGS
/*
 * Write a labelled hex/ASCII dump of `len` bytes at `s` to the debug log.
 *
 * Each output line shows the offset, up to 16 bytes in hex (two groups of
 * eight) and the same bytes as text, with every byte outside the range
 * ' ' .. 0x7E shown as '.'.
 *
 * `label` and the formatted lines are logged through an explicit "%s"
 * format: both are data, and a dumped '%' byte is copied verbatim into
 * the text column, so neither may be used as the format string itself.
 */
static void hexdump( const char * label, const char * s, size_t len )
{
	size_t restlen = len;

	debug_out("<<<<<<<\n");
	DEBUG(0,("%s", label));
	debug_out("\n");
	while (restlen > 0) {
		/* Longest line: 5+1 + 24+1 + 24+1 + 16 + "\n\0" = 74 bytes. */
		char line[100];
		size_t i, j;
		char * d = line;
#undef sprintf
		d += sprintf(d, "%04X ", (unsigned)(len-restlen));
		*d++ = ' ';
		/* First group of eight hex cells, three characters each. */
		for( i = 0; i<restlen && i<8; ++i ) {
			d += sprintf(d, "%02X ", ((unsigned)s[i]) & 0xFF);
		}
		/* Pad missing cells so that the text column stays aligned. */
		for( j = i; j<8; ++j ) {
			d += sprintf(d, "   ");
		}
		*d++ = ' ';
		/* Second group of eight hex cells. */
		for( i = 8; i<restlen && i<16; ++i ) {
			d += sprintf(d, "%02X ", ((unsigned)s[i]) & 0xFF);
		}
		for( j = i; j<16; ++j ) {
			d += sprintf(d, "   ");
		}
		*d++ = ' ';
		/* Text column; `i` ends up as the number of bytes dumped. */
		for( i = 0; i<restlen && i<16; ++i ) {
			if(s[i] < ' ' || s[i] >= 0x7F ||
			   !isprint((unsigned char)s[i]))
				*d++ = '.';
			else
				*d++ = s[i];
		}
		*d++ = '\n';
		*d = 0;
		restlen -= i;
		s += i;
		DEBUG(0,("%s", line));
	}
	debug_out(">>>>>>>\n");
}
181 #else /* !DEBUG_STRINGS */
183 #define hexdump(label,s,len) /* nothing */
185 #endif
188 #if !USE_INTERNAL_API
191 * An implementation based on documented Mac OS X APIs.
193 * This does a certain amount of memory management, creating and
194 * manipulating CFString objects. We try to minimize the impact by
195 * keeping those objects around and re-using them. We also use
196 * external backing store for the CFStrings where this is possible and
197 * beneficial.
199 * The Unicode normalizations forms available at this level are
200 * generic, not specifically for the file system. So they may not be
201 * perfect fits.
203 static size_t macosxfs_encoding_pull(
204 void *cd, /* Encoder handle */
205 char **inbuf, size_t *inbytesleft, /* Script string */
206 char **outbuf, size_t *outbytesleft) /* UTF-16-LE string */
208 static const int script_code = kCFStringEncodingUTF8;
209 static CFMutableStringRef cfstring = NULL;
210 size_t outsize;
211 CFRange range;
213 (void) cd; /* UNUSED */
215 if (0 == *inbytesleft) {
216 return 0;
219 if (NULL == cfstring) {
221 * A version with an external backing store as in the
222 * push function should have been more efficient, but
223 * testing shows, that it is actually slower (!).
224 * Maybe kCFAllocatorDefault gets shortcut evaluation
225 * internally, while kCFAllocatorNull doesn't.
227 cfstring = CFStringCreateMutable(kCFAllocatorDefault,0);
231 * Three methods of appending to a CFString, choose the most
232 * efficient.
234 if (0 == (*inbuf)[*inbytesleft-1]) {
235 CFStringAppendCString(cfstring, *inbuf, script_code);
236 } else if (*inbytesleft <= 255) {
237 Str255 buffer;
238 buffer[0] = *inbytesleft;
239 memcpy(buffer+1, *inbuf, buffer[0]);
240 CFStringAppendPascalString(cfstring, buffer, script_code);
241 } else {
243 * We would like to use a fixed buffer and a loop
244 * here, but than we can't garantee that the input is
245 * well-formed UTF-8, as we are supposed to do.
247 static char *buffer = NULL;
248 static size_t buflen = 0;
249 buffer = resize_buffer(buffer, &buflen, *inbytesleft+1);
250 memcpy(buffer, *inbuf, *inbytesleft);
251 buffer[*inbytesleft] = 0;
252 CFStringAppendCString(cfstring, *inbuf, script_code);
256 * Compose characters, using the non-canonical composition
257 * form.
259 CFStringNormalize(cfstring, kCFStringNormalizationFormC);
261 outsize = CFStringGetLength(cfstring);
262 range = CFRangeMake(0,outsize);
264 if (outsize == 0) {
266 * HACK: smbd/mangle_hash2.c:is_legal_name() expects
267 * errors here. That function will always pass 2
268 * characters. smbd/open.c:check_for_pipe() cuts a
269 * patchname to 10 characters blindly. Suppress the
270 * debug output in those cases.
272 if(2 != *inbytesleft && 10 != *inbytesleft) {
273 debug_out("String conversion: "
274 "An unknown error occurred\n");
275 hexdump("UTF8->UTF16LE (old) input",
276 *inbuf, *inbytesleft);
278 errno = EILSEQ; /* Not sure, but this is what we have
279 * actually seen. */
280 return -1;
282 if (outsize*2 > *outbytesleft) {
283 CFStringDelete(cfstring, range);
284 debug_out("String conversion: "
285 "Output buffer too small\n");
286 hexdump("UTF8->UTF16LE (old) input",
287 *inbuf, *inbytesleft);
288 errno = E2BIG;
289 return -1;
292 CFStringGetCharacters(cfstring, range, (UniChar*)*outbuf);
293 CFStringDelete(cfstring, range);
295 native_to_le(*outbuf, outsize*2);
298 * Add a converted null byte, if the CFString conversions
299 * prevented that until now.
301 if (0 == (*inbuf)[*inbytesleft-1] &&
302 (0 != (*outbuf)[outsize*2-1] || 0 != (*outbuf)[outsize*2-2])) {
304 if ((outsize*2+2) > *outbytesleft) {
305 debug_out("String conversion: "
306 "Output buffer too small\n");
307 hexdump("UTF8->UTF16LE (old) input",
308 *inbuf, *inbytesleft);
309 errno = E2BIG;
310 return -1;
313 (*outbuf)[outsize*2] = (*outbuf)[outsize*2+1] = 0;
314 outsize += 2;
317 *inbuf += *inbytesleft;
318 *inbytesleft = 0;
319 *outbuf += outsize*2;
320 *outbytesleft -= outsize*2;
322 return 0;
325 static size_t macosxfs_encoding_push(
326 void *cd, /* Encoder handle */
327 char **inbuf, size_t *inbytesleft, /* UTF-16-LE string */
328 char **outbuf, size_t *outbytesleft) /* Script string */
330 static const int script_code = kCFStringEncodingUTF8;
331 static CFMutableStringRef cfstring = NULL;
332 static UniChar *buffer = NULL;
333 static size_t buflen = 0;
334 CFIndex outsize, cfsize, charsconverted;
336 (void) cd; /* UNUSED */
338 if (0 == *inbytesleft) {
339 return 0;
343 * We need a buffer that can hold 4 times the original data,
344 * because that is the theoretical maximum that decomposition
345 * can create currently (in Unicode 4.0).
347 buffer = set_ucbuffer_with_le_copy(
348 buffer, &buflen, *inbuf, *inbytesleft, 3 * *inbytesleft);
350 if (NULL == cfstring) {
351 cfstring = CFStringCreateMutableWithExternalCharactersNoCopy(
352 kCFAllocatorDefault,
353 buffer, *inbytesleft/2, buflen/2,
354 kCFAllocatorNull);
355 } else {
356 CFStringSetExternalCharactersNoCopy(
357 cfstring,
358 buffer, *inbytesleft/2, buflen/2);
362 * Decompose characters, using the non-canonical decomposition
363 * form.
365 * NB: This isn't exactly what HFS+ wants (see note on
366 * kCFStringEncodingUseHFSPlusCanonical in
367 * CFStringEncodingConverter.h), but AFAIK it's the best that
368 * the official API can do.
370 CFStringNormalize(cfstring, kCFStringNormalizationFormD);
372 cfsize = CFStringGetLength(cfstring);
373 charsconverted = CFStringGetBytes(
374 cfstring, CFRangeMake(0,cfsize),
375 script_code, 0, False,
376 *outbuf, *outbytesleft, &outsize);
378 if (0 == charsconverted) {
379 debug_out("String conversion: "
380 "Buffer too small or not convertable\n");
381 hexdump("UTF16LE->UTF8 (old) input",
382 *inbuf, *inbytesleft);
383 errno = EILSEQ; /* Probably more likely. */
384 return -1;
388 * Add a converted null byte, if the CFString conversions
389 * prevented that until now.
391 if (0 == (*inbuf)[*inbytesleft-1] && 0 == (*inbuf)[*inbytesleft-2] &&
392 (0 != (*outbuf)[outsize-1])) {
394 if (((size_t)outsize+1) > *outbytesleft) {
395 debug_out("String conversion: "
396 "Output buffer too small\n");
397 hexdump("UTF16LE->UTF8 (old) input",
398 *inbuf, *inbytesleft);
399 errno = E2BIG;
400 return -1;
403 (*outbuf)[outsize] = 0;
404 ++outsize;
407 *inbuf += *inbytesleft;
408 *inbytesleft = 0;
409 *outbuf += outsize;
410 *outbytesleft -= outsize;
412 return 0;
415 #else /* USE_INTERNAL_API */
418 * An implementation based on internal code as known from the
419 * OpenDarwin CVS.
421 * This code doesn't need much memory management because it uses
422 * functions that operate on the raw memory directly.
424 * The push routine here is faster and more compatible with HFS+ than
425 * the other implementation above. The pull routine is only faster
426 * for some strings, slightly slower for others. The pull routine
427 * loses because it has to iterate over the data twice, once to
428 * decode UTF-8 and then to do the character composition required by
429 * Windows.
431 static size_t macosxfs_encoding_pull(
432 void *cd, /* Encoder handle */
433 char **inbuf, size_t *inbytesleft, /* Script string */
434 char **outbuf, size_t *outbytesleft) /* UTF-16-LE string */
436 static const int script_code = kCFStringEncodingUTF8;
437 UInt32 srcCharsUsed = 0;
438 UInt32 dstCharsUsed = 0;
439 UInt32 result;
440 uint32_t dstDecomposedUsed = 0;
441 uint32_t dstPrecomposedUsed = 0;
443 (void) cd; /* UNUSED */
445 if (0 == *inbytesleft) {
446 return 0;
449 result = CFStringEncodingBytesToUnicode(
450 script_code, kCFStringEncodingComposeCombinings,
451 *inbuf, *inbytesleft, &srcCharsUsed,
452 (UniChar*)*outbuf, *outbytesleft, &dstCharsUsed);
454 switch(result) {
455 case kCFStringEncodingConversionSuccess:
456 if (*inbytesleft == srcCharsUsed)
457 break;
458 else
459 ; /*fall through*/
460 case kCFStringEncodingInsufficientOutputBufferLength:
461 debug_out("String conversion: "
462 "Output buffer too small\n");
463 hexdump("UTF8->UTF16LE (new) input",
464 *inbuf, *inbytesleft);
465 errno = E2BIG;
466 return -1;
467 case kCFStringEncodingInvalidInputStream:
469 * HACK: smbd/mangle_hash2.c:is_legal_name() expects
470 * errors here. That function will always pass 2
471 * characters. smbd/open.c:check_for_pipe() cuts a
472 * patchname to 10 characters blindly. Suppress the
473 * debug output in those cases.
475 if(2 != *inbytesleft && 10 != *inbytesleft) {
476 debug_out("String conversion: "
477 "Invalid input sequence\n");
478 hexdump("UTF8->UTF16LE (new) input",
479 *inbuf, *inbytesleft);
481 errno = EILSEQ;
482 return -1;
483 case kCFStringEncodingConverterUnavailable:
484 debug_out("String conversion: "
485 "Unknown encoding\n");
486 hexdump("UTF8->UTF16LE (new) input",
487 *inbuf, *inbytesleft);
488 errno = EINVAL;
489 return -1;
493 * It doesn't look like CFStringEncodingBytesToUnicode() can
494 * produce precomposed characters (flags=ComposeCombinings
495 * doesn't do it), so we need another pass over the data here.
496 * We can do this in-place, as the string can only get
497 * shorter.
499 * (Actually in theory there should be an internal
500 * decomposition and reordering before the actual composition
501 * step. But we should be able to rely on that we always get
502 * fully decomposed strings for input, so this can't create
503 * problems in reality.)
505 CFUniCharPrecompose(
506 (const UTF16Char *)*outbuf, dstCharsUsed, &dstDecomposedUsed,
507 (UTF16Char *)*outbuf, dstCharsUsed, &dstPrecomposedUsed);
509 native_to_le(*outbuf, dstPrecomposedUsed*2);
511 *inbuf += srcCharsUsed;
512 *inbytesleft -= srcCharsUsed;
513 *outbuf += dstPrecomposedUsed*2;
514 *outbytesleft -= dstPrecomposedUsed*2;
516 return 0;
519 static size_t macosxfs_encoding_push(
520 void *cd, /* Encoder handle */
521 char **inbuf, size_t *inbytesleft, /* UTF-16-LE string */
522 char **outbuf, size_t *outbytesleft) /* Script string */
524 static const int script_code = kCFStringEncodingUTF8;
525 static UniChar *buffer = NULL;
526 static size_t buflen = 0;
527 UInt32 srcCharsUsed=0, dstCharsUsed=0, result;
529 (void) cd; /* UNUSED */
531 if (0 == *inbytesleft) {
532 return 0;
535 buffer = set_ucbuffer_with_le(
536 buffer, &buflen, *inbuf, *inbytesleft);
538 result = CFStringEncodingUnicodeToBytes(
539 script_code, kCFStringEncodingUseHFSPlusCanonical,
540 buffer, *inbytesleft/2, &srcCharsUsed,
541 *outbuf, *outbytesleft, &dstCharsUsed);
543 switch(result) {
544 case kCFStringEncodingConversionSuccess:
545 if (*inbytesleft/2 == srcCharsUsed)
546 break;
547 else
548 ; /*fall through*/
549 case kCFStringEncodingInsufficientOutputBufferLength:
550 debug_out("String conversion: "
551 "Output buffer too small\n");
552 hexdump("UTF16LE->UTF8 (new) input",
553 *inbuf, *inbytesleft);
554 errno = E2BIG;
555 return -1;
556 case kCFStringEncodingInvalidInputStream:
558 * HACK: smbd/open.c:check_for_pipe():is_legal_name()
559 * cuts a pathname to 10 characters blindly. Suppress
560 * the debug output in those cases.
562 if(10 != *inbytesleft) {
563 debug_out("String conversion: "
564 "Invalid input sequence\n");
565 hexdump("UTF16LE->UTF8 (new) input",
566 *inbuf, *inbytesleft);
568 errno = EILSEQ;
569 return -1;
570 case kCFStringEncodingConverterUnavailable:
571 debug_out("String conversion: "
572 "Unknown encoding\n");
573 hexdump("UTF16LE->UTF8 (new) input",
574 *inbuf, *inbytesleft);
575 errno = EINVAL;
576 return -1;
579 *inbuf += srcCharsUsed*2;
580 *inbytesleft -= srcCharsUsed*2;
581 *outbuf += dstCharsUsed;
582 *outbytesleft -= dstCharsUsed;
584 return 0;
587 #endif /* USE_INTERNAL_API */
590 * For initialization, actually install the encoding as "macosxfs".
592 static struct charset_functions macosxfs_encoding_functions = {
593 "MACOSXFS", macosxfs_encoding_pull, macosxfs_encoding_push
596 NTSTATUS charset_macosxfs_init(void)
598 return smb_register_charset(&macosxfs_encoding_functions);
601 /* eof */