2 Unix SMB/CIFS implementation.
3 Samba charset module for Mac OS X/Darwin
4 Copyright (C) Benjamin Riefenstahl 2003
6 This program is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 2 of the License, or
9 (at your option) any later version.
11 This program is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with this program; if not, write to the Free Software
18 Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
22 * modules/charset_macosxfs.c
24 * A Samba charset module to use on Mac OS X/Darwin as the filesystem
25 * and display encoding.
27 * Actually two implementations are provided here. The default
28 * implementation is based on the official CFString API. The other is
29 * based on internal CFString APIs as defined in the OpenDarwin
36 * Include OS frameworks. These are only needed in this module.
38 #include <CoreFoundation/CFString.h>
41 * See if autoconf has found us the internal headers in some form.
43 #if HAVE_COREFOUNDATION_CFSTRINGENCODINGCONVERTER_H
44 # include <Corefoundation/CFStringEncodingConverter.h>
45 # include <Corefoundation/CFUnicodePrecomposition.h>
46 # define USE_INTERNAL_API 1
47 #elif HAVE_CFSTRINGENCODINGCONVERTER_H
48 # include <CFStringEncodingConverter.h>
49 # include <CFUnicodePrecomposition.h>
50 # define USE_INTERNAL_API 1
54 * Compile time configuration: Do we want debug output?
56 /* #define DEBUG_STRINGS 1 */
59 * A simple, but efficient memory provider for our buffers.
/*
 * Grow-only allocator for the conversion scratch buffers.
 *
 *   buffer   current allocation, may be NULL
 *   size     in/out: capacity of `buffer' in bytes
 *   newsize  number of bytes the caller needs
 *
 * Returns the (possibly relocated) buffer.  The buffer is only ever
 * enlarged, and always by 128 bytes more than requested so that a
 * slowly growing sequence of requests does not reallocate every time.
 *
 * On failure NULL is returned, the previous allocation is released
 * and *size is reset to 0.  Callers keep the (pointer, size) pair in
 * static variables and overwrite the pointer with our return value,
 * so this keeps that pair consistent and does not leak.
 */
static inline void *resize_buffer (void *buffer, size_t *size, size_t newsize)
{
	const size_t slack = 128;
	void *grown;

	if (newsize <= *size) {
		return buffer;
	}

	/* newsize + slack must not wrap around. */
	if (newsize > (size_t)-1 - slack) {
		grown = NULL;
	} else {
		grown = realloc(buffer, newsize + slack);
	}

	if (NULL == grown) {
		free(buffer);
		*size = 0;
		return NULL;
	}

	*size = newsize + slack;
	return grown;
}
71 * While there is a version of OpenDarwin for intel, the usual case is
72 * big-endian PPC. So we need byte swapping to handle the
73 * little-endian byte order of the network protocol. We also need an
74 * additional dynamic buffer to do this work for incoming data blocks,
75 * because we have to consider the original data as constant.
77 * We abstract the differences away by providing a simple facade with
78 * these functions/macros:
80 * le_to_native(dst,src,len)
81 * native_to_le(cp,len)
82 * set_ucbuffer_with_le(buffer,bufsize,data,size)
83 * set_ucbuffer_with_le_copy(buffer,bufsize,data,size,reserve)
85 #ifdef WORDS_BIGENDIAN
/*
 * Copy LEN bytes from SRC to DST, exchanging the two bytes of every
 * 16-bit unit on the way.  SRC and DST must not overlap.
 *
 * Only complete byte pairs are swapped.  If LEN is odd the final
 * byte is copied unchanged, so neither buffer is accessed beyond LEN.
 */
static inline void swap_bytes (char * dst, const char * src, size_t len)
{
	size_t pos;

	for (pos = 0; pos + 1 < len; pos += 2) {
		dst[pos]     = src[pos + 1];
		dst[pos + 1] = src[pos];
	}
	if (pos < len) {
		dst[pos] = src[pos];
	}
}
/*
 * Exchange the two bytes of every 16-bit unit in the LEN bytes at CP.
 *
 * Only complete byte pairs are swapped.  A trailing odd byte is left
 * as it is, so nothing beyond LEN is accessed.
 */
static inline void swap_bytes_inplace (char * cp, size_t len)
{
	size_t pos;

	for (pos = 0; pos + 1 < len; pos += 2) {
		char first = cp[pos];
		cp[pos]     = cp[pos + 1];
		cp[pos + 1] = first;
	}
}
109 #define le_to_native(dst,src,len) swap_bytes(dst,src,len)
110 #define native_to_le(cp,len) swap_bytes_inplace(cp,len)
111 #define set_ucbuffer_with_le(buffer,bufsize,data,size) \
112 set_ucbuffer_with_le_copy(buffer,bufsize,data,size,0)
114 #else /* ! WORDS_BIGENDIAN */
116 #define le_to_native(dst,src,len) memcpy(dst,src,len)
117 #define native_to_le(cp,len) /* nothing */
118 #define set_ucbuffer_with_le(buffer,bufsize,data,size) \
119 (((void)(bufsize)),(UniChar*)(data))
123 static inline UniChar
*set_ucbuffer_with_le_copy (
124 UniChar
*buffer
, size_t *bufsize
,
125 const void *data
, size_t size
, size_t reserve
)
127 buffer
= resize_buffer(buffer
, bufsize
, size
+reserve
);
128 le_to_native((char*)buffer
,data
,size
);
134 * A simple hexdump function for debugging error conditions.
136 #define debug_out(s) DEBUG(0,(s))
/*
 * Write LEN bytes starting at S to the debug log, preceded by LABEL
 * and framed by "<<<<<<<" / ">>>>>>>" marker lines.
 *
 * Each row shows up to sixteen bytes: the offset in hexadecimal (at
 * least four digits), two groups of eight two-digit hex values, and
 * the same bytes as text with everything outside printable ASCII
 * replaced by '.'.  Short rows are padded so the text column lines up.
 *
 * The label and the row text are passed to DEBUG as "%s" arguments,
 * never as the format itself: the row contains raw characters from
 * the dumped data, which may include '%'.
 */
static void hexdump( const char * label, const char * s, size_t len )
{
	static const char digits[] = "0123456789ABCDEF";
	size_t offset = 0;

	debug_out("<<<<<<<\n");
	DEBUG(0,("%s\n", label));

	while (offset < len) {
		char line[100];	/* longest row is below 80 characters */
		char *d = line;
		size_t rowlen = len - offset;
		unsigned pos = (unsigned)offset;
		int shift = 12;
		size_t i;

		if (rowlen > 16) {
			rowlen = 16;
		}

		/* Offset: four hex digits, more if the value needs them. */
		while (shift < 28 && 0 != (pos >> (shift + 4))) {
			shift += 4;
		}
		for (; shift >= 0; shift -= 4) {
			*d++ = digits[(pos >> shift) & 0xF];
		}
		*d++ = ' ';

		/* Two groups of eight "XX " cells, blank-padded. */
		for (i = 0; i < 16; ++i) {
			if (0 == i % 8) {
				*d++ = ' ';
			}
			if (i < rowlen) {
				unsigned char byte = (unsigned char)s[offset + i];
				*d++ = digits[byte >> 4];
				*d++ = digits[byte & 0xF];
			} else {
				*d++ = ' ';
				*d++ = ' ';
			}
			*d++ = ' ';
		}
		*d++ = ' ';

		/* Text column. */
		for (i = 0; i < rowlen; ++i) {
			unsigned char byte = (unsigned char)s[offset + i];
			*d++ = (byte >= ' ' && byte < 0x7F) ? (char)byte : '.';
		}
		*d++ = '\n';
		*d = 0;

		DEBUG(0,("%s", line));
		offset += rowlen;
	}

	debug_out(">>>>>>>\n");
}
182 #else /* !DEBUG_STRINGS */
184 #define hexdump(label,s,len) /* nothing */
189 #if !USE_INTERNAL_API
192 * An implementation based on documented Mac OS X APIs.
194 * This does a certain amount of memory management, creating and
195 * manipulating CFString objects. We try to minimize the impact by
196 * keeping those objects around and re-using them. We also use
197 * external backing store for the CFStrings where this is possible and
200 * The Unicode normalizations forms available at this level are
201 * generic, not specifically for the file system. So they may not be
204 static size_t macosxfs_encoding_pull(
205 void *cd
, /* Encoder handle */
206 char **inbuf
, size_t *inbytesleft
, /* Script string */
207 char **outbuf
, size_t *outbytesleft
) /* UTF-16-LE string */
209 static const int script_code
= kCFStringEncodingUTF8
;
210 static CFMutableStringRef cfstring
= NULL
;
214 (void) cd
; /* UNUSED */
216 if (0 == *inbytesleft
) {
220 if (NULL
== cfstring
) {
222 * A version with an external backing store as in the
223 * push function should have been more efficient, but
224 * testing shows, that it is actually slower (!).
225 * Maybe kCFAllocatorDefault gets shortcut evaluation
226 * internally, while kCFAllocatorNull doesn't.
228 cfstring
= CFStringCreateMutable(kCFAllocatorDefault
,0);
232 * Three methods of appending to a CFString, choose the most
235 if (0 == (*inbuf
)[*inbytesleft
-1]) {
236 CFStringAppendCString(cfstring
, *inbuf
, script_code
);
237 } else if (*inbytesleft
<= 255) {
239 buffer
[0] = *inbytesleft
;
240 memcpy(buffer
+1, *inbuf
, buffer
[0]);
241 CFStringAppendPascalString(cfstring
, buffer
, script_code
);
244 * We would like to use a fixed buffer and a loop
245 * here, but than we can't garantee that the input is
246 * well-formed UTF-8, as we are supposed to do.
248 static char *buffer
= NULL
;
249 static size_t buflen
= 0;
250 buffer
= resize_buffer(buffer
, &buflen
, *inbytesleft
+1);
251 memcpy(buffer
, *inbuf
, *inbytesleft
);
252 buffer
[*inbytesleft
] = 0;
253 CFStringAppendCString(cfstring
, *inbuf
, script_code
);
257 * Compose characters, using the non-canonical composition
260 CFStringNormalize(cfstring
, kCFStringNormalizationFormC
);
262 outsize
= CFStringGetLength(cfstring
);
263 range
= CFRangeMake(0,outsize
);
267 * HACK: smbd/mangle_hash2.c:is_legal_name() expects
268 * errors here. That function will always pass 2
269 * characters. smbd/open.c:check_for_pipe() cuts a
270 * patchname to 10 characters blindly. Suppress the
271 * debug output in those cases.
273 if(2 != *inbytesleft
&& 10 != *inbytesleft
) {
274 debug_out("String conversion: "
275 "An unknown error occurred\n");
276 hexdump("UTF8->UTF16LE (old) input",
277 *inbuf
, *inbytesleft
);
279 errno
= EILSEQ
; /* Not sure, but this is what we have
283 if (outsize
*2 > *outbytesleft
) {
284 CFStringDelete(cfstring
, range
);
285 debug_out("String conversion: "
286 "Output buffer too small\n");
287 hexdump("UTF8->UTF16LE (old) input",
288 *inbuf
, *inbytesleft
);
293 CFStringGetCharacters(cfstring
, range
, (UniChar
*)*outbuf
);
294 CFStringDelete(cfstring
, range
);
296 native_to_le(*outbuf
, outsize
*2);
299 * Add a converted null byte, if the CFString conversions
300 * prevented that until now.
302 if (0 == (*inbuf
)[*inbytesleft
-1] &&
303 (0 != (*outbuf
)[outsize
*2-1] || 0 != (*outbuf
)[outsize
*2-2])) {
305 if ((outsize
*2+2) > *outbytesleft
) {
306 debug_out("String conversion: "
307 "Output buffer too small\n");
308 hexdump("UTF8->UTF16LE (old) input",
309 *inbuf
, *inbytesleft
);
314 (*outbuf
)[outsize
*2] = (*outbuf
)[outsize
*2+1] = 0;
318 *inbuf
+= *inbytesleft
;
320 *outbuf
+= outsize
*2;
321 *outbytesleft
-= outsize
*2;
326 static size_t macosxfs_encoding_push(
327 void *cd
, /* Encoder handle */
328 char **inbuf
, size_t *inbytesleft
, /* UTF-16-LE string */
329 char **outbuf
, size_t *outbytesleft
) /* Script string */
331 static const int script_code
= kCFStringEncodingUTF8
;
332 static CFMutableStringRef cfstring
= NULL
;
333 static UniChar
*buffer
= NULL
;
334 static size_t buflen
= 0;
335 CFIndex outsize
, cfsize
, charsconverted
;
337 (void) cd
; /* UNUSED */
339 if (0 == *inbytesleft
) {
344 * We need a buffer that can hold 4 times the original data,
345 * because that is the theoretical maximum that decomposition
346 * can create currently (in Unicode 4.0).
348 buffer
= set_ucbuffer_with_le_copy(
349 buffer
, &buflen
, *inbuf
, *inbytesleft
, 3 * *inbytesleft
);
351 if (NULL
== cfstring
) {
352 cfstring
= CFStringCreateMutableWithExternalCharactersNoCopy(
354 buffer
, *inbytesleft
/2, buflen
/2,
357 CFStringSetExternalCharactersNoCopy(
359 buffer
, *inbytesleft
/2, buflen
/2);
363 * Decompose characters, using the non-canonical decomposition
366 * NB: This isn't exactly what HFS+ wants (see note on
367 * kCFStringEncodingUseHFSPlusCanonical in
368 * CFStringEncodingConverter.h), but AFAIK it's the best that
369 * the official API can do.
371 CFStringNormalize(cfstring
, kCFStringNormalizationFormD
);
373 cfsize
= CFStringGetLength(cfstring
);
374 charsconverted
= CFStringGetBytes(
375 cfstring
, CFRangeMake(0,cfsize
),
376 script_code
, 0, False
,
377 *outbuf
, *outbytesleft
, &outsize
);
379 if (0 == charsconverted
) {
380 debug_out("String conversion: "
381 "Buffer too small or not convertable\n");
382 hexdump("UTF16LE->UTF8 (old) input",
383 *inbuf
, *inbytesleft
);
384 errno
= EILSEQ
; /* Probably more likely. */
389 * Add a converted null byte, if the CFString conversions
390 * prevented that until now.
392 if (0 == (*inbuf
)[*inbytesleft
-1] && 0 == (*inbuf
)[*inbytesleft
-2] &&
393 (0 != (*outbuf
)[outsize
-1])) {
395 if (((size_t)outsize
+1) > *outbytesleft
) {
396 debug_out("String conversion: "
397 "Output buffer too small\n");
398 hexdump("UTF16LE->UTF8 (old) input",
399 *inbuf
, *inbytesleft
);
404 (*outbuf
)[outsize
] = 0;
408 *inbuf
+= *inbytesleft
;
411 *outbytesleft
-= outsize
;
416 #else /* USE_INTERNAL_API */
419 * An implementation based on internal code as known from the
422 * This code doesn't need much memory management because it uses
423 * functions that operate on the raw memory directly.
425 * The push routine here is faster and more compatible with HFS+ than
426 * the other implementation above. The pull routine is only faster
427 * for some strings, slightly slower for others. The pull routine
428 * loses because it has to iterate over the data twice, once to
429 * decode UTF-8 and then to do the character composition required by
432 static size_t macosxfs_encoding_pull(
433 void *cd
, /* Encoder handle */
434 char **inbuf
, size_t *inbytesleft
, /* Script string */
435 char **outbuf
, size_t *outbytesleft
) /* UTF-16-LE string */
437 static const int script_code
= kCFStringEncodingUTF8
;
438 UInt32 srcCharsUsed
= 0;
439 UInt32 dstCharsUsed
= 0;
441 uint32_t dstDecomposedUsed
= 0;
442 uint32_t dstPrecomposedUsed
= 0;
444 (void) cd
; /* UNUSED */
446 if (0 == *inbytesleft
) {
450 result
= CFStringEncodingBytesToUnicode(
451 script_code
, kCFStringEncodingComposeCombinings
,
452 *inbuf
, *inbytesleft
, &srcCharsUsed
,
453 (UniChar
*)*outbuf
, *outbytesleft
, &dstCharsUsed
);
456 case kCFStringEncodingConversionSuccess
:
457 if (*inbytesleft
== srcCharsUsed
)
461 case kCFStringEncodingInsufficientOutputBufferLength
:
462 debug_out("String conversion: "
463 "Output buffer too small\n");
464 hexdump("UTF8->UTF16LE (new) input",
465 *inbuf
, *inbytesleft
);
468 case kCFStringEncodingInvalidInputStream
:
470 * HACK: smbd/mangle_hash2.c:is_legal_name() expects
471 * errors here. That function will always pass 2
472 * characters. smbd/open.c:check_for_pipe() cuts a
473 * patchname to 10 characters blindly. Suppress the
474 * debug output in those cases.
476 if(2 != *inbytesleft
&& 10 != *inbytesleft
) {
477 debug_out("String conversion: "
478 "Invalid input sequence\n");
479 hexdump("UTF8->UTF16LE (new) input",
480 *inbuf
, *inbytesleft
);
484 case kCFStringEncodingConverterUnavailable
:
485 debug_out("String conversion: "
486 "Unknown encoding\n");
487 hexdump("UTF8->UTF16LE (new) input",
488 *inbuf
, *inbytesleft
);
494 * It doesn't look like CFStringEncodingBytesToUnicode() can
495 * produce precomposed characters (flags=ComposeCombinings
496 * doesn't do it), so we need another pass over the data here.
497 * We can do this in-place, as the string can only get
500 * (Actually in theory there should be an internal
501 * decomposition and reordering before the actual composition
502 * step. But we should be able to rely on that we always get
503 * fully decomposed strings for input, so this can't create
504 * problems in reality.)
507 (const UTF16Char
*)*outbuf
, dstCharsUsed
, &dstDecomposedUsed
,
508 (UTF16Char
*)*outbuf
, dstCharsUsed
, &dstPrecomposedUsed
);
510 native_to_le(*outbuf
, dstPrecomposedUsed
*2);
512 *inbuf
+= srcCharsUsed
;
513 *inbytesleft
-= srcCharsUsed
;
514 *outbuf
+= dstPrecomposedUsed
*2;
515 *outbytesleft
-= dstPrecomposedUsed
*2;
520 static size_t macosxfs_encoding_push(
521 void *cd
, /* Encoder handle */
522 char **inbuf
, size_t *inbytesleft
, /* UTF-16-LE string */
523 char **outbuf
, size_t *outbytesleft
) /* Script string */
525 static const int script_code
= kCFStringEncodingUTF8
;
526 static UniChar
*buffer
= NULL
;
527 static size_t buflen
= 0;
528 UInt32 srcCharsUsed
=0, dstCharsUsed
=0, result
;
530 (void) cd
; /* UNUSED */
532 if (0 == *inbytesleft
) {
536 buffer
= set_ucbuffer_with_le(
537 buffer
, &buflen
, *inbuf
, *inbytesleft
);
539 result
= CFStringEncodingUnicodeToBytes(
540 script_code
, kCFStringEncodingUseHFSPlusCanonical
,
541 buffer
, *inbytesleft
/2, &srcCharsUsed
,
542 *outbuf
, *outbytesleft
, &dstCharsUsed
);
545 case kCFStringEncodingConversionSuccess
:
546 if (*inbytesleft
/2 == srcCharsUsed
)
550 case kCFStringEncodingInsufficientOutputBufferLength
:
551 debug_out("String conversion: "
552 "Output buffer too small\n");
553 hexdump("UTF16LE->UTF8 (new) input",
554 *inbuf
, *inbytesleft
);
557 case kCFStringEncodingInvalidInputStream
:
559 * HACK: smbd/open.c:check_for_pipe():is_legal_name()
560 * cuts a pathname to 10 characters blindly. Suppress
561 * the debug output in those cases.
563 if(10 != *inbytesleft
) {
564 debug_out("String conversion: "
565 "Invalid input sequence\n");
566 hexdump("UTF16LE->UTF8 (new) input",
567 *inbuf
, *inbytesleft
);
571 case kCFStringEncodingConverterUnavailable
:
572 debug_out("String conversion: "
573 "Unknown encoding\n");
574 hexdump("UTF16LE->UTF8 (new) input",
575 *inbuf
, *inbytesleft
);
580 *inbuf
+= srcCharsUsed
*2;
581 *inbytesleft
-= srcCharsUsed
*2;
582 *outbuf
+= dstCharsUsed
;
583 *outbytesleft
-= dstCharsUsed
;
588 #endif /* USE_INTERNAL_API */
591 * For initialization, actually install the encoding as "macosxfs".
593 static struct charset_functions macosxfs_encoding_functions
= {
594 "MACOSXFS", macosxfs_encoding_pull
, macosxfs_encoding_push
597 NTSTATUS
init_module(void)
599 return smb_register_charset(&macosxfs_encoding_functions
);