2 Unix SMB/CIFS implementation.
3 Samba charset module for Mac OS X/Darwin
4 Copyright (C) Benjamin Riefenstahl 2003
6 This program is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 3 of the License, or
9 (at your option) any later version.
11 This program is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with this program. If not, see <http://www.gnu.org/licenses/>.
21 * modules/charset_macosxfs.c
23 * A Samba charset module to use on Mac OS X/Darwin as the filesystem
24 * and display encoding.
26 * Actually two implementations are provided here. The default
27 * implementation is based on the official CFString API. The other is
28 * based on internal CFString APIs as defined in the OpenDarwin
35 * Include OS frameworks. These are only needed in this module.
37 #include <CoreFoundation/CFString.h>
40 * See if autoconf has found us the internal headers in some form.
42 #if HAVE_COREFOUNDATION_CFSTRINGENCODINGCONVERTER_H
43 # include <CoreFoundation/CFStringEncodingConverter.h>
44 # include <CoreFoundation/CFUnicodePrecomposition.h>
45 # define USE_INTERNAL_API 1
46 #elif HAVE_CFSTRINGENCODINGCONVERTER_H
47 # include <CFStringEncodingConverter.h>
48 # include <CFUnicodePrecomposition.h>
49 # define USE_INTERNAL_API 1
53 * Compile time configuration: Do we want debug output?
55 /* #define DEBUG_STRINGS 1 */
58 * A simple, but efficient memory provider for our buffers.
/*
 * Grow a heap buffer so that it can hold at least `newsize` bytes.
 *
 * buffer   Current allocation, as previously returned by this function
 *          (NULL for a buffer that has never been allocated).
 * size     In/out: recorded capacity of `buffer` in bytes.
 * newsize  Capacity the caller needs.
 *
 * The buffer is only ever enlarged.  When it has to grow, 128 bytes of
 * slack are added so that slightly larger follow-up requests do not
 * reallocate again.
 *
 * Returns the (possibly moved) buffer.  If the reallocation fails, NULL is
 * returned and *size is reset to 0, so that a later call retries the
 * allocation instead of trusting a capacity that was never obtained.
 */
static inline void *resize_buffer (void *buffer, size_t *size, size_t newsize)
{
	if (newsize > *size) {
		/* Add slack, unless that would wrap around size_t. */
		size_t padded = newsize + 128;
		if (padded < newsize) {
			padded = newsize;
		}

		/*
		 * NOTE(review): SMB_REALLOC is defined outside this file;
		 * it is presumed to release the old block when it fails --
		 * confirm.  In either case the recorded capacity must not
		 * outlive a failed allocation.
		 */
		buffer = SMB_REALLOC(buffer, padded);
		*size = (buffer != NULL) ? padded : 0;
	}
	return buffer;
}
70 * While there is a version of OpenDarwin for intel, the usual case is
71 * big-endian PPC. So we need byte swapping to handle the
72 * little-endian byte order of the network protocol. We also need an
73 * additional dynamic buffer to do this work for incoming data blocks,
74 * because we have to consider the original data as constant.
76 * We abstract the differences away by providing a simple facade with
77 * these functions/macros:
79 * le_to_native(dst,src,len)
80 * native_to_le(cp,len)
81 * set_ucbuffer_with_le(buffer,bufsize,data,size)
82 * set_ucbuffer_with_le_copy(buffer,bufsize,data,size,reserve)
84 #ifdef WORDS_BIGENDIAN
/*
 * Copy `len` bytes from `src` to `dst`, exchanging the two bytes of every
 * 16-bit unit.  This backs le_to_native() on big-endian hosts.
 *
 * dst  Destination, at least `len` bytes; must not overlap `src`.
 * src  Source data, treated as constant.
 * len  Number of bytes to copy.
 *
 * Only complete byte pairs are swapped.  A trailing odd byte is copied
 * unchanged, which matches the memcpy() based le_to_native() used on
 * little-endian hosts and keeps the loop from touching src[len] and
 * dst[len].
 */
static inline void swap_bytes (char * dst, const char * src, size_t len)
{
	const char *srcend = src + (len & ~(size_t)1);

	while (src < srcend) {
		dst[0] = src[1];
		dst[1] = src[0];
		dst += 2;
		src += 2;
	}

	if (len & 1) {
		*dst = *src;
	}
}
/*
 * Exchange the two bytes of every 16-bit unit in `cp`, in place.  This
 * backs native_to_le() on big-endian hosts.
 *
 * cp   Buffer to convert.
 * len  Number of bytes in the buffer.
 *
 * Only complete byte pairs are swapped; a trailing odd byte is left
 * untouched so that nothing beyond cp[len-1] is ever accessed.
 */
static inline void swap_bytes_inplace (char * cp, size_t len)
{
	char *end = cp + (len & ~(size_t)1);

	while (cp < end) {
		char temp = cp[1];
		cp[1] = cp[0];
		cp[0] = temp;
		cp += 2;
	}
}
108 #define le_to_native(dst,src,len) swap_bytes(dst,src,len)
109 #define native_to_le(cp,len) swap_bytes_inplace(cp,len)
110 #define set_ucbuffer_with_le(buffer,bufsize,data,size) \
111 set_ucbuffer_with_le_copy(buffer,bufsize,data,size,0)
113 #else /* ! WORDS_BIGENDIAN */
115 #define le_to_native(dst,src,len) memcpy(dst,src,len)
116 #define native_to_le(cp,len) /* nothing */
117 #define set_ucbuffer_with_le(buffer,bufsize,data,size) \
118 (((void)(bufsize)),(UniChar*)(data))
122 static inline UniChar
*set_ucbuffer_with_le_copy (
123 UniChar
*buffer
, size_t *bufsize
,
124 const void *data
, size_t size
, size_t reserve
)
126 buffer
= resize_buffer(buffer
, bufsize
, size
+reserve
);
127 le_to_native((char*)buffer
,data
,size
);
133 * A simple hexdump function for debugging error conditions.
135 #define debug_out(s) DEBUG(0,(s))
/*
 * Write a hex dump of a data block to the debug log (level 0).
 *
 * label  Heading printed before the dump.
 * s      Data to dump.
 * len    Number of bytes in `s`.
 *
 * Each row shows the offset, up to 16 bytes in hex (two groups of 8) and
 * the same bytes as text, with everything outside printable ASCII shown
 * as '.'.
 *
 * The rows contain bytes taken from the dumped data, so neither they nor
 * the label are ever used as a format string.
 */
static void hexdump( const char * label, const char * s, size_t len )
{
	size_t restlen = len;

	debug_out("<<<<<<<\n");
	DEBUG(0,("%s\n", label));

	while (restlen > 0) {
		/*
		 * Worst case: offset (9) + separators (3) + hex cells
		 * (16 * 3) + text (16) + "\n\0" (2) = 78 bytes.
		 */
		char line[100];
		char * const lineend = line + sizeof(line);
		char * d = line;
		size_t i, j;

		d += snprintf(d, lineend - d, "%04X ", (unsigned)(len-restlen));
		*d++ = ' ';

		/* First group of eight, padded to full width. */
		for( i = 0; i<restlen && i<8; ++i ) {
			d += snprintf(d, lineend - d, "%02X ",
				      ((unsigned)s[i]) & 0xFF);
		}
		for( j = i; j<8; ++j ) {
			d += snprintf(d, lineend - d, "   ");
		}
		*d++ = ' ';

		/* Second group of eight, padded to full width. */
		for( i = 8; i<restlen && i<16; ++i ) {
			d += snprintf(d, lineend - d, "%02X ",
				      ((unsigned)s[i]) & 0xFF);
		}
		for( j = i; j<16; ++j ) {
			d += snprintf(d, lineend - d, "   ");
		}
		*d++ = ' ';

		/* Text column. */
		for( i = 0; i<restlen && i<16; ++i ) {
			unsigned char c = (unsigned char)s[i];
			if(c < ' ' || c >= 0x7F || !isprint(c))
				*d++ = '.';
			else
				*d++ = (char)c;
		}
		*d++ = '\n';
		*d = 0;

		/* i is the number of bytes shown in this row. */
		restlen -= i;
		s += i;

		DEBUG(0,("%s", line));
	}

	debug_out(">>>>>>>\n");
}
181 #else /* !DEBUG_STRINGS */
183 #define hexdump(label,s,len) /* nothing */
188 #if !USE_INTERNAL_API
191 * An implementation based on documented Mac OS X APIs.
193 * This does a certain amount of memory management, creating and
194 * manipulating CFString objects. We try to minimize the impact by
195 * keeping those objects around and re-using them. We also use
196 * external backing store for the CFStrings where this is possible and
199 * The Unicode normalizations forms available at this level are
200 * generic, not specifically for the file system. So they may not be
203 static size_t macosxfs_encoding_pull(
204 void *cd
, /* Encoder handle */
205 char **inbuf
, size_t *inbytesleft
, /* Script string */
206 char **outbuf
, size_t *outbytesleft
) /* UTF-16-LE string */
208 static const int script_code
= kCFStringEncodingUTF8
;
209 static CFMutableStringRef cfstring
= NULL
;
213 (void) cd
; /* UNUSED */
215 if (0 == *inbytesleft
) {
219 if (NULL
== cfstring
) {
221 * A version with an external backing store as in the
222 * push function should have been more efficient, but
223 * testing shows, that it is actually slower (!).
224 * Maybe kCFAllocatorDefault gets shortcut evaluation
225 * internally, while kCFAllocatorNull doesn't.
227 cfstring
= CFStringCreateMutable(kCFAllocatorDefault
,0);
231 * Three methods of appending to a CFString, choose the most
234 if (0 == (*inbuf
)[*inbytesleft
-1]) {
235 CFStringAppendCString(cfstring
, *inbuf
, script_code
);
236 } else if (*inbytesleft
<= 255) {
238 buffer
[0] = *inbytesleft
;
239 memcpy(buffer
+1, *inbuf
, buffer
[0]);
240 CFStringAppendPascalString(cfstring
, buffer
, script_code
);
243 * We would like to use a fixed buffer and a loop
244 * here, but than we can't garantee that the input is
245 * well-formed UTF-8, as we are supposed to do.
247 static char *buffer
= NULL
;
248 static size_t buflen
= 0;
249 buffer
= resize_buffer(buffer
, &buflen
, *inbytesleft
+1);
250 memcpy(buffer
, *inbuf
, *inbytesleft
);
251 buffer
[*inbytesleft
] = 0;
252 CFStringAppendCString(cfstring
, *inbuf
, script_code
);
256 * Compose characters, using the non-canonical composition
259 CFStringNormalize(cfstring
, kCFStringNormalizationFormC
);
261 outsize
= CFStringGetLength(cfstring
);
262 range
= CFRangeMake(0,outsize
);
266 * HACK: smbd/mangle_hash2.c:is_legal_name() expects
267 * errors here. That function will always pass 2
268 * characters. smbd/open.c:check_for_pipe() cuts a
269 * patchname to 10 characters blindly. Suppress the
270 * debug output in those cases.
272 if(2 != *inbytesleft
&& 10 != *inbytesleft
) {
273 debug_out("String conversion: "
274 "An unknown error occurred\n");
275 hexdump("UTF8->UTF16LE (old) input",
276 *inbuf
, *inbytesleft
);
278 errno
= EILSEQ
; /* Not sure, but this is what we have
282 if (outsize
*2 > *outbytesleft
) {
283 CFStringDelete(cfstring
, range
);
284 debug_out("String conversion: "
285 "Output buffer too small\n");
286 hexdump("UTF8->UTF16LE (old) input",
287 *inbuf
, *inbytesleft
);
292 CFStringGetCharacters(cfstring
, range
, (UniChar
*)*outbuf
);
293 CFStringDelete(cfstring
, range
);
295 native_to_le(*outbuf
, outsize
*2);
298 * Add a converted null byte, if the CFString conversions
299 * prevented that until now.
301 if (0 == (*inbuf
)[*inbytesleft
-1] &&
302 (0 != (*outbuf
)[outsize
*2-1] || 0 != (*outbuf
)[outsize
*2-2])) {
304 if ((outsize
*2+2) > *outbytesleft
) {
305 debug_out("String conversion: "
306 "Output buffer too small\n");
307 hexdump("UTF8->UTF16LE (old) input",
308 *inbuf
, *inbytesleft
);
313 (*outbuf
)[outsize
*2] = (*outbuf
)[outsize
*2+1] = 0;
317 *inbuf
+= *inbytesleft
;
319 *outbuf
+= outsize
*2;
320 *outbytesleft
-= outsize
*2;
325 static size_t macosxfs_encoding_push(
326 void *cd
, /* Encoder handle */
327 char **inbuf
, size_t *inbytesleft
, /* UTF-16-LE string */
328 char **outbuf
, size_t *outbytesleft
) /* Script string */
330 static const int script_code
= kCFStringEncodingUTF8
;
331 static CFMutableStringRef cfstring
= NULL
;
332 static UniChar
*buffer
= NULL
;
333 static size_t buflen
= 0;
334 CFIndex outsize
, cfsize
, charsconverted
;
336 (void) cd
; /* UNUSED */
338 if (0 == *inbytesleft
) {
343 * We need a buffer that can hold 4 times the original data,
344 * because that is the theoretical maximum that decomposition
345 * can create currently (in Unicode 4.0).
347 buffer
= set_ucbuffer_with_le_copy(
348 buffer
, &buflen
, *inbuf
, *inbytesleft
, 3 * *inbytesleft
);
350 if (NULL
== cfstring
) {
351 cfstring
= CFStringCreateMutableWithExternalCharactersNoCopy(
353 buffer
, *inbytesleft
/2, buflen
/2,
356 CFStringSetExternalCharactersNoCopy(
358 buffer
, *inbytesleft
/2, buflen
/2);
362 * Decompose characters, using the non-canonical decomposition
365 * NB: This isn't exactly what HFS+ wants (see note on
366 * kCFStringEncodingUseHFSPlusCanonical in
367 * CFStringEncodingConverter.h), but AFAIK it's the best that
368 * the official API can do.
370 CFStringNormalize(cfstring
, kCFStringNormalizationFormD
);
372 cfsize
= CFStringGetLength(cfstring
);
373 charsconverted
= CFStringGetBytes(
374 cfstring
, CFRangeMake(0,cfsize
),
375 script_code
, 0, False
,
376 *outbuf
, *outbytesleft
, &outsize
);
378 if (0 == charsconverted
) {
379 debug_out("String conversion: "
380 "Buffer too small or not convertable\n");
381 hexdump("UTF16LE->UTF8 (old) input",
382 *inbuf
, *inbytesleft
);
383 errno
= EILSEQ
; /* Probably more likely. */
388 * Add a converted null byte, if the CFString conversions
389 * prevented that until now.
391 if (0 == (*inbuf
)[*inbytesleft
-1] && 0 == (*inbuf
)[*inbytesleft
-2] &&
392 (0 != (*outbuf
)[outsize
-1])) {
394 if (((size_t)outsize
+1) > *outbytesleft
) {
395 debug_out("String conversion: "
396 "Output buffer too small\n");
397 hexdump("UTF16LE->UTF8 (old) input",
398 *inbuf
, *inbytesleft
);
403 (*outbuf
)[outsize
] = 0;
407 *inbuf
+= *inbytesleft
;
410 *outbytesleft
-= outsize
;
415 #else /* USE_INTERNAL_API */
418 * An implementation based on internal code as known from the
421 * This code doesn't need much memory management because it uses
422 * functions that operate on the raw memory directly.
424 * The push routine here is faster and more compatible with HFS+ than
425 * the other implementation above. The pull routine is only faster
426 * for some strings, slightly slower for others. The pull routine
427 * loses because it has to iterate over the data twice, once to
428 * decode UTF-8 and then to do the character composition required by
431 static size_t macosxfs_encoding_pull(
432 void *cd
, /* Encoder handle */
433 char **inbuf
, size_t *inbytesleft
, /* Script string */
434 char **outbuf
, size_t *outbytesleft
) /* UTF-16-LE string */
436 static const int script_code
= kCFStringEncodingUTF8
;
437 UInt32 srcCharsUsed
= 0;
438 UInt32 dstCharsUsed
= 0;
440 uint32_t dstDecomposedUsed
= 0;
441 uint32_t dstPrecomposedUsed
= 0;
443 (void) cd
; /* UNUSED */
445 if (0 == *inbytesleft
) {
449 result
= CFStringEncodingBytesToUnicode(
450 script_code
, kCFStringEncodingComposeCombinings
,
451 *inbuf
, *inbytesleft
, &srcCharsUsed
,
452 (UniChar
*)*outbuf
, *outbytesleft
, &dstCharsUsed
);
455 case kCFStringEncodingConversionSuccess
:
456 if (*inbytesleft
== srcCharsUsed
)
460 case kCFStringEncodingInsufficientOutputBufferLength
:
461 debug_out("String conversion: "
462 "Output buffer too small\n");
463 hexdump("UTF8->UTF16LE (new) input",
464 *inbuf
, *inbytesleft
);
467 case kCFStringEncodingInvalidInputStream
:
469 * HACK: smbd/mangle_hash2.c:is_legal_name() expects
470 * errors here. That function will always pass 2
471 * characters. smbd/open.c:check_for_pipe() cuts a
472 * patchname to 10 characters blindly. Suppress the
473 * debug output in those cases.
475 if(2 != *inbytesleft
&& 10 != *inbytesleft
) {
476 debug_out("String conversion: "
477 "Invalid input sequence\n");
478 hexdump("UTF8->UTF16LE (new) input",
479 *inbuf
, *inbytesleft
);
483 case kCFStringEncodingConverterUnavailable
:
484 debug_out("String conversion: "
485 "Unknown encoding\n");
486 hexdump("UTF8->UTF16LE (new) input",
487 *inbuf
, *inbytesleft
);
493 * It doesn't look like CFStringEncodingBytesToUnicode() can
494 * produce precomposed characters (flags=ComposeCombinings
495 * doesn't do it), so we need another pass over the data here.
496 * We can do this in-place, as the string can only get
499 * (Actually in theory there should be an internal
500 * decomposition and reordering before the actual composition
501 * step. But we should be able to rely on that we always get
502 * fully decomposed strings for input, so this can't create
503 * problems in reality.)
506 (const UTF16Char
*)*outbuf
, dstCharsUsed
, &dstDecomposedUsed
,
507 (UTF16Char
*)*outbuf
, dstCharsUsed
, &dstPrecomposedUsed
);
509 native_to_le(*outbuf
, dstPrecomposedUsed
*2);
511 *inbuf
+= srcCharsUsed
;
512 *inbytesleft
-= srcCharsUsed
;
513 *outbuf
+= dstPrecomposedUsed
*2;
514 *outbytesleft
-= dstPrecomposedUsed
*2;
519 static size_t macosxfs_encoding_push(
520 void *cd
, /* Encoder handle */
521 char **inbuf
, size_t *inbytesleft
, /* UTF-16-LE string */
522 char **outbuf
, size_t *outbytesleft
) /* Script string */
524 static const int script_code
= kCFStringEncodingUTF8
;
525 static UniChar
*buffer
= NULL
;
526 static size_t buflen
= 0;
527 UInt32 srcCharsUsed
=0, dstCharsUsed
=0, result
;
529 (void) cd
; /* UNUSED */
531 if (0 == *inbytesleft
) {
535 buffer
= set_ucbuffer_with_le(
536 buffer
, &buflen
, *inbuf
, *inbytesleft
);
538 result
= CFStringEncodingUnicodeToBytes(
539 script_code
, kCFStringEncodingUseHFSPlusCanonical
,
540 buffer
, *inbytesleft
/2, &srcCharsUsed
,
541 *outbuf
, *outbytesleft
, &dstCharsUsed
);
544 case kCFStringEncodingConversionSuccess
:
545 if (*inbytesleft
/2 == srcCharsUsed
)
549 case kCFStringEncodingInsufficientOutputBufferLength
:
550 debug_out("String conversion: "
551 "Output buffer too small\n");
552 hexdump("UTF16LE->UTF8 (new) input",
553 *inbuf
, *inbytesleft
);
556 case kCFStringEncodingInvalidInputStream
:
558 * HACK: smbd/open.c:check_for_pipe():is_legal_name()
559 * cuts a pathname to 10 characters blindly. Suppress
560 * the debug output in those cases.
562 if(10 != *inbytesleft
) {
563 debug_out("String conversion: "
564 "Invalid input sequence\n");
565 hexdump("UTF16LE->UTF8 (new) input",
566 *inbuf
, *inbytesleft
);
570 case kCFStringEncodingConverterUnavailable
:
571 debug_out("String conversion: "
572 "Unknown encoding\n");
573 hexdump("UTF16LE->UTF8 (new) input",
574 *inbuf
, *inbytesleft
);
579 *inbuf
+= srcCharsUsed
*2;
580 *inbytesleft
-= srcCharsUsed
*2;
581 *outbuf
+= dstCharsUsed
;
582 *outbytesleft
-= dstCharsUsed
;
587 #endif /* USE_INTERNAL_API */
590 * For initialization, actually install the encoding as "macosxfs".
592 static struct charset_functions macosxfs_encoding_functions
= {
593 "MACOSXFS", macosxfs_encoding_pull
, macosxfs_encoding_push
596 NTSTATUS
charset_macosxfs_init(void)
598 return smb_register_charset(&macosxfs_encoding_functions
);