2 * utf.c: UTF-8 conversion routines
4 * ====================================================================
5 * Copyright (c) 2000-2007, 2009 CollabNet. All rights reserved.
7 * This software is licensed as described in the file COPYING, which
8 * you should have received as part of this distribution. The terms
9 * are also available at http://subversion.tigris.org/license-1.html.
10 * If newer versions of this license are posted there, you may use a
11 * newer version instead, at your option.
13 * This software consists of voluntary contributions made by many
14 * individuals. For exact contribution history, see the revision
15 * history and logs, available at http://subversion.tigris.org/.
16 * ====================================================================
24 #include <apr_strings.h>
26 #include <apr_xlate.h>
28 #include "svn_string.h"
29 #include "svn_error.h"
30 #include "svn_pools.h"
31 #include "svn_ctype.h"
33 //#include "svn_private_config.h"
34 #include "win32_xlate.h"
36 #include "svn_utf_private.h"
40 #define SVN_UTF_NTOU_XLATE_HANDLE "svn-utf-ntou-xlate-handle"
41 #define SVN_UTF_UTON_XLATE_HANDLE "svn-utf-uton-xlate-handle"
42 #define SVN_APR_UTF8_CHARSET "UTF-8"
45 static apr_thread_mutex_t
*xlate_handle_mutex
= NULL
;
48 /* The xlate handle cache is a global hash table with linked lists of xlate
49 * handles. In multi-threaded environments, a thread "borrows" an xlate
50 * handle from the cache during a translation and puts it back afterwards.
51 * This avoids holding a global lock for all translations.
52 * If there is no handle for a particular key when needed, a new
53 * handle is created and put in the cache after use.
54 * This means that there will be at most N handles open for a key, where N
55 * is the number of simultaneous handles in use for that key. */
57 typedef struct xlate_handle_node_t
{
59 /* FALSE if the handle is not valid, since its pool is being
62 /* The name of a char encoding or APR_LOCALE_CHARSET. */
63 const char *frompage
, *topage
;
64 struct xlate_handle_node_t
*next
;
65 } xlate_handle_node_t
;
67 /* This maps const char * userdata_key strings to xlate_handle_node_t **
68 handles to the first entry in the linked list of xlate handles. We don't
69 store the pointer to the list head directly in the hash table, since we
70 remove/insert entries at the head in the list in the code below, and
71 we can't use apr_hash_set() in each character translation because that
72 function allocates memory in each call where the value is non-NULL.
73 Since these allocations take place in a global pool, this would be a
75 static apr_hash_t
*xlate_handle_hash
= NULL
;
77 /* Clean up the xlate handle cache. */
79 xlate_cleanup(void *arg
)
81 /* We set the cache variables to NULL so that translation works in other
82 cleanup functions, even if it isn't cached then. */
84 apr_thread_mutex_destroy(xlate_handle_mutex
);
85 xlate_handle_mutex
= NULL
;
87 xlate_handle_hash
= NULL
;
92 /* Set the handle of ARG to NULL. */
94 xlate_handle_node_cleanup(void *arg
)
96 xlate_handle_node_t
*node
= arg
;
103 svn_utf_initialize(apr_pool_t
*pool
)
107 apr_thread_mutex_t
*mutex
;
110 if (!xlate_handle_hash
)
112 /* We create our own subpool, which we protect with the mutex.
113 We can't use the pool passed to us by the caller, since we will
114 use it for xlate handle allocations, possibly in multiple threads,
115 and pool allocation is not thread-safe. */
116 subpool
= svn_pool_create(pool
);
118 if (apr_thread_mutex_create(&mutex
, APR_THREAD_MUTEX_DEFAULT
, subpool
)
120 xlate_handle_mutex
= mutex
;
125 xlate_handle_hash
= apr_hash_make(subpool
);
126 apr_pool_cleanup_register(subpool
, NULL
, xlate_cleanup
,
127 apr_pool_cleanup_null
);
131 /* Return a unique string key based on TOPAGE and FROMPAGE. TOPAGE and
132 * FROMPAGE can be any valid arguments of the same name to
133 * apr_xlate_open(). Allocate the returned string in POOL. */
135 get_xlate_key(const char *topage
,
136 const char *frompage
,
139 /* In the cases of SVN_APR_LOCALE_CHARSET and SVN_APR_DEFAULT_CHARSET
140 * topage/frompage is really an int, not a valid string. So generate a
141 * unique key accordingly. */
142 if (frompage
== SVN_APR_LOCALE_CHARSET
)
143 frompage
= "APR_LOCALE_CHARSET";
144 else if (frompage
== SVN_APR_DEFAULT_CHARSET
)
145 frompage
= "APR_DEFAULT_CHARSET";
147 if (topage
== SVN_APR_LOCALE_CHARSET
)
148 topage
= "APR_LOCALE_CHARSET";
149 else if (topage
== SVN_APR_DEFAULT_CHARSET
)
150 topage
= "APR_DEFAULT_CHARSET";
152 return apr_pstrcat(pool
, "svn-utf-", frompage
, "to", topage
,
153 "-xlate-handle", NULL
);
156 /* Set *RET to a handle node for converting from FROMPAGE to TOPAGE,
157 creating the handle node if it doesn't exist in USERDATA_KEY.
158 If a node is not cached and apr_xlate_open() returns APR_EINVAL or
159 APR_ENOTIMPL, set (*RET)->handle to NULL. If fail for any other
160 reason, return the error.
162 Allocate *RET and its xlate handle in POOL if svn_utf_initialize()
163 hasn't been called or USERDATA_KEY is NULL. Else, allocate them
164 in the pool of xlate_handle_hash. */
166 get_xlate_handle_node(xlate_handle_node_t
**ret
,
167 const char *topage
, const char *frompage
,
168 const char *userdata_key
, apr_pool_t
*pool
)
170 xlate_handle_node_t
**old_node_p
;
171 xlate_handle_node_t
*old_node
= NULL
;
172 apr_status_t apr_err
;
174 svn_error_t
*err
= NULL
;
176 /* If we already have a handle, just return it. */
179 if (xlate_handle_hash
)
182 apr_err
= apr_thread_mutex_lock(xlate_handle_mutex
);
183 if (apr_err
!= APR_SUCCESS
)
184 return svn_error_create(apr_err
, NULL
,
185 _("Can't lock charset translation mutex"));
187 old_node_p
= apr_hash_get(xlate_handle_hash
, userdata_key
,
188 APR_HASH_KEY_STRING
);
190 old_node
= *old_node_p
;
193 /* Ensure that the handle is still valid. */
196 /* Remove from the list. */
197 *old_node_p
= old_node
->next
;
198 old_node
->next
= NULL
;
200 apr_err
= apr_thread_mutex_unlock(xlate_handle_mutex
);
201 if (apr_err
!= APR_SUCCESS
)
202 return svn_error_create(apr_err
, NULL
,
203 _("Can't unlock charset "
204 "translation mutex"));
214 /* We fall back on a per-pool cache instead. */
215 apr_pool_userdata_get(&p
, userdata_key
, pool
);
217 /* Ensure that the handle is still valid. */
218 if (old_node
&& old_node
->valid
)
226 /* Note that we still have the mutex locked (if it is initialized), so we
227 can use the global pool for creating the new xlate handle. */
229 /* The error handling doesn't support the following cases, since we don't
230 use them currently. Catch this here. */
231 SVN_ERR_ASSERT(frompage
!= SVN_APR_DEFAULT_CHARSET
232 && topage
!= SVN_APR_DEFAULT_CHARSET
233 && (frompage
!= SVN_APR_LOCALE_CHARSET
234 || topage
!= SVN_APR_LOCALE_CHARSET
));
236 /* Use the correct pool for creating the handle. */
237 if (userdata_key
&& xlate_handle_hash
)
238 pool
= apr_hash_pool_get(xlate_handle_hash
);
240 /* Try to create a handle. */
242 apr_err
= svn_subr__win32_xlate_open((win32_xlate_t
**)&handle
, topage
,
245 apr_err
= apr_xlate_open(&handle
, topage
, frompage
, pool
);
248 if (APR_STATUS_IS_EINVAL(apr_err
) || APR_STATUS_IS_ENOTIMPL(apr_err
))
250 else if (apr_err
!= APR_SUCCESS
)
253 /* Can't use svn_error_wrap_apr here because it calls functions in
254 this file, leading to infinite recursion. */
255 if (frompage
== SVN_APR_LOCALE_CHARSET
)
256 errstr
= apr_psprintf(pool
,
257 _("Can't create a character converter from "
258 "native encoding to '%s'"), topage
);
259 else if (topage
== SVN_APR_LOCALE_CHARSET
)
260 errstr
= apr_psprintf(pool
,
261 _("Can't create a character converter from "
262 "'%s' to native encoding"), frompage
);
264 errstr
= apr_psprintf(pool
,
265 _("Can't create a character converter from "
266 "'%s' to '%s'"), frompage
, topage
);
268 err
= svn_error_create(apr_err
, NULL
, errstr
);
272 /* Allocate and initialize the node. */
273 *ret
= apr_palloc(pool
, sizeof(xlate_handle_node_t
));
274 (*ret
)->handle
= handle
;
275 (*ret
)->valid
= TRUE
;
276 (*ret
)->frompage
= ((frompage
!= SVN_APR_LOCALE_CHARSET
)
277 ? apr_pstrdup(pool
, frompage
) : frompage
);
278 (*ret
)->topage
= ((topage
!= SVN_APR_LOCALE_CHARSET
)
279 ? apr_pstrdup(pool
, topage
) : topage
);
282 /* If we are called from inside a pool cleanup handler, the just created
283 xlate handle will be closed when that handler returns by a newly
284 registered cleanup handler, however, the handle is still cached by us.
285 To prevent this, we register a cleanup handler that will reset the valid
286 flag of our node, so we don't use an invalid handle. */
288 apr_pool_cleanup_register(pool
, *ret
, xlate_handle_node_cleanup
,
289 apr_pool_cleanup_null
);
292 /* Don't need the lock anymore. */
294 if (userdata_key
&& xlate_handle_hash
)
296 apr_status_t unlock_err
= apr_thread_mutex_unlock(xlate_handle_mutex
);
297 if (unlock_err
!= APR_SUCCESS
)
298 return svn_error_create(unlock_err
, NULL
,
299 _("Can't unlock charset translation mutex"));
306 /* Put back NODE into the xlate handle cache for use by other calls.
307 If there is no global cache, store the handle in POOL.
308 Ignore errors related to locking/unlocking the mutex.
309 ### Mutex errors here are very weird. Should we handle them "correctly"
310 ### even if that complicates error handling in the routines below? */
312 put_xlate_handle_node(xlate_handle_node_t
*node
,
313 const char *userdata_key
,
316 assert(node
->next
== NULL
);
319 if (xlate_handle_hash
)
321 xlate_handle_node_t
**node_p
;
323 if (apr_thread_mutex_lock(xlate_handle_mutex
) != APR_SUCCESS
)
324 SVN_ERR_MALFUNCTION_NO_RETURN();
326 node_p
= apr_hash_get(xlate_handle_hash
, userdata_key
,
327 APR_HASH_KEY_STRING
);
330 userdata_key
= apr_pstrdup(apr_hash_pool_get(xlate_handle_hash
),
332 node_p
= apr_palloc(apr_hash_pool_get(xlate_handle_hash
),
335 apr_hash_set(xlate_handle_hash
, userdata_key
,
336 APR_HASH_KEY_STRING
, node_p
);
338 node
->next
= *node_p
;
341 if (apr_thread_mutex_unlock(xlate_handle_mutex
) != APR_SUCCESS
)
342 SVN_ERR_MALFUNCTION_NO_RETURN();
347 /* Store it in the per-pool cache. */
348 apr_pool_userdata_set(node
, userdata_key
, apr_pool_cleanup_null
, pool
);
352 /* Return the apr_xlate handle for converting native characters to UTF-8. */
354 get_ntou_xlate_handle_node(xlate_handle_node_t
**ret
, apr_pool_t
*pool
)
356 return get_xlate_handle_node(ret
, SVN_APR_UTF8_CHARSET
,
357 SVN_APR_LOCALE_CHARSET
,
358 SVN_UTF_NTOU_XLATE_HANDLE
, pool
);
362 /* Return the apr_xlate handle for converting UTF-8 to native characters.
363 Create one if it doesn't exist. If unable to find a handle, or
364 unable to create one because apr_xlate_open returned APR_EINVAL, then
365 set *RET to null and return SVN_NO_ERROR; if fail for some other
366 reason, return error. */
368 get_uton_xlate_handle_node(xlate_handle_node_t
**ret
, apr_pool_t
*pool
)
370 return get_xlate_handle_node(ret
, SVN_APR_LOCALE_CHARSET
,
371 SVN_APR_UTF8_CHARSET
,
372 SVN_UTF_UTON_XLATE_HANDLE
, pool
);
376 /* Copy LEN bytes of SRC, converting non-ASCII and zero bytes to ?\nnn
377 sequences, allocating the result in POOL. */
379 fuzzy_escape(const char *src
, apr_size_t len
, apr_pool_t
*pool
)
381 const char *src_orig
= src
, *src_end
= src
+ len
;
382 apr_size_t new_len
= 0;
384 const char *new_orig
;
386 /* First count how big a dest string we'll need. */
387 while (src
< src_end
)
389 if (! svn_ctype_isascii(*src
) || *src
== '\0')
390 new_len
+= 5; /* 5 slots, for "?\XXX" */
392 new_len
+= 1; /* one slot for the 7-bit char */
397 /* Allocate that amount. */
398 new = apr_palloc(pool
, new_len
+ 1);
402 /* And fill it up. */
403 while (src_orig
< src_end
)
405 if (! svn_ctype_isascii(*src_orig
) || src_orig
== '\0')
407 /* This is the same format as svn_xml_fuzzy_escape uses, but that
408 function escapes different characters. Please keep in sync!
409 ### If we add another fuzzy escape somewhere, we should abstract
410 ### this out to a common function. */
411 sprintf(new, "?\\%03u", (unsigned char) *src_orig
);
428 /* Convert SRC_LENGTH bytes of SRC_DATA in NODE->handle, store the result
429 in *DEST, which is allocated in POOL. */
431 convert_to_stringbuf(xlate_handle_node_t
*node
,
432 const char *src_data
,
433 apr_size_t src_length
,
434 svn_stringbuf_t
**dest
,
438 apr_status_t apr_err
;
440 apr_err
= svn_subr__win32_xlate_to_stringbuf((win32_xlate_t
*) node
->handle
,
441 src_data
, src_length
,
444 apr_size_t buflen
= src_length
* 2;
445 apr_status_t apr_err
;
446 apr_size_t srclen
= src_length
;
447 apr_size_t destlen
= buflen
;
450 /* Initialize *DEST to an empty stringbuf.
451 A 1:2 ratio of input bytes to output bytes (as assigned above)
452 should be enough for most translations, and if it turns out not
453 to be enough, we'll grow the buffer again, sizing it based on a
454 1:3 ratio of the remainder of the string. */
455 *dest
= svn_stringbuf_create_ensure(buflen
+ 1, pool
);
456 destbuf
= (*dest
)->data
;
458 /* Not only does it not make sense to convert an empty string, but
459 apr-iconv is quite unreasonable about not allowing that. */
465 /* Set up state variables for xlate. */
466 destlen
= buflen
- (*dest
)->len
;
468 /* Attempt the conversion. */
469 apr_err
= apr_xlate_conv_buffer(node
->handle
,
470 src_data
+ (src_length
- srclen
),
472 (*dest
)->data
+ (*dest
)->len
,
475 /* Now, update the *DEST->len to track the amount of output data
476 churned out so far from this loop. */
477 (*dest
)->len
+= ((buflen
- (*dest
)->len
) - destlen
);
478 buflen
+= srclen
* 3; /* 3 is middle ground, 2 wasn't enough
479 for all characters in the buffer, 4 is
480 maximum character size (currently) */
483 } while (apr_err
== APR_SUCCESS
&& srclen
!= 0);
486 /* If we exited the loop with an error, return the error. */
492 /* Can't use svn_error_wrap_apr here because it calls functions in
493 this file, leading to infinite recursion. */
494 if (node
->frompage
== SVN_APR_LOCALE_CHARSET
)
495 errstr
= apr_psprintf
496 (pool
, _("Can't convert string from native encoding to '%s':"),
498 else if (node
->topage
== SVN_APR_LOCALE_CHARSET
)
499 errstr
= apr_psprintf
500 (pool
, _("Can't convert string from '%s' to native encoding:"),
503 errstr
= apr_psprintf
504 (pool
, _("Can't convert string from '%s' to '%s':"),
505 node
->frompage
, node
->topage
);
507 err
= svn_error_create(apr_err
, NULL
, fuzzy_escape(src_data
,
509 return svn_error_create(apr_err
, err
, errstr
);
511 /* Else, exited due to success. Trim the result buffer down to the
513 (*dest
)->data
[(*dest
)->len
] = '\0';
519 /* Return APR_EINVAL if the first LEN bytes of DATA contain anything
520 other than seven-bit, non-control (except for whitespace) ASCII
521 characters, finding the error pool from POOL. Otherwise, return
524 check_non_ascii(const char *data
, apr_size_t len
, apr_pool_t
*pool
)
526 const char *data_start
= data
;
528 for (; len
> 0; --len
, data
++)
530 if ((! apr_isascii(*data
))
531 || ((! apr_isspace(*data
))
532 && apr_iscntrl(*data
)))
534 /* Show the printable part of the data, followed by the
535 decimal code of the questionable character. Because if a
536 user ever gets this error, she's going to have to spend
537 time tracking down the non-ASCII data, so we want to help
538 as much as possible. And yes, we just call the unsafe
539 data "non-ASCII", even though the actual constraint is
540 somewhat more complex than that. */
542 if (data
- data_start
)
544 const char *error_data
545 = apr_pstrndup(pool
, data_start
, (data
- data_start
));
547 return svn_error_createf
549 _("Safe data '%s' was followed by non-ASCII byte %d: "
550 "unable to convert to/from UTF-8"),
551 error_data
, *((const unsigned char *) data
));
555 return svn_error_createf
557 _("Non-ASCII character (code %d) detected, "
558 "and unable to convert to/from UTF-8"),
559 *((const unsigned char *) data
));
567 /* Construct an error with code APR_EINVAL and with a suitable message
568 * to describe the invalid UTF-8 sequence DATA of length LEN (which
569 * may have embedded NULLs). We can't simply print the data, almost
570 * by definition we don't really know how it is encoded.
573 invalid_utf8(const char *data
, apr_size_t len
, apr_pool_t
*pool
)
575 const char *last
= svn_utf__last_valid(data
, len
);
576 const char *valid_txt
= "", *invalid_txt
= "";
577 int i
, valid
, invalid
;
579 /* We will display at most 24 valid octets (this may split a leading
580 multi-byte character) as that should fit on one 80 character line. */
584 for (i
= 0; i
< valid
; ++i
)
585 valid_txt
= apr_pstrcat(pool
, valid_txt
,
586 apr_psprintf(pool
, " %02x",
587 (unsigned char)last
[i
-valid
]), NULL
);
589 /* 4 invalid octets will guarantee that the faulty octet is displayed */
590 invalid
= data
+ len
- last
;
593 for (i
= 0; i
< invalid
; ++i
)
594 invalid_txt
= apr_pstrcat(pool
, invalid_txt
,
595 apr_psprintf(pool
, " %02x",
596 (unsigned char)last
[i
]), NULL
);
598 return svn_error_createf(APR_EINVAL
, NULL
,
599 _("Valid UTF-8 data\n(hex:%s)\n"
600 "followed by invalid UTF-8 sequence\n(hex:%s)"),
601 valid_txt
, invalid_txt
);
604 /* Verify that the sequence DATA of length LEN is valid UTF-8.
605 If it is not, return an error with code APR_EINVAL. */
607 check_utf8(const char *data
, apr_size_t len
, apr_pool_t
*pool
)
609 if (! svn_utf__is_valid(data
, len
))
610 return invalid_utf8(data
, len
, pool
);
614 /* Verify that the NULL terminated sequence DATA is valid UTF-8.
615 If it is not, return an error with code APR_EINVAL. */
617 check_cstring_utf8(const char *data
, apr_pool_t
*pool
)
620 if (! svn_utf__cstring_is_valid(data
))
621 return invalid_utf8(data
, strlen(data
), pool
);
627 svn_utf_stringbuf_to_utf8(svn_stringbuf_t
**dest
,
628 const svn_stringbuf_t
*src
,
631 xlate_handle_node_t
*node
;
634 SVN_ERR(get_ntou_xlate_handle_node(&node
, pool
));
638 err
= convert_to_stringbuf(node
, src
->data
, src
->len
, dest
, pool
);
640 err
= check_utf8((*dest
)->data
, (*dest
)->len
, pool
);
644 err
= check_non_ascii(src
->data
, src
->len
, pool
);
646 *dest
= svn_stringbuf_dup(src
, pool
);
649 put_xlate_handle_node(node
, SVN_UTF_NTOU_XLATE_HANDLE
, pool
);
656 svn_utf_string_to_utf8(const svn_string_t
**dest
,
657 const svn_string_t
*src
,
660 svn_stringbuf_t
*destbuf
;
661 xlate_handle_node_t
*node
;
664 SVN_ERR(get_ntou_xlate_handle_node(&node
, pool
));
668 err
= convert_to_stringbuf(node
, src
->data
, src
->len
, &destbuf
, pool
);
670 err
= check_utf8(destbuf
->data
, destbuf
->len
, pool
);
672 *dest
= svn_string_create_from_buf(destbuf
, pool
);
676 err
= check_non_ascii(src
->data
, src
->len
, pool
);
678 *dest
= svn_string_dup(src
, pool
);
681 put_xlate_handle_node(node
, SVN_UTF_NTOU_XLATE_HANDLE
, pool
);
687 /* Common implementation for svn_utf_cstring_to_utf8,
688 svn_utf_cstring_to_utf8_ex, svn_utf_cstring_from_utf8 and
689 svn_utf_cstring_from_utf8_ex. Convert SRC to DEST using NODE->handle as
690 the translator and allocating from POOL. */
692 convert_cstring(const char **dest
,
694 xlate_handle_node_t
*node
,
699 svn_stringbuf_t
*destbuf
;
700 SVN_ERR(convert_to_stringbuf(node
, src
, strlen(src
),
702 *dest
= destbuf
->data
;
706 apr_size_t len
= strlen(src
);
707 SVN_ERR(check_non_ascii(src
, len
, pool
));
708 *dest
= apr_pstrmemdup(pool
, src
, len
);
715 svn_utf_cstring_to_utf8(const char **dest
,
719 xlate_handle_node_t
*node
;
722 SVN_ERR(get_ntou_xlate_handle_node(&node
, pool
));
723 err
= convert_cstring(dest
, src
, node
, pool
);
724 put_xlate_handle_node(node
, SVN_UTF_NTOU_XLATE_HANDLE
, pool
);
726 return check_cstring_utf8(*dest
, pool
);
731 svn_utf_cstring_to_utf8_ex2(const char **dest
,
733 const char *frompage
,
736 xlate_handle_node_t
*node
;
738 const char *convset_key
= get_xlate_key(SVN_APR_UTF8_CHARSET
, frompage
,
741 SVN_ERR(get_xlate_handle_node(&node
, SVN_APR_UTF8_CHARSET
, frompage
,
743 err
= convert_cstring(dest
, src
, node
, pool
);
744 put_xlate_handle_node(node
, convset_key
, pool
);
746 return check_cstring_utf8(*dest
, pool
);
751 svn_utf_cstring_to_utf8_ex(const char **dest
,
753 const char *frompage
,
754 const char *convset_key
,
757 return svn_utf_cstring_to_utf8_ex2(dest
, src
, frompage
, pool
);
762 svn_utf_stringbuf_from_utf8(svn_stringbuf_t
**dest
,
763 const svn_stringbuf_t
*src
,
766 xlate_handle_node_t
*node
;
769 SVN_ERR(get_uton_xlate_handle_node(&node
, pool
));
773 err
= check_utf8(src
->data
, src
->len
, pool
);
775 err
= convert_to_stringbuf(node
, src
->data
, src
->len
, dest
, pool
);
779 err
= check_non_ascii(src
->data
, src
->len
, pool
);
781 *dest
= svn_stringbuf_dup(src
, pool
);
784 put_xlate_handle_node(node
, SVN_UTF_UTON_XLATE_HANDLE
, pool
);
791 svn_utf_string_from_utf8(const svn_string_t
**dest
,
792 const svn_string_t
*src
,
795 svn_stringbuf_t
*dbuf
;
796 xlate_handle_node_t
*node
;
799 SVN_ERR(get_uton_xlate_handle_node(&node
, pool
));
803 err
= check_utf8(src
->data
, src
->len
, pool
);
805 err
= convert_to_stringbuf(node
, src
->data
, src
->len
,
808 *dest
= svn_string_create_from_buf(dbuf
, pool
);
812 err
= check_non_ascii(src
->data
, src
->len
, pool
);
814 *dest
= svn_string_dup(src
, pool
);
817 put_xlate_handle_node(node
, SVN_UTF_UTON_XLATE_HANDLE
, pool
);
824 svn_utf_cstring_from_utf8(const char **dest
,
828 xlate_handle_node_t
*node
;
831 SVN_ERR(check_utf8(src
, strlen(src
), pool
));
833 SVN_ERR(get_uton_xlate_handle_node(&node
, pool
));
834 err
= convert_cstring(dest
, src
, node
, pool
);
835 put_xlate_handle_node(node
, SVN_UTF_UTON_XLATE_HANDLE
, pool
);
842 svn_utf_cstring_from_utf8_ex2(const char **dest
,
847 xlate_handle_node_t
*node
;
849 const char *convset_key
= get_xlate_key(topage
, SVN_APR_UTF8_CHARSET
,
852 SVN_ERR(check_utf8(src
, strlen(src
), pool
));
854 SVN_ERR(get_xlate_handle_node(&node
, topage
, SVN_APR_UTF8_CHARSET
,
856 err
= convert_cstring(dest
, src
, node
, pool
);
857 put_xlate_handle_node(node
, convset_key
, pool
);
864 svn_utf_cstring_from_utf8_ex(const char **dest
,
867 const char *convset_key
,
870 return svn_utf_cstring_from_utf8_ex2(dest
, src
, topage
, pool
);
875 svn_utf__cstring_from_utf8_fuzzy(const char *src
,
877 svn_error_t
*(*convert_from_utf8
)
878 (const char **, const char *, apr_pool_t
*))
880 const char *escaped
, *converted
;
883 escaped
= fuzzy_escape(src
, strlen(src
), pool
);
885 /* Okay, now we have a *new* UTF-8 string, one that's guaranteed to
886 contain only 7-bit bytes :-). Recode to native... */
887 err
= convert_from_utf8(((const char **) &converted
), escaped
, pool
);
891 svn_error_clear(err
);
897 /* ### Check the client locale, maybe we can avoid that second
898 * conversion! See Ulrich Drepper's patch at
899 * http://subversion.tigris.org/issues/show_bug.cgi?id=807.
905 svn_utf_cstring_from_utf8_fuzzy(const char *src
,
908 return svn_utf__cstring_from_utf8_fuzzy(src
, pool
,
909 svn_utf_cstring_from_utf8
);
914 svn_utf_cstring_from_utf8_stringbuf(const char **dest
,
915 const svn_stringbuf_t
*src
,
918 svn_stringbuf_t
*destbuf
;
920 SVN_ERR(svn_utf_stringbuf_from_utf8(&destbuf
, src
, pool
));
921 *dest
= destbuf
->data
;
928 svn_utf_cstring_from_utf8_string(const char **dest
,
929 const svn_string_t
*src
,
932 svn_stringbuf_t
*dbuf
;
933 xlate_handle_node_t
*node
;
936 SVN_ERR(get_uton_xlate_handle_node(&node
, pool
));
940 err
= check_utf8(src
->data
, src
->len
, pool
);
942 err
= convert_to_stringbuf(node
, src
->data
, src
->len
,
949 err
= check_non_ascii(src
->data
, src
->len
, pool
);
951 *dest
= apr_pstrmemdup(pool
, src
->data
, src
->len
);
954 put_xlate_handle_node(node
, SVN_UTF_UTON_XLATE_HANDLE
, pool
);