TortoiseGitMerge: Use shipped zlib1.dll instead of own copy
[TortoiseGit.git] / src / TortoiseMerge / libsvn_diff / utf.c
blobe7a3d8d887d42cf103673d5f54988468e9a23d2a
1 /*
2 * utf.c: UTF-8 conversion routines
4 * ====================================================================
5 * Copyright (c) 2000-2007, 2009 CollabNet. All rights reserved.
7 * This software is licensed as described in the file COPYING, which
8 * you should have received as part of this distribution. The terms
9 * are also available at http://subversion.tigris.org/license-1.html.
10 * If newer versions of this license are posted there, you may use a
11 * newer version instead, at your option.
13 * This software consists of voluntary contributions made by many
14 * individuals. For exact contribution history, see the revision
15 * history and logs, available at http://subversion.tigris.org/.
16 * ====================================================================
21 #include <string.h>
22 #include <assert.h>
24 #include <apr_strings.h>
25 #include <apr_lib.h>
26 #include <apr_xlate.h>
28 #include "svn_string.h"
29 #include "svn_error.h"
30 #include "svn_pools.h"
31 #include "svn_ctype.h"
32 #include "svn_utf.h"
33 //#include "svn_private_config.h"
34 #include "win32_xlate.h"
36 #include "svn_utf_private.h"
/* Cache keys for the native-to-UTF-8 and UTF-8-to-native xlate handles. */
#define SVN_UTF_NTOU_XLATE_HANDLE "svn-utf-ntou-xlate-handle"
#define SVN_UTF_UTON_XLATE_HANDLE "svn-utf-uton-xlate-handle"

/* Charset name used for the UTF-8 side of every conversion in this file. */
#define SVN_APR_UTF8_CHARSET "UTF-8"

#if APR_HAS_THREADS
/* Mutex guarding the global xlate handle cache.  NULL while the cache is
   not set up (see svn_utf_initialize() and xlate_cleanup()). */
static apr_thread_mutex_t *xlate_handle_mutex;
#endif
48 /* The xlate handle cache is a global hash table with linked lists of xlate
49 * handles. In multi-threaded environments, a thread "borrows" an xlate
50 * handle from the cache during a translation and puts it back afterwards.
51 * This avoids holding a global lock for all translations.
52 * If there is no handle for a particular key when needed, a new is
53 * handle is created and put in the cache after use.
54 * This means that there will be at most N handles open for a key, where N
55 * is the number of simultanous handles in use for that key. */
57 typedef struct xlate_handle_node_t {
58 apr_xlate_t *handle;
59 /* FALSE if the handle is not valid, since its pool is being
60 destroyed. */
61 svn_boolean_t valid;
62 /* The name of a char encoding or APR_LOCALE_CHARSET. */
63 const char *frompage, *topage;
64 struct xlate_handle_node_t *next;
65 } xlate_handle_node_t;
67 /* This maps const char * userdata_key strings to xlate_handle_node_t **
68 handles to the first entry in the linked list of xlate handles. We don't
69 store the pointer to the list head directly in the hash table, since we
70 remove/insert entries at the head in the list in the code below, and
71 we can't use apr_hash_set() in each character translation because that
72 function allocates memory in each call where the value is non-NULL.
73 Since these allocations take place in a global pool, this would be a
74 memory leak. */
75 static apr_hash_t *xlate_handle_hash = NULL;
77 /* Clean up the xlate handle cache. */
78 static apr_status_t
79 xlate_cleanup(void *arg)
81 /* We set the cache variables to NULL so that translation works in other
82 cleanup functions, even if it isn't cached then. */
83 #if APR_HAS_THREADS
84 apr_thread_mutex_destroy(xlate_handle_mutex);
85 xlate_handle_mutex = NULL;
86 #endif
87 xlate_handle_hash = NULL;
89 return APR_SUCCESS;
92 /* Set the handle of ARG to NULL. */
93 static apr_status_t
94 xlate_handle_node_cleanup(void *arg)
96 xlate_handle_node_t *node = arg;
98 node->valid = FALSE;
99 return APR_SUCCESS;
102 void
103 svn_utf_initialize(apr_pool_t *pool)
105 apr_pool_t *subpool;
106 #if APR_HAS_THREADS
107 apr_thread_mutex_t *mutex;
108 #endif
110 if (!xlate_handle_hash)
112 /* We create our own subpool, which we protect with the mutex.
113 We can't use the pool passed to us by the caller, since we will
114 use it for xlate handle allocations, possibly in multiple threads,
115 and pool allocation is not thread-safe. */
116 subpool = svn_pool_create(pool);
117 #if APR_HAS_THREADS
118 if (apr_thread_mutex_create(&mutex, APR_THREAD_MUTEX_DEFAULT, subpool)
119 == APR_SUCCESS)
120 xlate_handle_mutex = mutex;
121 else
122 return;
123 #endif
125 xlate_handle_hash = apr_hash_make(subpool);
126 apr_pool_cleanup_register(subpool, NULL, xlate_cleanup,
127 apr_pool_cleanup_null);
131 /* Return a unique string key based on TOPAGE and FROMPAGE. TOPAGE and
132 * FROMPAGE can be any valid arguments of the same name to
133 * apr_xlate_open(). Allocate the returned string in POOL. */
134 static const char*
135 get_xlate_key(const char *topage,
136 const char *frompage,
137 apr_pool_t *pool)
139 /* In the cases of SVN_APR_LOCALE_CHARSET and SVN_APR_DEFAULT_CHARSET
140 * topage/frompage is really an int, not a valid string. So generate a
141 * unique key accordingly. */
142 if (frompage == SVN_APR_LOCALE_CHARSET)
143 frompage = "APR_LOCALE_CHARSET";
144 else if (frompage == SVN_APR_DEFAULT_CHARSET)
145 frompage = "APR_DEFAULT_CHARSET";
147 if (topage == SVN_APR_LOCALE_CHARSET)
148 topage = "APR_LOCALE_CHARSET";
149 else if (topage == SVN_APR_DEFAULT_CHARSET)
150 topage = "APR_DEFAULT_CHARSET";
152 return apr_pstrcat(pool, "svn-utf-", frompage, "to", topage,
153 "-xlate-handle", NULL);
156 /* Set *RET to a handle node for converting from FROMPAGE to TOPAGE,
157 creating the handle node if it doesn't exist in USERDATA_KEY.
158 If a node is not cached and apr_xlate_open() returns APR_EINVAL or
159 APR_ENOTIMPL, set (*RET)->handle to NULL. If fail for any other
160 reason, return the error.
162 Allocate *RET and its xlate handle in POOL if svn_utf_initialize()
163 hasn't been called or USERDATA_KEY is NULL. Else, allocate them
164 in the pool of xlate_handle_hash. */
165 static svn_error_t *
166 get_xlate_handle_node(xlate_handle_node_t **ret,
167 const char *topage, const char *frompage,
168 const char *userdata_key, apr_pool_t *pool)
170 xlate_handle_node_t **old_node_p;
171 xlate_handle_node_t *old_node = NULL;
172 apr_status_t apr_err;
173 apr_xlate_t *handle;
174 svn_error_t *err = NULL;
176 /* If we already have a handle, just return it. */
177 if (userdata_key)
179 if (xlate_handle_hash)
181 #if APR_HAS_THREADS
182 apr_err = apr_thread_mutex_lock(xlate_handle_mutex);
183 if (apr_err != APR_SUCCESS)
184 return svn_error_create(apr_err, NULL,
185 _("Can't lock charset translation mutex"));
186 #endif
187 old_node_p = apr_hash_get(xlate_handle_hash, userdata_key,
188 APR_HASH_KEY_STRING);
189 if (old_node_p)
190 old_node = *old_node_p;
191 if (old_node)
193 /* Ensure that the handle is still valid. */
194 if (old_node->valid)
196 /* Remove from the list. */
197 *old_node_p = old_node->next;
198 old_node->next = NULL;
199 #if APR_HAS_THREADS
200 apr_err = apr_thread_mutex_unlock(xlate_handle_mutex);
201 if (apr_err != APR_SUCCESS)
202 return svn_error_create(apr_err, NULL,
203 _("Can't unlock charset "
204 "translation mutex"));
205 #endif
206 *ret = old_node;
207 return SVN_NO_ERROR;
211 else
213 void *p;
214 /* We fall back on a per-pool cache instead. */
215 apr_pool_userdata_get(&p, userdata_key, pool);
216 old_node = p;
217 /* Ensure that the handle is still valid. */
218 if (old_node && old_node->valid)
220 *ret = old_node;
221 return SVN_NO_ERROR;
226 /* Note that we still have the mutex locked (if it is initialized), so we
227 can use the global pool for creating the new xlate handle. */
229 /* The error handling doesn't support the following cases, since we don't
230 use them currently. Catch this here. */
231 SVN_ERR_ASSERT(frompage != SVN_APR_DEFAULT_CHARSET
232 && topage != SVN_APR_DEFAULT_CHARSET
233 && (frompage != SVN_APR_LOCALE_CHARSET
234 || topage != SVN_APR_LOCALE_CHARSET));
236 /* Use the correct pool for creating the handle. */
237 if (userdata_key && xlate_handle_hash)
238 pool = apr_hash_pool_get(xlate_handle_hash);
240 /* Try to create a handle. */
241 #if defined(WIN32)
242 apr_err = svn_subr__win32_xlate_open((win32_xlate_t **)&handle, topage,
243 frompage, pool);
244 #else
245 apr_err = apr_xlate_open(&handle, topage, frompage, pool);
246 #endif
248 if (APR_STATUS_IS_EINVAL(apr_err) || APR_STATUS_IS_ENOTIMPL(apr_err))
249 handle = NULL;
250 else if (apr_err != APR_SUCCESS)
252 const char *errstr;
253 /* Can't use svn_error_wrap_apr here because it calls functions in
254 this file, leading to infinite recursion. */
255 if (frompage == SVN_APR_LOCALE_CHARSET)
256 errstr = apr_psprintf(pool,
257 _("Can't create a character converter from "
258 "native encoding to '%s'"), topage);
259 else if (topage == SVN_APR_LOCALE_CHARSET)
260 errstr = apr_psprintf(pool,
261 _("Can't create a character converter from "
262 "'%s' to native encoding"), frompage);
263 else
264 errstr = apr_psprintf(pool,
265 _("Can't create a character converter from "
266 "'%s' to '%s'"), frompage, topage);
268 err = svn_error_create(apr_err, NULL, errstr);
269 goto cleanup;
272 /* Allocate and initialize the node. */
273 *ret = apr_palloc(pool, sizeof(xlate_handle_node_t));
274 (*ret)->handle = handle;
275 (*ret)->valid = TRUE;
276 (*ret)->frompage = ((frompage != SVN_APR_LOCALE_CHARSET)
277 ? apr_pstrdup(pool, frompage) : frompage);
278 (*ret)->topage = ((topage != SVN_APR_LOCALE_CHARSET)
279 ? apr_pstrdup(pool, topage) : topage);
280 (*ret)->next = NULL;
282 /* If we are called from inside a pool cleanup handler, the just created
283 xlate handle will be closed when that handler returns by a newly
284 registered cleanup handler, however, the handle is still cached by us.
285 To prevent this, we register a cleanup handler that will reset the valid
286 flag of our node, so we don't use an invalid handle. */
287 if (handle)
288 apr_pool_cleanup_register(pool, *ret, xlate_handle_node_cleanup,
289 apr_pool_cleanup_null);
291 cleanup:
292 /* Don't need the lock anymore. */
293 #if APR_HAS_THREADS
294 if (userdata_key && xlate_handle_hash)
296 apr_status_t unlock_err = apr_thread_mutex_unlock(xlate_handle_mutex);
297 if (unlock_err != APR_SUCCESS)
298 return svn_error_create(unlock_err, NULL,
299 _("Can't unlock charset translation mutex"));
301 #endif
303 return err;
306 /* Put back NODE into the xlate handle cache for use by other calls.
307 If there is no global cache, store the handle in POOL.
308 Ignore errors related to locking/unlocking the mutex.
309 ### Mutex errors here are very weird. Should we handle them "correctly"
310 ### even if that complicates error handling in the routines below? */
311 static void
312 put_xlate_handle_node(xlate_handle_node_t *node,
313 const char *userdata_key,
314 apr_pool_t *pool)
316 assert(node->next == NULL);
317 if (!userdata_key)
318 return;
319 if (xlate_handle_hash)
321 xlate_handle_node_t **node_p;
322 #if APR_HAS_THREADS
323 if (apr_thread_mutex_lock(xlate_handle_mutex) != APR_SUCCESS)
324 SVN_ERR_MALFUNCTION_NO_RETURN();
325 #endif
326 node_p = apr_hash_get(xlate_handle_hash, userdata_key,
327 APR_HASH_KEY_STRING);
328 if (node_p == NULL)
330 userdata_key = apr_pstrdup(apr_hash_pool_get(xlate_handle_hash),
331 userdata_key);
332 node_p = apr_palloc(apr_hash_pool_get(xlate_handle_hash),
333 sizeof(*node_p));
334 *node_p = NULL;
335 apr_hash_set(xlate_handle_hash, userdata_key,
336 APR_HASH_KEY_STRING, node_p);
338 node->next = *node_p;
339 *node_p = node;
340 #if APR_HAS_THREADS
341 if (apr_thread_mutex_unlock(xlate_handle_mutex) != APR_SUCCESS)
342 SVN_ERR_MALFUNCTION_NO_RETURN();
343 #endif
345 else
347 /* Store it in the per-pool cache. */
348 apr_pool_userdata_set(node, userdata_key, apr_pool_cleanup_null, pool);
352 /* Return the apr_xlate handle for converting native characters to UTF-8. */
353 static svn_error_t *
354 get_ntou_xlate_handle_node(xlate_handle_node_t **ret, apr_pool_t *pool)
356 return get_xlate_handle_node(ret, SVN_APR_UTF8_CHARSET,
357 SVN_APR_LOCALE_CHARSET,
358 SVN_UTF_NTOU_XLATE_HANDLE, pool);
362 /* Return the apr_xlate handle for converting UTF-8 to native characters.
363 Create one if it doesn't exist. If unable to find a handle, or
364 unable to create one because apr_xlate_open returned APR_EINVAL, then
365 set *RET to null and return SVN_NO_ERROR; if fail for some other
366 reason, return error. */
367 static svn_error_t *
368 get_uton_xlate_handle_node(xlate_handle_node_t **ret, apr_pool_t *pool)
370 return get_xlate_handle_node(ret, SVN_APR_LOCALE_CHARSET,
371 SVN_APR_UTF8_CHARSET,
372 SVN_UTF_UTON_XLATE_HANDLE, pool);
376 /* Copy LEN bytes of SRC, converting non-ASCII and zero bytes to ?\nnn
377 sequences, allocating the result in POOL. */
378 static const char *
379 fuzzy_escape(const char *src, apr_size_t len, apr_pool_t *pool)
381 const char *src_orig = src, *src_end = src + len;
382 apr_size_t new_len = 0;
383 char *new;
384 const char *new_orig;
386 /* First count how big a dest string we'll need. */
387 while (src < src_end)
389 if (! svn_ctype_isascii(*src) || *src == '\0')
390 new_len += 5; /* 5 slots, for "?\XXX" */
391 else
392 new_len += 1; /* one slot for the 7-bit char */
394 src++;
397 /* Allocate that amount. */
398 new = apr_palloc(pool, new_len + 1);
400 new_orig = new;
402 /* And fill it up. */
403 while (src_orig < src_end)
405 if (! svn_ctype_isascii(*src_orig) || src_orig == '\0')
407 /* This is the same format as svn_xml_fuzzy_escape uses, but that
408 function escapes different characters. Please keep in sync!
409 ### If we add another fuzzy escape somewhere, we should abstract
410 ### this out to a common function. */
411 sprintf(new, "?\\%03u", (unsigned char) *src_orig);
412 new += 5;
414 else
416 *new = *src_orig;
417 new += 1;
420 src_orig++;
423 *new = '\0';
425 return new_orig;
428 /* Convert SRC_LENGTH bytes of SRC_DATA in NODE->handle, store the result
429 in *DEST, which is allocated in POOL. */
430 static svn_error_t *
431 convert_to_stringbuf(xlate_handle_node_t *node,
432 const char *src_data,
433 apr_size_t src_length,
434 svn_stringbuf_t **dest,
435 apr_pool_t *pool)
437 #ifdef WIN32
438 apr_status_t apr_err;
440 apr_err = svn_subr__win32_xlate_to_stringbuf((win32_xlate_t *) node->handle,
441 src_data, src_length,
442 dest, pool);
443 #else
444 apr_size_t buflen = src_length * 2;
445 apr_status_t apr_err;
446 apr_size_t srclen = src_length;
447 apr_size_t destlen = buflen;
448 char *destbuf;
450 /* Initialize *DEST to an empty stringbuf.
451 A 1:2 ratio of input bytes to output bytes (as assigned above)
452 should be enough for most translations, and if it turns out not
453 to be enough, we'll grow the buffer again, sizing it based on a
454 1:3 ratio of the remainder of the string. */
455 *dest = svn_stringbuf_create_ensure(buflen + 1, pool);
456 destbuf = (*dest)->data;
458 /* Not only does it not make sense to convert an empty string, but
459 apr-iconv is quite unreasonable about not allowing that. */
460 if (src_length == 0)
461 return SVN_NO_ERROR;
465 /* Set up state variables for xlate. */
466 destlen = buflen - (*dest)->len;
468 /* Attempt the conversion. */
469 apr_err = apr_xlate_conv_buffer(node->handle,
470 src_data + (src_length - srclen),
471 &srclen,
472 (*dest)->data + (*dest)->len,
473 &destlen);
475 /* Now, update the *DEST->len to track the amount of output data
476 churned out so far from this loop. */
477 (*dest)->len += ((buflen - (*dest)->len) - destlen);
478 buflen += srclen * 3; /* 3 is middle ground, 2 wasn't enough
479 for all characters in the buffer, 4 is
480 maximum character size (currently) */
483 } while (apr_err == APR_SUCCESS && srclen != 0);
484 #endif
486 /* If we exited the loop with an error, return the error. */
487 if (apr_err)
489 const char *errstr;
490 svn_error_t *err;
492 /* Can't use svn_error_wrap_apr here because it calls functions in
493 this file, leading to infinite recursion. */
494 if (node->frompage == SVN_APR_LOCALE_CHARSET)
495 errstr = apr_psprintf
496 (pool, _("Can't convert string from native encoding to '%s':"),
497 node->topage);
498 else if (node->topage == SVN_APR_LOCALE_CHARSET)
499 errstr = apr_psprintf
500 (pool, _("Can't convert string from '%s' to native encoding:"),
501 node->frompage);
502 else
503 errstr = apr_psprintf
504 (pool, _("Can't convert string from '%s' to '%s':"),
505 node->frompage, node->topage);
507 err = svn_error_create(apr_err, NULL, fuzzy_escape(src_data,
508 src_length, pool));
509 return svn_error_create(apr_err, err, errstr);
511 /* Else, exited due to success. Trim the result buffer down to the
512 right length. */
513 (*dest)->data[(*dest)->len] = '\0';
515 return SVN_NO_ERROR;
519 /* Return APR_EINVAL if the first LEN bytes of DATA contain anything
520 other than seven-bit, non-control (except for whitespace) ASCII
521 characters, finding the error pool from POOL. Otherwise, return
522 SVN_NO_ERROR. */
523 static svn_error_t *
524 check_non_ascii(const char *data, apr_size_t len, apr_pool_t *pool)
526 const char *data_start = data;
528 for (; len > 0; --len, data++)
530 if ((! apr_isascii(*data))
531 || ((! apr_isspace(*data))
532 && apr_iscntrl(*data)))
534 /* Show the printable part of the data, followed by the
535 decimal code of the questionable character. Because if a
536 user ever gets this error, she's going to have to spend
537 time tracking down the non-ASCII data, so we want to help
538 as much as possible. And yes, we just call the unsafe
539 data "non-ASCII", even though the actual constraint is
540 somewhat more complex than that. */
542 if (data - data_start)
544 const char *error_data
545 = apr_pstrndup(pool, data_start, (data - data_start));
547 return svn_error_createf
548 (APR_EINVAL, NULL,
549 _("Safe data '%s' was followed by non-ASCII byte %d: "
550 "unable to convert to/from UTF-8"),
551 error_data, *((const unsigned char *) data));
553 else
555 return svn_error_createf
556 (APR_EINVAL, NULL,
557 _("Non-ASCII character (code %d) detected, "
558 "and unable to convert to/from UTF-8"),
559 *((const unsigned char *) data));
564 return SVN_NO_ERROR;
567 /* Construct an error with code APR_EINVAL and with a suitable message
568 * to describe the invalid UTF-8 sequence DATA of length LEN (which
569 * may have embedded NULLs). We can't simply print the data, almost
570 * by definition we don't really know how it is encoded.
572 static svn_error_t *
573 invalid_utf8(const char *data, apr_size_t len, apr_pool_t *pool)
575 const char *last = svn_utf__last_valid(data, len);
576 const char *valid_txt = "", *invalid_txt = "";
577 int i, valid, invalid;
579 /* We will display at most 24 valid octets (this may split a leading
580 multi-byte character) as that should fit on one 80 character line. */
581 valid = last - data;
582 if (valid > 24)
583 valid = 24;
584 for (i = 0; i < valid; ++i)
585 valid_txt = apr_pstrcat(pool, valid_txt,
586 apr_psprintf(pool, " %02x",
587 (unsigned char)last[i-valid]), NULL);
589 /* 4 invalid octets will guarantee that the faulty octet is displayed */
590 invalid = data + len - last;
591 if (invalid > 4)
592 invalid = 4;
593 for (i = 0; i < invalid; ++i)
594 invalid_txt = apr_pstrcat(pool, invalid_txt,
595 apr_psprintf(pool, " %02x",
596 (unsigned char)last[i]), NULL);
598 return svn_error_createf(APR_EINVAL, NULL,
599 _("Valid UTF-8 data\n(hex:%s)\n"
600 "followed by invalid UTF-8 sequence\n(hex:%s)"),
601 valid_txt, invalid_txt);
604 /* Verify that the sequence DATA of length LEN is valid UTF-8.
605 If it is not, return an error with code APR_EINVAL. */
606 static svn_error_t *
607 check_utf8(const char *data, apr_size_t len, apr_pool_t *pool)
609 if (! svn_utf__is_valid(data, len))
610 return invalid_utf8(data, len, pool);
611 return SVN_NO_ERROR;
614 /* Verify that the NULL terminated sequence DATA is valid UTF-8.
615 If it is not, return an error with code APR_EINVAL. */
616 static svn_error_t *
617 check_cstring_utf8(const char *data, apr_pool_t *pool)
620 if (! svn_utf__cstring_is_valid(data))
621 return invalid_utf8(data, strlen(data), pool);
622 return SVN_NO_ERROR;
626 svn_error_t *
627 svn_utf_stringbuf_to_utf8(svn_stringbuf_t **dest,
628 const svn_stringbuf_t *src,
629 apr_pool_t *pool)
631 xlate_handle_node_t *node;
632 svn_error_t *err;
634 SVN_ERR(get_ntou_xlate_handle_node(&node, pool));
636 if (node->handle)
638 err = convert_to_stringbuf(node, src->data, src->len, dest, pool);
639 if (! err)
640 err = check_utf8((*dest)->data, (*dest)->len, pool);
642 else
644 err = check_non_ascii(src->data, src->len, pool);
645 if (! err)
646 *dest = svn_stringbuf_dup(src, pool);
649 put_xlate_handle_node(node, SVN_UTF_NTOU_XLATE_HANDLE, pool);
651 return err;
655 svn_error_t *
656 svn_utf_string_to_utf8(const svn_string_t **dest,
657 const svn_string_t *src,
658 apr_pool_t *pool)
660 svn_stringbuf_t *destbuf;
661 xlate_handle_node_t *node;
662 svn_error_t *err;
664 SVN_ERR(get_ntou_xlate_handle_node(&node, pool));
666 if (node->handle)
668 err = convert_to_stringbuf(node, src->data, src->len, &destbuf, pool);
669 if (! err)
670 err = check_utf8(destbuf->data, destbuf->len, pool);
671 if (! err)
672 *dest = svn_string_create_from_buf(destbuf, pool);
674 else
676 err = check_non_ascii(src->data, src->len, pool);
677 if (! err)
678 *dest = svn_string_dup(src, pool);
681 put_xlate_handle_node(node, SVN_UTF_NTOU_XLATE_HANDLE, pool);
683 return err;
687 /* Common implementation for svn_utf_cstring_to_utf8,
688 svn_utf_cstring_to_utf8_ex, svn_utf_cstring_from_utf8 and
689 svn_utf_cstring_from_utf8_ex. Convert SRC to DEST using NODE->handle as
690 the translator and allocating from POOL. */
691 static svn_error_t *
692 convert_cstring(const char **dest,
693 const char *src,
694 xlate_handle_node_t *node,
695 apr_pool_t *pool)
697 if (node->handle)
699 svn_stringbuf_t *destbuf;
700 SVN_ERR(convert_to_stringbuf(node, src, strlen(src),
701 &destbuf, pool));
702 *dest = destbuf->data;
704 else
706 apr_size_t len = strlen(src);
707 SVN_ERR(check_non_ascii(src, len, pool));
708 *dest = apr_pstrmemdup(pool, src, len);
710 return SVN_NO_ERROR;
714 svn_error_t *
715 svn_utf_cstring_to_utf8(const char **dest,
716 const char *src,
717 apr_pool_t *pool)
719 xlate_handle_node_t *node;
720 svn_error_t *err;
722 SVN_ERR(get_ntou_xlate_handle_node(&node, pool));
723 err = convert_cstring(dest, src, node, pool);
724 put_xlate_handle_node(node, SVN_UTF_NTOU_XLATE_HANDLE, pool);
725 SVN_ERR(err);
726 return check_cstring_utf8(*dest, pool);
730 svn_error_t *
731 svn_utf_cstring_to_utf8_ex2(const char **dest,
732 const char *src,
733 const char *frompage,
734 apr_pool_t *pool)
736 xlate_handle_node_t *node;
737 svn_error_t *err;
738 const char *convset_key = get_xlate_key(SVN_APR_UTF8_CHARSET, frompage,
739 pool);
741 SVN_ERR(get_xlate_handle_node(&node, SVN_APR_UTF8_CHARSET, frompage,
742 convset_key, pool));
743 err = convert_cstring(dest, src, node, pool);
744 put_xlate_handle_node(node, convset_key, pool);
745 SVN_ERR(err);
746 return check_cstring_utf8(*dest, pool);
750 svn_error_t *
751 svn_utf_cstring_to_utf8_ex(const char **dest,
752 const char *src,
753 const char *frompage,
754 const char *convset_key,
755 apr_pool_t *pool)
757 return svn_utf_cstring_to_utf8_ex2(dest, src, frompage, pool);
761 svn_error_t *
762 svn_utf_stringbuf_from_utf8(svn_stringbuf_t **dest,
763 const svn_stringbuf_t *src,
764 apr_pool_t *pool)
766 xlate_handle_node_t *node;
767 svn_error_t *err;
769 SVN_ERR(get_uton_xlate_handle_node(&node, pool));
771 if (node->handle)
773 err = check_utf8(src->data, src->len, pool);
774 if (! err)
775 err = convert_to_stringbuf(node, src->data, src->len, dest, pool);
777 else
779 err = check_non_ascii(src->data, src->len, pool);
780 if (! err)
781 *dest = svn_stringbuf_dup(src, pool);
784 put_xlate_handle_node(node, SVN_UTF_UTON_XLATE_HANDLE, pool);
786 return err;
790 svn_error_t *
791 svn_utf_string_from_utf8(const svn_string_t **dest,
792 const svn_string_t *src,
793 apr_pool_t *pool)
795 svn_stringbuf_t *dbuf;
796 xlate_handle_node_t *node;
797 svn_error_t *err;
799 SVN_ERR(get_uton_xlate_handle_node(&node, pool));
801 if (node->handle)
803 err = check_utf8(src->data, src->len, pool);
804 if (! err)
805 err = convert_to_stringbuf(node, src->data, src->len,
806 &dbuf, pool);
807 if (! err)
808 *dest = svn_string_create_from_buf(dbuf, pool);
810 else
812 err = check_non_ascii(src->data, src->len, pool);
813 if (! err)
814 *dest = svn_string_dup(src, pool);
817 put_xlate_handle_node(node, SVN_UTF_UTON_XLATE_HANDLE, pool);
819 return err;
823 svn_error_t *
824 svn_utf_cstring_from_utf8(const char **dest,
825 const char *src,
826 apr_pool_t *pool)
828 xlate_handle_node_t *node;
829 svn_error_t *err;
831 SVN_ERR(check_utf8(src, strlen(src), pool));
833 SVN_ERR(get_uton_xlate_handle_node(&node, pool));
834 err = convert_cstring(dest, src, node, pool);
835 put_xlate_handle_node(node, SVN_UTF_UTON_XLATE_HANDLE, pool);
837 return err;
841 svn_error_t *
842 svn_utf_cstring_from_utf8_ex2(const char **dest,
843 const char *src,
844 const char *topage,
845 apr_pool_t *pool)
847 xlate_handle_node_t *node;
848 svn_error_t *err;
849 const char *convset_key = get_xlate_key(topage, SVN_APR_UTF8_CHARSET,
850 pool);
852 SVN_ERR(check_utf8(src, strlen(src), pool));
854 SVN_ERR(get_xlate_handle_node(&node, topage, SVN_APR_UTF8_CHARSET,
855 convset_key, pool));
856 err = convert_cstring(dest, src, node, pool);
857 put_xlate_handle_node(node, convset_key, pool);
859 return err;
863 svn_error_t *
864 svn_utf_cstring_from_utf8_ex(const char **dest,
865 const char *src,
866 const char *topage,
867 const char *convset_key,
868 apr_pool_t *pool)
870 return svn_utf_cstring_from_utf8_ex2(dest, src, topage, pool);
874 const char *
875 svn_utf__cstring_from_utf8_fuzzy(const char *src,
876 apr_pool_t *pool,
877 svn_error_t *(*convert_from_utf8)
878 (const char **, const char *, apr_pool_t *))
880 const char *escaped, *converted;
881 svn_error_t *err;
883 escaped = fuzzy_escape(src, strlen(src), pool);
885 /* Okay, now we have a *new* UTF-8 string, one that's guaranteed to
886 contain only 7-bit bytes :-). Recode to native... */
887 err = convert_from_utf8(((const char **) &converted), escaped, pool);
889 if (err)
891 svn_error_clear(err);
892 return escaped;
894 else
895 return converted;
897 /* ### Check the client locale, maybe we can avoid that second
898 * conversion! See Ulrich Drepper's patch at
899 * http://subversion.tigris.org/issues/show_bug.cgi?id=807.
904 const char *
905 svn_utf_cstring_from_utf8_fuzzy(const char *src,
906 apr_pool_t *pool)
908 return svn_utf__cstring_from_utf8_fuzzy(src, pool,
909 svn_utf_cstring_from_utf8);
913 svn_error_t *
914 svn_utf_cstring_from_utf8_stringbuf(const char **dest,
915 const svn_stringbuf_t *src,
916 apr_pool_t *pool)
918 svn_stringbuf_t *destbuf;
920 SVN_ERR(svn_utf_stringbuf_from_utf8(&destbuf, src, pool));
921 *dest = destbuf->data;
923 return SVN_NO_ERROR;
927 svn_error_t *
928 svn_utf_cstring_from_utf8_string(const char **dest,
929 const svn_string_t *src,
930 apr_pool_t *pool)
932 svn_stringbuf_t *dbuf;
933 xlate_handle_node_t *node;
934 svn_error_t *err;
936 SVN_ERR(get_uton_xlate_handle_node(&node, pool));
938 if (node->handle)
940 err = check_utf8(src->data, src->len, pool);
941 if (! err)
942 err = convert_to_stringbuf(node, src->data, src->len,
943 &dbuf, pool);
944 if (! err)
945 *dest = dbuf->data;
947 else
949 err = check_non_ascii(src->data, src->len, pool);
950 if (! err)
951 *dest = apr_pstrmemdup(pool, src->data, src->len);
954 put_xlate_handle_node(node, SVN_UTF_UTON_XLATE_HANDLE, pool);
956 return err;